diff --git a/.github/workflows/audit-cron.yml b/.github/workflows/audit-cron.yml new file mode 100644 index 0000000..e29f843 --- /dev/null +++ b/.github/workflows/audit-cron.yml @@ -0,0 +1,44 @@ +name: Security Audit (daily) + +# Runs `cargo audit` and `cargo deny` on a fixed cron so newly-published CVEs +# and dependency policy regressions are caught even if the repository has not +# been modified. + +on: + schedule: + # 04:17 UTC daily - off-peak for GitHub Actions scheduler. + - cron: "17 4 * * *" + workflow_dispatch: + +permissions: + contents: read + issues: write + +concurrency: + group: audit-cron-${{ github.ref }} + cancel-in-progress: true + +jobs: + audit: + name: cargo audit (scheduled) + runs-on: ubuntu-latest + timeout-minutes: 10 + steps: + - uses: step-security/harden-runner@v2 + with: + egress-policy: audit + - uses: actions/checkout@v6 + - uses: rustsec/audit-check@v2.0.0 + with: + token: ${{ secrets.GITHUB_TOKEN }} + + deny: + name: cargo deny (scheduled) + runs-on: ubuntu-latest + timeout-minutes: 10 + steps: + - uses: step-security/harden-runner@v2 + with: + egress-policy: audit + - uses: actions/checkout@v6 + - uses: EmbarkStudios/cargo-deny-action@v2 diff --git a/.github/workflows/auto-merge-release.yml b/.github/workflows/auto-merge-release.yml new file mode 100644 index 0000000..e78e376 --- /dev/null +++ b/.github/workflows/auto-merge-release.yml @@ -0,0 +1,30 @@ +name: Auto-merge release PRs + +on: + pull_request: + types: [opened, reopened, labeled, synchronize, ready_for_review] + +permissions: + contents: write + pull-requests: write + +jobs: + auto-merge: + if: >- + contains(github.event.pull_request.labels.*.name, 'release') + && startsWith(github.event.pull_request.head.ref, 'release-plz-') + && github.event.pull_request.head.repo.full_name == github.repository + runs-on: ubuntu-latest + env: + HAS_RELEASE_PLZ_TOKEN: ${{ secrets.RELEASE_PLZ_TOKEN != '' }} + steps: + - name: Skip when RELEASE_PLZ_TOKEN is missing + if: 
env.HAS_RELEASE_PLZ_TOKEN != 'true' + run: echo "::notice::RELEASE_PLZ_TOKEN is not configured; release PR auto-merge skipped." + + - name: Enable auto-merge + if: env.HAS_RELEASE_PLZ_TOKEN == 'true' + run: gh pr merge --auto --squash "$PR_URL" + env: + PR_URL: ${{ github.event.pull_request.html_url }} + GH_TOKEN: ${{ secrets.RELEASE_PLZ_TOKEN }} diff --git a/.github/workflows/auto-update-branch.yml b/.github/workflows/auto-update-branch.yml new file mode 100644 index 0000000..a9cad99 --- /dev/null +++ b/.github/workflows/auto-update-branch.yml @@ -0,0 +1,24 @@ +name: Auto-update PR branches on main push + +on: + push: + branches: [main] + +permissions: + contents: write + pull-requests: write + +jobs: + update-prs: + runs-on: ubuntu-latest + steps: + - name: Update BEHIND PRs with auto-merge + run: | + gh pr list --base main --state open --json number,autoMergeRequest,mergeStateStatus \ + --jq '.[] | select(.autoMergeRequest != null) | select(.mergeStateStatus=="BEHIND") | .number' \ + | while read -r pr; do + echo "Updating PR #${pr}" + gh pr update-branch "${pr}" || echo "Skip PR #${pr}" + done + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f424be3..85fbdea 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,10 +1,21 @@ name: CI on: + merge_group: push: branches: [main] tags: - "v*" + paths-ignore: + - "docs/**" + - "**.md" + - "LICENSE*" + - ".editorconfig" + - ".gitignore" + - ".gitattributes" + - "**.txt" + - "CODEOWNERS" + - ".vscode/**" pull_request: workflow_dispatch: inputs: diff --git a/.github/workflows/cleanup-branches.yml b/.github/workflows/cleanup-branches.yml new file mode 100644 index 0000000..bdb70f9 --- /dev/null +++ b/.github/workflows/cleanup-branches.yml @@ -0,0 +1,98 @@ +name: Cleanup stale branches + +on: + # Run weekly on Monday at 06:00 UTC. + schedule: + - cron: "0 6 * * 1" + # Allow manual trigger. 
+ workflow_dispatch: + # Delete head branch right after PR merge. + pull_request: + types: [closed] + +permissions: + contents: write + +jobs: + # Delete the PR head branch immediately after merge. + delete-pr-branch: + if: github.event_name == 'pull_request' && github.event.pull_request.merged == true + runs-on: ubuntu-latest + steps: + - name: Delete merged branch + uses: actions/github-script@v7 + with: + script: | + const pr = context.payload.pull_request; + const branch = pr.head.ref; + const protectedNames = new Set(['main', 'master', 'develop', 'release']); + const protectedPrefixes = ['release/', 'hotfix/', 'dependabot/']; + if (pr.head.repo.full_name !== context.repo.owner + '/' + context.repo.repo) { + console.log(`Skipping branch from fork: ${pr.head.repo.full_name}:${branch}`); + return; + } + if (protectedNames.has(branch) || protectedPrefixes.some(prefix => branch.startsWith(prefix))) { + console.log(`Skipping protected branch: ${branch}`); + return; + } + try { + await github.rest.git.deleteRef({ + owner: context.repo.owner, + repo: context.repo.repo, + ref: `heads/${branch}`, + }); + console.log(`Deleted branch: ${branch}`); + } catch (e) { + console.log(`Could not delete ${branch}: ${e.message}`); + } + + # Weekly cleanup of old release-plz and agent (claude/* , codex/*) branches. + sweep-stale: + if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' + runs-on: ubuntu-latest + steps: + - uses: actions/github-script@v7 + with: + script: | + const owner = context.repo.owner; + const repo = context.repo.repo; + const protectedBranches = ['main', 'master', 'develop', 'release']; + const protectedPrefixes = ['release/', 'hotfix/', 'dependabot/']; + // Patterns for branches that should be auto-cleaned. 
+ const stalePatterns = [/^release-plz-/, /^claude\//, /^codex\//]; + + const { data: branches } = await github.rest.repos.listBranches({ + owner, repo, per_page: 100, + }); + + let deleted = 0; + for (const branch of branches) { + if (protectedBranches.includes(branch.name)) continue; + if (protectedPrefixes.some(prefix => branch.name.startsWith(prefix))) continue; + if (!stalePatterns.some(p => p.test(branch.name))) continue; + + // Only delete if fully merged into main. + try { + const { data: comparison } = await github.rest.repos.compareCommits({ + owner, repo, + base: 'main', + head: branch.name, + }); + // ahead_by == 0 means all commits are in main. + if (comparison.ahead_by !== 0) continue; + } catch { + continue; + } + + try { + await github.rest.git.deleteRef({ + owner, repo, + ref: `heads/${branch.name}`, + }); + console.log(`Deleted: ${branch.name}`); + deleted++; + } catch (e) { + console.log(`Failed to delete ${branch.name}: ${e.message}`); + } + } + console.log(`Cleaned up ${deleted} stale branches.`); diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 0000000..e38ff9d --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,56 @@ +name: "CodeQL Security Analysis" + +on: + push: + branches: [main] + paths: + - "crates/**" + - "Cargo.toml" + - "Cargo.lock" + - ".github/workflows/codeql.yml" + pull_request: + branches: [main] + paths: + - "crates/**" + - "Cargo.toml" + - "Cargo.lock" + - ".github/workflows/codeql.yml" + schedule: + # Weekly scan (Mondays 06:00 UTC) + - cron: "0 6 * * 1" + +permissions: + security-events: write + contents: read + +jobs: + analyze: + name: CodeQL Rust Analysis + runs-on: macos-latest + timeout-minutes: 30 + + steps: + - name: Checkout + uses: actions/checkout@v6 + + - name: Install Rust toolchain + uses: dtolnay/rust-toolchain@stable + + - name: Rust cache + uses: Swatinem/rust-cache@v2 + with: + shared-key: codeql + save-if: ${{ github.ref == 'refs/heads/main' }} + + 
- name: Initialize CodeQL + uses: github/codeql-action/init@v3 + with: + languages: rust + + - name: Build for CodeQL + run: cargo build --workspace --release --locked + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v3 + with: + category: "/language:rust" diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml new file mode 100644 index 0000000..c61a40b --- /dev/null +++ b/.github/workflows/nightly.yml @@ -0,0 +1,116 @@ +name: Nightly Toolchain Check + +on: + schedule: + # Every day at 03:00 UTC. + - cron: "0 3 * * *" + workflow_dispatch: + +permissions: + contents: read + issues: write # Used by the failure alert step below + +env: + CARGO_INCREMENTAL: 0 + CARGO_NET_RETRY: 10 + RUST_BACKTRACE: short + +jobs: + check: + name: ${{ matrix.toolchain }} / ${{ matrix.scope }} / ${{ matrix.os }} + runs-on: ${{ matrix.os }} + # Nightly may break - allow failure. Beta must pass. + continue-on-error: ${{ matrix.toolchain == 'nightly' }} + timeout-minutes: 30 + strategy: + fail-fast: false + matrix: + include: + - toolchain: beta + os: macos-latest + scope: workspace + cargo_args: "--workspace --all-features" + - toolchain: nightly + os: macos-latest + scope: workspace + cargo_args: "--workspace --all-features" + - toolchain: beta + os: ubuntu-latest + scope: pure-crates + cargo_args: "-p dunst-core -p dunst-graph -p dunst-vision" + - toolchain: nightly + os: ubuntu-latest + scope: pure-crates + cargo_args: "-p dunst-core -p dunst-graph -p dunst-vision" + steps: + - uses: step-security/harden-runner@v2 + with: + egress-policy: audit + - uses: actions/checkout@v6 + with: + ref: main + - uses: dtolnay/rust-toolchain@master + with: + toolchain: ${{ matrix.toolchain }} + components: clippy + - uses: Swatinem/rust-cache@v2 + with: + shared-key: nightly-${{ matrix.toolchain }}-${{ matrix.scope }}-${{ matrix.os }} + save-if: true + - name: Clippy + run: cargo clippy ${{ matrix.cargo_args }} --all-targets --locked -- -D warnings + - name: 
Tests + run: cargo test ${{ matrix.cargo_args }} --all-targets --locked + - name: Doc tests + run: cargo test ${{ matrix.cargo_args }} --doc --locked + + # Open (or reuse) a tracking issue when the BETA matrix fails. Nightly + # failures stay silent (they are allowed to break). Avoids spamming: the + # step searches for an open "nightly-toolchain" labeled issue and comments + # on it instead of creating duplicates. + alert: + name: Alert on beta regression + needs: [check] + if: always() && contains(needs.check.result, 'failure') && github.event_name == 'schedule' + runs-on: ubuntu-latest + timeout-minutes: 5 + steps: + - uses: step-security/harden-runner@v2 + with: + egress-policy: audit + - name: Open or update tracking issue + uses: actions/github-script@v7 + with: + script: | + const label = 'nightly-toolchain'; + const title = `Beta/nightly toolchain regression (${new Date().toISOString().slice(0,10)})`; + const body = [ + `Nightly run ${context.runId} observed a failure on the \`beta\` or \`nightly\` toolchain.`, + ``, + `Run: ${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`, + ``, + `If this is a nightly-only soft failure, close the issue. 
If beta is broken, this is a release blocker.`, + ].join('\n'); + const { data: issues } = await github.rest.issues.listForRepo({ + owner: context.repo.owner, + repo: context.repo.repo, + state: 'open', + labels: label, + per_page: 1, + }); + if (issues.length > 0) { + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: issues[0].number, + body, + }); + } else { + await github.rest.issues.create({ + owner: context.repo.owner, + repo: context.repo.repo, + title, + body, + labels: [label], + }); + } diff --git a/.github/workflows/release-plz.yml b/.github/workflows/release-plz.yml index 5a6a488..cc09dc8 100644 --- a/.github/workflows/release-plz.yml +++ b/.github/workflows/release-plz.yml @@ -9,7 +9,7 @@ on: branches: - main paths: - - "src/**" + - "crates/**" - "Cargo.toml" - "Cargo.lock" pull_request: diff --git a/.github/workflows/semgrep.yml b/.github/workflows/semgrep.yml new file mode 100644 index 0000000..865d674 --- /dev/null +++ b/.github/workflows/semgrep.yml @@ -0,0 +1,46 @@ +name: "Semgrep SAST" + +on: + push: + branches: [main] + paths: + - "crates/**" + - ".github/workflows/semgrep.yml" + pull_request: + branches: [main] + paths: + - "crates/**" + - ".github/workflows/semgrep.yml" + schedule: + # Weekly scan (Wednesdays 06:00 UTC) + - cron: "0 6 * * 3" + +permissions: + security-events: write + contents: read + +jobs: + semgrep: + name: Semgrep Rust SAST + runs-on: ubuntu-latest + timeout-minutes: 15 + container: + image: semgrep/semgrep:1.144.0 + + steps: + - uses: actions/checkout@v6 + + - name: Run Semgrep + run: semgrep scan --config auto --sarif --output semgrep-results.sarif crates/ + env: + SEMGREP_RULES: >- + p/rust + p/secrets + p/owasp-top-ten + + - name: Upload SARIF + if: always() + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: semgrep-results.sarif + category: semgrep diff --git a/.github/workflows/shellcheck.yml b/.github/workflows/shellcheck.yml new file mode 
100644 index 0000000..01013a9 --- /dev/null +++ b/.github/workflows/shellcheck.yml @@ -0,0 +1,69 @@ +# ShellCheck gate: static analysis for the repo's shell scripts. +# +# Lints every tracked script under scripts/**, plus any top-level *.sh, with +# `shellcheck -x` so `source`d files are followed. Runs on push to main and on +# PRs that touch shell scripts or this workflow. +# +# Severity is pinned to `warning`: this still flags genuine bugs while avoiding +# failures on info-level style notes. + +name: ShellCheck + +on: + push: + branches: [main] + paths: + - "**.sh" + - ".github/workflows/shellcheck.yml" + pull_request: + branches: [main] + paths: + - "**.sh" + - ".github/workflows/shellcheck.yml" + +concurrency: + group: shellcheck-${{ github.ref }} + cancel-in-progress: true + +permissions: read-all + +jobs: + shellcheck: + name: shellcheck + runs-on: ubuntu-latest + timeout-minutes: 5 + steps: + - uses: step-security/harden-runner@v2 + with: + egress-policy: audit + - uses: actions/checkout@v6 + - name: Run shellcheck + run: | + set -euo pipefail + # ubuntu-latest ships shellcheck preinstalled; print the version so + # the log records exactly which ruleset produced the result. + shellcheck --version + + # Collect scripts/** and any top-level *.sh. `! -path './*/*'` pins + # the second alternative to the repo root so it does not pull in + # scripts owned by other trees. + mapfile -d '' -t scripts < <( + find . \ + -type d -name target -prune -o \ + -type f -name '*.sh' \ + \( -path './scripts/*' \ + -o \( -path './*.sh' ! -path './*/*' \) \) \ + -print0 + ) + + if [ "${#scripts[@]}" -eq 0 ]; then + echo "::error::No shell scripts found - check the find globs." + exit 1 + fi + + printf 'Linting %d shell script(s):\n' "${#scripts[@]}" + printf ' %s\n' "${scripts[@]}" + + # -x follows `source`/`.` directives; --severity=warning fails on + # errors and warnings but not info/style notes. 
+ shellcheck -x --severity=warning "${scripts[@]}" diff --git a/BUGS-TODO.md b/BUGS-TODO.md new file mode 100644 index 0000000..2bc70b6 --- /dev/null +++ b/BUGS-TODO.md @@ -0,0 +1,99 @@ +# Bugs dunst-mcp — rencontrés en pilotant un champ web sparse-AX (Collective/Firefox) + +> Après correctifs : `cargo install --path crates/dunst-mcp --force` + reload du serveur MCP. + +## ⚠️ 0. CRITIQUE — ne JAMAIS coder en dur un keycode pour un raccourci lettre (layout !) + +Un keycode **physique** ne mappe pas la même lettre selon le clavier. Sur **AZERTY**, +keycode `0x00` (= 'A' en QWERTY) = **'Q'** → un faux « Cmd+A » devient **`Cmd+Q` = +Quitter l'app**. C'est ce qui a **fermé Firefox le 2026-06-25**, et pourquoi les +« Cmd+A / Cmd+V » codés en dur (`select_all_and_paste_background`, +`clear_field_background`) ne sélectionnaient/collaient jamais juste sur ce poste. +Le garde « layout-sensitive » de l'outil `hotkey` (qui **refuse `cmd+a`**) avait +raison ; le contourner avec un keycode brut était l'erreur. +→ Code keycode-brut **reverté** (clipboard.rs / lib.rs / text_input.rs revenus à +l'original). Pour un vrai Cmd+A/Cmd+V indépendant du layout : traduire le +**caractère** via la keymap courante (`UCKeyTranslate` / +`TISCopyCurrentKeyboardLayoutInputSource`), jamais un keycode fixe. + +**✅ Fix appliqué 2026-06-25** : `set_field_text` → `set_focused_field_text` appelle +maintenant `paste_replace_field_foreground` (`clipboard.rs`) : presse-papier + +`osascript` (`set frontmost of process` + `keystroke "a"/"v" using command down`, +**traduit par le layout courant**) → layout-safe, sélection **native** (donc pas de +queue 716/729), aucun keycode lettre codé en dur. ⚠️ foregrounde la fenêtre (pas +transparent) et requiert la permission **Automation → System Events** au 1er appel. +Ça résout aussi le bug #2 (queue) et #3 (frappes globales). Bug #4 (open_menu) reste. + +## 1. 
`set_field_text` impossible à approuver — ✅ CORRIGÉ 2026-06-25 + +`approve(keyboard@set_field_text::)` répondait `not a recognised +synthetic raw-input target`. Le préfixe `keyboard@set_field_text:` n'était géré ni +dans `validate_synthetic_raw_approval` ni dans `raw_approval_policy` +(`raw_input_gate.rs`). Fix : branches ajoutées + `validate_set_field_text_target_id` +(mirror de `paste_text`). **Vérifié live : approve accepté.** + +## 2. `set_field_text` laisse une queue DOM (React textarea) — ✅ FIX APPLIQUÉ (à vérifier live) + +Sur une textarea React (Collective), `set_field_text` appliquait bien le nouveau +texte mais laissait un **fragment résiduel en fin de champ** (ex. `…OpenBao` + +`ouverain (st4ck).`), de façon **déterministe**, tout en renvoyant `success` +(l'attribut AX `AXValue` lisait le texte propre — l'artefact est **invisible à +l'AX**, niveau DOM uniquement). + +Cause : `type_text_by_replacing_selection` (`text_input.rs`) sélectionne `{0,len}` +en AX puis **tape le texte caractère par caractère** (`post_window_bound_text` → +`type_text_background_impl`, 8 ms/char). La frappe synthétique dans un input +contrôlé React laisse un artefact DOM en queue. + +Fix v1 (échec) : `set AXSelectedText = text` — l'attribut n'est **pas settable** +sur la textarea web Firefox → retombait sur la frappe (queue persistante, fragment +différent). +Fix v2 (appliqué) : **coller** le texte (`paste_text_background` = presse-papier + +**Cmd+V window-bound** + restore) après la sélection `{0,len}`. La frappe +caractère-par-caractère reste en fallback si le paste échoue. Atomique → pas de +race React → pas de queue. **À vérifier live.** + +## 3. Champ web en arrière-plan ne reçoit pas les frappes globales — limitation (documenté) + +`hotkey`/`press_key` (chemin clavier global) **n'atteignent pas** un champ web dans +une fenêtre backgroundée : `cmd+Down` a déclenché la recherche de la *page* +Collective, pas la navigation curseur du textarea ; `Backspace` n'a rien supprimé. 
+Seuls **(a)** la frappe *window-bound* (`type_text_background_impl`) et **(b)** l'AX +(`set_field_text`) atteignent réellement le champ. ⇒ Pas de récupération d'édition +par touches brutes (curseur+Backspace) sur ces champs : passer par l'AX. + +## 4. `open_menu` n'ouvre pas le menu Firefox (multi-fenêtres) — ouvert + +`open_menu("Édition")` → `failed` (item AX visible mais l'AXPress n'ouvre pas), +même après `focus_window`. Probable : Firefox multi-fenêtres / fenêtre cible pas +*key window*. Empêche le fallback « Édition → Tout sélectionner ». Basse priorité. + +## 5. Pilotage LinkedIn (édition d'expériences) — notes + gotcha « formulaire vide » + +Édition des 6 expériences du profil le 2026-06-25 (sync sur le rendu-final). LinkedIn +est **sparse-AX** comme Collective : crayons par-ligne, textarea Description et scroll +du modal **absents de l'arbre AX** → tout en raw (`click_at`) + `find_ocr_text` + molette +réelle (`scroll_at borrow_cursor=true`, fallback appris Firefox+LinkedIn). + +**⚠️ Gotcha « formulaire vide » (≠ bug MCP)** : au 1er clic sur le crayon d'une +expérience, le **modal d'édition peut se charger VIDE** (champs en placeholder +« Ex. : chef des ventes au détail »), ce qui ressemble à une **création**. Ce n'en +est PAS une : c'est le **même form-id** (race de chargement LinkedIn). La coordonnée +était bonne. **Recharger la page** (`Cmd+R`) règle le glitch → le modal se rouvre +pré-rempli. **NE JAMAIS sauvegarder un modal aux champs requis vides** (ça écraserait +l'expérience). *Idée d'amélioration MCP* : sur ouverture d'un edit-form, détecter des +champs requis vides + avertir/retry au lieu de laisser croire à une création. + +**Méthode fiable (vérifiée ×6)** : +1. `find_ocr_text("")` → centre `(tx, ty)` de la ligne. +2. Crayon = `click_at(x≈3603 [bord droit de la carte], y=ty)`. Ne PAS deviner à + l'aveugle un y approximatif (risque de taper le « + créer » de la section ou un + hotspot inter-cartes). +3. 
Modal ouvert pré-rempli → `scroll_at(down, 1, borrow_cursor)` cadre la Description. +4. `click_at` dans la textarea → `pbcopy | osascript Cmd+A + Cmd+V` (layout-safe). +5. « Enregistrer » → puis fermer **2 pop-ups** post-save : « vérifiez l'emploi » + (`Passer`) pour les postes **actuels**, et « personnes que vous pourriez connaître » + (`Ignorer`) à chaque save. + +**Collage = lignes vides écrasées** (idem bug About) : LinkedIn supprime les lignes +vides au collage ; les blocs d'expérience n'en ont pas (puces consécutives) donc OK. diff --git a/crates/dunst-mcp/src/engine.rs b/crates/dunst-mcp/src/engine.rs index d745e8e..7d1fd6b 100644 --- a/crates/dunst-mcp/src/engine.rs +++ b/crates/dunst-mcp/src/engine.rs @@ -61,7 +61,7 @@ use query_support::*; use raw_input::page_scroll_target_id; use raw_input_gate::{ is_synthetic_approval_target_id, raw_paste_text_target_id, raw_press_key_target_id, - raw_type_keys_target_id, RawApprovalKey, + raw_set_field_text_target_id, raw_type_keys_target_id, RawApprovalKey, }; use runtime_support::*; use scene_query::*; diff --git a/crates/dunst-mcp/src/engine/app_ops.rs b/crates/dunst-mcp/src/engine/app_ops.rs index a489244..beb95c9 100644 --- a/crates/dunst-mcp/src/engine/app_ops.rs +++ b/crates/dunst-mcp/src/engine/app_ops.rs @@ -227,7 +227,14 @@ impl Engine { // tabs depending on browser preferences, which is the wrong primitive // for continuing inside an already-attached page. let existing_candidates = self.matching_windows_for_app(app); - if let Some(selected) = best_window_for_url(&existing_candidates, &terms) { + // TODO: expose this as a public MCP option: reuse = exact | host | never. 
+ let reuse_policy = BrowserTabReusePolicy::Host; + if let Some(selected) = self.best_existing_window_for_url( + &existing_candidates, + &terms, + &host_labels, + reuse_policy, + ) { let launch = self.launch_app_result(app, Some(url), false); return self.attach_url_window_result( launch, @@ -255,6 +262,38 @@ impl Engine { self.attach_url_window_result(launch, candidates, selected, &terms, &host_labels) } + /// Navigate the attached browser to `url` and re-verify. Unlike + /// `open_url_and_attach_tab`, this ALWAYS forces a fresh load (it never + /// re-selects a stale existing tab/window that merely matches the URL terms — + /// the failure mode that lands on the wrong tab) and targets the + /// currently-attached app. This is the reliable way to drive a backgrounded + /// browser to a new page: address-bar keystrokes are not an option in the + /// background, where synthetic keys fall through to page content and arrive as + /// in-page shortcuts (e.g. GitHub's `g i` → Issues) instead of the URL bar. 
+ #[cfg(target_os = "macos")] + pub fn navigate(&mut self, url: &str) -> OpenUrlAttachResult { + let app = self.window.app_name.clone(); + let terms = url_match_terms(url); + let host_labels = url_host_labels(url); + let launch = self.launch_app(&app, Some(url), &[]); + std::thread::sleep(Duration::from_millis(700)); + let candidates = launch.matching_windows.clone(); + let selected = best_window_for_url(&candidates, &terms).or_else(|| { + candidates + .iter() + .find(|window| window.window_id == self.target.window_id) + .cloned() + .or_else(|| candidates.first().cloned()) + }); + self.attach_url_window_result(launch, candidates, selected, &terms, &host_labels) + } + + #[cfg(not(target_os = "macos"))] + pub fn navigate(&mut self, url: &str) -> OpenUrlAttachResult { + let app = self.window.app_name.clone(); + self.open_url_and_attach_tab(&app, url, &[]) + } + fn matching_windows_for_app(&self, app: &str) -> Vec { let app_needle = normalize_match(app); self.list_windows(false) @@ -263,6 +302,81 @@ impl Engine { .collect() } + #[cfg(target_os = "macos")] + fn best_existing_window_for_url( + &mut self, + candidates: &[WindowSummary], + terms: &[String], + host_labels: &[String], + reuse_policy: BrowserTabReusePolicy, + ) -> Option { + if matches!(reuse_policy, BrowserTabReusePolicy::Never) { + return None; + } + best_window_for_url(candidates, terms).or_else(|| { + self.best_window_with_matching_selected_tab( + candidates, + terms, + host_labels, + reuse_policy, + ) + }) + } + + #[cfg(target_os = "macos")] + fn best_window_with_matching_selected_tab( + &mut self, + candidates: &[WindowSummary], + terms: &[String], + host_labels: &[String], + reuse_policy: BrowserTabReusePolicy, + ) -> Option { + let original = self.target.clone(); + let mut best = None; + + for window in candidates { + if self.target.window_id != window.window_id + && self.attach(window.pid, window.window_id).is_err() + { + continue; + } + let Some(tab) = self + .list_browser_tabs(None, true) + 
.into_iter() + .find(|tab| tab.selected) + else { + continue; + }; + let Some(score) = browser_tab_reuse_score(&tab, terms, host_labels, reuse_policy) + else { + continue; + }; + let rank = (score, window.on_screen, std::cmp::Reverse(window.window_id)); + if best + .as_ref() + .map(|(current, _)| rank > *current) + .unwrap_or(true) + { + best = Some((rank, window.clone())); + } + } + + if let Some((_, selected)) = best { + if self.target.window_id != selected.window_id + && self.attach(selected.pid, selected.window_id).is_err() + { + let _ = self.attach(original.pid, original.window_id); + return None; + } + Some(selected) + } else { + if self.target != original { + let _ = self.attach(original.pid, original.window_id); + } + None + } + } + fn attach_url_window_result( &mut self, launch: LaunchAppResult, @@ -350,6 +464,14 @@ impl Engine { } } +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[allow(dead_code)] +enum BrowserTabReusePolicy { + Exact, + Host, + Never, +} + fn url_match_terms(url: &str) -> Vec { let decoded = percent_decode_lossy(url); let normalized = normalize_match(&decoded); @@ -392,6 +514,37 @@ fn best_window_for_url(windows: &[WindowSummary], terms: &[String]) -> Option Option { + if matches!(policy, BrowserTabReusePolicy::Never) { + return None; + } + if let Some(url) = tab.url.as_deref() { + let url = normalize_match(url); + let score = terms + .iter() + .filter(|term| url.contains(term.as_str())) + .count(); + if score > 0 { + return Some(score + 100); + } + } + + let title = normalize_match(&tab.title); + let score = terms + .iter() + .filter(|term| { + matches!(policy, BrowserTabReusePolicy::Host) || !is_generic_url_term(term, host_labels) + }) + .filter(|term| title.contains(term.as_str())) + .count(); + (score > 0).then_some(score) +} + fn percent_decode_lossy(value: &str) -> String { let bytes = value.as_bytes(); let mut decoded = Vec::with_capacity(bytes.len()); @@ -540,6 +693,40 @@ mod tests { assert_eq!(best_window_for_url(&windows, 
&terms).unwrap().window_id, 2); } + #[test] + fn browser_tab_reuse_policy_matches_host_titles_by_default() { + let url = "https://github.com/AlexsJones/llmfit"; + let terms = url_match_terms(url); + let host_labels = url_host_labels(url); + let tab = tab("GitHub", None); + + assert_eq!( + browser_tab_reuse_score(&tab, &terms, &host_labels, BrowserTabReusePolicy::Host), + Some(1) + ); + assert_eq!( + browser_tab_reuse_score(&tab, &terms, &host_labels, BrowserTabReusePolicy::Exact), + None + ); + assert_eq!( + browser_tab_reuse_score(&tab, &terms, &host_labels, BrowserTabReusePolicy::Never), + None + ); + } + + #[test] + fn browser_tab_reuse_exact_accepts_path_specific_title() { + let url = "https://github.com/AlexsJones/llmfit"; + let terms = url_match_terms(url); + let host_labels = url_host_labels(url); + let tab = tab("GitHub - AlexsJones/llmfit", None); + + assert_eq!( + browser_tab_reuse_score(&tab, &terms, &host_labels, BrowserTabReusePolicy::Exact), + Some(2) + ); + } + #[test] fn url_verification_reports_the_signal_that_matched() { let url = "https://github.com/AlexsJones/llmfit"; diff --git a/crates/dunst-mcp/src/engine/query_support.rs b/crates/dunst-mcp/src/engine/query_support.rs index 9037329..ab3eea0 100644 --- a/crates/dunst-mcp/src/engine/query_support.rs +++ b/crates/dunst-mcp/src/engine/query_support.rs @@ -1,5 +1,8 @@ use super::*; +const DEFAULT_USER_ACTIVE_GUARD_ATTEMPTS: usize = 4; +const USER_IDLE_RETRY_MS_ENV: &str = "DUNST_MCP_USER_IDLE_RETRY_MS"; + pub(super) fn likely_url(value: &str) -> Option { let trimmed = value.trim(); if trimmed.starts_with("http://") || trimmed.starts_with("https://") { @@ -109,8 +112,28 @@ where retry_user_active_guard_after(Duration::from_millis(400), f) } -pub(super) fn retry_user_active_guard_after( +pub(super) fn retry_user_active_guard_after(delay: Duration, f: F) -> dunst_core::Result +where + F: FnMut() -> dunst_core::Result, +{ + match user_active_guard_retry_budget() { + Some(budget) => 
retry_user_active_guard_for_budget(delay, budget, f), + None => { + retry_user_active_guard_fixed_attempts(delay, DEFAULT_USER_ACTIVE_GUARD_ATTEMPTS, f) + } + } +} + +fn user_active_guard_retry_budget() -> Option { + std::env::var(USER_IDLE_RETRY_MS_ENV) + .ok() + .and_then(|value| value.parse::().ok()) + .map(Duration::from_millis) +} + +fn retry_user_active_guard_fixed_attempts( delay: Duration, + attempts: usize, mut f: F, ) -> dunst_core::Result where @@ -118,7 +141,7 @@ where { let mut next_delay = delay; let mut last_guard_err = None; - for _ in 0..4 { + for _ in 0..attempts.max(1) { match f() { Err(err) if is_user_active_guard_error(&err) => { last_guard_err = Some(err); @@ -131,6 +154,31 @@ where Err(last_guard_err.expect("guard retry loop stores the last guard error")) } +pub(super) fn retry_user_active_guard_for_budget( + delay: Duration, + budget: Duration, + mut f: F, +) -> dunst_core::Result +where + F: FnMut() -> dunst_core::Result, +{ + let mut next_delay = delay; + let mut waited = Duration::ZERO; + loop { + match f() { + Err(err) if is_user_active_guard_error(&err) => { + if next_delay.is_zero() || waited.saturating_add(next_delay) > budget { + return Err(err); + } + std::thread::sleep(next_delay); + waited += next_delay; + next_delay += delay; + } + other => return other, + } + } +} + pub(super) fn is_user_active_guard_error(err: &DunstError) -> bool { err.to_string().contains("user-active guard blocked") } diff --git a/crates/dunst-mcp/src/engine/raw_input/keyboard.rs b/crates/dunst-mcp/src/engine/raw_input/keyboard.rs index a264aeb..6b481b2 100644 --- a/crates/dunst-mcp/src/engine/raw_input/keyboard.rs +++ b/crates/dunst-mcp/src/engine/raw_input/keyboard.rs @@ -107,6 +107,47 @@ impl Engine { ) } + /// Replace the FOCUSED field's text: clears whatever currently has keyboard + /// focus and sets it to `text` via the app's `AXFocusedUIElement` (AX + /// select-all-replace, with a keyboard fallback). 
Robust where raw + /// clear-by-keystroke (End/Backspace/double-click) garbles the value when + /// driving a backgrounded form. Focus the field first (e.g. click it). + #[cfg(target_os = "macos")] + pub fn set_field_text(&mut self, text: &str) -> dunst_core::Result { + let target_id = raw_set_field_text_target_id(text); + let risk = Self::raw_input_risk(vec![ + "replaces the entire contents of whatever field currently holds focus".into(), + ]); + if let Some(entry) = self.gate_raw_input( + &target_id, + SemanticAction::Type, + Some(text.to_string()), + Some("set focused field text (clear + replace)"), + risk.clone(), + ) { + return Ok(entry); + } + let outcome = retry_user_active_guard(|| { + dunst_platform::set_focused_field_text(self.target.pid, self.target.window_id, text) + }); + self.audit_raw_input( + target_id, + SemanticAction::Type, + Some(text.to_string()), + Some("set focused field text (AX select-all-replace, keyboard fallback)"), + risk, + outcome, + ) + } + + /// Non-macOS stub: AX field replacement needs the macOS backend. + #[cfg(not(target_os = "macos"))] + pub fn set_field_text(&mut self, _text: &str) -> dunst_core::Result { + Err(DunstError::Execution( + "set_field_text requires a macOS backend".into(), + )) + } + /// Paste `text` into the focused element by temporarily replacing the /// clipboard, sending Cmd+V to the target window, then restoring the /// previous plain-text clipboard contents. 
This keeps the platform clipboard diff --git a/crates/dunst-mcp/src/engine/raw_input_gate.rs b/crates/dunst-mcp/src/engine/raw_input_gate.rs index c06811f..bb5dc3a 100644 --- a/crates/dunst-mcp/src/engine/raw_input_gate.rs +++ b/crates/dunst-mcp/src/engine/raw_input_gate.rs @@ -294,6 +294,9 @@ impl Engine { if target_id.starts_with("keyboard@paste_text:") { return validate_paste_text_target_id(target_id); } + if target_id.starts_with("keyboard@set_field_text:") { + return validate_set_field_text_target_id(target_id); + } if let Some(rest) = target_id.strip_prefix("keyboard@scroll:") { return validate_scroll_target(rest, false, self); @@ -500,6 +503,10 @@ pub(super) fn raw_paste_text_target_id(text: &str) -> String { raw_text_payload_target_id("paste_text", text) } +pub(super) fn raw_set_field_text_target_id(text: &str) -> String { + raw_text_payload_target_id("set_field_text", text) +} + fn raw_text_payload_target_id(action: &str, text: &str) -> String { let mut hash = 0xcbf2_9ce4_8422_2325_u64; for byte in text.as_bytes() { @@ -556,6 +563,15 @@ fn raw_approval_policy(target_id: &str) -> Vec { cost_events: 1, }]; } + if target_id.starts_with("keyboard@set_field_text:") { + return vec![RawApprovalPolicy { + key: RawApprovalKey { + scope: RawApprovalScope::Exact(target_id.to_string()), + }, + grant_events: 1, + cost_events: 1, + }]; + } if let Some(scope) = ocr_click_approval_scope(target_id) { return vec![RawApprovalPolicy { key: RawApprovalKey { @@ -575,19 +591,18 @@ fn raw_approval_policy(target_id: &str) -> Vec { } fn scroll_direction_policy(rest: &str) -> Vec { - let mut parts = rest.split(':'); - let direction = parts.next().unwrap_or("down"); - let cost_events = parts - .next() - .and_then(|count| count.parse::().ok()) - .unwrap_or(1) - .clamp(1, 20); + let direction = rest.split(':').next().unwrap_or("down").to_string(); + // A scroll is one operator gesture regardless of page count or exact point. 
+ // Approving "scroll " therefore grants a batch of same-direction scrolls + // — at any point, any page count — within the TTL, so repeated point-scrolls + // (scroll_at at successive coordinates) don't re-arm the approval gate on every + // call. Each scroll costs exactly one event; the grant covers a working batch. vec![RawApprovalPolicy { key: RawApprovalKey { - scope: RawApprovalScope::ScrollDirection(direction.to_string()), + scope: RawApprovalScope::ScrollDirection(direction), }, - grant_events: 5, - cost_events, + grant_events: 8, + cost_events: 1, }] } @@ -656,6 +671,10 @@ fn validate_paste_text_target_id(target_id: &str) -> dunst_core::Result<()> { validate_hashed_text_target_id(target_id, "keyboard@paste_text:", "paste_text") } +fn validate_set_field_text_target_id(target_id: &str) -> dunst_core::Result<()> { + validate_hashed_text_target_id(target_id, "keyboard@set_field_text:", "set_field_text") +} + fn validate_hashed_text_target_id( target_id: &str, prefix: &str, diff --git a/crates/dunst-mcp/src/engine/read.rs b/crates/dunst-mcp/src/engine/read.rs index 974c41c..c417427 100644 --- a/crates/dunst-mcp/src/engine/read.rs +++ b/crates/dunst-mcp/src/engine/read.rs @@ -217,6 +217,22 @@ impl Engine { .fingerprint } + /// OCR/vision fallback for the JSON-facing `find_element` read path. Action + /// resolution remains AX-only because synthetic hit targets are not + /// `SceneNode`s and must be driven through their advertised raw/OCR tools. 
+ pub fn find_element_hit_target_fallback(&self, query: &str, limit: usize) -> Vec { + let q = normalize_match(query); + let mut targets: Vec = self + .hit_targets(false, "page", 500, None) + .targets + .into_iter() + .filter(|target| matches!(target.source.as_str(), "ocr" | "vision")) + .filter(|target| hit_target_matches_find_query(target, &q)) + .collect(); + targets.truncate(limit.clamp(1, 500)); + targets + } + fn append_ocr_hit_targets( &self, targets: &mut Vec, @@ -1264,6 +1280,21 @@ fn hit_source_rank(source: &str) -> u8 { } } +fn hit_target_matches_find_query(target: &HitTarget, query: &str) -> bool { + normalized_contains_query(&normalize_match(&target.id), query) + || normalized_contains_query(&normalize_match(target.role), query) + || target + .label + .as_deref() + .map(|label| normalized_contains_query(&normalize_match(label), query)) + .unwrap_or(false) + || target + .value + .as_deref() + .map(|value| normalized_contains_query(&normalize_match(value), query)) + .unwrap_or(false) +} + fn source_name(source: dunst_core::Source) -> &'static str { match source { dunst_core::Source::Accessibility => "accessibility", diff --git a/crates/dunst-mcp/src/engine/tests/raw_window.rs b/crates/dunst-mcp/src/engine/tests/raw_window.rs index 0b8df11..c071a86 100644 --- a/crates/dunst-mcp/src/engine/tests/raw_window.rs +++ b/crates/dunst-mcp/src/engine/tests/raw_window.rs @@ -19,6 +19,26 @@ fn user_active_guard_retry_runs_once_before_returning() { assert_eq!(attempts.load(Ordering::SeqCst), 2); } +#[test] +fn user_active_guard_retry_budget_limits_waits() { + let attempts = Arc::new(AtomicUsize::new(0)); + let attempts_in_closure = attempts.clone(); + let result: dunst_core::Result<()> = retry_user_active_guard_for_budget( + Duration::from_millis(1), + Duration::from_millis(1), + || { + attempts_in_closure.fetch_add(1, Ordering::SeqCst); + Err(DunstError::Execution( + "user-active guard blocked click: last keyboard/mouse input was 1 ms ago (< 150 ms)" + .into(), + )) 
+ }, + ); + + assert!(result.is_err()); + assert_eq!(attempts.load(Ordering::SeqCst), 2); +} + #[test] fn internal_hover_lead_point_is_clamped_to_target_window() { let (eng, _) = engine_with_counter(); diff --git a/crates/dunst-mcp/src/serve.rs b/crates/dunst-mcp/src/serve.rs index 75ef515..dc8cceb 100644 --- a/crates/dunst-mcp/src/serve.rs +++ b/crates/dunst-mcp/src/serve.rs @@ -520,6 +520,16 @@ fn find_matches_value(matches: Vec<&SceneNode>) -> Value { serde_json::to_value(matches).unwrap_or(Value::Null) } +fn find_matches_value_or_fallback(engine: &Engine, query: &str, matches: Vec<&SceneNode>) -> Value { + if matches.is_empty() { + let fallback = engine.find_element_hit_target_fallback(query, 80); + if !fallback.is_empty() { + return serde_json::to_value(fallback).unwrap_or(Value::Null); + } + } + find_matches_value(matches) +} + fn find_element_value( engine: &mut Engine, query: &str, @@ -535,7 +545,9 @@ fn find_element_value( } ensure_recent_graph(engine, fresh, force)?; - Ok(find_matches_value( + Ok(find_matches_value_or_fallback( + engine, + query, engine.find_element_filtered(query, visible_only), )) } diff --git a/crates/dunst-mcp/src/serve/dispatch/raw_tools.rs b/crates/dunst-mcp/src/serve/dispatch/raw_tools.rs index 0690f08..9f10cbb 100644 --- a/crates/dunst-mcp/src/serve/dispatch/raw_tools.rs +++ b/crates/dunst-mcp/src/serve/dispatch/raw_tools.rs @@ -135,6 +135,15 @@ pub(super) fn dispatch( .map_err(|e| e.to_string()), None => Err("missing 'text'".into()), }, + "set_field_text" => match arg(args, "text") { + Some(text) => engine + .set_field_text(&text) + .map(|entry| { + audit_entry_value(entry, arg_bool(args, "include_diff").unwrap_or(false)) + }) + .map_err(|e| e.to_string()), + None => Err("missing 'text'".into()), + }, "paste_text" => match arg(args, "text") { Some(text) => engine .paste_text(&text, arg_bool(args, "restore_clipboard").unwrap_or(true)) diff --git a/crates/dunst-mcp/src/serve/dispatch/window_app_tools.rs 
b/crates/dunst-mcp/src/serve/dispatch/window_app_tools.rs index a269390..8f56f85 100644 --- a/crates/dunst-mcp/src/serve/dispatch/window_app_tools.rs +++ b/crates/dunst-mcp/src/serve/dispatch/window_app_tools.rs @@ -128,6 +128,10 @@ pub(super) fn dispatch( } _ => Err("open_url_and_attach_tab requires 'app' and 'url'".into()), }, + "navigate" => match arg(args, "url") { + Some(url) => Ok(serde_json::to_value(engine.navigate(&url)).unwrap_or(Value::Null)), + None => Err("navigate requires 'url'".into()), + }, "close_app" => match arg(args, "app") { Some(app) => Ok(json!({ "closed": engine.close_app(&app) })), None => Err("close_app requires 'app'".into()), diff --git a/crates/dunst-mcp/src/serve/registry.rs b/crates/dunst-mcp/src/serve/registry.rs index 036f5a9..b018469 100644 --- a/crates/dunst-mcp/src/serve/registry.rs +++ b/crates/dunst-mcp/src/serve/registry.rs @@ -65,6 +65,7 @@ pub(super) const TOOL_REGISTRY: &[RegisteredTool] = &[ tool("open_menu", ToolRoute::Raw), tool("press_key", ToolRoute::Raw), tool("type_keys", ToolRoute::Raw), + tool("set_field_text", ToolRoute::Raw), tool("paste_text", ToolRoute::Raw), tool("scroll", ToolRoute::Raw), tool("scroll_at", ToolRoute::Raw), @@ -81,6 +82,7 @@ pub(super) const TOOL_REGISTRY: &[RegisteredTool] = &[ tool("attach", ToolRoute::WindowApp), tool("launch_app", ToolRoute::WindowApp), tool("open_url_and_attach_tab", ToolRoute::WindowApp), + tool("navigate", ToolRoute::WindowApp), tool("close_app", ToolRoute::WindowApp), tool("screenshot", ToolRoute::Screenshot), ]; diff --git a/crates/dunst-mcp/src/serve/tests/catalog.rs b/crates/dunst-mcp/src/serve/tests/catalog.rs index 39aa83e..7025fa5 100644 --- a/crates/dunst-mcp/src/serve/tests/catalog.rs +++ b/crates/dunst-mcp/src/serve/tests/catalog.rs @@ -5,7 +5,7 @@ use crate::serve::registry::TOOL_REGISTRY; fn tools_list_exposes_read_text_with_object_schema() { std::env::remove_var("DUNST_MCP_ENABLE_APPROVE_TOOL"); let tools = tools_list(); - assert_eq!(tools.len(), 68, "tool 
count"); + assert_eq!(tools.len(), 70, "tool count"); // Every tool must declare a JSON-Schema object input (the type:object fix). for t in &tools { assert_eq!( diff --git a/crates/dunst-mcp/src/serve/tools.rs b/crates/dunst-mcp/src/serve/tools.rs index 56657ab..e39bb1e 100644 --- a/crates/dunst-mcp/src/serve/tools.rs +++ b/crates/dunst-mcp/src/serve/tools.rs @@ -190,7 +190,7 @@ fn query_tools() -> Vec { ), tool( "find_element", - "Find elements whose id/label/role contains the query (case-insensitive). Ensures a recent AX graph by default. Results are ranked with visible enabled targets first; visible_only drops off-window/latent noise.", + "Find elements whose id/label/role contains the query (case-insensitive). Ensures a recent AX graph by default. Results are ranked with visible enabled targets first; visible_only drops off-window/latent noise. If AX has no matches, falls back to OCR/vision hit targets tagged by source.", schema( json!({ "query": { "type": "string" }, @@ -543,6 +543,11 @@ fn window_app_tools() -> Vec { "Open a URL in an app, then attach Dunst to the best matching browser window and report whether the selected tab/window/page URL verifies against the URL, including verified_by when a signal matched. Use this instead of launch_app + manual tab guessing for browser navigation.", schema(json!({ "app": {"type":"string"}, "url": {"type":"string"}, "args": {"type":"array","items":{"type":"string"},"description":"extra argv passed when launching the app"} }), &["app", "url"]), ), + tool( + "navigate", + "Load a URL in the ATTACHED browser window and re-verify. Use this to drive a backgrounded browser to a new page: it always forces a fresh load (never re-selects a stale existing tab that merely matches the URL, the way open_url_and_attach_tab can), and it does not rely on the address bar — background keystrokes can't reach browser chrome (a typed URL falls through to the page as in-page shortcuts). 
Returns the same attach/verify result as open_url_and_attach_tab.", + schema(json!({ "url": {"type":"string"} }), &["url"]), + ), tool( "close_app", "Quit an app gracefully by name (no foreground).", @@ -583,6 +588,11 @@ fn keyboard_menu_tools() -> Vec { "Type text into the FOCUSED element via the SkyLight auth-signed keyboard path — reaches a backgrounded/occluded window's web content. Focus the field first (prefer click_element on a field; click_at is raw). Raw mutating keyboard input is high-risk and requires approval. If approval is unavailable or denied, switch to ui_fallback_hint and use type_into on a mapped field id. If the user-active guard blocks it, wait until the operator is idle and retry once.", schema(json!({ "text": {"type":"string"} }), &["text"]), ), + tool( + "set_field_text", + "Clear the FOCUSED field and set it to text in one robust step. Sets the app's AXFocusedUIElement value directly (AX select-all-replace, with a keyboard fallback) — use this instead of clearing with raw End/Backspace/double-click + type_keys, which gives erratic cursor results on backgrounded web forms (e.g. it garbles to 'copullntents'). Focus the field first (click it). Raw mutating input is high-risk and requires approval.", + schema(json!({ "text": {"type":"string"} }), &["text"]), + ), tool( "paste_text", "Paste text into the FOCUSED element by temporarily replacing the system clipboard, sending Cmd+V to the target window, then restoring the previous plain-text clipboard by default. Use for opaque browser fields where type_keys is unreliable after focusing and verifying the field. 
Raw mutating keyboard input is high-risk and requires approval; rich clipboard formats may not survive, and OCR/page_state verification is required before saving.", diff --git a/crates/dunst-platform/src/clipboard.rs b/crates/dunst-platform/src/clipboard.rs index 37db5de..e829897 100644 --- a/crates/dunst-platform/src/clipboard.rs +++ b/crates/dunst-platform/src/clipboard.rs @@ -3,6 +3,14 @@ use dunst_core::{DunstError, Result}; const CMD_FLAG: u64 = 0x0010_0000; const V_KEYCODE: u16 = 0x09; +/// Delay between issuing Cmd+V and restoring the previous clipboard. The paste +/// keystroke is delivered to the target app's event queue and consumed +/// asynchronously on its run loop, which reads the pasteboard *after* +/// `key_web_background` returns. Restoring sooner races that read and makes the +/// app paste the previous (stale) clipboard. 300 ms matches the proven foreground +/// path (`paste_replace_field_foreground`'s `delay 0.3`). +const PASTE_CONSUME_DELAY: std::time::Duration = std::time::Duration::from_millis(300); + #[cfg(target_os = "macos")] pub fn read_clipboard_bytes() -> Result> { let output = std::process::Command::new("pbpaste") @@ -72,6 +80,12 @@ pub fn paste_text_background( }; write_clipboard_bytes(text.as_bytes())?; let paste = crate::key_web_background(pid, window_id, V_KEYCODE, CMD_FLAG); + // Let the target app consume the paste (read the pasteboard) before putting + // the old clipboard back; restoring sooner races that read and the app ends + // up pasting the previous clipboard. Only matters when we actually restore. + if previous.is_some() { + std::thread::sleep(PASTE_CONSUME_DELAY); + } let restore = previous .as_deref() .map(write_clipboard_bytes) @@ -100,6 +114,55 @@ pub fn paste_text_background( )) } +/// Replace the focused field's whole content in one layout-safe step: put `text` on +/// the clipboard, foreground the target process, then native select-all + paste. 
+/// +/// AppleScript `keystroke "a"/"v"` is translated to the **current keyboard layout**, +/// so it works on AZERTY/QWERTY/etc. — unlike a hardcoded keycode, where Cmd+A +/// (keycode 0x00) becomes **Cmd+Q = Quit** on AZERTY. The native Cmd+A also selects +/// the field's real DOM content (no AX char-count under-report), so there is no +/// trailing fragment. Foregrounds the window (not transparent); the field must +/// already be focused (click it first). Restores the previous clipboard. +#[cfg(target_os = "macos")] +pub fn paste_replace_field_foreground(pid: i32, text: &str) -> Result<()> { + let previous = read_clipboard_bytes().ok(); + write_clipboard_bytes(text.as_bytes())?; + let script = format!( + "tell application \"System Events\"\n\ + set frontmost of (first process whose unix id is {pid}) to true\n\ + delay 0.4\n\ + keystroke \"a\" using command down\n\ + delay 0.2\n\ + keystroke \"v\" using command down\n\ + delay 0.3\n\ + end tell" + ); + let result = std::process::Command::new("osascript") + .arg("-e") + .arg(&script) + .output(); + if let Some(prev) = previous { + let _ = write_clipboard_bytes(&prev); + } + match result { + Ok(out) if out.status.success() => Ok(()), + Ok(out) => Err(DunstError::Execution(format!( + "osascript paste-replace failed: {}", + String::from_utf8_lossy(&out.stderr).trim() + ))), + Err(err) => Err(DunstError::Execution(format!( + "osascript spawn failed: {err}" + ))), + } +} + +#[cfg(not(target_os = "macos"))] +pub fn paste_replace_field_foreground(_pid: i32, _text: &str) -> Result<()> { + Err(DunstError::Execution( + "paste_replace_field_foreground requires a macOS backend".into(), + )) +} + #[cfg(test)] mod tests { use super::*; @@ -109,4 +172,12 @@ mod tests { assert_eq!(CMD_FLAG, 0x0010_0000); assert_eq!(V_KEYCODE, 0x09); } + + #[test] + fn paste_consume_delay_outlasts_async_paste() { + // Restoring the clipboard before the target app reads the pasteboard + // makes it paste stale content; the delay must stay non-zero 
and at + // least as long as the proven foreground path's `delay 0.3`. + assert!(PASTE_CONSUME_DELAY >= std::time::Duration::from_millis(300)); + } } diff --git a/crates/dunst-platform/src/lib.rs b/crates/dunst-platform/src/lib.rs index 407ecf6..1d1d3b3 100644 --- a/crates/dunst-platform/src/lib.rs +++ b/crates/dunst-platform/src/lib.rs @@ -24,7 +24,10 @@ pub use capabilities::{ AppCapabilities, ClipboardCapabilities, InputCapabilities, PerceptionCapabilities, PlatformCapabilities, PlatformKind, WindowCapabilities, }; -pub use clipboard::{paste_text_background, read_clipboard_bytes, write_clipboard_bytes}; +pub use clipboard::{ + paste_replace_field_foreground, paste_text_background, read_clipboard_bytes, + write_clipboard_bytes, +}; #[cfg(target_os = "macos")] pub use file_chooser::select_file_osascript_lines; pub use file_chooser::{borrow_target_frontmost, restore_frontmost_pid, select_file}; @@ -234,6 +237,16 @@ pub fn type_text_background(pid: i32, window_id: u32, text: &str) -> Result<()> macos::type_text_background(pid, window_id, text) } +/// Replace the text of the FOCUSED field in `pid` by setting the app's +/// `AXFocusedUIElement` value directly (AX select-all-replace, with a keyboard +/// fallback). Robust against the erratic cursor of raw clear-by-keystroke +/// (End/Backspace), even for sparse-AX web inputs absent from the scene graph. +/// Focus the field first (e.g. a click on it). 
+#[cfg(target_os = "macos")] +pub fn set_focused_field_text(pid: i32, window_id: u32, text: &str) -> Result<()> { + macos::set_focused_field_text(pid, window_id, text) +} + /// Post a named keycode (down+up) with optional modifier `flags` (CGEventFlags /// bits: Shift 0x20000, Control 0x40000, Alternate 0x80000, Command 0x100000) to /// a **backgrounded** window's (web) content via the SkyLight auth-signed keyboard diff --git a/crates/dunst-platform/src/macos.rs b/crates/dunst-platform/src/macos.rs index d6e3a1c..bdd3e7e 100644 --- a/crates/dunst-platform/src/macos.rs +++ b/crates/dunst-platform/src/macos.rs @@ -101,7 +101,8 @@ mod web_events; use ax_actions::*; use ax_backend::*; pub(crate) use ax_backend::{ - accessibility_trusted, capture, element_at_point, perform, set_window_frame, window_ref, + accessibility_trusted, capture, element_at_point, perform, set_focused_field_text, + set_window_frame, window_ref, }; use ax_tree::*; use cf::*; diff --git a/crates/dunst-platform/src/macos/ax_backend.rs b/crates/dunst-platform/src/macos/ax_backend.rs index 5cd4cc2..3f6be75 100644 --- a/crates/dunst-platform/src/macos/ax_backend.rs +++ b/crates/dunst-platform/src/macos/ax_backend.rs @@ -56,10 +56,7 @@ pub(super) fn last_input_age_ms() -> Option { // SAFETY: this CoreGraphics query has no pointer arguments or retained // ownership; both enum values use documented constants. let seconds = unsafe { - CGEventSourceSecondsSinceLastEventType( - CGEventSourceStateID::CombinedSessionState, - any_input, - ) + CGEventSourceSecondsSinceLastEventType(CGEventSourceStateID::HIDSystemState, any_input) }; if seconds.is_finite() && seconds >= 0.0 { Some((seconds * 1_000.0).round() as u64) @@ -77,6 +74,8 @@ pub(super) fn user_idle_block_message(operation: &str) -> Option { if age_ms >= guard_ms { return None; } + // Keep this message shape stable: MCP callers parse the age_ms and + // guard_ms values from "was {age_ms} ms ago (< {guard_ms} ms)". 
Some(format!( "user-active guard blocked {operation}: last keyboard/mouse input was {age_ms} ms ago (< {guard_ms} ms). Retry after the operator is idle, or set DUNST_MCP_USER_IDLE_GUARD_MS=0 to disable this guard." )) @@ -388,6 +387,27 @@ pub(super) fn app_element(pid: i32) -> Result { } } +/// Replace the text of whatever field currently holds keyboard focus in the app. +/// Fetches the app's `AXFocusedUIElement` directly (so it works even when the +/// focused field is a sparse-AX web input absent from the scene graph) and reuses +/// the robust [`type_text`] path (AX select-all-replace, with a keyboard +/// fallback). This avoids the erratic cursor results of clearing a field with raw +/// End/Backspace/double-click keystrokes (which produced garbled values like +/// "copullntents" when driving a backgrounded browser form). +pub(crate) fn set_focused_field_text(pid: i32, _window_id: u32, text: &str) -> Result<()> { + let app = app_element(pid)?; + // Confirm a text field is focused so we don't select-all + paste into nothing. + attr_ax_element(&app, "AXFocusedUIElement").ok_or_else(|| { + DunstError::Execution( + "no focused field to set text on; click or focus a text field first".into(), + ) + })?; + // Layout-safe replace: foreground + native Cmd+A + Cmd+V (AppleScript translates + // the keys to the current keyboard layout — no hardcoded keycode, which on AZERTY + // turns Cmd+A into Cmd+Q; native select-all also avoids the AX char-count tail bug). 
+ crate::paste_replace_field_foreground(pid, text) +} + pub(super) fn resolve_window(app: &AxElement, requested_window_id: u32) -> Result { let mut windows = attr_array(app, kAXWindowsAttribute) .map(|windows| ax_elements(&windows)) @@ -478,3 +498,13 @@ impl From for DunstError { } } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn core_graphics_exposes_hid_system_state() { + assert_eq!(CGEventSourceStateID::HIDSystemState as i32, 1); + } +} diff --git a/crates/dunst-platform/src/macos/text_input.rs b/crates/dunst-platform/src/macos/text_input.rs index aa438ef..cd3801e 100644 --- a/crates/dunst-platform/src/macos/text_input.rs +++ b/crates/dunst-platform/src/macos/text_input.rs @@ -110,7 +110,7 @@ pub(super) fn post_window_bound_text( "element-bound typing requires a target window id; process-wide keyboard fallback suppressed".into(), )); } - type_text_background_impl(target.pid, target.window_id, text) + type_text_background_with_paste_fallback(target.pid, target.window_id, text) } pub(super) fn wait_for_string_attr( @@ -166,7 +166,13 @@ pub(super) fn attr_settable(element: &AxElement, attr: &str) -> bool { #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub(super) enum TextInputAtom { Char(char), - Return, + Return { flags: u64 }, +} + +pub(super) const TEXT_NEWLINE_KEY_FLAGS: u64 = 0x0002_0000; + +pub(super) fn text_contains_line_break(text: &str) -> bool { + text.contains('\n') || text.contains('\r') } pub(super) fn for_text_input_atoms( @@ -180,14 +186,18 @@ where for ch in text.chars() { match ch { '\r' => { - f(TextInputAtom::Return)?; + f(TextInputAtom::Return { + flags: TEXT_NEWLINE_KEY_FLAGS, + })?; previous_was_cr = true; } '\n' if previous_was_cr => { previous_was_cr = false; } '\n' => { - f(TextInputAtom::Return)?; + f(TextInputAtom::Return { + flags: TEXT_NEWLINE_KEY_FLAGS, + })?; previous_was_cr = false; } ch => { @@ -201,10 +211,13 @@ where #[cfg(test)] mod tests { - use super::{for_text_input_atoms, ActionFailure, TextInputAtom}; + use 
super::{ + for_text_input_atoms, text_contains_line_break, ActionFailure, TextInputAtom, + TEXT_NEWLINE_KEY_FLAGS, + }; #[test] - pub(super) fn text_input_atoms_map_line_endings_to_return_keypresses() { + pub(super) fn text_input_atoms_map_line_endings_to_shift_return_keypresses() { let mut atoms = Vec::new(); let result = for_text_input_atoms("a\nb\r\nc\rd", |atom| { atoms.push(atom); @@ -216,13 +229,27 @@ mod tests { atoms, vec![ TextInputAtom::Char('a'), - TextInputAtom::Return, + TextInputAtom::Return { + flags: TEXT_NEWLINE_KEY_FLAGS + }, TextInputAtom::Char('b'), - TextInputAtom::Return, + TextInputAtom::Return { + flags: TEXT_NEWLINE_KEY_FLAGS + }, TextInputAtom::Char('c'), - TextInputAtom::Return, + TextInputAtom::Return { + flags: TEXT_NEWLINE_KEY_FLAGS + }, TextInputAtom::Char('d'), ] ); } + + #[test] + pub(super) fn text_contains_line_break_detects_all_supported_line_endings() { + assert!(!text_contains_line_break("single line")); + assert!(text_contains_line_break("two\nlines")); + assert!(text_contains_line_break("two\rlines")); + assert!(text_contains_line_break("two\r\nlines")); + } } diff --git a/crates/dunst-platform/src/macos/web_events.rs b/crates/dunst-platform/src/macos/web_events.rs index c3c9869..7d99fb1 100644 --- a/crates/dunst-platform/src/macos/web_events.rs +++ b/crates/dunst-platform/src/macos/web_events.rs @@ -273,7 +273,46 @@ pub(super) fn scroll_web_background_impl( /// `SLEventPostToPid`. No cursor, no foreground. Fails if SkyLight is absent /// or if any expected key event cannot be created and posted. 
pub fn type_text_background(pid: i32, window_id: u32, text: &str) -> Result<()> { - type_text_background_impl(pid, window_id, text).map_err(ActionFailure::into) + type_text_background_with_paste_fallback(pid, window_id, text).map_err(ActionFailure::into) +} + +pub(super) fn type_text_background_with_paste_fallback( + pid: i32, + window_id: u32, + text: &str, +) -> std::result::Result<(), ActionFailure> { + if should_prefer_clipboard_paste(text) { + match crate::clipboard::paste_text_background(pid, window_id, text, true) { + Ok(()) => return Ok(()), + Err(err) if paste_error_allows_key_fallback(&err) => {} + Err(err) => { + return Err(ActionFailure::Execution(format!( + "multi-line clipboard paste failed: {err}" + ))); + } + } + } + type_text_background_impl(pid, window_id, text) +} + +fn should_prefer_clipboard_paste(text: &str) -> bool { + text_contains_line_break(text) +} + +fn paste_error_allows_key_fallback(err: &DunstError) -> bool { + let DunstError::Execution(message) = err else { + return false; + }; + if message.contains("clipboard restore failed") || message.contains("restore also failed") { + return false; + } + message.contains("requires the SkyLight backend") + || message.contains("user-active guard blocked") + || message.contains("read clipboard") + || message.contains("write clipboard") + || message.contains("start pbcopy") + || message.contains("wait for pbcopy") + || message.contains("create background key CGEvent") } pub(super) fn type_text_background_impl( @@ -297,7 +336,9 @@ pub(super) fn type_text_background_impl( for_text_input_atoms(text, |atom| { match atom { TextInputAtom::Char(ch) => post_background_unicode_char(pid, ch)?, - TextInputAtom::Return => post_background_keycode_pair(pid, RETURN_KEYCODE)?, + TextInputAtom::Return { flags } => { + post_background_keycode_pair(pid, RETURN_KEYCODE, flags)?; + } } thread::sleep(Duration::from_millis(8)); Ok(()) @@ -324,12 +365,17 @@ pub(super) fn post_background_unicode_char( pub(super) fn 
post_background_keycode_pair( pid: i32, keycode: CGKeyCode, + flags: u64, ) -> std::result::Result<(), ActionFailure> { + let flags = CGEventFlags::from_bits_truncate(flags); for down in [true, false] { let source = event_source("skylight key CGEventSource")?; let event = CGEvent::new_keyboard_event(source, keycode, down).map_err(|err| { ActionFailure::Execution(format!("create background key CGEvent: {err:?}")) })?; + if !flags.is_empty() { + event.set_flags(flags); + } post_background_key_event(pid, &event)?; } Ok(()) @@ -386,8 +432,12 @@ pub fn key_web_background(pid: i32, window_id: u32, keycode: u16, flags: u64) -> } pub fn press_key(pid: i32, window_id: u32, key: &str) -> Result<()> { - let keycode = named_keycode(key).map_err(DunstError::from)?; - key_web_background(pid, window_id, keycode, 0) + let (keycode, flags) = named_key_event(key).map_err(DunstError::from)?; + key_web_background(pid, window_id, keycode, flags) +} + +pub(super) fn named_key_event(key: &str) -> std::result::Result<(CGKeyCode, u64), ActionFailure> { + Ok((named_keycode(key)?, 0)) } pub(super) fn named_keycode(key: &str) -> std::result::Result { @@ -507,3 +557,39 @@ pub(super) fn event_source( CGEventSource::new(CGEventSourceStateID::HIDSystemState) .map_err(|err| ActionFailure::Execution(format!("{operation}: {err:?}"))) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn explicit_return_key_event_has_no_modifier_flags() { + let (keycode, flags) = match named_key_event("Return") { + Ok(event) => event, + Err(_) => panic!("Return should resolve to a named key event"), + }; + + assert_eq!(keycode, KeyCode::RETURN); + assert_eq!(flags, 0); + } + + #[test] + fn multi_line_background_text_prefers_clipboard_paste() { + assert!(!should_prefer_clipboard_paste("single line")); + assert!(should_prefer_clipboard_paste("first\nsecond")); + assert!(should_prefer_clipboard_paste("first\rsecond")); + } + + #[test] + fn paste_error_fallback_avoids_possible_duplicate_text() { + 
assert!(paste_error_allows_key_fallback(&DunstError::Execution( + "key_web_background requires the SkyLight backend".into(), + ))); + assert!(!paste_error_allows_key_fallback(&DunstError::Execution( + "post background key CGEvent via SkyLight".into(), + ))); + assert!(!paste_error_allows_key_fallback(&DunstError::Execution( + "paste completed, but clipboard restore failed: write clipboard with pbcopy".into(), + ))); + } +} diff --git a/crates/dunst-vision/src/ocr.rs b/crates/dunst-vision/src/ocr.rs index d892afb..7103793 100644 --- a/crates/dunst-vision/src/ocr.rs +++ b/crates/dunst-vision/src/ocr.rs @@ -7,10 +7,10 @@ use foreign_types::ForeignType; use objc2::{rc::Retained, AnyThread, ClassType}; use objc2_core_foundation::{CGPoint, CGRect, CGSize}; use objc2_core_graphics::CGImage as ObjcCgImage; -use objc2_foundation::{NSArray, NSDictionary, NSString, NSURL}; +use objc2_foundation::{NSArray, NSDictionary, NSRange, NSString, NSURL}; use objc2_vision::{ VNImageOption, VNImageRequestHandler, VNRecognizeTextRequest, VNRecognizedText, - VNRecognizedTextObservation, VNRequest, VNRequestTextRecognitionLevel, + VNRecognizedTextObservation, VNRectangleObservation, VNRequest, VNRequestTextRecognitionLevel, }; use crate::{coords::window_rect_to_vision_roi, CaptureGeometry, NormRect, OcrBox}; @@ -86,9 +86,7 @@ pub fn ocr_region_with_mode( let mut out = Vec::new(); if let Some(results) = request.results() { for observation in results.iter() { - if let Some(ocr_box) = observation_to_box(&observation) { - out.push(ocr_box); - } + out.extend(observation_to_boxes(&observation)); } } Ok(out) @@ -128,39 +126,172 @@ pub fn ocr_image_file(path: &str, mode: RecognitionMode) -> Result, let mut out = Vec::new(); if let Some(results) = request.results() { for observation in results.iter() { - if let Some(ocr_box) = observation_to_box(&observation) { - out.push(ocr_box); - } + out.extend(observation_to_boxes(&observation)); } } Ok(out) } -/// An [`OcrBox`] carries only the 
Vision-normalised box; mapping it to screen -/// points is the consumer's job (via `coords::vision_norm_to_screen_pt`), so we -/// do not compute a screen box here (audit #2 — that result was being discarded). -fn observation_to_box(observation: &VNRecognizedTextObservation) -> Option { - let candidate: Retained = observation.topCandidates(1).firstObject()?; +/// Convert one Vision text observation into one or more [`OcrBox`]es. Vision +/// returns one observation per recognized *line*, but a line can span visually +/// separate UI runs that merely happen to be collinear — e.g. a dropdown row +/// label overlapping background page text on the same band ("Choose the +/// minimal… Contents"). Those merge into one observation and one box, so an +/// OCR-bound click on either run lands between them. We split a line into +/// spatially-separated runs using Vision's per-character-range boxes, but only at +/// LARGE horizontal gaps (clear layout separations, not normal word spacing). On +/// any uncertainty we fall back to the single whole-line box — worst case is the +/// previous behaviour. An [`OcrBox`] carries only the Vision-normalised box; +/// mapping it to screen points is the consumer's job. +fn observation_to_boxes(observation: &VNRecognizedTextObservation) -> Vec { + let Some(candidate) = observation.topCandidates(1).firstObject() else { + return Vec::new(); + }; let text = candidate.string().to_string(); if text.trim().is_empty() { - return None; + return Vec::new(); } + let confidence = candidate.confidence(); // SAFETY: `observation` is a live Vision object yielded by the request // results; `boundingBox` returns a value CGRect without retained pointers. 
- let rect = unsafe { observation.boundingBox() }; - let norm = NormRect { - x: rect.origin.x, - y: rect.origin.y, - w: rect.size.width, - h: rect.size.height, + let line_rect = unsafe { observation.boundingBox() }; + let line_box = OcrBox { + text: text.clone(), + norm: NormRect { + x: line_rect.origin.x, + y: line_rect.origin.y, + w: line_rect.size.width, + h: line_rect.size.height, + }, + confidence, }; - Some(OcrBox { - text, - norm, - confidence: candidate.confidence(), + + let words = word_boxes(&candidate, &text); + let split = split_line_by_gaps(&words, confidence); + if split.len() >= 2 { + split + } else { + vec![line_box] + } +} + +/// Per-word boxes for a recognized line via Vision's `boundingBoxForRange`. +/// Returns empty if any range query fails (caller falls back to the whole line). +fn word_boxes(candidate: &VNRecognizedText, text: &str) -> Vec<(String, NormRect)> { + let mut out = Vec::new(); + let mut word = String::new(); + let mut word_start_u16 = 0usize; + let mut u16_offset = 0usize; + for ch in text.chars() { + let w = ch.len_utf16(); + if ch.is_whitespace() { + if !word.is_empty() { + match word_rect(candidate, word_start_u16, u16_offset - word_start_u16) { + Some(rect) => out.push((std::mem::take(&mut word), rect)), + None => return Vec::new(), + } + } + word_start_u16 = u16_offset + w; + } else { + if word.is_empty() { + word_start_u16 = u16_offset; + } + word.push(ch); + } + u16_offset += w; + } + if !word.is_empty() { + match word_rect(candidate, word_start_u16, u16_offset - word_start_u16) { + Some(rect) => out.push((word, rect)), + None => return Vec::new(), + } + } + out +} + +fn word_rect(candidate: &VNRecognizedText, location: usize, length: usize) -> Option { + if length == 0 { + return None; + } + let range = NSRange { location, length }; + // SAFETY: `candidate` is a live VNRecognizedText; `range` is within the + // candidate string (derived from its own UTF-16 offsets). 
Vision errors are + // mapped to None and the caller falls back to the whole line. + let observation: Retained = + unsafe { candidate.boundingBoxForRange_error(range) }.ok()?; + // SAFETY: live observation; `boundingBox` returns a value CGRect. + let r = unsafe { observation.boundingBox() }; + if r.size.width <= 0.0 || r.size.height <= 0.0 { + return None; + } + Some(NormRect { + x: r.origin.x, + y: r.origin.y, + w: r.size.width, + h: r.size.height, }) } +/// Group consecutive (left-to-right) word boxes into runs, breaking only at a +/// LARGE horizontal gap (> 2.5x the line height) — a clear UI-layout separation, +/// not normal inter-word spacing. Returns one [`OcrBox`] per run, or empty when +/// the line is a single run (caller keeps the whole-line box). Pure: unit-tested. +fn split_line_by_gaps(words: &[(String, NormRect)], confidence: f32) -> Vec { + if words.len() < 2 { + return Vec::new(); + } + let mut sorted: Vec<&(String, NormRect)> = words.iter().collect(); + sorted.sort_by(|a, b| { + a.1.x + .partial_cmp(&b.1.x) + .unwrap_or(std::cmp::Ordering::Equal) + }); + + let mut runs: Vec> = vec![vec![sorted[0]]]; + for word in &sorted[1..] 
{ + let prev = runs.last().unwrap().last().unwrap(); + let gap = word.1.x - (prev.1.x + prev.1.w); + let line_h = prev.1.h.max(word.1.h); + if gap > line_h * 2.5 { + runs.push(vec![*word]); + } else { + runs.last_mut().unwrap().push(*word); + } + } + if runs.len() < 2 { + return Vec::new(); + } + runs.into_iter() + .map(|run| { + let text = run + .iter() + .map(|(t, _)| t.as_str()) + .collect::>() + .join(" "); + let minx = run.iter().map(|(_, r)| r.x).fold(f64::INFINITY, f64::min); + let miny = run.iter().map(|(_, r)| r.y).fold(f64::INFINITY, f64::min); + let maxx = run + .iter() + .map(|(_, r)| r.x + r.w) + .fold(f64::NEG_INFINITY, f64::max); + let maxy = run + .iter() + .map(|(_, r)| r.y + r.h) + .fold(f64::NEG_INFINITY, f64::max); + OcrBox { + text, + norm: NormRect { + x: minx, + y: miny, + w: maxx - minx, + h: maxy - miny, + }, + confidence, + } + }) + .collect() +} + /// Vision `regionOfInterest` for an optional screen-point region (`None` = whole /// image). Audit #1: the Y-flip + edge-clamp is owned by /// [`coords::window_rect_to_vision_roi`] (proven by 14 unit tests) — we convert the @@ -276,4 +407,42 @@ mod tests { assert!(got.origin.x + got.size.width <= 1.0 + 1e-9); assert!(got.origin.y + got.size.height <= 1.0 + 1e-9); } + + fn word(t: &str, x: f64, w: f64) -> (String, NormRect) { + ( + t.to_string(), + NormRect { + x, + y: 0.5, + w, + h: 0.02, + }, + ) + } + + #[test] + fn split_keeps_normal_spacing_as_one_run() { + // Two words a normal inter-word gap apart (< 2.5x height) stay together, + // so the caller keeps the single whole-line box (empty split result). + let words = vec![word("Choose", 0.10, 0.06), word("minimal", 0.17, 0.06)]; + assert!(split_line_by_gaps(&words, 1.0).is_empty()); + } + + #[test] + fn split_breaks_on_large_layout_gap() { + // Background text then a far-right dropdown label: the big gap splits them + // into two tight runs (the "Choose the minimal… Contents" merge case). 
+ let words = vec![ + word("Choose", 0.05, 0.06), + word("minimal", 0.12, 0.06), + word("Contents", 0.60, 0.08), + ]; + let runs = split_line_by_gaps(&words, 1.0); + assert_eq!(runs.len(), 2); + assert_eq!(runs[0].text, "Choose minimal"); + assert_eq!(runs[1].text, "Contents"); + // The second run is tight around "Contents", not the whole line. + assert!((runs[1].norm.x - 0.60).abs() < 1e-9); + assert!((runs[1].norm.w - 0.08).abs() < 1e-9); + } } diff --git a/docs/design/LLD-batch-choice-enumeration-selection.md b/docs/design/LLD-batch-choice-enumeration-selection.md new file mode 100644 index 0000000..f567a3b --- /dev/null +++ b/docs/design/LLD-batch-choice-enumeration-selection.md @@ -0,0 +1,943 @@ +# Batch Choice Enumeration & Selection — Low-Level Design + +> **Tier: L** (service with external deps — AX / OCR / vision / platform — multiple +> domain concepts, a batch-approval lifecycle, an epoch-guarded re-scan loop, and +> the existing per-session/window coordination path). Sections not needed by this +> component (DB schema, caching, migration) are omitted by design, not left as +> "N/A". + +**Status:** proposed — implementable directly from this document. +**Crate:** `dunst-mcp` (engine + serve), with one read-only reuse of +`dunst-platform` actuators already wrapped by the engine. + +--- + +## 1. Overview & HLD Anchor + +**Parent HLD / architecture anchor:** `docs/ARCHITECTURE.md` (AX-first scene → +affordance graph → MCP projection) and `docs/CONTRACTS.md` (risk gate, approval +transport boundary, mutation coordination, UI-epoch staleness). + +**Container(s) this LLD zooms into:** the `dunst-mcp` MCP server — specifically the +`serve` dispatch layer and the `engine` action/read layers. No new crate, no new +process, no new OS adapter. + +**Problem.** Filling a multi-field modal or a scrollable choice page currently costs +one `perceive → approve → click → screenshot` round trip **per option**. A ~14-field +order is ≈70 MCP tool calls. 
Each raw mutating action re-gates, re-perceives, and is +verified individually. + +**Goal.** Two new MCP primitives that collapse that loop: + +| Primitive | Route | Mutates? | Approval | +|-----------|-------|----------|----------| +| `enumerate_choices` | Read | No (default); position-restoring scroll survey when `scroll_scan=true` | None | +| `apply_selections` | Mutating | Yes | **One** operator approval for the whole batch | + +**Functional requirements covered:** + +- **FR-1** `enumerate_choices` surveys the whole choice surface (off-screen AX in one + shot, optionally a position-restoring scroll sweep for OCR-only surfaces) and + returns a **structured option model**: groups with single-select vs multi-select + semantics, required/optional, and per-choice `{id, label, coords, state}`. +- **FR-2** `apply_selections(plan, expected_epoch)` executes **all** picks as one + batch behind a **single** operator approval. +- **FR-3** The batch refuses a stale plan up front (`expected_epoch` mismatch) and + **re-scans only when `ui_epoch.fingerprint` changes mid-batch** (progressive + disclosure / reflow), re-resolving the remaining steps against fresh targets. +- **FR-4** A **single consolidated verify** at the end replaces N per-click + screenshots. +- **FR-5** Both primitives **reuse existing building blocks**: `get_hit_targets` + (`Engine::hit_targets`), the `ui_epoch.fingerprint`, the `expected_epoch` stale + refusal on the dispatch path, the raw/contextual approval gate, and the + `click_near_text` / `click_element` / `pick_option` / `set_field_text` actuators. + +**Non-goals.** No new perception backend (we consume `hit_targets` output verbatim). +No persistence. No change to how individual actuators talk to `dunst-platform`. + +**Architecture style.** Same layered hexagon as the rest of `dunst-mcp`: `serve` +(transport + JSON schema + coordination) → `engine` (semantic logic + gate + audit) +→ `dunst-platform` (OS side effects, untouched here). + +--- + +## 2. 
Component Architecture (C4 Level 3) + +```mermaid +graph TB + subgraph serve["serve (transport / coordination)"] + Reg[registry.rs<br/>TOOL_REGISTRY + route] + Cat[tools.rs<br/>JSON-Schema catalog] + Disp[dispatch.rs<br/>epoch check + lock/lease] + RD[dispatch/read_tools.rs<br/>enumerate_choices arg parse] + BD[dispatch/batch_tools.rs<br/>NEW: apply_selections arg parse] + end + + subgraph engine["engine (semantics / gate / audit)"] + Enum[choices.rs NEW<br/>ChoiceModel builder] + Apply[selections.rs NEW<br/>batch executor + re-scan loop] + Hit[read.rs<br/>hit_targets / ui_epoch / fingerprint] + Gate[raw_input_gate.rs<br/>batch approval grant] + Act[action.rs / element_actions.rs<br/>click_element / pick_option] + Raw[raw_input/keyboard.rs<br/>set_field_text / click_near_text] + end + + Plat[(dunst-platform<br/>AX / OCR / pointer / kbd)] + + Reg --> Disp + Cat --> Disp + Disp --> RD --> Enum + Disp --> BD --> Apply + Enum --> Hit + Apply --> Enum + Apply --> Hit + Apply --> Gate + Apply --> Act + Apply --> Raw + Hit --> Plat + Act --> Plat + Raw --> Plat +``` + +| Component | Responsibility | Depends on | Interface | +|-----------|----------------|-----------|-----------| +| `registry.rs` | Map the two new tool names to a route | — | `TOOL_REGISTRY` table | +| `tools.rs` | Advertise JSON Schemas for the two tools | `tool()`/`schema()` helpers | `tools_list()` | +| `dispatch.rs` | Pre-flight `expected_epoch` check + lock/lease for the mutating tool; survey-scroll coordination for the read tool | `CoordinationGuard`, `validate_expected_epoch` | `handle_tool_call` | +| `dispatch/batch_tools.rs` (new) | Parse `apply_selections` args → call engine | `args` helpers | `dispatch(engine, name, args)` | +| `engine/choices.rs` (new) | Build the `ChoiceModel` from `hit_targets` (classification + grouping) | `Engine::hit_targets` | `Engine::enumerate_choices(...)` | +| `engine/selections.rs` (new) | Batch executor: gate once, epoch-guarded re-scan loop, consolidated verify | choices.rs, gate, actuators | `Engine::apply_selections(...)` | +| `engine/raw_input_gate.rs` | Add a `batch@selections:*` synthetic approval target + grant | existing grant model | `approve_raw_input` / `consume_raw_approval` | + +**Dependency direction rule.** Same as today: `serve → engine → platform`. The new +code adds nothing that points back up. `choices.rs` is pure projection over an +existing read result; `selections.rs` orchestrates existing actuators and never +talks to the OS directly. + +--- + +## 3. Tactical Domain Model (the option model) + +The whole feature hangs on one new value-object cluster: a normalized **choice +surface** projected from `HitTargetsResult`. This is tactical DDD — value objects and +one read-model aggregate (`ChoiceModel`). No entity has a persisted lifecycle.
+ +| Type | Classification | Equality | Immutable? | +|------|----------------|----------|-----------| +| `ChoiceModel` | Read-model aggregate root | By value | Yes (snapshot) | +| `ChoiceGroup` | Value object | By value | Yes | +| `Choice` | Value object | By value (keyed by `id`) | Yes | +| `GroupKind` | Enum (`SingleSelect` / `MultiSelect` / `TextField` / `Action`) | By variant | Yes | +| `SelectionState` | Enum (`Selected` / `Unselected` / `Unknown`) | By variant | Yes | +| `Requirement` | Enum (`Required` / `Optional` / `Unknown`) | By variant | Yes | +| `SelectionPlan` | Value object (caller input) | By value | Yes | +| `SelectionStep` | Value object | By value | Yes | +| `BatchApprovalContext` | Transient engine state (not serialized) | — | No | + +**Invariants the model protects:** + +1. **Group/choice coherence.** Every `Choice.group_id` references a `ChoiceGroup.id` + present in the same `ChoiceModel`. Ungrouped controls land in a synthetic + `group_id = "ungrouped"`. +2. **Single-select cardinality.** In a `SingleSelect` group at most one `Choice` has + `state = Selected` at snapshot time; the executor enforces "selecting X implies the + previously selected sibling becomes Unselected" without a separate deselect step. +3. **Stable addressability.** `Choice.id` is the underlying `HitTarget.id` (AX node id, + `ocr_text_*`, `shape_*`, or `page@scroll:*`). It is reused verbatim by + `apply_selections` so a plan is addressable by id, with `label` + `bbox` carried as + re-resolution fallbacks after reflow. +4. **Epoch binding.** A `ChoiceModel` always carries the `ui_epoch.fingerprint` it was + built from; a plan built from it is only valid while that fingerprint holds (the + dispatch pre-flight) and is re-pinned on each mid-batch re-scan. 
+ +**Classification heuristics (best-effort, confidence-scored).** Derived from the +already-computed `HitTarget` fields (`role`, `action_modes[].action`, `label`, +`value`, `source`): + +| Signal | Inferred `GroupKind` / state | +|--------|------------------------------| +| `role == "radio"` / `action == Pick` on radio cluster | `SingleSelect` | +| `role == "checkbox"` or `"switch"`, `action == Toggle` | `MultiSelect` | +| `role` in `{"popup_button","combobox","menu_button"}`, `action == Pick`/`OpenMenu` | `SingleSelect` (popover) | +| `role` in `{"text_field","text_area","search_field"}`, `action == Type` | `TextField` | +| `role == "button"` with `action == Click` and no select semantics | `Action` (e.g. submit; excluded from auto-select) | +| `value` is truthy (`"1"`, `"true"`, AX `AXValue==1`) | `SelectionState::Selected` | +| `label`/sibling label contains `*`, `required`, `obligatoire`, `requis` | `Requirement::Required` | + +Grouping uses, in priority order: (a) shared AX container/parent id when available +from the scene graph, (b) shared `role` + horizontal/vertical proximity band (radios +in a column), (c) a shared label prefix. Each `ChoiceGroup` carries a +`classification_confidence` in `[0,1]` so the agent (and tests) can treat low-confidence +groups as advisory. + +--- + +## 4. 
Class / Module Design + +```mermaid +classDiagram + class Engine { + +enumerate_choices(opts: EnumerateOpts) Result~ChoiceModel~ + +apply_selections(plan: SelectionPlan, expected_epoch: str) Result~ApplyOutcome~ + -hit_targets(latent, scope, limit, prev) HitTargetsResult + -current_ui_epoch_fingerprint() String + -gate_raw_input(id, action, arg, why, risk) Option~AuditEntry~ + -click_element(id, why) Result~AuditEntry~ + -pick_option(q, vis, why) Result~OptionPickResult~ + -click_near_text(q, OcrClickOptions) Result~OcrClickResult~ + -set_field_text(text) Result~AuditEntry~ + } + + class ChoiceModel { + +ui_epoch: String + +scope: String + +coverage: Coverage + +groups: Vec~ChoiceGroup~ + +warnings: Vec~String~ + +scroll_plan: Vec~ScrollHint~ + } + class ChoiceGroup { + +id: String + +label: Option~String~ + +kind: GroupKind + +requirement: Requirement + +classification_confidence: f32 + +choices: Vec~Choice~ + } + class Choice { + +id: String + +group_id: String + +label: String + +value: Option~String~ + +state: SelectionState + +bbox: Option~Bbox~ + +safe_click: Option~SafeClickZone~ + +actuator: ActuatorHint + +risk: RiskAssessment + +source: String + } + + class SelectionPlan { + +steps: Vec~SelectionStep~ + } + class SelectionStep { + +group_id: String + +choice_id: String + +label: Option~String~ + +op: SelectionOp + +value: Option~String~ + +expected_after: Option~ExpectedState~ + } + class ApplyOutcome { + +status: ApplyStatus + +batch_id: String + +ui_epoch: String + +steps: Vec~StepResult~ + +rescans: u32 + +verify: BatchVerify + +pending_preview: Option~BatchPreview~ + } + class BatchApprovalContext { + +batch_id: String + +budget: u32 + +expected_epoch: String + } + + Engine --> ChoiceModel + Engine --> SelectionPlan + Engine --> ApplyOutcome + Engine ..> BatchApprovalContext : holds during apply + ChoiceModel --> ChoiceGroup + ChoiceGroup --> Choice + SelectionPlan --> SelectionStep +``` + +**Enums** + +```rust +pub enum GroupKind { SingleSelect, 
MultiSelect, TextField, Action } +pub enum SelectionState { Selected, Unselected, Unknown } +pub enum Requirement { Required, Optional, Unknown } +pub enum Coverage { Complete, Partial } // Partial => scroll_plan non-empty +pub enum ActuatorHint { ClickElement, PickOption, ClickNearText, SetFieldText, Scroll } +pub enum SelectionOp { Select, Deselect, SetText } +pub enum ApplyStatus { PendingApproval, Applied, PartiallyApplied, Refused } +``` + +**New engine entry points** (signatures, `crates/dunst-mcp/src/engine/`): + +```rust +// engine/choices.rs +pub struct EnumerateOpts<'a> { + pub scope: &'a str, // "page" (default) | "all" | "browser_chrome" + pub include_latent: bool, // default true — capture off-screen AX choices in one shot + pub scroll_scan: bool, // default false — position-restoring OCR sweep + pub max_scroll_pages: usize, // clamp 1..=12 (default 6) + pub limit: usize, // max choices, clamp 1..=500 (default 200) +} +impl Engine { + pub fn enumerate_choices(&mut self, opts: EnumerateOpts) -> dunst_core::Result<ChoiceModel>; +} + +// engine/selections.rs +impl Engine { + pub fn apply_selections( + &mut self, + plan: SelectionPlan, + expected_epoch: &str, + ) -> dunst_core::Result<ApplyOutcome>; +} +``` + +**Patterns used (documented because non-obvious):** + +| Pattern | Where | Why | +|---------|-------|-----| +| Read-model / Projection | `ChoiceModel` over `HitTargetsResult` | Keep perception in one place; choices.rs only re-shapes, never re-perceives | +| Strategy (`ActuatorHint`) | step → actuator dispatch in selections.rs | One executor, swappable per-choice actuator without branching logic leaking into callers | +| Unit-of-Work / batch context | `BatchApprovalContext` | One approval authorizes the whole unit; cleared on completion (SRP for "am I inside an approved batch?") | +| Guarded retry loop | epoch-guarded re-scan | Re-resolve remaining steps only when the fingerprint moves | + +**SOLID notes.** `enumerate_choices` is pure projection (SRP, no side effects in the
+default path). `apply_selections` depends on the actuators through the existing +`Engine` methods (already the DI seam used by every other tool), so the executor is +testable against a mock platform exactly like `act` is today. + +--- + +## 5. API Contracts (MCP tools) + +Both tools are added to `tools_list()` in `crates/dunst-mcp/src/serve/tools.rs` using +the existing `tool(name, description, schema(props, required))` helpers, and to +`TOOL_REGISTRY` in `registry.rs`. + +### 5.1 `enumerate_choices` + +**Route:** `ToolRoute::Read`. **Mutates:** only when `scroll_scan=true` (then it is +mutation-coordinated like `scan_chart`/`read_at borrow_cursor`, but **not** +operator-approval-gated — see ADR-1). + +**Input schema** (catalog form): + +```rust +tool( + "enumerate_choices", + "Survey the whole choice surface once and return a structured option model: \ + groups (single-select vs multi-select vs text field), required/optional, and \ + per-choice {id, label, coords, current state}. Default mode captures off-screen \ + AX choices in a single pass (no scroll). scroll_scan=true performs a \ + position-restoring scroll sweep to also assemble OCR/vision choices on \ + virtualized or AX-sparse surfaces; it surveys without operator approval and \ + restores the original scroll position. 
Pass the returned ui_epoch to \ + apply_selections as expected_epoch.", + schema( + json!({ + "scope": { "type": "string", "enum": ["page", "all", "browser_chrome"], "description": "target surface (default page)" }, + "include_latent": { "type": "boolean", "description": "include off-screen AX choices (default true)" }, + "scroll_scan": { "type": "boolean", "description": "position-restoring scroll sweep for OCR-only surfaces (default false)" }, + "max_scroll_pages": { "type": "integer", "description": "scroll-sweep bound, 1-12 (default 6)" }, + "limit": { "type": "integer", "description": "max choices, 1-500 (default 200)" }, + "fresh": { "type": "boolean", "description": "ensure a recent graph first (default true)" }, + "force_refresh": { "type": "boolean", "description": "force AX refresh even inside the short TTL (default false)" } + }), + &[], + ), +) +``` + +**Result body** (`content[0].text` is the JSON below; `_meta.dunst` envelope as +usual): + +```json +{ + "ui_epoch": "a1b2c3d4e5f60718", + "scope": "page", + "coverage": "complete", + "groups": [ + { + "id": "grp_delivery_time", + "label": "Delivery time", + "kind": "single_select", + "requirement": "required", + "classification_confidence": 0.92, + "choices": [ + { "id": "AXRadioButton_17", "group_id": "grp_delivery_time", "label": "ASAP", + "value": "1", "state": "selected", "bbox": {"x":120,"y":340,"w":80,"h":24}, + "safe_click": {"center":[160,352]}, "actuator": "click_element", + "risk": {"level":"low","requires_approval":false,"reasons":[]}, "source": "ax" }, + { "id": "AXRadioButton_18", "label": "Schedule", "state": "unselected", + "actuator": "click_element", "source": "ax", "risk": {"level":"low","requires_approval":false} } + ] + }, + { + "id": "grp_extras", + "label": "Extras", + "kind": "multi_select", + "requirement": "optional", + "classification_confidence": 0.80, + "choices": [ + { "id": "AXCheckBox_3", "label": "Cutlery", "state": "unselected", "actuator": "click_element", "source": "ax" 
}, + { "id": "AXCheckBox_4", "label": "Napkins", "state": "selected", "actuator": "click_element", "source": "ax" } + ] + }, + { + "id": "grp_note", + "label": "Note to courier", + "kind": "text_field", + "requirement": "optional", + "choices": [ + { "id": "AXTextArea_9", "label": "Note to courier", "value": "", "state": "unknown", "actuator": "set_field_text", "source": "ax" } + ] + } + ], + "warnings": [], + "scroll_plan": [] +} +``` + +When `coverage = "partial"`, `scroll_plan` is non-empty (the `page@scroll:*` +pseudo-targets from `hit_targets`) and instructs the caller to scroll + re-enumerate +to complete the model. + +### 5.2 `apply_selections` + +**Route:** new `ToolRoute::Batch` (or reuse `Element`; see §Integration). **Mutates:** +yes — added to `tool_requires_mutation_coordination` and (for `fencing_token`) +`tool_accepts_mutation_preconditions`. + +**Input schema:** + +```rust +tool( + "apply_selections", + "Apply a whole choice plan as ONE batch behind a single operator approval. \ + Build the plan from enumerate_choices and pass that ui_epoch as expected_epoch. \ + The batch refuses a stale plan up front, re-scans ONLY when the UI epoch \ + fingerprint changes mid-batch (progressive disclosure / reflow) and re-resolves \ + the remaining steps, then runs a single consolidated verify. First call returns \ + status=pending_approval with a per-step preview incl. 
risk; an operator approves \ + the returned batch_id once, then re-call with the same plan to execute.", + schema( + json!({ + "expected_epoch": { "type": "string", "description": "ui_epoch.fingerprint the plan was built from (required)" }, + "plan": { + "type": "object", + "properties": { + "steps": { + "type": "array", + "items": { + "type": "object", + "properties": { + "group_id": { "type": "string" }, + "choice_id": { "type": "string", "description": "Choice.id from enumerate_choices" }, + "label": { "type": "string", "description": "re-resolution fallback after reflow" }, + "op": { "type": "string", "enum": ["select", "deselect", "set_text"] }, + "value": { "type": "string", "description": "text for set_text" }, + "expected_after": { + "type": "object", + "properties": { + "state": { "type": "string", "enum": ["selected", "unselected"] }, + "value": { "type": "string" } + } + } + }, + "required": ["choice_id", "op"] + } + } + }, + "required": ["steps"] + }, + "include_diff": { "type": "boolean" } + }), + &["expected_epoch", "plan"], + ), +) +``` + +**Result body — pending approval (first call):** + +```json +{ + "status": "pending_approval", + "batch_id": "batch@selections:9f2c41ab77de0033:5", + "approval_hint": "operator must run approve(batch_id); approve authorizes the whole batch once", + "max_risk": "low", + "preview": [ + { "choice_id": "AXRadioButton_18", "op": "select", "label": "Schedule", "risk": "low" }, + { "choice_id": "AXCheckBox_3", "op": "select", "label": "Cutlery", "risk": "low" }, + { "choice_id": "AXTextArea_9", "op": "set_text","label": "Note to courier", "risk": "low" } + ] +} +``` + +**Result body — applied (re-call after approval):** + +```json +{ + "status": "applied", + "batch_id": "batch@selections:9f2c41ab77de0033:5", + "ui_epoch": "77de0033a1b2c3d4", + "rescans": 1, + "steps": [ + { "choice_id": "AXRadioButton_18", "op": "select", "result": "success", "resolved_by": "id" }, + { "choice_id": "AXCheckBox_3", "op": "select", 
"result": "success", "resolved_by": "id" }, + { "choice_id": "AXTextArea_9", "op": "set_text","result": "success", "resolved_by": "label_after_rescan" } + ], + "verify": { + "ok": true, + "checks": [ + { "choice_id": "AXRadioButton_18", "expected": "selected", "actual": "selected", "ok": true }, + { "choice_id": "AXCheckBox_3", "expected": "selected", "actual": "selected", "ok": true }, + { "choice_id": "AXTextArea_9", "expected_value": "leave at door", "actual_value": "leave at door", "ok": true } + ] + } +} +``` + +**Error responses** (tool returns `isError: true` with a text message; structured +machine codes inside the JSON where the caller must branch): + +| Condition | `status` / message | Caller action | +|-----------|--------------------|---------------| +| `expected_epoch` mismatch at pre-flight | dispatch returns `"stale UI epoch: … call get_hit_targets again before mutating"` + `_meta.dunst.coordination.epoch.status="stale"` | Re-run `enumerate_choices`, rebuild plan, retry | +| Plan references an unknown `choice_id` and label re-resolution fails | `status: "refused"`, `reason: "unresolvable_step"` | Re-enumerate | +| Operator approval missing | `status: "pending_approval"` (not an error) | Approve `batch_id`, re-call | +| Batch budget exhausted (too many reflows) | `status: "partially_applied"`, `remaining_steps: [...]` | Re-enumerate + apply remaining | +| A constituent actuator fails (platform error) | `status: "partially_applied"`, the failed step `result:"failed"` | Inspect, retry remaining | +| `DUNST_MCP_ENABLE_APPROVE_TOOL` unset | `pending_approval` returned but no `approve` tool advertised | Operator enables flag for the session | + +--- + +## 6. 
Sequence Diagrams (critical flows) + +### 6.1 `enumerate_choices` — AX-latent survey (default, no scroll, no gate) + +```mermaid +sequenceDiagram + participant Agent + participant Disp as dispatch.rs + participant Eng as engine/choices.rs + participant Hit as engine/read.rs + Agent->>Disp: tools/call enumerate_choices {scope:"page"} + Note over Disp: Read route, scroll_scan=false → no coordination, no epoch check + Disp->>Eng: enumerate_choices(opts) + Eng->>Hit: hit_targets(include_latent=true, "page", 500, None) + Hit-->>Eng: HitTargetsResult {targets, ui_epoch} + Eng->>Eng: classify + group → ChoiceModel (coverage=complete) + Eng-->>Disp: ChoiceModel + Disp-->>Agent: {ui_epoch, groups[...]} +``` + +### 6.2 `enumerate_choices` — scroll-scan survey (OCR-only / virtualized surface) + +```mermaid +sequenceDiagram + participant Agent + participant Disp as dispatch.rs + participant Eng as engine/choices.rs + participant Hit as engine/read.rs + Agent->>Disp: enumerate_choices {scroll_scan:true, max_scroll_pages:6} + Note over Disp: Read route + scroll_scan ⇒ mutation-coordinated (lock+lease) like scan_chart, NOT approval-gated + Disp->>Eng: enumerate_choices(opts) + Eng->>Hit: scroll "top", record origin + loop until bottom or max_scroll_pages + Eng->>Hit: hit_targets(latent=true) → accumulate, dedupe by bbox/id + Eng->>Hit: scroll "down" 1 page (survey scroll, position remembered) + end + Eng->>Hit: scroll back to origin offset (restore) + Eng->>Eng: merge accumulated targets → ChoiceModel (coverage=complete) + Eng-->>Agent: ChoiceModel +``` + +### 6.3 `apply_selections` — happy path (single approval, no reflow) + +```mermaid +sequenceDiagram + participant Agent + participant Op as Operator + participant Disp as dispatch.rs + participant Eng as engine/selections.rs + participant Gate as raw_input_gate.rs + participant Act as actuators + + Agent->>Disp: apply_selections {expected_epoch:E, plan} + Disp->>Disp: validate_expected_epoch(E) == current → OK + Disp->>Disp: 
CoordinationGuard.acquire (lock + window lease) + Disp->>Eng: apply_selections(plan, E) + Eng->>Gate: batch_id = batch@selections::; approved? + Gate-->>Eng: not approved → PendingApproval (1 audit entry) + Eng-->>Agent: status=pending_approval, batch_id, preview[risk] + Agent->>Op: surface preview + Op->>Disp: approve(batch_id) + Disp->>Gate: approve_raw_input(batch_id) → grant budget=n + Agent->>Disp: apply_selections {expected_epoch:E, plan} (re-call) + Disp->>Eng: apply_selections(plan, E) + Eng->>Gate: consume batch grant → enter BatchApprovalContext{budget=n} + loop each step + Eng->>Act: actuator(step) under batch context (no per-step operator prompt) + Act-->>Eng: AuditEntry success + Eng->>Eng: fingerprint unchanged → continue + end + Eng->>Eng: consolidated verify: hit_targets once, compare expected_after + Eng-->>Agent: status=applied, steps[], verify{ok:true}, ui_epoch +``` + +### 6.4 `apply_selections` — mid-batch reflow (progressive disclosure) + +```mermaid +sequenceDiagram + participant Eng as engine/selections.rs + participant Hit as engine/read.rs + participant Act as actuators + Note over Eng: inside approved BatchApprovalContext, fp = E + Eng->>Act: step 1 select → success + Eng->>Hit: current_ui_epoch_fingerprint() = fp' ≠ fp + Note over Eng: REFLOW detected (a new sub-section appeared) + Eng->>Hit: enumerate_choices(internal) → fresh ChoiceModel @ fp' + Eng->>Eng: re-resolve REMAINING steps by id, else by label/group/bbox + alt remaining step still resolvable + Eng->>Act: step 2..n on fresh targets, fp := fp' + else unresolvable & no budget + Eng-->>Eng: status=partially_applied, remaining_steps[...] 
+ end + Eng->>Hit: single consolidated verify @ final fingerprint +``` + +### 6.5 `apply_selections` — stale plan refusal (pre-flight) + +```mermaid +sequenceDiagram + participant Agent + participant Disp as dispatch.rs + Agent->>Disp: apply_selections {expected_epoch:OLD, plan} + Disp->>Disp: refresh_if_stale(); current = fp_now ≠ OLD + Disp-->>Agent: isError "stale UI epoch …" + _meta.dunst.coordination.epoch{status:"stale", current_epoch} + Note over Agent: re-run enumerate_choices, rebuild plan, retry +``` + +--- + +## 7. State Machine — batch approval & epoch-guarded execution + +```mermaid +stateDiagram-v2 + [*] --> PreflightEpoch + PreflightEpoch --> Refused: expected_epoch ≠ current (stale) + PreflightEpoch --> GateCheck: epoch matches + lease acquired + GateCheck --> PendingApproval: batch grant absent + PendingApproval --> [*]: return preview (await operator) + GateCheck --> Executing: batch grant present → enter context (budget=n) + Executing --> Executing: step ok, fingerprint unchanged + Executing --> Rescanning: fingerprint changed (reflow) + Rescanning --> Executing: remaining steps re-resolved, budget left + Rescanning --> PartiallyApplied: unresolvable OR budget exhausted + Executing --> Verifying: all steps done + Executing --> PartiallyApplied: actuator failed (platform error) + Verifying --> Applied: all checks ok + Verifying --> PartiallyApplied: a check failed + Applied --> [*] + PartiallyApplied --> [*] + Refused --> [*] +``` + +**Transition table (guards + side effects):** + +| From | To | Trigger | Guard | Side effects | +|------|----|---------|-------|--------------| +| PreflightEpoch | Refused | epoch mismatch | `current ≠ expected_epoch` | Return stale message + epoch meta (no mutation) | +| GateCheck | PendingApproval | first call | batch grant not present | One `PendingApproval` audit entry; return preview incl. 
per-step risk + `max_risk` | +| GateCheck | Executing | re-call after approve | batch grant present (TTL valid) | Consume grant; set `BatchApprovalContext{budget=n}` | +| Executing | Rescanning | after a step | `current_ui_epoch_fingerprint() ≠ pinned fp` | Internal `enumerate_choices`; re-pin fp | +| Rescanning | Executing | re-resolution ok | step found by id or label/group/bbox & budget>0 | Continue from next unfinished step | +| Executing | Verifying | last step done | — | Single `hit_targets` read | +| Verifying | Applied | all `expected_after` hold | — | Clear `BatchApprovalContext`; consume batch grant (one-shot) | +| any | PartiallyApplied | failure/exhaustion | budget==0 or actuator Err or check fail | Clear context; return `remaining_steps` | + +**Budget.** `budget = plan.steps.len()` at entry, capped at `MAX_BATCH_PICKS = 64`. +Each executed actuator consumes one unit. A reflow re-resolution that re-executes an +already-attempted step also consumes a unit — so a plan that thrashes the UI runs out +of budget and degrades to `partially_applied` rather than looping. `MAX_RESCANS = 8` +bounds the re-scan loop independently. + +--- + +## 8. 
Error Handling & Resilience + +**Error taxonomy:** + +| Category | Examples | Strategy | +|----------|----------|----------| +| Stale plan (pre-flight) | `expected_epoch` ≠ current | Refuse before any mutation; reuse `validate_expected_epoch`; caller re-enumerates | +| Reflow (recoverable) | progressive disclosure reveals new fields | Re-scan + re-resolve remaining steps, bounded by `MAX_RESCANS` and budget | +| Unresolvable step | `choice_id` gone, label match ambiguous/absent | Mark step `failed:"unresolvable"`, continue others, return `partially_applied` | +| Approval missing | operator hasn't approved | Return `pending_approval` (not an error); idempotent on re-call with same plan hash | +| Actuator/platform failure | AX write failed, user-active guard block | Existing `audit_raw_input` semantics: failed step is audited; `user-active` block restores budget for that step and surfaces a retry hint | +| Coordination conflict | another session holds the window lease, stale `fencing_token` | Reuse `CoordinationGuard` refusal (already tested) | +| Budget/loop exhaustion | UI thrash | `partially_applied` with `remaining_steps`, never an infinite loop | + +**Idempotency.** `batch_id = batch@selections:<plan_hash>:<n>`. The same plan +yields the same `batch_id`, so a re-call after the operator approves consumes the +existing grant; a *different* plan produces a *different* id and re-gates. An +already-`selected` choice targeted by `op:select` is a no-op verified as success (the +executor checks current state before acting). + +**No partial-approval leakage.** The batch grant is one-shot (CONTRACTS: "approvals +are validated; element/contextual approvals are one-shot"): a second `apply_selections` +re-gates. The `BatchApprovalContext` is always cleared on exit (success, partial, or +error) so a later single pick is never silently authorized. + +--- + +## 9. 
Concurrency Design + +This component runs inside the existing single-threaded MCP dispatch, but it touches +the cross-session coordination state and the per-engine approval maps. + +| Shared resource | Access pattern | Mechanism | Rationale | +|-----------------|----------------|-----------|-----------| +| Global mutation lock + window lease | one writer at a time | existing `CoordinationGuard::acquire` (lock + TTL lease + fencing token) | A batch must not interleave with another session's mutation on the same window | +| `Engine.raw_approvals` / `raw_approval_inflight` | single-threaded mutate | existing maps; add `batch@selections:*` scope | Reuse the proven event-budget/TTL grant model | +| `BatchApprovalContext` | set at entry, cleared at exit | `Option<BatchApprovalContext>` field on `Engine` | Bounds the "inside an approved batch" window to exactly one `apply_selections` call | +| Scroll position (survey) | borrow + restore | record origin offset, restore after sweep | Survey must leave the surface where it found it | + +**Race mitigations.** The dispatch-level `expected_epoch` check + the window lease +together ensure the surface did not change between enumerate and apply (or the apply is +refused). Mid-batch, the fingerprint re-check is the optimistic-concurrency token: any +unexpected reflow forces a re-scan instead of acting on stale coordinates. + +--- + +## 10. 
Component-Level Security (STRIDE) + +| Threat | Vector | Mitigation | +|--------|--------|-----------| +| **Spoofing** approval | Agent forges a `batch_id` and "self-approves" | `approve` stays operator-only behind `DUNST_MCP_ENABLE_APPROVE_TOOL`; `validate_synthetic_raw_approval` must accept `batch@selections:` only when structurally valid (hex64 hash + positive count); a fabricated id is rejected exactly like other synthetic targets | +| **Tampering** with coords | Stale/forged `bbox` clicks the wrong control | Picks resolve by `choice_id` first; coordinate fallback only via `click_near_text` (OCR-verified) or `safe_click` zones; off-target points already refused by `ensure_point_in_target_window` | +| **Repudiation** | Batch hides which actions ran | Every constituent actuator still appends its own `AuditEntry` (CONTRACTS: "every attempt is audited"); plus one batch `PendingApproval` entry and `ApplyOutcome.steps[]` | +| **Information disclosure** | Preview leaks field contents | Preview surfaces labels + risk, not field values; `set_text` values are caller-supplied, never echoed from the screen | +| **Denial of service** | UI-thrash infinite re-scan | `MAX_RESCANS`, `MAX_BATCH_PICKS` budget, scroll-sweep `max_scroll_pages` clamp | +| **Elevation of privilege** | Low-risk batch sneaks a high-risk pick past the operator | Pending-approval preview MUST include per-step `risk` and an aggregate `max_risk`; the single approval is therefore informed, and the high-risk step is visible before approval | + +**Input validation rules.** `plan.steps` non-empty and ≤ `MAX_BATCH_PICKS`; each step +`op ∈ {select,deselect,set_text}`; `set_text` requires `value`; `choice_id` +non-empty. `expected_epoch` required and non-empty. Reject the whole call on schema +violation before gating. + +--- + +## 11. 
Testability Design + +**DI / mock boundaries.** The executor depends only on `Engine` methods that already +run against the mock platform in `engine/tests/*` (the same seam `act` uses). No new +external dependency is introduced, so the full feature is unit-testable headless +(off-macOS, where `set_field_text` returns the documented stub error and AX-latent +classification still runs on a mock scene graph). + +| Layer | What to mock | Test style | +|-------|--------------|-----------| +| `choices.rs` classification | mock `SceneGraph` + `AffordanceGraph` (existing `engine_with_window` / fixtures) | Pure assertion on the produced `ChoiceModel` | +| `selections.rs` executor | mock actuators via the existing executor trait | State-machine assertions (gate → approve → apply → verify) | +| `serve` catalog | `tools_list()` | Schema shape + count | +| `serve` dispatch | `call(&mut e, "apply_selections", json!{...})` | Stale-epoch + coordination refusals | + +**Unit tests to add** (names mirror the CONTRACTS convention): + +- `engine::tests::enumerate_classifies_radios_as_single_select_and_checkboxes_as_multi` +- `engine::tests::enumerate_marks_required_group_from_label_markers` +- `engine::tests::enumerate_ax_latent_captures_offscreen_choices_without_scroll` +- `engine::tests::enumerate_scroll_scan_restores_origin_and_sets_coverage_complete` +- `engine::tests::enumerate_partial_coverage_returns_scroll_plan` +- `engine::tests::apply_selections_first_call_is_pending_with_per_step_risk_preview` +- `engine::tests::apply_selections_single_approval_executes_whole_batch` +- `engine::tests::apply_selections_batch_grant_is_one_shot_resists_second_batch` +- `engine::tests::apply_selections_rescans_only_when_fingerprint_changes` +- `engine::tests::apply_selections_reflow_reresolves_remaining_steps_by_label` +- `engine::tests::apply_selections_budget_exhaustion_degrades_to_partial` +- `engine::tests::apply_selections_single_consolidated_verify_reports_per_check` +- 
`engine::tests::apply_selections_rejects_forged_batch_id_via_validate_synthetic` +- `serve::tests::stale_expected_epoch_refuses_apply_selections` (mirror existing + `mutating_tool_rejects_stale_expected_epoch`) +- `serve::tests::tools_list_exposes_enumerate_choices_and_apply_selections` +- Update `serve::tests::catalog::tools_list_exposes_read_text_with_object_schema`: + bump `assert_eq!(tools.len(), 70)` → `72`, and + `tool_registry_matches_advertised_catalog` accordingly. + +**Testability score:** new external dependencies = 0 (100% reuse of injected seams). + +--- + +## 12. Configuration & Feature Flags + +| Flag | Effect | Default | +|------|--------|---------| +| `DUNST_MCP_ENABLE_APPROVE_TOOL` | (existing) advertises/executes `approve`, including `batch@selections:*` | unset (operator opt-in) | +| `MAX_BATCH_PICKS` (const) | hard cap on plan length / budget | 64 | +| `MAX_RESCANS` (const) | bound on mid-batch re-scan loop | 8 | +| `BATCH_APPROVAL_TTL_SECS` (const, reuse `RAW_APPROVAL_TTL_SECS`) | grant lifetime | 120 | +| `enumerate_choices.max_scroll_pages` (arg) | survey-sweep bound | 6 (clamp 1..=12) | + +No new env var is required for the common path: `enumerate_choices` and the +`pending_approval` response work without any flag; only the operator's `approve` step +needs the existing flag, exactly like every other gated mutation today. + +--- + +## 13. Micro-ADRs + +**ADR-1 — Survey scroll is coordinated but not operator-approval-gated.** +*Context:* `enumerate_choices(scroll_scan=true)` must scroll to assemble OCR-only +choices, yet scrolling is normally raw-approval-gated. *Decision:* model survey scroll +on `scan_chart`/`read_at(borrow_cursor)` — it acquires the mutation lock + lease (so it +serializes against other sessions) but does **not** require operator approval, and it +**restores the original scroll position**. 
*Rationale:* surveying does not mutate +application data; gating a read would reintroduce the per-step approval cost the feature +exists to remove. *Consequence:* if the project later decides scroll itself is +operator-worthy, `enumerate_choices` must inherit that gate; flagged here so the +decision is explicit. *Alternative rejected:* AX-latent-only enumeration (no scroll) — +kept as the default mode, but insufficient for virtualized/AX-sparse lists. + +**ADR-2 — One approval = a bounded batch context, not a per-pick budget alone.** +*Decision:* the operator approves a single `batch_id`; the engine then runs inside a +`BatchApprovalContext` with a numeric budget (= step count, capped). *Rationale:* a +single context is simpler to reason about across a mid-batch re-scan than tracking +heterogeneous per-pick grants, while the budget still bounds blast radius. *Reuse:* the +grant itself lives in the existing `raw_approvals` map with the existing TTL, so +`attach`/expiry/exhaustion clearing is inherited for free (mirrors the scroll-grant +precedent that already lets one approval cover 8 scroll events). *Alternative rejected:* +auto-approving each constituent element gate individually — would either re-prompt or +silently widen authorization. + +**ADR-3 — Plan addresses choices by id, with label/group/bbox re-resolution.** +*Decision:* `SelectionStep.choice_id` is the `HitTarget.id`; on reflow the executor +re-resolves remaining steps by id, then by `(group_id, label)`, then by nearest `bbox`. +*Rationale:* ids are stable within an epoch (CONTRACTS: WP-D stable ids) and free to +carry; labels/bbox survive id churn after a reflow. *Consequence:* a step that matches +neither is reported `unresolvable` rather than guessing — fail-closed. 
+ +**ADR-4 — Reuse `expected_epoch` for pre-flight, `fingerprint` re-check for in-flight.** +*Decision:* the dispatch-level `validate_expected_epoch` guards the plan↔surface gap +*before* execution; `current_ui_epoch_fingerprint()` guards reflow *during* execution. +*Rationale:* two distinct staleness moments; reusing the same fingerprint primitive for +both keeps one source of truth and zero new perception code. + +--- + +## 14. Integration Points (exact edits) + +| File | Edit | +|------|------| +| `crates/dunst-mcp/src/engine/choices.rs` | **new** — `EnumerateOpts`, `ChoiceModel` + value objects, `Engine::enumerate_choices`, classification/grouping helpers | +| `crates/dunst-mcp/src/engine/selections.rs` | **new** — `SelectionPlan`/`SelectionStep`/`ApplyOutcome`, `BatchApprovalContext`, `Engine::apply_selections`, epoch-guarded loop, consolidated verify | +| `crates/dunst-mcp/src/engine.rs` | declare `mod choices; mod selections;`; add `active_batch: Option<BatchApprovalContext>` field (init `None`) | +| `crates/dunst-mcp/src/engine/raw_input_gate.rs` | add `raw_apply_selections_target_id(hash, n)`; extend `is_synthetic_approval_target_id` with `batch@`; add a `batch@selections:` arm to `validate_synthetic_raw_approval` and a `raw_approval_policy` entry (grant=n, cost=1) | +| `crates/dunst-mcp/src/serve/registry.rs` | add `tool("enumerate_choices", ToolRoute::Read)` and `tool("apply_selections", ToolRoute::Batch)` (or `::Element`) | +| `crates/dunst-mcp/src/serve/dispatch.rs` | route `apply_selections`; add it to `tool_requires_mutation_coordination`; make `tool_requires_mutation_coordination(Read, "enumerate_choices", args)` return `arg_bool(args,"scroll_scan")` | +| `crates/dunst-mcp/src/serve/dispatch/read_tools.rs` | parse + call `enumerate_choices` | +| `crates/dunst-mcp/src/serve/dispatch/batch_tools.rs` | **new** — parse + call `apply_selections` (or fold into `element_tools.rs`) | +| `crates/dunst-mcp/src/serve.rs` | add `apply_selections` to 
`tool_accepts_mutation_preconditions` (for `fencing_token`); add `mod batch_tools;` wiring if a new route is used | +| `crates/dunst-mcp/src/serve/tools.rs` | add the two `tool(...)` catalog entries (§5) | +| `crates/dunst-mcp/src/serve/tests/catalog.rs` | bump tool count `70 → 72`; add presence assertions | +| `docs/CONTRACTS.md` | add the three invariants in §15 | +| `docs/AGENT_GUIDE.md` | add a "Batch a multi-field choice page" recipe | + +**Reuse map (no reimplementation):** + +| Need | Existing building block | +|------|-------------------------| +| Enumerate targets + epoch | `Engine::hit_targets(include_latent, scope, limit, prev)` → `HitTargetsResult{targets, ui_epoch}` | +| Per-choice action + risk | `HitTarget.action_modes[].{action, tool_hint, risk}`, `HitTarget.safe_click` | +| Reflow fingerprint | `Engine::current_ui_epoch_fingerprint()` / `ui_fingerprint()` | +| Pre-flight stale refusal | `dispatch::validate_expected_epoch` | +| Single approval primitive | `Engine::gate_raw_input` + `approve_raw_input`/`consume_raw_approval` grant model | +| Pick actuators | `click_element`, `pick_option`, `click_near_text`, `set_field_text`, `type_into`, `scroll` | +| Lock / lease / fencing | `serve::coordination::CoordinationGuard` | + +--- + +## 15. CONTRACTS.md additions + +Append under "Risk gate": + +- **Batch selections are approved as one unit.** `apply_selections` records exactly one + `PendingApproval` for a `batch@selections:<plan_hash>:<n>` target whose preview carries + per-step risk and an aggregate `max_risk`; a single operator `approve(batch_id)` + authorizes the whole batch, the grant is one-shot, and the `BatchApprovalContext` is + always cleared on exit so no later single action is silently authorized. + — `engine::tests::apply_selections_first_call_is_pending_with_per_step_risk_preview`, + `engine::tests::apply_selections_batch_grant_is_one_shot_resists_second_batch`. 
+- **Batch execution is epoch-guarded.** `apply_selections` refuses a plan whose + `expected_epoch` no longer matches (pre-flight) and re-scans **only** when the UI + fingerprint changes mid-batch, re-resolving remaining steps by id then label/bbox; + bounded by `MAX_RESCANS` and the step budget. + — `engine::tests::apply_selections_rescans_only_when_fingerprint_changes`, + `serve::tests::stale_expected_epoch_refuses_apply_selections`. +- **Enumeration is read-only or survey-only.** `enumerate_choices` mutates no + application state: default mode does not scroll; `scroll_scan` restores the original + scroll position and is mutation-coordinated but not operator-approval-gated. + — `engine::tests::enumerate_scroll_scan_restores_origin_and_sets_coverage_complete`. + +--- + +## 16. Test Plan + +**Pyramid.** + +| Level | Coverage | Where | +|-------|----------|-------| +| Unit (headless, mock platform) | classification, grouping, requirement inference, plan resolution, state machine, budget/loop bounds, forged-id rejection | `engine/tests/*` (new `choices`/`selections` test modules) | +| Contract | catalog count/shape, dispatch epoch + coordination refusals, approval transport | `serve/tests/*` | +| Integration (macOS, gated) | real AX modal + OCR-only list end-to-end, reflow on a progressive-disclosure form | manual rig / `docs/fixtures/mcp-transcript.jsonl` style capture | + +**Key scenarios (Gherkin-ish):** + +1. *Single-select + multi-select + text* — enumerate a 14-field order modal; assert the + `ChoiceModel` shape; build a 14-step plan; one approval; `status=applied`, verify + all 14 with one consolidated read; **≤ 4 tool calls total** (enumerate, apply→pending, + approve, apply→applied) vs ≈70 today. +2. *Reflow* — selecting "Schedule" reveals a time picker; assert exactly one re-scan, + `rescans=1`, the picker step re-resolved, single verify. +3. 
*Stale plan* — mutate the surface between enumerate and apply; assert pre-flight + refusal with `epoch.status="stale"` and no mutation. +4. *Partial* — feed an unresolvable `choice_id`; assert `partially_applied` + + `remaining_steps`, other steps still applied. +5. *Approval boundary* — with `DUNST_MCP_ENABLE_APPROVE_TOOL` unset, assert + `pending_approval` returned and no `approve` tool advertised; with it set, the batch + completes. +6. *DoS bound* — a thrashing surface (fingerprint flips every step) hits `MAX_RESCANS` + and degrades to `partially_applied`, never loops. + +**Negative/NFR:** off-target coordinates rejected; budget cap enforced; survey scroll +position restored; one `AuditEntry` per constituent action (audit completeness). + +--- + +## DCI-LLD Score + +| # | Item | Weight | Score | Note | +|---|------|--------|-------|------| +| 1 | Component architecture (C4 L3) | 5 | 1.0 | Layers + dependency direction + exact files | +| 2 | API contracts | 5 | 1.0 | Both tool schemas, result/error bodies, status codes | +| 3 | Database schema | 0 | — | No persistence (omitted by mitosis) | +| 4 | Sequence diagrams | 4 | 1.0 | Enumerate ×2, apply happy/reflow/stale | +| 5 | Class/module design | 4 | 1.0 | Signatures, value objects, patterns justified | +| 6 | Error handling | 4 | 1.0 | Taxonomy + idempotency + no-leak | +| 7 | State machine | 3 | 1.0 | Batch lifecycle + guards/side effects table | +| 8 | Security (STRIDE) | 3 | 1.0 | Component-level, with input validation rules | +| 9 | Testability | 3 | 1.0 | DI seam, named tests, 0 new deps | +| 10 | Domain model | 3 | 1.0 | Value objects, invariants, classification table | +| 11 | Concurrency | 2 | 1.0 | Lock/lease reuse + optimistic fingerprint token | +| 12 | Migration plan | 0 | — | Additive feature (omitted) | + +**DCI-LLD = Σ(wᵢ·sᵢ)/Σwᵢ × 10 = 36/36 × 10 = 10.0 → Excellent** (implementable +without further questions). 
Derived: Contract Coverage 100% (2/2 new tools specified); +Testability 100% (0 new external deps); Implementation Ambiguity < 10% (open choices +are explicit ADRs, not gaps). + +--- + +### Handoff suggestions + +- Run `/cli-audit-drift` after implementation to bootstrap the three new CONTRACTS + invariants into the drift baseline. +- Run `/cli-audit-code` on `engine/selections.rs` once written — the batch executor is + the highest-risk new surface (gate + loop + budget).