blackaxgit · blackaxgit · Jun 10, 2026 · Jun 8, 2026 · Jun 8, 2026 · Jun 8, 2026
diff --git a/.gitleaks.toml b/.gitleaks.toml
@@ -24,6 +24,7 @@ paths = [
   '''crates/clx-core/tests/.*_poc\.rs''',
   '''crates/clx-core/tests/redaction_scheme_floor_regression\.rs''',
   '''crates/clx-core/tests/security_fixed_vectors_regression\.rs''',
+  '''crates/clx-hook/tests/learning_mode_e2e\.rs''',
 ]
 # Named synthetic tokens, anchored and with a required tail.
 regexes = [

diff --git a/crates/clx-core/src/config/mod.rs b/crates/clx-core/src/config/mod.rs
@@ -14,6 +14,7 @@
 //! - `CLX_VALIDATOR_CACHE_ALLOW_TTL` (TTL for cached allow decisions, seconds)
 //! - `CLX_VALIDATOR_CACHE_ASK_TTL` (TTL for cached ask decisions, seconds)
 //! - `CLX_VALIDATOR_PROMPT_SENSITIVITY` (high/standard/low/custom)
+//! - `CLX_LEARNING_MODE` (opt-in learning/debug capture; observe-only)
 //! - `CLX_CONTEXT_ENABLED`
 //! - `CLX_CONTEXT_AUTO_SNAPSHOT`
 //! - `CLX_CONTEXT_EMBEDDING_MODEL`
@@ -199,6 +200,36 @@ impl FromStr for DefaultDecision {
     }
 }
 
+/// Policy applied to the FOUR runtime arms where an *enabled* layer 1 (LLM)
+/// validation is UNREACHABLE — provider init error, provider unavailable,
+/// request timeout, or generation failure.
+///
+/// This governs the validator-UNAVAILABLE case ONLY. It is distinct from
+/// `layer1_enabled = false` (a *deliberately disabled* layer, which is
+/// "unavailable on purpose" and unconditionally forces `ask`): disabled is not
+/// the same as unavailable, and this knob never relaxes the disabled-L1 arm.
+///
+/// - `Ask` (default, serialized as `ask`): force a user prompt regardless of
+///   `default_decision` (historical F7 fail-closed posture: `allow` becomes `ask`).
+/// - `Deny` (serialized as `deny`): hard-deny on an unreachable validator (strictest).
+/// - `HonorDefault` (serialized as `honordefault`): opt in to honoring
+///   `default_decision` (allow/deny/ask) when the validator cannot be reached. This
+///   can fail OPEN if `default_decision = allow`, so it is security-relevant and lives
+///   under the trust-gated `validator` subtree (stripped from untrusted project config).
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
+#[serde(rename_all = "lowercase")]
+pub enum OnValidatorUnavailable {
+    /// Force a user prompt regardless of `default_decision` (fail-closed
+    /// default — preserves the historical F7 posture).
+    #[default]
+    Ask,
+    /// Hard-deny when the validator is unreachable (strictest).
+    Deny,
+    /// Honor `default_decision` when the validator is unreachable (spelled
+    /// `honordefault` in config). May fail open if `default_decision = allow`.
+    HonorDefault,
+}
+
 /// CLX configuration
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
 pub struct Config {
@@ -616,6 +647,18 @@ pub struct ValidatorConfig {
     #[serde(default)]
     pub default_decision: DefaultDecision,
 
+    /// Policy for the validator-UNREACHABLE case (provider init error,
+    /// provider unavailable, request timeout, generation failure). Default
+    /// `Ask` preserves the historical F7 fail-closed posture (an unreachable
+    /// validator upgrades `allow` to `ask`). Set to `honordefault` to instead
+    /// honor `default_decision` on those arms, or `deny` to hard-deny.
+    ///
+    /// NOTE: this is distinct from `layer1_enabled = false` (a deliberately
+    /// DISABLED layer, which always forces `ask`); disabled != unavailable, and
+    /// this knob never affects the disabled-L1 arm.
+    #[serde(default)]
+    pub on_validator_unavailable: OnValidatorUnavailable,
+
     /// Trust mode - auto-allow ALL commands without validation
     /// Still logs commands for audit. Use with caution!
     /// Can only be enabled via config file (~/.clx/config.yaml) for security.
@@ -650,6 +693,13 @@ pub struct ValidatorConfig {
     /// Default trust mode duration in seconds when no --duration given (default: 1h)
     #[serde(default = "default_trust_mode_default_duration")]
     pub trust_mode_default_duration: u64,
+
+    /// Opt-in learning/debug mode: record every `PreToolUse` decision + rationale
+    /// to the `learning_events` table. Observe-only; off by default; never
+    /// changes a decision. Trust-gated (lives under the `validator` subtree,
+    /// which is stripped from untrusted project config).
+    #[serde(default)]
+    pub learning_mode: bool,
 }
 
 /// Context configuration
@@ -963,6 +1013,7 @@ impl Default for ValidatorConfig {
             layer1_enabled: default_true(),
             layer1_timeout_ms: default_layer1_timeout(),
             default_decision: DefaultDecision::Ask,
+            on_validator_unavailable: OnValidatorUnavailable::default(),
             trust_mode: false,
             auto_allow_reads: default_true(),
             cache_enabled: default_true(),
@@ -971,6 +1022,7 @@ impl Default for ValidatorConfig {
             prompt_sensitivity: PromptSensitivity::Standard,
             trust_mode_max_duration: default_trust_mode_max_duration(),
             trust_mode_default_duration: default_trust_mode_default_duration(),
+            learning_mode: false,
         }
     }
 }
@@ -1461,6 +1513,9 @@ impl Config {
                 );
             }
         }
+        if let Ok(val) = env::var("CLX_LEARNING_MODE") {
+            apply_bool_override(&val, "CLX_LEARNING_MODE", &mut self.validator.learning_mode);
+        }
         if let Ok(val) = env::var("CLX_VALIDATOR_CACHE_ENABLED") {
             apply_bool_override(
                 &val,
@@ -2303,6 +2358,27 @@ mod tests {
     use super::*;
     use std::env;
 
+    /// FIX-1 config-trust regression. `on_validator_unavailable` sits in the
+    /// `validator` subtree, which `NON_INERT_KEY_PATTERNS` strips wholesale from an
+    /// UNTRUSTED project config. An untrusted repo therefore cannot set
+    /// `validator.on_validator_unavailable=honordefault`, which would fail open
+    /// when paired with `default_decision=allow`. Both keys are fed through the
+    /// existing untrusted-validator-subtree strip path and must be absent from the
+    /// filtered output.
+    #[test]
+    fn on_validator_unavailable_stripped_from_untrusted_config() {
+        let untrusted_yaml =
+            "validator:\n  on_validator_unavailable: honordefault\n  default_decision: allow\n";
+        let out = crate::config::project::filter_inert_only(untrusted_yaml);
+        let unavailable_kept = out.contains("on_validator_unavailable");
+        let default_kept = out.contains("default_decision");
+        assert!(
+            !unavailable_kept,
+            "validator.on_validator_unavailable must be stripped from untrusted config; got: {out}"
+        );
+        assert!(!default_kept, "validator.default_decision must also be stripped; got: {out}");
+    }
+
     /// RAII guard that saves env var values on creation and restores them on drop.
     /// Guarantees cleanup even if the test panics.
     #[allow(unsafe_code)]
@@ -4007,4 +4083,52 @@ fallback:
             std::env::remove_var("CLX_CONFIG_PROJECT");
         }
     }
+
+    /// `learning_mode` is opt-in and observe-only, so it must be off by default.
+    ///
+    /// Checked through both construction paths: the top-level `Config::default()`
+    /// and a standalone `ValidatorConfig::default()`.
+    #[test]
+    fn learning_mode_defaults_to_false() {
+        let via_config = Config::default().validator.learning_mode;
+        let via_validator = ValidatorConfig::default().learning_mode;
+
+        assert!(!via_config, "learning_mode must default to false");
+        assert!(!via_validator, "ValidatorConfig::default().learning_mode must be false");
+    }
+
+    /// AC3: `CLX_LEARNING_MODE=1` enables capture via env override even when the config
+    /// flag defaults to false; a drop guard unsets the variable even if the test panics.
+    #[test]
+    #[serial_test::serial]
+    #[allow(unsafe_code)]
+    fn learning_mode_enabled_via_env() {
+        struct UnsetOnDrop;
+        impl Drop for UnsetOnDrop {
+            fn drop(&mut self) {
+                // SAFETY: test-only env var manipulation; serialized via serial_test.
+                unsafe { std::env::remove_var("CLX_LEARNING_MODE") }
+            }
+        }
+        // SAFETY: test-only env var manipulation; serialized via serial_test.
+        unsafe { std::env::set_var("CLX_LEARNING_MODE", "1") }
+        let _unset = UnsetOnDrop;
+        let mut config = Config::default();
+        assert!(!config.validator.learning_mode);
+        config.apply_env_overrides();
+        assert!(config.validator.learning_mode, "CLX_LEARNING_MODE=1 must enable learning_mode");
+    }
+
+    /// AC3 (trust gate): an UNTRUSTED project config cannot enable capture, because
+    /// `validator.learning_mode` is dropped along with the whole `validator` subtree.
+    #[test]
+    fn untrusted_validator_learning_mode_is_stripped() {
+        let hostile_yaml = "validator:\n  learning_mode: true\n";
+        let out = crate::config::project::filter_inert_only(hostile_yaml);
+        let survived = out.contains("learning_mode");
+        assert!(
+            !survived,
+            "validator.learning_mode must be dropped from untrusted config; got: {out}"
+        );
+    }
 }
diff --git a/crates/clx-core/src/learned_pattern.rs b/crates/clx-core/src/learned_pattern.rs
@@ -198,6 +198,53 @@ fn is_tool_segment_byte(b: u8) -> bool {
     b.is_ascii_alphanumeric() || b == b'.' || b == b'_' || b == b'-'
 }
 
+/// Commands that should never be auto-whitelisted due to destructive potential.
+///
+/// Even if the user approves these commands repeatedly, they remain subject to
+/// manual confirmation, so an overly broad pattern (e.g. `Bash(rm:-i *)`) cannot
+/// silently whitelist a destructive variant (e.g. `rm -rf /`). Entries are bare
+/// command names: [`is_never_auto_whitelist`] matches the first word exactly.
+///
+/// Lives in `clx-core` (rather than `clx-hook`) so both the hook auto-learning
+/// path and the `clx` CLI suggestion filter share one source of truth; the CLI
+/// does not depend on `clx-hook`.
+pub const NEVER_AUTO_WHITELIST: &[&str] = &[
+    "rm",
+    "rmdir",
+    "dd",
+    "mkfs",
+    "fdisk",
+    "chmod",
+    "chown",
+    "chgrp",
+    "kill",
+    "killall",
+    "pkill",
+    "shutdown",
+    "reboot",
+    "halt",
+    "poweroff",
+    "iptables",
+    "ip6tables",
+    "mount",
+    "umount",
+    "systemctl",
+    "service",
+];
+
+/// Check whether the base command (first word) of a command string is
+/// restricted from auto-whitelisting.
+///
+/// Leading `ENV=VALUE` assignments are stripped first (via
+/// [`strip_env_assignments`]) and any directory prefix is dropped, so neither
+/// `FOO=bar rm -rf /` nor `/bin/rm -rf /` can hide a restricted base command.
+#[must_use]
+pub fn is_never_auto_whitelist(command: &str) -> bool {
+    let command = strip_env_assignments(command);
+    let first_word = command.split_whitespace().next().unwrap_or("");
+    NEVER_AUTO_WHITELIST.contains(&first_word.rsplit('/').next().unwrap_or(first_word))
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -254,4 +301,20 @@ mod tests {
     fn is_well_formed_pattern_cases(#[case] input: &str, #[case] expected: bool) {
         assert_eq!(is_well_formed_pattern(input), expected);
     }
+
+    #[rstest]
+    // Base commands that appear in `NEVER_AUTO_WHITELIST` are restricted.
+    #[case("rm -rf /", true)]
+    #[case("dd if=/dev/zero of=/dev/sda", true)]
+    #[case("systemctl restart nginx", true)]
+    // A leading `ENV=VALUE` assignment must not hide the restricted base command.
+    #[case("FOO=bar rm -rf /", true)]
+    // Commands outside the list, and the empty string, are not restricted.
+    #[case("cargo build", false)]
+    #[case("git status", false)]
+    #[case("ls -la", false)]
+    #[case("", false)]
+    fn is_never_auto_whitelist_cases(#[case] input: &str, #[case] expected: bool) {
+        assert_eq!(is_never_auto_whitelist(input), expected);
+    }
 }