diff --git a/crates/api-core/src/cfg/file.rs b/crates/api-core/src/cfg/file.rs index 2dbde9d57f..c32cc96fb6 100644 --- a/crates/api-core/src/cfg/file.rs +++ b/crates/api-core/src/cfg/file.rs @@ -734,6 +734,94 @@ pub struct CarbideConfig { /// encrypted in Postgres and vault leaves the credential chain /// entirely; when absent, vault remains the credential store. pub secrets: Option, + + /// Certificate vending backend. Selected independently of the credential + /// store; absent means certs are issued from the credential Vault. + #[serde(default)] + pub certificates: CertificatesConfig, +} + +/// `[certificates]` config section: selects the backend that vends machine and +/// service certificates, independently of where credentials are stored. +#[derive(Clone, Debug, Default, Deserialize, Serialize)] +#[serde(deny_unknown_fields)] +pub struct CertificatesConfig { + /// Which backend issues certificates. Defaults to sharing the credential + /// Vault client (historical behavior). + #[serde(default)] + pub backend: CertBackendKind, + + /// Connection settings for a dedicated certificate Vault. Required when + /// `backend = "dedicated_vault"`, ignored otherwise. + #[serde(default)] + pub dedicated_vault: Option, +} + +/// Tag selecting the certificate backend. The matching settings (if any) live +/// in their own sub-table, so the choice is explicit rather than inferred. +// The shared `Vault` suffix is intentional: both current backends are Vault +// backends. The lint resolves once a non-Vault backend is added. +#[allow(clippy::enum_variant_names)] +#[derive(Clone, Debug, Default, PartialEq, Eq, Deserialize, Serialize)] +#[serde(rename_all = "snake_case")] +pub enum CertBackendKind { + /// Reuse the credential store's Vault client — one client, one token lease. + #[default] + SharedVault, + /// Use a dedicated Vault configured under `[certificates.dedicated_vault]`. + DedicatedVault, +} + +/// `[certificates.dedicated_vault]` settings. +/// +/// The connection-identifying fields are required, so a partial section fails +/// to parse rather than silently inheriting the credential Vault's process-wide +/// `VAULT_*` environment configuration. +#[derive(Clone, Debug, Deserialize, Serialize)] +#[serde(deny_unknown_fields)] +pub struct DedicatedVaultTomlConfig { + /// Vault address, e.g. `https://vault-certs.example:8200`. + pub address: String, + /// PKI secrets-engine mount path on the target Vault. + pub pki_mount_location: String, + /// PKI role used to sign leaf certificates. + pub pki_role_name: String, + /// Token for root-token auth; required only when the pod has no Kubernetes + /// service-account token. + #[serde(default)] + pub token: Option, + /// CA bundle that signs the target Vault's TLS cert. Defaults to the site + /// root / `VAULT_CACERT`. + #[serde(default)] + pub vault_cacert: Option, +} + +impl CertificatesConfig { + /// Convert the parsed section into the runtime certificate config, failing + /// fast if a dedicated backend was selected without its settings. + pub fn to_certificate_config(&self) -> eyre::Result { + let backend = match self.backend { + CertBackendKind::SharedVault => carbide_secrets::CertBackend::SharedVault, + CertBackendKind::DedicatedVault => { + let dedicated = self.dedicated_vault.as_ref().ok_or_else(|| { + eyre::eyre!( + "[certificates] backend = \"dedicated_vault\" requires a \ + [certificates.dedicated_vault] section" + ) + })?; + carbide_secrets::CertBackend::DedicatedVault( + carbide_secrets::DedicatedVaultConfig { + address: dedicated.address.clone(), + pki_mount_location: dedicated.pki_mount_location.clone(), + pki_role_name: dedicated.pki_role_name.clone(), + token: dedicated.token.clone(), + vault_cacert: dedicated.vault_cacert.clone(), + }, + ) + } + }; + Ok(carbide_secrets::CertificateConfig { backend }) + } } #[derive(Clone, Debug, Deserialize, Serialize)] @@ -2503,6 +2591,90 @@ mod tests { const TEST_DATA_DIR: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/src/cfg/test_data"); + #[test] + fn certificates_absent_defaults_to_shared_vault() { + let cfg: CertificatesConfig = serde_json::from_str("{}").unwrap(); + assert_eq!(cfg.backend, CertBackendKind::SharedVault); + let runtime = cfg.to_certificate_config().unwrap(); + assert!(matches!( + runtime.backend, + carbide_secrets::CertBackend::SharedVault + )); + } + + #[test] + fn certificates_explicit_shared_vault() { + let cfg: CertificatesConfig = + serde_json::from_str(r#"{"backend":"shared_vault"}"#).unwrap(); + assert!(matches!( + cfg.to_certificate_config().unwrap().backend, + carbide_secrets::CertBackend::SharedVault + )); + } + + #[test] + fn certificates_dedicated_vault_maps_all_fields() { + let cfg: CertificatesConfig = serde_json::from_str( + r#"{ + "backend": "dedicated_vault", + "dedicated_vault": { + "address": "https://vault-certs.example:8200", + "pki_mount_location": "pki", + "pki_role_name": "machine", + "token": "s.abc123" + } + }"#, + ) + .unwrap(); + + match cfg.to_certificate_config().unwrap().backend { + carbide_secrets::CertBackend::DedicatedVault(dedicated) => { + assert_eq!(dedicated.address, "https://vault-certs.example:8200"); + assert_eq!(dedicated.pki_mount_location, "pki"); + assert_eq!(dedicated.pki_role_name, "machine"); + assert_eq!(dedicated.token.as_deref(), Some("s.abc123")); + assert!(dedicated.vault_cacert.is_none()); + } + other => panic!("expected dedicated vault backend, got {other:?}"), + } + } + + #[test] + fn certificates_dedicated_vault_without_section_fails_fast() { + // backend selected but no settings -> must error rather than fall back + // to the credential Vault. + let cfg: CertificatesConfig = + serde_json::from_str(r#"{"backend":"dedicated_vault"}"#).unwrap(); + let err = cfg.to_certificate_config().unwrap_err(); + assert!( + err.to_string().contains("dedicated_vault"), + "unexpected error: {err}" + ); + } + + #[test] + fn certificates_dedicated_vault_missing_required_field_fails_parse() { + // `address` is required; omitting it must fail at parse time, not vend + // certs from a half-specified Vault. + let result: Result = serde_json::from_str( + r#"{ + "backend": "dedicated_vault", + "dedicated_vault": { + "pki_mount_location": "pki", + "pki_role_name": "machine" + } + }"#, + ); + assert!(result.is_err(), "expected parse error for missing address"); + } + + #[test] + fn certificates_unknown_field_rejected() { + let result: Result = + serde_json::from_str(r#"{"backend":"shared_vault","typo":true}"#); + assert!(result.is_err(), "deny_unknown_fields should reject typos"); + } + #[test] fn deserialize_serialize_machine_controller_config() { let input = MachineStateControllerConfig { diff --git a/crates/api-core/src/run.rs b/crates/api-core/src/run.rs index c88a0e92e7..6dc6d26067 100644 --- a/crates/api-core/src/run.rs +++ b/crates/api-core/src/run.rs @@ -23,8 +23,8 @@ use carbide_kms_provider::{ }; use carbide_secrets::credentials::{CredentialManager, CredentialReader, CredentialWriter}; use carbide_secrets::{ - CredentialConfig, ForgeVaultClient, MemoryCredentialStore, VaultConfig, - create_credential_manager_from, create_vault_client, + CredentialConfig, ForgeVaultClient, MemoryCredentialStore, SpiffeIdentity, VaultConfig, + create_certificate_provider, create_credential_manager_from, create_vault_client, }; use carbide_utils::HostPortPair; use eyre::WrapErr; @@ -193,10 +193,25 @@ pub async fn run( let vault_config = vault_config_for_site(&credential_config.vault, &carbide_config); - // One vault client serves every vault role below. PKI certificates stay - // on vault no matter which credential backend is configured. + // One vault client serves every credential vault role below. let vault_client = create_vault_client(&vault_config, metrics.meter.clone())?; - let certificate_provider = vault_client.clone(); + + // Certificate vending is selected independently of the credential store. + // SharedVault (the default) reuses `vault_client` (no second client or token + // lease); a dedicated cert Vault decouples PKI issuance from credentials and + // is fully explicit, never inheriting the credential Vault's env config. The + // SPIFFE identity comes from the site-resolved credential Vault config so all + // backends mint under the same identity namespace. + let cert_config = carbide_config.certificates.to_certificate_config()?; + let certificate_provider = create_certificate_provider( + &cert_config, + &vault_client, + SpiffeIdentity { + trust_domain: vault_config.spiffe_trust_domain(), + machine_base_path: vault_config.spiffe_machine_base_path(), + }, + metrics.meter.clone(), + )?; let db_pool = setup::create_and_connect_postgres_pool(&carbide_config).await?; diff --git a/crates/api-core/src/test_support/default_config.rs b/crates/api-core/src/test_support/default_config.rs index 23ee2decc3..677a6101a3 100644 --- a/crates/api-core/src/test_support/default_config.rs +++ b/crates/api-core/src/test_support/default_config.rs @@ -242,6 +242,7 @@ pub fn get() -> CarbideConfig { tracing: TracingConfig::default(), ntp_servers: vec![], secrets: None, + certificates: Default::default(), } } diff --git a/crates/secrets/src/forge_vault.rs b/crates/secrets/src/forge_vault.rs index 83d40b1f3a..7bb958b025 100644 --- a/crates/secrets/src/forge_vault.rs +++ b/crates/secrets/src/forge_vault.rs @@ -1038,6 +1038,24 @@ pub fn create_vault_client( ForgeVaultAuthenticationType::Root(vault_config.token()?) }; + let forge_vault_metrics = build_vault_metrics(&meter); + + let vault_client_config = ForgeVaultClientConfig { + auth_type, + vault_address: vault_config.address()?, + kv_mount_location: vault_config.kv_mount_location()?, + pki_mount_location: vault_config.pki_mount_location()?, + pki_role_name: vault_config.pki_role_name()?, + spiffe_trust_domain: vault_config.spiffe_trust_domain(), + spiffe_machine_base_path: vault_config.spiffe_machine_base_path(), + vault_root_ca_path, + }; + + let forge_vault_client = ForgeVaultClient::new(vault_client_config, forge_vault_metrics); + Ok(Arc::new(forge_vault_client)) +} + +fn build_vault_metrics(meter: &Meter) -> ForgeVaultMetrics { let vault_requests_total_counter = meter .u64_counter("carbide-api.vault.requests_attempted") .with_description("The amount of tls connections that were attempted") @@ -1063,27 +1081,110 @@ pub fn create_vault_client( .with_unit("ms") .build(); - let forge_vault_metrics = ForgeVaultMetrics { + ForgeVaultMetrics { vault_requests_total_counter, vault_requests_succeeded_counter, vault_requests_failed_counter, vault_token_gauge: vault_token_time_remaining_until_refresh_gauge, vault_request_duration_histogram, + } +} + +/// Site-wide SPIFFE identity namespace used when minting machine certificates. +/// +/// Certificates are issued under the same identity namespace regardless of +/// which Vault signs them, so this is resolved once from the site's +/// `[auth.trust]` config and shared across cert backends. +#[derive(Debug, Clone)] +pub struct SpiffeIdentity { + pub trust_domain: String, + pub machine_base_path: String, +} + +/// Connection settings for a Vault used *only* to vend certificates, kept +/// separate from the credential store's Vault. +/// +/// The connection-identifying fields are required (non-optional), so a value +/// of this type cannot be constructed without naming the target Vault, its PKI +/// mount, and its role. None of these fields fall back to the process-global +/// `VAULT_*` environment variables — that fallback is exactly what would +/// silently re-point a half-configured cert Vault back at the credential Vault. +#[derive(Debug, Clone)] +pub struct DedicatedVaultConfig { + /// Vault address, e.g. `https://vault.example:8200`. Required. + pub address: String, + /// PKI secrets-engine mount path on the target Vault. Required. + pub pki_mount_location: String, + /// PKI role used to sign leaf certificates. Required. + pub pki_role_name: String, + /// Token for root-token auth. Required only when the pod has no Kubernetes + /// service-account token (the preferred auth path); ignored when SA auth + /// is available. + pub token: Option, + /// Path to the CA bundle that signs the target Vault's TLS certificate. + /// Defaults to the standard site root (`/var/run/secrets/forge-roots/ca.crt`, + /// or `VAULT_CACERT`) — this is TLS trust material, not a Vault selector. + pub vault_cacert: Option, +} + +/// Build a Vault client dedicated to certificate vending from fully explicit +/// settings, with NO environment-variable fallback for the connection fields. +/// A missing required setting fails here, at startup, rather than silently +/// inheriting the credential Vault's configuration. +pub fn create_dedicated_vault_client( + config: &DedicatedVaultConfig, + spiffe: SpiffeIdentity, + meter: Meter, +) -> eyre::Result> { + // Required fields are non-`Option`, but an empty string would still slip + // through serde and build a client that fails confusingly on first use. + for (field, value) in [ + ("address", &config.address), + ("pki_mount_location", &config.pki_mount_location), + ("pki_role_name", &config.pki_role_name), + ] { + if value.trim().is_empty() { + return Err(eyre!( + "dedicated certificate Vault requires a non-empty `{field}`" + )); + } + } + + let configured_ca_path = config + .vault_cacert + .clone() + .unwrap_or_else(|| DEFAULT_VAULT_CA_PATH.to_string()); + let vault_root_ca_path = resolve_vault_root_ca_path(configured_ca_path.as_str())?; + + let service_account_token_path = + Path::new("/var/run/secrets/kubernetes.io/serviceaccount/token"); + let auth_type = if service_account_token_path.exists() { + ForgeVaultAuthenticationType::ServiceAccount(service_account_token_path.to_owned()) + } else { + let token = config.token.clone().ok_or_else(|| { + eyre!( + "dedicated certificate Vault requires an explicit `token` when no Kubernetes service-account token is present" + ) + })?; + ForgeVaultAuthenticationType::Root(token) }; let vault_client_config = ForgeVaultClientConfig { auth_type, - vault_address: vault_config.address()?, - kv_mount_location: vault_config.kv_mount_location()?, - pki_mount_location: vault_config.pki_mount_location()?, - pki_role_name: vault_config.pki_role_name()?, - spiffe_trust_domain: vault_config.spiffe_trust_domain(), - spiffe_machine_base_path: vault_config.spiffe_machine_base_path(), + vault_address: config.address.clone(), + // Certificate vending never touches the KV engine. + kv_mount_location: String::new(), + pki_mount_location: config.pki_mount_location.clone(), + pki_role_name: config.pki_role_name.clone(), + spiffe_trust_domain: spiffe.trust_domain, + spiffe_machine_base_path: spiffe.machine_base_path, vault_root_ca_path, }; - let forge_vault_client = ForgeVaultClient::new(vault_client_config, forge_vault_metrics); - Ok(Arc::new(forge_vault_client)) + Ok(Arc::new(ForgeVaultClient::new( + vault_client_config, + build_vault_metrics(&meter), + ))) } /// Build raw vaultrs client settings for a separate vault consumer (the @@ -1121,7 +1222,48 @@ mod tests { use base64::Engine; use serde_json::json; - use super::{machine_spiffe_uri, service_account_role_name_from_jwt}; + use super::{ + DedicatedVaultConfig, SpiffeIdentity, create_dedicated_vault_client, machine_spiffe_uri, + service_account_role_name_from_jwt, + }; + + fn dedicated_config() -> DedicatedVaultConfig { + DedicatedVaultConfig { + address: "https://vault-certs.example:8200".to_string(), + pki_mount_location: "pki".to_string(), + pki_role_name: "machine".to_string(), + token: None, + vault_cacert: None, + } + } + + fn test_spiffe() -> SpiffeIdentity { + SpiffeIdentity { + trust_domain: "nico.local".to_string(), + machine_base_path: "/forge-system/machine/".to_string(), + } + } + + #[test] + fn dedicated_vault_rejects_empty_required_fields() { + let meter = opentelemetry::global::meter("test"); + for mutate in [ + |c: &mut DedicatedVaultConfig| c.address = " ".to_string(), + |c: &mut DedicatedVaultConfig| c.pki_mount_location = String::new(), + |c: &mut DedicatedVaultConfig| c.pki_role_name = String::new(), + ] { + let mut config = dedicated_config(); + mutate(&mut config); + let err = match create_dedicated_vault_client(&config, test_spiffe(), meter.clone()) { + Ok(_) => panic!("empty required field must be rejected"), + Err(err) => err, + }; + assert!( + err.to_string().contains("non-empty"), + "unexpected error: {err}" + ); + } + } #[test] fn machine_spiffe_uri_uses_trust_domain_and_base_path() { diff --git a/crates/secrets/src/lib.rs b/crates/secrets/src/lib.rs index c086fd54b5..8247a4af21 100644 --- a/crates/secrets/src/lib.rs +++ b/crates/secrets/src/lib.rs @@ -25,7 +25,8 @@ pub use crate::chained_reader::ChainedCredentialReader; /// via `create_raw_vault_client_settings`. Credential operations should go /// through `create_credential_manager` instead of using the vault client directly. pub use crate::forge_vault::{ - ForgeVaultClient, VaultConfig, create_raw_vault_client_settings, create_vault_client, + DedicatedVaultConfig, ForgeVaultClient, SpiffeIdentity, VaultConfig, + create_dedicated_vault_client, create_raw_vault_client_settings, create_vault_client, }; pub use crate::local_credentials::{ CredentialSnapshot, EnvCredentialsConfig, FileCredentialsConfig, MachineIdentityConfig, @@ -49,6 +50,8 @@ use credentials::{ use local_credentials::{EnvCredentials, FileCredentialsWatcher}; pub use memory_credentials::MemoryCredentialStore; +use crate::certificates::CertificateProvider; + #[derive(Default, Debug, Clone)] pub struct CredentialConfig { pub vault: VaultConfig, @@ -56,6 +59,66 @@ pub struct CredentialConfig { pub file: FileCredentialsConfig, } +/// Selects and configures the backend that vends machine/service certificates. +/// +/// Certificate vending is independent of the credential store: this lets the +/// API issue PKI certificates from a different Vault than the one backing +/// credentials — or, in future, from a non-Vault CA — without disturbing +/// credential storage. +#[derive(Default, Debug, Clone)] +pub struct CertificateConfig { + pub backend: CertBackend, +} + +/// Backend used to issue certificates. +/// +/// Today both variants are Vault-backed. The enum exists so additional backends +/// (e.g. an in-process CA whose key lives in a Kubernetes Secret) can be added +/// without touching the call sites that consume [`CertificateProvider`]. +// The shared `Vault` suffix is intentional: both current variants are Vault +// backends, distinguished by whether the client is shared with the credential +// store. The lint resolves once a non-Vault backend is added. +#[allow(clippy::enum_variant_names)] +#[derive(Default, Debug, Clone)] +pub enum CertBackend { + /// Reuse the credential store's Vault client — one client, one token lease. + /// This is the default and matches historical behavior. + #[default] + SharedVault, + /// Issue certificates from a dedicated Vault, decoupled from the credential + /// store. [`DedicatedVaultConfig`] is fully explicit: its connection fields + /// never fall back to the process-global `VAULT_*` env vars, so a partial + /// config fails fast instead of silently re-pointing at the credential + /// Vault. + DedicatedVault(DedicatedVaultConfig), +} + +/// Builds the certificate provider selected by `config`. +/// +/// `shared_vault` is the already-constructed credential Vault client, reused +/// for [`CertBackend::SharedVault`] so no second client or token lease is +/// created in the common case. `spiffe` is the site's SPIFFE identity; a +/// dedicated Vault issues certs under the same identity namespace as the rest +/// of the deployment. +pub fn create_certificate_provider( + config: &CertificateConfig, + shared_vault: &Arc, + spiffe: SpiffeIdentity, + meter: Meter, +) -> eyre::Result> { + match &config.backend { + CertBackend::SharedVault => { + let provider: Arc = shared_vault.clone(); + Ok(provider) + } + CertBackend::DedicatedVault(dedicated) => { + let provider: Arc = + create_dedicated_vault_client(dedicated, spiffe, meter)?; + Ok(provider) + } + } +} + /// create_credential_manager builds the default credential chain: env -> file -> vault. pub async fn create_credential_manager( config: &CredentialConfig,