Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ The Homebrew install sets up shell completions (bash/zsh/fish) and a `man smctl`
Controlling fans and charging from userspace demands paranoia. smctl treats these as first-class invariants:

- **Never leave a brick.** Uninstalling, stopping, or killing the daemon restores system control of fans and charging — enforced by termination hooks, startup reconciliation, and launchd restart.
- **Thermal safety guard.** While fans are under manual control, smctl monitors all temperature sensors every second. Sustained readings above the ceiling (default 100 °C, hard-capped at 105 °C, cannot be disabled) force fans back to system control and latch out manual control until things cool down. Losing temperature visibility counts as unsafe.
- **Thermal safety guard.** While fans are under manual control, smctl monitors all temperature sensors every second. Sustained readings above the ceiling force fans back to system control and latch out manual control until things cool down. The base ceiling defaults to 100 °C and is hard-capped at 105 °C; Apple Silicon `Tp*` hot-spot sensors get a narrow allowance up to 110 °C because they can run hotter than board/skin/proximity sensors under ordinary load. The guard cannot be disabled, and losing temperature visibility counts as unsafe.
- **Verified writes.** Every SMC write is read back and verified (with a settle window for firmware that applies writes asynchronously) — failures surface as errors, never as silent no-ops.
- **Graceful degradation.** If a macOS update changes SMC behavior, affected features degrade to read-only with an explicit message instead of pretending to work.

Expand Down
43 changes: 33 additions & 10 deletions Sources/PolicyEngine/PolicyEngine.swift
Original file line number Diff line number Diff line change
Expand Up @@ -506,11 +506,13 @@ public struct FanSafetyDecision: Equatable, Sendable {

public struct FanSafetyGuard: Codable, Equatable, Sendable {
/// Calibrated against field data: Apple Silicon junction hot-spot sensors (Tp0E/Tp3P
/// class) routinely sit at 95–103C under ordinary compile load while the silicon is
/// rated to ~110C. 95C made manual control unusable under any load; 100C still trips
/// well before firmware-level throttling/shutdown territory.
/// class) can sit above 100C under ordinary sustained load. Non-hotspot sensors keep
/// the stricter base ceiling; Tp* sensors get a narrow allowance and still hard-trip.
public static let defaultCeilingCelsius = 100.0
public static let hardMaximumCeilingCelsius = 105.0
/// Sensor-name prefix that selects the hot-spot ceiling; matched with `hasPrefix`
/// in `effectiveCeiling(for:)`.
public static let hotSpotSensorPrefix = "Tp"
/// Degrees Celsius added to the configured ceiling for sensors matching the prefix.
public static let hotSpotAllowanceCelsius = 10.0
/// Upper bound on the hot-spot ceiling; applied after the allowance is added.
public static let hotSpotHardMaximumCeilingCelsius = 110.0
/// The latch releases only after cooling this far below the ceiling, so a trip
/// cannot be immediately re-armed into a hot system.
public static let releaseHysteresisCelsius = 5.0
Expand Down Expand Up @@ -538,11 +540,14 @@ public struct FanSafetyGuard: Codable, Equatable, Sendable {
/// guard is the only thermal floor (thermalmonitord is hands-off), so *no readable
/// temperature* counts as unsafe — blind manual control is never allowed.
public mutating func evaluate(samples: [FanTemperatureSample], manualPolicyActive: Bool) -> FanSafetyDecision {
let peak = samples.map(\.celsius).max()

// Latch release requires a credible reading comfortably below the ceiling.
if isLatched, let peak, peak <= configuredCeilingCelsius - Self.releaseHysteresisCelsius {
// Latch release requires credible readings comfortably below each sensor's
// own effective ceiling. Hotspot-class Tp* readings use a higher ceiling
// than board/skin/proximity sensors.
if isLatched, !samples.isEmpty, samples.allSatisfy({ sample in
sample.celsius <= effectiveCeiling(for: sample) - Self.releaseHysteresisCelsius
}) {
isLatched = false
consecutiveOverCeiling = 0
}

guard manualPolicyActive else {
Expand All @@ -551,21 +556,29 @@ public struct FanSafetyGuard: Codable, Equatable, Sendable {
return FanSafetyDecision(forceAuto: false)
}

guard let peak else {
guard !samples.isEmpty else {
isLatched = true
consecutiveOverCeiling = 0
return FanSafetyDecision(
forceAuto: true,
reason: "no readable temperature sensors while fans are under manual control"
)
}
if peak >= configuredCeilingCelsius {
let offender = samples.compactMap { sample -> (sample: FanTemperatureSample, ceiling: Double)? in
let ceiling = effectiveCeiling(for: sample)
guard sample.celsius >= ceiling else { return nil }
return (sample, ceiling)
}.max { lhs, rhs in
(lhs.sample.celsius - lhs.ceiling) < (rhs.sample.celsius - rhs.ceiling)
}

if let offender {
consecutiveOverCeiling += 1
if consecutiveOverCeiling >= Self.consecutiveTripsRequired {
isLatched = true
return FanSafetyDecision(
forceAuto: true,
reason: "temperature \(peak)C held at/above safety ceiling \(configuredCeilingCelsius)C for \(consecutiveOverCeiling) consecutive checks"
reason: "temperature \(offender.sample.sensor) \(offender.sample.celsius)C held at/above safety ceiling \(offender.ceiling)C for \(consecutiveOverCeiling) consecutive checks"
)
}
} else {
Expand All @@ -579,6 +592,16 @@ public struct FanSafetyGuard: Codable, Equatable, Sendable {
}
return FanSafetyDecision(forceAuto: false)
}

/// Returns the trip ceiling that applies to `sample`.
///
/// Sensors whose name starts with `hotSpotSensorPrefix` use the configured ceiling
/// raised by `hotSpotAllowanceCelsius`, capped at `hotSpotHardMaximumCeilingCelsius`.
/// Every other sensor uses `configuredCeilingCelsius` unchanged.
private func effectiveCeiling(for sample: FanTemperatureSample) -> Double {
    let isHotSpot = sample.sensor.hasPrefix(Self.hotSpotSensorPrefix)
    if !isHotSpot {
        return configuredCeilingCelsius
    }
    // The allowance is relative to the configured ceiling, but never exceeds the hard cap.
    let raisedCeiling = configuredCeilingCelsius + Self.hotSpotAllowanceCelsius
    return min(raisedCeiling, Self.hotSpotHardMaximumCeilingCelsius)
}
}

public struct FanStartupReconciler: Sendable {
Expand Down
54 changes: 52 additions & 2 deletions Tests/PolicyEngineTests/FanSafetyGuardTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,39 @@ final class FanSafetyGuardTests: XCTestCase {
XCTAssertTrue(clamped.evaluate(samples: [sample(105)], manualPolicyActive: true).forceAuto)
}

/// A `Tp*` reading of ~105.86C, held for two consecutive checks, must neither force
/// auto mode nor latch the guard.
func testAppleSiliconHotspotSensorsHaveDedicatedCeiling() {
    var guardrail = FanSafetyGuard()
    let hotspotReading = [sample(105.859375, sensor: "Tp3P")]

    // Two checks in a row: a sustained reading, not just a transient spike.
    for _ in 0..<2 {
        let decision = guardrail.evaluate(samples: hotspotReading, manualPolicyActive: true)
        XCTAssertFalse(decision.forceAuto)
    }
    XCTAssertFalse(guardrail.isLatched)
}

/// A `Tp*` reading of 110C trips on the second consecutive check, latches the guard,
/// and names both the sensor and the temperature in the reason.
func testAppleSiliconHotspotSensorsStillTripAtHardCeiling() {
    var guardrail = FanSafetyGuard()
    let atHardCeiling = [sample(110, sensor: "Tp3P")]

    // The first over-ceiling check alone must not trip.
    let firstCheck = guardrail.evaluate(samples: atHardCeiling, manualPolicyActive: true)
    XCTAssertFalse(firstCheck.forceAuto)

    let decision = guardrail.evaluate(samples: atHardCeiling, manualPolicyActive: true)
    XCTAssertTrue(decision.forceAuto)
    XCTAssertTrue(guardrail.isLatched)

    // A missing reason is treated as an empty string, so both checks fail in that case.
    let reason = decision.reason ?? ""
    XCTAssertTrue(reason.contains("Tp3P"))
    XCTAssertTrue(reason.contains("110"))
}

/// A sensor without the `Tp` prefix reading 100C trips and latches on the second
/// consecutive check.
func testNonHotspotSensorsKeepBaseCeiling() {
    var guardrail = FanSafetyGuard()
    let nonHotspotReading = [sample(100, sensor: "TC0P")]

    let firstCheck = guardrail.evaluate(samples: nonHotspotReading, manualPolicyActive: true)
    XCTAssertFalse(firstCheck.forceAuto)

    let secondCheck = guardrail.evaluate(samples: nonHotspotReading, manualPolicyActive: true)
    XCTAssertTrue(secondCheck.forceAuto)
    XCTAssertTrue(guardrail.isLatched)
}

func testTransientSpikeDoesNotTrip() {
// A single hot-spot spike between cool readings must not trip the guard.
var guardrail = FanSafetyGuard()
Expand Down Expand Up @@ -63,6 +96,20 @@ final class FanSafetyGuardTests: XCTestCase {
XCTAssertFalse(guardrail.isLatched)
}

/// After a `Tp*` trip, the latch holds at 106C and releases at 105C, i.e. the
/// hot-spot ceiling (110C) minus the release hysteresis (5C).
func testHotspotLatchUsesHotspotReleaseThreshold() {
    var guardrail = FanSafetyGuard() // Tp* effective ceiling 110, release at 105

    // Two consecutive readings at the hot-spot ceiling latch the guard.
    let tripReading = [sample(110, sensor: "Tp3P")]
    for _ in 0..<2 {
        _ = guardrail.evaluate(samples: tripReading, manualPolicyActive: true)
    }
    XCTAssertTrue(guardrail.isLatched)

    // One degree above the release threshold: still latched, still forcing auto.
    let stillHot = guardrail.evaluate(samples: [sample(106, sensor: "Tp3P")], manualPolicyActive: true)
    XCTAssertTrue(stillHot.forceAuto)
    XCTAssertTrue(guardrail.isLatched)

    // Exactly at the release threshold: the latch clears and manual control is allowed.
    let cooled = guardrail.evaluate(samples: [sample(105, sensor: "Tp3P")], manualPolicyActive: true)
    XCTAssertFalse(cooled.forceAuto)
    XCTAssertFalse(guardrail.isLatched)
}

func testLatchReleasesWhileSystemInControl() {
// After a trip the daemon clears policy (manualPolicyActive becomes false);
// the latch must still release on cool readings so the user is not locked out.
Expand All @@ -73,6 +120,9 @@ final class FanSafetyGuardTests: XCTestCase {

_ = guardrail.evaluate(samples: [sample(94)], manualPolicyActive: false)
XCTAssertFalse(guardrail.isLatched)

XCTAssertFalse(guardrail.evaluate(samples: [sample(101)], manualPolicyActive: true).forceAuto)
XCTAssertFalse(guardrail.isLatched)
}

func testStartupReconciliationRestoresCrashResidueOnlyWithoutLocalPolicy() {
Expand All @@ -84,7 +134,7 @@ final class FanSafetyGuardTests: XCTestCase {
XCTAssertFalse(reconciler.shouldRestoreAuto(hasLocalManualPolicy: false, fanModes: [0: 0, 1: 3], ftstValue: 0))
}

private func sample(_ celsius: Double) -> FanTemperatureSample {
FanTemperatureSample(sensor: "cpu", celsius: celsius)
private func sample(_ celsius: Double, sensor: String = "cpu") -> FanTemperatureSample {
FanTemperatureSample(sensor: sensor, celsius: celsius)
}
}
2 changes: 1 addition & 1 deletion docs/README.zh-CN.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ Homebrew 安装会自动配置 shell 补全(bash/zsh/fish)和 `man smctl`
在用户态控制风扇和充电,必须以最坏情况为设计前提。smctl 把这些当作一等不变量:

- **绝不留砖。** 卸载、停止、杀死 daemon 都会把风扇和充电交还系统控制——由退出钩子、启动对账、launchd 自动重启三道防线保证。
- **温度护栏。** 风扇处于手动控制期间,smctl 每秒监控全部温度传感器;持续超过上限(默认 100°C,硬上限 105°C**不可关闭**)会强制风扇回到系统控制并锁定手动模式,直到降温。温度读不到同样视为不安全。
- **温度护栏。** 风扇处于手动控制期间,smctl 每秒监控全部温度传感器;持续超过上限会强制风扇回到系统控制并锁定手动模式,直到降温。基础上限默认 100°C,硬上限 105°C;Apple Silicon 的 `Tp*` 热点传感器在普通负载下可能高于板载/外壳/邻近传感器,因此单独允许到 110°C。护栏**不可关闭**,温度读不到同样视为不安全。
- **写入校验。** 每次 SMC 写入都会回读验证(带沉降窗口,兼容异步生效的固件)——失败会如实报错,绝不静默装作成功。
- **优雅降级。** 如果 macOS 更新改变了 SMC 行为,受影响的功能会降级为只读并明确提示,而不是假装还能工作。

Expand Down
Loading