From 8959b10f3ced50e61478c54408b95bb4c4647892 Mon Sep 17 00:00:00 2001 From: sjfhsjfh Date: Tue, 9 Jun 2026 19:04:28 +0800 Subject: [PATCH 01/18] feat: add auto offset --- Cargo.lock | 179 ++++++++++++++++++++++++ Cargo.toml | 49 +++---- phira-monitor/Cargo.toml | 8 +- phira/Cargo.toml | 34 ++--- prpr-auto-offset/.gitignore | 2 + prpr-auto-offset/Cargo.toml | 8 ++ prpr-auto-offset/src/audio/energy.rs | 72 ++++++++++ prpr-auto-offset/src/audio/mod.rs | 5 + prpr-auto-offset/src/audio/spectral.rs | 97 +++++++++++++ prpr-auto-offset/src/estimate.rs | 127 +++++++++++++++++ prpr-auto-offset/src/lib.rs | 11 ++ prpr-auto-offset/src/note/gaussian.rs | 37 +++++ prpr-auto-offset/src/note/mod.rs | 3 + prpr-auto-offset/src/signal.rs | 11 ++ prpr-auto-offset/src/types.rs | 32 +++++ prpr-avc/Cargo.toml | 6 +- prpr/Cargo.toml | 54 ++++---- tools/auto-offset-cli/Cargo.toml | 14 ++ tools/auto-offset-cli/src/main.rs | 181 +++++++++++++++++++++++++ 19 files changed, 856 insertions(+), 74 deletions(-) create mode 100644 prpr-auto-offset/.gitignore create mode 100644 prpr-auto-offset/Cargo.toml create mode 100644 prpr-auto-offset/src/audio/energy.rs create mode 100644 prpr-auto-offset/src/audio/mod.rs create mode 100644 prpr-auto-offset/src/audio/spectral.rs create mode 100644 prpr-auto-offset/src/estimate.rs create mode 100644 prpr-auto-offset/src/lib.rs create mode 100644 prpr-auto-offset/src/note/gaussian.rs create mode 100644 prpr-auto-offset/src/note/mod.rs create mode 100644 prpr-auto-offset/src/signal.rs create mode 100644 prpr-auto-offset/src/types.rs create mode 100644 tools/auto-offset-cli/Cargo.toml create mode 100644 tools/auto-offset-cli/src/main.rs diff --git a/Cargo.lock b/Cargo.lock index 1ab55a0f6..0384ad33f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -88,6 +88,56 @@ dependencies = [ "libc", ] +[[package]] +name = "anstream" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" + +[[package]] +name = "anstyle-parse" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.61.2", +] + [[package]] name = "anyhow" version = "1.0.102" @@ -436,6 +486,46 @@ dependencies = [ "windows-link", ] +[[package]] +name = "clap" +version = "4.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ddb117e43bbf7dacf0a4190fef4d345b9bad68dfc649cb349e7d17d28428e51" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f2ce8604710f6733aa641a2b3731eaa1e8b3d9973d5e3565da11800813f997a9" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "clap_lex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" + [[package]] name = "cmake" version = "0.1.57" @@ -460,6 +550,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b" +[[package]] +name = "colorchoice" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" + [[package]] name = "colored" version = "3.1.1" @@ -1797,6 +1893,12 @@ dependencies = [ "once_cell", ] +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + [[package]] name = "itertools" version = "0.14.0" @@ -3000,6 +3102,12 @@ version = "1.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + [[package]] name = "open" version = "5.3.3" @@ -3346,6 +3454,15 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "primal-check" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc0d895b311e3af9902528fbb8f928688abbd95872819320517cc24ca6b2bd08" +dependencies = [ + "num-integer", +] + [[package]] name = "proc-macro-crate" version = "3.5.0" @@ -3459,6 +3576,26 @@ dependencies = [ "zip", ] +[[package]] +name = 
"prpr-auto-offset" +version = "0.7.1" +dependencies = [ + "rustfft", +] + +[[package]] +name = "prpr-auto-offset-cli" +version = "0.7.1" +dependencies = [ + "anyhow", + "clap", + "prpr", + "prpr-auto-offset", + "prpr-avc", + "tempfile", + "tokio", +] + [[package]] name = "prpr-avc" version = "0.1.0" @@ -3907,6 +4044,20 @@ dependencies = [ "semver", ] +[[package]] +name = "rustfft" +version = "6.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21db5f9893e91f41798c88680037dba611ca6674703c1a18601b01a72c8adb89" +dependencies = [ + "num-complex", + "num-integer", + "num-traits", + "primal-check", + "strength_reduce", + "transpose", +] + [[package]] name = "rustix" version = "1.1.4" @@ -4320,6 +4471,18 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" +[[package]] +name = "strength_reduce" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe895eb47f22e2ddd4dabc02bce419d2e643c8e3b585c78158b349195bc24d82" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + [[package]] name = "subtle" version = "2.6.1" @@ -4860,6 +5023,16 @@ dependencies = [ "tracing-log", ] +[[package]] +name = "transpose" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ad61aed86bc3faea4300c7aee358b4c6d0c8d6ccc36524c96e4c92ccf26e77e" +dependencies = [ + "num-integer", + "strength_reduce", +] + [[package]] name = "try-lock" version = "0.2.5" @@ -5005,6 +5178,12 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" +[[package]] +name = "utf8parse" +version = "0.2.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + [[package]] name = "uuid" version = "1.22.0" diff --git a/Cargo.toml b/Cargo.toml index b09e727bb..917bb4881 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,12 +1,14 @@ [workspace] members = [ - "prpr", - "prpr-avc", - "prpr-pbc", - "prpr-l10n", - "phira", - "phira-main", - "phira-monitor", + "phira", + "phira-main", + "phira-monitor", + "prpr", + "prpr-auto-offset", + "prpr-avc", + "prpr-l10n", + "prpr-pbc", + "tools/auto-offset-cli", ] resolver = "2" @@ -32,13 +34,30 @@ image = { version = "0.25.10", default-features = false } inputbox = "0.1.1" lru = "0.16.3" lyon = "1.0.19" + +macroquad = { git = "https://github.com/Mivik/prpr-macroquad", rev = "b2eab29", default-features = false } +miniquad = { git = "https://github.com/Mivik/prpr-miniquad", rev = "0c525a3" } nalgebra = "0.34.1" + +objc2 = "0.6.4" +objc2-core-foundation = "0.3.2" +objc2-foundation = "0.3.2" +objc2-ui-kit = "0.3.2" +objc2-uniform-type-identifiers = "0.3.2" once_cell = "1.21.4" +phira = { path = "phira", default-features = false } +phira-mp-client = { git = "https://github.com/TeamFlos/phira-mp", rev = "6967475" } +phira-mp-common = { git = "https://github.com/TeamFlos/phira-mp", rev = "6967475" } pollster = "0.4.0" +prpr = { path = "prpr", default-features = false } +prpr-auto-offset = { path = "prpr-auto-offset" } +prpr-avc = { path = "prpr-avc" } +prpr-l10n = { path = "prpr-l10n" } rand = "0.8.5" regex = "1.12.3" reqwest = { version = "0.13.2", default-features = false } rfd = "0.17.2" +sasa = { git = "https://github.com/Mivik/sasa", rev = "e76229b", default-features = false } serde = "1.0.228" serde_json = "1.0.149" serde_yaml = "0.9.34" @@ -51,22 +70,6 @@ uuid = "1.22.0" walkdir = "2.5.0" zip = { version = "8.3.0", default-features = false } -objc2 = "0.6.4" -objc2-core-foundation = "0.3.2" -objc2-foundation = "0.3.2" -objc2-ui-kit = "0.3.2" 
-objc2-uniform-type-identifiers = "0.3.2" - -macroquad = { git = "https://github.com/Mivik/prpr-macroquad", rev = "b2eab29", default-features = false } -miniquad = { git = "https://github.com/Mivik/prpr-miniquad", rev = "0c525a3" } -phira = { path = "phira", default-features = false } -phira-mp-client = { git = "https://github.com/TeamFlos/phira-mp", rev = "6967475" } -phira-mp-common = { git = "https://github.com/TeamFlos/phira-mp", rev = "6967475" } -prpr = { path = "prpr", default-features = false } -prpr-avc = { path = "prpr-avc" } -prpr-l10n = { path = "prpr-l10n" } -sasa = { git = "https://github.com/Mivik/sasa", rev = "e76229b", default-features = false } - [profile.release] opt-level = 2 strip = true diff --git a/phira-monitor/Cargo.toml b/phira-monitor/Cargo.toml index f706fb1e5..e98c1aa98 100644 --- a/phira-monitor/Cargo.toml +++ b/phira-monitor/Cargo.toml @@ -9,7 +9,11 @@ chrono = { workspace = true } futures-util = { workspace = true } log = "0.4.29" macroquad = { workspace = true, default-features = false } + +phira-mp-client = { workspace = true } +phira-mp-common = { workspace = true } pretty_env_logger = "0.5.0" +prpr = { workspace = true } reqwest = { workspace = true, default-features = false, features = [ "json", "stream", @@ -24,7 +28,3 @@ serde_json = { workspace = true } serde_yaml = { workspace = true } tokio = { workspace = true } uuid = { workspace = true, features = ["v4"] } - -phira-mp-client = { workspace = true } -phira-mp-common = { workspace = true } -prpr = { workspace = true } diff --git a/phira/Cargo.toml b/phira/Cargo.toml index 6356d89df..5bfaf39d2 100644 --- a/phira/Cargo.toml +++ b/phira/Cargo.toml @@ -4,9 +4,6 @@ version = { workspace = true } edition = { workspace = true } license = { workspace = true } -[lints] -workspace = true - [lib] crate-type = ["lib", "cdylib"] @@ -46,8 +43,12 @@ logos = "0.16.1" lru = { workspace = true } lyon = { workspace = true } macroquad = { workspace = true, default-features = false } + 
+miniquad = { workspace = true } nalgebra = { workspace = true } once_cell = { workspace = true } +phira-mp-client = { workspace = true } +phira-mp-common = { workspace = true } pollster = { workspace = true } prpr = { workspace = true, features = ["log"], default-features = false } prpr-l10n = { workspace = true } @@ -63,6 +64,7 @@ reqwest = { workspace = true, default-features = false, features = [ "rustls", "query", ] } +sanitize-filename = "0.6.0" semver = { version = "1.0.27", features = ["serde"] } serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } @@ -78,29 +80,24 @@ walkdir = { workspace = true } zip = { workspace = true, features = ["chrono"] } zstd = "0.13" -miniquad = { workspace = true } -phira-mp-client = { workspace = true } -phira-mp-common = { workspace = true } -sanitize-filename = "0.6.0" +[target.'cfg(not(any(target_os = "android", target_env = "ohos")))'.dependencies] +sasa = { workspace = true, default-features = true } -[target.'cfg(target_os = "android")'.dependencies] -jni = "0.22.4" -ndk-context = "0.1" -sasa = { workspace = true, default-features = false, features = ["oboe"] } +[target.'cfg(not(any(target_os = "android", target_os = "ios", target_env = "ohos")))'.dependencies] +rfd = { workspace = true } [target.'cfg(target_env = "ohos")'.dependencies] -sasa = { workspace = true, default-features = false, features = ["ohos"] } napi-derive-ohos = { version = "1.1.6" } napi-ohos = { version = "1.1.6", default-features = false, features = [ "napi8", "async", ] } +sasa = { workspace = true, default-features = false, features = ["ohos"] } -[target.'cfg(not(any(target_os = "android", target_env = "ohos")))'.dependencies] -sasa = { workspace = true, default-features = true } - -[target.'cfg(not(any(target_os = "android", target_os = "ios", target_env = "ohos")))'.dependencies] -rfd = { workspace = true } +[target.'cfg(target_os = "android")'.dependencies] +jni = "0.22.4" +ndk-context = "0.1" +sasa = { workspace 
= true, default-features = false, features = ["oboe"] } [target.'cfg(target_os = "ios")'.dependencies] objc2 = { workspace = true } @@ -113,3 +110,6 @@ dotenv-build = "0.1" [dev-dependencies] fluent = { workspace = true } fluent-syntax = { workspace = true } + +[lints] +workspace = true diff --git a/prpr-auto-offset/.gitignore b/prpr-auto-offset/.gitignore new file mode 100644 index 000000000..db11c43ef --- /dev/null +++ b/prpr-auto-offset/.gitignore @@ -0,0 +1,2 @@ +# Test charts and extracted files +test-charts/ diff --git a/prpr-auto-offset/Cargo.toml b/prpr-auto-offset/Cargo.toml new file mode 100644 index 000000000..c86e87420 --- /dev/null +++ b/prpr-auto-offset/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "prpr-auto-offset" +version.workspace = true +edition.workspace = true +license.workspace = true + +[dependencies] +rustfft = "6.4.1" diff --git a/prpr-auto-offset/src/audio/energy.rs b/prpr-auto-offset/src/audio/energy.rs new file mode 100644 index 000000000..3926d62f4 --- /dev/null +++ b/prpr-auto-offset/src/audio/energy.rs @@ -0,0 +1,72 @@ +use crate::Signal; + +/// Energy-difference novelty signal. +/// +/// Computes the positive first-order difference of short-time RMS energy. +/// No thresholding — every frame gets a value. +pub struct EnergyDiff { + /// Native novelty samples at the energy hop rate. + native: Vec, + /// Time step between native samples, in seconds. 
+ native_dt: f64, +} + +impl EnergyDiff { + pub fn new(pcm: &[f32], sample_rate: u32, frame_ms: f64, hop_ms: f64) -> Self { + let frame_samples = (frame_ms / 1000.0 * sample_rate as f64).round() as usize; + let hop_samples = (hop_ms / 1000.0 * sample_rate as f64).round() as usize; + let native_dt = hop_samples as f64 / sample_rate as f64; + + let native = compute_energy_diff(pcm, frame_samples, hop_samples); + Self { native, native_dt } + } +} + +impl Signal for EnergyDiff { + fn samples(&self, ts: &[f64]) -> Vec { + if ts.is_empty() { + return vec![]; + } + ts.iter().map(|&t| interpolate(&self.native, self.native_dt, t)).collect() + } +} + +fn compute_energy_diff(pcm: &[f32], frame_samples: usize, hop_samples: usize) -> Vec { + if pcm.len() < frame_samples || frame_samples == 0 || hop_samples == 0 { + return vec![]; + } + + let energies: Vec = (0..) + .step_by(hop_samples) + .take_while(|&start| start + frame_samples <= pcm.len()) + .map(|start| { + let sum_sq: f32 = pcm[start..start + frame_samples].iter().map(|&x| x * x).sum(); + (sum_sq / frame_samples as f32).sqrt() + }) + .collect(); + + if energies.len() < 2 { + return vec![]; + } + + energies.windows(2).map(|w| (w[1] - w[0]).max(0.0)).collect() +} + +/// Linear interpolation at time `t` (seconds) in a signal sampled every `dt`. 
+fn interpolate(data: &[f32], dt: f64, t: f64) -> f32 { + if data.is_empty() { + return 0.0; + } + let idx = t / dt; + if idx < 0.0 { + return data[0]; + } + let i = idx as usize; + if i + 1 >= data.len() { + return data[data.len() - 1]; + } + let frac = (idx - i as f64) as f32; + let a = data[i]; + let b = data[i + 1]; + a + (b - a) * frac +} diff --git a/prpr-auto-offset/src/audio/mod.rs b/prpr-auto-offset/src/audio/mod.rs new file mode 100644 index 000000000..536acdbc4 --- /dev/null +++ b/prpr-auto-offset/src/audio/mod.rs @@ -0,0 +1,5 @@ +mod energy; +mod spectral; + +pub use energy::EnergyDiff; +pub use spectral::SpectralFlux; diff --git a/prpr-auto-offset/src/audio/spectral.rs b/prpr-auto-offset/src/audio/spectral.rs new file mode 100644 index 000000000..f680db790 --- /dev/null +++ b/prpr-auto-offset/src/audio/spectral.rs @@ -0,0 +1,97 @@ +use crate::Signal; +use rustfft::{num_complex::Complex32, FftPlanner}; + +/// Spectral-flux novelty signal computed via STFT. +/// +/// For each STFT frame, computes the sum of positive magnitude-spectrum +/// differences from the previous frame. The result is a dense time series +/// with one value per STFT frame. +pub struct SpectralFlux { + /// Native novelty samples at the STFT hop rate. + native: Vec, + /// Time step between native samples, in seconds. 
+ native_dt: f64, +} + +impl SpectralFlux { + pub fn new(pcm: &[f32], sample_rate: u32, fft_size: usize, hop_size: usize) -> Self { + assert!(fft_size.is_power_of_two()); + let native_dt = hop_size as f64 / sample_rate as f64; + let native = compute_spectral_flux(pcm, fft_size, hop_size); + Self { native, native_dt } + } +} + +impl Signal for SpectralFlux { + fn samples(&self, ts: &[f64]) -> Vec { + if ts.is_empty() { + return vec![]; + } + ts.iter().map(|&t| interpolate(&self.native, self.native_dt, t)).collect() + } +} + +fn compute_spectral_flux(pcm: &[f32], n: usize, hop: usize) -> Vec { + if pcm.len() < n { + return vec![]; + } + + let n2 = (n - 1) as f32; + let window: Vec = (0..n).map(|i| 0.5 - 0.5 * (2.0 * std::f32::consts::PI * i as f32 / n2).cos()).collect(); + + let mut planner = FftPlanner::new(); + let fft = planner.plan_fft_forward(n); + + let num_frames = (pcm.len() - n) / hop + 1; + let num_bins = n / 2 + 1; + let mut prev_mags = vec![0.0f32; num_bins]; + let mut buffer = vec![Complex32::new(0.0, 0.0); n]; + let mut novelty = Vec::with_capacity(num_frames); + + for frame in 0..num_frames { + let start = frame * hop; + for (i, &w) in window.iter().enumerate() { + buffer[i] = Complex32::new(pcm[start + i] * w, 0.0); + } + + fft.process(&mut buffer); + + for i in 0..num_bins { + prev_mags[i] = core::mem::replace(&mut prev_mags[i], buffer[i].norm()); + } + + if frame == 0 { + novelty.push(0.0); + continue; + } + + let flux: f32 = buffer[..num_bins] + .iter() + .enumerate() + .map(|(i, c)| (c.norm() - prev_mags[i]).max(0.0)) + .sum(); + + novelty.push(flux); + } + + novelty +} + +/// Linear interpolation at time `t` (seconds) in a signal sampled every `dt`. 
+fn interpolate(data: &[f32], dt: f64, t: f64) -> f32 { + if data.is_empty() { + return 0.0; + } + let idx = t / dt; + if idx < 0.0 { + return data[0]; + } + let i = idx as usize; + if i + 1 >= data.len() { + return data[data.len() - 1]; + } + let frac = (idx - i as f64) as f32; + let a = data[i]; + let b = data[i + 1]; + a + (b - a) * frac +} diff --git a/prpr-auto-offset/src/estimate.rs b/prpr-auto-offset/src/estimate.rs new file mode 100644 index 000000000..00b579fa6 --- /dev/null +++ b/prpr-auto-offset/src/estimate.rs @@ -0,0 +1,127 @@ +use crate::{AlignConfig, AlignmentResult, Signal}; + +/// Reliability threshold for normalized cross-correlation. +/// +/// If the normalized peak `r` exceeds this value, the detected offset is +/// considered reliable. The threshold is heuristic; 0.05 works well across +/// the tested chart corpus. +const RELIABILITY_THRESHOLD: f64 = 0.05; + +/// Cross-correlation between two arrays, limited lag range. +/// +/// Returns `(correlation_values, best_lag_index, peak_value)` where +/// `correlation[lag]` is the dot product of `a` with `b` shifted by +/// `lag - max_lag_bins`. +fn cross_correlation(a: &[f32], b: &[f32], max_lag_bins: usize) -> (Vec, usize, f32) { + let n = a.len().min(b.len()); + if n == 0 { + return (vec![], 0, 0.0); + } + + let mut best_lag = max_lag_bins; + let mut best_val = f32::NEG_INFINITY; + let mut corr = Vec::with_capacity(2 * max_lag_bins + 1); + + for lag_offset in 0..=2 * max_lag_bins { + let lag = lag_offset as isize - max_lag_bins as isize; + let mut sum = 0.0f32; + (0..n).for_each(|i| { + let j = i as isize + lag; + if j >= 0 && j < b.len() as isize { + sum += a[i] * b[j as usize]; + } + }); + corr.push(sum); + if sum > best_val { + best_val = sum; + best_lag = lag_offset; + } + } + + (corr, best_lag, best_val) +} + +/// Build a uniform time grid from `t_min` to `t_max` (inclusive) with step `dt`. 
+fn build_ts_grid(t_min: f64, t_max: f64, dt: f64) -> Vec { + let n = ((t_max - t_min) / dt).ceil() as usize + 1; + (0..n).map(|i| t_min + i as f64 * dt).collect() +} + +/// Compute the normalized cross-correlation `r` at a specific lag. +/// +/// `r = Σ a[i] · b[i+lag] / √(Σ a[i]² · Σ b[i+lag]²)` over the overlapping +/// region. By Cauchy-Schwarz, `r ∈ [0, 1]` for non-negative signals. +fn normalized_correlation(a: &[f32], b: &[f32], lag: isize, best_val: f32) -> f64 { + let mut norm_a = 0.0f64; + let mut norm_b = 0.0f64; + + (0..a.len().min(b.len())).for_each(|i| { + let j = i as isize + lag; + if j >= 0 && j < b.len() as isize { + norm_a += (a[i] as f64).powi(2); + norm_b += (b[j as usize] as f64).powi(2); + } + }); + + let denom = (norm_a * norm_b).sqrt(); + if denom <= 0.0 { + return 0.0; + } + + (best_val as f64 / denom).clamp(0.0, 1.0) +} + +/// Estimate the timing offset between two signals. +/// +/// Uses default [`AlignConfig`]. See [`estimate_with`] for custom config. +pub fn estimate(audio: &A, note: &N, duration_sec: f64) -> AlignmentResult { + estimate_with(audio, note, duration_sec, &AlignConfig::default()) +} + +/// Estimate the timing offset between two signals with custom config. +/// +/// `audio` is a [`Signal`] produced from the audio track (e.g. +/// [`SpectralFlux`](crate::SpectralFlux)). `note` is a [`Signal`] +/// produced from the chart's note events (e.g. +/// [`NoteGaussian`](crate::NoteGaussian)). 
+pub fn estimate_with(audio: &A, note: &N, duration_sec: f64, config: &AlignConfig) -> AlignmentResult { + if duration_sec <= 0.0 { + return AlignmentResult { + offset: 0.0, + correlation: 0.0, + reliable: false, + }; + } + + // Build shared sampling grid + let t_min = -config.search_range_sec; + let t_max = duration_sec + config.search_range_sec; + let ts = build_ts_grid(t_min, t_max, config.sampling_interval_sec); + + // Sample both signals + let audio_samples = audio.samples(&ts); + let note_samples = note.samples(&ts); + + if audio_samples.is_empty() || note_samples.is_empty() { + return AlignmentResult { + offset: 0.0, + correlation: 0.0, + reliable: false, + }; + } + + // Cross-correlation + let max_lag_bins = (config.search_range_sec / config.sampling_interval_sec).ceil() as usize; + let (_corr, best_lag, best_val) = cross_correlation(¬e_samples, &audio_samples, max_lag_bins); + let offset = (best_lag as isize - max_lag_bins as isize) as f64 * config.sampling_interval_sec; + + // Normalized correlation at best lag + let lag = best_lag as isize - max_lag_bins as isize; + let correlation = normalized_correlation(¬e_samples, &audio_samples, lag, best_val); + + AlignmentResult { + offset, + correlation, + reliable: correlation > RELIABILITY_THRESHOLD, + } +} diff --git a/prpr-auto-offset/src/lib.rs b/prpr-auto-offset/src/lib.rs new file mode 100644 index 000000000..eb32e1f12 --- /dev/null +++ b/prpr-auto-offset/src/lib.rs @@ -0,0 +1,11 @@ +mod audio; +mod estimate; +mod note; +mod signal; +mod types; + +pub use audio::{EnergyDiff, SpectralFlux}; +pub use estimate::{estimate, estimate_with}; +pub use note::NoteGaussian; +pub use signal::Signal; +pub use types::{AlignConfig, AlignmentResult}; diff --git a/prpr-auto-offset/src/note/gaussian.rs b/prpr-auto-offset/src/note/gaussian.rs new file mode 100644 index 000000000..d65166357 --- /dev/null +++ b/prpr-auto-offset/src/note/gaussian.rs @@ -0,0 +1,37 @@ +use crate::Signal; + +/// A signal constructed from discrete 
note events by placing a Gaussian kernel +/// at each note time. +/// +/// `template(t) = Σ exp(-0.5 * ((t - tᵢ) / σ)²)` — an analytic function that +/// can be sampled at arbitrary timestamps. +pub struct NoteGaussian { + times: Vec, + sigma: f64, +} + +impl NoteGaussian { + pub fn new(times: Vec, sigma: f64) -> Self { + Self { times, sigma } + } +} + +impl Signal for NoteGaussian { + fn samples(&self, ts: &[f64]) -> Vec { + if ts.is_empty() || self.times.is_empty() { + return vec![0.0; ts.len()]; + } + let inv_sigma = 1.0 / self.sigma; + ts.iter() + .map(|&t| { + self.times + .iter() + .map(|&nt| { + let d = (t - nt) * inv_sigma; + ((-0.5 * d * d).exp()) as f32 + }) + .sum::() + }) + .collect() + } +} diff --git a/prpr-auto-offset/src/note/mod.rs b/prpr-auto-offset/src/note/mod.rs new file mode 100644 index 000000000..aefe0c7ad --- /dev/null +++ b/prpr-auto-offset/src/note/mod.rs @@ -0,0 +1,3 @@ +mod gaussian; + +pub use gaussian::NoteGaussian; diff --git a/prpr-auto-offset/src/signal.rs b/prpr-auto-offset/src/signal.rs new file mode 100644 index 000000000..6851e6cca --- /dev/null +++ b/prpr-auto-offset/src/signal.rs @@ -0,0 +1,11 @@ +/// A dense time-varying signal that can be sampled at arbitrary timestamps. +/// +/// The signal is conceptually continuous: implementations may store native +/// samples at a fixed internal resolution and interpolate, or compute values +/// analytically on demand. +pub trait Signal: Send + Sync { + /// Sample the signal at the given timestamps. + /// + /// Returns one value per timestamp, in the same order. + fn samples(&self, ts: &[f64]) -> Vec; +} diff --git a/prpr-auto-offset/src/types.rs b/prpr-auto-offset/src/types.rs new file mode 100644 index 000000000..3f810baff --- /dev/null +++ b/prpr-auto-offset/src/types.rs @@ -0,0 +1,32 @@ +/// Configuration for the alignment algorithm. +#[derive(Debug, Clone)] +pub struct AlignConfig { + /// Search range for offset, in seconds. Default ±5.0s. 
+ pub search_range_sec: f64, + /// Time step for the shared sampling grid, in seconds. Default 0.001 (1ms). + pub sampling_interval_sec: f64, +} + +impl Default for AlignConfig { + fn default() -> Self { + Self { + search_range_sec: 5.0, + sampling_interval_sec: 0.001, + } + } +} + +/// Full result of automatic offset detection. +#[derive(Debug, Clone)] +pub struct AlignmentResult { + /// Suggested global offset in seconds. + /// Positive means notes should be delayed (hit later). + pub offset: f64, + /// Normalized cross-correlation peak, in [0.0, 1.0]. + /// + /// Values near 0 suggest the note pattern has no discernible match in + /// the audio novelty, and the detected offset may be unreliable. + pub correlation: f64, + /// Whether the correlation exceeds the default reliability threshold. + pub reliable: bool, +} diff --git a/prpr-avc/Cargo.toml b/prpr-avc/Cargo.toml index eea4b7b97..6b2a83386 100644 --- a/prpr-avc/Cargo.toml +++ b/prpr-avc/Cargo.toml @@ -9,8 +9,8 @@ edition = "2021" thiserror = "2.0.18" tracing = { workspace = true } -[target.'cfg(target_env = "ohos")'.dependencies] -sasa = { workspace = true, default-features = false, features = ["ohos"] } - [target.'cfg(not(target_env = "ohos"))'.dependencies] sasa = { workspace = true, default-features = false } + +[target.'cfg(target_env = "ohos")'.dependencies] +sasa = { workspace = true, default-features = false, features = ["ohos"] } diff --git a/prpr/Cargo.toml b/prpr/Cargo.toml index 38a469760..6e126f2f4 100644 --- a/prpr/Cargo.toml +++ b/prpr/Cargo.toml @@ -4,9 +4,6 @@ version = { workspace = true } edition = { workspace = true } license = { workspace = true } -[lints] -workspace = true - [lib] [features] @@ -46,12 +43,17 @@ intl-memoizer = "0.5.3" libc = "0.2.183" lru = { workspace = true } lyon = { workspace = true } + +macroquad = { workspace = true, default-features = false } +miniquad = { workspace = true } miniz_oxide = "0.9.1" nalgebra = { workspace = true } obfstr = "0.4.4" once_cell = { 
workspace = true } ordered-float = "5.1.0" phf = { version = "0.13.1", features = ["macros"] } +prpr-avc = { workspace = true, optional = true } +prpr-l10n = { workspace = true } rand = { workspace = true } regex = { workspace = true } serde = { workspace = true, features = ["derive"] } @@ -75,49 +77,47 @@ unic-langid = { version = "0.9.6", features = ["macros"] } uuid = { workspace = true, features = ["v4"] } zip = { workspace = true, default-features = false, features = ["deflate"] } -macroquad = { workspace = true, default-features = false } -miniquad = { workspace = true } -prpr-avc = { workspace = true, optional = true } -prpr-l10n = { workspace = true } +[target.'cfg(not(any(target_os = "android", target_env = "ohos")))'.dependencies] +sasa = { workspace = true, default-features = true } [target.'cfg(not(any(target_os = "android", target_os = "ios", target_env = "ohos")))'.dependencies] open = "5.3.3" rfd = { workspace = true } -[target.'cfg(target_os = "ios")'.dependencies] -objc2 = { workspace = true } -objc2-foundation = { workspace = true } -objc2-ui-kit = { workspace = true, features = ["objc2-uniform-type-identifiers"] } -objc2-uniform-type-identifiers = { workspace = true } - -[target.'cfg(target_os = "android")'.dependencies] -jni = "0.22.4" -ndk-context = "0.1" -sasa = { workspace = true, default-features = false, features = ["oboe"] } - -[target.'cfg(target_env = "ohos")'.dependencies] -sasa = { workspace = true, default-features = false, features = ["ohos"] } - -[target.'cfg(not(any(target_os = "android", target_env = "ohos")))'.dependencies] -sasa = { workspace = true, default-features = true } - [target.'cfg(not(target_arch = "wasm32"))'.dependencies] tokio = { workspace = true, features = ["rt-multi-thread", "fs"] } [target.'cfg(target_arch = "wasm32")'.dependencies] +js-sys = "*" tokio = { workspace = true } +wasm-bindgen = "*" +wasm-bindgen-futures = "0.4" web-sys = { version = "0.3", features = [ "Location", "Performance", "UrlSearchParams", 
"Window", ] } -js-sys = "*" -wasm-bindgen = "*" -wasm-bindgen-futures = "0.4" + +[target.'cfg(target_env = "ohos")'.dependencies] +sasa = { workspace = true, default-features = false, features = ["ohos"] } + +[target.'cfg(target_os = "android")'.dependencies] +jni = "0.22.4" +ndk-context = "0.1" +sasa = { workspace = true, default-features = false, features = ["oboe"] } + +[target.'cfg(target_os = "ios")'.dependencies] +objc2 = { workspace = true } +objc2-foundation = { workspace = true } +objc2-ui-kit = { workspace = true, features = ["objc2-uniform-type-identifiers"] } +objc2-uniform-type-identifiers = { workspace = true } [build-dependencies] walkdir = { workspace = true } [dev-dependencies] walkdir = { workspace = true } + +[lints] +workspace = true diff --git a/tools/auto-offset-cli/Cargo.toml b/tools/auto-offset-cli/Cargo.toml new file mode 100644 index 000000000..0ae8fc3f6 --- /dev/null +++ b/tools/auto-offset-cli/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "prpr-auto-offset-cli" +version.workspace = true +edition.workspace = true +license.workspace = true + +[dependencies] +anyhow = { workspace = true } +clap = { version = "4", features = ["derive"] } +prpr = { workspace = true } +prpr-auto-offset = { workspace = true } +prpr-avc = { workspace = true } +tempfile = { workspace = true } +tokio = { workspace = true, features = ["rt-multi-thread", "macros"] } diff --git a/tools/auto-offset-cli/src/main.rs b/tools/auto-offset-cli/src/main.rs new file mode 100644 index 000000000..672fcd8f9 --- /dev/null +++ b/tools/auto-offset-cli/src/main.rs @@ -0,0 +1,181 @@ +use anyhow::{Context, Result}; +use clap::Parser; +use prpr::{ + fs::{fs_from_file, load_info}, + parse::{parse_pec, parse_phigros, parse_rpe}, +}; +use prpr_auto_offset::{AlignConfig, AlignmentResult, EnergyDiff, NoteGaussian, SpectralFlux}; +use std::io::Write; +use std::path::PathBuf; + +#[derive(Parser)] +#[command(name = "prpr-auto-offset")] +#[command(about = "Automatic chart offset detection for 
Phira")] +struct Cli { + /// Path to a Phira chart file (zip archive) + chart: PathBuf, + + /// Search range in seconds + #[arg(short, long, default_value = "5.0")] + range: f64, + + /// Audio novelty method: spectral or energy + #[arg(long, default_value = "spectral")] + audio_method: String, + + /// Note signal method: gaussian + #[arg(long, default_value = "gaussian")] + note_method: String, + + /// Sampling interval for the cross-correlation grid, in seconds + #[arg(short, long, default_value = "0.001")] + interval: f64, + + /// Gaussian blur sigma for the note signal, in seconds + #[arg(long, default_value = "0.02")] + blur_sigma: f64, + + /// Verbose output + #[arg(short, long)] + verbose: bool, +} + +fn extract_note_times(chart: &prpr::core::Chart) -> Vec { + let mut times: Vec = chart + .lines + .iter() + .flat_map(|line| line.notes.iter().map(|note| note.time)) + .filter(|&t| t >= 0.0) + .collect(); + times.sort_by(|a, b| a.partial_cmp(b).unwrap()); + times +} + +fn print_result(result: &AlignmentResult, verbose: bool) { + if verbose { + println!(); + } + println!("═══════════════════════════════════════"); + println!(" Suggested offset: {:.3}s ({:.0}ms)", result.offset, result.offset * 1000.0); + println!(" Correlation: {:.4}", result.correlation); + println!(" Reliable: {}", if result.reliable { "yes" } else { "no" }); + println!("═══════════════════════════════════════"); +} + +async fn run( + chart_path: &PathBuf, + search_range: f64, + audio_method: &str, + note_method: &str, + sampling_interval: f64, + blur_sigma: f64, + verbose: bool, +) -> Result<()> { + // 1. Open zip as filesystem + let mut fs = fs_from_file(chart_path).with_context(|| format!("failed to open {:?}", chart_path))?; + + // 2. Load chart info + let info = load_info(&mut *fs).await.context("failed to load chart info")?; + + // 3. 
Load and parse chart + let chart_bytes = fs + .load_file(&info.chart) + .await + .with_context(|| format!("failed to load chart file: {}", info.chart))?; + + let extra = if let Ok(data) = fs.load_file("extra.json").await { + let s = String::from_utf8(data).context("extra.json is not valid UTF-8")?; + prpr::parse::parse_extra(&s, &mut *fs).await.context("failed to parse extra")? + } else { + Default::default() + }; + + let format = info.format.as_ref().map(|f| match f { + prpr::info::ChartFormat::Rpe => "rpe", + prpr::info::ChartFormat::Pec => "pec", + prpr::info::ChartFormat::Pgr => "pgr", + prpr::info::ChartFormat::Pbc => "pbc", + }); + + let source = String::from_utf8_lossy(&chart_bytes); + let chart = match format { + Some("rpe") | None => parse_rpe(&source, &mut *fs, extra, info.use_rpe_170_speed.unwrap_or_default()) + .await + .context("failed to parse RPE chart")?, + Some("pec") => parse_pec(&source, extra).context("failed to parse PEC chart")?, + Some("pgr") => parse_phigros(&source, extra).context("failed to parse PGR chart")?, + Some(other) => anyhow::bail!("unsupported chart format: {other}"), + }; + + // 4. Extract note times + let note_times = extract_note_times(&chart); + if verbose { + println!("Chart: {} — {} by {}", info.name, info.level, info.charter); + println!(" Notes: {}, Chart offset: {:.0}ms", note_times.len(), info.offset * 1000.0); + } + + // 5. 
Extract and decode audio + let audio_data = fs + .load_file(&info.music) + .await + .with_context(|| format!("failed to load audio: {}", info.music))?; + + let ext = info.music.rsplit('.').next().unwrap_or("ogg"); + let mut tmp = tempfile::Builder::new() + .suffix(&format!(".{ext}")) + .tempfile() + .context("failed to create temp file")?; + tmp.write_all(&audio_data).context("failed to write audio to temp file")?; + + let tmp_path = tmp.into_temp_path(); + let tmp_str = tmp_path.to_str().context("temp path is not valid UTF-8")?; + + let clip = prpr_avc::demux_audio(tmp_str) + .context("failed to decode audio")? + .context("no audio stream found")?; + + let pcm: Vec = clip.frames().iter().map(|f| (f.0 + f.1) / 2.0).collect(); + let sample_rate = clip.sample_rate(); + let duration = pcm.len() as f64 / sample_rate as f64; + if verbose { + println!(" Audio: {:.1}s, {}Hz, mono", duration, sample_rate); + } + + // 6. Configure + let config = AlignConfig { + search_range_sec: search_range, + sampling_interval_sec: sampling_interval, + }; + + // 7. 
Select methods and run + let result = match (audio_method, note_method) { + ("spectral", "gaussian") => { + if verbose { + println!(" Audio method: spectral flux"); + println!(" Note method: gaussian (sigma={}ms)", blur_sigma * 1000.0); + } + let audio = SpectralFlux::new(&pcm, sample_rate, 1024, 512); + let note = NoteGaussian::new(note_times, blur_sigma); + prpr_auto_offset::estimate_with(&audio, ¬e, duration, &config) + } + ("energy", "gaussian") => { + if verbose { + println!(" Audio method: energy diff"); + println!(" Note method: gaussian (sigma={}ms)", blur_sigma * 1000.0); + } + let audio = EnergyDiff::new(&pcm, sample_rate, 10.0, 5.0); + let note = NoteGaussian::new(note_times, blur_sigma); + prpr_auto_offset::estimate_with(&audio, ¬e, duration, &config) + } + _ => anyhow::bail!("unsupported combination: audio={audio_method} + note={note_method}"), + }; + + print_result(&result, verbose); + Ok(()) +} + +#[tokio::main] +async fn main() -> Result<()> { + let cli = Cli::parse(); + run(&cli.chart, cli.range, &cli.audio_method, &cli.note_method, cli.interval, cli.blur_sigma, cli.verbose).await +} From 5c16d96fd9e7ba0757073082b194f02970a6c30d Mon Sep 17 00:00:00 2001 From: liquidhelium Date: Fri, 12 Jun 2026 00:31:56 +0800 Subject: [PATCH 02/18] feat: superflux for auto offset --- Cargo.lock | 11 + prpr-auto-offset/.gitignore | 1 + prpr-auto-offset/Cargo.toml | 2 + prpr-auto-offset/src/audio/mod.rs | 2 + prpr-auto-offset/src/audio/superflux.rs | 383 ++++++++++++++++++++++++ prpr-auto-offset/src/estimate.rs | 6 +- prpr-auto-offset/src/lib.rs | 2 +- prpr-auto-offset/src/types.rs | 15 +- tools/auto-offset-cli/src/main.rs | 36 ++- 9 files changed, 444 insertions(+), 14 deletions(-) create mode 100644 prpr-auto-offset/src/audio/superflux.rs diff --git a/Cargo.lock b/Cargo.lock index 0384ad33f..da43ab23e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3580,6 +3580,8 @@ dependencies = [ name = "prpr-auto-offset" version = "0.7.1" dependencies = [ + "rayon", + "realfft", 
"rustfft", ] @@ -3874,6 +3876,15 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "realfft" +version = "3.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f821338fddb99d089116342c46e9f1fbf3828dba077674613e734e01d6ea8677" +dependencies = [ + "rustfft", +] + [[package]] name = "redox_syscall" version = "0.5.18" diff --git a/prpr-auto-offset/.gitignore b/prpr-auto-offset/.gitignore index db11c43ef..1a409039d 100644 --- a/prpr-auto-offset/.gitignore +++ b/prpr-auto-offset/.gitignore @@ -1,2 +1,3 @@ # Test charts and extracted files test-charts/ +test-charts-extracted/ diff --git a/prpr-auto-offset/Cargo.toml b/prpr-auto-offset/Cargo.toml index c86e87420..d8778deb0 100644 --- a/prpr-auto-offset/Cargo.toml +++ b/prpr-auto-offset/Cargo.toml @@ -5,4 +5,6 @@ edition.workspace = true license.workspace = true [dependencies] +rayon = "1" +realfft = "3" rustfft = "6.4.1" diff --git a/prpr-auto-offset/src/audio/mod.rs b/prpr-auto-offset/src/audio/mod.rs index 536acdbc4..d7d4e780f 100644 --- a/prpr-auto-offset/src/audio/mod.rs +++ b/prpr-auto-offset/src/audio/mod.rs @@ -1,5 +1,7 @@ mod energy; mod spectral; +mod superflux; pub use energy::EnergyDiff; pub use spectral::SpectralFlux; +pub use superflux::SuperFlux; diff --git a/prpr-auto-offset/src/audio/superflux.rs b/prpr-auto-offset/src/audio/superflux.rs new file mode 100644 index 000000000..00ff4aea8 --- /dev/null +++ b/prpr-auto-offset/src/audio/superflux.rs @@ -0,0 +1,383 @@ +use crate::Signal; + +/// SuperFlux onset detection signal. +/// +/// Computes a percussion-onset novelty curve using the SuperFlux algorithm: +/// 1. High-pass filter (50 Hz) to remove sub-bass rumble +/// 2. Mel filterbank (80 bands, 50 Hz – 12 kHz) +/// 3. Mel-spectrogram (Hann-windowed STFT → mel band energy in dB) +/// 4. Per-band spectral whitening (subtract local running mean) +/// 5. SuperFlux temporal difference (max-filtered spectral flux) +/// 6. 
Adaptive threshold via running median +/// +/// The result is a dense time series with one onset-strength value per STFT +/// frame, suitable for cross-correlation with note event signals. +pub struct SuperFlux { + /// Native onset-strength samples at the STFT hop rate. + native: Vec, + /// Time step between native samples, in seconds. + native_dt: f64, +} + +impl SuperFlux { + /// Build the SuperFlux onset signal from raw mono PCM audio. + /// + /// # Arguments + /// * `pcm` - Mono f32 audio samples. + /// * `sample_rate` - Sample rate in Hz. + /// * `window_size` - STFT window size in samples (default: 2048). + /// * `hop_size` - STFT hop size in samples (default: 1024). + pub fn new(pcm: &[f32], sample_rate: u32, window_size: usize, hop_size: usize) -> Self { + assert!(window_size.is_power_of_two()); + let native_dt = hop_size as f64 / sample_rate as f64; + + // 1. Clone and high-pass filter + let mut samples = pcm.to_vec(); + highpass_50hz(&mut samples, sample_rate); + + // 2. Mel filterbank (80 bands, 50Hz–12kHz) + let mel = MelFilterbank::new(sample_rate, window_size, 80, 50.0, 12000.0); + + // 3. Mel-spectrogram + let (mut mel_frames, frame_rate) = + compute_mel_spectrogram(&samples, sample_rate, window_size, hop_size, &mel); + + // 4. Spectral whitening (1-second window) + whiten_spectrogram(&mut mel_frames, (frame_rate * 1.0) as usize); + + // 5. SuperFlux temporal difference (lag=3) + let onset = compute_superflux(&mel_frames, 3); + + // 6. 
Adaptive threshold + let onset = adaptive_threshold(&onset, frame_rate * 2.0, 0.5); + + // Use the declared native_dt (frame_rate may differ slightly due to rounding) + let _ = frame_rate; + Self { + native: onset, + native_dt, + } + } +} + +impl Signal for SuperFlux { + fn samples(&self, ts: &[f64]) -> Vec { + if ts.is_empty() { + return vec![]; + } + ts.iter() + .map(|&t| interpolate(&self.native, self.native_dt, t)) + .collect() + } +} + +// ─── High-pass filter (50 Hz) ────────────────────────────────────────── + +fn highpass_50hz(samples: &mut [f32], sample_rate: u32) { + // 1st-order Butterworth: y[n] = alpha*y[n-1] + alpha*(x[n] - x[n-1]) + // Remove DC offset first, then initialize state to avoid transient + let dc = samples + .iter() + .take((sample_rate as usize / 10).min(samples.len())) + .sum::() + / (sample_rate as f32 / 10.0).min(samples.len() as f32); + for s in &mut *samples { + *s -= dc; + } + + let cutoff = 50.0; + let rc = 1.0 / (2.0 * std::f32::consts::PI * cutoff); + let dt = 1.0 / sample_rate as f32; + let alpha = rc / (rc + dt); + // Initial state: assume steady state (no change) + let mut x_prev = samples[0]; + let mut y_prev = 0.0; // HP filter: output is 0 at DC + samples[0] = y_prev; + for s in &mut samples[1..] 
{ + let x = *s; + let y = alpha * y_prev + alpha * (x - x_prev); + *s = y; + x_prev = x; + y_prev = y; + } +} + +// ─── Mel scale conversion ────────────────────────────────────────────── + +fn hz_to_mel(hz: f32) -> f32 { + 2595.0 * (1.0 + hz / 700.0).log10() +} + +fn mel_to_hz(mel: f32) -> f32 { + 700.0 * (10.0f32.powf(mel / 2595.0) - 1.0) +} + +// ─── Mel filterbank ───────────────────────────────────────────────────── + +struct MelFilterbank { + /// Triangular filter weights: [mel_band][fft_bin] + weights: Vec>, + n_mels: usize, +} + +impl MelFilterbank { + fn new(sample_rate: u32, window_size: usize, n_mels: usize, f_min: f32, f_max: f32) -> Self { + let n_fft_bins = window_size / 2 + 1; + let mel_min = hz_to_mel(f_min); + let mel_max = hz_to_mel(f_max.min(sample_rate as f32 / 2.0)); + let mel_step = (mel_max - mel_min) / (n_mels + 1) as f32; + + // Center frequencies of mel bands + let mel_centers: Vec = (0..n_mels) + .map(|i| mel_to_hz(mel_min + (i + 1) as f32 * mel_step)) + .collect(); + + let bin_hz = |k: usize| k as f32 * sample_rate as f32 / window_size as f32; + + // Build triangular filter weights + let mut weights = vec![vec![0.0f32; n_fft_bins]; n_mels]; + for (m, ¢er) in mel_centers.iter().enumerate() { + let left = if m == 0 { + f_min + } else { + mel_centers[m - 1] + }; + let right = if m == n_mels - 1 { + f_max + } else { + mel_centers[m + 1] + }; + for k in 0..n_fft_bins { + let f = bin_hz(k); + if f >= left && f <= center { + weights[m][k] = (f - left) / (center - left).max(1e-10); + } else if f > center && f <= right { + weights[m][k] = (right - f) / (right - center).max(1e-10); + } + } + } + + MelFilterbank { weights, n_mels } + } + + /// Apply mel filterbank to a power spectrum, returns log-magnitudes per mel band (dB). 
+ fn apply(&self, power_spectrum: &[f32]) -> Vec { + let mut mel = vec![0.0f32; self.n_mels]; + for (m, w) in self.weights.iter().enumerate() { + let sum: f32 = power_spectrum.iter().zip(w).map(|(&p, &w)| p * w).sum(); + mel[m] = 20.0 * (sum.sqrt().max(1e-10)).log10(); // dB + } + mel + } +} + +// ─── Mel-spectrogram computation ──────────────────────────────────────── + +fn compute_mel_spectrogram( + samples: &[f32], + sample_rate: u32, + window_size: usize, + hop_size: usize, + mel: &MelFilterbank, +) -> (Vec>, f32) { + use rayon::prelude::*; + use realfft::RealFftPlanner; + use std::sync::Arc; + + let num_frames = if samples.len() < window_size { + 0 + } else { + (samples.len() - window_size) / hop_size + 1 + }; + + let window: Vec = (0..window_size) + .map(|n| { + 0.5 * (1.0 + - (2.0 * std::f32::consts::PI * n as f32 / (window_size - 1) as f32).cos()) + }) + .collect(); + + let mut planner = RealFftPlanner::::new(); + let r2c = Arc::new(planner.plan_fft_forward(window_size)); + + let mel_frames: Vec> = (0..num_frames) + .into_par_iter() + .map(|frame_idx| { + let start = frame_idx * hop_size; + let mut windowed: Vec = samples[start..start + window_size] + .iter() + .zip(&window) + .map(|(&s, &w)| s * w) + .collect(); + + let mut spectrum = r2c.make_output_vec(); + r2c.process(&mut windowed, &mut spectrum).unwrap(); + + let power: Vec = spectrum + .iter() + .map(|c| c.re * c.re + c.im * c.im) + .collect(); + mel.apply(&power) + }) + .collect(); + + let frame_rate = sample_rate as f32 / hop_size as f32; + (mel_frames, frame_rate) +} + +// ─── Spectral whitening ───────────────────────────────────────────────── + +/// For each mel band, subtract a local running mean (half-width = window_frames/2). +/// Clamps negative values to -120 dB floor. 
+fn whiten_spectrogram(frames: &mut [Vec], window_frames: usize) { + let half = window_frames / 2; + let n_frames = frames.len(); + if n_frames == 0 { + return; + } + let n_bands = frames[0].len(); + + for band in 0..n_bands { + // Compute local means + let mut smoothed = vec![0.0f32; n_frames]; + for t in 0..n_frames { + let lo = if t >= half { t - half } else { 0 }; + let hi = (t + half).min(n_frames - 1); + let count = (hi - lo + 1) as f32; + let sum: f32 = frames[lo..=hi].iter().map(|f| f[band]).sum(); + smoothed[t] = sum / count; + } + // Subtract local mean from each frame + for t in 0..n_frames { + frames[t][band] -= smoothed[t]; + // Clamp negative values to a small floor (onset is about INCREASE in energy) + frames[t][band] = frames[t][band].max(-120.0); + } + } +} + +// ─── SuperFlux onset detection ────────────────────────────────────────── + +/// Core SuperFlux algorithm. +/// +/// For each frame `t` and each mel band `b`: +/// `diff(t) = Σ_b max(0, X[t][b] - max(X[t-1][b], ..., X[t-lag][b]))` +/// +/// Robust-normalized by the 99th percentile (skipping the first ~1 s to avoid +/// HP filter transient). +fn compute_superflux(mel_frames: &[Vec], lag: usize) -> Vec { + let n_frames = mel_frames.len(); + if n_frames <= lag { + return vec![0.0; n_frames]; + } + + let n_bands = mel_frames[0].len(); + let mut onset = vec![0.0f32; n_frames]; + + for t in lag..n_frames { + let mut flux = 0.0f32; + for b in 0..n_bands { + // Max of previous `lag` frames + let mut max_prev = mel_frames[t - 1][b]; + for d in 2..=lag { + max_prev = max_prev.max(mel_frames[t - d][b]); + } + let diff = mel_frames[t][b] - max_prev; + if diff > 0.0 { + flux += diff; + } + } + onset[t] = flux; + } + + // Robust normalize: skip first ~1s (HP filter transient), use 99th pct + let skip_frames = 40.min(onset.len() / 4); + if skip_frames < onset.len() { + let mut sorted: Vec = onset[skip_frames..] 
+ .iter() + .cloned() + .filter(|&v| v > 0.0) + .collect(); + sorted.sort_by(|a, b| a.partial_cmp(b).unwrap()); + let p99 = if sorted.is_empty() { + 0.0 + } else { + sorted[(sorted.len() as f32 * 0.99) as usize] + }; + if p99 > 0.0 { + for v in &mut onset { + *v /= p99; + } + } + } + + onset +} + +// ─── Adaptive threshold ───────────────────────────────────────────────── + +/// Running median-based threshold with IQR multiplier. +/// +/// For each frame, computes `max(0, onset[t] - (median + multiplier * IQR))` +/// over a local window, then re-normalizes by the 99th percentile. +fn adaptive_threshold(onset: &[f32], median_window: f32, multiplier: f32) -> Vec { + let n = onset.len(); + let half = (median_window / 2.0).round() as usize; + let mut thresholded = vec![0.0f32; n]; + + for t in 0..n { + let lo = if t >= half { t - half } else { 0 }; + let hi = (t + half).min(n - 1); + let count = hi - lo + 1; + let mut window_vals: Vec = onset[lo..=hi].to_vec(); + window_vals.sort_by(|a, b| a.partial_cmp(b).unwrap()); + let median = window_vals[count / 2]; + // IQR-based threshold + let iqr = window_vals[3 * count / 4] - median; + let threshold = median + multiplier * iqr; + thresholded[t] = (onset[t] - threshold).max(0.0); + } + + // Robust re-normalize: skip first ~1s, use 99th percentile + let skip = 40.min(thresholded.len() / 4); + if skip < thresholded.len() { + let mut vals: Vec = thresholded[skip..] + .iter() + .cloned() + .filter(|&v| v > 0.0) + .collect(); + vals.sort_by(|a, b| a.partial_cmp(b).unwrap()); + let p99 = vals + .get((vals.len() as f32 * 0.99) as usize) + .copied() + .unwrap_or(0.0); + if p99 > 0.0 { + for v in &mut thresholded { + *v /= p99; + } + } + } + + thresholded +} + +// ─── Linear interpolation ─────────────────────────────────────────────── + +/// Linear interpolation at time `t` (seconds) in a signal sampled every `dt`. 
+fn interpolate(data: &[f32], dt: f64, t: f64) -> f32 { + if data.is_empty() { + return 0.0; + } + let idx = t / dt; + if idx < 0.0 { + return data[0]; + } + let i = idx as usize; + if i + 1 >= data.len() { + return data[data.len() - 1]; + } + let frac = (idx - i as f64) as f32; + let a = data[i]; + let b = data[i + 1]; + a + (b - a) * frac +} diff --git a/prpr-auto-offset/src/estimate.rs b/prpr-auto-offset/src/estimate.rs index 00b579fa6..bc1533fce 100644 --- a/prpr-auto-offset/src/estimate.rs +++ b/prpr-auto-offset/src/estimate.rs @@ -93,9 +93,9 @@ pub fn estimate_with(audio: &A, note: &N, duration_sec: f6 }; } - // Build shared sampling grid - let t_min = -config.search_range_sec; - let t_max = duration_sec + config.search_range_sec; + // Build shared sampling grid centered at search_center_sec + let t_min = config.search_center_sec - config.search_range_sec; + let t_max = config.search_center_sec + duration_sec + config.search_range_sec; let ts = build_ts_grid(t_min, t_max, config.sampling_interval_sec); // Sample both signals diff --git a/prpr-auto-offset/src/lib.rs b/prpr-auto-offset/src/lib.rs index eb32e1f12..052a8d764 100644 --- a/prpr-auto-offset/src/lib.rs +++ b/prpr-auto-offset/src/lib.rs @@ -4,7 +4,7 @@ mod note; mod signal; mod types; -pub use audio::{EnergyDiff, SpectralFlux}; +pub use audio::{EnergyDiff, SpectralFlux, SuperFlux}; pub use estimate::{estimate, estimate_with}; pub use note::NoteGaussian; pub use signal::Signal; diff --git a/prpr-auto-offset/src/types.rs b/prpr-auto-offset/src/types.rs index 3f810baff..2716ba055 100644 --- a/prpr-auto-offset/src/types.rs +++ b/prpr-auto-offset/src/types.rs @@ -1,17 +1,24 @@ /// Configuration for the alignment algorithm. #[derive(Debug, Clone)] pub struct AlignConfig { - /// Search range for offset, in seconds. Default ±5.0s. + /// Search range for offset, in seconds. Default ±0.30s (narrow, centered at + /// [`search_center_sec`]). 
pub search_range_sec: f64, - /// Time step for the shared sampling grid, in seconds. Default 0.001 (1ms). + /// Time step for the shared sampling grid, in seconds. Default 0.005 (5ms). pub sampling_interval_sec: f64, + /// Center of the search window, in seconds. + /// + /// Set this to the chart author's configured offset so the algorithm only + /// searches for a small correction nearby. Default 0.0. + pub search_center_sec: f64, } impl Default for AlignConfig { fn default() -> Self { Self { - search_range_sec: 5.0, - sampling_interval_sec: 0.001, + search_range_sec: 0.30, + sampling_interval_sec: 0.005, + search_center_sec: 0.0, } } } diff --git a/tools/auto-offset-cli/src/main.rs b/tools/auto-offset-cli/src/main.rs index 672fcd8f9..b7d63f9bd 100644 --- a/tools/auto-offset-cli/src/main.rs +++ b/tools/auto-offset-cli/src/main.rs @@ -4,7 +4,7 @@ use prpr::{ fs::{fs_from_file, load_info}, parse::{parse_pec, parse_phigros, parse_rpe}, }; -use prpr_auto_offset::{AlignConfig, AlignmentResult, EnergyDiff, NoteGaussian, SpectralFlux}; +use prpr_auto_offset::{AlignConfig, AlignmentResult, EnergyDiff, NoteGaussian, SpectralFlux, SuperFlux}; use std::io::Write; use std::path::PathBuf; @@ -15,11 +15,15 @@ struct Cli { /// Path to a Phira chart file (zip archive) chart: PathBuf, - /// Search range in seconds - #[arg(short, long, default_value = "5.0")] + /// Search range in seconds (centered at chart's author offset, or at 0 with --wide) + #[arg(short, long, default_value = "0.30")] range: f64, - /// Audio novelty method: spectral or energy + /// Wide-range search: ignore author offset, search full ±range from 0 + #[arg(short = 'w', long)] + wide: bool, + + /// Audio novelty method: superflux, spectral, or energy #[arg(long, default_value = "spectral")] audio_method: String, @@ -28,7 +32,7 @@ struct Cli { note_method: String, /// Sampling interval for the cross-correlation grid, in seconds - #[arg(short, long, default_value = "0.001")] + #[arg(short, long, default_value = 
"0.005")] interval: f64, /// Gaussian blur sigma for the note signal, in seconds @@ -65,6 +69,7 @@ fn print_result(result: &AlignmentResult, verbose: bool) { async fn run( chart_path: &PathBuf, search_range: f64, + wide: bool, audio_method: &str, note_method: &str, sampling_interval: f64, @@ -142,13 +147,32 @@ async fn run( } // 6. Configure + let author_offset = info.offset as f64; let config = AlignConfig { search_range_sec: search_range, sampling_interval_sec: sampling_interval, + search_center_sec: if wide { 0.0 } else { author_offset }, }; + if verbose { + if wide { + println!(" Search: +/-{:.0}ms (wide, centered at 0)", search_range * 1000.0); + } else { + println!(" Search: +/-{:.0}ms (centered at author offset {:.0}ms)", search_range * 1000.0, author_offset * 1000.0); + } + } + // 7. Select methods and run let result = match (audio_method, note_method) { + ("superflux", "gaussian") => { + if verbose { + println!(" Audio method: superflux"); + println!(" Note method: gaussian (sigma={}ms)", blur_sigma * 1000.0); + } + let audio = SuperFlux::new(&pcm, sample_rate, 2048, 1024); + let note = NoteGaussian::new(note_times, blur_sigma); + prpr_auto_offset::estimate_with(&audio, ¬e, duration, &config) + } ("spectral", "gaussian") => { if verbose { println!(" Audio method: spectral flux"); @@ -177,5 +201,5 @@ async fn run( #[tokio::main] async fn main() -> Result<()> { let cli = Cli::parse(); - run(&cli.chart, cli.range, &cli.audio_method, &cli.note_method, cli.interval, cli.blur_sigma, cli.verbose).await + run(&cli.chart, cli.range, cli.wide, &cli.audio_method, &cli.note_method, cli.interval, cli.blur_sigma, cli.verbose).await } From 7bbfa276ff0ca2f858c3932ae2066922af976331 Mon Sep 17 00:00:00 2001 From: liquidhelium Date: Sun, 14 Jun 2026 00:06:11 +0800 Subject: [PATCH 03/18] feat: auto-offset for phira --- Cargo.lock | 23 ++- Cargo.toml | 1 + prpr-auto-offset/src/audio/mod.rs | 2 +- prpr-auto-offset/src/audio/superflux.rs | 22 ++- prpr-auto-offset/src/estimate.rs 
| 15 +- prpr-auto-offset/src/lib.rs | 2 +- prpr-auto-offset/src/types.rs | 3 + prpr/Cargo.toml | 1 + prpr/locales/en-US/game.ftl | 3 + prpr/locales/zh-CN/game.ftl | 3 + prpr/src/scene/game.rs | 235 +++++++++++++++++++++++- 11 files changed, 296 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index da43ab23e..456013b5d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3127,9 +3127,9 @@ checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" [[package]] name = "ordered-float" -version = "5.1.0" +version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f4779c6901a562440c3786d08192c6fbda7c1c2060edd10006b05ee35d10f2d" +checksum = "b7d950ca161dc355eaf28f82b11345ed76c6e1f6eb1f4f4479e0323b9e2fbd0e" dependencies = [ "num-traits", ] @@ -3550,6 +3550,7 @@ dependencies = [ "open", "ordered-float", "phf", + "prpr-auto-offset", "prpr-avc", "prpr-l10n", "rand 0.8.5", @@ -3621,6 +3622,24 @@ dependencies = [ "walkdir", ] +[[package]] +name = "prpr-latency-visualizer" +version = "0.7.1" +dependencies = [ + "anyhow", + "glyph_brush", + "image", + "lyon", + "macroquad", + "prpr", + "prpr-auto-offset", + "prpr-avc", + "rfd", + "sasa", + "tempfile", + "tokio", +] + [[package]] name = "prpr-pbc" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 917bb4881..63c55cc93 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,7 @@ members = [ "prpr-l10n", "prpr-pbc", "tools/auto-offset-cli", + "tools/latency-visualizer", ] resolver = "2" diff --git a/prpr-auto-offset/src/audio/mod.rs b/prpr-auto-offset/src/audio/mod.rs index d7d4e780f..d925e7dae 100644 --- a/prpr-auto-offset/src/audio/mod.rs +++ b/prpr-auto-offset/src/audio/mod.rs @@ -4,4 +4,4 @@ mod superflux; pub use energy::EnergyDiff; pub use spectral::SpectralFlux; -pub use superflux::SuperFlux; +pub use superflux::{SuperFlux, MelFilterbank, compute_mel_spectrogram}; diff --git a/prpr-auto-offset/src/audio/superflux.rs 
b/prpr-auto-offset/src/audio/superflux.rs index 00ff4aea8..931850d66 100644 --- a/prpr-auto-offset/src/audio/superflux.rs +++ b/prpr-auto-offset/src/audio/superflux.rs @@ -58,6 +58,16 @@ impl SuperFlux { native_dt, } } + + /// Access the native onset-strength samples (after adaptive threshold). + pub fn onset_samples(&self) -> &[f32] { + &self.native + } + + /// Time step between native onset samples, in seconds. + pub fn onset_dt(&self) -> f64 { + self.native_dt + } } impl Signal for SuperFlux { @@ -114,14 +124,14 @@ fn mel_to_hz(mel: f32) -> f32 { // ─── Mel filterbank ───────────────────────────────────────────────────── -struct MelFilterbank { +pub struct MelFilterbank { /// Triangular filter weights: [mel_band][fft_bin] - weights: Vec>, - n_mels: usize, + pub weights: Vec>, + pub n_mels: usize, } impl MelFilterbank { - fn new(sample_rate: u32, window_size: usize, n_mels: usize, f_min: f32, f_max: f32) -> Self { + pub fn new(sample_rate: u32, window_size: usize, n_mels: usize, f_min: f32, f_max: f32) -> Self { let n_fft_bins = window_size / 2 + 1; let mel_min = hz_to_mel(f_min); let mel_max = hz_to_mel(f_max.min(sample_rate as f32 / 2.0)); @@ -161,7 +171,7 @@ impl MelFilterbank { } /// Apply mel filterbank to a power spectrum, returns log-magnitudes per mel band (dB). 
- fn apply(&self, power_spectrum: &[f32]) -> Vec { + pub fn apply(&self, power_spectrum: &[f32]) -> Vec { let mut mel = vec![0.0f32; self.n_mels]; for (m, w) in self.weights.iter().enumerate() { let sum: f32 = power_spectrum.iter().zip(w).map(|(&p, &w)| p * w).sum(); @@ -173,7 +183,7 @@ impl MelFilterbank { // ─── Mel-spectrogram computation ──────────────────────────────────────── -fn compute_mel_spectrogram( +pub fn compute_mel_spectrogram( samples: &[f32], sample_rate: u32, window_size: usize, diff --git a/prpr-auto-offset/src/estimate.rs b/prpr-auto-offset/src/estimate.rs index bc1533fce..7c430076d 100644 --- a/prpr-auto-offset/src/estimate.rs +++ b/prpr-auto-offset/src/estimate.rs @@ -90,6 +90,7 @@ pub fn estimate_with(audio: &A, note: &N, duration_sec: f6 offset: 0.0, correlation: 0.0, reliable: false, + correlation_curve: Vec::new(), }; } @@ -107,14 +108,25 @@ pub fn estimate_with(audio: &A, note: &N, duration_sec: f6 offset: 0.0, correlation: 0.0, reliable: false, + correlation_curve: Vec::new(), }; } // Cross-correlation let max_lag_bins = (config.search_range_sec / config.sampling_interval_sec).ceil() as usize; - let (_corr, best_lag, best_val) = cross_correlation(¬e_samples, &audio_samples, max_lag_bins); + let (corr, best_lag, best_val) = cross_correlation(¬e_samples, &audio_samples, max_lag_bins); let offset = (best_lag as isize - max_lag_bins as isize) as f64 * config.sampling_interval_sec; + // Build correlation curve + let correlation_curve: Vec<(f64, f32)> = corr + .iter() + .enumerate() + .map(|(i, &v)| { + let lag = i as isize - max_lag_bins as isize; + (lag as f64 * config.sampling_interval_sec, v) + }) + .collect(); + // Normalized correlation at best lag let lag = best_lag as isize - max_lag_bins as isize; let correlation = normalized_correlation(¬e_samples, &audio_samples, lag, best_val); @@ -123,5 +135,6 @@ pub fn estimate_with(audio: &A, note: &N, duration_sec: f6 offset, correlation, reliable: correlation > RELIABILITY_THRESHOLD, + 
correlation_curve, } } diff --git a/prpr-auto-offset/src/lib.rs b/prpr-auto-offset/src/lib.rs index 052a8d764..d5e462351 100644 --- a/prpr-auto-offset/src/lib.rs +++ b/prpr-auto-offset/src/lib.rs @@ -4,7 +4,7 @@ mod note; mod signal; mod types; -pub use audio::{EnergyDiff, SpectralFlux, SuperFlux}; +pub use audio::{EnergyDiff, SpectralFlux, SuperFlux, MelFilterbank, compute_mel_spectrogram}; pub use estimate::{estimate, estimate_with}; pub use note::NoteGaussian; pub use signal::Signal; diff --git a/prpr-auto-offset/src/types.rs b/prpr-auto-offset/src/types.rs index 2716ba055..d342e56f7 100644 --- a/prpr-auto-offset/src/types.rs +++ b/prpr-auto-offset/src/types.rs @@ -36,4 +36,7 @@ pub struct AlignmentResult { pub correlation: f64, /// Whether the correlation exceeds the default reliability threshold. pub reliable: bool, + /// Full correlation curve: (offset_seconds, raw_correlation_score). + /// Useful for visualization of the score-vs-offset landscape. + pub correlation_curve: Vec<(f64, f32)>, } diff --git a/prpr/Cargo.toml b/prpr/Cargo.toml index 6e126f2f4..967d764cf 100644 --- a/prpr/Cargo.toml +++ b/prpr/Cargo.toml @@ -52,6 +52,7 @@ obfstr = "0.4.4" once_cell = { workspace = true } ordered-float = "5.1.0" phf = { version = "0.13.1", features = ["macros"] } +prpr-auto-offset = { workspace = true } prpr-avc = { workspace = true, optional = true } prpr-l10n = { workspace = true } rand = { workspace = true } diff --git a/prpr/locales/en-US/game.ftl b/prpr/locales/en-US/game.ftl index de174de44..d25ffd3e8 100644 --- a/prpr/locales/en-US/game.ftl +++ b/prpr/locales/en-US/game.ftl @@ -4,6 +4,9 @@ adjust-offset = Adjust Offset offset-cancel = Cancel offset-reset = Reset offset-save = Save +analysis-prompt = Press "Analyze" to auto-analyze +analysis-computing = Analyzing... 
+auto-offset-btn = Analyze speed = Speed Multiplier game-over = Game Over diff --git a/prpr/locales/zh-CN/game.ftl b/prpr/locales/zh-CN/game.ftl index 1a53fadc7..840b7a946 100644 --- a/prpr/locales/zh-CN/game.ftl +++ b/prpr/locales/zh-CN/game.ftl @@ -4,6 +4,9 @@ adjust-offset = 调整延迟 offset-cancel = 取消 offset-reset = 重置 offset-save = 保存 +analysis-prompt = 按下"分析"自动分析 +analysis-computing = 分析中... +auto-offset-btn = 分析 speed = 速度 game-over = 游戏失败 diff --git a/prpr/src/scene/game.rs b/prpr/src/scene/game.rs index 3a6797474..039a3eb65 100644 --- a/prpr/src/scene/game.rs +++ b/prpr/src/scene/game.rs @@ -26,6 +26,7 @@ use concat_string::concat_string; use inputbox::InputBox; use lyon::path::Path; use macroquad::{prelude::*, window::InternalGlContext}; +use prpr_auto_offset::{AlignConfig, AlignmentResult, NoteGaussian, SuperFlux, estimate_with}; use sasa::{Music, MusicParams}; use serde::{Deserialize, Serialize}; use std::{ @@ -114,6 +115,28 @@ enum State { Ending, } +#[derive(Clone)] +enum OffsetAnalysisState { + /// No analysis started; show prompt text. + Idle, + /// Background thread is running; show "Analyzing...". + Computing, + /// Analysis complete with result. + Done(AlignmentResult), +} + +/// Extract non-fake note hit times from the chart, sorted and filtered to t >= 0. 
+fn extract_note_times(chart: &Chart) -> Vec { + let mut times: Vec = chart + .lines + .iter() + .flat_map(|line| line.notes.iter().map(|note| note.time)) + .filter(|&t| t >= 0.0) + .collect(); + times.sort_by(|a, b| a.partial_cmp(b).unwrap()); + times +} + pub struct GameScene { should_exit: bool, next_scene: Option, @@ -128,6 +151,9 @@ pub struct GameScene { chart_format: ChartFormat, info_offset: f32, effects: Vec, + analysis_state: OffsetAnalysisState, + analysis_requested: bool, + analysis_handle: Option>>>, first_in: bool, exercise_range: Range, @@ -320,6 +346,10 @@ impl GameScene { effects, info_offset, + analysis_state: OffsetAnalysisState::Idle, + analysis_requested: false, + analysis_handle: None, + first_in: false, exercise_range, exercise_press: None, @@ -802,6 +832,155 @@ impl GameScene { self.chart.offset + self.res.config.offset + self.info_offset } + #[cfg(not(target_arch = "wasm32"))] + fn start_analysis(&mut self) { + use std::thread; + + // 1. Extract note times + let note_times = extract_note_times(&self.chart); + + // 2. Extract PCM from AudioClip (stereo -> mono) + let clip = self.res.music.clone(); + let pcm: Vec = clip.frames().iter().map(|f| (f.0 + f.1) / 2.0).collect(); + let sample_rate = clip.sample_rate(); + let author_offset = self.chart.offset + self.res.config.offset; + + // 3. Build config + let config = AlignConfig { + search_range_sec: 0.30, + sampling_interval_sec: 0.005, + search_center_sec: author_offset as f64, + }; + + // 4. Create shared result slot + let result_slot: Arc>> = Arc::new(Mutex::new(None)); + self.analysis_handle = Some(result_slot.clone()); + + // 5. 
Spawn background thread + let _handle = thread::spawn(move || { + let superflux = SuperFlux::new(&pcm, sample_rate, 2048, 1024); + let note = NoteGaussian::new(note_times, 0.02); + let duration = pcm.len() as f64 / sample_rate as f64; + let result = estimate_with(&superflux, ¬e, duration, &config); + if let Ok(mut guard) = result_slot.lock() { + *guard = Some(result); + } + }); + + // 6. Transition state + self.analysis_state = OffsetAnalysisState::Computing; + } + + #[cfg(target_arch = "wasm32")] + fn start_analysis(&mut self) { + // wasm32: no threading support; analysis not available on web + } + + fn draw_offset_graph( + &self, + ui: &mut Ui, + rect: Rect, + result: &AlignmentResult, + ) { + use lyon::math::point; + + let curve = &result.correlation_curve; + if curve.is_empty() { + return; + } + + // Map data ranges + let min_o = curve.first().map(|&(o, _)| o).unwrap_or(0.0); + let max_o = curve.last().map(|&(o, _)| o).unwrap_or(0.0); + let o_range = (max_o - min_o).max(1e-6); + let min_s = curve.iter().map(|&(_, s)| s).fold(f32::INFINITY, f32::min); + let max_s = curve.iter().map(|&(_, s)| s).fold(f32::NEG_INFINITY, f32::max); + let s_range = (max_s - min_s).max(1e-6); + + // Background — fill full width + ui.fill_rect( + rect, + Color::new(0.0, 0.0, 0.0, 0.3), + ); + + // Use full width, only pad vertically + let v_pad = 0.08; + let inner = Rect::new( + rect.x, + rect.y + rect.h * v_pad, + rect.w, + rect.h * (1.0 - 2.0 * v_pad), + ); + + // Correlation curve: gray, alpha 0.6, ~0.01 Ui-unit per point + let pt_spacing = 0.01; + let step = ((curve.len() as f64 * pt_spacing) / rect.w as f64).ceil() as usize; + let step = step.max(1); + let mut path_builder = lyon::path::Path::builder(); + let mut first = true; + for i in (0..curve.len()).step_by(step) { + let (o, s) = curve[i]; + let x = inner.x + ((o - min_o) / o_range) as f32 * inner.w; + let y = inner.y + (1.0 - (s - min_s) / s_range) as f32 * inner.h; + if first { + path_builder.begin(point(x, y)); + first = 
false; + } else { + path_builder.line_to(point(x, y)); + } + } + path_builder.end(false); + let line_w = rect.w * 0.003; + ui.stroke_path( + &path_builder.build(), + line_w, + Color::new(0.6, 0.6, 0.6, 0.6), // gray, a=0.6 + ); + + // Marker line width + let marker_w = line_w * 2.0; + + // Orange vertical line: chart author offset (without audio latency compensation) + let orig_o = self.chart.offset as f64; + if orig_o >= min_o && orig_o <= max_o { + let ox = inner.x + ((orig_o - min_o) / o_range) as f32 * inner.w; + ui.fill_rect( + Rect::new(ox - marker_w, inner.y, marker_w * 2.0, inner.h), + Color::new(1.0, 0.5, 0.0, 0.5), // orange, a=0.5 + ); + } + + // Green vertical line: recommended total offset + let best_o = result.offset; + if best_o >= min_o && best_o <= max_o { + let bx = inner.x + ((best_o - min_o) / o_range) as f32 * inner.w; + ui.fill_rect( + Rect::new(bx - marker_w, inner.y, marker_w * 2.0, inner.h), + Color::new(0.0, 1.0, 0.0, 0.5), // green, a=0.5 + ); + } + + // Blue vertical line: current chart delay (without audio latency compensation) + let cur_o = (self.chart.offset + self.info_offset) as f64; + if cur_o >= min_o && cur_o <= max_o { + let cx = inner.x + ((cur_o - min_o) / o_range) as f32 * inner.w; + ui.fill_rect( + Rect::new(cx - marker_w, inner.y, marker_w * 2.0, inner.h), + Color::new(0.0, 0.5, 1.0, 0.5), // blue, a=0.5 + ); + } + + // Suggested correction in top-right corner + let correction_ms = ((result.offset - self.chart.offset as f64) * 1000.0).round() as i32; + ui.text(format!("{correction_ms:+}ms")) + .pos(rect.x + rect.w - 0.01, rect.y) + .anchor(1.0, 0.0) + .size(0.35) + .color(Color::new(0.0, 1.0, 0.0, 0.7)) + .no_baseline() + .draw(); + } + fn tweak_offset(&mut self, ui: &mut Ui, ita: bool) { ui.scope(|ui| { let width = 0.55; @@ -810,8 +989,42 @@ impl GameScene { ui.dy(ui.top - height - 0.02); ui.fill_rect(Rect::new(0., 0., width, height), GRAY); ui.dy(0.02); - ui.text(tl!("adjust-offset")).pos(width / 2., 0.).anchor(0.5, 
0.).size(0.7).draw(); - ui.dy(0.16); + let r = ui.text(tl!("adjust-offset")).pos(width / 2.-0.03, 0.).anchor(1.0, 0.).size(0.7).no_baseline().draw(); + if ui.button("auto-offset", Rect::new(width / 2.+0.03, r.top(), r.w, r.h), tl!("auto-offset-btn")) { + if !matches!(self.analysis_state, OffsetAnalysisState::Computing) { + self.analysis_requested = true; + } + } + ui.dy(0.04 + r.h/2.); + // Graph area + let graph_rect = Rect::new(0., 0., width, 0.17-r.h/2.); + match self.analysis_state.clone() { + OffsetAnalysisState::Idle => { + ui.dy(graph_rect.h / 2. - 0.03); + ui.text(tl!("analysis-prompt")) + .pos(width / 2., 0.) + .anchor(0.5, 0.5) + .size(0.5) + .no_baseline() + .draw(); + ui.dy(graph_rect.h / 2. + 0.03); + } + OffsetAnalysisState::Computing => { + ui.dy(graph_rect.h / 2. - 0.03); + ui.text(tl!("analysis-computing")) + .pos(width / 2., 0.) + .anchor(0.5, 0.5) + .size(0.5) + .no_baseline() + .draw(); + ui.dy(graph_rect.h / 2. + 0.03); + } + OffsetAnalysisState::Done(ref result) => { + self.draw_offset_graph(ui, graph_rect, result); + ui.dy(graph_rect.h); + } + } + ui.dy(0.02); let r = ui .text(format!("{}ms", (self.info_offset * 1000.).round() as i32)) .pos(width / 2., 0.) @@ -840,7 +1053,7 @@ impl GameScene { if ui.button("ti_add", Rect::new(width - d, r.center().y, 0., 0.).feather(0.017), "+") && ita { self.info_offset += 0.001; } - ui.dy(0.14); + ui.dy(0.07); let pad = 0.02; let spacing = 0.01; let mut r = Rect::new(pad, 0., (width - pad * 2. - spacing * 2.) 
/ 3., 0.06); @@ -901,6 +1114,22 @@ impl Scene for GameScene { } fn update(&mut self, tm: &mut TimeManager) -> Result<()> { + // Trigger auto-offset analysis if button was clicked in tweak_offset panel + if self.analysis_requested { + self.analysis_requested = false; + self.start_analysis(); + } + // Check if background analysis completed + let handle = self.analysis_handle.clone(); + if let Some(handle) = handle { + if let Ok(mut guard) = handle.try_lock() { + if let Some(result) = guard.take() { + self.analysis_state = OffsetAnalysisState::Done(result); + self.analysis_handle = None; + } + } + } + self.res.audio.recover_if_needed()?; if matches!(self.state, State::Playing) { tm.update(self.music.position()); From 50222070da3c2f4ac6ccd5cf21cdd596adbd16b3 Mon Sep 17 00:00:00 2001 From: liquidhelium Date: Sun, 14 Jun 2026 00:10:56 +0800 Subject: [PATCH 04/18] feat: remove latency visualizer from Cargo.toml --- Cargo.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 63c55cc93..6fc470e57 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,8 +8,7 @@ members = [ "prpr-avc", "prpr-l10n", "prpr-pbc", - "tools/auto-offset-cli", - "tools/latency-visualizer", + "tools/*", ] resolver = "2" From 63842e6cb55d4a6b19a90aff9d2b09a568618b78 Mon Sep 17 00:00:00 2001 From: liquidhelium Date: Sun, 14 Jun 2026 00:18:19 +0800 Subject: [PATCH 05/18] fix: clippy warnings --- prpr-auto-offset/src/audio/superflux.rs | 19 +++++++++---------- prpr/src/scene/game.rs | 7 +++---- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/prpr-auto-offset/src/audio/superflux.rs b/prpr-auto-offset/src/audio/superflux.rs index 931850d66..7dee18084 100644 --- a/prpr-auto-offset/src/audio/superflux.rs +++ b/prpr-auto-offset/src/audio/superflux.rs @@ -157,12 +157,12 @@ impl MelFilterbank { } else { mel_centers[m + 1] }; - for k in 0..n_fft_bins { + for (k, w) in weights[m].iter_mut().enumerate() { let f = bin_hz(k); if f >= left && f <= center { - 
weights[m][k] = (f - left) / (center - left).max(1e-10); + *w = (f - left) / (center - left).max(1e-10); } else if f > center && f <= right { - weights[m][k] = (right - f) / (right - center).max(1e-10); + *w = (right - f) / (right - center).max(1e-10); } } } @@ -250,12 +250,12 @@ fn whiten_spectrogram(frames: &mut [Vec], window_frames: usize) { for band in 0..n_bands { // Compute local means let mut smoothed = vec![0.0f32; n_frames]; - for t in 0..n_frames { - let lo = if t >= half { t - half } else { 0 }; + for (t, s) in smoothed.iter_mut().enumerate() { + let lo = t.saturating_sub(half); let hi = (t + half).min(n_frames - 1); let count = (hi - lo + 1) as f32; let sum: f32 = frames[lo..=hi].iter().map(|f| f[band]).sum(); - smoothed[t] = sum / count; + *s = sum / count; } // Subtract local mean from each frame for t in 0..n_frames { @@ -281,18 +281,17 @@ fn compute_superflux(mel_frames: &[Vec], lag: usize) -> Vec { return vec![0.0; n_frames]; } - let n_bands = mel_frames[0].len(); let mut onset = vec![0.0f32; n_frames]; for t in lag..n_frames { let mut flux = 0.0f32; - for b in 0..n_bands { + for (b, &cur) in mel_frames[t].iter().enumerate() { // Max of previous `lag` frames let mut max_prev = mel_frames[t - 1][b]; for d in 2..=lag { max_prev = max_prev.max(mel_frames[t - d][b]); } - let diff = mel_frames[t][b] - max_prev; + let diff = cur - max_prev; if diff > 0.0 { flux += diff; } @@ -336,7 +335,7 @@ fn adaptive_threshold(onset: &[f32], median_window: f32, multiplier: f32) -> Vec let mut thresholded = vec![0.0f32; n]; for t in 0..n { - let lo = if t >= half { t - half } else { 0 }; + let lo = t.saturating_sub(half); let hi = (t + half).min(n - 1); let count = hi - lo + 1; let mut window_vals: Vec = onset[lo..=hi].to_vec(); diff --git a/prpr/src/scene/game.rs b/prpr/src/scene/game.rs index 039a3eb65..d0201096f 100644 --- a/prpr/src/scene/game.rs +++ b/prpr/src/scene/game.rs @@ -921,7 +921,7 @@ impl GameScene { for i in (0..curve.len()).step_by(step) { let (o, s) = 
curve[i]; let x = inner.x + ((o - min_o) / o_range) as f32 * inner.w; - let y = inner.y + (1.0 - (s - min_s) / s_range) as f32 * inner.h; + let y = inner.y + (1.0 - (s - min_s) / s_range) * inner.h; if first { path_builder.begin(point(x, y)); first = false; @@ -990,11 +990,10 @@ impl GameScene { ui.fill_rect(Rect::new(0., 0., width, height), GRAY); ui.dy(0.02); let r = ui.text(tl!("adjust-offset")).pos(width / 2.-0.03, 0.).anchor(1.0, 0.).size(0.7).no_baseline().draw(); - if ui.button("auto-offset", Rect::new(width / 2.+0.03, r.top(), r.w, r.h), tl!("auto-offset-btn")) { - if !matches!(self.analysis_state, OffsetAnalysisState::Computing) { + if ui.button("auto-offset", Rect::new(width / 2.+0.03, r.top(), r.w, r.h), tl!("auto-offset-btn")) + && !matches!(self.analysis_state, OffsetAnalysisState::Computing) { self.analysis_requested = true; } - } ui.dy(0.04 + r.h/2.); // Graph area let graph_rect = Rect::new(0., 0., width, 0.17-r.h/2.); From 32cc3c5696e49eabe3a113a1c65b182cbc75fec9 Mon Sep 17 00:00:00 2001 From: liquidhelium Date: Sun, 14 Jun 2026 00:19:56 +0800 Subject: [PATCH 06/18] style: fmt --- prpr-auto-offset/src/audio/mod.rs | 2 +- prpr-auto-offset/src/audio/superflux.rs | 75 +++++-------------------- prpr-auto-offset/src/lib.rs | 2 +- prpr/src/scene/game.rs | 42 ++++++-------- 4 files changed, 34 insertions(+), 87 deletions(-) diff --git a/prpr-auto-offset/src/audio/mod.rs b/prpr-auto-offset/src/audio/mod.rs index d925e7dae..67cafb8c7 100644 --- a/prpr-auto-offset/src/audio/mod.rs +++ b/prpr-auto-offset/src/audio/mod.rs @@ -4,4 +4,4 @@ mod superflux; pub use energy::EnergyDiff; pub use spectral::SpectralFlux; -pub use superflux::{SuperFlux, MelFilterbank, compute_mel_spectrogram}; +pub use superflux::{compute_mel_spectrogram, MelFilterbank, SuperFlux}; diff --git a/prpr-auto-offset/src/audio/superflux.rs b/prpr-auto-offset/src/audio/superflux.rs index 7dee18084..d255e1618 100644 --- a/prpr-auto-offset/src/audio/superflux.rs +++ 
b/prpr-auto-offset/src/audio/superflux.rs @@ -39,8 +39,7 @@ impl SuperFlux { let mel = MelFilterbank::new(sample_rate, window_size, 80, 50.0, 12000.0); // 3. Mel-spectrogram - let (mut mel_frames, frame_rate) = - compute_mel_spectrogram(&samples, sample_rate, window_size, hop_size, &mel); + let (mut mel_frames, frame_rate) = compute_mel_spectrogram(&samples, sample_rate, window_size, hop_size, &mel); // 4. Spectral whitening (1-second window) whiten_spectrogram(&mut mel_frames, (frame_rate * 1.0) as usize); @@ -53,10 +52,7 @@ impl SuperFlux { // Use the declared native_dt (frame_rate may differ slightly due to rounding) let _ = frame_rate; - Self { - native: onset, - native_dt, - } + Self { native: onset, native_dt } } /// Access the native onset-strength samples (after adaptive threshold). @@ -75,9 +71,7 @@ impl Signal for SuperFlux { if ts.is_empty() { return vec![]; } - ts.iter() - .map(|&t| interpolate(&self.native, self.native_dt, t)) - .collect() + ts.iter().map(|&t| interpolate(&self.native, self.native_dt, t)).collect() } } @@ -86,11 +80,7 @@ impl Signal for SuperFlux { fn highpass_50hz(samples: &mut [f32], sample_rate: u32) { // 1st-order Butterworth: y[n] = alpha*y[n-1] + alpha*(x[n] - x[n-1]) // Remove DC offset first, then initialize state to avoid transient - let dc = samples - .iter() - .take((sample_rate as usize / 10).min(samples.len())) - .sum::() - / (sample_rate as f32 / 10.0).min(samples.len() as f32); + let dc = samples.iter().take((sample_rate as usize / 10).min(samples.len())).sum::() / (sample_rate as f32 / 10.0).min(samples.len() as f32); for s in &mut *samples { *s -= dc; } @@ -138,25 +128,15 @@ impl MelFilterbank { let mel_step = (mel_max - mel_min) / (n_mels + 1) as f32; // Center frequencies of mel bands - let mel_centers: Vec = (0..n_mels) - .map(|i| mel_to_hz(mel_min + (i + 1) as f32 * mel_step)) - .collect(); + let mel_centers: Vec = (0..n_mels).map(|i| mel_to_hz(mel_min + (i + 1) as f32 * mel_step)).collect(); let bin_hz = |k: 
usize| k as f32 * sample_rate as f32 / window_size as f32; // Build triangular filter weights let mut weights = vec![vec![0.0f32; n_fft_bins]; n_mels]; for (m, ¢er) in mel_centers.iter().enumerate() { - let left = if m == 0 { - f_min - } else { - mel_centers[m - 1] - }; - let right = if m == n_mels - 1 { - f_max - } else { - mel_centers[m + 1] - }; + let left = if m == 0 { f_min } else { mel_centers[m - 1] }; + let right = if m == n_mels - 1 { f_max } else { mel_centers[m + 1] }; for (k, w) in weights[m].iter_mut().enumerate() { let f = bin_hz(k); if f >= left && f <= center { @@ -183,13 +163,7 @@ impl MelFilterbank { // ─── Mel-spectrogram computation ──────────────────────────────────────── -pub fn compute_mel_spectrogram( - samples: &[f32], - sample_rate: u32, - window_size: usize, - hop_size: usize, - mel: &MelFilterbank, -) -> (Vec>, f32) { +pub fn compute_mel_spectrogram(samples: &[f32], sample_rate: u32, window_size: usize, hop_size: usize, mel: &MelFilterbank) -> (Vec>, f32) { use rayon::prelude::*; use realfft::RealFftPlanner; use std::sync::Arc; @@ -201,10 +175,7 @@ pub fn compute_mel_spectrogram( }; let window: Vec = (0..window_size) - .map(|n| { - 0.5 * (1.0 - - (2.0 * std::f32::consts::PI * n as f32 / (window_size - 1) as f32).cos()) - }) + .map(|n| 0.5 * (1.0 - (2.0 * std::f32::consts::PI * n as f32 / (window_size - 1) as f32).cos())) .collect(); let mut planner = RealFftPlanner::::new(); @@ -214,19 +185,12 @@ pub fn compute_mel_spectrogram( .into_par_iter() .map(|frame_idx| { let start = frame_idx * hop_size; - let mut windowed: Vec = samples[start..start + window_size] - .iter() - .zip(&window) - .map(|(&s, &w)| s * w) - .collect(); + let mut windowed: Vec = samples[start..start + window_size].iter().zip(&window).map(|(&s, &w)| s * w).collect(); let mut spectrum = r2c.make_output_vec(); r2c.process(&mut windowed, &mut spectrum).unwrap(); - let power: Vec = spectrum - .iter() - .map(|c| c.re * c.re + c.im * c.im) - .collect(); + let power: Vec = 
spectrum.iter().map(|c| c.re * c.re + c.im * c.im).collect(); mel.apply(&power) }) .collect(); @@ -302,11 +266,7 @@ fn compute_superflux(mel_frames: &[Vec], lag: usize) -> Vec { // Robust normalize: skip first ~1s (HP filter transient), use 99th pct let skip_frames = 40.min(onset.len() / 4); if skip_frames < onset.len() { - let mut sorted: Vec = onset[skip_frames..] - .iter() - .cloned() - .filter(|&v| v > 0.0) - .collect(); + let mut sorted: Vec = onset[skip_frames..].iter().cloned().filter(|&v| v > 0.0).collect(); sorted.sort_by(|a, b| a.partial_cmp(b).unwrap()); let p99 = if sorted.is_empty() { 0.0 @@ -350,16 +310,9 @@ fn adaptive_threshold(onset: &[f32], median_window: f32, multiplier: f32) -> Vec // Robust re-normalize: skip first ~1s, use 99th percentile let skip = 40.min(thresholded.len() / 4); if skip < thresholded.len() { - let mut vals: Vec = thresholded[skip..] - .iter() - .cloned() - .filter(|&v| v > 0.0) - .collect(); + let mut vals: Vec = thresholded[skip..].iter().cloned().filter(|&v| v > 0.0).collect(); vals.sort_by(|a, b| a.partial_cmp(b).unwrap()); - let p99 = vals - .get((vals.len() as f32 * 0.99) as usize) - .copied() - .unwrap_or(0.0); + let p99 = vals.get((vals.len() as f32 * 0.99) as usize).copied().unwrap_or(0.0); if p99 > 0.0 { for v in &mut thresholded { *v /= p99; diff --git a/prpr-auto-offset/src/lib.rs b/prpr-auto-offset/src/lib.rs index d5e462351..5a0951e9d 100644 --- a/prpr-auto-offset/src/lib.rs +++ b/prpr-auto-offset/src/lib.rs @@ -4,7 +4,7 @@ mod note; mod signal; mod types; -pub use audio::{EnergyDiff, SpectralFlux, SuperFlux, MelFilterbank, compute_mel_spectrogram}; +pub use audio::{compute_mel_spectrogram, EnergyDiff, MelFilterbank, SpectralFlux, SuperFlux}; pub use estimate::{estimate, estimate_with}; pub use note::NoteGaussian; pub use signal::Signal; diff --git a/prpr/src/scene/game.rs b/prpr/src/scene/game.rs index d0201096f..137afce7a 100644 --- a/prpr/src/scene/game.rs +++ b/prpr/src/scene/game.rs @@ -26,7 +26,7 @@ use 
concat_string::concat_string; use inputbox::InputBox; use lyon::path::Path; use macroquad::{prelude::*, window::InternalGlContext}; -use prpr_auto_offset::{AlignConfig, AlignmentResult, NoteGaussian, SuperFlux, estimate_with}; +use prpr_auto_offset::{estimate_with, AlignConfig, AlignmentResult, NoteGaussian, SuperFlux}; use sasa::{Music, MusicParams}; use serde::{Deserialize, Serialize}; use std::{ @@ -876,12 +876,7 @@ impl GameScene { // wasm32: no threading support; analysis not available on web } - fn draw_offset_graph( - &self, - ui: &mut Ui, - rect: Rect, - result: &AlignmentResult, - ) { + fn draw_offset_graph(&self, ui: &mut Ui, rect: Rect, result: &AlignmentResult) { use lyon::math::point; let curve = &result.correlation_curve; @@ -898,19 +893,11 @@ impl GameScene { let s_range = (max_s - min_s).max(1e-6); // Background — fill full width - ui.fill_rect( - rect, - Color::new(0.0, 0.0, 0.0, 0.3), - ); + ui.fill_rect(rect, Color::new(0.0, 0.0, 0.0, 0.3)); // Use full width, only pad vertically let v_pad = 0.08; - let inner = Rect::new( - rect.x, - rect.y + rect.h * v_pad, - rect.w, - rect.h * (1.0 - 2.0 * v_pad), - ); + let inner = Rect::new(rect.x, rect.y + rect.h * v_pad, rect.w, rect.h * (1.0 - 2.0 * v_pad)); // Correlation curve: gray, alpha 0.6, ~0.01 Ui-unit per point let pt_spacing = 0.01; @@ -989,14 +976,21 @@ impl GameScene { ui.dy(ui.top - height - 0.02); ui.fill_rect(Rect::new(0., 0., width, height), GRAY); ui.dy(0.02); - let r = ui.text(tl!("adjust-offset")).pos(width / 2.-0.03, 0.).anchor(1.0, 0.).size(0.7).no_baseline().draw(); - if ui.button("auto-offset", Rect::new(width / 2.+0.03, r.top(), r.w, r.h), tl!("auto-offset-btn")) - && !matches!(self.analysis_state, OffsetAnalysisState::Computing) { - self.analysis_requested = true; - } - ui.dy(0.04 + r.h/2.); + let r = ui + .text(tl!("adjust-offset")) + .pos(width / 2. - 0.03, 0.) + .anchor(1.0, 0.) + .size(0.7) + .no_baseline() + .draw(); + if ui.button("auto-offset", Rect::new(width / 2. 
+ 0.03, r.top(), r.w, r.h), tl!("auto-offset-btn")) + && !matches!(self.analysis_state, OffsetAnalysisState::Computing) + { + self.analysis_requested = true; + } + ui.dy(0.04 + r.h / 2.); // Graph area - let graph_rect = Rect::new(0., 0., width, 0.17-r.h/2.); + let graph_rect = Rect::new(0., 0., width, 0.17 - r.h / 2.); match self.analysis_state.clone() { OffsetAnalysisState::Idle => { ui.dy(graph_rect.h / 2. - 0.03); From 44bcc6e365d529c00960689e71a6dd0ead525030 Mon Sep 17 00:00:00 2001 From: liquidhelium Date: Sun, 14 Jun 2026 00:25:28 +0800 Subject: [PATCH 07/18] style: allow run function in auto-offset-cli to have many arguments --- tools/auto-offset-cli/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/auto-offset-cli/src/main.rs b/tools/auto-offset-cli/src/main.rs index b7d63f9bd..a6c750542 100644 --- a/tools/auto-offset-cli/src/main.rs +++ b/tools/auto-offset-cli/src/main.rs @@ -65,7 +65,7 @@ fn print_result(result: &AlignmentResult, verbose: bool) { println!(" Reliable: {}", if result.reliable { "yes" } else { "no" }); println!("═══════════════════════════════════════"); } - +#[allow(clippy::too_many_arguments)] async fn run( chart_path: &PathBuf, search_range: f64, From 48846a593d21cdbac6daa098faba1c5694ccb879 Mon Sep 17 00:00:00 2001 From: sjfhsjfh Date: Sun, 14 Jun 2026 04:14:26 +0800 Subject: [PATCH 08/18] chore: better offset graph --- prpr/src/scene/game.rs | 95 +++++++++++++++++++++++++++++------------- 1 file changed, 65 insertions(+), 30 deletions(-) diff --git a/prpr/src/scene/game.rs b/prpr/src/scene/game.rs index 137afce7a..8c63440b2 100644 --- a/prpr/src/scene/game.rs +++ b/prpr/src/scene/game.rs @@ -19,7 +19,7 @@ use crate::{ parse::{parse_extra, parse_pec, parse_phigros, parse_rpe}, task::Task, time::TimeManager, - ui::{RectButton, TextPainter, Ui}, + ui::{RectButton, Scroll, TextPainter, Ui}, }; use anyhow::{bail, Context, Result}; use concat_string::concat_string; @@ -54,6 +54,11 @@ use inner::*; const 
WAIT_TIME: f64 = 0.5; const AFTER_TIME: f64 = 0.7; +/// Ratio of graph content width to viewport width. +/// The visible viewport shows `o_range / GRAPH_CONTENT_RATIO` seconds of offset data. +/// E.g. with search_range=0.30s (o_range≈0.60s) and ratio=2.0, viewport ≈ 300ms. +const GRAPH_CONTENT_RATIO: f32 = 2.0; + #[derive(Clone, Debug, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct SimpleRecord { @@ -154,6 +159,8 @@ pub struct GameScene { analysis_state: OffsetAnalysisState, analysis_requested: bool, analysis_handle: Option>>>, + scroll: Scroll, + scroll_centered: bool, first_in: bool, exercise_range: Range, @@ -349,6 +356,8 @@ impl GameScene { analysis_state: OffsetAnalysisState::Idle, analysis_requested: false, analysis_handle: None, + scroll: Scroll::new().horizontal(), + scroll_centered: false, first_in: false, exercise_range, @@ -869,6 +878,7 @@ impl GameScene { // 6. Transition state self.analysis_state = OffsetAnalysisState::Computing; + self.scroll_centered = false; } #[cfg(target_arch = "wasm32")] @@ -876,7 +886,7 @@ impl GameScene { // wasm32: no threading support; analysis not available on web } - fn draw_offset_graph(&self, ui: &mut Ui, rect: Rect, result: &AlignmentResult) { + fn draw_offset_graph(chart_offset: f32, info_offset: f32, ui: &mut Ui, rect: Rect, result: &AlignmentResult) { use lyon::math::point; let curve = &result.correlation_curve; @@ -899,8 +909,8 @@ impl GameScene { let v_pad = 0.08; let inner = Rect::new(rect.x, rect.y + rect.h * v_pad, rect.w, rect.h * (1.0 - 2.0 * v_pad)); - // Correlation curve: gray, alpha 0.6, ~0.01 Ui-unit per point - let pt_spacing = 0.01; + // Correlation curve: keep point density roughly constant regardless of rect width + let pt_spacing = rect.w as f64 / 55.0; let step = ((curve.len() as f64 * pt_spacing) / rect.w as f64).ceil() as usize; let step = step.max(1); let mut path_builder = lyon::path::Path::builder(); @@ -924,48 +934,41 @@ impl GameScene { Color::new(0.6, 0.6, 0.6, 0.6), 
// gray, a=0.6 ); - // Marker line width - let marker_w = line_w * 2.0; + let marker_line_w = line_w * 1.5; // Orange vertical line: chart author offset (without audio latency compensation) - let orig_o = self.chart.offset as f64; + let orig_o = chart_offset as f64; if orig_o >= min_o && orig_o <= max_o { let ox = inner.x + ((orig_o - min_o) / o_range) as f32 * inner.w; - ui.fill_rect( - Rect::new(ox - marker_w, inner.y, marker_w * 2.0, inner.h), - Color::new(1.0, 0.5, 0.0, 0.5), // orange, a=0.5 - ); + let mut mb = lyon::path::Path::builder(); + mb.begin(point(ox, inner.y)); + mb.line_to(point(ox, inner.y + inner.h)); + mb.end(false); + ui.stroke_path(&mb.build(), marker_line_w, Color::new(1.0, 0.5, 0.0, 0.5)); } // Green vertical line: recommended total offset let best_o = result.offset; if best_o >= min_o && best_o <= max_o { let bx = inner.x + ((best_o - min_o) / o_range) as f32 * inner.w; - ui.fill_rect( - Rect::new(bx - marker_w, inner.y, marker_w * 2.0, inner.h), - Color::new(0.0, 1.0, 0.0, 0.5), // green, a=0.5 - ); + let mut mb = lyon::path::Path::builder(); + mb.begin(point(bx, inner.y)); + mb.line_to(point(bx, inner.y + inner.h)); + mb.end(false); + ui.stroke_path(&mb.build(), marker_line_w, Color::new(0.0, 1.0, 0.0, 0.5)); } // Blue vertical line: current chart delay (without audio latency compensation) - let cur_o = (self.chart.offset + self.info_offset) as f64; + let cur_o = (chart_offset + info_offset) as f64; if cur_o >= min_o && cur_o <= max_o { let cx = inner.x + ((cur_o - min_o) / o_range) as f32 * inner.w; - ui.fill_rect( - Rect::new(cx - marker_w, inner.y, marker_w * 2.0, inner.h), - Color::new(0.0, 0.5, 1.0, 0.5), // blue, a=0.5 - ); + let mut mb = lyon::path::Path::builder(); + mb.begin(point(cx, inner.y)); + mb.line_to(point(cx, inner.y + inner.h)); + mb.end(false); + ui.stroke_path(&mb.build(), marker_line_w, Color::new(0.0, 0.5, 1.0, 0.5)); } - // Suggested correction in top-right corner - let correction_ms = ((result.offset - 
self.chart.offset as f64) * 1000.0).round() as i32; - ui.text(format!("{correction_ms:+}ms")) - .pos(rect.x + rect.w - 0.01, rect.y) - .anchor(1.0, 0.0) - .size(0.35) - .color(Color::new(0.0, 1.0, 0.0, 0.7)) - .no_baseline() - .draw(); } fn tweak_offset(&mut self, ui: &mut Ui, ita: bool) { @@ -1013,7 +1016,35 @@ impl GameScene { ui.dy(graph_rect.h / 2. + 0.03); } OffsetAnalysisState::Done(ref result) => { - self.draw_offset_graph(ui, graph_rect, result); + self.scroll.size((width, graph_rect.h)); + let chart_offset = self.chart.offset; + let info_offset = self.info_offset; + self.scroll.render(ui, |ui| { + let content_width = width * GRAPH_CONTENT_RATIO; + let expanded_rect = Rect::new(0., 0., content_width, graph_rect.h); + Self::draw_offset_graph(chart_offset, info_offset, ui, expanded_rect, result); + (content_width, graph_rect.h) + }); + // Correction text pinned to viewport top-right + let correction_ms = ((result.offset - chart_offset as f64) * 1000.0).round() as i32; + ui.text(format!("{correction_ms:+}ms")) + .pos(width - 0.01, 0.) 
+ .anchor(1.0, 0.0) + .size(0.35) + .color(Color::new(0.0, 1.0, 0.0, 0.7)) + .no_baseline() + .draw(); + if !self.scroll_centered && !result.correlation_curve.is_empty() { + let curve = &result.correlation_curve; + let min_o = curve.first().map(|&(o, _)| o).unwrap_or(0.0); + let max_o = curve.last().map(|&(o, _)| o).unwrap_or(0.0); + let o_range = (max_o - min_o).max(1e-6); + let content_width = width * GRAPH_CONTENT_RATIO; + let green_x = ((result.offset - min_o) / o_range) as f32 * content_width; + self.scroll.x_scroller.offset = + (green_x - width / 2.0).clamp(0.0, content_width - width); + self.scroll_centered = true; + } ui.dy(graph_rect.h); } } @@ -1123,6 +1154,7 @@ impl Scene for GameScene { } } + self.scroll.update(tm.real_time() as f32); self.res.audio.recover_if_needed()?; if matches!(self.state, State::Playing) { tm.update(self.music.position()); @@ -1374,6 +1406,9 @@ impl Scene for GameScene { } fn touch(&mut self, tm: &mut TimeManager, touch: &Touch) -> Result { + if self.mode == GameMode::TweakOffset { + self.scroll.touch(touch, tm.real_time() as f32); + } if self.mode == GameMode::Exercise && tm.paused() { let touch = Touch { position: touch.position * self.touch_scale(), From 87c51ae5e70962aaf5a29a00afe94ec41a454dce Mon Sep 17 00:00:00 2001 From: sjfhsjfh Date: Sun, 14 Jun 2026 04:54:55 +0800 Subject: [PATCH 09/18] fix: adjust offset calculation and limit correlation curve points --- prpr/src/scene/game.rs | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/prpr/src/scene/game.rs b/prpr/src/scene/game.rs index 8c63440b2..9be72c715 100644 --- a/prpr/src/scene/game.rs +++ b/prpr/src/scene/game.rs @@ -852,13 +852,11 @@ impl GameScene { let clip = self.res.music.clone(); let pcm: Vec = clip.frames().iter().map(|f| (f.0 + f.1) / 2.0).collect(); let sample_rate = clip.sample_rate(); - let author_offset = self.chart.offset + self.res.config.offset; - - // 3. Build config + // 3. 
Build config — search centered on chart's own offset let config = AlignConfig { search_range_sec: 0.30, sampling_interval_sec: 0.005, - search_center_sec: author_offset as f64, + search_center_sec: self.chart.offset as f64, }; // 4. Create shared result slot @@ -909,10 +907,11 @@ impl GameScene { let v_pad = 0.08; let inner = Rect::new(rect.x, rect.y + rect.h * v_pad, rect.w, rect.h * (1.0 - 2.0 * v_pad)); - // Correlation curve: keep point density roughly constant regardless of rect width - let pt_spacing = rect.w as f64 / 55.0; - let step = ((curve.len() as f64 * pt_spacing) / rect.w as f64).ceil() as usize; - let step = step.max(1); + // Correlation curve — limit path points to stay within geometry budget. + // lyon stroke tessellation generates 30-60 vertices per segment; with + // other UI elements, ~70 path points keeps total well under the drawcall cap. + let max_pts = 70usize; + let step = ((curve.len() as f64) / (max_pts as f64)).ceil() as usize; let mut path_builder = lyon::path::Path::builder(); let mut first = true; for i in (0..curve.len()).step_by(step) { @@ -968,7 +967,6 @@ impl GameScene { mb.end(false); ui.stroke_path(&mb.build(), marker_line_w, Color::new(0.0, 0.5, 1.0, 0.5)); } - } fn tweak_offset(&mut self, ui: &mut Ui, ita: bool) { @@ -1041,8 +1039,7 @@ impl GameScene { let o_range = (max_o - min_o).max(1e-6); let content_width = width * GRAPH_CONTENT_RATIO; let green_x = ((result.offset - min_o) / o_range) as f32 * content_width; - self.scroll.x_scroller.offset = - (green_x - width / 2.0).clamp(0.0, content_width - width); + self.scroll.x_scroller.offset = (green_x - width / 2.0).clamp(0.0, content_width - width); self.scroll_centered = true; } ui.dy(graph_rect.h); From 5934287da8de786780f5ec6bbee26c44b37d885e Mon Sep 17 00:00:00 2001 From: liquidhelium Date: Sun, 14 Jun 2026 12:59:53 +0800 Subject: [PATCH 10/18] feat: align implementation with paper --- Cargo.lock | 18 -- prpr-auto-offset/src/audio/mod.rs | 2 +- 
prpr-auto-offset/src/audio/superflux.rs | 275 +++++++++++++++++------- prpr-auto-offset/src/lib.rs | 2 +- 4 files changed, 201 insertions(+), 96 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 456013b5d..27e10f2af 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3622,24 +3622,6 @@ dependencies = [ "walkdir", ] -[[package]] -name = "prpr-latency-visualizer" -version = "0.7.1" -dependencies = [ - "anyhow", - "glyph_brush", - "image", - "lyon", - "macroquad", - "prpr", - "prpr-auto-offset", - "prpr-avc", - "rfd", - "sasa", - "tempfile", - "tokio", -] - [[package]] name = "prpr-pbc" version = "0.1.0" diff --git a/prpr-auto-offset/src/audio/mod.rs b/prpr-auto-offset/src/audio/mod.rs index 67cafb8c7..36d189d40 100644 --- a/prpr-auto-offset/src/audio/mod.rs +++ b/prpr-auto-offset/src/audio/mod.rs @@ -4,4 +4,4 @@ mod superflux; pub use energy::EnergyDiff; pub use spectral::SpectralFlux; -pub use superflux::{compute_mel_spectrogram, MelFilterbank, SuperFlux}; +pub use superflux::{compute_spectrogram, Filterbank, SuperFlux}; diff --git a/prpr-auto-offset/src/audio/superflux.rs b/prpr-auto-offset/src/audio/superflux.rs index d255e1618..24be85860 100644 --- a/prpr-auto-offset/src/audio/superflux.rs +++ b/prpr-auto-offset/src/audio/superflux.rs @@ -2,12 +2,21 @@ use crate::Signal; /// SuperFlux onset detection signal. /// -/// Computes a percussion-onset novelty curve using the SuperFlux algorithm: +/// Computes a percussion-onset novelty curve using the SuperFlux algorithm +/// from: +/// "Maximum Filter Vibrato Suppression for Onset Detection" +/// Sebastian Böck and Gerhard Widmer, DAFx-13, Maynooth, Ireland, September 2013. +/// +/// Paper: https://www.dafx.de/paper-archive/2013/papers/09.dafx2013_submission_12.pdf +/// +/// Reference Python implementation: https://github.com/CPJKU/SuperFlux/blob/master/SuperFlux.py +/// +/// Processing steps: /// 1. High-pass filter (50 Hz) to remove sub-bass rumble -/// 2. Mel filterbank (80 bands, 50 Hz – 12 kHz) -/// 3. 
Mel-spectrogram (Hann-windowed STFT → mel band energy in dB) +/// 2. Log-scale triangular filterbank (24 bands/octave, 30 Hz – 17 kHz) +/// 3. Magnitude spectrogram → filterbank → log10 scaling /// 4. Per-band spectral whitening (subtract local running mean) -/// 5. SuperFlux temporal difference (max-filtered spectral flux) +/// 5. Frequency-direction maximum filter (vibrato suppression) + temporal difference /// 6. Adaptive threshold via running median /// /// The result is a dense time series with one onset-strength value per STFT @@ -35,17 +44,18 @@ impl SuperFlux { let mut samples = pcm.to_vec(); highpass_50hz(&mut samples, sample_rate); - // 2. Mel filterbank (80 bands, 50Hz–12kHz) - let mel = MelFilterbank::new(sample_rate, window_size, 80, 50.0, 12000.0); + // 2. Log-scale filterbank (24 bands/octave, 30Hz–17kHz, matching paper) + let filterbank = Filterbank::new(sample_rate, window_size, 24, 30.0, 17000.0, false); - // 3. Mel-spectrogram - let (mut mel_frames, frame_rate) = compute_mel_spectrogram(&samples, sample_rate, window_size, hop_size, &mel); + // 3. Spectrogram: |STFT| → filterbank → log10 (matching paper) + let (mut spec_frames, frame_rate) = compute_spectrogram(&samples, sample_rate, window_size, hop_size, &filterbank, 1.0, 1.0); // 4. Spectral whitening (1-second window) - whiten_spectrogram(&mut mel_frames, (frame_rate * 1.0) as usize); + whiten_spectrogram(&mut spec_frames, (frame_rate * 1.0) as usize); - // 5. SuperFlux temporal difference (lag=3) - let onset = compute_superflux(&mel_frames, 3); + // 5. SuperFlux: frequency-direction max filter + temporal diff + // diff_frames=3, max_bins=3 (matching paper defaults) + let onset = compute_superflux(&spec_frames, 3, 3); // 6. 
Adaptive threshold let onset = adaptive_threshold(&onset, frame_rate * 2.0, 0.5); @@ -102,68 +112,141 @@ fn highpass_50hz(samples: &mut [f32], sample_rate: u32) { } } -// ─── Mel scale conversion ────────────────────────────────────────────── +// ─── Log-scale frequency generation ──────────────────────────────────── -fn hz_to_mel(hz: f32) -> f32 { - 2595.0 * (1.0 + hz / 700.0).log10() -} +/// Generate frequencies on a logarithmic scale, matching Python's +/// `Filter.frequencies()` with A0 = 440 Hz as the reference. +fn log_frequencies(bands_per_octave: usize, fmin: f32, fmax: f32) -> Vec { + let factor = 2.0f32.powf(1.0 / bands_per_octave as f32); + let a = 440.0f32; + + let mut frequencies = vec![a]; + + // Go upwards from A0 + let mut freq = a; + while freq <= fmax { + freq *= factor; + frequencies.push(freq); + } + + // Go downwards from A0 + freq = a; + while freq >= fmin { + freq /= factor; + frequencies.push(freq); + } -fn mel_to_hz(mel: f32) -> f32 { - 700.0 * (10.0f32.powf(mel / 2595.0) - 1.0) + frequencies.sort_by(|a, b| a.partial_cmp(b).unwrap()); + frequencies } -// ─── Mel filterbank ───────────────────────────────────────────────────── +// ─── Triangular filterbank (log-scale, paper-compatible) ──────────────── -pub struct MelFilterbank { - /// Triangular filter weights: [mel_band][fft_bin] +/// Log-spaced triangular filterbank matching the Python reference `Filter` class. +/// +/// Uses logarithmic frequency spacing (bands per octave) with A0 = 440 Hz as +/// the reference pitch, and maps triangular filters to FFT bins. 
+pub struct Filterbank { + /// Triangular filter weights: `[fft_bin][filter_band]` pub weights: Vec>, - pub n_mels: usize, + pub n_bands: usize, } -impl MelFilterbank { - pub fn new(sample_rate: u32, window_size: usize, n_mels: usize, f_min: f32, f_max: f32) -> Self { - let n_fft_bins = window_size / 2 + 1; - let mel_min = hz_to_mel(f_min); - let mel_max = hz_to_mel(f_max.min(sample_rate as f32 / 2.0)); - let mel_step = (mel_max - mel_min) / (n_mels + 1) as f32; - - // Center frequencies of mel bands - let mel_centers: Vec = (0..n_mels).map(|i| mel_to_hz(mel_min + (i + 1) as f32 * mel_step)).collect(); - - let bin_hz = |k: usize| k as f32 * sample_rate as f32 / window_size as f32; - - // Build triangular filter weights - let mut weights = vec![vec![0.0f32; n_fft_bins]; n_mels]; - for (m, ¢er) in mel_centers.iter().enumerate() { - let left = if m == 0 { f_min } else { mel_centers[m - 1] }; - let right = if m == n_mels - 1 { f_max } else { mel_centers[m + 1] }; - for (k, w) in weights[m].iter_mut().enumerate() { - let f = bin_hz(k); - if f >= left && f <= center { - *w = (f - left) / (center - left).max(1e-10); - } else if f > center && f <= right { - *w = (right - f) / (right - center).max(1e-10); - } +impl Filterbank { + /// Create a log-spaced triangular filterbank. + /// + /// # Arguments + /// * `sample_rate` - Audio sample rate in Hz. + /// * `window_size` - STFT window size in samples. + /// * `bands_per_octave` - Number of filter bands per octave (default: 24). + /// * `fmin` - Minimum frequency in Hz (default: 30). + /// * `fmax` - Maximum frequency in Hz (default: 17000, capped at Nyquist). + /// * `equal` - If true, normalize each triangular filter to have area 1. 
+ pub fn new( + sample_rate: u32, + window_size: usize, + bands_per_octave: usize, + fmin: f32, + fmax: f32, + equal: bool, + ) -> Self { + let n_fft_bins = window_size / 2; + let fmax = fmax.min(sample_rate as f32 / 2.0); + + // Generate log-spaced frequencies and map to FFT bins + let frequencies = log_frequencies(bands_per_octave, fmin, fmax); + let factor = (sample_rate as f32 / 2.0) / n_fft_bins as f32; + let mut bins: Vec = frequencies + .iter() + .map(|&f| (f / factor).round() as usize) + .collect(); + bins.sort(); + bins.dedup(); + bins.retain(|&b| b < n_fft_bins); + + let n_bands = bins.len().saturating_sub(2); + assert!(n_bands >= 3, "cannot create filterbank with less than 3 frequencies"); + + let mut weights = vec![vec![0.0f32; n_bands]; n_fft_bins]; + + for band in 0..n_bands { + let start = bins[band]; + let mid = bins[band + 1]; + let stop = bins[band + 2]; + + let height = if equal { + 2.0 / (stop - start) as f32 + } else { + 1.0 + }; + + // Rising edge: start..mid + let n_rise = mid - start; + for i in start..mid { + weights[i][band] = height * (i - start) as f32 / n_rise as f32; + } + // Falling edge: mid..stop + let n_fall = stop - mid; + for i in mid..stop { + weights[i][band] = height * (stop - i) as f32 / n_fall as f32; } } - MelFilterbank { weights, n_mels } + Filterbank { weights, n_bands } } - /// Apply mel filterbank to a power spectrum, returns log-magnitudes per mel band (dB). - pub fn apply(&self, power_spectrum: &[f32]) -> Vec { - let mut mel = vec![0.0f32; self.n_mels]; - for (m, w) in self.weights.iter().enumerate() { - let sum: f32 = power_spectrum.iter().zip(w).map(|(&p, &w)| p * w).sum(); - mel[m] = 20.0 * (sum.sqrt().max(1e-10)).log10(); // dB + /// Apply filterbank to a **magnitude** spectrum, returning per-band + /// energy (linear magnitude, not dB). 
+ pub fn apply(&self, magnitude_spectrum: &[f32]) -> Vec { + let mut bands = vec![0.0f32; self.n_bands]; + for (b, w) in self.weights.iter().enumerate() { + // w is [band] at this FFT bin — sum up contributions per band + // weights layout: [fft_bin][band] + for band in 0..self.n_bands { + bands[band] += magnitude_spectrum[b] * w[band]; + } } - mel + bands } } -// ─── Mel-spectrogram computation ──────────────────────────────────────── +// ─── Spectrogram computation ──────────────────────────────────────────── -pub fn compute_mel_spectrogram(samples: &[f32], sample_rate: u32, window_size: usize, hop_size: usize, mel: &MelFilterbank) -> (Vec>, f32) { +/// Compute a log-magnitude spectrogram through a filterbank. +/// +/// Matches the Python reference `Spectrogram` class: +/// `|STFT| → filterbank → log10(mul · X + add)` +/// +/// Defaults: `mul = 1.0`, `add = 1.0` (log scaling on, matching Python defaults). +pub fn compute_spectrogram( + samples: &[f32], + sample_rate: u32, + window_size: usize, + hop_size: usize, + filterbank: &Filterbank, + mul: f32, + add: f32, +) -> (Vec>, f32) { use rayon::prelude::*; use realfft::RealFftPlanner; use std::sync::Arc; @@ -174,34 +257,49 @@ pub fn compute_mel_spectrogram(samples: &[f32], sample_rate: u32, window_size: u (samples.len() - window_size) / hop_size + 1 }; + // Hann window + let n2 = (window_size - 1) as f32; let window: Vec = (0..window_size) - .map(|n| 0.5 * (1.0 - (2.0 * std::f32::consts::PI * n as f32 / (window_size - 1) as f32).cos())) + .map(|n| 0.5 * (1.0 - (2.0 * std::f32::consts::PI * n as f32 / n2).cos())) .collect(); let mut planner = RealFftPlanner::::new(); let r2c = Arc::new(planner.plan_fft_forward(window_size)); - let mel_frames: Vec> = (0..num_frames) + let spec_frames: Vec> = (0..num_frames) .into_par_iter() .map(|frame_idx| { let start = frame_idx * hop_size; - let mut windowed: Vec = samples[start..start + window_size].iter().zip(&window).map(|(&s, &w)| s * w).collect(); + let mut windowed: Vec = 
samples[start..start + window_size] + .iter() + .zip(&window) + .map(|(&s, &w)| s * w) + .collect(); let mut spectrum = r2c.make_output_vec(); r2c.process(&mut windowed, &mut spectrum).unwrap(); - let power: Vec = spectrum.iter().map(|c| c.re * c.re + c.im * c.im).collect(); - mel.apply(&power) + // Magnitude spectrum (not power) + let magnitude: Vec = spectrum.iter().map(|c| c.norm()).collect(); + + // Apply filterbank → linear per-band energy + let mut bands = filterbank.apply(&magnitude); + + // Log scaling: log10(mul * X + add), matching Python defaults + for v in &mut bands { + *v = (mul * *v + add).log10(); + } + bands }) .collect(); let frame_rate = sample_rate as f32 / hop_size as f32; - (mel_frames, frame_rate) + (spec_frames, frame_rate) } // ─── Spectral whitening ───────────────────────────────────────────────── -/// For each mel band, subtract a local running mean (half-width = window_frames/2). +/// For each filter band, subtract a local running mean (half-width = window_frames/2). /// Clamps negative values to -120 dB floor. fn whiten_spectrogram(frames: &mut [Vec], window_frames: usize) { let half = window_frames / 2; @@ -234,28 +332,53 @@ fn whiten_spectrogram(frames: &mut [Vec], window_frames: usize) { /// Core SuperFlux algorithm. /// -/// For each frame `t` and each mel band `b`: -/// `diff(t) = Σ_b max(0, X[t][b] - max(X[t-1][b], ..., X[t-lag][b]))` +/// Implements the method described in: +/// "Maximum Filter Vibrato Suppression for Onset Detection" +/// Sebastian Böck and Gerhard Widmer, DAFx-13, Maynooth, Ireland, September 2013. +/// +/// Steps: +/// 1. Apply a maximum filter of width `max_bins` in the **frequency** direction +/// on the spectrogram to suppress vibrato (the key contribution). +/// 2. For each frame `t` and band `b`: +/// `diff(t,b) = max(0, X[t][b] - max_filtered(X)[t-diff_frames][b])` +/// 3. 
Sum across all bands: `onset(t) = Σ_b diff(t,b)` /// /// Robust-normalized by the 99th percentile (skipping the first ~1 s to avoid /// HP filter transient). -fn compute_superflux(mel_frames: &[Vec], lag: usize) -> Vec { - let n_frames = mel_frames.len(); - if n_frames <= lag { - return vec![0.0; n_frames]; +fn compute_superflux(spec_frames: &[Vec], diff_frames: usize, max_bins: usize) -> Vec { + let n_frames = spec_frames.len(); + if n_frames == 0 { + return vec![]; } + let n_bands = spec_frames[0].len(); let mut onset = vec![0.0f32; n_frames]; - for t in lag..n_frames { + if n_frames <= diff_frames { + return onset; + } + + // Step 1: Maximum filter in frequency direction (vibrato suppression). + // For each bin [t][b], replace with max over [b - half, b + half]. + let half = max_bins / 2; + let max_spec: Vec> = spec_frames + .iter() + .map(|frame| { + (0..n_bands) + .map(|b| { + let lo = b.saturating_sub(half); + let hi = (b + half).min(n_bands - 1); + frame[lo..=hi].iter().cloned().fold(0.0f32, f32::max) + }) + .collect() + }) + .collect(); + + // Step 2: Temporal difference — current raw spec vs. max-filtered previous frame. 
+ for t in diff_frames..n_frames { let mut flux = 0.0f32; - for (b, &cur) in mel_frames[t].iter().enumerate() { - // Max of previous `lag` frames - let mut max_prev = mel_frames[t - 1][b]; - for d in 2..=lag { - max_prev = max_prev.max(mel_frames[t - d][b]); - } - let diff = cur - max_prev; + for b in 0..n_bands { + let diff = spec_frames[t][b] - max_spec[t - diff_frames][b]; if diff > 0.0 { flux += diff; } diff --git a/prpr-auto-offset/src/lib.rs b/prpr-auto-offset/src/lib.rs index 5a0951e9d..64c292b58 100644 --- a/prpr-auto-offset/src/lib.rs +++ b/prpr-auto-offset/src/lib.rs @@ -4,7 +4,7 @@ mod note; mod signal; mod types; -pub use audio::{compute_mel_spectrogram, EnergyDiff, MelFilterbank, SpectralFlux, SuperFlux}; +pub use audio::{compute_spectrogram, EnergyDiff, Filterbank, SpectralFlux, SuperFlux}; pub use estimate::{estimate, estimate_with}; pub use note::NoteGaussian; pub use signal::Signal; From 792aa584ef05a70b211551da3ce4debe0626c3ff Mon Sep 17 00:00:00 2001 From: liquidhelium Date: Sun, 14 Jun 2026 13:15:50 +0800 Subject: [PATCH 11/18] fix: use absolute coord in estimate.rs --- prpr-auto-offset/src/audio/superflux.rs | 8 ++++---- prpr-auto-offset/src/estimate.rs | 15 +++++++++++---- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/prpr-auto-offset/src/audio/superflux.rs b/prpr-auto-offset/src/audio/superflux.rs index 24be85860..5302ec9fd 100644 --- a/prpr-auto-offset/src/audio/superflux.rs +++ b/prpr-auto-offset/src/audio/superflux.rs @@ -202,13 +202,13 @@ impl Filterbank { // Rising edge: start..mid let n_rise = mid - start; - for i in start..mid { - weights[i][band] = height * (i - start) as f32 / n_rise as f32; + for (offset, w) in weights[start..mid].iter_mut().enumerate() { + w[band] = height * offset as f32 / n_rise as f32; } // Falling edge: mid..stop let n_fall = stop - mid; - for i in mid..stop { - weights[i][band] = height * (stop - i) as f32 / n_fall as f32; + for (offset, w) in weights[mid..stop].iter_mut().enumerate() { + 
w[band] = height * (n_fall - offset) as f32 / n_fall as f32; } } diff --git a/prpr-auto-offset/src/estimate.rs b/prpr-auto-offset/src/estimate.rs index 7c430076d..17d01d2f2 100644 --- a/prpr-auto-offset/src/estimate.rs +++ b/prpr-auto-offset/src/estimate.rs @@ -115,15 +115,22 @@ pub fn estimate_with(audio: &A, note: &N, duration_sec: f6 // Cross-correlation let max_lag_bins = (config.search_range_sec / config.sampling_interval_sec).ceil() as usize; let (corr, best_lag, best_val) = cross_correlation(¬e_samples, &audio_samples, max_lag_bins); - let offset = (best_lag as isize - max_lag_bins as isize) as f64 * config.sampling_interval_sec; - // Build correlation curve + // Best lag in seconds (relative to search center) + let best_lag_sec = (best_lag as isize - max_lag_bins as isize) as f64 * config.sampling_interval_sec; + // Absolute offset: search center + measured lag correction + let offset = config.search_center_sec + best_lag_sec; + + // Build correlation curve with absolute offset on the x-axis. + // This makes the curve consistent with chart_offset and info_offset values, + // so the orange (chart offset) and green (suggested offset) markers are + // positioned correctly on the graph. 
let correlation_curve: Vec<(f64, f32)> = corr .iter() .enumerate() .map(|(i, &v)| { - let lag = i as isize - max_lag_bins as isize; - (lag as f64 * config.sampling_interval_sec, v) + let lag = (i as isize - max_lag_bins as isize) as f64 * config.sampling_interval_sec; + (config.search_center_sec + lag, v) }) .collect(); From 39dc5d17659a8eeb203e49ad3dec673e3f4554d7 Mon Sep 17 00:00:00 2001 From: liquidhelium Date: Sun, 14 Jun 2026 23:07:42 +0800 Subject: [PATCH 12/18] fix: remaining absolute coord in estimate.rs --- prpr-auto-offset/src/estimate.rs | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/prpr-auto-offset/src/estimate.rs b/prpr-auto-offset/src/estimate.rs index 17d01d2f2..684c11f10 100644 --- a/prpr-auto-offset/src/estimate.rs +++ b/prpr-auto-offset/src/estimate.rs @@ -94,14 +94,22 @@ pub fn estimate_with(audio: &A, note: &N, duration_sec: f6 }; } - // Build shared sampling grid centered at search_center_sec + // Build absolute-time sampling grid for the audio signal. let t_min = config.search_center_sec - config.search_range_sec; let t_max = config.search_center_sec + duration_sec + config.search_range_sec; let ts = build_ts_grid(t_min, t_max, config.sampling_interval_sec); - // Sample both signals + // Sample audio on the absolute-time grid. let audio_samples = audio.samples(&ts); - let note_samples = note.samples(&ts); + + // Shift the note signal into absolute time by sampling it at + // ts_note[i] = ts[i] - search_center_sec + // so that a note event at chart time `note.time` appears at absolute + // time `note.time + search_center_sec`. After this shift the two + // signals share a single (absolute) coordinate system and the + // cross-correlation lag is a small residual rather than the full offset. 
+ let note_ts: Vec = ts.iter().map(|&t| t - config.search_center_sec).collect(); + let note_samples = note.samples(¬e_ts); if audio_samples.is_empty() || note_samples.is_empty() { return AlignmentResult { @@ -112,19 +120,15 @@ pub fn estimate_with(audio: &A, note: &N, duration_sec: f6 }; } - // Cross-correlation + // Cross-correlation — now the best lag is a small residual around zero let max_lag_bins = (config.search_range_sec / config.sampling_interval_sec).ceil() as usize; let (corr, best_lag, best_val) = cross_correlation(¬e_samples, &audio_samples, max_lag_bins); - // Best lag in seconds (relative to search center) + // Residual lag, then add search_center_sec to get absolute offset let best_lag_sec = (best_lag as isize - max_lag_bins as isize) as f64 * config.sampling_interval_sec; - // Absolute offset: search center + measured lag correction let offset = config.search_center_sec + best_lag_sec; - // Build correlation curve with absolute offset on the x-axis. - // This makes the curve consistent with chart_offset and info_offset values, - // so the orange (chart offset) and green (suggested offset) markers are - // positioned correctly on the graph. + // Correlation curve: x = absolute offset (search_center + lag) let correlation_curve: Vec<(f64, f32)> = corr .iter() .enumerate() From cb665bf242a7d5221a176ae6ce72c29e016bac84 Mon Sep 17 00:00:00 2001 From: liquidhelium Date: Sun, 14 Jun 2026 23:18:00 +0800 Subject: [PATCH 13/18] doc: correct doc comments on AlignmentResult --- prpr-auto-offset/src/types.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/prpr-auto-offset/src/types.rs b/prpr-auto-offset/src/types.rs index d342e56f7..a5eeeb9ca 100644 --- a/prpr-auto-offset/src/types.rs +++ b/prpr-auto-offset/src/types.rs @@ -27,7 +27,8 @@ impl Default for AlignConfig { #[derive(Debug, Clone)] pub struct AlignmentResult { /// Suggested global offset in seconds. - /// Positive means notes should be delayed (hit later). 
+ /// This value is in absolute time. To get the chart offset correction, subtract the search center: + /// chart_offset_correction = offset - search_center_sec pub offset: f64, /// Normalized cross-correlation peak, in [0.0, 1.0]. /// @@ -38,5 +39,6 @@ pub struct AlignmentResult { pub reliable: bool, /// Full correlation curve: (offset_seconds, raw_correlation_score). /// Useful for visualization of the score-vs-offset landscape. + /// The offset_seconds values are in absolute time, so the search center is at `search_center_sec`. pub correlation_curve: Vec<(f64, f32)>, } From 7b91606e5cc3986a6823db3815e0797d3343735a Mon Sep 17 00:00:00 2001 From: liquidhelium Date: Sun, 14 Jun 2026 23:19:05 +0800 Subject: [PATCH 14/18] chore: fmt --- prpr-auto-offset/src/audio/superflux.rs | 32 ++++++------------------- 1 file changed, 7 insertions(+), 25 deletions(-) diff --git a/prpr-auto-offset/src/audio/superflux.rs b/prpr-auto-offset/src/audio/superflux.rs index 5302ec9fd..93198623b 100644 --- a/prpr-auto-offset/src/audio/superflux.rs +++ b/prpr-auto-offset/src/audio/superflux.rs @@ -6,11 +6,11 @@ use crate::Signal; /// from: /// "Maximum Filter Vibrato Suppression for Onset Detection" /// Sebastian Böck and Gerhard Widmer, DAFx-13, Maynooth, Ireland, September 2013. -/// +/// /// Paper: https://www.dafx.de/paper-archive/2013/papers/09.dafx2013_submission_12.pdf -/// +/// /// Reference Python implementation: https://github.com/CPJKU/SuperFlux/blob/master/SuperFlux.py -/// +/// /// Processing steps: /// 1. High-pass filter (50 Hz) to remove sub-bass rumble /// 2. Log-scale triangular filterbank (24 bands/octave, 30 Hz – 17 kHz) @@ -162,24 +162,14 @@ impl Filterbank { /// * `fmin` - Minimum frequency in Hz (default: 30). /// * `fmax` - Maximum frequency in Hz (default: 17000, capped at Nyquist). /// * `equal` - If true, normalize each triangular filter to have area 1. 
- pub fn new( - sample_rate: u32, - window_size: usize, - bands_per_octave: usize, - fmin: f32, - fmax: f32, - equal: bool, - ) -> Self { + pub fn new(sample_rate: u32, window_size: usize, bands_per_octave: usize, fmin: f32, fmax: f32, equal: bool) -> Self { let n_fft_bins = window_size / 2; let fmax = fmax.min(sample_rate as f32 / 2.0); // Generate log-spaced frequencies and map to FFT bins let frequencies = log_frequencies(bands_per_octave, fmin, fmax); let factor = (sample_rate as f32 / 2.0) / n_fft_bins as f32; - let mut bins: Vec = frequencies - .iter() - .map(|&f| (f / factor).round() as usize) - .collect(); + let mut bins: Vec = frequencies.iter().map(|&f| (f / factor).round() as usize).collect(); bins.sort(); bins.dedup(); bins.retain(|&b| b < n_fft_bins); @@ -194,11 +184,7 @@ impl Filterbank { let mid = bins[band + 1]; let stop = bins[band + 2]; - let height = if equal { - 2.0 / (stop - start) as f32 - } else { - 1.0 - }; + let height = if equal { 2.0 / (stop - start) as f32 } else { 1.0 }; // Rising edge: start..mid let n_rise = mid - start; @@ -270,11 +256,7 @@ pub fn compute_spectrogram( .into_par_iter() .map(|frame_idx| { let start = frame_idx * hop_size; - let mut windowed: Vec = samples[start..start + window_size] - .iter() - .zip(&window) - .map(|(&s, &w)| s * w) - .collect(); + let mut windowed: Vec = samples[start..start + window_size].iter().zip(&window).map(|(&s, &w)| s * w).collect(); let mut spectrum = r2c.make_output_vec(); r2c.process(&mut windowed, &mut spectrum).unwrap(); From ff71a485aa4b6d4da0e660b3c2fa926fb9a734c0 Mon Sep 17 00:00:00 2001 From: liquidhelium Date: Thu, 25 Jun 2026 19:48:13 +0800 Subject: [PATCH 15/18] fix: Center auto-offset search on current chart delay This is to prevent outbound latencies. 
--- prpr/src/scene/game.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/prpr/src/scene/game.rs b/prpr/src/scene/game.rs index 9be72c715..8a30f84ed 100644 --- a/prpr/src/scene/game.rs +++ b/prpr/src/scene/game.rs @@ -852,11 +852,12 @@ impl GameScene { let clip = self.res.music.clone(); let pcm: Vec = clip.frames().iter().map(|f| (f.0 + f.1) / 2.0).collect(); let sample_rate = clip.sample_rate(); - // 3. Build config — search centered on chart's own offset + // 3. Build config — search centered on the currently applied chart delay + let search_center_sec = (self.chart.offset + self.info_offset) as f64; let config = AlignConfig { search_range_sec: 0.30, sampling_interval_sec: 0.005, - search_center_sec: self.chart.offset as f64, + search_center_sec, }; // 4. Create shared result slot From 23ed1f499b152d3f55b7da19ec7a142e51c9d728 Mon Sep 17 00:00:00 2001 From: liquidhelium Date: Thu, 25 Jun 2026 20:11:00 +0800 Subject: [PATCH 16/18] fix: Correct auto-offset audio signal timestamps --- prpr-auto-offset/src/audio/energy.rs | 15 ++-- prpr-auto-offset/src/audio/spectral.rs | 15 ++-- prpr-auto-offset/src/audio/superflux.rs | 20 ++++-- prpr-auto-offset/tests/systematic_offset.rs | 79 +++++++++++++++++++++ 4 files changed, 117 insertions(+), 12 deletions(-) create mode 100644 prpr-auto-offset/tests/systematic_offset.rs diff --git a/prpr-auto-offset/src/audio/energy.rs b/prpr-auto-offset/src/audio/energy.rs index 3926d62f4..865d21e72 100644 --- a/prpr-auto-offset/src/audio/energy.rs +++ b/prpr-auto-offset/src/audio/energy.rs @@ -9,6 +9,8 @@ pub struct EnergyDiff { native: Vec, /// Time step between native samples, in seconds. native_dt: f64, + /// Timestamp of the first native sample, in seconds. 
+ native_t0: f64, } impl EnergyDiff { @@ -16,9 +18,14 @@ impl EnergyDiff { let frame_samples = (frame_ms / 1000.0 * sample_rate as f64).round() as usize; let hop_samples = (hop_ms / 1000.0 * sample_rate as f64).round() as usize; let native_dt = hop_samples as f64 / sample_rate as f64; + let native_t0 = native_dt + frame_samples as f64 / sample_rate as f64 / 2.0; let native = compute_energy_diff(pcm, frame_samples, hop_samples); - Self { native, native_dt } + Self { + native, + native_dt, + native_t0, + } } } @@ -27,7 +34,7 @@ impl Signal for EnergyDiff { if ts.is_empty() { return vec![]; } - ts.iter().map(|&t| interpolate(&self.native, self.native_dt, t)).collect() + ts.iter().map(|&t| interpolate(&self.native, self.native_dt, self.native_t0, t)).collect() } } @@ -53,11 +60,11 @@ fn compute_energy_diff(pcm: &[f32], frame_samples: usize, hop_samples: usize) -> } /// Linear interpolation at time `t` (seconds) in a signal sampled every `dt`. -fn interpolate(data: &[f32], dt: f64, t: f64) -> f32 { +fn interpolate(data: &[f32], dt: f64, t0: f64, t: f64) -> f32 { if data.is_empty() { return 0.0; } - let idx = t / dt; + let idx = (t - t0) / dt; if idx < 0.0 { return data[0]; } diff --git a/prpr-auto-offset/src/audio/spectral.rs b/prpr-auto-offset/src/audio/spectral.rs index f680db790..f171dd696 100644 --- a/prpr-auto-offset/src/audio/spectral.rs +++ b/prpr-auto-offset/src/audio/spectral.rs @@ -11,14 +11,21 @@ pub struct SpectralFlux { native: Vec, /// Time step between native samples, in seconds. native_dt: f64, + /// Timestamp of the first native sample, in seconds. 
+ native_t0: f64, } impl SpectralFlux { pub fn new(pcm: &[f32], sample_rate: u32, fft_size: usize, hop_size: usize) -> Self { assert!(fft_size.is_power_of_two()); let native_dt = hop_size as f64 / sample_rate as f64; + let native_t0 = fft_size as f64 / sample_rate as f64 / 2.0; let native = compute_spectral_flux(pcm, fft_size, hop_size); - Self { native, native_dt } + Self { + native, + native_dt, + native_t0, + } } } @@ -27,7 +34,7 @@ impl Signal for SpectralFlux { if ts.is_empty() { return vec![]; } - ts.iter().map(|&t| interpolate(&self.native, self.native_dt, t)).collect() + ts.iter().map(|&t| interpolate(&self.native, self.native_dt, self.native_t0, t)).collect() } } @@ -78,11 +85,11 @@ fn compute_spectral_flux(pcm: &[f32], n: usize, hop: usize) -> Vec { } /// Linear interpolation at time `t` (seconds) in a signal sampled every `dt`. -fn interpolate(data: &[f32], dt: f64, t: f64) -> f32 { +fn interpolate(data: &[f32], dt: f64, t0: f64, t: f64) -> f32 { if data.is_empty() { return 0.0; } - let idx = t / dt; + let idx = (t - t0) / dt; if idx < 0.0 { return data[0]; } diff --git a/prpr-auto-offset/src/audio/superflux.rs b/prpr-auto-offset/src/audio/superflux.rs index 93198623b..d36a35667 100644 --- a/prpr-auto-offset/src/audio/superflux.rs +++ b/prpr-auto-offset/src/audio/superflux.rs @@ -26,6 +26,8 @@ pub struct SuperFlux { native: Vec, /// Time step between native samples, in seconds. native_dt: f64, + /// Timestamp of the first native sample, in seconds. + native_t0: f64, } impl SuperFlux { @@ -39,6 +41,7 @@ impl SuperFlux { pub fn new(pcm: &[f32], sample_rate: u32, window_size: usize, hop_size: usize) -> Self { assert!(window_size.is_power_of_two()); let native_dt = hop_size as f64 / sample_rate as f64; + let native_t0 = window_size as f64 / sample_rate as f64 / 2.0; // 1. 
Clone and high-pass filter let mut samples = pcm.to_vec(); @@ -62,7 +65,11 @@ impl SuperFlux { // Use the declared native_dt (frame_rate may differ slightly due to rounding) let _ = frame_rate; - Self { native: onset, native_dt } + Self { + native: onset, + native_dt, + native_t0, + } } /// Access the native onset-strength samples (after adaptive threshold). @@ -74,6 +81,11 @@ impl SuperFlux { pub fn onset_dt(&self) -> f64 { self.native_dt } + + /// Timestamp of the first native onset sample, in seconds. + pub fn onset_t0(&self) -> f64 { + self.native_t0 + } } impl Signal for SuperFlux { @@ -81,7 +93,7 @@ impl Signal for SuperFlux { if ts.is_empty() { return vec![]; } - ts.iter().map(|&t| interpolate(&self.native, self.native_dt, t)).collect() + ts.iter().map(|&t| interpolate(&self.native, self.native_dt, self.native_t0, t)).collect() } } @@ -431,11 +443,11 @@ fn adaptive_threshold(onset: &[f32], median_window: f32, multiplier: f32) -> Vec // ─── Linear interpolation ─────────────────────────────────────────────── /// Linear interpolation at time `t` (seconds) in a signal sampled every `dt`. 
-fn interpolate(data: &[f32], dt: f64, t: f64) -> f32 { +fn interpolate(data: &[f32], dt: f64, t0: f64, t: f64) -> f32 { if data.is_empty() { return 0.0; } - let idx = t / dt; + let idx = (t - t0) / dt; if idx < 0.0 { return data[0]; } diff --git a/prpr-auto-offset/tests/systematic_offset.rs b/prpr-auto-offset/tests/systematic_offset.rs new file mode 100644 index 000000000..4cd10051c --- /dev/null +++ b/prpr-auto-offset/tests/systematic_offset.rs @@ -0,0 +1,79 @@ +use prpr_auto_offset::{estimate_with, AlignConfig, EnergyDiff, NoteGaussian, SpectralFlux, SuperFlux}; + +fn config() -> AlignConfig { + AlignConfig { + search_range_sec: 0.4, + sampling_interval_sec: 0.001, + search_center_sec: 0.0, + } +} + +#[test] +fn analytic_signal_has_expected_sign() { + let note_times = vec![1.0, 1.75, 2.5, 3.125, 4.0, 5.25, 6.0]; + let true_offset = 0.123; + let audio = NoteGaussian::new(note_times.iter().map(|t| t + true_offset).collect(), 0.02); + let note = NoteGaussian::new(note_times, 0.02); + let result = estimate_with(&audio, ¬e, 7.0, &config()); + + eprintln!("analytic: true={true_offset:.3}s estimated={:.3}s corr={:.4}", result.offset, result.correlation); + assert!((result.offset - true_offset).abs() <= config().sampling_interval_sec); +} + +#[test] +fn measure_audio_frontend_bias_on_synthetic_clicks() { + let sample_rate = 44_100u32; + let duration = 9.0; + let note_times = vec![1.0, 1.75, 2.5, 3.125, 4.0, 5.25, 6.0, 7.1]; + for true_offset in [-0.157, 0.0, 0.123, 0.278] { + let mut pcm = vec![0.0f32; (duration * sample_rate as f64) as usize]; + + for ¬e_time in ¬e_times { + let onset = note_time + true_offset; + let start = (onset * sample_rate as f64).round() as usize; + let burst_len = (0.035 * sample_rate as f64).round() as usize; + for i in 0..burst_len { + let idx = start + i; + if idx >= pcm.len() { + break; + } + let t = i as f32 / sample_rate as f32; + let env = (-t * 120.0).exp(); + let tone = (2.0 * std::f32::consts::PI * 2200.0 * t).sin(); + pcm[idx] += 0.8 
* env * tone; + } + } + + let note = NoteGaussian::new(note_times.clone(), 0.02); + + let energy = EnergyDiff::new(&pcm, sample_rate, 10.0, 5.0); + let result = estimate_with(&energy, ¬e, duration, &config()); + eprintln!( + "energy: true={true_offset:+.3}s estimated={:+.3}s bias={:+.0}ms corr={:.4}", + result.offset, + (result.offset - true_offset) * 1000.0, + result.correlation + ); + assert!((result.offset - true_offset).abs() <= 0.005); + + let spectral = SpectralFlux::new(&pcm, sample_rate, 1024, 512); + let result = estimate_with(&spectral, ¬e, duration, &config()); + eprintln!( + "spectral: true={true_offset:+.3}s estimated={:+.3}s bias={:+.0}ms corr={:.4}", + result.offset, + (result.offset - true_offset) * 1000.0, + result.correlation + ); + assert!((result.offset - true_offset).abs() <= 0.005); + + let superflux = SuperFlux::new(&pcm, sample_rate, 2048, 1024); + let result = estimate_with(&superflux, ¬e, duration, &config()); + eprintln!( + "super: true={true_offset:+.3}s estimated={:+.3}s bias={:+.0}ms corr={:.4}", + result.offset, + (result.offset - true_offset) * 1000.0, + result.correlation + ); + assert!((result.offset - true_offset).abs() <= 0.005); + } +} From 19858527f71992bd0db74b10f838b2691ec53225 Mon Sep 17 00:00:00 2001 From: liquidhelium Date: Thu, 25 Jun 2026 21:55:08 +0800 Subject: [PATCH 17/18] fix: normalization of auto-offset correlation curve --- prpr-auto-offset/src/estimate.rs | 73 ++++++++------------- prpr-auto-offset/src/types.rs | 2 +- prpr-auto-offset/tests/systematic_offset.rs | 47 ++++++++++++- 3 files changed, 74 insertions(+), 48 deletions(-) diff --git a/prpr-auto-offset/src/estimate.rs b/prpr-auto-offset/src/estimate.rs index 684c11f10..2ba5992ac 100644 --- a/prpr-auto-offset/src/estimate.rs +++ b/prpr-auto-offset/src/estimate.rs @@ -7,33 +7,42 @@ use crate::{AlignConfig, AlignmentResult, Signal}; /// the tested chart corpus. 
const RELIABILITY_THRESHOLD: f64 = 0.05; -/// Cross-correlation between two arrays, limited lag range. +/// Normalized cross-correlation between two arrays, limited lag range. /// -/// Returns `(correlation_values, best_lag_index, peak_value)` where -/// `correlation[lag]` is the dot product of `a` with `b` shifted by +/// Returns `(correlation_values, best_lag_index, peak_value)` where each +/// correlation value is the normalized dot product of `a` with `b` shifted by /// `lag - max_lag_bins`. -fn cross_correlation(a: &[f32], b: &[f32], max_lag_bins: usize) -> (Vec, usize, f32) { +fn normalized_cross_correlation(a: &[f32], b: &[f32], max_lag_bins: usize) -> (Vec, usize, f32) { let n = a.len().min(b.len()); if n == 0 { return (vec![], 0, 0.0); } + let norm_a = a.iter().map(|&v| (v as f64).powi(2)).sum::(); + let norm_b = b.iter().map(|&v| (v as f64).powi(2)).sum::(); + let denom = (norm_a * norm_b).sqrt(); + let mut best_lag = max_lag_bins; let mut best_val = f32::NEG_INFINITY; let mut corr = Vec::with_capacity(2 * max_lag_bins + 1); for lag_offset in 0..=2 * max_lag_bins { let lag = lag_offset as isize - max_lag_bins as isize; - let mut sum = 0.0f32; + let mut dot = 0.0f64; + (0..n).for_each(|i| { let j = i as isize + lag; if j >= 0 && j < b.len() as isize { - sum += a[i] * b[j as usize]; + let av = a[i] as f64; + let bv = b[j as usize] as f64; + dot += av * bv; } }); - corr.push(sum); - if sum > best_val { - best_val = sum; + + let value = if denom > 0.0 { (dot / denom).clamp(0.0, 1.0) as f32 } else { 0.0 }; + corr.push(value); + if value > best_val { + best_val = value; best_lag = lag_offset; } } @@ -47,30 +56,6 @@ fn build_ts_grid(t_min: f64, t_max: f64, dt: f64) -> Vec { (0..n).map(|i| t_min + i as f64 * dt).collect() } -/// Compute the normalized cross-correlation `r` at a specific lag. -/// -/// `r = Σ a[i] · b[i+lag] / √(Σ a[i]² · Σ b[i+lag]²)` over the overlapping -/// region. By Cauchy-Schwarz, `r ∈ [0, 1]` for non-negative signals. 
-fn normalized_correlation(a: &[f32], b: &[f32], lag: isize, best_val: f32) -> f64 { - let mut norm_a = 0.0f64; - let mut norm_b = 0.0f64; - - (0..a.len().min(b.len())).for_each(|i| { - let j = i as isize + lag; - if j >= 0 && j < b.len() as isize { - norm_a += (a[i] as f64).powi(2); - norm_b += (b[j as usize] as f64).powi(2); - } - }); - - let denom = (norm_a * norm_b).sqrt(); - if denom <= 0.0 { - return 0.0; - } - - (best_val as f64 / denom).clamp(0.0, 1.0) -} - /// Estimate the timing offset between two signals. /// /// Uses default [`AlignConfig`]. See [`estimate_with`] for custom config. @@ -105,9 +90,9 @@ pub fn estimate_with(audio: &A, note: &N, duration_sec: f6 // Shift the note signal into absolute time by sampling it at // ts_note[i] = ts[i] - search_center_sec // so that a note event at chart time `note.time` appears at absolute - // time `note.time + search_center_sec`. After this shift the two - // signals share a single (absolute) coordinate system and the - // cross-correlation lag is a small residual rather than the full offset. + // time `note.time + search_center_sec`. After this shift the two + // signals share a single coordinate system and the cross-correlation lag + // is a small residual rather than the full offset. let note_ts: Vec = ts.iter().map(|&t| t - config.search_center_sec).collect(); let note_samples = note.samples(¬e_ts); @@ -120,15 +105,15 @@ pub fn estimate_with(audio: &A, note: &N, duration_sec: f6 }; } - // Cross-correlation — now the best lag is a small residual around zero + // Normalized cross-correlation: now the best lag is a small residual around zero. 
let max_lag_bins = (config.search_range_sec / config.sampling_interval_sec).ceil() as usize; - let (corr, best_lag, best_val) = cross_correlation(¬e_samples, &audio_samples, max_lag_bins); + let (corr, best_lag, best_val) = normalized_cross_correlation(¬e_samples, &audio_samples, max_lag_bins); - // Residual lag, then add search_center_sec to get absolute offset + // Residual lag, then add search_center_sec to get absolute offset. let best_lag_sec = (best_lag as isize - max_lag_bins as isize) as f64 * config.sampling_interval_sec; let offset = config.search_center_sec + best_lag_sec; - // Correlation curve: x = absolute offset (search_center + lag) + // Correlation curve: x = absolute offset (search_center + lag). let correlation_curve: Vec<(f64, f32)> = corr .iter() .enumerate() @@ -138,14 +123,10 @@ pub fn estimate_with(audio: &A, note: &N, duration_sec: f6 }) .collect(); - // Normalized correlation at best lag - let lag = best_lag as isize - max_lag_bins as isize; - let correlation = normalized_correlation(¬e_samples, &audio_samples, lag, best_val); - AlignmentResult { offset, - correlation, - reliable: correlation > RELIABILITY_THRESHOLD, + correlation: best_val as f64, + reliable: best_val as f64 > RELIABILITY_THRESHOLD, correlation_curve, } } diff --git a/prpr-auto-offset/src/types.rs b/prpr-auto-offset/src/types.rs index a5eeeb9ca..7da0d15ad 100644 --- a/prpr-auto-offset/src/types.rs +++ b/prpr-auto-offset/src/types.rs @@ -37,7 +37,7 @@ pub struct AlignmentResult { pub correlation: f64, /// Whether the correlation exceeds the default reliability threshold. pub reliable: bool, - /// Full correlation curve: (offset_seconds, raw_correlation_score). + /// Full correlation curve: (offset_seconds, normalized_correlation_score). /// Useful for visualization of the score-vs-offset landscape. /// The offset_seconds values are in absolute time, so the search center is at `search_center_sec`. 
pub correlation_curve: Vec<(f64, f32)>, diff --git a/prpr-auto-offset/tests/systematic_offset.rs b/prpr-auto-offset/tests/systematic_offset.rs index 4cd10051c..63aa6e2bf 100644 --- a/prpr-auto-offset/tests/systematic_offset.rs +++ b/prpr-auto-offset/tests/systematic_offset.rs @@ -1,4 +1,4 @@ -use prpr_auto_offset::{estimate_with, AlignConfig, EnergyDiff, NoteGaussian, SpectralFlux, SuperFlux}; +use prpr_auto_offset::{estimate_with, AlignConfig, EnergyDiff, NoteGaussian, Signal, SpectralFlux, SuperFlux}; fn config() -> AlignConfig { AlignConfig { @@ -20,6 +20,51 @@ fn analytic_signal_has_expected_sign() { assert!((result.offset - true_offset).abs() <= config().sampling_interval_sec); } +struct SparseSignal { + values: Vec<(f64, f32)>, +} + +impl Signal for SparseSignal { + fn samples(&self, ts: &[f64]) -> Vec { + ts.iter() + .map(|&t| { + self.values + .iter() + .find(|&&(at, _)| (t - at).abs() < 1e-9) + .map(|&(_, value)| value) + .unwrap_or(0.0) + }) + .collect() + } +} + +#[test] +fn normalized_correlation_is_amplitude_scale_invariant() { + let config = AlignConfig { + search_range_sec: 0.2, + sampling_interval_sec: 0.1, + search_center_sec: 0.0, + }; + let note = SparseSignal { + values: vec![(1.0, 1.0), (1.1, 1.0), (1.2, 1.0)], + }; + let audio = SparseSignal { + values: vec![(1.0, 1.0), (1.1, 1.0), (1.2, 1.0)], + }; + let louder_audio = SparseSignal { + values: vec![(1.0, 10.0), (1.1, 10.0), (1.2, 10.0)], + }; + + let result = estimate_with(&audio, ¬e, 2.0, &config); + let louder_result = estimate_with(&louder_audio, ¬e, 2.0, &config); + + assert_eq!(result.offset, 0.0); + assert!((result.correlation - 1.0).abs() < 1e-6); + assert_eq!(louder_result.offset, result.offset); + assert!((louder_result.correlation - result.correlation).abs() < 1e-6); + assert!(result.correlation_curve.iter().all(|&(_, v)| (0.0..=1.0).contains(&v))); +} + #[test] fn measure_audio_frontend_bias_on_synthetic_clicks() { let sample_rate = 44_100u32; From 
6ada5e7c1200b48748647ee83e65648f8e56805c Mon Sep 17 00:00:00 2001 From: liquidhelium Date: Fri, 26 Jun 2026 00:22:15 +0800 Subject: [PATCH 18/18] feat: offset study tool --- Cargo.lock | 302 ++++++++++- tools/auto-offset-study/Cargo.toml | 20 + tools/auto-offset-study/src/main.rs | 742 ++++++++++++++++++++++++++++ 3 files changed, 1060 insertions(+), 4 deletions(-) create mode 100644 tools/auto-offset-study/Cargo.toml create mode 100644 tools/auto-offset-study/src/main.rs diff --git a/Cargo.lock b/Cargo.lock index 27e10f2af..fc449d417 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -642,6 +642,42 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "core-graphics" +version = "0.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c07782be35f9e1140080c6b96f0d44b739e2278479f64e02fdab4e32dfd8b081" +dependencies = [ + "bitflags 1.3.2", + "core-foundation 0.9.4", + "core-graphics-types", + "foreign-types", + "libc", +] + +[[package]] +name = "core-graphics-types" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45390e6114f68f718cc7a830514a96f903cccd70d02a8f6d9f643ac4ba45afaf" +dependencies = [ + "bitflags 1.3.2", + "core-foundation 0.9.4", + "libc", +] + +[[package]] +name = "core-text" +version = "20.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9d2790b5c08465d49f8dc05c8bcae9fea467855947db39b0f8145c091aaced5" +dependencies = [ + "core-foundation 0.9.4", + "core-graphics", + "foreign-types", + "libc", +] + [[package]] name = "core2" version = "0.4.0" @@ -844,6 +880,27 @@ dependencies = [ "subtle", ] +[[package]] +name = "dirs" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e" +dependencies = [ + "dirs-sys", +] + 
+[[package]] +name = "dirs-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab" +dependencies = [ + "libc", + "option-ext", + "redox_users", + "windows-sys 0.61.2", +] + [[package]] name = "dispatch2" version = "0.3.1" @@ -909,6 +966,18 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" +[[package]] +name = "dwrote" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e1b35532432acc8b19ceed096e35dfa088d3ea037fe4f3c085f1f97f33b4d02" +dependencies = [ + "lazy_static", + "libc", + "winapi", + "wio", +] + [[package]] name = "either" version = "1.15.0" @@ -1041,6 +1110,12 @@ dependencies = [ "zlib-rs", ] +[[package]] +name = "float-ord" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ce81f49ae8a0482e4c55ea62ebbd7e5a686af544c00b9d090bba3ff9be97b3d" + [[package]] name = "float_next_after" version = "1.0.0" @@ -1110,6 +1185,31 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" +[[package]] +name = "font-kit" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c7e611d49285d4c4b2e1727b72cf05353558885cc5252f93707b845dfcaf3d3" +dependencies = [ + "bitflags 2.11.0", + "byteorder", + "core-foundation 0.9.4", + "core-graphics", + "core-text", + "dirs", + "dwrote", + "float-ord", + "freetype-sys", + "lazy_static", + "libc", + "log", + "pathfinder_geometry", + "pathfinder_simd", + "walkdir", + "winapi", + "yeslogic-fontconfig-sys", +] + [[package]] name = "fontdue" version = "0.9.3" @@ -1120,6 +1220,33 @@ dependencies = [ "ttf-parser 0.21.1", ] +[[package]] +name = "foreign-types" +version = "0.5.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d737d9aa519fb7b749cbc3b962edcf310a8dd1f4b67c91c4f83975dbdd17d965" +dependencies = [ + "foreign-types-macros", + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-macros" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "foreign-types-shared" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa9a19cbb55df58761df49b23516a86d432839add4af60fc256da840f66ed35b" + [[package]] name = "form_urlencoded" version = "1.2.2" @@ -1129,6 +1256,17 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "freetype-sys" +version = "0.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e7edc5b9669349acfda99533e9e0bcf26a51862ab43b08ee7745c55d28eb134" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "fs_extra" version = "1.3.0" @@ -1740,6 +1878,20 @@ dependencies = [ "icu_properties", ] +[[package]] +name = "image" +version = "0.24.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5690139d2f55868e080017335e4b94cb7414274c74f1669c84fb5feba2c9f69d" +dependencies = [ + "bytemuck", + "byteorder", + "color_quant", + "jpeg-decoder", + "num-traits", + "png 0.17.16", +] + [[package]] name = "image" version = "0.25.10" @@ -1754,7 +1906,7 @@ dependencies = [ "image-webp", "moxcms", "num-traits", - "png", + "png 0.18.1", "ravif", "rayon", "zune-core", @@ -1995,6 +2147,12 @@ dependencies = [ "libc", ] +[[package]] +name = "jpeg-decoder" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00810f1d8b74be64b13dbf3db89ac67740615d6c891f0e7b6179326533011a07" + [[package]] name = "js-sys" version = "0.3.91" @@ -2075,6 
+2233,15 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" +[[package]] +name = "libredox" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1744e39d1d6a9948f4f388969627434e31128196de472883b39f148769bfe30a" +dependencies = [ + "libc", +] + [[package]] name = "linked-hash-map" version = "0.5.6" @@ -2233,7 +2400,7 @@ dependencies = [ "bumpalo", "fontdue", "glam 0.30.10", - "image", + "image 0.25.10", "macroquad_macro", "miniquad", "quad-rand", @@ -3125,6 +3292,12 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" +[[package]] +name = "option-ext" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" + [[package]] name = "ordered-float" version = "5.3.0" @@ -3184,6 +3357,25 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3" +[[package]] +name = "pathfinder_geometry" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b7e7b4ea703700ce73ebf128e1450eb69c3a8329199ffbfb9b2a0418e5ad3" +dependencies = [ + "log", + "pathfinder_simd", +] + +[[package]] +name = "pathfinder_simd" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf9027960355bf3afff9841918474a81a5f972ac6d226d518060bba758b5ad57" +dependencies = [ + "rustc_version", +] + [[package]] name = "percent-encoding" version = "2.3.2" @@ -3253,7 +3445,7 @@ dependencies = [ "fluent-syntax", "futures-util", "hex", - "image", + "image 0.25.10", "inputbox", "jni 0.22.4", "logos", @@ -3382,6 +3574,52 @@ version = "0.3.32" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" +[[package]] +name = "plotters" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" +dependencies = [ + "font-kit", + "lazy_static", + "num-traits", + "pathfinder_geometry", + "plotters-backend", + "plotters-bitmap", + "ttf-parser 0.20.0", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" + +[[package]] +name = "plotters-bitmap" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72ce181e3f6bf82d6c1dc569103ca7b1bd964c60ba03d7e6cdfbb3e3eb7f7405" +dependencies = [ + "image 0.24.9", + "plotters-backend", +] + +[[package]] +name = "png" +version = "0.17.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82151a2fc869e011c153adc57cf2789ccb8d9906ce52c0b39a6b5697749d7526" +dependencies = [ + "bitflags 1.3.2", + "crc32fast", + "fdeflate", + "flate2", + "miniz_oxide 0.8.9", +] + [[package]] name = "png" version = "0.18.1" @@ -3528,7 +3766,7 @@ dependencies = [ "glyph_brush", "hex", "hmac", - "image", + "image 0.25.10", "inputbox", "intl-memoizer", "jni 0.22.4", @@ -3599,6 +3837,25 @@ dependencies = [ "tokio", ] +[[package]] +name = "prpr-auto-offset-study" +version = "0.7.1" +dependencies = [ + "anyhow", + "clap", + "futures-util", + "plotters", + "prpr", + "prpr-auto-offset", + "prpr-avc", + "reqwest", + "serde", + "serde_json", + "serde_yaml", + "tokio", + "zip", +] + [[package]] name = "prpr-avc" version = "0.1.0" @@ -3895,6 +4152,17 @@ dependencies = [ "bitflags 2.11.0", ] +[[package]] +name = "redox_users" +version = "0.5.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac" +dependencies = [ + "getrandom 0.2.17", + "libredox", + "thiserror 2.0.18", +] + [[package]] name = "reflink-copy" version = "0.1.29" @@ -5051,6 +5319,12 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "ttf-parser" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17f77d76d837a7830fe1d4f12b7b4ba4192c1888001c7164257e4bc6d21d96b4" + [[package]] name = "ttf-parser" version = "0.21.1" @@ -5803,6 +6077,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "wio" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d129932f4644ac2396cb456385cbf9e63b5b30c6e8dc4820bdca4eb082037a5" +dependencies = [ + "winapi", +] + [[package]] name = "wit-bindgen" version = "0.51.0" @@ -5926,6 +6209,17 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a5a4b21e1a62b67a2970e6831bc091d7b87e119e7f9791aef9702e3bef04448" +[[package]] +name = "yeslogic-fontconfig-sys" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "503a066b4c037c440169d995b869046827dbc71263f6e8f3be6d77d4f3229dbd" +dependencies = [ + "dlib", + "once_cell", + "pkg-config", +] + [[package]] name = "yoke" version = "0.8.1" diff --git a/tools/auto-offset-study/Cargo.toml b/tools/auto-offset-study/Cargo.toml new file mode 100644 index 000000000..36915984a --- /dev/null +++ b/tools/auto-offset-study/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "prpr-auto-offset-study" +version.workspace = true +edition.workspace = true +license.workspace = true + +[dependencies] +anyhow = { workspace = true } +clap = { version = "4", features = ["derive"] } +futures-util = { workspace = true } 
+plotters = { version = "0.3.7", default-features = false, features = ["bitmap_backend", "bitmap_encoder", "line_series", "point_series", "ttf"] } +prpr = { workspace = true } +prpr-auto-offset = { workspace = true } +prpr-avc = { workspace = true } +reqwest = { workspace = true, default-features = false, features = ["json", "stream", "gzip", "rustls", "query"] } +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true } +serde_yaml = { workspace = true } +tokio = { workspace = true, features = ["rt-multi-thread", "macros", "fs"] } +zip = { workspace = true, features = ["deflate"] } diff --git a/tools/auto-offset-study/src/main.rs b/tools/auto-offset-study/src/main.rs new file mode 100644 index 000000000..29ae7402b --- /dev/null +++ b/tools/auto-offset-study/src/main.rs @@ -0,0 +1,742 @@ +use anyhow::{bail, Context, Result}; +use clap::Parser; +use futures_util::StreamExt; +use plotters::prelude::*; +use prpr::{ + core::{BpmList, Chart, Triple}, + fs::{fs_from_file, load_info, FileSystem}, + info::{ChartFormat, ChartInfo}, + parse::{parse_pec, parse_phigros, parse_rpe}, +}; +use prpr_auto_offset::{AlignConfig, NoteGaussian, Signal, SuperFlux}; +use serde::{Deserialize, Serialize}; +use std::{ + collections::BTreeSet, + fs::{self, File}, + io::{Cursor, Write}, + path::{Path, PathBuf}, + time::Duration, +}; + +const API_URL: &str = "https://phira.5wyxi.com"; +const DEFAULT_ROOT: &str = "data/auto-offset-study"; + +#[derive(Parser)] +#[command(name = "prpr-auto-offset-study")] +#[command(about = "Download chart samples and study auto-offset energy/correlation relationships")] +struct Cli { + /// Working directory for cached charts, CSV and plot. + #[arg(long, default_value = DEFAULT_ROOT)] + root: PathBuf, + + /// Target number of downloaded chart samples. + #[arg(short, long, default_value_t = 300)] + samples: usize, + + /// Fetch additional charts even if the cache already has enough samples. 
+ #[arg(long)] + download: bool, + + /// Number of remote list pages to scan while looking for downloadable charts. + #[arg(long, default_value_t = 20)] + pages: u64, + + /// Number of charts requested per remote page. + #[arg(long, default_value_t = 30)] + page_num: u64, + + /// Chart ordering passed to the Phira API. + #[arg(long, default_value = "-updated")] + order: String, + + /// Search range in seconds for offset estimation. + #[arg(long, default_value_t = 0.30)] + range: f64, + + /// Sampling interval in seconds for the correlation grid. + #[arg(long, default_value_t = 0.005)] + interval: f64, + + /// Gaussian blur sigma in seconds for the note signal. + #[arg(long, default_value_t = 0.02)] + blur_sigma: f64, + + /// Recompute rows even if results.csv already contains a chart id. + #[arg(long)] + recompute: bool, + + /// Per-request timeout in milliseconds. + #[arg(long, default_value_t = 8000)] + request_timeout_ms: u64, + + /// Number of attempts per HTTP request. + #[arg(long, default_value_t = 10)] + retries: usize, +} + +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +struct RemoteChart { + id: i32, + file: String, +} + +#[derive(Debug, Deserialize)] +struct PagedResult { + results: Vec, +} + +#[derive(Deserialize)] +#[serde(rename_all = "camelCase")] +struct RpeTimingChart { + #[serde(rename = "META")] + meta: RpeTimingMeta, + #[serde(rename = "BPMList")] + bpm_list: Vec, + judge_line_list: Vec, +} + +#[derive(Deserialize)] +struct RpeTimingMeta { + offset: f32, +} + +#[derive(Deserialize)] +#[serde(rename_all = "camelCase")] +struct RpeTimingBpm { + start_time: Triple, + bpm: f64, +} + +#[derive(Deserialize)] +#[serde(rename_all = "camelCase")] +struct RpeTimingLine { + notes: Option>, +} + +#[derive(Deserialize)] +#[serde(rename_all = "camelCase")] +struct RpeTimingNote { + start_time: Triple, + is_fake: u8, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +struct StudyRow { + chart_id: i32, + chart_name: String, + 
notes: usize, + duration_sec: f64, + search_center_sec: f64, + suggested_offset_sec: f64, + lag_sec: f64, + raw_peak: f64, + note_energy: f64, + audio_energy: f64, + normalized_peak: f64, + reliable: bool, +} + +fn csv_escape(value: &str) -> String { + if value.contains([',', '"', '\n', '\r']) { + format!("\"{}\"", value.replace('"', "\"\"")) + } else { + value.to_owned() + } +} + +impl StudyRow { + fn header() -> &'static str { + "chart_id,chart_name,notes,duration_sec,search_center_sec,suggested_offset_sec,lag_sec,raw_peak,note_energy,audio_energy,normalized_peak,reliable" + } + + fn to_csv(&self) -> String { + format!( + "{},{},{},{:.6},{:.6},{:.6},{:.6},{:.9},{:.9},{:.9},{:.9},{}", + self.chart_id, + csv_escape(&self.chart_name), + self.notes, + self.duration_sec, + self.search_center_sec, + self.suggested_offset_sec, + self.lag_sec, + self.raw_peak, + self.note_energy, + self.audio_energy, + self.normalized_peak, + self.reliable + ) + } +} + +#[tokio::main] +async fn main() -> Result<()> { + let cli = Cli::parse(); + fs::create_dir_all(cli.root.join("charts"))?; + + ensure_samples(&cli).await?; + let rows = analyze_samples(&cli).await?; + write_csv(&cli.root.join("results.csv"), &rows)?; + draw_plot(&cli.root.join("peak-energy-3d.png"), &rows)?; + write_plotly_html(&cli.root.join("peak-energy-3d.html"), &rows)?; + + println!("rows: {}", rows.len()); + println!("csv: {}", cli.root.join("results.csv").display()); + println!("plot: {}", cli.root.join("peak-energy-3d.png").display()); + println!("html: {}", cli.root.join("peak-energy-3d.html").display()); + Ok(()) +} + +async fn ensure_samples(cli: &Cli) -> Result<()> { + let charts_dir = cli.root.join("charts"); + let existing = cached_chart_ids(&charts_dir)?; + if existing.len() >= cli.samples && !cli.download { + println!("using {} cached charts", existing.len()); + return Ok(()); + } + + let target = if cli.download { existing.len() + cli.samples } else { cli.samples }; + println!("fetching charts until cache 
reaches {target} samples"); + let client = reqwest::Client::builder() + .timeout(Duration::from_millis(cli.request_timeout_ms)) + .build()?; + let mut seen = existing; + + for page in 1..=cli.pages { + if seen.len() >= target { + break; + } + let request = || { + client.get(format!("{API_URL}/chart")).query(&[ + ("page", page.to_string()), + ("pageNum", cli.page_num.to_string()), + ("order", cli.order.clone()), + ("type", "-1".to_owned()), + ("rating", "0,16".to_owned()), + ]) + }; + let res: PagedResult = send_with_retries(request, cli.retries).await?.error_for_status()?.json().await?; + + for chart in res.results { + if seen.len() >= target { + break; + } + if seen.contains(&chart.id) { + continue; + } + match download_chart(&client, &charts_dir, &chart, cli.retries).await { + Ok(()) => { + seen.insert(chart.id); + println!("downloaded {} ({}/{target})", chart.id, seen.len()); + } + Err(err) => eprintln!("skip {}: {err:#}", chart.id), + } + } + } + + if seen.len() < cli.samples { + bail!("only {} charts cached; increase --pages or check network", seen.len()); + } + Ok(()) +} + +fn cached_chart_ids(charts_dir: &Path) -> Result> { + let mut ids = BTreeSet::new(); + if !charts_dir.exists() { + return Ok(ids); + } + for entry in fs::read_dir(charts_dir)? 
{ + let entry = entry?; + if !entry.file_type()?.is_dir() { + continue; + } + if let Some(id) = entry.file_name().to_str().and_then(|it| it.parse::().ok()) { + ids.insert(id); + } + } + Ok(ids) +} + +async fn download_chart(client: &reqwest::Client, charts_dir: &Path, chart: &RemoteChart, retries: usize) -> Result<()> { + let tmp_dir = charts_dir.join(format!("{}.tmp", chart.id)); + let final_dir = charts_dir.join(chart.id.to_string()); + if final_dir.exists() { + return Ok(()); + } + if tmp_dir.exists() { + fs::remove_dir_all(&tmp_dir)?; + } + fs::create_dir_all(&tmp_dir)?; + + let bytes = fetch_bytes(client, &chart.file, retries).await?; + unzip_bytes(&bytes, &tmp_dir).context("failed to unzip chart archive")?; + + let info_path = tmp_dir.join("info.yml"); + let mut info: ChartInfo = serde_yaml::from_reader(File::open(&info_path)?)?; + info.id = Some(chart.id); + serde_yaml::to_writer(File::create(&info_path)?, &info)?; + + fs::rename(tmp_dir, final_dir)?; + Ok(()) +} + +async fn fetch_bytes(client: &reqwest::Client, url: &str, retries: usize) -> Result> { + let response = send_with_retries(|| client.get(url), retries).await?.error_for_status()?; + if response.url().as_str().starts_with("anys://") { + bail!("anys:// redirect is not supported by this study tool") + } + let mut stream = response.bytes_stream(); + let mut bytes = Vec::new(); + while let Some(chunk) = stream.next().await { + bytes.extend_from_slice(&chunk?); + } + Ok(bytes) +} + +async fn send_with_retries(request: impl Fn() -> reqwest::RequestBuilder, retries: usize) -> Result { + let attempts = retries.max(1); + let mut last_error = None; + for attempt in 1..=attempts { + match request().send().await { + Ok(response) => { + if !should_retry_status(response.status()) || attempt == attempts { + return Ok(response); + } + tokio::time::sleep(Duration::from_millis(150 * attempt as u64)).await; + } + Err(err) => { + last_error = Some(err); + if attempt < attempts { + 
tokio::time::sleep(Duration::from_millis(150 * attempt as u64)).await; + } + } + } + } + Err(last_error.expect("at least one request attempt")) +} + +fn should_retry_status(status: reqwest::StatusCode) -> bool { + status == reqwest::StatusCode::TOO_MANY_REQUESTS || status.is_server_error() +} + +fn unzip_bytes(bytes: &[u8], out_dir: &Path) -> Result<()> { + let mut archive = zip::ZipArchive::new(Cursor::new(bytes))?; + for i in 0..archive.len() { + let mut entry = archive.by_index(i)?; + if entry.is_dir() { + continue; + } + let enclosed = entry.enclosed_name().context("invalid zip path")?; + let out_path = out_dir.join(enclosed); + if let Some(parent) = out_path.parent() { + fs::create_dir_all(parent)?; + } + let mut out = File::create(out_path)?; + std::io::copy(&mut entry, &mut out)?; + } + Ok(()) +} + +async fn analyze_samples(cli: &Cli) -> Result> { + let charts_dir = cli.root.join("charts"); + let existing_rows = if cli.recompute { + Vec::new() + } else { + read_existing_csv(&cli.root.join("results.csv"))? + }; + let done: BTreeSet = existing_rows.iter().map(|row| row.chart_id).collect(); + let mut rows = existing_rows; + + for id in cached_chart_ids(&charts_dir)? 
{ + if rows.len() >= cli.samples { + break; + } + if done.contains(&id) { + continue; + } + let dir = charts_dir.join(id.to_string()); + match analyze_chart(id, &dir, cli).await { + Ok(row) => { + println!("analyzed {id}: raw={:.3} norm={:.4}", row.raw_peak, row.normalized_peak); + rows.push(row); + } + Err(err) => eprintln!("skip analysis {id}: {err:#}"), + } + } + + rows.sort_by_key(|row| row.chart_id); + rows.truncate(cli.samples.min(rows.len())); + Ok(rows) +} + +fn read_existing_csv(path: &Path) -> Result> { + if !path.exists() { + return Ok(Vec::new()); + } + let text = fs::read_to_string(path)?; + let mut rows = Vec::new(); + for line in text.lines().skip(1) { + let cols = split_csv_line(line); + if cols.len() != 12 { + continue; + } + rows.push(StudyRow { + chart_id: cols[0].parse()?, + chart_name: cols[1].clone(), + notes: cols[2].parse()?, + duration_sec: cols[3].parse()?, + search_center_sec: cols[4].parse()?, + suggested_offset_sec: cols[5].parse()?, + lag_sec: cols[6].parse()?, + raw_peak: cols[7].parse()?, + note_energy: cols[8].parse()?, + audio_energy: cols[9].parse()?, + normalized_peak: cols[10].parse()?, + reliable: cols[11].parse()?, + }); + } + Ok(rows) +} + +fn split_csv_line(line: &str) -> Vec { + let mut cols = Vec::new(); + let mut cur = String::new(); + let mut quoted = false; + let mut chars = line.chars().peekable(); + while let Some(ch) = chars.next() { + match ch { + '"' if quoted && chars.peek() == Some(&'"') => { + cur.push('"'); + chars.next(); + } + '"' => quoted = !quoted, + ',' if !quoted => { + cols.push(std::mem::take(&mut cur)); + } + _ => cur.push(ch), + } + } + cols.push(cur); + cols +} + +async fn analyze_chart(id: i32, dir: &Path, cli: &Cli) -> Result { + let mut fs = fs_from_file(dir)?; + let info = load_info(&mut *fs).await?; + let (chart_offset, note_times) = load_chart_timing(&mut *fs, &info).await?; + if note_times.len() < 16 { + bail!("too few notes") + } + + let audio_path = dir.join(&info.music); + let clip = 
prpr_avc::demux_audio(audio_path.to_str().context("invalid audio path")?)?.context("no audio stream found")?; + let pcm: Vec = clip.frames().iter().map(|f| (f.0 + f.1) / 2.0).collect(); + let sample_rate = clip.sample_rate(); + let duration = pcm.len() as f64 / sample_rate as f64; + + let audio = SuperFlux::new(&pcm, sample_rate, 2048, 1024); + let note = NoteGaussian::new(note_times.clone(), cli.blur_sigma); + let search_center = chart_offset + info.offset as f64; + let config = AlignConfig { + search_range_sec: cli.range, + sampling_interval_sec: cli.interval, + search_center_sec: search_center, + }; + + let stats = estimate_energy_stats(&audio, ¬e, duration, &config); + Ok(StudyRow { + chart_id: id, + chart_name: info.name, + notes: note_times.len(), + duration_sec: duration, + search_center_sec: search_center, + suggested_offset_sec: stats.offset, + lag_sec: stats.offset - search_center, + raw_peak: stats.raw_peak, + note_energy: stats.note_energy, + audio_energy: stats.audio_energy, + normalized_peak: stats.normalized_peak, + reliable: stats.normalized_peak > 0.05, + }) +} + +async fn load_chart_timing(fs: &mut dyn FileSystem, info: &ChartInfo) -> Result<(f64, Vec)> { + let bytes = fs.load_file(&info.chart).await?; + let format = infer_chart_format(info, &bytes); + if matches!(format, ChartFormat::Rpe) { + return parse_rpe_timing(&String::from_utf8_lossy(&bytes)); + } + let chart = load_chart(fs, info, &bytes, format).await?; + Ok((chart.offset as f64, extract_note_times(&chart))) +} + +async fn load_chart(fs: &mut dyn FileSystem, info: &ChartInfo, bytes: &[u8], format: ChartFormat) -> Result { + match format { + ChartFormat::Rpe => parse_rpe(&String::from_utf8_lossy(&bytes), fs, Default::default(), info.use_rpe_170_speed.unwrap_or_default()).await, + ChartFormat::Pgr => parse_phigros(&String::from_utf8_lossy(&bytes), Default::default()), + ChartFormat::Pec => parse_pec(&String::from_utf8_lossy(&bytes), Default::default()), + ChartFormat::Pbc => bail!("pbc 
charts are not supported by this study tool"), + } +} + +fn parse_rpe_timing(source: &str) -> Result<(f64, Vec)> { + let rpe: RpeTimingChart = serde_json::from_str(source).context("failed to parse RPE timing")?; + let mut bpm = BpmList::new(rpe.bpm_list.into_iter().map(|it| (it.start_time.beats(), it.bpm)).collect()); + let mut note_times: Vec = rpe + .judge_line_list + .into_iter() + .flat_map(|line| line.notes.unwrap_or_default()) + .filter(|note| note.is_fake == 0) + .map(|note| bpm.time(¬e.start_time)) + .filter(|&time| time >= 0.0) + .collect(); + note_times.sort_by(|a, b| a.partial_cmp(b).unwrap()); + Ok((rpe.meta.offset as f64 / 1000.0, note_times)) +} + +fn infer_chart_format(info: &ChartInfo, bytes: &[u8]) -> ChartFormat { + info.format.clone().unwrap_or_else(|| { + if let Ok(text) = String::from_utf8(bytes.to_vec()) { + if text.starts_with('{') { + if text.contains("\"META\"") { + ChartFormat::Rpe + } else { + ChartFormat::Pgr + } + } else { + ChartFormat::Pec + } + } else { + ChartFormat::Pbc + } + }) +} + +fn extract_note_times(chart: &Chart) -> Vec { + let mut times: Vec = chart + .lines + .iter() + .flat_map(|line| line.notes.iter()) + .filter(|note| !note.fake) + .map(|note| note.time) + .filter(|&t| t >= 0.0) + .collect(); + times.sort_by(|a, b| a.partial_cmp(b).unwrap()); + times +} + +struct EnergyStats { + offset: f64, + raw_peak: f64, + note_energy: f64, + audio_energy: f64, + normalized_peak: f64, +} + +fn estimate_energy_stats(audio: &A, note: &N, duration: f64, config: &AlignConfig) -> EnergyStats { + let t_min = config.search_center_sec - config.search_range_sec; + let t_max = config.search_center_sec + duration + config.search_range_sec; + let count = ((t_max - t_min) / config.sampling_interval_sec).ceil() as usize + 1; + let ts: Vec = (0..count).map(|i| t_min + i as f64 * config.sampling_interval_sec).collect(); + let audio_samples = audio.samples(&ts); + let note_ts: Vec = ts.iter().map(|&t| t - config.search_center_sec).collect(); + let 
note_samples = note.samples(¬e_ts); + + let note_energy = note_samples.iter().map(|&v| (v as f64).powi(2)).sum::(); + let audio_energy = audio_samples.iter().map(|&v| (v as f64).powi(2)).sum::(); + let denom = (note_energy * audio_energy).sqrt(); + let max_lag_bins = (config.search_range_sec / config.sampling_interval_sec).ceil() as usize; + let n = note_samples.len().min(audio_samples.len()); + + let mut best_lag = max_lag_bins; + let mut best_norm = f64::NEG_INFINITY; + let mut best_raw = 0.0; + for lag_offset in 0..=2 * max_lag_bins { + let lag = lag_offset as isize - max_lag_bins as isize; + let mut raw = 0.0; + for (i, ¬e_value) in note_samples.iter().take(n).enumerate() { + let j = i as isize + lag; + if j >= 0 && j < audio_samples.len() as isize { + raw += note_value as f64 * audio_samples[j as usize] as f64; + } + } + let normalized = if denom > 0.0 { (raw / denom).clamp(0.0, 1.0) } else { 0.0 }; + if normalized > best_norm { + best_norm = normalized; + best_raw = raw; + best_lag = lag_offset; + } + } + + let lag_sec = (best_lag as isize - max_lag_bins as isize) as f64 * config.sampling_interval_sec; + EnergyStats { + offset: config.search_center_sec + lag_sec, + raw_peak: best_raw, + note_energy, + audio_energy, + normalized_peak: best_norm, + } +} + +fn write_csv(path: &Path, rows: &[StudyRow]) -> Result<()> { + let mut file = File::create(path)?; + writeln!(file, "{}", StudyRow::header())?; + for row in rows { + writeln!(file, "{}", row.to_csv())?; + } + Ok(()) +} + +fn write_plotly_html(path: &Path, rows: &[StudyRow]) -> Result<()> { + let x: Vec = rows.iter().map(|row| (row.note_energy.max(1e-12)).log10()).collect(); + let y: Vec = rows.iter().map(|row| (row.audio_energy.max(1e-12)).log10()).collect(); + let z: Vec = rows.iter().map(|row| (row.raw_peak.max(1e-12)).log10()).collect(); + let color: Vec = rows.iter().map(|row| row.normalized_peak).collect(); + let text: Vec = rows + .iter() + .map(|row| { + format!( + "#{} {}
notes: {}
offset: {:.0}ms
raw: {:.3}
norm: {:.4}", + row.chart_id, + row.chart_name, + row.notes, + row.suggested_offset_sec * 1000.0, + row.raw_peak, + row.normalized_peak + ) + }) + .collect(); + let html = format!( + r#" + + + + Auto-offset energy study + + + + +
+ + + +"#, + x = serde_json::to_string(&x)?, + y = serde_json::to_string(&y)?, + z = serde_json::to_string(&z)?, + color = serde_json::to_string(&color)?, + text = serde_json::to_string(&text)?, + ); + fs::write(path, html)?; + Ok(()) +} + +fn draw_plot(path: &Path, rows: &[StudyRow]) -> Result<()> { + let root = BitMapBackend::new(path, (1200, 900)).into_drawing_area(); + root.fill(&RGBColor(250, 250, 248))?; + + let mut chart = ChartBuilder::on(&root) + .caption("Auto-offset energy study: log raw peak vs. log note/audio energy", ("sans-serif", 28).into_font()) + .margin(28) + .x_label_area_size(48) + .y_label_area_size(58) + .build_cartesian_2d(-0.75f64..0.9f64, -0.75f64..0.9f64)?; + + chart + .configure_mesh() + .x_desc("3D projection: log10(note energy) + log10(audio energy)") + .y_desc("3D projection: log10(raw peak)") + .light_line_style(RGBColor(225, 225, 225)) + .draw()?; + + if rows.is_empty() { + root.present()?; + return Ok(()); + } + + let xs: Vec = rows.iter().map(|r| (r.note_energy.max(1e-12)).log10()).collect(); + let ys: Vec = rows.iter().map(|r| (r.audio_energy.max(1e-12)).log10()).collect(); + let zs: Vec = rows.iter().map(|r| (r.raw_peak.max(1e-12)).log10()).collect(); + let (xmin, xmax) = min_max(&xs); + let (ymin, ymax) = min_max(&ys); + let (zmin, zmax) = min_max(&zs); + + chart.draw_series(rows.iter().zip(&xs).zip(ys.iter().zip(&zs)).map(|((row, &x), (&y, &z))| { + let px = norm(x, xmin, xmax) - 0.5; + let py = norm(y, ymin, ymax) - 0.5; + let pz = norm(z, zmin, zmax) - 0.5; + let sx = px + py * 0.38; + let sy = pz - py * 0.30; + Circle::new((sx, sy), 4, ShapeStyle::from(&heat(row.normalized_peak)).filled()) + }))?; + + draw_color_legend(&root)?; + root.present()?; + Ok(()) +} + +fn min_max(values: &[f64]) -> (f64, f64) { + let min = values.iter().copied().fold(f64::INFINITY, f64::min); + let max = values.iter().copied().fold(f64::NEG_INFINITY, f64::max); + if (max - min).abs() < 1e-9 { + (min, min + 1.0) + } else { + (min, max) + } +} + +fn 
norm(value: f64, min: f64, max: f64) -> f64 { + ((value - min) / (max - min)).clamp(0.0, 1.0) +} + +fn heat(value: f64) -> RGBColor { + let t = value.clamp(0.0, 1.0); + let r = (40.0 + 210.0 * t) as u8; + let g = (80.0 + 120.0 * (1.0 - (t - 0.5).abs() * 2.0).max(0.0)) as u8; + let b = (220.0 - 180.0 * t) as u8; + RGBColor(r, g, b) +} + +fn draw_color_legend(root: &DrawingArea, plotters::coord::Shift>) -> Result<()> { + let x0 = 1030; + let y0 = 140; + let h = 300; + for i in 0..h { + let t = 1.0 - i as f64 / (h - 1) as f64; + root.draw(&Rectangle::new([(x0, y0 + i), (x0 + 24, y0 + i + 1)], heat(t).filled()))?; + } + root.draw(&Text::new("normalized", (x0 - 15, y0 - 28), ("sans-serif", 18).into_font()))?; + root.draw(&Text::new("peak", (x0 + 1, y0 - 8), ("sans-serif", 18).into_font()))?; + root.draw(&Text::new("1.0", (x0 + 34, y0 + 6), ("sans-serif", 16).into_font()))?; + root.draw(&Text::new("0.0", (x0 + 34, y0 + h - 4), ("sans-serif", 16).into_font()))?; + Ok(()) +}