Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion .github/workflows/build-visqol.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@ on:
- 'Dockerfile.visqol'
- 'config.py'
- 'phase2_mos.py'
pull_request:
branches: [ "master" ]
paths:
- 'Dockerfile.visqol'
- 'config.py'
- 'phase2_mos.py'

jobs:
build:
Expand Down Expand Up @@ -55,7 +61,7 @@ jobs:
context: .
file: Dockerfile.visqol
platforms: linux/amd64
push: true
push: ${{ github.event_name != 'pull_request' }}
tags: |
ghcr.io/nschimme/faac-benchmark-visqol:latest
ghcr.io/nschimme/faac-benchmark-visqol:${{ github.sha }}
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ Runs the encoding benchmark and MOS computation for a single configuration.
| `scenarios` | Comma-separated list of scenarios to run (e.g., `voip,vss`). | No | |
| `include-tests` | Comma-separated list of test filename globs to include (e.g., `TCD_*`). | No | |
| `exclude-tests` | Comma-separated list of test filename globs to exclude. | No | |
| `backend` | ViSQOL backend to use (`auto`, `docker`, `visqol`, `visqol-py`, `visqol-python`). | No | `docker` |
| `backend` | ViSQOL backend to use (`auto`, `docker`, `visqol`, `visqol-py`, `visqol-python`). | No | `visqol-python` |

### Action: `nschimme/faac-benchmark/report`

Expand Down Expand Up @@ -211,10 +211,10 @@ python3 run_benchmark.py ... --exclude-tests "white_noise.wav"
This script manages everything for you:
1. **Phase 1**: Encodes samples and measures throughput and library size.
2. **Phase 2**: Computes perceptual quality (MOS). In `auto` mode (default), it attempts to use a ViSQOL backend in the following order:
- **Python (Modern)**: `visqol-python` package (Preferred).
- **Process**: `visqol` binary (found in PATH or via `VISQOL_BIN` env var).
- **Docker**: Containerized execution via **Docker** or **Podman**.
- **Python (Legacy)**: `visqol_py` package.
- **Python (Modern)**: `visqol-python` package.

#### Docker Image Discovery
The benchmark suite uses a deterministic approach to find the correct ViSQOL Docker image:
Expand Down
2 changes: 1 addition & 1 deletion action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ inputs:
backend:
description: 'ViSQOL backend to use (auto, docker, visqol, visqol-py, visqol-python)'
required: false
default: 'docker'
default: 'visqol-python'

runs:
using: 'composite'
Expand Down
169 changes: 95 additions & 74 deletions phase2_mos.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
# Global paths for binary mode
VISQOL_BIN = os.environ.get("VISQOL_BIN")
MODEL_DIR = os.environ.get("VISQOL_MODEL_DIR")
SPEECH_MODEL_NAME = "lattice_tcditugenmeetpackhref_ls2_nl60_lr12_bs2048_learn.005_ep2400_train1_7_raw.tflite"

def find_visqol_assets():
global VISQOL_BIN, MODEL_DIR
Expand Down Expand Up @@ -87,18 +88,83 @@ def find_visqol_assets():
_process_visqol_instances = {}
_process_visqol_api_instances = {}

def get_process_visqol_python(mode_str):
def get_process_visqol_python(mode_str, model_dir=None):
if not HAS_VISQOL_PYTHON:
return None
if mode_str not in _process_visqol_api_instances:

cache_key = f"{mode_str}_{model_dir}"
if cache_key not in _process_visqol_api_instances:
try:
api = VisqolApi()
api.create(mode=mode_str)
_process_visqol_api_instances[mode_str] = api
kwargs = {"mode": mode_str}
if mode_str == "speech" and model_dir:
model_path = os.path.join(model_dir, SPEECH_MODEL_NAME)
if os.path.exists(model_path):
kwargs["lattice_model_path"] = model_path

api.create(**kwargs)
_process_visqol_api_instances[cache_key] = api
except Exception as e:
print(f" Failed to initialize ViSQOL Python (modern): {e}")
_process_visqol_api_instances[mode_str] = None
return _process_visqol_api_instances[mode_str]
_process_visqol_api_instances[cache_key] = None
return _process_visqol_api_instances[cache_key]

def run_visqol_python_batch(pending, aac_dir, external_data_dir, results_path, aac_files=None):
"""
Attempts to process a batch of samples using visqol-python's internal parallelization.
This is often more efficient for large batches when visqol-python is the preferred backend.
"""
# We need to group by mode (audio vs speech)
modes = {"audio": [], "speech": []}
for key, entry in pending.items():
info = get_sample_info(key, entry, aac_dir, external_data_dir, results_path, aac_files=aac_files)
if info:
modes[info["cfg"]["mode"]].append((key, entry, info))

results = {}
with tempfile.TemporaryDirectory() as batch_tmpdir:
for mode, items in modes.items():
if not items:
continue

print(f" Batch processing {len(items)} {mode} samples with visqol-python...")
api = get_process_visqol_python(mode, MODEL_DIR)
if not api:
print(f" Failed to initialize VisqolApi for {mode}, skipping batch.")
continue

file_pairs = []
valid_keys = []
for key, entry, info in items:
v_rate = info["v_rate"]
v_channels = info["v_channels"]
ref_input_path = info["ref_input_path"]
aac_path = info["aac_path"]

if aac_path and os.path.exists(ref_input_path):
v_ref = os.path.join(batch_tmpdir, f"{key}_ref.wav")
v_deg = os.path.join(batch_tmpdir, f"{key}_deg.wav")

if convert_to_wav(ref_input_path, v_ref, v_rate, v_channels) and \
convert_to_wav(aac_path, v_deg, v_rate, v_channels):
file_pairs.append((v_ref, v_deg))
valid_keys.append(key)
else:
# Don't print error here, let the fallback handle it
pass

if file_pairs:
try:
batch_results = api.measure_batch(file_pairs, parallel=True)
for key, result in zip(valid_keys, batch_results):
if isinstance(result, Exception):
print(f" Error for {key} in batch: {result}")
else:
results[key] = float(result.moslqo)
except Exception as e:
print(f" Batch execution failed for {mode}: {e}")

return results

def get_process_visqol_py(mode_str):
if not HAS_VISQOL_PY:
Expand Down Expand Up @@ -199,14 +265,25 @@ def compute_single_mos(key, entry, aac_dir, external_data_dir, results_path, bac
return key, 1.0

try:
# 1. Try Binary Mode (Highest consistency with known-good results)
# 1. Try visqol-python (Modern) - Now preferred as MOS discrepancy is fixed
if backend in ["auto", "visqol-python"]:
if HAS_VISQOL_PYTHON:
api = get_process_visqol_python(cfg["mode"], MODEL_DIR)
if api:
result = api.measure(v_ref, v_deg)
return key, float(result.moslqo)
elif backend == "visqol-python":
print(f" ERROR: visqol-python not found but requested for {key}")
return key, None

# 2. Try Binary Mode
if backend in ["auto", "visqol"]:
if VISQOL_BIN and os.path.exists(VISQOL_BIN):
cmd = [VISQOL_BIN, "--reference_file", v_ref, "--degraded_file", v_deg]
if cfg["mode"] == "speech":
cmd.append("--use_speech_mode")
if MODEL_DIR:
cmd.extend(["--similarity_to_quality_model", os.path.join(MODEL_DIR, "lattice_tcditugenmeetpackhref_ls2_nl60_lr12_bs2048_learn.005_ep2400_train1_7_raw.tflite")])
cmd.extend(["--similarity_to_quality_model", os.path.join(MODEL_DIR, SPEECH_MODEL_NAME)])
else:
if MODEL_DIR:
cmd.extend(["--similarity_to_quality_model", os.path.join(MODEL_DIR, "libsvm_nu_svr_model.txt")])
Expand All @@ -220,7 +297,7 @@ def compute_single_mos(key, entry, aac_dir, external_data_dir, results_path, bac
print(f" ERROR: visqol binary not found but requested for {key}")
return key, None

# 2. Try visqol_py (Legacy)
# 3. Try visqol_py (Legacy)
if backend in ["auto", "visqol-py"]:
if HAS_VISQOL_PY:
visqol = get_process_visqol_py(cfg["mode"])
Expand All @@ -231,73 +308,11 @@ def compute_single_mos(key, entry, aac_dir, external_data_dir, results_path, bac
print(f" ERROR: visqol-py not found but requested for {key}")
return key, None

# 3. Try visqol-python (Modern) - Moved to end due to MOS discrepancy in speech mode
if backend in ["auto", "visqol-python"]:
if HAS_VISQOL_PYTHON:
api = get_process_visqol_python(cfg["mode"])
if api:
result = api.measure(v_ref, v_deg)
return key, float(result.moslqo)
elif backend == "visqol-python":
print(f" ERROR: visqol-python not found but requested for {key}")
return key, None

except Exception as e:
print(f" Error computing MOS for {key}: {e}")

return key, None

def run_visqol_python_batch(pending, aac_dir, external_data_dir, results_path, aac_files=None):
print(f"Using visqol-python batch mode for {len(pending)} samples...")

# We need to group by mode (audio vs speech)
modes = {"audio": [], "speech": []}
for key, entry in pending.items():
info = get_sample_info(key, entry, aac_dir, external_data_dir, results_path, aac_files=aac_files)
if info:
modes[info["cfg"]["mode"]].append((key, entry, info))

results = {}
with tempfile.TemporaryDirectory() as batch_tmpdir:
for mode, items in modes.items():
if not items:
continue

print(f" Processing {len(items)} {mode} samples...")
api = get_process_visqol_python(mode)
if not api:
print(f" Failed to get VisqolApi for {mode}, skipping batch.")
continue

file_pairs = []
valid_keys = []
for key, entry, info in items:
v_rate = info["v_rate"]
v_channels = info["v_channels"]
ref_input_path = info["ref_input_path"]
aac_path = info["aac_path"]

if aac_path and os.path.exists(ref_input_path):
v_ref = os.path.join(batch_tmpdir, f"{key}_ref.wav")
v_deg = os.path.join(batch_tmpdir, f"{key}_deg.wav")

if convert_to_wav(ref_input_path, v_ref, v_rate, v_channels) and \
convert_to_wav(aac_path, v_deg, v_rate, v_channels):
file_pairs.append((v_ref, v_deg))
valid_keys.append(key)
else:
print(f" Missing file for {key}, skipping.")

if file_pairs:
batch_results = api.measure_batch(file_pairs, parallel=True)
for key, result in zip(valid_keys, batch_results):
if isinstance(result, Exception):
print(f" Error for {key}: {result}")
else:
results[key] = float(result.moslqo)

return results

def main():
parser = argparse.ArgumentParser(description="ViSQOL MOS computation (Phase 2)")
parser.add_argument("results_json", help="Path to results JSON file")
Expand Down Expand Up @@ -337,12 +352,18 @@ def main():

mos_results = {}

# Sequential/Parallel fallback stack (Binary -> Legacy -> Modern)
# We no longer use unconditional visqol-python batching to ensure binary priority.
# 1. Attempt batch processing with visqol-python if it's the preferred backend
if (args.backend in ["auto", "visqol-python"]) and HAS_VISQOL_PYTHON:
batch_results = run_visqol_python_batch(pending, aac_dir, external_data_dir, results_path, aac_files)
mos_results.update(batch_results)
# Update pending list to only include those that still need processing (failures)
pending = {k: v for k, v in pending.items() if k not in mos_results}

# 2. Sequential/Parallel fallback stack for remaining samples (Modern -> Binary -> Legacy)
still_pending = pending
if still_pending:
if args.backend == "auto":
mode_str = "Binary" if VISQOL_BIN else "visqol_py" if HAS_VISQOL_PY else "visqol-python" if HAS_VISQOL_PYTHON else "None"
mode_str = "visqol-python" if HAS_VISQOL_PYTHON else "Binary" if VISQOL_BIN else "visqol_py" if HAS_VISQOL_PY else "None"
print(f"Computing MOS for {len(still_pending)} samples using prioritized stack (Primary: {mode_str}, {num_cpus} cores)...")
else:
print(f"Computing MOS for {len(still_pending)} samples using backend '{args.backend}' ({num_cpus} cores)...")
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
ffmpeg-python
visqol-python[accel]
visqol-python[accel,lattice]>=3.5.0
6 changes: 3 additions & 3 deletions run_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,14 +144,14 @@ def main():

# Auto-selection logic
if selected_backend == "auto":
if has_visqol_bin:
if has_visqol_python:
selected_backend = "visqol-python"
elif has_visqol_bin:
selected_backend = "visqol"
elif container_tool:
selected_backend = "docker"
elif has_visqol_py:
selected_backend = "visqol-py"
elif has_visqol_python:
selected_backend = "visqol-python"
else:
print(">>> ERROR: No ViSQOL backend found.")
print("Please either:")
Expand Down