diff --git a/research/classifier/measurements-meta.json b/research/classifier/measurements-meta.json index 35e9b81..6037ef5 100644 --- a/research/classifier/measurements-meta.json +++ b/research/classifier/measurements-meta.json @@ -1,5 +1,5 @@ { - "_note": "Hand-recorded from the v0.2.0 collection run done on the author's laptop before the audit fields were added to collect_measurements.py. Future runs will populate this file automatically, with per-cell tokens + wall-clock aggregated by the collector. Fields with `null` are unknown for this specific run; everything else came from the PR description (#1) and the row count of measurements.csv.", + "_note": "Hand-recorded from the v0.2.0 collection run done on the author's laptop before the audit fields were added to collect_measurements.py. Future runs will populate this file automatically, with per-cell tokens + wall-clock aggregated by the collector. Token totals remain `null` because the OpenAI-compat `usage` block was not persisted during this run; the started_at / ended_at / wall_clock_s fields below are *reconstructed* from the modification timestamps of the run's stdout log file (`/tmp/mnemoscope-full.log` on the author's machine), not measured by the collector — they give an upper bound on the wall-clock, since they include any time the orchestrator spent idle between cells.", "model": "gemma4-ctx:latest", "model_base": "gemma4:26b", "model_quantization": "Q4_K_M", @@ -7,11 +7,11 @@ "endpoint": "http://localhost:11434/v1", "endpoint_kind": "ollama", "offline": false, - "started_at": null, - "ended_at": null, - "wall_clock_s": null, + "started_at": "2026-04-29T04:11:20-04:00", + "ended_at": "2026-04-30T03:13:50-04:00", + "wall_clock_s": 82950, "wall_clock_s_grading_only": null, - "wall_clock_human": "multi-hour, single-machine local run", + "wall_clock_human": "23h02m30s end-to-end (reconstructed from log file mtime, not measured by collector)", "variants_attempted": 50, "variants_kept": 50, "variants_skipped_no_needles": 0, diff 
--git a/research/classifier/model.json b/research/classifier/model.json index b602930..cadf0bd 100644 --- a/research/classifier/model.json +++ b/research/classifier/model.json @@ -23,7 +23,7 @@ }, "rows_train": 40, "rows_test": 10, - "onnx_round_trip_max_delta": 1.5894571991914574e-08, + "onnx_round_trip_max_delta": 1.5894571880892272e-08, "data_source": "classifier/measurements.csv", "seed": 42, "grader_models": [ @@ -31,7 +31,7 @@ ], "offline_rows": 0, "dataset_collection": { - "_note": "Hand-recorded from the v0.2.0 collection run done on the author's laptop before the audit fields were added to collect_measurements.py. Future runs will populate this file automatically, with per-cell tokens + wall-clock aggregated by the collector. Fields with `null` are unknown for this specific run; everything else came from the PR description (#1) and the row count of measurements.csv.", + "_note": "Hand-recorded from the v0.2.0 collection run done on the author's laptop before the audit fields were added to collect_measurements.py. Future runs will populate this file automatically, with per-cell tokens + wall-clock aggregated by the collector. 
Token totals remain `null` because the OpenAI-compat `usage` block was not persisted during this run; the started_at / ended_at / wall_clock_s fields below are *reconstructed* from the modification timestamps of the run's stdout log file (`/tmp/mnemoscope-full.log` on the author's machine), not measured by the collector \u2014 they give an upper bound on the wall-clock, since they include any time the orchestrator spent idle between cells.", "model": "gemma4-ctx:latest", "model_base": "gemma4:26b", "model_quantization": "Q4_K_M", @@ -39,11 +39,11 @@ "endpoint": "http://localhost:11434/v1", "endpoint_kind": "ollama", "offline": false, - "started_at": null, - "ended_at": null, - "wall_clock_s": null, + "started_at": "2026-04-29T04:11:20-04:00", + "ended_at": "2026-04-30T03:13:50-04:00", + "wall_clock_s": 82950, "wall_clock_s_grading_only": null, - "wall_clock_human": "multi-hour, single-machine local run", + "wall_clock_human": "23h02m30s end-to-end (reconstructed from log file mtime, not measured by collector)", "variants_attempted": 50, "variants_kept": 50, "variants_skipped_no_needles": 0,