From 1aed0876610ac8147853cfd1de0e97c0d17ed678 Mon Sep 17 00:00:00 2001 From: Aafaq-rashid-comprinno Date: Wed, 24 Jun 2026 20:23:08 +0530 Subject: [PATCH] feat(index): add build timing and summary line MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After index build completes, logs: 'Done in 5m 23s (3 docs → 40 vectors)' Gives users immediate feedback on total build time and output size. Addresses #99. --- index/src/pixelrag_index/pipelines.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/index/src/pixelrag_index/pipelines.py b/index/src/pixelrag_index/pipelines.py index f943ec8..204d053 100644 --- a/index/src/pixelrag_index/pipelines.py +++ b/index/src/pixelrag_index/pipelines.py @@ -17,6 +17,9 @@ def build(config: dict, limit: int | None = None, force: bool = False) -> Path: Stages: source → ingest (render) → chunk → embed → build index """ import itertools + import time as _time + + _build_start = _time.time() source = make_source(config) try: @@ -179,6 +182,7 @@ def build(config: dict, limit: int | None = None, force: bool = False) -> Path: total_vectors = sum( np.load(f, mmap_mode="r")["embeddings"].shape[0] for f in npz_files ) + n_vectors = total_vectors nlist = min(4096, max(1, total_vectors // 40)) logger.info( "Stage 4/4: Building FAISS index (%d vectors, nlist=%d)...", @@ -202,6 +206,15 @@ def build(config: dict, limit: int | None = None, force: bool = False) -> Path: ) logger.info("Index built at %s", output) + + # Summary + elapsed = _time.time() - _build_start + mins, secs = divmod(int(elapsed), 60) + n_docs = len(articles) + logger.info( + "Done in %dm %ds (%d docs → %d vectors)", + mins, secs, n_docs, n_vectors, + ) return output