From d1bd8606679d2ff64ab304e0bd3417f49867b291 Mon Sep 17 00:00:00 2001
From: Kevin-Li-2025 <2242139@qq.com>
Date: Wed, 24 Jun 2026 01:59:52 +0100
Subject: [PATCH] fix: validate LanceDB vector dimensions

load_documents now compares len(document.vector) with the store's
configured vector_size for every document that carries a vector and
raises ValueError on a mismatch. The message names the document id, the
actual dimension, the index name and the configured vector_size.
Documents whose vector is None are still skipped before the check.

Adds an integration test that loads one correctly sized and one
undersized vector into a 5-dimensional index and asserts on the error
message.
---
 .../graphrag_vectors/lancedb.py               |  9 +++++++
 .../integration/vector_stores/test_lancedb.py | 26 +++++++++++++++++++
 2 files changed, 35 insertions(+)

diff --git a/packages/graphrag-vectors/graphrag_vectors/lancedb.py b/packages/graphrag-vectors/graphrag_vectors/lancedb.py
index a7b1de7c01..04021557ef 100644
--- a/packages/graphrag-vectors/graphrag_vectors/lancedb.py
+++ b/packages/graphrag-vectors/graphrag_vectors/lancedb.py
@@ -91,6 +91,15 @@ def load_documents(self, documents: list[VectorStoreDocument]) -> None:
             if document.vector is None:
                 continue
 
+            actual_vector_size = len(document.vector)
+            if actual_vector_size != self.vector_size:
+                msg = (
+                    f"Vector for document '{document.id}' has dimension "
+                    f"{actual_vector_size}, but index '{self.index_name}' is "
+                    f"configured with vector_size {self.vector_size}."
+                )
+                raise ValueError(msg)
+
             ids.append(str(document.id))
             vectors.append(np.array(document.vector, dtype=np.float32))
             create_dates.append(document.create_date)
diff --git a/tests/integration/vector_stores/test_lancedb.py b/tests/integration/vector_stores/test_lancedb.py
index b45f95f861..9b1951c0ae 100644
--- a/tests/integration/vector_stores/test_lancedb.py
+++ b/tests/integration/vector_stores/test_lancedb.py
@@ -163,6 +163,32 @@ def test_load_documents(self, store_with_fields, sample_documents_with_metadata)
         store.load_documents(sample_documents_with_metadata)
         assert store.count() == 3
 
+    def test_load_documents_rejects_mismatched_vector_size(self):
+        """Test loading a batch with a wrong-sized vector raises a clear error."""
+        temp_dir = tempfile.mkdtemp()
+        try:
+            vector_store = LanceDBVectorStore(
+                db_uri=temp_dir, index_name="dim_mismatch", vector_size=5
+            )
+            vector_store.connect()
+            vector_store.create_index()
+
+            documents = [
+                VectorStoreDocument(id="good", vector=[0.1, 0.2, 0.3, 0.4, 0.5]),
+                VectorStoreDocument(id="bad", vector=[0.1, 0.2, 0.3]),
+            ]
+
+            with pytest.raises(
+                ValueError,
+                match=(
+                    "Vector for document 'bad' has dimension 3, "
+                    "but index 'dim_mismatch' is configured with vector_size 5"
+                ),
+            ):
+                vector_store.load_documents(documents)
+        finally:
+            shutil.rmtree(temp_dir)
+
     def test_search_by_id(self, store_with_fields, sample_documents_with_metadata):
         """Test searching for a document by id returns all fields."""
         store = store_with_fields