diff --git a/src/datacell/flatten_datacell_test.cpp b/src/datacell/flatten_datacell_test.cpp index ab8b94e82..1ed08891e 100644 --- a/src/datacell/flatten_datacell_test.cpp +++ b/src/datacell/flatten_datacell_test.cpp @@ -145,3 +145,216 @@ TEST_CASE("RaBitQSplitDataCell direct split compute", "[ut][RaBitQSplitDataCell] } } } +TEST_CASE("RaBitQSplitDataCell serialize and methods", "[ut][RaBitQSplitDataCell]") { + auto allocator = SafeAllocator::FactoryDefaultAllocator(); + constexpr uint64_t dim = 64; + constexpr InnerIdType count = 32; + auto vectors = fixtures::generate_vectors(count, dim); + + constexpr const char* param_str = R"( + { + "codes_type": "rabitq_split", + "io_params": { + "type": "memory_io" + }, + "quantization_params": { + "type": "rabitq", + "rabitq_version": "split_1bit_7bit", + "rabitq_bits_per_dim_query": 32, + "rabitq_bits_per_dim_base": 4 + } + } + )"; + + auto param_json = JsonType::Parse(param_str); + auto param = std::make_shared(); + param->FromJson(param_json); + + IndexCommonParam common_param; + common_param.allocator_ = allocator; + common_param.dim_ = dim; + common_param.metric_ = MetricType::METRIC_TYPE_L2SQR; + + auto flatten = FlattenInterface::MakeInstance(param, common_param); + flatten->Train(vectors.data(), count); + + SECTION("InsertVector and UpdateVector") { + for (InnerIdType i = 0; i < count; ++i) { + flatten->InsertVector(vectors.data() + i * dim); + } + REQUIRE(flatten->TotalCount() == count); + + REQUIRE(flatten->UpdateVector(vectors.data(), 0) == true); + REQUIRE(flatten->UpdateVector(vectors.data(), count + 10) == false); + } + + SECTION("BatchInsertVector with explicit ids") { + std::vector ids(count); + std::iota(ids.begin(), ids.end(), 0); + flatten->BatchInsertVector(vectors.data(), count, ids.data()); + REQUIRE(flatten->TotalCount() == count); + } + + SECTION("Serialize and Deserialize") { + flatten->BatchInsertVector(vectors.data(), count); + + std::stringstream ss; + IOStreamWriter writer(ss); + flatten->Serialize(writer); + ss.seekg(0, std::ios::beg); + IOStreamReader reader(ss); + + auto other = FlattenInterface::MakeInstance(param, common_param); + other->Train(vectors.data(), count); + other->Deserialize(reader); + REQUIRE(other->TotalCount() == flatten->TotalCount()); + + auto query = fixtures::generate_vectors(1, dim, 99); + auto computer = flatten->FactoryComputer(query.data()); + std::vector idx(count); + std::iota(idx.begin(), idx.end(), 0); + std::vector dists1(count), dists2(count); + flatten->Query(dists1.data(), computer, idx.data(), count); + other->Query(dists2.data(), computer, idx.data(), count); + for (InnerIdType i = 0; i < count; ++i) { + REQUIRE(dists1[i] == dists2[i]); + } + } + + SECTION("GetCodesById") { + flatten->BatchInsertVector(vectors.data(), count); + bool need_release = false; + const auto* code0 = flatten->GetCodesById(0, need_release); + REQUIRE(code0 != nullptr); + if (need_release) { + flatten->Release(code0); + } + } + + SECTION("Encode and Decode") { + flatten->BatchInsertVector(vectors.data(), count); + auto code_size = flatten->code_size_; + std::vector codes(code_size); + REQUIRE(flatten->Encode(vectors.data(), codes.data()) == true); + std::vector decoded(dim); + flatten->Decode(codes.data(), decoded.data()); + } + + SECTION("Resize and ShrinkToFit") { + flatten->BatchInsertVector(vectors.data(), count); + flatten->Resize(count * 2); + flatten->ShrinkToFit(count); + } + + SECTION("Move") { + flatten->BatchInsertVector(vectors.data(), count); + flatten->Move(0, count); + } + + SECTION("GetCodesById variants") { + flatten->BatchInsertVector(vectors.data(), count); + bool need_release = false; + const auto* codes = flatten->GetCodesById(0, need_release); + REQUIRE(codes != nullptr); + if (need_release) { + flatten->Release(codes); + } + + auto code_size = flatten->code_size_; + std::vector buf(code_size); + REQUIRE(flatten->GetCodesById(0, buf.data()) == true); + } + + SECTION("ExportModel") { + flatten->BatchInsertVector(vectors.data(), count); + auto other = FlattenInterface::MakeInstance(param, common_param); + other->Train(vectors.data(), count); + flatten->ExportModel(other); + } + + SECTION("MergeOther") { + flatten->BatchInsertVector(vectors.data(), count / 2); + auto other_param = std::make_shared(); + other_param->FromJson(param_json); + auto other = FlattenInterface::MakeInstance(other_param, common_param); + other->Train(vectors.data(), count); + other->BatchInsertVector(vectors.data() + (count / 2) * dim, count / 2); + flatten->MergeOther(other, count / 2); + REQUIRE(flatten->TotalCount() == count); + } + + SECTION("Metadata methods") { + REQUIRE_FALSE(flatten->GetQuantizerName().empty()); + REQUIRE(flatten->GetMetricType() == MetricType::METRIC_TYPE_L2SQR); + REQUIRE(flatten->InMemory() == true); + auto memory = flatten->GetMemoryUsage(); + REQUIRE(memory > 0); + } + + SECTION("QueryWithDistanceFilter") { + flatten->BatchInsertVector(vectors.data(), count); + auto query = fixtures::generate_vectors(1, dim, 42); + auto computer = flatten->FactoryComputer(query.data()); + std::vector idx(count); + std::iota(idx.begin(), idx.end(), 0); + std::vector dists(count); + flatten->QueryWithDistanceFilter( + dists.data(), computer, idx.data(), count, std::numeric_limits::max()); + for (InnerIdType i = 0; i < count; ++i) { + REQUIRE(std::isfinite(dists[i])); + } + } +} + +TEST_CASE("RaBitQSplitDataCell IP metric", "[ut][RaBitQSplitDataCell]") { + auto allocator = SafeAllocator::FactoryDefaultAllocator(); + constexpr uint64_t dim = 64; + constexpr InnerIdType count = 16; + auto vectors = fixtures::generate_vectors(count, dim); + auto queries = fixtures::generate_vectors(2, dim, 42); + + constexpr const char* param_str = R"( + { + "codes_type": "rabitq_split", + "io_params": { + "type": "memory_io" + }, + "quantization_params": { + "type": "rabitq", + "rabitq_version": "split_1bit_7bit", + "rabitq_bits_per_dim_query": 32, + "rabitq_bits_per_dim_base": 4 + } + } + )"; + + auto param_json = JsonType::Parse(param_str); + auto param = std::make_shared(); + param->FromJson(param_json); + + IndexCommonParam common_param; + common_param.allocator_ = allocator; + common_param.dim_ = dim; + common_param.metric_ = MetricType::METRIC_TYPE_IP; + + auto flatten = FlattenInterface::MakeInstance(param, common_param); + flatten->Train(vectors.data(), count); + flatten->BatchInsertVector(vectors.data(), count); + + std::vector idx(count); + std::iota(idx.begin(), idx.end(), 0); + std::vector dists(count); + std::vector lower_bounds(count); + + auto computer = flatten->FactoryComputer(queries.data()); + flatten->Query(dists.data(), computer, idx.data(), count); + for (InnerIdType i = 0; i < count; ++i) { + REQUIRE(std::isfinite(dists[i])); + } + + flatten->QueryWithDistanceLowerBound( + dists.data(), lower_bounds.data(), computer, idx.data(), count); + for (InnerIdType i = 0; i < count; ++i) { + REQUIRE(std::isfinite(dists[i])); + } +} diff --git a/src/io/mmap_io_test.cpp b/src/io/mmap_io_test.cpp index 46a01c2db..e176085a4 100644 --- a/src/io/mmap_io_test.cpp +++ b/src/io/mmap_io_test.cpp @@ -60,3 +60,75 @@ TEST_CASE("MMapIO Serialize & Deserialize", "[ut][MMapIO]") { auto rio = std::make_unique(path2, allocator.get()); TestSerializeAndDeserialize(*wio, *rio); } + +TEST_CASE("MMapIO directory path error", "[ut][MMapIO]") { + auto allocator = SafeAllocator::FactoryDefaultAllocator(); + fixtures::TempDir dir("mmap_io_dir_test"); + auto dir_path = dir.path; + REQUIRE_THROWS(std::make_unique(dir_path, allocator.get())); +} + +TEST_CASE("MMapIO resize shrink", "[ut][MMapIO]") { + auto allocator = SafeAllocator::FactoryDefaultAllocator(); + fixtures::TempDir dir("mmap_io_resize"); + auto path = dir.GenerateRandomFile(false); + auto io = std::make_unique(path, allocator.get()); + + std::vector data(4096, 0xAB); + io->Write(data.data(), data.size(), 0); + + io->Resize(8192); + REQUIRE(io->size_ >= 8192); + + io->Resize(2048); + REQUIRE(io->size_ == 2048); + + std::vector read_buf(2048); + REQUIRE(io->Read(2048, 0, read_buf.data()) == true); + for (uint64_t i = 0; i < 2048; ++i) { + REQUIRE(read_buf[i] == 0xAB); + } +} + +TEST_CASE("MMapIO MultiRead", "[ut][MMapIO]") { + auto allocator = SafeAllocator::FactoryDefaultAllocator(); + fixtures::TempDir dir("mmap_io_multi"); + auto path = dir.GenerateRandomFile(false); + auto io = std::make_unique(path, allocator.get()); + + std::vector data(256); + for (uint64_t i = 0; i < 256; ++i) { + data[i] = static_cast(i); + } + io->Write(data.data(), data.size(), 0); + + std::vector sizes = {64, 64, 64}; + std::vector offsets = {0, 64, 128}; + std::vector result(192); + io->MultiRead(result.data(), sizes.data(), offsets.data(), 3); + + for (uint64_t i = 0; i < 192; ++i) { + REQUIRE(result[i] == static_cast(i)); + } +} + +TEST_CASE("MMapIO existing file", "[ut][MMapIO]") { + auto allocator = SafeAllocator::FactoryDefaultAllocator(); + fixtures::TempDir dir("mmap_io_exist"); + auto path = dir.GenerateRandomFile(true); + + { + auto io = std::make_unique(path, allocator.get()); + std::vector data(128, 0xCD); + io->Write(data.data(), data.size(), 0); + } + + auto io2 = std::make_unique(path, allocator.get()); + std::vector data2(64, 0xEF); + io2->Write(data2.data(), data2.size(), 0); + std::vector read_buf(64); + io2->Read(64, 0, read_buf.data()); + for (uint64_t i = 0; i < 64; ++i) { + REQUIRE(read_buf[i] == 0xEF); + } +} diff --git a/src/vsag_c_api_test.cpp b/src/vsag_c_api_test.cpp index f0f356c69..1ef5bbf7e 100644 --- a/src/vsag_c_api_test.cpp +++ b/src/vsag_c_api_test.cpp @@ -47,6 +47,7 @@ static constexpr const char* hgraph_search_parameters = R"( static constexpr int64_t num_vectors = 500; static constexpr int64_t dim = 128; +thread_local std::string deserialize_read_func_path; class VsagTestCase { public: @@ -129,7 +130,7 @@ TEST_CASE("vsag_c_api basic test", "[vsag_c_api][ut]") { using SizeFuncType = SizeType (*)(); SizeFuncType size_func = []() { - struct stat st; + struct stat st {}; stat(path.data(), &st); return static_cast(st.st_size); }; @@ -137,8 +138,10 @@ TEST_CASE("vsag_c_api basic test", "[vsag_c_api][ut]") { using ReadFuncType = void (*)(OffsetType offset, SizeType size, void* data); ReadFuncType read_func = [](OffsetType offset, SizeType size, void* data) { std::ifstream ifile(path.data(), std::ios::binary); + REQUIRE(ifile.is_open()); ifile.seekg(offset); ifile.read(reinterpret_cast(data), size); + REQUIRE(ifile.gcount() == static_cast(size)); ifile.close(); }; vsag_index_t index2 = vsag_index_factory(index_name, index_param); @@ -417,3 +420,210 @@ TEST_CASE("vsag_c_api update operations and get vector by ids", "[vsag_c_api][ut vsag_index_destroy(index); } + +TEST_CASE("vsag_c_api null handle paths", "[vsag_c_api][ut]") { + VsagTestCase test_case; + SearchResult_t result{}; + std::vector ids(3, 0); + std::vector dists(3, 0.0F); + std::vector vectors(3 * dim, 0.0F); + result.ids = ids.data(); + result.dists = dists.data(); + + REQUIRE(vsag_index_destroy(nullptr).code == VSAG_SUCCESS); + REQUIRE( + vsag_index_build(nullptr, test_case.datas.data(), test_case.ids.data(), dim, num_vectors) + .code == VSAG_INTERNAL_ERROR); + vsag_index_add(nullptr, test_case.datas.data(), test_case.ids.data(), dim, num_vectors); + vsag_index_train(nullptr, test_case.datas.data(), test_case.ids.data(), dim, num_vectors); + vsag_index_knn_search( + nullptr, test_case.datas.data(), dim, 3, hgraph_search_parameters, &result); + vsag_index_knn_search_with_filter( + nullptr, test_case.datas.data(), dim, 3, hgraph_search_parameters, nullptr, &result); + vsag_index_range_search( + nullptr, test_case.datas.data(), dim, 1.0F, hgraph_search_parameters, &result); + vsag_index_range_search_with_filter( + nullptr, test_case.datas.data(), dim, 1.0F, hgraph_search_parameters, nullptr, &result); + + vsag_index_t clone_index = nullptr; + REQUIRE(vsag_index_clone(nullptr, &clone_index).code == VSAG_INTERNAL_ERROR); + REQUIRE(clone_index == nullptr); + + vsag_index_t model_index = nullptr; + REQUIRE(vsag_index_export_model(nullptr, &model_index).code == VSAG_INTERNAL_ERROR); + REQUIRE(model_index == nullptr); + + REQUIRE( + vsag_index_calculate_distance_by_ids( + nullptr, test_case.datas.data(), dim, test_case.ids.data(), ids.size(), dists.data()) + .code == VSAG_INTERNAL_ERROR); + REQUIRE(vsag_index_update_ids(nullptr, ids.data(), ids.data(), dim, ids.size()).code == + VSAG_INTERNAL_ERROR); + REQUIRE(vsag_index_update_vector(nullptr, 0, test_case.datas.data(), dim).code == + VSAG_INTERNAL_ERROR); + REQUIRE(vsag_index_update_vector_force(nullptr, 0, test_case.datas.data(), dim).code == + VSAG_INTERNAL_ERROR); + REQUIRE(vsag_index_get_vector_by_ids(nullptr, ids.data(), ids.size(), vectors.data()).code == + VSAG_INTERNAL_ERROR); + auto dir = fixtures::TempDir("vsag_c_api_null_test"); + auto null_path = dir.GenerateRandomFile(); + REQUIRE(vsag_serialize_file(nullptr, null_path.data()).code == VSAG_INTERNAL_ERROR); + REQUIRE(vsag_deserialize_file(nullptr, null_path.data()).code == VSAG_INTERNAL_ERROR); + REQUIRE(vsag_serialize_write_func(nullptr, nullptr).code == VSAG_INTERNAL_ERROR); + REQUIRE(vsag_deserialize_read_func(nullptr, nullptr, nullptr).code == VSAG_INTERNAL_ERROR); +} + +TEST_CASE("vsag_c_api failure paths on valid index", "[vsag_c_api][ut]") { + auto index = vsag_index_factory(index_name, index_param); + REQUIRE(index != nullptr); + + VsagTestCase test_case; + auto ret = + vsag_index_build(index, test_case.datas.data(), test_case.ids.data(), dim, num_vectors); + REQUIRE(ret.code == VSAG_SUCCESS); + + std::vector vectors(dim, 0.0F); + int64_t missing_id = 999999; + ret = vsag_index_get_vector_by_ids(index, &missing_id, 1, vectors.data()); + REQUIRE(ret.code != VSAG_SUCCESS); + + float dist = 0.0F; + ret = vsag_index_calculate_distance_by_ids( + index, test_case.datas.data(), dim, &missing_id, 1, &dist); + REQUIRE(ret.code == VSAG_SUCCESS); + + int64_t existing_old_id = 3; + int64_t occupied_new_id = 4; + ret = vsag_index_update_ids(index, &existing_old_id, &occupied_new_id, dim, 1); + REQUIRE(ret.code != VSAG_SUCCESS); + + ret = vsag_index_update_ids(index, &missing_id, &occupied_new_id, dim, 1); + REQUIRE(ret.code != VSAG_SUCCESS); + + std::vector new_vector(dim, 0.0F); + for (int64_t i = 0; i < dim; ++i) { + new_vector[i] = test_case.datas[i]; + } + ret = vsag_index_update_vector(index, missing_id, new_vector.data(), dim); + REQUIRE(ret.code != VSAG_SUCCESS); + ret = vsag_index_update_vector_force(index, missing_id, new_vector.data(), dim); + REQUIRE(ret.code != VSAG_SUCCESS); + + auto dir = fixtures::TempDir("vsag_c_api_error"); + auto missing_path = dir.GenerateRandomFile(); + auto index2 = vsag_index_factory(index_name, index_param); + REQUIRE(index2 != nullptr); + ret = vsag_deserialize_file(index2, missing_path.data()); + REQUIRE(ret.code != VSAG_SUCCESS); + ret = vsag_deserialize_read_func(index2, nullptr, nullptr); + REQUIRE(ret.code != VSAG_SUCCESS); + + vsag_index_destroy(index2); + vsag_index_destroy(index); +} + +TEST_CASE("vsag_c_api search error paths", "[vsag_c_api][ut]") { + auto index = vsag_index_factory(index_name, index_param); + REQUIRE(index != nullptr); + + VsagTestCase test_case; + auto ret = + vsag_index_build(index, test_case.datas.data(), test_case.ids.data(), dim, num_vectors); + REQUIRE(ret.code == VSAG_SUCCESS); + + int64_t topk = 10; + std::vector ids(topk, -1); + std::vector dists(topk, 0.0F); + SearchResult_t result{}; + result.ids = ids.data(); + result.dists = dists.data(); + + auto filter_func = [](int64_t id) -> bool { return id >= 0; }; + + constexpr const char* invalid_search_parameters = "not-json"; + ret = vsag_index_knn_search( + index, test_case.datas.data(), dim, topk, invalid_search_parameters, &result); + REQUIRE(ret.code != VSAG_SUCCESS); + + ret = vsag_index_knn_search_with_filter( + index, test_case.datas.data(), dim, topk, invalid_search_parameters, filter_func, &result); + REQUIRE(ret.code != VSAG_SUCCESS); + + ret = vsag_index_range_search( + index, test_case.datas.data(), dim, 10.0F, invalid_search_parameters, &result); + REQUIRE(ret.code != VSAG_SUCCESS); + + ret = vsag_index_range_search_with_filter( + index, test_case.datas.data(), dim, 10.0F, invalid_search_parameters, filter_func, &result); + REQUIRE(ret.code != VSAG_SUCCESS); + + ret = vsag_index_knn_search( + index, test_case.datas.data(), dim - 1, topk, hgraph_search_parameters, &result); + REQUIRE(ret.code != VSAG_SUCCESS); + + ret = vsag_index_range_search( + index, test_case.datas.data(), dim - 1, 10.0F, hgraph_search_parameters, &result); + REQUIRE(ret.code != VSAG_SUCCESS); + + vsag_index_destroy(index); +} + +TEST_CASE("vsag_c_api serialize and update edge paths", "[vsag_c_api][ut]") { + auto index = vsag_index_factory(index_name, index_param); + REQUIRE(index != nullptr); + + VsagTestCase test_case; + auto ret = + vsag_index_build(index, test_case.datas.data(), test_case.ids.data(), dim, num_vectors); + REQUIRE(ret.code == VSAG_SUCCESS); + + int64_t same_id = 42; + ret = vsag_index_update_ids(index, &same_id, &same_id, dim, 1); + REQUIRE(ret.code == VSAG_SUCCESS); + + std::vector vector(dim, 0.0F); + ret = vsag_index_get_vector_by_ids(index, &same_id, 1, vector.data()); + REQUIRE(ret.code == VSAG_SUCCESS); + for (int64_t i = 0; i < dim; ++i) { + REQUIRE(vector[i] == test_case.datas[same_id * dim + i]); + } + + auto dir = fixtures::TempDir("vsag_c_api_serialize_edge"); + auto path = dir.GenerateRandomFile(); + ret = vsag_serialize_file(index, path.data()); + REQUIRE(ret.code == VSAG_SUCCESS); + + auto non_empty_index = vsag_index_factory(index_name, index_param); + REQUIRE(non_empty_index != nullptr); + ret = vsag_index_build( + non_empty_index, test_case.datas.data(), test_case.ids.data(), dim, num_vectors); + REQUIRE(ret.code == VSAG_SUCCESS); + + ret = vsag_deserialize_file(non_empty_index, path.data()); + REQUIRE(ret.code != VSAG_SUCCESS); + + deserialize_read_func_path = path; + + using SizeFuncType = SizeType (*)(); + SizeFuncType size_func = []() -> SizeType { + struct stat st {}; + REQUIRE(stat(deserialize_read_func_path.data(), &st) == 0); + return static_cast(st.st_size); + }; + + using ReadFuncType = void (*)(OffsetType offset, SizeType size, void* data); + ReadFuncType read_func = [](OffsetType offset, SizeType size, void* data) { + std::ifstream ifile(deserialize_read_func_path.data(), std::ios::binary); + REQUIRE(ifile.is_open()); + ifile.seekg(offset); + ifile.read(reinterpret_cast(data), size); + REQUIRE(ifile.gcount() == static_cast(size)); + ifile.close(); + }; + + ret = vsag_deserialize_read_func(non_empty_index, read_func, size_func); + REQUIRE(ret.code != VSAG_SUCCESS); + + vsag_index_destroy(non_empty_index); + vsag_index_destroy(index); +} diff --git a/tests/test_warp.cpp b/tests/test_warp.cpp index 2850f9f58..f06dd04ba 100644 --- a/tests/test_warp.cpp +++ b/tests/test_warp.cpp @@ -159,3 +159,17 @@ TEST_CASE_PERSISTENT_FIXTURE(fixtures::WarpTestIndex, } vsag::Options::Instance().set_block_size_limit(origin_size); } + +TEST_CASE_PERSISTENT_FIXTURE(fixtures::WarpTestIndex, "Warp IP Multiple Dims", "[ft][warp]") { + WarpParam warp_param; + warp_param.base_quantization_type = "fp32"; + const std::string name = "warp"; + auto search_param = GenerateWarpSearchParametersString(); + for (auto dim : {16, 128, 256}) { + auto param = GenerateWarpBuildParametersString("ip", dim, warp_param); + auto index = TestFactory(name, param, true); + auto dataset = pool.GetDatasetAndCreate(dim, base_count, "ip", false, 0.8, 0, 16, "multi"); + TestBuildIndex(index, dataset, true); + TestKnnSearch(index, dataset, search_param, 0.99, true); + } +}