Skip to content

Commit 94d4646

Browse files
committed
feat: use tl::unexpected in Index default implementations
Replace throw std::runtime_error with tl::unexpected(Error) in Index virtual method default implementations that return tl::expected<T, Error>. This ensures error handling is consistent with the API contract.

- Use ErrorType::UNSUPPORTED_INDEX_OPERATION for all unsupported operations
- Standardize error messages to "Index does not support <operation>"
- Change CheckFeature default to return false instead of throwing
- Update tests to use REQUIRE_FALSE(has_value()) for expected-returning methods
- Remove is_old_index parameter from TestBatchCalcDistanceById

Methods returning non-expected types (GetStats, GetMemoryUsageDetail, etc.) retain throw behavior as they cannot use tl::unexpected.

Closes #2140

Signed-off-by: LHT129 <tianlan.lht@antgroup.com>
1 parent 674eb3b commit 94d4646

18 files changed

Lines changed: 323 additions & 131 deletions

include/vsag/constants.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@ extern const char* const HGRAPH_PARAMETER_BRUTE_FORCE_THRESHOLD;
203203
extern const char* const HGRAPH_EXTRA_INFO_SIZE;
204204
extern const char* const HGRAPH_SUPPORT_DUPLICATE;
205205
extern const char* const HGRAPH_DUPLICATE_DISTANCE_THRESHOLD;
206+
extern const char* const HGRAPH_SUPPORT_TOMBSTONE;
206207
extern const char* const HGRAPH_LABEL_REMAP_TYPE;
207208
extern const char* const HGRAPH_USE_EXTRA_INFO_FILTER;
208209
extern const char* const STORE_RAW_VECTOR;

include/vsag/index.h

Lines changed: 108 additions & 70 deletions
Large diffs are not rendered by default.

src/algorithm/hgraph/hgraph.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ HGraph::HGraph(const HGraphParameterPtr& hgraph_param, const vsag::IndexCommonPa
6464
graph_type_(hgraph_param->graph_type),
6565
hierarchical_datacell_param_(hgraph_param->hierarchical_graph_param),
6666
use_old_serial_format_(common_param.use_old_serial_format_) {
67+
this->label_table_->support_tombstone_ = hgraph_param->support_tombstone;
6768
this->support_duplicate_ = hgraph_param->support_duplicate;
6869
neighbors_mutex_ = std::make_shared<PointsMutex>(0, common_param.allocator_.get());
6970
this->basic_flatten_codes_ =

src/algorithm/hgraph/hgraph.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,15 @@ class HGraph : public InnerIndexInterface {
370370
void
371371
elp_optimize();
372372

373+
void
374+
recover_remove(int64_t id);
375+
376+
bool
377+
try_recover_tombstone(const DatasetPtr& data, std::vector<int64_t>& failed_ids);
378+
379+
DatasetPtr
380+
get_single_dataset(const DatasetPtr& data, uint32_t j);
381+
373382
void
374383
check_and_init_raw_vector(const FlattenInterfaceParamPtr& raw_vector_param,
375384
const IndexCommonParam& common_param,

src/algorithm/hgraph/hgraph_build.cpp

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -287,18 +287,17 @@ HGraph::Add(const DatasetPtr& data, AddMode mode) {
287287
bool use_parallel_add = this->thread_pool_ != nullptr;
288288
Vector<std::pair<InnerIdType, LabelType>> inner_ids(allocator_);
289289
for (int64_t j = 0; j < total; ++j) {
290-
// Check if label already exists (skip removed IDs)
291-
{
292-
std::shared_lock label_lock(this->label_lookup_mutex_);
293-
auto [found, _] = this->label_table_->TryGetIdByLabel(labels[j]);
294-
if (found) {
295-
failed_ids.emplace_back(labels[j]);
290+
InnerIdType inner_id;
291+
292+
// try recover tombstone
293+
if (this->data_type_ != DataTypes::DATA_TYPE_SPARSE) {
294+
auto one_base = get_single_dataset(data, j);
295+
bool is_process_finished = try_recover_tombstone(one_base, failed_ids);
296+
if (is_process_finished) {
296297
continue;
297298
}
298299
}
299300

300-
InnerIdType inner_id;
301-
302301
{
303302
std::scoped_lock lock(this->add_mutex_);
304303
inner_id = this->get_unique_inner_ids(1).at(0);

src/algorithm/hgraph/hgraph_modify.cpp

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,100 @@ HGraph::shrink_to_fit() {
206206
label_table_->ShrinkToFit(total_count);
207207
}
208208

209+
void
210+
HGraph::recover_remove(int64_t id) {
211+
// note:
212+
// 1. this function doesn't recover entry_point and route_graphs caused by Remove()
213+
// 2. use this function only when is_tombstone is checked
214+
215+
std::shared_lock label_lock(this->label_lookup_mutex_);
216+
auto inner_id = this->label_table_->GetIdByLabel(id, true);
217+
this->bottom_graph_->RecoverDeleteNeighborsById(inner_id);
218+
this->label_table_->RecoverRemove(id);
219+
delete_count_--;
220+
}
221+
222+
/**
 * Build a one-element dataset that points at row `j` of `data`.
 *
 * The returned dataset carries the j-th id and the j-th vector of `data`;
 * the same vector address is published through both the float32 and the
 * int8 accessors, and Owner(false) is set on the result (presumably so it
 * does not free memory that still belongs to `data` - confirm against
 * Dataset).
 *
 * @param data source dataset holding the ids and vectors
 * @param j    zero-based row index inside `data`
 * @return dataset with NumElements(1) referring to row `j`
 */
DatasetPtr
HGraph::get_single_dataset(const DatasetPtr& data, uint32_t j) {
    void* base = nullptr;
    uint64_t bytes_per_vector = 0;
    // get_vectors is expected to fill in the base address and the per-vector
    // size in bytes for this index's data type.
    get_vectors(data_type_, dim_, data, &base, &bytes_per_vector);

    const auto* all_labels = data->GetIds();
    auto single = Dataset::Make();

    // Byte address of the j-th vector inside the source buffer.
    char* row = static_cast<char*>(base) + bytes_per_vector * j;

    single->Ids(all_labels + j)
        ->Float32Vectors(reinterpret_cast<float*>(row))
        ->Int8Vectors(reinterpret_cast<int8_t*>(row))
        ->NumElements(1)
        ->Owner(false);
    return single;
}
236+
237+
bool
238+
HGraph::try_recover_tombstone(const DatasetPtr& data, std::vector<int64_t>& failed_ids) {
239+
/*
240+
* return:
241+
* True : No processing required — data already exists or was recovered successfully
242+
* False: Processing required — data not found or recovery failed
243+
*
244+
*
245+
* [case 1] fail to insert -> continue + record failed id
246+
* exist + not delete : is_label_valid = true, is_tombstone = false
247+
*
248+
* [case 2] fail to recovery -> add process
249+
* exist + delete + not recovery: is_label_valid = false, is_tombstone = ture, is_recovered = false
250+
*
251+
* [case 3] tombstone recovery -> continue
252+
* exist + delete + recovery: is_label_valid = false, is_tombstone = ture, is_recovered = true
253+
*
254+
* [case 4] no old point -> add process
255+
* not exists + not delete: is_label_valid = false, is_tombstone = false
256+
*
257+
* [case 5] error
258+
* exists + deleted: is_label_valid = true, is_tombstone = true
259+
*/
260+
261+
auto label = data->GetIds()[0];
262+
263+
bool is_label_valid = false;
264+
bool is_tombstone = false;
265+
bool is_recovered = false;
266+
{
267+
std::scoped_lock label_lock(this->label_lookup_mutex_);
268+
is_label_valid = this->label_table_->CheckLabel(label);
269+
if (not is_label_valid) {
270+
is_tombstone = this->label_table_->IsTombstoneLabel(label);
271+
}
272+
}
273+
274+
if (is_tombstone) {
275+
try {
276+
// try recover and update
277+
recover_remove(label);
278+
auto update_res = UpdateVector(label, data, false);
279+
if (update_res) {
280+
// [case 3]
281+
is_recovered = true;
282+
return is_recovered;
283+
}
284+
// recover failed: roll back
285+
Remove({label});
286+
} catch (std::runtime_error& e) {
287+
// recover failed: roll back
288+
Remove({label});
289+
}
290+
}
291+
292+
// is_recovered = false
293+
if (is_label_valid) {
294+
// [case 1]
295+
failed_ids.emplace_back(label);
296+
return true;
297+
}
298+
299+
// [case 2, 4]
300+
return false;
301+
}
302+
209303
void
210304
HGraph::UpdateAttribute(int64_t id, const AttributeSet& new_attrs) {
211305
auto inner_id = this->label_table_->GetIdByLabel(id);

src/algorithm/hgraph/hgraph_param_mapping.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,12 @@ HGraph::map_hgraph_param(const JsonType& hgraph_json) {
361361
SUPPORT_DUPLICATE,
362362
},
363363
},
364+
{
365+
HGRAPH_SUPPORT_TOMBSTONE,
366+
{
367+
SUPPORT_TOMBSTONE,
368+
},
369+
},
364370
{
365371
HGRAPH_LABEL_REMAP_TYPE,
366372
{
@@ -453,6 +459,7 @@ HGraph::map_hgraph_param(const JsonType& hgraph_json) {
453459
"{ATTR_HAS_BUCKETS_KEY}": false
454460
},
455461
"{HGRAPH_SUPPORT_DUPLICATE}": false,
462+
"{HGRAPH_SUPPORT_TOMBSTONE}": false,
456463
"{SUPPORT_FORCE_REMOVE}": false,
457464
"{EF_CONSTRUCTION_KEY}": 400
458465
})";

src/algorithm/hgraph/hgraph_parameter.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,9 @@ HGraphParameter::FromJson(const JsonType& json) {
128128
if (json.Contains(DUPLICATE_DISTANCE_THRESHOLD)) {
129129
this->duplicate_distance_threshold = json[DUPLICATE_DISTANCE_THRESHOLD].GetFloat();
130130
}
131+
if (json.Contains(SUPPORT_TOMBSTONE)) {
132+
this->support_tombstone = json[SUPPORT_TOMBSTONE].GetBool();
133+
}
131134
if (json.Contains(SUPPORT_FORCE_REMOVE)) {
132135
this->support_force_remove = json[SUPPORT_FORCE_REMOVE].GetBool();
133136
}

src/algorithm/hgraph/hgraph_parameter.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ class HGraphParameter : public InnerIndexParameter {
6464

6565
bool support_duplicate{false};
6666
float duplicate_distance_threshold{0.0F};
67+
bool support_tombstone{false};
6768
bool support_force_remove{false};
6869

6970
DataTypes data_type{DataTypes::DATA_TYPE_FLOAT};

src/constants.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,7 @@ const char* const HGRAPH_PARAMETER_BRUTE_FORCE_THRESHOLD = "brute_force_threshol
183183
const char* const HGRAPH_EXTRA_INFO_SIZE = "extra_info_size";
184184
const char* const HGRAPH_SUPPORT_DUPLICATE = "support_duplicate";
185185
const char* const HGRAPH_DUPLICATE_DISTANCE_THRESHOLD = "duplicate_distance_threshold";
186+
const char* const HGRAPH_SUPPORT_TOMBSTONE = "support_tomb_stone";
186187
const char* const HGRAPH_LABEL_REMAP_TYPE = "label_remap_type";
187188
const char* const HGRAPH_USE_EXTRA_INFO_FILTER = "use_extra_info_filter";
188189
const char* const STORE_RAW_VECTOR = "store_raw_vector";

0 commit comments

Comments
 (0)