Skip to content

Commit 904d4c0

Browse files
committed
[fix](variant) allow inverted index pushdown for cast predicates on variant subcolumns
1 parent 8e79778 commit 904d4c0

3 files changed

Lines changed: 124 additions & 6 deletions

File tree

be/src/storage/segment/segment.h

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@
3333
#include "common/status.h" // Status
3434
#include "core/column/column.h"
3535
#include "core/data_type/data_type.h"
36+
#include "core/data_type/data_type_nullable.h"
37+
#include "core/data_type/primitive_type.h"
3638
#include "io/fs/file_reader.h"
3739
#include "io/fs/file_reader_writer_fwd.h"
3840
#include "io/fs/file_system.h"
@@ -182,7 +184,23 @@ class Segment : public std::enable_shared_from_this<Segment>, public MetadataAdd
182184
std::shared_ptr<const IDataType> get_data_type_of(const TabletColumn& column,
183185
const StorageReadOptions& read_options);
184186

185-
// If column in segment is the same type in schema, then it is safe to apply predicate.
187+
static bool _is_variant_predicate_type_compatible(const DataTypePtr& storage_type,
188+
const DataTypePtr& target_type) {
189+
auto normalized_storage_type = remove_nullable(storage_type);
190+
auto normalized_target_type = remove_nullable(target_type);
191+
if (normalized_storage_type->equals(*normalized_target_type)) {
192+
return true;
193+
}
194+
195+
auto storage_primitive_type = normalized_storage_type->get_primitive_type();
196+
auto target_primitive_type = normalized_target_type->get_primitive_type();
197+
return (is_int(storage_primitive_type) && is_int(target_primitive_type)) ||
198+
(is_string_type(storage_primitive_type) && is_string_type(target_primitive_type));
199+
}
200+
201+
// If column in segment is compatible with the cast target in schema, then it is safe to
202+
// apply predicate. Variant subcolumns may be inferred as nullable or promoted within the
203+
// same primitive family, while still using the same inverted index encoding/query path.
186204
bool can_apply_predicate_safely(
187205
int cid, const Schema& schema,
188206
const std::map<std::string, DataTypePtr>& target_cast_type_for_variants,
@@ -195,11 +213,8 @@ class Segment : public std::enable_shared_from_this<Segment>, public MetadataAdd
195213
// Default column iterator or not variant column
196214
return true;
197215
}
198-
if (storage_column_type->equals(*target_cast_type_for_variants.at(col->name()))) {
199-
return true;
200-
} else {
201-
return false;
202-
}
216+
return _is_variant_predicate_type_compatible(storage_column_type,
217+
target_cast_type_for_variants.at(col->name()));
203218
}
204219

205220
const TabletSchemaSPtr& tablet_schema() const { return _tablet_schema; }
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
-- This file is automatically generated. You should know what you did if you want to edit this
2+
-- !sql1 --
3+
13 13
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
import java.util.regex.Pattern
19+
import org.apache.doris.regression.action.ProfileAction
20+
21+
suite("test_variant_inverted_index_cast", "nonConcurrent") {
22+
def tableName = "test_variant_inverted_index_cast"
23+
24+
def fetchProfileText = { sqlText ->
25+
def profileAction = new ProfileAction(context)
26+
def profiles = profileAction.getProfileList()
27+
assertTrue(profiles.size() > 0)
28+
29+
def profileId = null
30+
for (def profile in profiles) {
31+
if (profile["Sql Statement"].contains(sqlText)) {
32+
profileId = profile["Profile ID"]
33+
break
34+
}
35+
}
36+
assertTrue(profileId != null)
37+
return profileAction.getProfile(profileId)
38+
}
39+
40+
def assertIndexFilterHit = { profileText ->
41+
assertTrue(profileText.contains("IndexFilter:"))
42+
assertTrue(Pattern.compile("HitRows(?:_\\d+)?:\\s*1").matcher(profileText).find())
43+
assertTrue(Pattern.compile("ScanRows:\\s*(?:sum\\s+)?1\\b").matcher(profileText).find())
44+
}
45+
46+
sql """ DROP TABLE IF EXISTS ${tableName} """
47+
sql """
48+
CREATE TABLE ${tableName} (
49+
row_id BIGINT,
50+
v VARIANT,
51+
INDEX idx_v(v) USING INVERTED COMMENT ''
52+
)
53+
ENGINE=OLAP
54+
DUPLICATE KEY(row_id)
55+
DISTRIBUTED BY HASH(row_id) BUCKETS 1
56+
PROPERTIES (
57+
"replication_allocation" = "tag.location.default: 1",
58+
"disable_auto_compaction" = "true",
59+
"inverted_index_storage_format" = "v2"
60+
)
61+
"""
62+
63+
sql """
64+
INSERT INTO ${tableName} VALUES
65+
(1, '{"int_key": 1}'),
66+
(2, '{"int_key": 2}'),
67+
(3, '{"int_key": 3}'),
68+
(4, '{"int_key": 4}'),
69+
(5, '{"int_key": 5}'),
70+
(6, '{"int_key": 6}'),
71+
(7, '{"int_key": 7}'),
72+
(8, '{"int_key": 8}'),
73+
(9, '{"int_key": 9}'),
74+
(10, '{"int_key": 10}'),
75+
(11, '{"int_key": 11}'),
76+
(12, '{"int_key": 12}'),
77+
(13, '{"int_key": 13}'),
78+
(14, '{"int_key": 14}'),
79+
(15, '{"int_key": 15}'),
80+
(16, '{"int_key": 16}'),
81+
(17, '{"int_key": 17}'),
82+
(18, '{"int_key": 18}'),
83+
(19, '{"int_key": 19}'),
84+
(20, '{"int_key": 20}')
85+
"""
86+
87+
sql """ set enable_profile = true """
88+
sql """ set profile_level = 2 """
89+
sql """ set enable_common_expr_pushdown = true """
90+
sql """ set enable_common_expr_pushdown_for_inverted_index = true """
91+
sql """ clean all profile """
92+
93+
def castSql = """SELECT row_id, cast(v["int_key"] as int) AS int_key
94+
FROM ${tableName}
95+
WHERE cast(v["int_key"] as int) = 13"""
96+
qt_sql1 """ ${castSql} """
97+
assertIndexFilterHit(fetchProfileText(castSql))
98+
99+
sql """ set enable_profile = false """
100+
}

0 commit comments

Comments
 (0)