Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
c40e7fc
feat(eap-items): read array attributes from typed columns on the read…
claude Jun 26, 2026
320817f
fix(eap-items): use typed array columns in GROUP BY / ORDER BY to mat…
claude Jun 26, 2026
12d1e8c
test(eap-items): cover array reads before and after the typed-column …
claude Jun 26, 2026
ff2b4eb
feat(eap-items): read arrays from typed columns in get_trace and export
claude Jun 26, 2026
f796e15
ref(eap-items): read array columns with SubscriptableReference (map[k…
claude Jun 26, 2026
1b59e2a
ref(eap-items): select array columns like attributes_int/bool (non-bu…
claude Jun 26, 2026
7cc8760
fix(eap-items): gate export array columns on the routing-adjusted que…
claude Jun 26, 2026
8ef8f6d
fix(eap-items): handle typed array tuple delivered as a list in get_t…
claude Jun 26, 2026
1fef4e2
ref(eap-items): read array attributes as unpacked native typed sub-co…
claude Jun 26, 2026
1a6145f
ref(eap-items): trim verbose docstrings on the typed-array read path
claude Jun 26, 2026
6d118e3
ref(eap-items): dedupe the typed array column list into one constant
claude Jun 26, 2026
2d4887f
ref(eap-items): make array attributes select-only
claude Jun 26, 2026
b8b885f
ref(eap-items): keep array filtering; reject only aggregations/group-…
claude Jun 26, 2026
6c27916
ref(eap-items): narrow typed-column array filters to the value's type
claude Jun 26, 2026
6f08cfa
ref(eap-items): match typed array filters per column by coercing the …
claude Jun 26, 2026
f1fba46
fix(eap-items): surface an absent array attribute as NULL on select
claude Jun 26, 2026
f1d7d3b
test(eap-items): expect LIKE array filter to read only the string column
claude Jun 26, 2026
dfa2169
fix(eap-items): keep uniq aggregation on array attributes
claude Jun 26, 2026
3567f99
Merge branch 'master' into claude/remove-array-query-workaround-imcfu8
claude Jun 26, 2026
bf8b5e5
fix(eap-items): add strict= to zip() for ruff B905 after tooling merge
claude Jun 26, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 46 additions & 3 deletions snuba/protos/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,35 @@ def type_array_to_stored_array_json_path(attr_key: AttributeKey) -> JsonPath:
)


# Names of the typed array map columns (Map(String, Array(T))), one per element type,
# in element-type order: attributes_array_string, attributes_array_int,
# attributes_array_float, attributes_array_bool. Used both by the per-attribute SELECT
# (type_array_typed_columns_select_expressions) and by the whole-map reads / merges in
# snuba.web.rpc.common.common, so the two sides always iterate the same columns in the
# same order.
TYPED_ARRAY_MAP_COLUMNS: tuple[str, ...] = tuple(
    f"attributes_array_{element_type}" for element_type in ("string", "int", "float", "bool")
)


def type_array_typed_columns_select_expressions(attr_key: AttributeKey) -> list[FunctionCall]:
    """Build one ``arrayElement`` read per typed array map column for a TYPE_ARRAY key.

    Every expression looks ``attr_key.name`` up in one column of
    ``TYPED_ARRAY_MAP_COLUMNS`` and is aliased ``"<label_mapping_key>.<column>"``.
    The returned list has one entry per column, in ``TYPED_ARRAY_MAP_COLUMNS`` order;
    merging the sub-columns back into a single array is left to the caller.

    Raises:
        MalformedAttributeException: if ``attr_key`` is not of type ``TYPE_ARRAY``.
    """
    if attr_key.type != AttributeKey.Type.TYPE_ARRAY:
        actual_type = AttributeKey.Type.Name(attr_key.type)
        raise MalformedAttributeException(
            f"type_array_typed_columns_select_expressions expected TYPE_ARRAY, got {actual_type}"
        )

    alias_prefix = _build_label_mapping_key(attr_key)
    reads: list[FunctionCall] = []
    for typed_col in TYPED_ARRAY_MAP_COLUMNS:
        reads.append(
            arrayElement(
                f"{alias_prefix}.{typed_col}",
                column(typed_col),
                literal(attr_key.name),
            )
        )
    return reads


def type_array_to_membership_array_expression_from_typed_columns(
attr_key: AttributeKey,
) -> FunctionCall:
Expand Down Expand Up @@ -260,6 +289,21 @@ def _to_string_elements(col_name: str) -> FunctionCall:
)


def type_array_typed_column_native_array(attr_key: AttributeKey, col: str) -> FunctionCall:
    """Read ``attr_key.name`` out of one typed array map column as a native array.

    The result is ``arrayElement(<col>, <attr name>)`` with no string conversion, so the
    caller can compare its elements against a value of that column's own type. The
    expression is aliased ``"<label_mapping_key>__array_members_<col>"``.

    Args:
        attr_key: the TYPE_ARRAY attribute to read.
        col: name of the typed array map column to read from.

    Raises:
        MalformedAttributeException: if ``attr_key`` is not of type ``TYPE_ARRAY``.
    """
    if attr_key.type != AttributeKey.Type.TYPE_ARRAY:
        actual_type = AttributeKey.Type.Name(attr_key.type)
        raise MalformedAttributeException(
            f"type_array_typed_column_native_array expected TYPE_ARRAY, got {actual_type}"
        )

    label_key = _build_label_mapping_key(attr_key)
    return arrayElement(
        f"{label_key}__array_members_{col}",
        column(col),
        literal(attr_key.name),
    )


def attribute_key_to_expression(attr_key: AttributeKey) -> Expression:
"""Convert an AttributeKey proto to a Snuba Expression.

Expand Down Expand Up @@ -317,9 +361,8 @@ def attribute_key_to_expression(attr_key: AttributeKey) -> Expression:
)

if attr_key.type == AttributeKey.Type.TYPE_ARRAY:
# Tagged array under attributes_array.* as Array(JSON). Select toJSONString(...)
# so the result column is String; callers decode in application code. Raw
# Array(JSON) is not returned in the SELECT to avoid native client limits.
# Legacy JSON column (used pre-cutoff and for aggregations); the typed-column read
# path is built separately (type_array_typed_columns_select_expressions).
return FunctionCall(
alias=alias,
function_name="toJSONString",
Expand Down
171 changes: 168 additions & 3 deletions snuba/web/rpc/common/common.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json
import math
from collections.abc import Callable
from collections.abc import Callable, Iterable
from datetime import UTC, datetime, timedelta
from typing import Any, TypeVar, cast

Expand All @@ -20,9 +20,11 @@
COLUMN_PREFIX,
PROTO_TYPE_TO_ATTRIBUTE_COLUMN,
PROTO_TYPE_TO_CLICKHOUSE_TYPE,
TYPED_ARRAY_MAP_COLUMNS,
MalformedAttributeException,
type_array_to_membership_array_expression,
type_array_to_membership_array_expression_from_typed_columns,
type_array_typed_column_native_array,
)
from snuba.protos.common import (
attribute_key_to_expression as _attribute_key_to_expression,
Expand Down Expand Up @@ -179,6 +181,60 @@ def attributes_array_selected_expressions() -> list[SelectedExpression]:
]


def typed_array_map_selected_expressions() -> list[SelectedExpression]:
    """Select every typed array map column whole, one ``SelectedExpression`` per column.

    Each column in ``TYPED_ARRAY_MAP_COLUMNS`` is selected under its own name and
    aliased to that same name, so result rows carry the maps keyed by column name
    (the shape ``merge_typed_array_maps`` pops from a row).
    """
    selected: list[SelectedExpression] = []
    for map_col in TYPED_ARRAY_MAP_COLUMNS:
        selected.append(SelectedExpression(map_col, column(map_col, alias=map_col)))
    return selected


def merge_typed_array_maps(row: dict[str, Any]) -> list[tuple[str, list[Any]]]:
    """Remove the typed array map columns from ``row`` and merge them per attribute.

    Each map column is expected to look like ``{attribute_name: [elements]}``; a
    missing or empty column is treated as an empty map. The columns are visited in
    ``TYPED_ARRAY_MAP_COLUMNS`` order and an attribute found in several of them gets
    its elements concatenated in that order, so element order across different
    columns is not preserved.

    Note that ``row`` is mutated: the map columns are popped from it.

    Returns:
        ``(attribute_name, elements)`` pairs, ordered by first appearance of the name.
    """
    # A dict keeps insertion order, which is exactly the first-seen order we return.
    elements_by_name: dict[str, list[Any]] = {}
    for map_col in TYPED_ARRAY_MAP_COLUMNS:
        typed_map = row.pop(map_col, None) or {}
        for attr_name, attr_values in typed_map.items():
            elements_by_name.setdefault(attr_name, []).extend(attr_values)
    return list(elements_by_name.items())


def typed_array_select_subcolumn_name(base: str, typed_col: str) -> str:
    """Return ``"<base>.<typed_col>"``, the result-column name of one typed sub-column.

    ``base`` identifies the array attribute being selected and ``typed_col`` is the
    typed array map column the sub-column was read from.
    """
    return "{}.{}".format(base, typed_col)


def merge_typed_array_subcolumns(
    row: dict[str, Any], bases: Iterable[str]
) -> list[tuple[str, list[Any]]]:
    """Remove each base's typed sub-columns from ``row`` and merge them per base.

    For every ``base`` the result columns named
    ``typed_array_select_subcolumn_name(base, <typed column>)`` are popped from ``row``
    (mutating it) in ``TYPED_ARRAY_MAP_COLUMNS`` order, and their non-empty values are
    concatenated in that order.

    Returns:
        One ``(base, elements)`` pair per base, in the order ``bases`` yields them.
        ``elements`` is an empty list when no sub-column held a value.
    """
    result: list[tuple[str, list[Any]]] = []
    for base in bases:
        combined: list[Any] = []
        for typed_col in TYPED_ARRAY_MAP_COLUMNS:
            subcolumn = typed_array_select_subcolumn_name(base, typed_col)
            # Absent and empty sub-columns contribute nothing; still pop them all.
            combined.extend(row.pop(subcolumn, None) or ())
        result.append((base, combined))
    return result
Comment on lines +220 to +235

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

have we considered (and rejected) not doing this for our upstream clients?

I understand right now that we allow them to treat arrays as heterogeneous, and we need to do this mapping for a transition, but it would be nice if we could drop all this pre/post-processing later

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's accepted we're not going to support heterogeneous arrays. The problem I'm trying to dance around here is we don't have a good type system for array attributes.

We defined a TYPE_ARRAY that doesn't tell you what scalar values you store, so when you query the array attribute, it has to look into the 4 columns and "merge", basically picking whatever value exists. And then this would pick the one that returned something and extend the results.

It's an evolution of a function that was trying to merge arrays with different types together, which is why it's complicated; I can definitely simplify it.



def decode_attributes_array_value(key: str, raw: Any) -> list[Any] | str | None:
"""Decode a `toJSONString(...:Array(JSON))` payload for an allowlisted path.

Expand Down Expand Up @@ -644,6 +700,96 @@ def _type_array_includes_scalar_expression(
return f.arrayExists(Lambda(None, ("x",), f.equals(x, rhs)), array_expr)


def _coerce_int(s: str) -> int | None:
try:
return int(s)
except ValueError:
return None


def _coerce_float(s: str) -> float | None:
try:
return float(s)
except ValueError:
return None


def _typed_array_native_membership_candidates(
    v: AttributeValue,
) -> list[tuple[str, Expression]]:
    """List the ``(typed column, native rhs)`` pairs an array-membership value can match.

    The filter key only says TYPE_ARRAY, not the element type, so the value decides
    which ``attributes_array_*`` columns are searched:

    * ``val_str``: always the string column; additionally the int and/or float column
      when the text parses as such, and the bool column when the text is ``true`` or
      ``false`` (case-insensitive).
    * ``val_int``: the int column, plus the float column with the value as a float.
    * ``val_float`` / ``val_double``: the float column.
    * ``val_bool``: the bool column.

    The caller is expected to OR the per-column comparisons together.

    Raises:
        BadSnubaRPCRequestException: for any other (or unset) value kind.
    """
    kind = v.WhichOneof("value")

    if kind == "val_str":
        text = v.val_str
        pairs: list[tuple[str, Expression]] = [("attributes_array_string", literal(text))]
        as_int = _coerce_int(text)
        if as_int is not None:
            pairs.append(("attributes_array_int", literal(as_int)))
        as_float = _coerce_float(text)
        if as_float is not None:
            pairs.append(("attributes_array_float", literal(as_float)))
        lowered = text.lower()
        if lowered in ("true", "false"):
            pairs.append(("attributes_array_bool", literal(lowered == "true")))
        return pairs

    if kind == "val_int":
        return [
            ("attributes_array_int", literal(v.val_int)),
            ("attributes_array_float", literal(float(v.val_int))),
        ]

    if kind in ("val_float", "val_double"):
        return [("attributes_array_float", literal(getattr(v, kind)))]

    if kind == "val_bool":
        return [("attributes_array_bool", literal(v.val_bool))]

    raise BadSnubaRPCRequestException(
        f"unsupported AttributeValue for array membership: {kind}"
    )


def _typed_array_includes_scalar_expression(
    attr_key: AttributeKey,
    v: AttributeValue,
    ignore_case: bool,
) -> Expression:
    """Build "some element equals ``v``" against the typed ``attributes_array_*`` columns.

    One ``arrayExists`` is built per candidate column returned by
    ``_typed_array_native_membership_candidates``; a single candidate is returned as
    is, several are OR-ed together. With ``ignore_case`` the string column compares
    ``lower(element)`` with ``lower(value)``; the other columns always compare exactly.

    Raises:
        BadSnubaRPCRequestException: if ``v`` is a null value.
    """
    if v.WhichOneof("value") == "val_null" or v.is_null:
        raise BadSnubaRPCRequestException("Arrays can't be NULL or cannot have NULL elements")

    per_column: list[Expression] = []
    for typed_col, native_rhs in _typed_array_native_membership_candidates(v):
        elements = type_array_typed_column_native_array(attr_key, typed_col)
        element = Argument(None, "x")
        # Case folding only makes sense for string elements.
        fold_case = ignore_case and typed_col == "attributes_array_string"
        if fold_case:
            comparison = f.equals(f.lower(element), f.lower(native_rhs))
        else:
            comparison = f.equals(element, native_rhs)
        per_column.append(f.arrayExists(Lambda(None, ("x",), comparison), elements))

    if len(per_column) != 1:
        return or_cond(per_column[0], per_column[1], *per_column[2:])
    return per_column[0]


def _typed_array_like_expression(
    attr_key: AttributeKey, pattern: Expression, ignore_case: bool
) -> Expression:
    """Build "some element matches ``pattern``" against the typed array columns.

    Only ``attributes_array_string`` is read, since a LIKE pattern can only match
    string elements. ``ignore_case`` switches the element test from ``like`` to
    ``ilike``.
    """
    string_elements = type_array_typed_column_native_array(attr_key, "attributes_array_string")
    if ignore_case:
        matcher = f.ilike
    else:
        matcher = f.like
    element_matches = Lambda(None, ("x",), matcher(Argument(None, "x"), pattern))
    return f.arrayExists(element_matches, string_elements)


def _any_attribute_filter_to_expression(
filt: AnyAttributeFilter,
*,
Expand Down Expand Up @@ -929,6 +1075,10 @@ def trace_item_filters_to_expression(
_check_non_string_values_cannot_ignore_case(item_filter.comparison_filter)

if k.type == AttributeKey.Type.TYPE_ARRAY:
if use_array_map_columns:
return _typed_array_includes_scalar_expression(
k, v, item_filter.comparison_filter.ignore_case
)
return _type_array_includes_scalar_expression(
k_expression, v, item_filter.comparison_filter.ignore_case
)
Expand Down Expand Up @@ -959,11 +1109,16 @@ def trace_item_filters_to_expression(
if op == ComparisonFilter.OP_NOT_EQUALS:
_check_non_string_values_cannot_ignore_case(item_filter.comparison_filter)
if k.type == AttributeKey.Type.TYPE_ARRAY:
return not_cond(
_type_array_includes_scalar_expression(
includes = (
_typed_array_includes_scalar_expression(
k, v, item_filter.comparison_filter.ignore_case
)
if use_array_map_columns
else _type_array_includes_scalar_expression(
k_expression, v, item_filter.comparison_filter.ignore_case
)
)
return not_cond(includes)
if _contains_subscriptable_reference(k_expression):
# Negation of OP_EQUALS; an absent key is "not equal".
value, exists = _map_backed_operands(k)
Expand All @@ -988,6 +1143,10 @@ def trace_item_filters_to_expression(
return expr_with_null
if op == ComparisonFilter.OP_LIKE:
if k.type == AttributeKey.Type.TYPE_ARRAY:
if use_array_map_columns:
return _typed_array_like_expression(
k, v_expression, item_filter.comparison_filter.ignore_case
)
like_fn = f.ilike if item_filter.comparison_filter.ignore_case else f.like
return f.arrayExists(
Lambda(
Expand All @@ -1008,6 +1167,12 @@ def trace_item_filters_to_expression(
return comparison_function(k_expression, v_expression)
if op == ComparisonFilter.OP_NOT_LIKE:
if k.type == AttributeKey.Type.TYPE_ARRAY:
if use_array_map_columns:
return not_cond(
_typed_array_like_expression(
k, v_expression, item_filter.comparison_filter.ignore_case
)
)
like_fn = f.ilike if item_filter.comparison_filter.ignore_case else f.like
return not_cond(
f.arrayExists(
Expand Down
Loading
Loading