From 3a5f78fbe6e6b239b493ebb48f72cbedeb6de571 Mon Sep 17 00:00:00 2001 From: Matt Quinn Date: Wed, 24 Jun 2026 15:49:12 -0400 Subject: [PATCH 1/2] feat(replays): Add segment_names column --- rust_snuba/src/processors/replays.rs | 14 +++++ ...ingest-replay-events__1__archive.json.snap | 1 + ...play-events__1__click-serialized.json.snap | 1 + ...r-ingest-replay-events__1__click.json.snap | 1 + ...est-replay-events__1__event-link.json.snap | 1 + ...ingest-replay-events__1__segment.json.snap | 1 + ...-ingest-replay-events__1__viewed.json.snap | 1 + .../replays/entities/replays.yaml | 1 + .../replays/storages/replays.yaml | 1 + .../replays/0025_add_segment_names_column.py | 57 +++++++++++++++++++ tests/fixtures.py | 1 + tests/test_replays_api.py | 28 +++++++++ 12 files changed, 108 insertions(+) create mode 100644 snuba/snuba_migrations/replays/0025_add_segment_names_column.py diff --git a/rust_snuba/src/processors/replays.rs b/rust_snuba/src/processors/replays.rs index 1ef1a4548f6..21189f91908 100644 --- a/rust_snuba/src/processors/replays.rs +++ b/rust_snuba/src/processors/replays.rs @@ -184,6 +184,7 @@ pub fn deserialize_message( timestamp: event.timestamp as u32, trace_ids: event.trace_ids.unwrap_or_default(), urls: event.urls.unwrap_or_default(), + segment_names: event.segment_names.unwrap_or_default(), user, user_email: event.user.email.unwrap_or_default(), user_id: user_id.unwrap_or_default(), @@ -374,6 +375,8 @@ struct ReplayEvent { #[serde(default)] urls: Option<Vec<String>>, #[serde(default)] + segment_names: Option<Vec<String>>, + #[serde(default)] user: User, #[serde(default)] trace_ids: Option<Vec<Uuid>>, @@ -551,6 +554,7 @@ pub struct ReplayRow { title: Option<String>, trace_ids: Vec<Uuid>, urls: Vec<String>, + segment_names: Vec<String>, user_email: String, user_id: String, user_name: String, @@ -612,6 +616,7 @@ mod tests { "replay_start_timestamp": 1702659277, "replay_type": "buffer", "urls": ["urls"], + "segment_names": ["segment1", "segment2"], "trace_ids": ["2cd798d70f9346089026d2014a826629"], "error_ids": 
["df11e6d952da470386a64340f13151c4"], "tags": [ @@ -694,6 +699,7 @@ mod tests { "replay_start_timestamp": 1702659277, "replay_type": "buffer", "urls": ["urls"], + "segment_names": ["segment1", "segment2"], "trace_ids": ["2cd798d70f9346089026d2014a826629"], "error_ids": ["df11e6d952da470386a64340f13151c4"], "tags": [ @@ -772,6 +778,7 @@ mod tests { vec![Uuid::parse_str("2cd798d70f9346089026d2014a826629").unwrap()] ); assert_eq!(replay_row.urls, vec!["urls"]); + assert_eq!(replay_row.segment_names, vec!["segment1", "segment2"]); // Default columns - not providable on this event. assert_eq!(&replay_row.click_alt, ""); @@ -812,6 +819,7 @@ mod tests { ["transaction.name", null] ], "urls": null, + "segment_names": null, "is_archived": null, "trace_ids": null, "error_ids": null, @@ -921,6 +929,7 @@ mod tests { assert_eq!(replay_row.title, None); assert_eq!(replay_row.trace_ids, vec![]); assert_eq!(replay_row.urls, Vec::<String>::new()); + assert_eq!(replay_row.segment_names, Vec::<String>::new()); // Default columns - not providable on this event. 
assert_eq!(&replay_row.click_alt, ""); @@ -1044,6 +1053,7 @@ mod tests { assert_eq!(replay_row.title, None); assert_eq!(replay_row.trace_ids, vec![]); assert_eq!(replay_row.urls, Vec::<String>::new()); + assert_eq!(replay_row.segment_names, Vec::<String>::new()); assert_eq!(replay_row.viewed_by_id, 0); assert_eq!(replay_row.warning_id, Uuid::nil()); } @@ -1129,6 +1139,7 @@ mod tests { assert_eq!(replay_row.title, None); assert_eq!(replay_row.trace_ids, vec![]); assert_eq!(replay_row.urls, Vec::<String>::new()); + assert_eq!(replay_row.segment_names, Vec::<String>::new()); assert_eq!(replay_row.viewed_by_id, 0); assert_eq!(replay_row.warning_id, Uuid::nil()); } @@ -1224,6 +1235,7 @@ mod tests { assert_eq!(replay_row.title, None); assert_eq!(replay_row.trace_ids, vec![]); assert_eq!(replay_row.urls, Vec::<String>::new()); + assert_eq!(replay_row.segment_names, Vec::<String>::new()); assert_eq!(replay_row.viewed_by_id, 0); } @@ -1307,6 +1319,7 @@ mod tests { assert_eq!(replay_row.title, None); assert_eq!(replay_row.trace_ids, vec![]); assert_eq!(replay_row.urls, Vec::<String>::new()); + assert_eq!(replay_row.segment_names, Vec::<String>::new()); assert_eq!(replay_row.viewed_by_id, 0); assert_eq!(replay_row.warning_id, Uuid::nil()); } @@ -1391,6 +1404,7 @@ mod tests { assert_eq!(replay_row.title, None); assert_eq!(replay_row.trace_ids, vec![]); assert_eq!(replay_row.urls, Vec::<String>::new()); + assert_eq!(replay_row.segment_names, Vec::<String>::new()); assert_eq!(&replay_row.user_email, ""); assert_eq!(&replay_row.user_id, ""); assert_eq!(&replay_row.user_name, ""); diff --git a/rust_snuba/src/processors/snapshots/rust_snuba__processors__tests__schemas@ingest-replay-events-ReplaysProcessor-ingest-replay-events__1__archive.json.snap b/rust_snuba/src/processors/snapshots/rust_snuba__processors__tests__schemas@ingest-replay-events-ReplaysProcessor-ingest-replay-events__1__archive.json.snap index 54a08becb20..e45dbb77e9d 100644 --- 
a/rust_snuba/src/processors/snapshots/rust_snuba__processors__tests__schemas@ingest-replay-events-ReplaysProcessor-ingest-replay-events__1__archive.json.snap +++ b/rust_snuba/src/processors/snapshots/rust_snuba__processors__tests__schemas@ingest-replay-events-ReplaysProcessor-ingest-replay-events__1__archive.json.snap @@ -53,6 +53,7 @@ expression: snapshot_payload "sdk_name": "", "sdk_version": "", "segment_id": null, + "segment_names": [], "session_sample_rate": -1.0, "tags.key": [], "tags.value": [], diff --git a/rust_snuba/src/processors/snapshots/rust_snuba__processors__tests__schemas@ingest-replay-events-ReplaysProcessor-ingest-replay-events__1__click-serialized.json.snap b/rust_snuba/src/processors/snapshots/rust_snuba__processors__tests__schemas@ingest-replay-events-ReplaysProcessor-ingest-replay-events__1__click-serialized.json.snap index 2e917f31c32..98cdad0665f 100644 --- a/rust_snuba/src/processors/snapshots/rust_snuba__processors__tests__schemas@ingest-replay-events-ReplaysProcessor-ingest-replay-events__1__click-serialized.json.snap +++ b/rust_snuba/src/processors/snapshots/rust_snuba__processors__tests__schemas@ingest-replay-events-ReplaysProcessor-ingest-replay-events__1__click-serialized.json.snap @@ -56,6 +56,7 @@ expression: snapshot_payload "sdk_name": "", "sdk_version": "", "segment_id": null, + "segment_names": [], "session_sample_rate": -1.0, "tags.key": [], "tags.value": [], diff --git a/rust_snuba/src/processors/snapshots/rust_snuba__processors__tests__schemas@ingest-replay-events-ReplaysProcessor-ingest-replay-events__1__click.json.snap b/rust_snuba/src/processors/snapshots/rust_snuba__processors__tests__schemas@ingest-replay-events-ReplaysProcessor-ingest-replay-events__1__click.json.snap index 316e965a385..3145a2ca57a 100644 --- a/rust_snuba/src/processors/snapshots/rust_snuba__processors__tests__schemas@ingest-replay-events-ReplaysProcessor-ingest-replay-events__1__click.json.snap +++ 
b/rust_snuba/src/processors/snapshots/rust_snuba__processors__tests__schemas@ingest-replay-events-ReplaysProcessor-ingest-replay-events__1__click.json.snap @@ -56,6 +56,7 @@ expression: snapshot_payload "sdk_name": "", "sdk_version": "", "segment_id": null, + "segment_names": [], "session_sample_rate": -1.0, "tags.key": [], "tags.value": [], diff --git a/rust_snuba/src/processors/snapshots/rust_snuba__processors__tests__schemas@ingest-replay-events-ReplaysProcessor-ingest-replay-events__1__event-link.json.snap b/rust_snuba/src/processors/snapshots/rust_snuba__processors__tests__schemas@ingest-replay-events-ReplaysProcessor-ingest-replay-events__1__event-link.json.snap index dab85c5eb97..56bccfe2c60 100644 --- a/rust_snuba/src/processors/snapshots/rust_snuba__processors__tests__schemas@ingest-replay-events-ReplaysProcessor-ingest-replay-events__1__event-link.json.snap +++ b/rust_snuba/src/processors/snapshots/rust_snuba__processors__tests__schemas@ingest-replay-events-ReplaysProcessor-ingest-replay-events__1__event-link.json.snap @@ -53,6 +53,7 @@ expression: snapshot_payload "sdk_name": "", "sdk_version": "", "segment_id": null, + "segment_names": [], "session_sample_rate": -1.0, "tags.key": [], "tags.value": [], diff --git a/rust_snuba/src/processors/snapshots/rust_snuba__processors__tests__schemas@ingest-replay-events-ReplaysProcessor-ingest-replay-events__1__segment.json.snap b/rust_snuba/src/processors/snapshots/rust_snuba__processors__tests__schemas@ingest-replay-events-ReplaysProcessor-ingest-replay-events__1__segment.json.snap index 01e0901f0c0..779583477f7 100644 --- a/rust_snuba/src/processors/snapshots/rust_snuba__processors__tests__schemas@ingest-replay-events-ReplaysProcessor-ingest-replay-events__1__segment.json.snap +++ b/rust_snuba/src/processors/snapshots/rust_snuba__processors__tests__schemas@ingest-replay-events-ReplaysProcessor-ingest-replay-events__1__segment.json.snap @@ -53,6 +53,7 @@ expression: snapshot_payload "sdk_name": 
"sentry.javascript.browser", "sdk_version": "7.52.1", "segment_id": 1, + "segment_names": [], "session_sample_rate": -1.0, "tags.key": [ "correlationId" diff --git a/rust_snuba/src/processors/snapshots/rust_snuba__processors__tests__schemas@ingest-replay-events-ReplaysProcessor-ingest-replay-events__1__viewed.json.snap b/rust_snuba/src/processors/snapshots/rust_snuba__processors__tests__schemas@ingest-replay-events-ReplaysProcessor-ingest-replay-events__1__viewed.json.snap index dfcb451e8f7..6bf8fd5b105 100644 --- a/rust_snuba/src/processors/snapshots/rust_snuba__processors__tests__schemas@ingest-replay-events-ReplaysProcessor-ingest-replay-events__1__viewed.json.snap +++ b/rust_snuba/src/processors/snapshots/rust_snuba__processors__tests__schemas@ingest-replay-events-ReplaysProcessor-ingest-replay-events__1__viewed.json.snap @@ -53,6 +53,7 @@ expression: snapshot_payload "sdk_name": "", "sdk_version": "", "segment_id": null, + "segment_names": [], "session_sample_rate": -1.0, "tags.key": [], "tags.value": [], diff --git a/snuba/datasets/configuration/replays/entities/replays.yaml b/snuba/datasets/configuration/replays/entities/replays.yaml index 9cbd7531550..c77242416b7 100644 --- a/snuba/datasets/configuration/replays/entities/replays.yaml +++ b/snuba/datasets/configuration/replays/entities/replays.yaml @@ -52,6 +52,7 @@ schema: }, { name: url, type: String, args: { schema_modifiers: [nullable] } }, { name: urls, type: Array, args: { inner_type: { type: String } } }, + { name: segment_names, type: Array, args: { inner_type: { type: String } } }, { name: count_urls, type: UInt, diff --git a/snuba/datasets/configuration/replays/storages/replays.yaml b/snuba/datasets/configuration/replays/storages/replays.yaml index a53170b2082..d0e78df21e2 100644 --- a/snuba/datasets/configuration/replays/storages/replays.yaml +++ b/snuba/datasets/configuration/replays/storages/replays.yaml @@ -50,6 +50,7 @@ schema: }, { name: url, type: String, args: { schema_modifiers: [nullable] 
} }, { name: urls, type: Array, args: { inner_type: { type: String } } }, + { name: segment_names, type: Array, args: { inner_type: { type: String } } }, { name: is_archived, type: UInt, diff --git a/snuba/snuba_migrations/replays/0025_add_segment_names_column.py b/snuba/snuba_migrations/replays/0025_add_segment_names_column.py new file mode 100644 index 00000000000..c3578518bee --- /dev/null +++ b/snuba/snuba_migrations/replays/0025_add_segment_names_column.py @@ -0,0 +1,57 @@ +from typing import Iterator, Sequence + +from snuba.clusters.storage_sets import StorageSetKey +from snuba.migrations.columns import MigrationModifiers as Modifiers +from snuba.migrations.migration import ClickhouseNodeMigration +from snuba.migrations.operations import ( + AddColumn, + DropColumn, + OperationTarget, + SqlOperation, +) +from snuba.utils.schemas import Array, Column, String + +column: Column[Modifiers] = Column("segment_names", Array(String())) +after = "urls" + + +class Migration(ClickhouseNodeMigration): + blocking = False + + def forwards_ops(self) -> Sequence[SqlOperation]: + return list(forward_iter()) + + def backwards_ops(self) -> Sequence[SqlOperation]: + return list(backward_iter()) + + +def forward_iter() -> Iterator[SqlOperation]: + yield AddColumn( + storage_set=StorageSetKey.REPLAYS, + table_name="replays_local", + column=column, + after=after, + target=OperationTarget.LOCAL, + ) + yield AddColumn( + storage_set=StorageSetKey.REPLAYS, + table_name="replays_dist", + column=column, + after=after, + target=OperationTarget.DISTRIBUTED, + ) + + +def backward_iter() -> Iterator[SqlOperation]: + yield DropColumn( + storage_set=StorageSetKey.REPLAYS, + table_name="replays_dist", + target=OperationTarget.DISTRIBUTED, + column_name=column.name, + ) + yield DropColumn( + storage_set=StorageSetKey.REPLAYS, + table_name="replays_local", + target=OperationTarget.LOCAL, + column_name=column.name, + ) diff --git a/tests/fixtures.py b/tests/fixtures.py index 
41408599fc3..485253bfe40 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -323,6 +323,7 @@ def get_replay_event(replay_id: str | None = None) -> Mapping[str, Any]: "36e980a9-c602-4cde-9f5d-089f15b83b5f", "8bea4461-d8b9-44f3-93c1-5a3cb1c4169a", ], + "segment_names": ["segment-a", "segment-b"], "dist": "", "platform": "python", "timestamp": now, diff --git a/tests/test_replays_api.py b/tests/test_replays_api.py index 0ff422e881a..0b236837001 100644 --- a/tests/test_replays_api.py +++ b/tests/test_replays_api.py @@ -112,3 +112,31 @@ def test_sdk_user_title_nullability(self) -> None: "sdk_version": "", } ] + + def test_segment_names_query(self) -> None: + replays_storage = get_entity(EntityKey.REPLAYS).get_writable_storage() + assert replays_storage is not None + write_raw_unprocessed_events(replays_storage, [self.event]) + + response = self.post( + "/replays/snql", + data=json.dumps( + { + "query": f""" + MATCH (replays) + SELECT replay_id, segment_names + BY replay_id + WHERE project_id = {self.project_id} + AND timestamp >= toDateTime('{self.base_time.isoformat()}') + AND timestamp < toDateTime('{self.next_time.isoformat()}') + LIMIT 10 OFFSET 0 + """, + "debug": True, + "tenant_ids": {"referrer": "replays", "organization_id": 1}, + } + ), + ) + + data = json.loads(response.data) + assert response.status_code == 200, data + assert data["data"][0]["segment_names"] == ["segment-a", "segment-b"] From fb32c12b35f0af0bd961308181c5810cea8a9595 Mon Sep 17 00:00:00 2001 From: Matt Quinn Date: Thu, 25 Jun 2026 11:31:17 -0400 Subject: [PATCH 2/2] fix(replays): Wrap segment_names in groupArrayArray aggregate in test The segment_names column needs an aggregate function since the replays query uses GROUP BY replay_id. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/test_replays_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_replays_api.py b/tests/test_replays_api.py index 0b236837001..50ee586cd10 100644 --- a/tests/test_replays_api.py +++ b/tests/test_replays_api.py @@ -124,7 +124,7 @@ def test_segment_names_query(self) -> None: { "query": f""" MATCH (replays) - SELECT replay_id, segment_names + SELECT replay_id, groupArrayArray(segment_names) AS `segment_names` BY replay_id WHERE project_id = {self.project_id} AND timestamp >= toDateTime('{self.base_time.isoformat()}')