From db45f35e5f84994576c136e2c54e5eacf55eee80 Mon Sep 17 00:00:00 2001 From: Rock Date: Thu, 26 Mar 2026 12:18:17 +0100 Subject: [PATCH 01/29] feat: add initial StreamingPull producer with gRPC support Implement the foundation for a gRPC-based StreamingPull producer as an alternative to the existing REST-based pull producer. This provides low-latency, push-based message delivery from Google Cloud Pub/Sub. Key additions: - Streaming.Producer GenStage producer with Broadway integration - Streaming.StreamManager for managing bidirectional gRPC streams - Streaming.Acknowledger for message acknowledgment - Streaming.Options for configuration validation - MessageBuilder for constructing Broadway messages from Pub/Sub responses - Protobuf definitions for the Pub/Sub v1 API - Backoff module for exponential retry logic - HTTP/2 connection handling with Mint adapter support --- README.md | 59 + lib/broadway_cloud_pub_sub/backoff.ex | 112 + lib/broadway_cloud_pub_sub/message_builder.ex | 78 + .../proto/google/pubsub/v1/pubsub.pb.ex | 2095 +++++++++++++++++ .../proto/google/pubsub/v1/schema.pb.ex | 280 +++ lib/broadway_cloud_pub_sub/pull_client.ex | 17 +- .../streaming/acknowledger.ex | 92 + .../streaming/options.ex | 268 +++ .../streaming/producer.ex | 233 ++ .../streaming/stream_manager.ex | 771 ++++++ mix.exs | 2 + mix.lock | 55 +- test/broadway_cloud_pub_sub/backoff_test.exs | 152 ++ test/broadway_cloud_pub_sub/producer_test.exs | 10 +- .../streaming/acknowledger_test.exs | 200 ++ .../streaming/options_test.exs | 252 ++ .../streaming/producer_integration_test.exs | 194 ++ test/support/pubsub_emulator.ex | 178 ++ test/test_helper.exs | 2 +- 19 files changed, 5011 insertions(+), 39 deletions(-) create mode 100644 lib/broadway_cloud_pub_sub/backoff.ex create mode 100644 lib/broadway_cloud_pub_sub/message_builder.ex create mode 100644 lib/broadway_cloud_pub_sub/proto/google/pubsub/v1/pubsub.pb.ex create mode 100644 lib/broadway_cloud_pub_sub/proto/google/pubsub/v1/schema.pb.ex create mode 100644 lib/broadway_cloud_pub_sub/streaming/acknowledger.ex create mode 100644 lib/broadway_cloud_pub_sub/streaming/options.ex create mode 100644 lib/broadway_cloud_pub_sub/streaming/producer.ex create mode 100644 lib/broadway_cloud_pub_sub/streaming/stream_manager.ex create mode 100644 test/broadway_cloud_pub_sub/backoff_test.exs create mode 100644 test/broadway_cloud_pub_sub/streaming/acknowledger_test.exs create mode 100644 test/broadway_cloud_pub_sub/streaming/options_test.exs create mode 100644 test/broadway_cloud_pub_sub/streaming/producer_integration_test.exs create mode 100644 test/support/pubsub_emulator.ex diff --git a/README.md b/README.md index f39cd20..8f019ff 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,7 @@ Documentation can be found at [https://hexdocs.pm/broadway_cloud_pub_sub](https: This project provides: * `BroadwayCloudPubSub.Producer` - A GenStage producer that continuously receives messages from a Pub/Sub subscription acknowledges them after being successfully processed. +* `BroadwayCloudPubSub.Streaming.Producer` - A GenStage producer that uses the gRPC StreamingPull API for low-latency, push-based message delivery. * `BroadwayCloudPubSub.Client` - A generic behaviour to implement Pub/Sub clients. * `BroadwayCloudPubSub.PullClient` - Default REST client used by `BroadwayCloudPubSub.Producer`. @@ -27,6 +28,19 @@ end > Note the [goth](https://hexdocs.pm/goth) package, which handles Google Authentication, is required for the default token generator. +If you are using `BroadwayCloudPubSub.Streaming.Producer`, also add the gRPC dependencies: + +```elixir +def deps do + [ + {:broadway_cloud_pub_sub, "~> 0.10.0"}, + {:goth, "~> 1.3"}, + {:grpc, "~> 0.9"}, + {:protobuf, "~> 0.12"} + ] +end +``` + ## Usage Configure Broadway with one or more producers using `BroadwayCloudPubSub.Producer`: @@ -43,6 +57,51 @@ Broadway.start_link(MyBroadway, ) ``` +## Streaming Usage + +For lower latency and higher throughput workloads, use `BroadwayCloudPubSub.Streaming.Producer`. +It opens a persistent bidirectional gRPC stream to Pub/Sub and receives messages as the server +pushes them, rather than polling via HTTP. + +```elixir +Broadway.start_link(MyBroadway, + name: MyBroadway, + producer: [ + module: {BroadwayCloudPubSub.Streaming.Producer, + goth: MyGoth, + subscription: "projects/my-project/subscriptions/my-subscription", + max_outstanding_messages: 1000 + } + ] +) +``` + +### gRPC adapter + +The streaming producer supports two HTTP/2 adapters, both provided by the `grpc` dependency: + +- `:gun` (default) — Uses the [Gun](https://github.com/ninenines/gun) HTTP/2 client. This is the + traditional adapter and works out of the box with the standard `grpc` dependency. +- `:mint` — Uses the [Mint](https://github.com/elixir-mint/mint) HTTP/2 client. Mint may be + preferable in environments where Gun is not available or not desired. + +```elixir +Broadway.start_link(MyBroadway, + name: MyBroadway, + producer: [ + module: {BroadwayCloudPubSub.Streaming.Producer, + goth: MyGoth, + subscription: "projects/my-project/subscriptions/my-subscription", + adapter: :mint + } + ] +) +``` + +See `BroadwayCloudPubSub.Streaming.Producer` for the full list of configuration options, +including flow control (`max_outstanding_messages`, `max_outstanding_bytes`), reconnection +backoff, and shutdown behaviour. + ## License Copyright 2019 Michael Crumm \ diff --git a/lib/broadway_cloud_pub_sub/backoff.ex b/lib/broadway_cloud_pub_sub/backoff.ex new file mode 100644 index 0000000..f312cc9 --- /dev/null +++ b/lib/broadway_cloud_pub_sub/backoff.ex @@ -0,0 +1,112 @@ +defmodule BroadwayCloudPubSub.Backoff do + @moduledoc false + + # Pure-functional backoff calculator for StreamingPull reconnection. + # Supports :rand_exp (randomized exponential), :exp (pure exponential), + # :rand (pure random), and :stop (no reconnect). + + @default_min 1_000 + @default_max 30_000 + + @type type :: :rand_exp | :exp | :rand | :stop + + @type t :: %__MODULE__{ + type: type(), + min: non_neg_integer(), + max: non_neg_integer(), + state: term() + } + + defstruct [:type, :min, :max, :state] + + @doc """ + Creates a new Backoff struct. + + Returns `nil` if `type` is `:stop`, indicating no reconnection. + + ## Options + + * `:type` - `:rand_exp` (default), `:exp`, `:rand`, or `:stop` + * `:min` - minimum backoff in milliseconds (default: 1000) + * `:max` - maximum backoff in milliseconds (default: 30000) + + """ + @spec new(keyword()) :: t() | nil + def new(opts \\ []) do + type = Keyword.get(opts, :type, :rand_exp) + min = Keyword.get(opts, :min, @default_min) + max = Keyword.get(opts, :max, @default_max) + + case type do + :stop -> + nil + + :rand_exp -> + lower = max(min, div(max, 3)) + %__MODULE__{type: :rand_exp, min: min, max: max, state: {min, lower, seed()}} + + :exp -> + %__MODULE__{type: :exp, min: min, max: max, state: min} + + :rand -> + %__MODULE__{type: :rand, min: min, max: max, state: nil} + end + end + + @doc """ + Returns the next backoff timeout and an updated Backoff struct. + + Returns `{nil, nil}` if the Backoff is `nil` (`:stop` type). + """ + @spec backoff(t() | nil) :: {non_neg_integer() | nil, t() | nil} + def backoff(nil), do: {nil, nil} + + def backoff(%__MODULE__{type: :rand_exp, min: _min, max: max, state: {prev, lower, seed}} = b) do + next_min = min(prev, lower) + next_max = min(prev * 3, max) + {timeout, seed} = rand(next_min, next_max, seed) + {timeout, %{b | state: {min(next_max, max), lower, seed}}} + end + + def backoff(%__MODULE__{type: :exp, min: _min, max: max, state: prev} = b) do + timeout = min(prev, max) + {timeout, %{b | state: min(prev * 2, max)}} + end + + def backoff(%__MODULE__{type: :rand, min: min, max: max} = b) do + {timeout, _} = rand(min, max, seed()) + {timeout, b} + end + + @doc """ + Resets the backoff state to its initial value after a successful connection. + """ + @spec reset(t() | nil) :: t() | nil + def reset(nil), do: nil + + def reset(%__MODULE__{type: :rand_exp, min: min, max: max} = b) do + lower = max(min, div(max, 3)) + %{b | state: {min, lower, seed()}} + end + + def reset(%__MODULE__{type: :exp, min: min} = b) do + %{b | state: min} + end + + def reset(%__MODULE__{type: :rand} = b), do: b + + # --- Private helpers --- + + defp rand(min, max, seed) when min >= max do + {min, seed} + end + + defp rand(min, max, seed) do + {value, new_seed} = :rand.uniform_s(max - min, seed) + {value + min, new_seed} + end + + defp seed do + :rand.seed_s(:exsplus) + end +end diff --git a/lib/broadway_cloud_pub_sub/message_builder.ex b/lib/broadway_cloud_pub_sub/message_builder.ex new file mode 100644 index 0000000..63eb6ba --- /dev/null +++ b/lib/broadway_cloud_pub_sub/message_builder.ex @@ -0,0 +1,78 @@ +defmodule BroadwayCloudPubSub.MessageBuilder do + @moduledoc false + + # Shared message-building logic used by both the pull client and the + # streaming client to ensure consistent Broadway.Message metadata structure. + + alias Broadway.Message + + @doc """ + Builds a `Broadway.Message` metadata map from a normalized fields map. + + Both the pull client (REST/JSON) and the streaming client (gRPC/protobuf) + normalize their transport-specific message representation into the same + intermediate shape before calling this function, guaranteeing that all + producers emit identical metadata keys. + + ## Input fields + + The `fields` map must have the following atom keys: + + * `:message_id` — the Pub/Sub message ID + * `:ordering_key` — the ordering key (may be `nil` or `""`) + * `:publish_time` — a `%DateTime{}` or `nil` + * `:delivery_attempt` — a positive integer or `nil` + * `:attributes` — a `%{String.t() => String.t()}` map or `nil` + + ## Output + + Returns a metadata map with the following camelCase atom keys, which is + the established API convention for this library: + + %{ + messageId: message_id, + orderingKey: ordering_key, + publishTime: publish_time, + deliveryAttempt: delivery_attempt, + attributes: attributes + } + """ + @spec build_metadata(%{ + message_id: term(), + ordering_key: term(), + publish_time: DateTime.t() | nil, + delivery_attempt: non_neg_integer() | nil, + attributes: map() | nil + }) :: map() + def build_metadata(%{ + message_id: message_id, + ordering_key: ordering_key, + publish_time: publish_time, + delivery_attempt: delivery_attempt, + attributes: attributes + }) do + %{ + messageId: message_id, + orderingKey: ordering_key, + publishTime: publish_time, + deliveryAttempt: delivery_attempt, + attributes: attributes + } + end + + @doc """ + Builds a `Broadway.Message` from `data`, a metadata fields map, and an + `acknowledger` tuple. + + The `fields` map is passed to `build_metadata/1` — see that function for + the expected keys. + """ + @spec build_message(term(), map(), Broadway.Message.acknowledger()) :: Message.t() + def build_message(data, fields, acknowledger) do + %Message{ + data: data, + metadata: build_metadata(fields), + acknowledger: acknowledger + } + end +end diff --git a/lib/broadway_cloud_pub_sub/proto/google/pubsub/v1/pubsub.pb.ex b/lib/broadway_cloud_pub_sub/proto/google/pubsub/v1/pubsub.pb.ex new file mode 100644 index 0000000..28c2b11 --- /dev/null +++ b/lib/broadway_cloud_pub_sub/proto/google/pubsub/v1/pubsub.pb.ex @@ -0,0 +1,2095 @@ +defmodule Google.Pubsub.V1.IngestionDataSourceSettings.AwsKinesis.State do + @moduledoc false + + use Protobuf, + enum: true, + full_name: "google.pubsub.v1.IngestionDataSourceSettings.AwsKinesis.State", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:STATE_UNSPECIFIED, 0) + field(:ACTIVE, 1) + field(:KINESIS_PERMISSION_DENIED, 2) + field(:PUBLISH_PERMISSION_DENIED, 3) + field(:STREAM_NOT_FOUND, 4) + field(:CONSUMER_NOT_FOUND, 5) +end + +defmodule Google.Pubsub.V1.IngestionDataSourceSettings.CloudStorage.State do + @moduledoc false + + use Protobuf, + enum: true, + full_name: "google.pubsub.v1.IngestionDataSourceSettings.CloudStorage.State", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:STATE_UNSPECIFIED, 0) + field(:ACTIVE, 1) + field(:CLOUD_STORAGE_PERMISSION_DENIED, 2) + field(:PUBLISH_PERMISSION_DENIED, 3) + field(:BUCKET_NOT_FOUND, 4) + field(:TOO_MANY_OBJECTS, 5) +end + +defmodule Google.Pubsub.V1.IngestionDataSourceSettings.AzureEventHubs.State do + @moduledoc false + + use Protobuf, + enum: true, + full_name: "google.pubsub.v1.IngestionDataSourceSettings.AzureEventHubs.State", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:STATE_UNSPECIFIED, 0) + field(:ACTIVE, 1) + field(:EVENT_HUBS_PERMISSION_DENIED, 2) + field(:PUBLISH_PERMISSION_DENIED, 3) + field(:NAMESPACE_NOT_FOUND, 4) + field(:EVENT_HUB_NOT_FOUND, 5) + field(:SUBSCRIPTION_NOT_FOUND, 6) + field(:RESOURCE_GROUP_NOT_FOUND, 7) +end + +defmodule Google.Pubsub.V1.IngestionDataSourceSettings.AwsMsk.State do + @moduledoc false + + use Protobuf, + enum: true, + full_name: "google.pubsub.v1.IngestionDataSourceSettings.AwsMsk.State", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:STATE_UNSPECIFIED, 0) + field(:ACTIVE, 1) + field(:MSK_PERMISSION_DENIED, 2) + field(:PUBLISH_PERMISSION_DENIED, 3) + field(:CLUSTER_NOT_FOUND, 4) + field(:TOPIC_NOT_FOUND, 5) +end + +defmodule Google.Pubsub.V1.IngestionDataSourceSettings.ConfluentCloud.State do + @moduledoc false + + use Protobuf, + enum: true, + full_name: "google.pubsub.v1.IngestionDataSourceSettings.ConfluentCloud.State", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:STATE_UNSPECIFIED, 0) + field(:ACTIVE, 1) + field(:CONFLUENT_CLOUD_PERMISSION_DENIED, 2) + field(:PUBLISH_PERMISSION_DENIED, 3) + field(:UNREACHABLE_BOOTSTRAP_SERVER, 4) + field(:CLUSTER_NOT_FOUND, 5) + field(:TOPIC_NOT_FOUND, 6) +end + +defmodule Google.Pubsub.V1.PlatformLogsSettings.Severity do + @moduledoc false + + use Protobuf, + enum: true, + full_name: "google.pubsub.v1.PlatformLogsSettings.Severity", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:SEVERITY_UNSPECIFIED, 0) + field(:DISABLED, 1) + field(:DEBUG, 2) + field(:INFO, 3) + field(:WARNING, 4) + field(:ERROR, 5) +end + +defmodule Google.Pubsub.V1.Topic.State do + @moduledoc false + + use Protobuf, + enum: true, + full_name: "google.pubsub.v1.Topic.State", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:STATE_UNSPECIFIED, 0) + field(:ACTIVE, 1) + field(:INGESTION_RESOURCE_ERROR, 2) +end + +defmodule Google.Pubsub.V1.Subscription.State do + @moduledoc false + + use Protobuf, + enum: true, + full_name: "google.pubsub.v1.Subscription.State", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:STATE_UNSPECIFIED, 0) + field(:ACTIVE, 1) + field(:RESOURCE_ERROR, 2) +end + +defmodule Google.Pubsub.V1.BigQueryConfig.State do + @moduledoc false + + use Protobuf, + enum: true, + full_name: "google.pubsub.v1.BigQueryConfig.State", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:STATE_UNSPECIFIED, 0) + field(:ACTIVE, 1) + field(:PERMISSION_DENIED, 2) + field(:NOT_FOUND, 3) + field(:SCHEMA_MISMATCH, 4) + field(:IN_TRANSIT_LOCATION_RESTRICTION, 5) + field(:VERTEX_AI_LOCATION_RESTRICTION, 6) +end + +defmodule Google.Pubsub.V1.BigtableConfig.State do + @moduledoc false + + use Protobuf, + enum: true, + full_name: "google.pubsub.v1.BigtableConfig.State", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:STATE_UNSPECIFIED, 0) + field(:ACTIVE, 1) + field(:NOT_FOUND, 2) + field(:APP_PROFILE_MISCONFIGURED, 3) + field(:PERMISSION_DENIED, 4) + field(:SCHEMA_MISMATCH, 5) + field(:IN_TRANSIT_LOCATION_RESTRICTION, 6) + field(:VERTEX_AI_LOCATION_RESTRICTION, 7) +end + +defmodule Google.Pubsub.V1.CloudStorageConfig.State do + @moduledoc false + + use Protobuf, + enum: true, + full_name: "google.pubsub.v1.CloudStorageConfig.State", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:STATE_UNSPECIFIED, 0) + field(:ACTIVE, 1) + field(:PERMISSION_DENIED, 2) + field(:NOT_FOUND, 3) + field(:IN_TRANSIT_LOCATION_RESTRICTION, 4) + field(:SCHEMA_MISMATCH, 5) + field(:VERTEX_AI_LOCATION_RESTRICTION, 6) +end + +defmodule Google.Pubsub.V1.MessageStoragePolicy do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.MessageStoragePolicy", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:allowed_persistence_regions, 1, + repeated: true, + type: :string, + json_name: "allowedPersistenceRegions", + deprecated: false + ) + + field(:enforce_in_transit, 2, type: :bool, json_name: "enforceInTransit", deprecated: false) +end + +defmodule Google.Pubsub.V1.SchemaSettings do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.SchemaSettings", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:schema, 1, type: :string, deprecated: false) + field(:encoding, 2, type: Google.Pubsub.V1.Encoding, enum: true, deprecated: false) + field(:first_revision_id, 3, type: :string, json_name: "firstRevisionId", deprecated: false) + field(:last_revision_id, 4, type: :string, json_name: "lastRevisionId", deprecated: false) +end + +defmodule Google.Pubsub.V1.IngestionDataSourceSettings.AwsKinesis do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.IngestionDataSourceSettings.AwsKinesis", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:state, 1, + type: Google.Pubsub.V1.IngestionDataSourceSettings.AwsKinesis.State, + enum: true, + deprecated: false + ) + + field(:stream_arn, 2, type: :string, json_name: "streamArn", deprecated: false) + field(:consumer_arn, 3, type: :string, json_name: "consumerArn", deprecated: false) + field(:aws_role_arn, 4, type: :string, json_name: "awsRoleArn", deprecated: false) + field(:gcp_service_account, 5, type: :string, json_name: "gcpServiceAccount", deprecated: false) +end + +defmodule Google.Pubsub.V1.IngestionDataSourceSettings.CloudStorage.TextFormat do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.IngestionDataSourceSettings.CloudStorage.TextFormat", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:delimiter, 1, proto3_optional: true, type: :string, deprecated: false) +end + +defmodule Google.Pubsub.V1.IngestionDataSourceSettings.CloudStorage.AvroFormat do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.IngestionDataSourceSettings.CloudStorage.AvroFormat", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 +end + +defmodule Google.Pubsub.V1.IngestionDataSourceSettings.CloudStorage.PubSubAvroFormat do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.IngestionDataSourceSettings.CloudStorage.PubSubAvroFormat", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 +end + +defmodule Google.Pubsub.V1.IngestionDataSourceSettings.CloudStorage do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.IngestionDataSourceSettings.CloudStorage", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + oneof(:input_format, 0) + + field(:state, 1, + type: Google.Pubsub.V1.IngestionDataSourceSettings.CloudStorage.State, + enum: true, + deprecated: false + ) + + field(:bucket, 2, type: :string, deprecated: false) + + field(:text_format, 3, + type: Google.Pubsub.V1.IngestionDataSourceSettings.CloudStorage.TextFormat, + json_name: "textFormat", + oneof: 0, + deprecated: false + ) + + field(:avro_format, 4, + type: Google.Pubsub.V1.IngestionDataSourceSettings.CloudStorage.AvroFormat, + json_name: "avroFormat", + oneof: 0, + deprecated: false + ) + + field(:pubsub_avro_format, 5, + type: Google.Pubsub.V1.IngestionDataSourceSettings.CloudStorage.PubSubAvroFormat, + json_name: "pubsubAvroFormat", + oneof: 0, + deprecated: false + ) + + field(:minimum_object_create_time, 6, + type: Google.Protobuf.Timestamp, + json_name: "minimumObjectCreateTime", + deprecated: false + ) + + field(:match_glob, 9, type: :string, json_name: "matchGlob", deprecated: false) +end + +defmodule Google.Pubsub.V1.IngestionDataSourceSettings.AzureEventHubs do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.IngestionDataSourceSettings.AzureEventHubs", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:state, 1, + type: Google.Pubsub.V1.IngestionDataSourceSettings.AzureEventHubs.State, + enum: true, + deprecated: false + ) + + field(:resource_group, 2, type: :string, json_name: "resourceGroup", deprecated: false) + field(:namespace, 3, type: :string, deprecated: false) + field(:event_hub, 4, type: :string, json_name: "eventHub", deprecated: false) + field(:client_id, 5, type: :string, json_name: "clientId", deprecated: false) + field(:tenant_id, 6, type: :string, json_name: "tenantId", deprecated: false) + field(:subscription_id, 7, type: :string, json_name: "subscriptionId", deprecated: false) + field(:gcp_service_account, 8, type: :string, json_name: "gcpServiceAccount", deprecated: false) +end + +defmodule Google.Pubsub.V1.IngestionDataSourceSettings.AwsMsk do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.IngestionDataSourceSettings.AwsMsk", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:state, 1, + type: Google.Pubsub.V1.IngestionDataSourceSettings.AwsMsk.State, + enum: true, + deprecated: false + ) + + field(:cluster_arn, 2, type: :string, json_name: "clusterArn", deprecated: false) + field(:topic, 3, type: :string, deprecated: false) + field(:aws_role_arn, 4, type: :string, json_name: "awsRoleArn", deprecated: false) + field(:gcp_service_account, 5, type: :string, json_name: "gcpServiceAccount", deprecated: false) +end + +defmodule Google.Pubsub.V1.IngestionDataSourceSettings.ConfluentCloud do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.IngestionDataSourceSettings.ConfluentCloud", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:state, 1, + type: Google.Pubsub.V1.IngestionDataSourceSettings.ConfluentCloud.State, + enum: true, + deprecated: false + ) + + field(:bootstrap_server, 2, type: :string, json_name: "bootstrapServer", deprecated: false) + field(:cluster_id, 3, type: :string, json_name: "clusterId", deprecated: false) + field(:topic, 4, type: :string, deprecated: false) + field(:identity_pool_id, 5, type: :string, json_name: "identityPoolId", deprecated: false) + field(:gcp_service_account, 6, type: :string, json_name: "gcpServiceAccount", deprecated: false) +end + +defmodule Google.Pubsub.V1.IngestionDataSourceSettings do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.IngestionDataSourceSettings", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + oneof(:source, 0) + + field(:aws_kinesis, 1, + type: Google.Pubsub.V1.IngestionDataSourceSettings.AwsKinesis, + json_name: "awsKinesis", + oneof: 0, + deprecated: false + ) + + field(:cloud_storage, 2, + type: Google.Pubsub.V1.IngestionDataSourceSettings.CloudStorage, + json_name: "cloudStorage", + oneof: 0, + deprecated: false + ) + + field(:azure_event_hubs, 3, + type: Google.Pubsub.V1.IngestionDataSourceSettings.AzureEventHubs, + json_name: "azureEventHubs", + oneof: 0, + deprecated: false + ) + + field(:aws_msk, 5, + type: Google.Pubsub.V1.IngestionDataSourceSettings.AwsMsk, + json_name: "awsMsk", + oneof: 0, + deprecated: false + ) + + field(:confluent_cloud, 6, + type: Google.Pubsub.V1.IngestionDataSourceSettings.ConfluentCloud, + json_name: "confluentCloud", + oneof: 0, + deprecated: false + ) + + field(:platform_logs_settings, 4, + type: Google.Pubsub.V1.PlatformLogsSettings, + json_name: "platformLogsSettings", + deprecated: false + ) +end + +defmodule Google.Pubsub.V1.PlatformLogsSettings do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.PlatformLogsSettings", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:severity, 1, + type: Google.Pubsub.V1.PlatformLogsSettings.Severity, + enum: true, + deprecated: false + ) +end + +defmodule Google.Pubsub.V1.IngestionFailureEvent.ApiViolationReason do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.IngestionFailureEvent.ApiViolationReason", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 +end + +defmodule Google.Pubsub.V1.IngestionFailureEvent.AvroFailureReason do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.IngestionFailureEvent.AvroFailureReason", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 +end + +defmodule Google.Pubsub.V1.IngestionFailureEvent.SchemaViolationReason do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.IngestionFailureEvent.SchemaViolationReason", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 +end + +defmodule Google.Pubsub.V1.IngestionFailureEvent.MessageTransformationFailureReason do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.IngestionFailureEvent.MessageTransformationFailureReason", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 +end + +defmodule Google.Pubsub.V1.IngestionFailureEvent.CloudStorageFailure do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.IngestionFailureEvent.CloudStorageFailure", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + oneof(:reason, 0) + + field(:bucket, 1, type: :string, deprecated: false) + field(:object_name, 2, type: :string, json_name: "objectName", deprecated: false) + field(:object_generation, 3, type: :int64, json_name: "objectGeneration", deprecated: false) + + field(:avro_failure_reason, 5, + type: Google.Pubsub.V1.IngestionFailureEvent.AvroFailureReason, + json_name: "avroFailureReason", + oneof: 0, + deprecated: false + ) + + field(:api_violation_reason, 6, + type: Google.Pubsub.V1.IngestionFailureEvent.ApiViolationReason, + json_name: "apiViolationReason", + oneof: 0, + deprecated: false + ) + + field(:schema_violation_reason, 7, + type: Google.Pubsub.V1.IngestionFailureEvent.SchemaViolationReason, + json_name: "schemaViolationReason", + oneof: 0, + deprecated: false + ) + + field(:message_transformation_failure_reason, 8, + type: Google.Pubsub.V1.IngestionFailureEvent.MessageTransformationFailureReason, + json_name: "messageTransformationFailureReason", + oneof: 0, + deprecated: false + ) +end + +defmodule Google.Pubsub.V1.IngestionFailureEvent.AwsMskFailureReason do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.IngestionFailureEvent.AwsMskFailureReason", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + oneof(:reason, 0) + + field(:cluster_arn, 1, type: :string, json_name: "clusterArn", deprecated: false) + field(:kafka_topic, 2, type: :string, json_name: "kafkaTopic", deprecated: false) + field(:partition_id, 3, type: :int64, json_name: "partitionId", deprecated: false) + field(:offset, 4, type: :int64, deprecated: false) + + field(:api_violation_reason, 5, + type: Google.Pubsub.V1.IngestionFailureEvent.ApiViolationReason, + json_name: "apiViolationReason", + oneof: 0, + deprecated: false + ) + + field(:schema_violation_reason, 6, + type: Google.Pubsub.V1.IngestionFailureEvent.SchemaViolationReason, + json_name: "schemaViolationReason", + oneof: 0, + deprecated: false + ) + + field(:message_transformation_failure_reason, 7, + type: Google.Pubsub.V1.IngestionFailureEvent.MessageTransformationFailureReason, + json_name: "messageTransformationFailureReason", + oneof: 0, + deprecated: false + ) +end + +defmodule Google.Pubsub.V1.IngestionFailureEvent.AzureEventHubsFailureReason do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.IngestionFailureEvent.AzureEventHubsFailureReason", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + oneof(:reason, 0) + + field(:namespace, 1, type: :string, deprecated: false) + field(:event_hub, 2, type: :string, json_name: "eventHub", deprecated: false) + field(:partition_id, 3, type: :int64, json_name: "partitionId", deprecated: false) + field(:offset, 4, type: :int64, deprecated: false) + + field(:api_violation_reason, 5, + type: Google.Pubsub.V1.IngestionFailureEvent.ApiViolationReason, + json_name: "apiViolationReason", + oneof: 0, + deprecated: false + ) + + field(:schema_violation_reason, 6, + type: Google.Pubsub.V1.IngestionFailureEvent.SchemaViolationReason, + json_name: "schemaViolationReason", + oneof: 0, + deprecated: false + ) + + field(:message_transformation_failure_reason, 7, + type: Google.Pubsub.V1.IngestionFailureEvent.MessageTransformationFailureReason, + json_name: "messageTransformationFailureReason", + oneof: 0, + deprecated: false + ) +end + +defmodule Google.Pubsub.V1.IngestionFailureEvent.ConfluentCloudFailureReason do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.IngestionFailureEvent.ConfluentCloudFailureReason", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + oneof(:reason, 0) + + field(:cluster_id, 1, type: :string, json_name: "clusterId", deprecated: false) + field(:kafka_topic, 2, type: :string, json_name: "kafkaTopic", deprecated: false) + field(:partition_id, 3, type: :int64, json_name: "partitionId", deprecated: false) + field(:offset, 4, type: :int64, deprecated: false) + + field(:api_violation_reason, 5, + type: Google.Pubsub.V1.IngestionFailureEvent.ApiViolationReason, + json_name: "apiViolationReason", + oneof: 0, + deprecated: false + ) + + field(:schema_violation_reason, 6, + type: Google.Pubsub.V1.IngestionFailureEvent.SchemaViolationReason, + json_name: "schemaViolationReason", + oneof: 0, + deprecated: false + ) + + field(:message_transformation_failure_reason, 7, + type: Google.Pubsub.V1.IngestionFailureEvent.MessageTransformationFailureReason, + json_name: "messageTransformationFailureReason", + oneof: 0, + deprecated: false + ) +end + +defmodule Google.Pubsub.V1.IngestionFailureEvent.AwsKinesisFailureReason do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.IngestionFailureEvent.AwsKinesisFailureReason", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + oneof(:reason, 0) + + field(:stream_arn, 1, type: :string, json_name: "streamArn", deprecated: false) + field(:partition_key, 2, type: :string, json_name: "partitionKey", deprecated: false) + field(:sequence_number, 3, type: :string, json_name: "sequenceNumber", deprecated: false) + + field(:schema_violation_reason, 4, + type: Google.Pubsub.V1.IngestionFailureEvent.SchemaViolationReason, + json_name: "schemaViolationReason", + oneof: 0, + deprecated: false + ) + + field(:message_transformation_failure_reason, 5, + type: Google.Pubsub.V1.IngestionFailureEvent.MessageTransformationFailureReason, + json_name: "messageTransformationFailureReason", + oneof: 0, + deprecated: false + ) + + field(:api_violation_reason, 6, + type: Google.Pubsub.V1.IngestionFailureEvent.ApiViolationReason, + json_name: "apiViolationReason", + oneof: 0, + deprecated: false + ) +end + +defmodule Google.Pubsub.V1.IngestionFailureEvent do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.IngestionFailureEvent", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + oneof(:failure, 0) + + field(:topic, 1, type: :string, deprecated: false) + field(:error_message, 2, type: :string, json_name: "errorMessage", deprecated: false) + + field(:cloud_storage_failure, 3, + type: Google.Pubsub.V1.IngestionFailureEvent.CloudStorageFailure, + json_name: "cloudStorageFailure", + oneof: 0, + deprecated: false + ) + + field(:aws_msk_failure, 4, + type: Google.Pubsub.V1.IngestionFailureEvent.AwsMskFailureReason, + json_name: "awsMskFailure", + oneof: 0, + deprecated: false + ) + + field(:azure_event_hubs_failure, 5, + type: Google.Pubsub.V1.IngestionFailureEvent.AzureEventHubsFailureReason, + json_name: "azureEventHubsFailure", + oneof: 0, + deprecated: false + ) + + field(:confluent_cloud_failure, 6, + type: Google.Pubsub.V1.IngestionFailureEvent.ConfluentCloudFailureReason, + json_name: "confluentCloudFailure", + oneof: 0, + deprecated: false + ) + + field(:aws_kinesis_failure, 7, + type: Google.Pubsub.V1.IngestionFailureEvent.AwsKinesisFailureReason, + json_name: "awsKinesisFailure", + oneof: 0, + deprecated: false + ) +end + +defmodule Google.Pubsub.V1.JavaScriptUDF do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.JavaScriptUDF", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:function_name, 1, type: :string, json_name: "functionName", deprecated: false) + field(:code, 2, type: :string, deprecated: false) +end + +defmodule Google.Pubsub.V1.AIInference.UnstructuredInference do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.AIInference.UnstructuredInference", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:parameters, 1, type: Google.Protobuf.Struct, deprecated: false) +end + +defmodule Google.Pubsub.V1.AIInference do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.AIInference", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + oneof(:inference_mode, 0) + + field(:endpoint, 1, type: :string, deprecated: false) + + field(:unstructured_inference, 2, + type: Google.Pubsub.V1.AIInference.UnstructuredInference, + json_name: "unstructuredInference", + oneof: 0, + deprecated: false + ) + + field(:service_account_email, 3, + type: :string, + json_name: "serviceAccountEmail", + deprecated: false + ) +end + +defmodule Google.Pubsub.V1.MessageTransform do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.MessageTransform", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + oneof(:transform, 0) + + field(:javascript_udf, 2, + type: Google.Pubsub.V1.JavaScriptUDF, + json_name: "javascriptUdf", + oneof: 0, + deprecated: false + ) + + field(:ai_inference, 6, + type: Google.Pubsub.V1.AIInference, + json_name: "aiInference", + oneof: 0, + deprecated: false + ) + + field(:enabled, 3, type: :bool, deprecated: true) + field(:disabled, 4, type: :bool, deprecated: false) +end + +defmodule Google.Pubsub.V1.Topic.LabelsEntry do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.Topic.LabelsEntry", + map: true, + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:key, 1, type: :string) + field(:value, 2, type: :string) +end + +defmodule Google.Pubsub.V1.Topic.TagsEntry do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.Topic.TagsEntry", + map: true, + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:key, 1, type: :string) + field(:value, 2, type: :string) +end + +defmodule Google.Pubsub.V1.Topic do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.Topic", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:name, 1, type: :string, deprecated: false) + + field(:labels, 2, + repeated: true, + type: Google.Pubsub.V1.Topic.LabelsEntry, + map: true, + deprecated: false + ) + + field(:message_storage_policy, 3, + type: Google.Pubsub.V1.MessageStoragePolicy, + json_name: "messageStoragePolicy", + deprecated: false + ) + + field(:kms_key_name, 5, type: :string, json_name: "kmsKeyName", deprecated: false) + + field(:schema_settings, 6, + type: Google.Pubsub.V1.SchemaSettings, + json_name: "schemaSettings", + deprecated: false + ) + + field(:satisfies_pzs, 7, type: :bool, json_name: "satisfiesPzs", deprecated: false) + + field(:message_retention_duration, 8, + type: Google.Protobuf.Duration, + json_name: "messageRetentionDuration", + deprecated: false + ) + + field(:state, 9, type: Google.Pubsub.V1.Topic.State, enum: true, deprecated: false) + + field(:ingestion_data_source_settings, 10, + type: Google.Pubsub.V1.IngestionDataSourceSettings, + json_name: "ingestionDataSourceSettings", + deprecated: false + ) + + field(:message_transforms, 13, + repeated: true, + type: Google.Pubsub.V1.MessageTransform, + json_name: "messageTransforms", + deprecated: false + ) + + field(:tags, 14, + repeated: true, + type: Google.Pubsub.V1.Topic.TagsEntry, + map: true, + deprecated: false + ) +end + +defmodule Google.Pubsub.V1.PubsubMessage.AttributesEntry do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.PubsubMessage.AttributesEntry", + map: true, + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:key, 1, type: :string) + field(:value, 2, type: :string) +end + +defmodule Google.Pubsub.V1.PubsubMessage do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.PubsubMessage", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:data, 1, type: :bytes, deprecated: false) + + field(:attributes, 2, + repeated: true, + type: Google.Pubsub.V1.PubsubMessage.AttributesEntry, + map: true, + deprecated: false + ) + + field(:message_id, 3, type: :string, json_name: "messageId") + field(:publish_time, 4, type: Google.Protobuf.Timestamp, json_name: "publishTime") + field(:ordering_key, 5, type: :string, json_name: "orderingKey", deprecated: false) +end + +defmodule Google.Pubsub.V1.GetTopicRequest do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.GetTopicRequest", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:topic, 1, type: :string, deprecated: false) +end + +defmodule Google.Pubsub.V1.UpdateTopicRequest do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.UpdateTopicRequest", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:topic, 1, type: Google.Pubsub.V1.Topic, deprecated: false) + + field(:update_mask, 2, + type: Google.Protobuf.FieldMask, + json_name: "updateMask", + deprecated: false + ) +end + +defmodule Google.Pubsub.V1.PublishRequest do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.PublishRequest", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:topic, 1, type: :string, deprecated: false) + field(:messages, 2, repeated: true, type: Google.Pubsub.V1.PubsubMessage, deprecated: false) +end + +defmodule Google.Pubsub.V1.PublishResponse do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.PublishResponse", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:message_ids, 1, + repeated: true, + type: :string, + json_name: "messageIds", + deprecated: false + ) +end + +defmodule Google.Pubsub.V1.ListTopicsRequest do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.ListTopicsRequest", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:project, 1, type: :string, deprecated: false) + field(:page_size, 2, type: :int32, json_name: "pageSize", deprecated: false) + field(:page_token, 3, type: :string, json_name: "pageToken", deprecated: false) +end + +defmodule Google.Pubsub.V1.ListTopicsResponse do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.ListTopicsResponse", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:topics, 1, repeated: true, type: Google.Pubsub.V1.Topic, deprecated: false) + field(:next_page_token, 2, type: :string, json_name: "nextPageToken", deprecated: false) +end + +defmodule Google.Pubsub.V1.ListTopicSubscriptionsRequest do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.ListTopicSubscriptionsRequest", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:topic, 1, type: :string, deprecated: false) + field(:page_size, 2, type: :int32, json_name: "pageSize", deprecated: false) + field(:page_token, 3, type: :string, json_name: "pageToken", deprecated: false) +end + +defmodule Google.Pubsub.V1.ListTopicSubscriptionsResponse do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.ListTopicSubscriptionsResponse", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:subscriptions, 1, repeated: true, type: :string, deprecated: false) + field(:next_page_token, 2, type: :string, json_name: "nextPageToken", deprecated: false) +end + +defmodule Google.Pubsub.V1.ListTopicSnapshotsRequest do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.ListTopicSnapshotsRequest", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:topic, 1, type: :string, deprecated: false) + field(:page_size, 2, type: :int32, json_name: "pageSize", deprecated: false) + field(:page_token, 3, type: :string, json_name: "pageToken", deprecated: false) +end + +defmodule Google.Pubsub.V1.ListTopicSnapshotsResponse do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.ListTopicSnapshotsResponse", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:snapshots, 1, repeated: true, type: :string, deprecated: false) + field(:next_page_token, 2, type: :string, json_name: "nextPageToken", deprecated: false) +end + +defmodule Google.Pubsub.V1.DeleteTopicRequest do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.DeleteTopicRequest", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:topic, 1, type: :string, deprecated: false) +end + +defmodule Google.Pubsub.V1.DetachSubscriptionRequest do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.DetachSubscriptionRequest", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:subscription, 1, type: :string, deprecated: false) +end + +defmodule Google.Pubsub.V1.DetachSubscriptionResponse do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.DetachSubscriptionResponse", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 +end + +defmodule Google.Pubsub.V1.Subscription.AnalyticsHubSubscriptionInfo do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.Subscription.AnalyticsHubSubscriptionInfo", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:listing, 1, type: :string, deprecated: false) + field(:subscription, 2, type: :string, deprecated: false) +end + +defmodule Google.Pubsub.V1.Subscription.LabelsEntry do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.Subscription.LabelsEntry", + map: true, + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:key, 1, type: :string) + field(:value, 2, type: :string) +end + +defmodule Google.Pubsub.V1.Subscription.TagsEntry do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.Subscription.TagsEntry", + map: true, + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:key, 1, type: :string) + field(:value, 2, type: :string) +end + +defmodule Google.Pubsub.V1.Subscription do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.Subscription", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:name, 1, type: :string, deprecated: false) + field(:topic, 2, type: :string, deprecated: false) + + field(:push_config, 4, + type: Google.Pubsub.V1.PushConfig, + json_name: "pushConfig", + deprecated: false + ) + + field(:bigquery_config, 18, + type: Google.Pubsub.V1.BigQueryConfig, + json_name: "bigqueryConfig", + deprecated: false + ) + + field(:cloud_storage_config, 22, + type: Google.Pubsub.V1.CloudStorageConfig, + json_name: "cloudStorageConfig", + deprecated: false + ) + + field(:bigtable_config, 27, + type: Google.Pubsub.V1.BigtableConfig, + json_name: "bigtableConfig", + deprecated: false + ) + + field(:ack_deadline_seconds, 5, + type: :int32, + json_name: "ackDeadlineSeconds", + deprecated: false + ) + + field(:retain_acked_messages, 7, + type: :bool, + json_name: "retainAckedMessages", + deprecated: false + ) + + field(:message_retention_duration, 8, + type: Google.Protobuf.Duration, + json_name: "messageRetentionDuration", + deprecated: false + ) + + field(:labels, 9, + repeated: true, + type: Google.Pubsub.V1.Subscription.LabelsEntry, + map: true, + deprecated: false + ) + + field(:enable_message_ordering, 10, + type: :bool, + json_name: "enableMessageOrdering", + deprecated: false + ) + + field(:expiration_policy, 11, + type: Google.Pubsub.V1.ExpirationPolicy, + json_name: "expirationPolicy", + deprecated: false + ) + + field(:filter, 12, type: :string, deprecated: false) + + field(:dead_letter_policy, 13, + type: Google.Pubsub.V1.DeadLetterPolicy, + json_name: "deadLetterPolicy", + deprecated: false + ) + + field(:retry_policy, 14, + type: Google.Pubsub.V1.RetryPolicy, + json_name: "retryPolicy", + deprecated: false + ) + + field(:detached, 15, type: :bool, deprecated: false) + + field(:enable_exactly_once_delivery, 16, + type: :bool, + json_name: "enableExactlyOnceDelivery", + deprecated: false + ) + + field(:topic_message_retention_duration, 17, + type: Google.Protobuf.Duration, + json_name: "topicMessageRetentionDuration", + deprecated: false + ) + + field(:state, 19, type: Google.Pubsub.V1.Subscription.State, enum: true, deprecated: false) + + field(:analytics_hub_subscription_info, 23, + type: Google.Pubsub.V1.Subscription.AnalyticsHubSubscriptionInfo, + json_name: "analyticsHubSubscriptionInfo", + deprecated: false + ) + + field(:message_transforms, 25, + repeated: true, + type: Google.Pubsub.V1.MessageTransform, + json_name: "messageTransforms", + deprecated: false + ) + + field(:tags, 26, + repeated: true, + type: Google.Pubsub.V1.Subscription.TagsEntry, + map: true, + deprecated: false + ) +end + +defmodule Google.Pubsub.V1.RetryPolicy do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.RetryPolicy", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:minimum_backoff, 1, + type: Google.Protobuf.Duration, + json_name: "minimumBackoff", + deprecated: false + ) + + field(:maximum_backoff, 2, + type: Google.Protobuf.Duration, + json_name: "maximumBackoff", + deprecated: false + ) +end + +defmodule Google.Pubsub.V1.DeadLetterPolicy do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.DeadLetterPolicy", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:dead_letter_topic, 1, type: :string, json_name: "deadLetterTopic", deprecated: false) + + field(:max_delivery_attempts, 2, + type: :int32, + json_name: "maxDeliveryAttempts", + deprecated: false + ) +end + +defmodule Google.Pubsub.V1.ExpirationPolicy do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.ExpirationPolicy", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:ttl, 1, type: Google.Protobuf.Duration, deprecated: false) +end + +defmodule Google.Pubsub.V1.PushConfig.OidcToken do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.PushConfig.OidcToken", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:service_account_email, 1, + type: :string, + json_name: "serviceAccountEmail", + deprecated: false + ) + + field(:audience, 2, type: :string, deprecated: false) +end + +defmodule Google.Pubsub.V1.PushConfig.PubsubWrapper do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.PushConfig.PubsubWrapper", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 +end + +defmodule Google.Pubsub.V1.PushConfig.NoWrapper do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.PushConfig.NoWrapper", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:write_metadata, 1, type: :bool, json_name: "writeMetadata", deprecated: false) +end + +defmodule Google.Pubsub.V1.PushConfig.AttributesEntry do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.PushConfig.AttributesEntry", + map: true, + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:key, 1, type: :string) + field(:value, 2, type: :string) +end + +defmodule Google.Pubsub.V1.PushConfig do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.PushConfig", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + oneof(:authentication_method, 0) + + oneof(:wrapper, 1) + + field(:push_endpoint, 1, type: :string, json_name: "pushEndpoint", deprecated: false) + + field(:attributes, 2, + repeated: true, + type: Google.Pubsub.V1.PushConfig.AttributesEntry, + map: true, + deprecated: false + ) + + field(:oidc_token, 3, + type: Google.Pubsub.V1.PushConfig.OidcToken, + json_name: "oidcToken", + oneof: 0, + deprecated: false + ) + + field(:pubsub_wrapper, 4, + type: Google.Pubsub.V1.PushConfig.PubsubWrapper, + json_name: "pubsubWrapper", + oneof: 1, + deprecated: false + ) + + field(:no_wrapper, 5, + type: Google.Pubsub.V1.PushConfig.NoWrapper, + json_name: "noWrapper", + oneof: 1, + deprecated: false + ) +end + +defmodule Google.Pubsub.V1.BigQueryConfig do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.BigQueryConfig", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:table, 1, type: :string, deprecated: false) + field(:use_topic_schema, 2, type: :bool, json_name: "useTopicSchema", deprecated: false) + field(:write_metadata, 3, type: :bool, json_name: "writeMetadata", deprecated: false) + field(:drop_unknown_fields, 4, type: :bool, json_name: "dropUnknownFields", deprecated: false) + field(:state, 5, type: Google.Pubsub.V1.BigQueryConfig.State, enum: true, deprecated: false) + field(:use_table_schema, 6, type: :bool, json_name: "useTableSchema", deprecated: false) + + field(:service_account_email, 7, + type: :string, + json_name: "serviceAccountEmail", + deprecated: false + ) +end + +defmodule Google.Pubsub.V1.BigtableConfig do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.BigtableConfig", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:table, 1, type: :string, deprecated: false) + field(:app_profile_id, 2, type: :string, json_name: "appProfileId", deprecated: false) + + field(:service_account_email, 3, + type: :string, + json_name: "serviceAccountEmail", + deprecated: false + ) + + field(:write_metadata, 5, type: :bool, json_name: "writeMetadata", deprecated: false) + field(:state, 4, type: Google.Pubsub.V1.BigtableConfig.State, enum: true, deprecated: false) +end + +defmodule Google.Pubsub.V1.CloudStorageConfig.TextConfig do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.CloudStorageConfig.TextConfig", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 +end + +defmodule Google.Pubsub.V1.CloudStorageConfig.AvroConfig do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.CloudStorageConfig.AvroConfig", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:write_metadata, 1, type: :bool, json_name: "writeMetadata", deprecated: false) + field(:use_topic_schema, 2, type: :bool, json_name: "useTopicSchema", deprecated: false) +end + +defmodule Google.Pubsub.V1.CloudStorageConfig do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.CloudStorageConfig", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + oneof(:output_format, 0) + + field(:bucket, 1, type: :string, deprecated: false) + field(:filename_prefix, 2, type: :string, json_name: "filenamePrefix", deprecated: false) + field(:filename_suffix, 3, type: :string, json_name: "filenameSuffix", deprecated: false) + + field(:filename_datetime_format, 10, + type: :string, + json_name: "filenameDatetimeFormat", + deprecated: false + ) + + field(:text_config, 4, + type: Google.Pubsub.V1.CloudStorageConfig.TextConfig, + json_name: "textConfig", + oneof: 0, + deprecated: false + ) + + field(:avro_config, 5, + type: Google.Pubsub.V1.CloudStorageConfig.AvroConfig, + json_name: "avroConfig", + oneof: 0, + deprecated: false + ) + + field(:max_duration, 6, + type: Google.Protobuf.Duration, + json_name: "maxDuration", + deprecated: false + ) + + field(:max_bytes, 7, type: :int64, json_name: "maxBytes", deprecated: false) + field(:max_messages, 8, type: :int64, json_name: "maxMessages", deprecated: false) + field(:state, 9, type: Google.Pubsub.V1.CloudStorageConfig.State, enum: true, deprecated: false) + + field(:service_account_email, 11, + type: :string, + json_name: "serviceAccountEmail", + deprecated: false + ) +end + +defmodule Google.Pubsub.V1.ReceivedMessage do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.ReceivedMessage", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:ack_id, 1, type: :string, json_name: "ackId", deprecated: false) + field(:message, 2, type: Google.Pubsub.V1.PubsubMessage, deprecated: false) + field(:delivery_attempt, 3, type: :int32, json_name: "deliveryAttempt", deprecated: false) +end + +defmodule Google.Pubsub.V1.GetSubscriptionRequest do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.GetSubscriptionRequest", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:subscription, 1, type: :string, deprecated: false) +end + +defmodule Google.Pubsub.V1.UpdateSubscriptionRequest do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.UpdateSubscriptionRequest", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:subscription, 1, type: Google.Pubsub.V1.Subscription, deprecated: false) + + field(:update_mask, 2, + type: Google.Protobuf.FieldMask, + json_name: "updateMask", + deprecated: false + ) +end + +defmodule Google.Pubsub.V1.ListSubscriptionsRequest do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.ListSubscriptionsRequest", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:project, 1, type: :string, deprecated: false) + field(:page_size, 2, type: :int32, json_name: "pageSize", deprecated: false) + field(:page_token, 3, type: :string, json_name: "pageToken", deprecated: false) +end + +defmodule Google.Pubsub.V1.ListSubscriptionsResponse do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.ListSubscriptionsResponse", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:subscriptions, 1, repeated: true, type: Google.Pubsub.V1.Subscription, deprecated: false) + field(:next_page_token, 2, type: :string, json_name: "nextPageToken", deprecated: false) +end + +defmodule Google.Pubsub.V1.DeleteSubscriptionRequest do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.DeleteSubscriptionRequest", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:subscription, 1, type: :string, deprecated: false) +end + +defmodule Google.Pubsub.V1.ModifyPushConfigRequest do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.ModifyPushConfigRequest", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:subscription, 1, type: :string, deprecated: false) + + field(:push_config, 2, + type: Google.Pubsub.V1.PushConfig, + json_name: "pushConfig", + deprecated: false + ) +end + +defmodule Google.Pubsub.V1.PullRequest do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.PullRequest", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:subscription, 1, type: :string, deprecated: false) + field(:return_immediately, 2, type: :bool, json_name: "returnImmediately", deprecated: true) + field(:max_messages, 3, type: :int32, json_name: "maxMessages", deprecated: false) +end + +defmodule Google.Pubsub.V1.PullResponse do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.PullResponse", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:received_messages, 1, + repeated: true, + type: Google.Pubsub.V1.ReceivedMessage, + json_name: "receivedMessages", + deprecated: false + ) +end + +defmodule Google.Pubsub.V1.ModifyAckDeadlineRequest do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.ModifyAckDeadlineRequest", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:subscription, 1, type: :string, deprecated: false) + field(:ack_ids, 4, repeated: true, type: :string, json_name: "ackIds", deprecated: false) + + field(:ack_deadline_seconds, 3, + type: :int32, + json_name: "ackDeadlineSeconds", + deprecated: false + ) +end + +defmodule Google.Pubsub.V1.AcknowledgeRequest do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.AcknowledgeRequest", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:subscription, 1, type: :string, deprecated: false) + field(:ack_ids, 2, repeated: true, type: :string, json_name: "ackIds", deprecated: false) +end + +defmodule Google.Pubsub.V1.StreamingPullRequest do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.StreamingPullRequest", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:subscription, 1, type: :string, deprecated: false) + field(:ack_ids, 2, repeated: true, type: :string, json_name: "ackIds", deprecated: false) + + field(:modify_deadline_seconds, 3, + repeated: true, + type: :int32, + json_name: "modifyDeadlineSeconds", + deprecated: false + ) + + field(:modify_deadline_ack_ids, 4, + repeated: true, + type: :string, + json_name: "modifyDeadlineAckIds", + deprecated: false + ) + + field(:stream_ack_deadline_seconds, 5, + type: :int32, + json_name: "streamAckDeadlineSeconds", + deprecated: false + ) + + field(:client_id, 6, type: :string, json_name: "clientId", deprecated: false) + + field(:max_outstanding_messages, 7, + type: :int64, + json_name: "maxOutstandingMessages", + deprecated: false + ) + + field(:max_outstanding_bytes, 8, + type: :int64, + json_name: "maxOutstandingBytes", + deprecated: false + ) + + field(:protocol_version, 10, type: :int64, json_name: "protocolVersion", deprecated: false) +end + +defmodule Google.Pubsub.V1.StreamingPullResponse.AcknowledgeConfirmation do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.StreamingPullResponse.AcknowledgeConfirmation", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:ack_ids, 1, repeated: true, type: :string, json_name: "ackIds", deprecated: false) + + field(:invalid_ack_ids, 2, + repeated: true, + type: :string, + json_name: "invalidAckIds", + deprecated: false + ) + + field(:unordered_ack_ids, 3, + repeated: true, + type: :string, + json_name: "unorderedAckIds", + deprecated: false + ) + + field(:temporary_failed_ack_ids, 4, + repeated: true, + type: :string, + json_name: "temporaryFailedAckIds", + deprecated: false + ) +end + +defmodule Google.Pubsub.V1.StreamingPullResponse.ModifyAckDeadlineConfirmation do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.StreamingPullResponse.ModifyAckDeadlineConfirmation", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:ack_ids, 1, repeated: true, type: :string, json_name: "ackIds", deprecated: false) + + field(:invalid_ack_ids, 2, + repeated: true, + type: :string, + json_name: "invalidAckIds", + deprecated: false + ) + + field(:temporary_failed_ack_ids, 3, + repeated: true, + type: :string, + json_name: "temporaryFailedAckIds", + deprecated: false + ) +end + +defmodule Google.Pubsub.V1.StreamingPullResponse.SubscriptionProperties do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.StreamingPullResponse.SubscriptionProperties", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:exactly_once_delivery_enabled, 1, + type: :bool, + json_name: "exactlyOnceDeliveryEnabled", + deprecated: false + ) + + field(:message_ordering_enabled, 2, + type: :bool, + json_name: "messageOrderingEnabled", + deprecated: false + ) +end + +defmodule Google.Pubsub.V1.StreamingPullResponse do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.StreamingPullResponse", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:received_messages, 1, + repeated: true, + type: Google.Pubsub.V1.ReceivedMessage, + json_name: "receivedMessages", + deprecated: false + ) + + field(:acknowledge_confirmation, 5, + type: Google.Pubsub.V1.StreamingPullResponse.AcknowledgeConfirmation, + json_name: "acknowledgeConfirmation", + deprecated: false + ) + + field(:modify_ack_deadline_confirmation, 3, + type: Google.Pubsub.V1.StreamingPullResponse.ModifyAckDeadlineConfirmation, + json_name: "modifyAckDeadlineConfirmation", + deprecated: false + ) + + field(:subscription_properties, 4, + type: Google.Pubsub.V1.StreamingPullResponse.SubscriptionProperties, + json_name: "subscriptionProperties", + deprecated: false + ) +end + +defmodule Google.Pubsub.V1.CreateSnapshotRequest.LabelsEntry do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.CreateSnapshotRequest.LabelsEntry", + map: true, + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:key, 1, type: :string) + field(:value, 2, type: :string) +end + +defmodule Google.Pubsub.V1.CreateSnapshotRequest.TagsEntry do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.CreateSnapshotRequest.TagsEntry", + map: true, + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:key, 1, type: :string) + field(:value, 2, type: :string) +end + +defmodule Google.Pubsub.V1.CreateSnapshotRequest do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.CreateSnapshotRequest", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:name, 1, type: :string, deprecated: false) + field(:subscription, 2, type: :string, deprecated: false) + + field(:labels, 3, + repeated: true, + type: Google.Pubsub.V1.CreateSnapshotRequest.LabelsEntry, + map: true, + deprecated: false + ) + + field(:tags, 4, + repeated: true, + type: Google.Pubsub.V1.CreateSnapshotRequest.TagsEntry, + map: true, + deprecated: false + ) +end + +defmodule Google.Pubsub.V1.UpdateSnapshotRequest do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.UpdateSnapshotRequest", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:snapshot, 1, type: Google.Pubsub.V1.Snapshot, deprecated: false) + + field(:update_mask, 2, + type: Google.Protobuf.FieldMask, + json_name: "updateMask", + deprecated: false + ) +end + +defmodule Google.Pubsub.V1.Snapshot.LabelsEntry do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.Snapshot.LabelsEntry", + map: true, + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:key, 1, type: :string) + field(:value, 2, type: :string) +end + +defmodule Google.Pubsub.V1.Snapshot do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.Snapshot", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:name, 1, type: :string, deprecated: false) + field(:topic, 2, type: :string, deprecated: false) + + field(:expire_time, 3, + type: Google.Protobuf.Timestamp, + json_name: "expireTime", + deprecated: false + ) + + field(:labels, 4, + repeated: true, + type: Google.Pubsub.V1.Snapshot.LabelsEntry, + map: true, + deprecated: false + ) +end + +defmodule Google.Pubsub.V1.GetSnapshotRequest do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.GetSnapshotRequest", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:snapshot, 1, type: :string, deprecated: false) +end + +defmodule Google.Pubsub.V1.ListSnapshotsRequest do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.ListSnapshotsRequest", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:project, 1, type: :string, deprecated: false) + field(:page_size, 2, type: :int32, json_name: "pageSize", deprecated: false) + field(:page_token, 3, type: :string, json_name: "pageToken", deprecated: false) +end + +defmodule Google.Pubsub.V1.ListSnapshotsResponse do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.ListSnapshotsResponse", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:snapshots, 1, repeated: true, type: Google.Pubsub.V1.Snapshot, deprecated: false) + field(:next_page_token, 2, type: :string, json_name: "nextPageToken", deprecated: false) +end + +defmodule Google.Pubsub.V1.DeleteSnapshotRequest do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.DeleteSnapshotRequest", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:snapshot, 1, type: :string, deprecated: false) +end + +defmodule Google.Pubsub.V1.SeekRequest do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.SeekRequest", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + oneof(:target, 0) + + field(:subscription, 1, type: :string, deprecated: false) + field(:time, 2, type: Google.Protobuf.Timestamp, oneof: 0, deprecated: false) + field(:snapshot, 3, type: :string, oneof: 0, deprecated: false) +end + +defmodule Google.Pubsub.V1.SeekResponse do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.SeekResponse", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 +end + +defmodule Google.Pubsub.V1.Publisher.Service do + @moduledoc false + + use GRPC.Service, name: "google.pubsub.v1.Publisher", protoc_gen_elixir_version: "0.16.0" + + rpc(:CreateTopic, Google.Pubsub.V1.Topic, Google.Pubsub.V1.Topic) + + rpc(:UpdateTopic, Google.Pubsub.V1.UpdateTopicRequest, Google.Pubsub.V1.Topic) + + rpc(:Publish, Google.Pubsub.V1.PublishRequest, Google.Pubsub.V1.PublishResponse) + + rpc(:GetTopic, Google.Pubsub.V1.GetTopicRequest, Google.Pubsub.V1.Topic) + + rpc(:ListTopics, Google.Pubsub.V1.ListTopicsRequest, Google.Pubsub.V1.ListTopicsResponse) + + rpc( + :ListTopicSubscriptions, + Google.Pubsub.V1.ListTopicSubscriptionsRequest, + Google.Pubsub.V1.ListTopicSubscriptionsResponse + ) + + rpc( + :ListTopicSnapshots, + Google.Pubsub.V1.ListTopicSnapshotsRequest, + Google.Pubsub.V1.ListTopicSnapshotsResponse + ) + + rpc(:DeleteTopic, Google.Pubsub.V1.DeleteTopicRequest, Google.Protobuf.Empty) + + rpc( + :DetachSubscription, + Google.Pubsub.V1.DetachSubscriptionRequest, + Google.Pubsub.V1.DetachSubscriptionResponse + ) +end + +defmodule Google.Pubsub.V1.Publisher.Stub do + @moduledoc false + + use GRPC.Stub, service: Google.Pubsub.V1.Publisher.Service +end + +defmodule Google.Pubsub.V1.Subscriber.Service do + @moduledoc false + + use GRPC.Service, name: "google.pubsub.v1.Subscriber", protoc_gen_elixir_version: "0.16.0" + + rpc(:CreateSubscription, Google.Pubsub.V1.Subscription, Google.Pubsub.V1.Subscription) + + rpc(:GetSubscription, Google.Pubsub.V1.GetSubscriptionRequest, Google.Pubsub.V1.Subscription) + + rpc( + :UpdateSubscription, + Google.Pubsub.V1.UpdateSubscriptionRequest, + Google.Pubsub.V1.Subscription + ) + + rpc( + :ListSubscriptions, + Google.Pubsub.V1.ListSubscriptionsRequest, + Google.Pubsub.V1.ListSubscriptionsResponse + ) + + rpc(:DeleteSubscription, Google.Pubsub.V1.DeleteSubscriptionRequest, Google.Protobuf.Empty) + + rpc(:ModifyAckDeadline, Google.Pubsub.V1.ModifyAckDeadlineRequest, Google.Protobuf.Empty) + + rpc(:Acknowledge, Google.Pubsub.V1.AcknowledgeRequest, Google.Protobuf.Empty) + + rpc(:Pull, Google.Pubsub.V1.PullRequest, Google.Pubsub.V1.PullResponse) + + rpc( + :StreamingPull, + stream(Google.Pubsub.V1.StreamingPullRequest), + stream(Google.Pubsub.V1.StreamingPullResponse) + ) + + rpc(:ModifyPushConfig, Google.Pubsub.V1.ModifyPushConfigRequest, Google.Protobuf.Empty) + + rpc(:GetSnapshot, Google.Pubsub.V1.GetSnapshotRequest, Google.Pubsub.V1.Snapshot) + + rpc( + :ListSnapshots, + Google.Pubsub.V1.ListSnapshotsRequest, + Google.Pubsub.V1.ListSnapshotsResponse + ) + + rpc(:CreateSnapshot, Google.Pubsub.V1.CreateSnapshotRequest, Google.Pubsub.V1.Snapshot) + + rpc(:UpdateSnapshot, Google.Pubsub.V1.UpdateSnapshotRequest, Google.Pubsub.V1.Snapshot) + + rpc(:DeleteSnapshot, Google.Pubsub.V1.DeleteSnapshotRequest, Google.Protobuf.Empty) + + rpc(:Seek, Google.Pubsub.V1.SeekRequest, Google.Pubsub.V1.SeekResponse) +end + +defmodule Google.Pubsub.V1.Subscriber.Stub do + @moduledoc false + + use GRPC.Stub, service: Google.Pubsub.V1.Subscriber.Service +end diff --git a/lib/broadway_cloud_pub_sub/proto/google/pubsub/v1/schema.pb.ex b/lib/broadway_cloud_pub_sub/proto/google/pubsub/v1/schema.pb.ex new file mode 100644 index 0000000..455f222 --- /dev/null +++ b/lib/broadway_cloud_pub_sub/proto/google/pubsub/v1/schema.pb.ex @@ -0,0 +1,280 @@ +defmodule Google.Pubsub.V1.SchemaView do + @moduledoc false + + use Protobuf, + enum: true, + full_name: "google.pubsub.v1.SchemaView", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:SCHEMA_VIEW_UNSPECIFIED, 0) + field(:BASIC, 1) + field(:FULL, 2) +end + +defmodule Google.Pubsub.V1.Encoding do + @moduledoc false + + use Protobuf, + enum: true, + full_name: "google.pubsub.v1.Encoding", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:ENCODING_UNSPECIFIED, 0) + field(:JSON, 1) + field(:BINARY, 2) +end + +defmodule Google.Pubsub.V1.Schema.Type do + @moduledoc false + + use Protobuf, + enum: true, + full_name: "google.pubsub.v1.Schema.Type", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:TYPE_UNSPECIFIED, 0) + field(:PROTOCOL_BUFFER, 1) + field(:AVRO, 2) +end + +defmodule Google.Pubsub.V1.Schema do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.Schema", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:name, 1, type: :string, deprecated: false) + field(:type, 2, type: Google.Pubsub.V1.Schema.Type, enum: true) + field(:definition, 3, type: :string) + field(:revision_id, 4, type: :string, json_name: "revisionId", deprecated: false) + + field(:revision_create_time, 6, + type: Google.Protobuf.Timestamp, + json_name: "revisionCreateTime", + deprecated: false + ) +end + +defmodule Google.Pubsub.V1.CreateSchemaRequest do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.CreateSchemaRequest", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:parent, 1, type: :string, deprecated: false) + field(:schema, 2, type: Google.Pubsub.V1.Schema, deprecated: false) + field(:schema_id, 3, type: :string, json_name: "schemaId") +end + +defmodule Google.Pubsub.V1.GetSchemaRequest do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.GetSchemaRequest", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:name, 1, type: :string, deprecated: false) + field(:view, 2, type: Google.Pubsub.V1.SchemaView, enum: true) +end + +defmodule Google.Pubsub.V1.ListSchemasRequest do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.ListSchemasRequest", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:parent, 1, type: :string, deprecated: false) + field(:view, 2, type: Google.Pubsub.V1.SchemaView, enum: true) + field(:page_size, 3, type: :int32, json_name: "pageSize") + field(:page_token, 4, type: :string, json_name: "pageToken") +end + +defmodule Google.Pubsub.V1.ListSchemasResponse do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.ListSchemasResponse", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:schemas, 1, repeated: true, type: Google.Pubsub.V1.Schema) + field(:next_page_token, 2, type: :string, json_name: "nextPageToken") +end + +defmodule Google.Pubsub.V1.ListSchemaRevisionsRequest do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.ListSchemaRevisionsRequest", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:name, 1, type: :string, deprecated: false) + field(:view, 2, type: Google.Pubsub.V1.SchemaView, enum: true) + field(:page_size, 3, type: :int32, json_name: "pageSize") + field(:page_token, 4, type: :string, json_name: "pageToken") +end + +defmodule Google.Pubsub.V1.ListSchemaRevisionsResponse do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.ListSchemaRevisionsResponse", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:schemas, 1, repeated: true, type: Google.Pubsub.V1.Schema) + field(:next_page_token, 2, type: :string, json_name: "nextPageToken") +end + +defmodule Google.Pubsub.V1.CommitSchemaRequest do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.CommitSchemaRequest", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:name, 1, type: :string, deprecated: false) + field(:schema, 2, type: Google.Pubsub.V1.Schema, deprecated: false) +end + +defmodule Google.Pubsub.V1.RollbackSchemaRequest do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.RollbackSchemaRequest", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:name, 1, type: :string, deprecated: false) + field(:revision_id, 2, type: :string, json_name: "revisionId", deprecated: false) +end + +defmodule Google.Pubsub.V1.DeleteSchemaRevisionRequest do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.DeleteSchemaRevisionRequest", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:name, 1, type: :string, deprecated: false) + field(:revision_id, 2, type: :string, json_name: "revisionId", deprecated: true) +end + +defmodule Google.Pubsub.V1.DeleteSchemaRequest do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.DeleteSchemaRequest", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:name, 1, type: :string, deprecated: false) +end + +defmodule Google.Pubsub.V1.ValidateSchemaRequest do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.ValidateSchemaRequest", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + field(:parent, 1, type: :string, deprecated: false) + field(:schema, 2, type: Google.Pubsub.V1.Schema, deprecated: false) +end + +defmodule Google.Pubsub.V1.ValidateSchemaResponse do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.ValidateSchemaResponse", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 +end + +defmodule Google.Pubsub.V1.ValidateMessageRequest do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.ValidateMessageRequest", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 + + oneof(:schema_spec, 0) + + field(:parent, 1, type: :string, deprecated: false) + field(:name, 2, type: :string, oneof: 0, deprecated: false) + field(:schema, 3, type: Google.Pubsub.V1.Schema, oneof: 0) + field(:message, 4, type: :bytes) + field(:encoding, 5, type: Google.Pubsub.V1.Encoding, enum: true) +end + +defmodule Google.Pubsub.V1.ValidateMessageResponse do + @moduledoc false + + use Protobuf, + full_name: "google.pubsub.v1.ValidateMessageResponse", + protoc_gen_elixir_version: "0.16.0", + syntax: :proto3 +end + +defmodule Google.Pubsub.V1.SchemaService.Service do + @moduledoc false + + use GRPC.Service, name: "google.pubsub.v1.SchemaService", protoc_gen_elixir_version: "0.16.0" + + rpc(:CreateSchema, Google.Pubsub.V1.CreateSchemaRequest, Google.Pubsub.V1.Schema) + + rpc(:GetSchema, Google.Pubsub.V1.GetSchemaRequest, Google.Pubsub.V1.Schema) + + rpc(:ListSchemas, Google.Pubsub.V1.ListSchemasRequest, Google.Pubsub.V1.ListSchemasResponse) + + rpc( + :ListSchemaRevisions, + Google.Pubsub.V1.ListSchemaRevisionsRequest, + Google.Pubsub.V1.ListSchemaRevisionsResponse + ) + + rpc(:CommitSchema, Google.Pubsub.V1.CommitSchemaRequest, Google.Pubsub.V1.Schema) + + rpc(:RollbackSchema, Google.Pubsub.V1.RollbackSchemaRequest, Google.Pubsub.V1.Schema) + + rpc( + :DeleteSchemaRevision, + Google.Pubsub.V1.DeleteSchemaRevisionRequest, + Google.Pubsub.V1.Schema + ) + + rpc(:DeleteSchema, Google.Pubsub.V1.DeleteSchemaRequest, Google.Protobuf.Empty) + + rpc( + :ValidateSchema, + Google.Pubsub.V1.ValidateSchemaRequest, + Google.Pubsub.V1.ValidateSchemaResponse + ) + + rpc( + :ValidateMessage, + Google.Pubsub.V1.ValidateMessageRequest, + Google.Pubsub.V1.ValidateMessageResponse + ) +end + +defmodule Google.Pubsub.V1.SchemaService.Stub do + @moduledoc false + + use GRPC.Stub, service: Google.Pubsub.V1.SchemaService.Service +end diff --git a/lib/broadway_cloud_pub_sub/pull_client.ex b/lib/broadway_cloud_pub_sub/pull_client.ex index 7fd2fa7..ceb4c11 100644 --- a/lib/broadway_cloud_pub_sub/pull_client.ex +++ b/lib/broadway_cloud_pub_sub/pull_client.ex @@ -3,7 +3,7 @@ defmodule BroadwayCloudPubSub.PullClient do A subscriptions [pull client](https://cloud.google.com/pubsub/docs/reference/rest/v1/projects.subscriptions/pull) built on `Finch`. """ alias Broadway.Message - alias BroadwayCloudPubSub.Client + alias BroadwayCloudPubSub.{Client, MessageBuilder} alias Finch.Response require Logger @@ -142,13 +142,14 @@ defmodule BroadwayCloudPubSub.PullClient do |> decode_message() |> Map.pop("data") - metadata = %{ - attributes: metadata["attributes"], - deliveryAttempt: delivery_attempt, - messageId: metadata["messageId"], - orderingKey: metadata["orderingKey"], - publishTime: parse_datetime(metadata["publishTime"]) - } + metadata = + MessageBuilder.build_metadata(%{ + message_id: metadata["messageId"], + ordering_key: metadata["orderingKey"], + publish_time: parse_datetime(metadata["publishTime"]), + delivery_attempt: delivery_attempt, + attributes: metadata["attributes"] + }) %Message{ data: data, diff --git a/lib/broadway_cloud_pub_sub/streaming/acknowledger.ex b/lib/broadway_cloud_pub_sub/streaming/acknowledger.ex new file mode 100644 index 0000000..adba412 --- /dev/null +++ b/lib/broadway_cloud_pub_sub/streaming/acknowledger.ex @@ -0,0 +1,92 @@ +defmodule BroadwayCloudPubSub.Streaming.Acknowledger do + @moduledoc false + + # Broadway.Acknowledger for StreamingPull. + # Delegates ack/nack/modifyAckDeadline to the StreamManager via + # gRPC requests on the bidirectional stream. + + alias Broadway.Acknowledger + alias BroadwayCloudPubSub.Streaming.Options + + @behaviour Acknowledger + + @typedoc "Acknowledgement data for a Broadway.Message." + @type ack_data :: %{ + :ack_id => String.t(), + optional(:on_failure) => ack_option(), + optional(:on_success) => ack_option() + } + + @typedoc "An acknowledgement action." + @type ack_option :: :ack | :noop | {:nack, 0..600} + + @type ack_ref :: term() + + # The maximum number of ackIds per request. + # API limit is 524288 bytes (512KiB); ackIds have max 184 bytes each. + # 524288/184 ~= 2849 → use 2500 with headroom. + @max_ack_ids_per_request 2_500 + + @doc """ + Returns an acknowledger tuple builder function to attach to Broadway.Messages. + The returned function takes an `ack_id` and returns an acknowledger tuple. + """ + @spec builder(ack_ref()) :: (String.t() -> {__MODULE__, ack_ref(), ack_data()}) + def builder(ack_ref) do + &{__MODULE__, ack_ref, %{ack_id: &1}} + end + + @impl Acknowledger + def ack(ack_ref, successful, failed) do + {manager_pid, config} = :persistent_term.get(ack_ref) + + success_actions = group_actions_ack_ids(successful, :on_success, config) + failure_actions = group_actions_ack_ids(failed, :on_failure, config) + + success_actions + |> Map.merge(failure_actions, fn _, a, b -> a ++ b end) + |> dispatch_acks(manager_pid) + + :ok + end + + @impl Acknowledger + def configure(_ack_ref, ack_data, options) do + opts = NimbleOptions.validate!(options, Options.acknowledger_definition()) + ack_data = Map.merge(ack_data, Map.new(opts)) + {:ok, ack_data} + end + + # --- Private --- + + defp group_actions_ack_ids(messages, key, config) do + Enum.group_by(messages, &action_for(&1, key, config), &extract_ack_id/1) + end + + defp action_for(%{acknowledger: {_, _, ack_data}}, key, config) do + Map.get_lazy(ack_data, key, fn -> default_action(key, config) end) + end + + defp default_action(:on_success, %{on_success: action}), do: action + defp default_action(:on_failure, %{on_failure: action}), do: action + + defp extract_ack_id(%{acknowledger: {_, _, %{ack_id: ack_id}}}), do: ack_id + + defp dispatch_acks(actions_and_ids, manager_pid) do + Enum.each(actions_and_ids, fn {action, ack_ids} -> + ack_ids + |> Enum.chunk_every(@max_ack_ids_per_request) + |> Enum.each(&apply_action(action, &1, manager_pid)) + end) + end + + defp apply_action(:noop, _ack_ids, _manager_pid), do: :ok + + defp apply_action(:ack, ack_ids, manager_pid) do + BroadwayCloudPubSub.Streaming.StreamManager.acknowledge(manager_pid, ack_ids) + end + + defp apply_action({:nack, deadline}, ack_ids, manager_pid) do + BroadwayCloudPubSub.Streaming.StreamManager.modify_deadline(manager_pid, ack_ids, deadline) + end +end diff --git a/lib/broadway_cloud_pub_sub/streaming/options.ex b/lib/broadway_cloud_pub_sub/streaming/options.ex new file mode 100644 index 0000000..85197fa --- /dev/null +++ b/lib/broadway_cloud_pub_sub/streaming/options.ex @@ -0,0 +1,268 @@ +defmodule BroadwayCloudPubSub.Streaming.Options do + @moduledoc """ + Options for `BroadwayCloudPubSub.Streaming.Producer`. + """ + + @default_grpc_endpoint "pubsub.googleapis.com:443" + @default_max_outstanding_messages 1_000 + @default_max_outstanding_bytes 100 * 1024 * 1024 + @default_stream_ack_deadline_seconds 60 + @default_lease_extension_percent 0.6 + @default_backoff_min 1_000 + @default_backoff_max 30_000 + + definition = [ + # Handled by Broadway. + broadway: [type: :any, doc: false], + subscription: [ + type: {:custom, __MODULE__, :type_non_empty_string, [[{:name, :subscription}]]}, + required: true, + doc: """ + The name of the subscription, including the project. + For example, if your project is `"my-project"` and your + subscription is `"my-subscription"`, the full name is + `"projects/my-project/subscriptions/my-subscription"`. + """ + ], + max_outstanding_messages: [ + type: :pos_integer, + default: @default_max_outstanding_messages, + doc: """ + The maximum number of outstanding messages (delivered but not yet + acknowledged) that the server will push. Acts as the primary flow + control mechanism. Analogous to AMQP `prefetch_count`. + """ + ], + max_outstanding_bytes: [ + type: :pos_integer, + default: @default_max_outstanding_bytes, + doc: """ + The maximum total size in bytes of outstanding messages. The server + will not push more messages if the total byte size of outstanding + messages exceeds this limit. Defaults to 100 MiB. + """ + ], + stream_ack_deadline_seconds: [ + type: + {:custom, __MODULE__, :type_integer_in_range, + [[{:name, :stream_ack_deadline_seconds}, {:min, 10}, {:max, 600}]]}, + default: @default_stream_ack_deadline_seconds, + doc: """ + The number of seconds the server will wait before re-delivering an + unacknowledged message. Must be between 10 and 600. Defaults to 60. + The producer will extend leases automatically before this deadline. + """ + ], + lease_extension_percent: [ + type: + {:custom, __MODULE__, :type_float_between_0_and_1, [[{:name, :lease_extension_percent}]]}, + default: @default_lease_extension_percent, + doc: """ + The fraction of `stream_ack_deadline_seconds` at which leases are + extended. For example, with a deadline of 60s and a percent of 0.6, + leases are extended every 36s. Must be between 0.0 and 1.0 exclusive. + Defaults to 0.6. + """ + ], + client_id: [ + type: :string, + doc: """ + An identifier that can be used to distinguish individual instances of + the producer. If not provided, a unique ID will be generated. Using + a stable `client_id` across reconnections enables the server to use + sticky assignment for ordered subscriptions. + """ + ], + on_success: [ + type: {:custom, __MODULE__, :type_ack_option, [[{:name, :on_success}]]}, + default: :ack, + doc: """ + Configures the acknowledgement behaviour for successfully processed + messages. Defaults to `:ack`. + """ + ], + on_failure: [ + type: {:custom, __MODULE__, :type_ack_option, [[{:name, :on_failure}]]}, + default: :noop, + doc: """ + Configures the acknowledgement behaviour for failed messages. + Defaults to `:noop`. + """ + ], + on_shutdown: [ + type: {:custom, __MODULE__, :type_shutdown_option, [[{:name, :on_shutdown}]]}, + default: {:nack, 5}, + doc: """ + Configures what happens to messages received but not yet processed + when the producer is shut down. + + * `{:nack, seconds}` - Sends a `modifyAckDeadline` request with the + given `seconds` for all outstanding messages, making them available + for redelivery after that delay. The default `{:nack, 5}` provides + a small delay to avoid thundering herd on rolling deploys. + * `:nack` - Equivalent to `{:nack, 0}`. Immediately makes unprocessed + messages available for redelivery. + * `:noop` - Does nothing. Messages become available after their ack + deadline expires naturally. + + Defaults to `{:nack, 5}`. + """ + ], + goth: [ + type: :atom, + doc: """ + The `Goth` module to use for authentication. Note that this option only + applies to the default token generator. + """ + ], + token_generator: [ + type: :mfa, + doc: """ + An MFArgs tuple that will be called before each gRPC connection to fetch + an authentication token. Should return `{:ok, String.t()} | {:error, any()}`. + By default this will invoke `Goth.fetch/1` with the `:goth` option. + """ + ], + backoff_type: [ + type: {:in, [:rand_exp, :exp, :rand, :stop]}, + default: :rand_exp, + doc: """ + The backoff strategy used when reconnecting after a stream failure. + + * `:rand_exp` - Randomized exponential backoff (default). Adds jitter + to prevent thundering herd after mass disconnects. + * `:exp` - Pure exponential backoff. + * `:rand` - Random value between `backoff_min` and `backoff_max`. + * `:stop` - Do not reconnect. The producer will crash after one failure. + + """ + ], + backoff_min: [ + type: :pos_integer, + default: @default_backoff_min, + doc: "Minimum reconnection backoff in milliseconds. Defaults to 1000." + ], + backoff_max: [ + type: :pos_integer, + default: @default_backoff_max, + doc: "Maximum reconnection backoff in milliseconds. Defaults to 30000." + ], + adapter: [ + type: {:in, [:gun, :mint]}, + default: :gun, + doc: """ + The gRPC HTTP/2 adapter to use for the streaming connection. + + * `:gun` — Uses the Gun HTTP/2 client (default). Gun is well-tested + and is the traditional adapter for the Elixir gRPC library. + * `:mint` — Uses the Mint HTTP/2 client. Mint may be preferable in + deployment environments where Gun is not available or not desired. + + Both adapters are provided by the `grpc` dependency. The adapter choice + does not affect the public API or message semantics. + """ + ], + grpc_endpoint: [ + type: {:custom, __MODULE__, :type_non_empty_string, [[{:name, :grpc_endpoint}]]}, + default: @default_grpc_endpoint, + doc: """ + The gRPC endpoint for the Cloud Pub/Sub service. Useful for testing + with the Pub/Sub emulator (e.g., `"localhost:8085"`). + """ + ], + use_ssl: [ + type: :boolean, + default: true, + doc: """ + Whether to use TLS when connecting to the gRPC endpoint. Set to `false` + when connecting to the Pub/Sub emulator, which does not use TLS. + Defaults to `true`. + """ + ], + + # Testing options + test_pid: [type: :pid, doc: false], + message_server: [type: :pid, doc: false] + ] + + @definition NimbleOptions.new!(definition) + + def definition do + @definition + end + + @acknowledger_definition definition + |> Keyword.take([:on_failure, :on_success]) + |> NimbleOptions.new!() + + def acknowledger_definition do + @acknowledger_definition + end + + @doc """ + Builds an MFArgs tuple for a token generator using Goth. + """ + defdelegate make_token_generator(opts), to: BroadwayCloudPubSub.Options + + # --- Custom type validators --- + + def type_non_empty_string("", [{:name, name}]) do + {:error, "expected :#{name} to be a non-empty string, got: \"\""} + end + + def type_non_empty_string(value, _) when is_binary(value) do + {:ok, value} + end + + def type_non_empty_string(value, [{:name, name}]) do + {:error, "expected :#{name} to be a non-empty string, got: #{inspect(value)}"} + end + + def type_integer_in_range(value, [{:name, _name}, {:min, min}, {:max, max}]) + when is_integer(value) and value >= min and value <= max do + {:ok, value} + end + + def type_integer_in_range(value, [{:name, name}, {:min, min}, {:max, max}]) do + {:error, + "expected :#{name} to be an integer between #{min} and #{max}, got: #{inspect(value)}"} + end + + def type_float_between_0_and_1(value, _) when is_float(value) and value > 0.0 and value < 1.0 do + {:ok, value} + end + + def type_float_between_0_and_1(value, [{:name, name}]) do + {:error, + "expected :#{name} to be a float between 0.0 and 1.0 exclusive, got: #{inspect(value)}"} + end + + def type_ack_option(:ack, _), do: {:ok, :ack} + def type_ack_option(:noop, _), do: {:ok, :noop} + def type_ack_option(:nack, _), do: {:ok, {:nack, 0}} + + def type_ack_option({:nack, value}, _) + when is_integer(value) and value >= 0 and value <= 600 do + {:ok, {:nack, value}} + end + + def type_ack_option(value, [{:name, name}]) do + {:error, + "expected :#{name} to be one of :ack, :noop, :nack, or {:nack, integer} where " <> + "integer is between 0 and 600, got: #{inspect(value)}"} + end + + def type_shutdown_option(:nack, _), do: {:ok, {:nack, 0}} + def type_shutdown_option(:noop, _), do: {:ok, :noop} + + def type_shutdown_option({:nack, value}, _) + when is_integer(value) and value >= 0 and value <= 600 do + {:ok, {:nack, value}} + end + + def type_shutdown_option(value, [{:name, name}]) do + {:error, + "expected :#{name} to be :nack, :noop, or {:nack, integer} where " <> + "integer is between 0 and 600, got: #{inspect(value)}"} + end +end diff --git a/lib/broadway_cloud_pub_sub/streaming/producer.ex b/lib/broadway_cloud_pub_sub/streaming/producer.ex new file mode 100644 index 0000000..a8c6afa --- /dev/null +++ b/lib/broadway_cloud_pub_sub/streaming/producer.ex @@ -0,0 +1,233 @@ +defmodule BroadwayCloudPubSub.Streaming.Producer do + @moduledoc """ + A Broadway producer that uses the gRPC StreamingPull API to receive + messages from a Google Cloud Pub/Sub subscription. + + ## Overview + + `StreamingProducer` opens a persistent bidirectional gRPC stream to the + Pub/Sub service and receives messages as the server pushes them. This is + more efficient than the default HTTP Pull approach (`BroadwayCloudPubSub.Producer`) + for workloads that require low latency or high throughput. + + ## Usage + + Broadway.start_link(MyPipeline, + name: MyPipeline, + producer: [ + module: + {BroadwayCloudPubSub.Streaming.Producer, + goth: MyApp.Goth, + subscription: "projects/my-project/subscriptions/my-subscription", + max_outstanding_messages: 1000} + ], + processors: [default: [concurrency: 10]] + ) + + ## Options + + #{NimbleOptions.docs(BroadwayCloudPubSub.Streaming.Options.definition())} + + ### Required options + + * `:subscription` - The full subscription name, e.g. + `"projects/my-project/subscriptions/my-subscription"`. + + ### Auth options + + * `:goth` - The `Goth` module to use for authentication (e.g. `MyApp.Goth`). + * `:token_generator` - Custom MFArgs token generator as an alternative to `:goth`. + + ### Flow control + + * `:max_outstanding_messages` - Maximum number of unacked messages the server + will push. Defaults to 1000. + * `:max_outstanding_bytes` - Maximum total size of unacked messages. Defaults + to 100 MiB. + + ### Shutdown + + * `:on_shutdown` - What to do with unprocessed messages on shutdown. + Defaults to `{:nack, 5}` (redeliver after 5 seconds). + + ## Differences from BroadwayCloudPubSub.Producer + + * **Push-based**: Messages arrive via a persistent gRPC stream rather than + being polled. `handle_demand` is a no-op. + * **Flow control**: Controlled by `max_outstanding_messages` / `max_outstanding_bytes` + on the gRPC stream rather than by `max_number_of_messages` per HTTP request. + * **Shutdown**: By default, unprocessed messages are returned to Pub/Sub with a + short delay (`on_shutdown: {:nack, 5}`), analogous to AMQP channel close behavior. + + ## Telemetry + + This producer emits the following [Telemetry](https://github.com/beam-telemetry/telemetry) + events: + + * `[:broadway_cloud_pub_sub, :stream, :connect]` - Emitted when a gRPC + StreamingPull connection is successfully established. + + Measurements: `%{}` + + * `[:broadway_cloud_pub_sub, :stream, :disconnect]` - Emitted when the + gRPC stream is closed or encounters an error. + + Measurements: `%{reason: term()}` + + * `[:broadway_cloud_pub_sub, :stream, :receive_messages]` - Emitted when + messages are received from the gRPC stream and forwarded to the producer. + + Measurements: `%{count: pos_integer()}` + + * `[:broadway_cloud_pub_sub, :stream, :ack]` - Emitted when messages are + acknowledged on the gRPC stream. + + Measurements: `%{count: pos_integer()}` + + * `[:broadway_cloud_pub_sub, :stream, :connection_failure]` - Emitted when + a connection attempt fails before the stream is established. + + Measurements: `%{reason: term()}` + + All events include the following metadata: + + * `:subscription` - the full subscription name + * `:config` - the producer configuration + + """ + + use GenStage + + alias BroadwayCloudPubSub.Streaming.{StreamManager, Options} + + @behaviour Broadway.Producer + + # --- Broadway.Producer callbacks --- + + @impl Broadway.Producer + def prepare_for_start(_module, broadway_opts) do + {producer_module, opts} = broadway_opts[:producer][:module] + + opts = + opts + |> Keyword.put(:broadway, broadway_opts) + |> validate_options!() + |> assign_client_id() + |> assign_token_generator() + + # Broadway will start the returned child specs under its supervisor. + # We use the Broadway pipeline name as the StreamManager's registered name. + broadway_name = broadway_opts[:name] + manager_name = Module.concat(broadway_name, StreamManager) + + manager_opts = Keyword.put(opts, :name, manager_name) + + child_spec = %{ + id: StreamManager, + start: {StreamManager, :start_link, [manager_opts]}, + restart: :permanent + } + + {[child_spec], put_in(broadway_opts, [:producer, :module], {producer_module, opts})} + end + + @impl GenStage + def init(opts) do + Process.flag(:trap_exit, true) + + config = Map.new(opts) + ack_ref = config.broadway[:name] + manager_name = Module.concat(ack_ref, StreamManager) + manager_pid = Process.whereis(manager_name) + + # Tell the StreamManager our pid so it can forward messages to us + :ok = StreamManager.set_producer(manager_pid, self()) + + # Store ack config in persistent_term for acknowledger lookup + ack_config = %{on_success: config.on_success, on_failure: config.on_failure} + :persistent_term.put(ack_ref, {manager_pid, ack_config}) + + buffer_size = config.max_outstanding_messages * 5 + + {:producer, %{manager_pid: manager_pid, ack_ref: ack_ref, config: config, draining: false}, + buffer_size: buffer_size} + end + + @impl GenStage + def handle_demand(_demand, state) do + {:noreply, [], state} + end + + @impl GenStage + def handle_info({:stream_messages, messages}, state) do + {:noreply, messages, state} + end + + @impl GenStage + def handle_info(_, state), do: {:noreply, [], state} + + @impl Broadway.Producer + def prepare_for_draining(state) do + StreamManager.stop_receiving(state.manager_pid) + {:noreply, [], %{state | draining: true}} + end + + @impl GenStage + def terminate(_reason, state) do + %{manager_pid: manager_pid, config: config} = state + + if Process.alive?(manager_pid) do + # Nack outstanding messages per on_shutdown option + outstanding = StreamManager.get_outstanding(manager_pid) + + case {config[:on_shutdown], outstanding} do + {_, []} -> + :ok + + {:noop, _} -> + :ok + + {{:nack, delay_seconds}, ack_ids} -> + StreamManager.modify_deadline(manager_pid, ack_ids, delay_seconds) + + {:nack, ack_ids} -> + StreamManager.modify_deadline(manager_pid, ack_ids, 0) + end + + # Flush buffered acks and close stream + StreamManager.close(manager_pid) + end + + # Clean up persistent_term + :persistent_term.erase(state.ack_ref) + + :ok + end + + # --- Private --- + + defp validate_options!(opts) do + case NimbleOptions.validate(opts, Options.definition()) do + {:ok, validated} -> + validated + + {:error, err} -> + raise ArgumentError, "invalid Streaming.Producer options: #{Exception.message(err)}" + end + end + + defp assign_client_id(opts) do + Keyword.put_new_lazy(opts, :client_id, fn -> + :crypto.strong_rand_bytes(16) |> Base.encode16(case: :lower) + end) + end + + defp assign_token_generator(opts) do + if Keyword.has_key?(opts, :token_generator) do + opts + else + generator = Options.make_token_generator(opts) + Keyword.put(opts, :token_generator, generator) + end + end +end diff --git a/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex b/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex new file mode 100644 index 0000000..eae1976 --- /dev/null +++ b/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex @@ -0,0 +1,771 @@ +defmodule BroadwayCloudPubSub.Streaming.StreamManager do + @moduledoc false + + # GenServer that owns the gRPC bidirectional StreamingPull connection. + # Responsibilities: + # - Connect and reconnect with exponential backoff + # - Receive messages from the stream and forward them to the producer + # - Accept ack/modifyAckDeadline requests from StreamingAcknowledger and + # send them on the stream + # - Track outstanding (delivered but not acked) message ack_ids for + # lease management and shutdown nacking + # - Extend message leases periodically via modifyAckDeadline + # - Buffer ack/nack requests during reconnection and replay on connect + # + # Implementation note on gRPC adapter support: + # This module supports two gRPC HTTP/2 adapters: Gun and Mint. + # + # Gun (default): GRPC.Client.Adapters.Gun calls :gun.post/3 from the calling + # process, making it the gun message owner. Raw {:gun_response,...}, + # {:gun_data,...} etc. messages arrive in this GenServer's mailbox and are + # handled in handle_info/2. Manual gRPC frame decoding is done in-process + # via recv_buffer. + # + # Mint: GRPC.Client.Adapters.Mint handles all framing internally via its + # ConnectionProcess and StreamResponseProcess. A linked reader process calls + # GRPC.Stub.recv/2 to enumerate decoded messages and forwards them back as + # {:mint_messages, msgs}. Only {:elixir_grpc, :connection_down, conn_pid} + # arrives in this process directly from Mint. + # The GenServer traps exits so reader process crashes are handled gracefully. + + use GenServer + require Logger + + alias BroadwayCloudPubSub.{Backoff, MessageBuilder} + alias Google.Pubsub.V1.{StreamingPullRequest, StreamingPullResponse} + alias Google.Pubsub.V1.Subscriber.Stub + + # Maximum acks to buffer while reconnecting + @max_ack_buffer 10_000 + + defstruct [ + :producer_pid, + :config, + :channel, + :grpc_stream, + :conn_pid, + :stream_ref, + # Mint-only: pid of the linked reader process that enumerates GRPC.Stub.recv/2 + :reader_pid, + :backoff, + :lease_timer, + :lease_extension_interval_ms, + :receiving, + # Gun-only: binary buffer for reassembling gRPC length-prefixed frames + recv_buffer: <<>>, + outstanding: MapSet.new(), + ack_buffer: [], + ack_buffer_size: 0 + ] + + # --- Public API --- + + @spec start_link(keyword()) :: GenServer.on_start() + def start_link(opts) do + {name, opts} = Keyword.pop(opts, :name) + + if name do + GenServer.start_link(__MODULE__, opts, name: name) + else + GenServer.start_link(__MODULE__, opts) + end + end + + @doc """ + Sets the producer pid. Called by `StreamingProducer.init/1` after the producer + process starts (after Broadway has started both the StreamManager child and the + producer process). + """ + @spec set_producer(GenServer.server(), pid()) :: :ok + def set_producer(server, producer_pid) do + GenServer.call(server, {:set_producer, producer_pid}) + end + + @doc """ + Acknowledge (ack) a list of ack_ids. Called by StreamingAcknowledger. + """ + @spec acknowledge(pid(), [String.t()]) :: :ok + def acknowledge(pid, ack_ids) when is_list(ack_ids) do + GenServer.cast(pid, {:acknowledge, ack_ids}) + end + + @doc """ + Modify ack deadline for a list of ack_ids. Used for nack and lease extension. + """ + @spec modify_deadline(pid(), [String.t()], non_neg_integer()) :: :ok + def modify_deadline(pid, ack_ids, deadline_seconds) when is_list(ack_ids) do + GenServer.cast(pid, {:modify_deadline, ack_ids, deadline_seconds}) + end + + @doc """ + Tells the StreamManager to stop forwarding new messages to the producer. + Called during `prepare_for_draining`. The gRPC stream stays open so + in-flight acks can still be delivered. + """ + @spec stop_receiving(pid()) :: :ok + def stop_receiving(pid) do + GenServer.call(pid, :stop_receiving) + end + + @doc """ + Returns all currently outstanding ack_ids (received but not yet acked/nacked). + Used in `terminate/2` to nack unprocessed messages per the `on_shutdown` option. + """ + @spec get_outstanding(pid()) :: [String.t()] + def get_outstanding(pid) do + GenServer.call(pid, :get_outstanding) + end + + @doc """ + Flushes any buffered acks and closes the gRPC stream gracefully. + Called from the producer's `terminate/2`. + """ + @spec close(pid()) :: :ok + def close(pid) do + GenServer.call(pid, :close, 10_000) + end + + # --- GenServer callbacks --- + + @impl GenServer + def init(opts) do + Process.flag(:trap_exit, true) + config = Map.new(opts) + + backoff = + Backoff.new( + type: config.backoff_type, + min: config.backoff_min, + max: config.backoff_max + ) + + deadline_s = config.stream_ack_deadline_seconds + extension_percent = config.lease_extension_percent + lease_extension_interval_ms = round(deadline_s * extension_percent * 1000) + + state = %__MODULE__{ + producer_pid: nil, + config: config, + backoff: backoff, + lease_extension_interval_ms: lease_extension_interval_ms, + receiving: true + } + + # Delay connecting until producer tells us its pid via set_producer/2 + {:ok, state} + end + + @impl GenServer + def handle_info(:connect, state) do + case connect(state) do + {:ok, new_state} -> + {:noreply, new_state} + + {:error, reason, new_state} -> + log_connection_failure(reason) + emit_telemetry(:connection_failure, %{reason: reason}, state.config) + {:noreply, schedule_reconnect(new_state)} + end + end + + # --- Raw Gun protocol messages --- + + # Initial HTTP/2 response headers (200 OK) — connection established + def handle_info({:gun_response, conn_pid, stream_ref, :nofin, 200, _headers}, state) + when state.conn_pid == conn_pid and state.stream_ref == stream_ref do + {:noreply, state} + end + + # Non-200 initial response — treat as error + def handle_info({:gun_response, conn_pid, stream_ref, _fin, status, _headers}, state) + when state.conn_pid == conn_pid and state.stream_ref == stream_ref do + Logger.error("[StreamManager] gRPC stream got HTTP status #{status}") + {:noreply, schedule_reconnect(reset_connection(state))} + end + + # Catch-all for gun_response not matching our stream (e.g. different conn/ref) + def handle_info({:gun_response, conn_pid, stream_ref, fin, status, _headers}, state) do + Logger.debug( + "[StreamManager] Ignoring stale gun_response: conn=#{inspect(conn_pid)} ref=#{inspect(stream_ref)} fin=#{inspect(fin)} status=#{status} (state conn=#{inspect(state.conn_pid)} ref=#{inspect(state.stream_ref)})" + ) + + {:noreply, state} + end + + # Data chunk(s) from the server + def handle_info({:gun_data, conn_pid, stream_ref, fin, data}, state) + when state.conn_pid == conn_pid and state.stream_ref == stream_ref do + buffer = state.recv_buffer <> data + {messages, remaining_buffer} = decode_grpc_messages(buffer) + + state = %{state | recv_buffer: remaining_buffer} + + state = + if state.receiving and messages != [] do + broadway_messages = Enum.map(messages, &build_broadway_message(&1, state)) + ack_ids = Enum.map(messages, & &1.ack_id) + new_outstanding = Enum.reduce(ack_ids, state.outstanding, &MapSet.put(&2, &1)) + emit_telemetry(:receive_messages, %{count: length(broadway_messages)}, state.config) + send(state.producer_pid, {:stream_messages, broadway_messages}) + %{state | outstanding: new_outstanding} + else + state + end + + if fin == :fin do + {:noreply, schedule_reconnect(reset_connection(state))} + else + {:noreply, state} + end + end + + # Catch-all for gun_data not matching our stream + def handle_info({:gun_data, conn_pid, stream_ref, _fin, _data} = _msg, state) do + Logger.debug( + "[StreamManager] Ignoring stale gun_data: conn=#{inspect(conn_pid)} ref=#{inspect(stream_ref)} (state conn=#{inspect(state.conn_pid)} ref=#{inspect(state.stream_ref)})" + ) + + {:noreply, state} + end + + # Trailers — stream ended normally + def handle_info({:gun_trailers, conn_pid, stream_ref, trailers}, state) + when state.conn_pid == conn_pid and state.stream_ref == stream_ref do + grpc_status = trailers |> List.keyfind("grpc-status", 0) |> elem(1) + grpc_message = trailers |> List.keyfind("grpc-message", 0, {"", ""}) |> elem(1) + + case grpc_status do + "0" -> + :ok + + status -> + Logger.warning( + "[StreamManager] gRPC stream closed with status #{status}: #{grpc_message}" + ) + end + + emit_telemetry(:disconnect, %{reason: :stream_closed}, state.config) + {:noreply, schedule_reconnect(reset_connection(state))} + end + + # Stream-level error + def handle_info({:gun_error, conn_pid, stream_ref, reason}, state) + when state.conn_pid == conn_pid and state.stream_ref == stream_ref do + Logger.warning("[StreamManager] gRPC stream error: #{inspect(reason)}") + emit_telemetry(:disconnect, %{reason: reason}, state.config) + {:noreply, schedule_reconnect(reset_connection(state))} + end + + # Connection-level error/down + def handle_info({:gun_down, conn_pid, _protocol, reason, _killed_streams}, state) + when state.conn_pid == conn_pid do + Logger.warning("[StreamManager] gRPC connection down: #{inspect(reason)}") + emit_telemetry(:disconnect, %{reason: reason}, state.config) + {:noreply, schedule_reconnect(reset_connection(state))} + end + + # --- Mint adapter messages --- + + # Decoded messages forwarded from the Mint reader process + def handle_info({:mint_messages, messages}, state) do + if state.receiving and messages != [] do + broadway_messages = Enum.map(messages, &build_broadway_message(&1, state)) + ack_ids = Enum.map(messages, & &1.ack_id) + new_outstanding = Enum.reduce(ack_ids, state.outstanding, &MapSet.put(&2, &1)) + emit_telemetry(:receive_messages, %{count: length(broadway_messages)}, state.config) + send(state.producer_pid, {:stream_messages, broadway_messages}) + {:noreply, %{state | outstanding: new_outstanding}} + else + {:noreply, state} + end + end + + # Error reported by the Mint reader process (stream-level gRPC error) + def handle_info({:mint_stream_error, error}, state) do + Logger.warning("[StreamManager] Mint stream error: #{inspect(error)}") + emit_telemetry(:disconnect, %{reason: error}, state.config) + {:noreply, schedule_reconnect(reset_connection(state))} + end + + # Connection-down message sent by the Mint adapter's ConnectionProcess + def handle_info({:elixir_grpc, :connection_down, conn_pid}, state) + when state.conn_pid == conn_pid do + Logger.warning("[StreamManager] Mint gRPC connection down") + emit_telemetry(:disconnect, %{reason: :connection_down}, state.config) + {:noreply, schedule_reconnect(reset_connection(state))} + end + + # Mint reader process exited normally — stream ended, reconnect + def handle_info({:EXIT, pid, :normal}, %{reader_pid: pid} = state) do + Logger.info("[StreamManager] Mint reader stream ended normally") + emit_telemetry(:disconnect, %{reason: :stream_closed}, state.config) + {:noreply, schedule_reconnect(reset_connection(state))} + end + + # Mint reader process crashed — reconnect + def handle_info({:EXIT, pid, reason}, %{reader_pid: pid} = state) do + Logger.warning("[StreamManager] Mint reader crashed: #{inspect(reason)}") + emit_telemetry(:disconnect, %{reason: reason}, state.config) + {:noreply, schedule_reconnect(reset_connection(state))} + end + + # Catch-all for other EXIT signals (e.g. from the supervisor during shutdown) + def handle_info({:EXIT, _pid, _reason}, state) do + {:noreply, state} + end + + def handle_info(:extend_leases, state) do + if MapSet.size(state.outstanding) == 0 or state.grpc_stream == nil do + timer = Process.send_after(self(), :extend_leases, state.lease_extension_interval_ms) + {:noreply, %{state | lease_timer: timer}} + else + ack_ids = MapSet.to_list(state.outstanding) + deadline = state.config.stream_ack_deadline_seconds + deadlines = List.duplicate(deadline, length(ack_ids)) + + send_on_stream(state.grpc_stream, %StreamingPullRequest{ + modify_deadline_ack_ids: ack_ids, + modify_deadline_seconds: deadlines + }) + + timer = Process.send_after(self(), :extend_leases, state.lease_extension_interval_ms) + {:noreply, %{state | lease_timer: timer}} + end + end + + def handle_info(msg, state) do + Logger.warning("[StreamManager] Unhandled message: #{inspect(msg)}") + {:noreply, state} + end + + @impl GenServer + def handle_cast({:acknowledge, ack_ids}, state) do + new_outstanding = Enum.reduce(ack_ids, state.outstanding, &MapSet.delete(&2, &1)) + state = %{state | outstanding: new_outstanding} + + state = + if state.grpc_stream do + send_on_stream(state.grpc_stream, %StreamingPullRequest{ack_ids: ack_ids}) + emit_telemetry(:ack, %{count: length(ack_ids)}, state.config) + state + else + buffer_ack_request(state, {:ack, ack_ids}) + end + + {:noreply, state} + end + + def handle_cast({:modify_deadline, ack_ids, deadline_seconds}, state) do + new_outstanding = + if deadline_seconds == 0 do + Enum.reduce(ack_ids, state.outstanding, &MapSet.delete(&2, &1)) + else + state.outstanding + end + + deadlines = List.duplicate(deadline_seconds, length(ack_ids)) + state = %{state | outstanding: new_outstanding} + + state = + if state.grpc_stream do + send_on_stream(state.grpc_stream, %StreamingPullRequest{ + modify_deadline_ack_ids: ack_ids, + modify_deadline_seconds: deadlines + }) + + state + else + buffer_ack_request(state, {:modify_deadline, ack_ids, deadline_seconds}) + end + + {:noreply, state} + end + + @impl GenServer + def handle_call({:set_producer, producer_pid}, _from, state) do + state = %{state | producer_pid: producer_pid} + send(self(), :connect) + {:reply, :ok, state} + end + + def handle_call(:stop_receiving, _from, state) do + {:reply, :ok, %{state | receiving: false}} + end + + def handle_call(:get_outstanding, _from, state) do + {:reply, MapSet.to_list(state.outstanding), state} + end + + def handle_call(:close, _from, state) do + state = flush_ack_buffer(state) + state = close_stream(state) + {:reply, :ok, state} + end + + @impl GenServer + def terminate(reason, state) do + Logger.debug("[StreamManager] terminate: reason=#{inspect(reason)}") + cancel_lease_timer(state) + close_stream(state) + :ok + end + + # --- Private: connection --- + + defp connect(%{config: config} = state) do + with {:ok, token} <- fetch_token(config), + {:ok, channel} <- open_channel(config, token) do + connect_stream(channel, state) + end + end + + # Second phase of connect: open the gRPC stream on the already-open channel. + # Separated so that if open_stream raises, we can disconnect the channel + # before propagating the error (preventing stale Gun messages in our mailbox). + defp connect_stream(channel, %{config: config} = state) do + {:ok, grpc_stream, conn_pid, stream_ref} = open_stream(channel, state) + backoff = Backoff.reset(state.backoff) + + state = + flush_ack_buffer(%{ + state + | channel: channel, + grpc_stream: grpc_stream, + conn_pid: conn_pid, + stream_ref: stream_ref, + recv_buffer: <<>>, + backoff: backoff + }) + + state = maybe_start_reader(state) + state = schedule_lease_timer(state) + emit_telemetry(:connect, %{}, config) + {:ok, state} + rescue + e -> + # open_stream may raise (Stub.streaming_pull / send_request don't + # return error tuples). If it raised, no gRPC stream was successfully + # opened, so only disconnect the channel to prevent its Gun/Mint + # connection from delivering stale messages to our mailbox. + try do + GRPC.Stub.disconnect(channel) + catch + _, _ -> :ok + end + + {:error, {:open_stream_raised, Exception.message(e)}, state} + end + + # For Mint: spawn a linked reader process that enumerates GRPC.Stub.recv/2 and + # forwards decoded messages back to the StreamManager. + defp maybe_start_reader(%{config: %{adapter: :mint}, grpc_stream: grpc_stream} = state) do + manager = self() + + pid = + spawn_link(fn -> + {:ok, enum} = GRPC.Stub.recv(grpc_stream) + + enum + |> Stream.each(fn + {:ok, %StreamingPullResponse{received_messages: msgs}} when msgs != [] -> + send(manager, {:mint_messages, msgs}) + + {:ok, %StreamingPullResponse{}} -> + # Heartbeat / empty response — nothing to forward + :ok + + {:error, error} -> + send(manager, {:mint_stream_error, error}) + end) + |> Stream.run() + + # Stream exhausted normally. The reader exits :normal and StreamManager + # will receive {:EXIT, reader_pid, :normal} due to trap_exit. + end) + + %{state | reader_pid: pid} + end + + defp maybe_start_reader(state), do: state + + defp adapter_module(:gun), do: GRPC.Client.Adapters.Gun + defp adapter_module(:mint), do: GRPC.Client.Adapters.Mint + + defp open_channel( + %{grpc_endpoint: endpoint, use_ssl: use_ssl, adapter: adapter} = _config, + token + ) do + base_opts = [ + adapter: adapter_module(adapter), + headers: [{"authorization", "Bearer #{token}"}] + ] + + opts = + if use_ssl do + cred = GRPC.Credential.new(ssl: [cacerts: :public_key.cacerts_get()]) + Keyword.put(base_opts, :cred, cred) + else + base_opts + end + + case GRPC.Stub.connect(endpoint, opts) do + {:ok, channel} -> {:ok, channel} + {:error, reason} -> {:error, {:connect_failed, reason}} + end + end + + defp open_stream(channel, state) do + config = state.config + client_id = Map.fetch!(config, :client_id) + + initial_request = %StreamingPullRequest{ + subscription: config.subscription, + stream_ack_deadline_seconds: config.stream_ack_deadline_seconds, + max_outstanding_messages: config.max_outstanding_messages, + max_outstanding_bytes: config.max_outstanding_bytes, + client_id: client_id + } + + grpc_stream = Stub.streaming_pull(channel, []) + grpc_stream = GRPC.Stub.send_request(grpc_stream, initial_request) + + # Both adapters store the connection process pid in adapter_payload.conn_pid, + # but the stream_ref field only exists for Gun. + conn_pid = grpc_stream.channel.adapter_payload.conn_pid + + case config.adapter do + :gun -> + stream_ref = grpc_stream.payload.stream_ref + {:ok, grpc_stream, conn_pid, stream_ref} + + :mint -> + {:ok, grpc_stream, conn_pid, nil} + end + end + + # Decode one or more GRPC length-prefixed messages from the buffer. + # Returns {[StreamingPullResponse.received_messages], remaining_buffer} + defp decode_grpc_messages(buffer) do + decode_grpc_messages(buffer, []) + end + + defp decode_grpc_messages(buffer, acc) do + case GRPC.Message.get_message(buffer) do + {{_flag, encoded}, rest} -> + case StreamingPullResponse.decode(encoded) do + %StreamingPullResponse{received_messages: msgs} when msgs != [] -> + decode_grpc_messages(rest, Enum.reverse(msgs, acc)) + + %StreamingPullResponse{} -> + # Heartbeat/empty response + decode_grpc_messages(rest, acc) + end + + false -> + {Enum.reverse(acc), buffer} + end + end + + defp send_on_stream(grpc_stream, request) do + try do + GRPC.Stub.send_request(grpc_stream, request) + catch + kind, reason -> + Logger.warning("[StreamManager] Failed to send on stream: #{kind} #{inspect(reason)}") + end + end + + defp reset_connection(state) do + close_stream(state) + end + + defp close_stream(%{grpc_stream: nil} = state), do: stop_reader(state) + + defp close_stream(%{grpc_stream: grpc_stream, channel: channel, conn_pid: conn_pid} = state) do + state = stop_reader(state) + + # Cancel the stream (sends RST_STREAM) so Gun stops forwarding data for + # this stream_ref. end_stream/1 only half-closes the client side and leaves + # the server free to keep sending. + try do + GRPC.Stub.cancel(grpc_stream) + catch + _, _ -> :ok + end + + if channel do + try do + GRPC.Stub.disconnect(channel) + catch + _, _ -> :ok + end + end + + # Force-kill the underlying Gun process synchronously. :gun.shutdown (called + # by GRPC.Stub.disconnect) is an async cast with a 15-second graceful close + # period during which Gun continues delivering messages to our mailbox. + # Killing it immediately eliminates that race window. + if is_pid(conn_pid), do: Process.exit(conn_pid, :kill) + + # Drain any gun messages from conn_pid that were already in our mailbox + # before the process died. + flush_gun_messages(conn_pid) + + %{state | grpc_stream: nil, channel: nil, conn_pid: nil, stream_ref: nil, recv_buffer: <<>>} + end + + # Kills the Mint reader process (if any) and removes it from state. + # Unlinks before killing so the EXIT signal does not trigger reconnect logic. + defp stop_reader(%{reader_pid: pid} = state) when is_pid(pid) do + Process.unlink(pid) + Process.exit(pid, :kill) + %{state | reader_pid: nil} + end + + defp stop_reader(state), do: state + + # Drains any Gun messages from a specific conn_pid that are already sitting + # in our mailbox. We pin on ^conn_pid so we never accidentally consume + # messages from a newly-opened connection. The after 0 makes this + # non-blocking — it only removes messages already present. + defp flush_gun_messages(conn_pid) when is_pid(conn_pid) do + receive do + {:gun_up, ^conn_pid, _} -> flush_gun_messages(conn_pid) + {:gun_down, ^conn_pid, _, _, _} -> flush_gun_messages(conn_pid) + {:gun_response, ^conn_pid, _, _, _, _} -> flush_gun_messages(conn_pid) + {:gun_data, ^conn_pid, _, _, _} -> flush_gun_messages(conn_pid) + {:gun_trailers, ^conn_pid, _, _} -> flush_gun_messages(conn_pid) + {:gun_error, ^conn_pid, _, _} -> flush_gun_messages(conn_pid) + {:gun_error, ^conn_pid, _} -> flush_gun_messages(conn_pid) + after + 0 -> :ok + end + end + + defp flush_gun_messages(_), do: :ok + + # --- Private: backoff --- + + defp schedule_reconnect(%{backoff: nil} = _state) do + raise "StreamManager failed to connect and backoff is :stop — crashing" + end + + defp schedule_reconnect(%{backoff: backoff} = state) do + {timeout, new_backoff} = Backoff.backoff(backoff) + Logger.info("[StreamManager] Reconnecting in #{timeout}ms") + Process.send_after(self(), :connect, timeout) + %{state | backoff: new_backoff} + end + + # --- Private: lease management --- + + defp schedule_lease_timer(state) do + cancel_lease_timer(state) + timer = Process.send_after(self(), :extend_leases, state.lease_extension_interval_ms) + %{state | lease_timer: timer} + end + + defp cancel_lease_timer(%{lease_timer: nil} = state), do: state + + defp cancel_lease_timer(%{lease_timer: timer} = state) do + Process.cancel_timer(timer) + %{state | lease_timer: nil} + end + + # --- Private: ack buffering --- + + defp buffer_ack_request(%{ack_buffer: buffer, ack_buffer_size: size} = state, request) do + if size < @max_ack_buffer do + %{state | ack_buffer: [request | buffer], ack_buffer_size: size + 1} + else + Logger.warning("[StreamManager] Ack buffer full, dropping oldest ack request") + %{state | ack_buffer: [request | Enum.drop(buffer, -1)]} + end + end + + defp flush_ack_buffer(%{ack_buffer: [], grpc_stream: _} = state), do: state + + defp flush_ack_buffer(%{ack_buffer: _buffer, grpc_stream: nil} = state), do: state + + defp flush_ack_buffer(%{ack_buffer: buffer, grpc_stream: grpc_stream} = state) do + buffer + |> Enum.reverse() + |> Enum.each(fn + {:ack, ack_ids} -> + send_on_stream(grpc_stream, %StreamingPullRequest{ack_ids: ack_ids}) + + {:modify_deadline, ack_ids, deadline_seconds} -> + deadlines = List.duplicate(deadline_seconds, length(ack_ids)) + + send_on_stream(grpc_stream, %StreamingPullRequest{ + modify_deadline_ack_ids: ack_ids, + modify_deadline_seconds: deadlines + }) + end) + + %{state | ack_buffer: [], ack_buffer_size: 0} + end + + # --- Private: message building --- + + defp build_broadway_message( + %{ack_id: ack_id, message: pubsub_msg, delivery_attempt: delivery_attempt}, + state + ) do + # ack_ref is the Broadway pipeline name — the key used in :persistent_term by the producer + ack_ref = state.config.broadway[:name] + acknowledger = BroadwayCloudPubSub.Streaming.Acknowledger.builder(ack_ref).(ack_id) + + data = pubsub_msg.data + metadata = build_metadata(pubsub_msg, delivery_attempt) + + %Broadway.Message{ + data: data, + metadata: metadata, + acknowledger: acknowledger + } + end + + defp build_metadata(msg, delivery_attempt) do + MessageBuilder.build_metadata(%{ + message_id: msg.message_id, + ordering_key: msg.ordering_key, + publish_time: to_datetime(msg.publish_time), + delivery_attempt: delivery_attempt, + attributes: Map.new(msg.attributes || []) + }) + end + + defp to_datetime(nil), do: nil + + defp to_datetime(%{seconds: seconds, nanos: nanos}) do + DateTime.from_unix!(seconds * 1_000_000_000 + nanos, :nanosecond) + rescue + _ -> nil + end + + # --- Private: auth --- + + defp fetch_token(%{token_generator: {mod, fun, args}}) do + apply(mod, fun, args) + end + + # --- Private: telemetry --- + + defp emit_telemetry(event, measurements, config) do + metadata = %{ + subscription: config.subscription, + config: config + } + + :telemetry.execute( + [:broadway_cloud_pub_sub, :stream, event], + measurements, + metadata + ) + end + + defp log_connection_failure(reason) do + Logger.error("[StreamManager] Failed to connect: #{inspect(reason)}") + end +end diff --git a/mix.exs b/mix.exs index 709a7db..b8fac01 100644 --- a/mix.exs +++ b/mix.exs @@ -38,6 +38,8 @@ defmodule BroadwayCloudPubSub.MixProject do {:nimble_options, "~> 0.3.7 or ~> 0.4 or ~> 1.0"}, {:telemetry, "~> 0.4.3 or ~> 1.0"}, {:goth, "~> 1.3", optional: true}, + {:grpc, "~> 0.9 or ~> 0.10 or ~> 0.11", optional: true}, + {:protobuf, "~> 0.12 or ~> 0.13 or ~> 0.14 or ~> 0.15 or ~> 0.16", optional: true}, {:ex_doc, "~> 0.23", only: :docs}, {:bypass, "~> 2.1", only: :test} ] diff --git a/mix.lock b/mix.lock index da09cf4..f4956b1 100644 --- a/mix.lock +++ b/mix.lock @@ -1,29 +1,34 @@ %{ - "broadway": {:hex, :broadway, "1.0.0", "da99ca10aa221a9616ccff8cb8124510b7e063112d4593c3bae50448b37bbc90", [:mix], [{:gen_stage, "~> 1.0", [hex: :gen_stage, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.3.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.3 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "b86ebd492f687edc9ad44d0f9e359da70f305b6d090e92a06551cef71ec41324"}, + "broadway": {:hex, :broadway, "1.2.1", "83a1567423c26885e15f6cd8670ca790370af2fcff2ede7fa88c5ea793087a67", [:mix], [{:gen_stage, "~> 1.0", [hex: :gen_stage, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.3.7 or ~> 0.4 or ~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.3 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "68ae63d83b55bdca0f95cd49feee5fb74c5a6bec557caf940860fe07dbc8a4fb"}, "bypass": {:hex, :bypass, "2.1.0", "909782781bf8e20ee86a9cabde36b259d44af8b9f38756173e8f5e2e1fabb9b1", [:mix], [{:plug, "~> 1.7", [hex: :plug, repo: "hexpm", optional: false]}, {:plug_cowboy, "~> 2.0", [hex: :plug_cowboy, repo: "hexpm", optional: false]}, {:ranch, "~> 1.3", [hex: :ranch, repo: "hexpm", optional: false]}], "hexpm", "d9b5df8fa5b7a6efa08384e9bbecfe4ce61c77d28a4282f79e02f1ef78d96b80"}, "castore": {:hex, :castore, "0.1.18", "deb5b9ab02400561b6f5708f3e7660fc35ca2d51bfc6a940d2f513f89c2975fc", [:mix], [], "hexpm", "61bbaf6452b782ef80b33cdb45701afbcf0a918a45ebe7e73f1130d661e66a06"}, - "cowboy": {:hex, :cowboy, "2.9.0", "865dd8b6607e14cf03282e10e934023a1bd8be6f6bacf921a7e2a96d800cd452", [:make, :rebar3], [{:cowlib, "2.11.0", [hex: :cowlib, repo: "hexpm", optional: false]}, {:ranch, "1.8.0", [hex: :ranch, repo: "hexpm", optional: false]}], "hexpm", "2c729f934b4e1aa149aff882f57c6372c15399a20d54f65c8d67bef583021bde"}, - "cowboy_telemetry": {:hex, :cowboy_telemetry, "0.3.1", "ebd1a1d7aff97f27c66654e78ece187abdc646992714164380d8a041eda16754", [:rebar3], [{:cowboy, "~> 2.7", [hex: :cowboy, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "3a6efd3366130eab84ca372cbd4a7d3c3a97bdfcfb4911233b035d117063f0af"}, - "cowlib": {:hex, :cowlib, "2.11.0", "0b9ff9c346629256c42ebe1eeb769a83c6cb771a6ee5960bd110ab0b9b872063", [:make, :rebar3], [], "hexpm", "2b3e9da0b21c4565751a6d4901c20d1b4cc25cbb7fd50d91d2ab6dd287bc86a9"}, - "earmark_parser": {:hex, :earmark_parser, "1.4.39", "424642f8335b05bb9eb611aa1564c148a8ee35c9c8a8bba6e129d51a3e3c6769", [:mix], [], "hexpm", "06553a88d1f1846da9ef066b87b57c6f605552cfbe40d20bd8d59cc6bde41944"}, - "ex_doc": {:hex, :ex_doc, "0.34.1", "9751a0419bc15bc7580c73fde506b17b07f6402a1e5243be9e0f05a68c723368", [:mix], [{:earmark_parser, "~> 1.4.39", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.0", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14 or ~> 1.0", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1 or ~> 1.0", [hex: :makeup_erlang, repo: "hexpm", optional: false]}, {:makeup_html, ">= 0.1.0", [hex: :makeup_html, repo: "hexpm", optional: true]}], "hexpm", "d441f1a86a235f59088978eff870de2e815e290e44a8bd976fe5d64470a4c9d2"}, - "finch": {:hex, :finch, "0.9.0", "8b772324aebafcaba763f1dffaa3e7f52f8c4e52485f50f48bbb2f42219a2e87", [:mix], [{:castore, "~> 0.1", [hex: :castore, repo: "hexpm", optional: false]}, {:mint, "~> 1.3", [hex: :mint, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.3.5", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 0.2", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "a93bfcad9ca50fa3cb2d459f27667d9a87cfbb7fecf9b29b2e78a50bc2ab445d"}, - "gen_stage": {:hex, :gen_stage, "1.2.1", "19d8b5e9a5996d813b8245338a28246307fd8b9c99d1237de199d21efc4c76a1", [:mix], [], "hexpm", "83e8be657fa05b992ffa6ac1e3af6d57aa50aace8f691fcf696ff02f8335b001"}, - "goth": {:hex, :goth, "1.4.2", "a598dfbce6fe65db3f5f43b1ab2ce8fbe3b2fe20a7569ad62d71c11c0ddc3f41", [:mix], [{:finch, "~> 0.9", [hex: :finch, repo: "hexpm", optional: false]}, {:jason, "~> 1.1", [hex: :jason, repo: "hexpm", optional: false]}, {:jose, "~> 1.11", [hex: :jose, repo: "hexpm", optional: false]}], "hexpm", "d51bb6544dc551fe5754ab72e6cf194120b3c06d924282aaa3321a516ed3b98a"}, - "hpax": {:hex, :hpax, "0.1.2", "09a75600d9d8bbd064cdd741f21fc06fc1f4cf3d0fcc335e5aa19be1a7235c84", [:mix], [], "hexpm", "2c87843d5a23f5f16748ebe77969880e29809580efdaccd615cd3bed628a8c13"}, - "jason": {:hex, :jason, "1.4.1", "af1504e35f629ddcdd6addb3513c3853991f694921b1b9368b0bd32beb9f1b63", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "fbb01ecdfd565b56261302f7e1fcc27c4fb8f32d56eab74db621fc154604a7a1"}, - "jose": {:hex, :jose, "1.11.6", "613fda82552128aa6fb804682e3a616f4bc15565a048dabd05b1ebd5827ed965", [:mix, :rebar3], [], "hexpm", "6275cb75504f9c1e60eeacb771adfeee4905a9e182103aa59b53fed651ff9738"}, - "makeup": {:hex, :makeup, "1.1.2", "9ba8837913bdf757787e71c1581c21f9d2455f4dd04cfca785c70bbfff1a76a3", [:mix], [{:nimble_parsec, "~> 1.2.2 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "cce1566b81fbcbd21eca8ffe808f33b221f9eee2cbc7a1706fc3da9ff18e6cac"}, - "makeup_elixir": {:hex, :makeup_elixir, "0.16.2", "627e84b8e8bf22e60a2579dad15067c755531fea049ae26ef1020cad58fe9578", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "41193978704763f6bbe6cc2758b84909e62984c7752b3784bd3c218bb341706b"}, - "makeup_erlang": {:hex, :makeup_erlang, "1.0.0", "6f0eff9c9c489f26b69b61440bf1b238d95badae49adac77973cbacae87e3c2e", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "ea7a9307de9d1548d2a72d299058d1fd2339e3d398560a0e46c27dab4891e4d2"}, - "mime": {:hex, :mime, "1.6.0", "dabde576a497cef4bbdd60aceee8160e02a6c89250d6c0b29e56c0dfb00db3d2", [:mix], [], "hexpm", "31a1a8613f8321143dde1dafc36006a17d28d02bdfecb9e95a880fa7aabd19a7"}, - "mint": {:hex, :mint, "1.4.2", "50330223429a6e1260b2ca5415f69b0ab086141bc76dc2fbf34d7c389a6675b2", [:mix], [{:castore, "~> 0.1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:hpax, "~> 0.1.1", [hex: :hpax, repo: "hexpm", optional: false]}], "hexpm", "ce75a5bbcc59b4d7d8d70f8b2fc284b1751ffb35c7b6a6302b5192f8ab4ddd80"}, - "nimble_options": {:hex, :nimble_options, "0.3.7", "1e52dd7673d36138b1a5dede183b5d86dff175dc46d104a8e98e396b85b04670", [:mix], [], "hexpm", "2086907e6665c6b6579be54ef5001928df5231f355f71ed258f80a55e9f63633"}, - "nimble_parsec": {:hex, :nimble_parsec, "1.4.0", "51f9b613ea62cfa97b25ccc2c1b4216e81df970acd8e16e8d1bdc58fef21370d", [:mix], [], "hexpm", "9c565862810fb383e9838c1dd2d7d2c437b3d13b267414ba6af33e50d2d1cf28"}, - "nimble_pool": {:hex, :nimble_pool, "0.2.6", "91f2f4c357da4c4a0a548286c84a3a28004f68f05609b4534526871a22053cde", [:mix], [], "hexpm", "1c715055095d3f2705c4e236c18b618420a35490da94149ff8b580a2144f653f"}, - "plug": {:hex, :plug, "1.11.1", "f2992bac66fdae679453c9e86134a4201f6f43a687d8ff1cd1b2862d53c80259", [:mix], [{:mime, "~> 1.0", [hex: :mime, repo: "hexpm", optional: false]}, {:plug_crypto, "~> 1.1.1 or ~> 1.2", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "23524e4fefbb587c11f0833b3910bfb414bf2e2534d61928e920f54e3a1b881f"}, - "plug_cowboy": {:hex, :plug_cowboy, "2.5.0", "51c998f788c4e68fc9f947a5eba8c215fbb1d63a520f7604134cab0270ea6513", [:mix], [{:cowboy, "~> 2.7", [hex: :cowboy, repo: "hexpm", optional: false]}, {:cowboy_telemetry, "~> 0.3", [hex: :cowboy_telemetry, repo: "hexpm", optional: false]}, {:plug, "~> 1.7", [hex: :plug, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "5b2c8925a5e2587446f33810a58c01e66b3c345652eeec809b76ba007acde71a"}, - "plug_crypto": {:hex, :plug_crypto, "1.2.2", "05654514ac717ff3a1843204b424477d9e60c143406aa94daf2274fdd280794d", [:mix], [], "hexpm", "87631c7ad914a5a445f0a3809f99b079113ae4ed4b867348dd9eec288cecb6db"}, - "ranch": {:hex, :ranch, "1.8.0", "8c7a100a139fd57f17327b6413e4167ac559fbc04ca7448e9be9057311597a1d", [:make, :rebar3], [], "hexpm", "49fbcfd3682fab1f5d109351b61257676da1a2fdbe295904176d5e521a2ddfe5"}, - "telemetry": {:hex, :telemetry, "0.4.3", "a06428a514bdbc63293cd9a6263aad00ddeb66f608163bdec7c8995784080818", [:rebar3], [], "hexpm", "eb72b8365ffda5bed68a620d1da88525e326cb82a75ee61354fc24b844768041"}, + "cowboy": {:hex, :cowboy, "2.14.2", "4008be1df6ade45e4f2a4e9e2d22b36d0b5aba4e20b0a0d7049e28d124e34847", [:make, :rebar3], [{:cowlib, ">= 2.16.0 and < 3.0.0", [hex: :cowlib, repo: "hexpm", optional: false]}, {:ranch, ">= 1.8.0 and < 3.0.0", [hex: :ranch, repo: "hexpm", optional: false]}], "hexpm", "569081da046e7b41b5df36aa359be71a0c8874e5b9cff6f747073fc57baf1ab9"}, + "cowboy_telemetry": {:hex, :cowboy_telemetry, "0.4.0", "f239f68b588efa7707abce16a84d0d2acf3a0f50571f8bb7f56a15865aae820c", [:rebar3], [{:cowboy, "~> 2.7", [hex: :cowboy, repo: "hexpm", optional: false]}, {:telemetry, "~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "7d98bac1ee4565d31b62d59f8823dfd8356a169e7fcbb83831b8a5397404c9de"}, + "cowlib": {:hex, :cowlib, "2.16.0", "54592074ebbbb92ee4746c8a8846e5605052f29309d3a873468d76cdf932076f", [:make, :rebar3], [], "hexpm", "7f478d80d66b747344f0ea7708c187645cfcc08b11aa424632f78e25bf05db51"}, + "earmark_parser": {:hex, :earmark_parser, "1.4.44", "f20830dd6b5c77afe2b063777ddbbff09f9759396500cdbe7523efd58d7a339c", [:mix], [], "hexpm", "4778ac752b4701a5599215f7030989c989ffdc4f6df457c5f36938cc2d2a2750"}, + "ex_doc": {:hex, :ex_doc, "0.40.1", "67542e4b6dde74811cfd580e2c0149b78010fd13001fda7cfeb2b2c2ffb1344d", [:mix], [{:earmark_parser, "~> 1.4.44", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.0", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14 or ~> 1.0", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1 or ~> 1.0", [hex: :makeup_erlang, repo: "hexpm", optional: false]}, {:makeup_html, ">= 0.1.0", [hex: :makeup_html, repo: "hexpm", optional: true]}], "hexpm", "bcef0e2d360d93ac19f01a85d58f91752d930c0a30e2681145feea6bd3516e00"}, + "finch": {:hex, :finch, "0.21.0", "b1c3b2d48af02d0c66d2a9ebfb5622be5c5ecd62937cf79a88a7f98d48a8290c", [:mix], [{:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.6.2 or ~> 1.7", [hex: :mint, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.4 or ~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 1.1", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "87dc6e169794cb2570f75841a19da99cfde834249568f2a5b121b809588a4377"}, + "flow": {:hex, :flow, "1.2.4", "1dd58918287eb286656008777cb32714b5123d3855956f29aa141ebae456922d", [:mix], [{:gen_stage, "~> 1.0", [hex: :gen_stage, repo: "hexpm", optional: false]}], "hexpm", "874adde96368e71870f3510b91e35bc31652291858c86c0e75359cbdd35eb211"}, + "gen_stage": {:hex, :gen_stage, "1.3.2", "7c77e5d1e97de2c6c2f78f306f463bca64bf2f4c3cdd606affc0100b89743b7b", [:mix], [], "hexpm", "0ffae547fa777b3ed889a6b9e1e64566217413d018cabd825f786e843ffe63e7"}, + "googleapis": {:hex, :googleapis, "0.1.0", "13770f3f75f5b863fb9acf41633c7bc71bad788f3f553b66481a096d083ee20e", [:mix], [{:protobuf, "~> 0.12", [hex: :protobuf, repo: "hexpm", optional: false]}], "hexpm", "1989a7244fd17d3eb5f3de311a022b656c3736b39740db46506157c4604bd212"}, + "goth": {:hex, :goth, "1.4.5", "ee37f96e3519bdecd603f20e7f10c758287088b6d77c0147cd5ee68cf224aade", [:mix], [{:finch, "~> 0.17", [hex: :finch, repo: "hexpm", optional: false]}, {:jason, "~> 1.1", [hex: :jason, repo: "hexpm", optional: false]}, {:jose, "~> 1.11", [hex: :jose, repo: "hexpm", optional: false]}], "hexpm", "0fc2dce5bd710651ed179053d0300ce3a5d36afbdde11e500d57f05f398d5ed5"}, + "grpc": {:hex, :grpc, "0.11.5", "5dbde9420718b58712779ad98fff1ef50349ca0fa7cc0858ae0f826015068654", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:cowboy, "~> 2.10", [hex: :cowboy, repo: "hexpm", optional: false]}, {:cowlib, "~> 2.12", [hex: :cowlib, repo: "hexpm", optional: false]}, {:flow, "~> 1.2", [hex: :flow, repo: "hexpm", optional: false]}, {:googleapis, "~> 0.1.0", [hex: :googleapis, repo: "hexpm", optional: false]}, {:gun, "~> 2.0", [hex: :gun, repo: "hexpm", optional: false]}, {:jason, ">= 0.0.0", [hex: :jason, repo: "hexpm", optional: false]}, {:mint, "~> 1.5", [hex: :mint, repo: "hexpm", optional: false]}, {:protobuf, "~> 0.14", [hex: :protobuf, repo: "hexpm", optional: false]}, {:telemetry, "~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "0a5d8673ef16649bef0903bca01c161acfc148e4d269133b6834b2af1f07f45e"}, + "gun": {:hex, :gun, "2.2.0", "b8f6b7d417e277d4c2b0dc3c07dfdf892447b087f1cc1caff9c0f556b884e33d", [:make, :rebar3], [{:cowlib, ">= 2.15.0 and < 3.0.0", [hex: :cowlib, repo: "hexpm", optional: false]}], "hexpm", "76022700c64287feb4df93a1795cff6741b83fb37415c40c34c38d2a4645261a"}, + "hpax": {:hex, :hpax, "1.0.3", "ed67ef51ad4df91e75cc6a1494f851850c0bd98ebc0be6e81b026e765ee535aa", [:mix], [], "hexpm", "8eab6e1cfa8d5918c2ce4ba43588e894af35dbd8e91e6e55c817bca5847df34a"}, + "jason": {:hex, :jason, "1.4.4", "b9226785a9aa77b6857ca22832cffa5d5011a667207eb2a0ad56adb5db443b8a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "c5eb0cab91f094599f94d55bc63409236a8ec69a21a67814529e8d5f6cc90b3b"}, + "jose": {:hex, :jose, "1.11.12", "06e62b467b61d3726cbc19e9b5489f7549c37993de846dfb3ee8259f9ed208b3", [:mix, :rebar3], [], "hexpm", "31e92b653e9210b696765cdd885437457de1add2a9011d92f8cf63e4641bab7b"}, + "makeup": {:hex, :makeup, "1.2.1", "e90ac1c65589ef354378def3ba19d401e739ee7ee06fb47f94c687016e3713d1", [:mix], [{:nimble_parsec, "~> 1.4", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "d36484867b0bae0fea568d10131197a4c2e47056a6fbe84922bf6ba71c8d17ce"}, + "makeup_elixir": {:hex, :makeup_elixir, "1.0.1", "e928a4f984e795e41e3abd27bfc09f51db16ab8ba1aebdba2b3a575437efafc2", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "7284900d412a3e5cfd97fdaed4f5ed389b8f2b4cb49efc0eb3bd10e2febf9507"}, + "makeup_erlang": {:hex, :makeup_erlang, "1.0.3", "4252d5d4098da7415c390e847c814bad3764c94a814a0b4245176215615e1035", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "953297c02582a33411ac6208f2c6e55f0e870df7f80da724ed613f10e6706afd"}, + "mime": {:hex, :mime, "2.0.7", "b8d739037be7cd402aee1ba0306edfdef982687ee7e9859bee6198c1e7e2f128", [:mix], [], "hexpm", "6171188e399ee16023ffc5b76ce445eb6d9672e2e241d2df6050f3c771e80ccd"}, + "mint": {:hex, :mint, "1.7.1", "113fdb2b2f3b59e47c7955971854641c61f378549d73e829e1768de90fc1abf1", [:mix], [{:castore, "~> 0.1.0 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:hpax, "~> 0.1.1 or ~> 0.2.0 or ~> 1.0", [hex: :hpax, repo: "hexpm", optional: false]}], "hexpm", "fceba0a4d0f24301ddee3024ae116df1c3f4bb7a563a731f45fdfeb9d39a231b"}, + "nimble_options": {:hex, :nimble_options, "1.1.1", "e3a492d54d85fc3fd7c5baf411d9d2852922f66e69476317787a7b2bb000a61b", [:mix], [], "hexpm", "821b2470ca9442c4b6984882fe9bb0389371b8ddec4d45a9504f00a66f650b44"}, + "nimble_parsec": {:hex, :nimble_parsec, "1.4.2", "8efba0122db06df95bfaa78f791344a89352ba04baedd3849593bfce4d0dc1c6", [:mix], [], "hexpm", "4b21398942dda052b403bbe1da991ccd03a053668d147d53fb8c4e0efe09c973"}, + "nimble_pool": {:hex, :nimble_pool, "1.1.0", "bf9c29fbdcba3564a8b800d1eeb5a3c58f36e1e11d7b7fb2e084a643f645f06b", [:mix], [], "hexpm", "af2e4e6b34197db81f7aad230c1118eac993acc0dae6bc83bac0126d4ae0813a"}, + "plug": {:hex, :plug, "1.19.1", "09bac17ae7a001a68ae393658aa23c7e38782be5c5c00c80be82901262c394c0", [:mix], [{:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:plug_crypto, "~> 1.1.1 or ~> 1.2 or ~> 2.0", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.3 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "560a0017a8f6d5d30146916862aaf9300b7280063651dd7e532b8be168511e62"}, + "plug_cowboy": {:hex, :plug_cowboy, "2.8.0", "07789e9c03539ee51bb14a07839cc95aa96999fd8846ebfd28c97f0b50c7b612", [:mix], [{:cowboy, "~> 2.7", [hex: :cowboy, repo: "hexpm", optional: false]}, {:cowboy_telemetry, "~> 0.3", [hex: :cowboy_telemetry, repo: "hexpm", optional: false]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm", "9cbfaaf17463334ca31aed38ea7e08a68ee37cabc077b1e9be6d2fb68e0171d0"}, + "plug_crypto": {:hex, :plug_crypto, "2.1.1", "19bda8184399cb24afa10be734f84a16ea0a2bc65054e23a62bb10f06bc89491", [:mix], [], "hexpm", "6470bce6ffe41c8bd497612ffde1a7e4af67f36a15eea5f921af71cf3e11247c"}, + "protobuf": {:hex, :protobuf, "0.16.0", "d1878725105d49162977cf3408ccc3eac4f3532e26e5a9e250f2c624175d10f6", [:mix], [{:jason, "~> 1.2", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm", "f0d0d3edd8768130f24cc2cfc41320637d32c80110e80d13f160fa699102c828"}, + "ranch": {:hex, :ranch, "1.8.1", "208169e65292ac5d333d6cdbad49388c1ae198136e4697ae2f474697140f201c", [:make, :rebar3], [], "hexpm", "aed58910f4e21deea992a67bf51632b6d60114895eb03bb392bb733064594dd0"}, + "telemetry": {:hex, :telemetry, "1.4.1", "ab6de178e2b29b58e8256b92b382ea3f590a47152ca3651ea857a6cae05ac423", [:rebar3], [], "hexpm", "2172e05a27531d3d31dd9782841065c50dd5c3c7699d95266b2edd54c2dafa1c"}, } diff --git a/test/broadway_cloud_pub_sub/backoff_test.exs b/test/broadway_cloud_pub_sub/backoff_test.exs new file mode 100644 index 0000000..9eefe9f --- /dev/null +++ b/test/broadway_cloud_pub_sub/backoff_test.exs @@ -0,0 +1,152 @@ +defmodule BroadwayCloudPubSub.BackoffTest do + use ExUnit.Case, async: true + + alias BroadwayCloudPubSub.Backoff + + describe "new/1" do + test "returns nil for :stop type" do + assert Backoff.new(type: :stop) == nil + end + + test "returns a Backoff struct for :rand_exp type" do + b = Backoff.new(type: :rand_exp) + assert %Backoff{type: :rand_exp} = b + end + + test "returns a Backoff struct for :exp type" do + b = Backoff.new(type: :exp) + assert %Backoff{type: :exp} = b + end + + test "returns a Backoff struct for :rand type" do + b = Backoff.new(type: :rand) + assert %Backoff{type: :rand} = b + end + + test "uses default min and max when not provided" do + b = Backoff.new() + assert b.min == 1_000 + assert b.max == 30_000 + end + + test "accepts custom min and max" do + b = Backoff.new(type: :exp, min: 500, max: 5_000) + assert b.min == 500 + assert b.max == 5_000 + end + end + + describe "backoff/1" do + test "returns {nil, nil} for nil (stop) backoff" do + assert {nil, nil} = Backoff.backoff(nil) + end + + test ":exp starts at min and doubles each call up to max" do + b = Backoff.new(type: :exp, min: 1_000, max: 8_000) + {t1, b} = Backoff.backoff(b) + {t2, b} = Backoff.backoff(b) + {t3, b} = Backoff.backoff(b) + {t4, _b} = Backoff.backoff(b) + + assert t1 == 1_000 + assert t2 == 2_000 + assert t3 == 4_000 + # capped at max + assert t4 == 8_000 + end + + test ":exp never exceeds max" do + b = Backoff.new(type: :exp, min: 1_000, max: 3_000) + + timeouts = + Enum.reduce(1..10, {[], b}, fn _, {acc, b} -> + {t, b} = Backoff.backoff(b) + {[t | acc], b} + end) + |> elem(0) + + assert Enum.all?(timeouts, &(&1 <= 3_000)) + end + + test ":rand returns a value within [min, max]" do + b = Backoff.new(type: :rand, min: 1_000, max: 5_000) + + timeouts = + Enum.map(1..20, fn _ -> + {t, _} = Backoff.backoff(b) + t + end) + + assert Enum.all?(timeouts, &(&1 >= 1_000 and &1 <= 5_000)) + end + + test ":rand_exp stays within [min, max]" do + b = Backoff.new(type: :rand_exp, min: 1_000, max: 30_000) + + timeouts = + Enum.reduce(1..30, {[], b}, fn _, {acc, b} -> + {t, b} = Backoff.backoff(b) + {[t | acc], b} + end) + |> elem(0) + + assert Enum.all?(timeouts, &(&1 >= 1_000 and &1 <= 30_000)) + end + + test ":rand_exp generally grows over time (not always due to randomness)" do + b = Backoff.new(type: :rand_exp, min: 100, max: 10_000) + + {t1, b} = Backoff.backoff(b) + # advance a few steps + {_, b} = Backoff.backoff(b) + {_, b} = Backoff.backoff(b) + {_, b} = Backoff.backoff(b) + {t5, _} = Backoff.backoff(b) + + # After several steps, the upper bound has grown — at minimum t5 >= t1 + # (not strictly true due to randomness but min stays anchored) + assert t1 >= 100 + assert t5 >= 100 + end + end + + describe "reset/1" do + test "returns nil for nil (stop) backoff" do + assert Backoff.reset(nil) == nil + end + + test ":exp resets state to min" do + b = Backoff.new(type: :exp, min: 1_000, max: 8_000) + {_, b_advanced} = Backoff.backoff(b) + {_, b_advanced} = Backoff.backoff(b_advanced) + + b_reset = Backoff.reset(b_advanced) + {t, _} = Backoff.backoff(b_reset) + assert t == 1_000 + end + + test ":rand_exp resets so next backoff starts from the initial range" do + b = Backoff.new(type: :rand_exp, min: 1_000, max: 30_000) + + # Advance many steps + b_advanced = + Enum.reduce(1..20, b, fn _, b -> + {_, b} = Backoff.backoff(b) + b + end) + + b_reset = Backoff.reset(b_advanced) + {t, _} = Backoff.backoff(b_reset) + + # After reset, first backoff should be in initial range [min, max] + assert t >= 1_000 + assert t <= 30_000 + end + + test ":rand is unchanged after reset" do + b = Backoff.new(type: :rand, min: 1_000, max: 5_000) + b_reset = Backoff.reset(b) + assert b_reset == b + end + end +end diff --git a/test/broadway_cloud_pub_sub/producer_test.exs b/test/broadway_cloud_pub_sub/producer_test.exs index 02b87f1..224222f 100644 --- a/test/broadway_cloud_pub_sub/producer_test.exs +++ b/test/broadway_cloud_pub_sub/producer_test.exs @@ -148,7 +148,7 @@ defmodule BroadwayCloudPubSub.ProducerTest do test ":subscription should be a string" do assert_raise( ValidationError, - "required option :subscription not found, received options: [:client]", + "required :subscription option not found, received options: []", fn -> prepare_for_start_module_opts([]) end @@ -266,7 +266,7 @@ defmodule BroadwayCloudPubSub.ProducerTest do assert_raise( ValidationError, - ~r/expected :max_number_of_messages to be a positive integer, got: 0/, + ~r/expected positive integer, got: 0/, fn -> prepare_for_start_module_opts( goth: FakeAuth, @@ -278,7 +278,7 @@ defmodule BroadwayCloudPubSub.ProducerTest do assert_raise( ValidationError, - ~r/expected :max_number_of_messages to be a positive integer, got: -1/, + ~r/expected positive integer, got: -1/, fn -> prepare_for_start_module_opts( goth: FakeAuth, @@ -326,7 +326,7 @@ defmodule BroadwayCloudPubSub.ProducerTest do assert producer_opts[:token_generator] == token_generator assert_raise ValidationError, - ~r/expected :token_generator to be a tuple {Mod, Fun, Args}, got: {1, 1, 1}/, + ~r/expected tuple {mod, fun, args}, got: {1, 1, 1}/, fn -> prepare_for_start_module_opts( subscription: "projects/foo/subscriptions/bar", @@ -335,7 +335,7 @@ defmodule BroadwayCloudPubSub.ProducerTest do end assert_raise ValidationError, - ~r/expected :token_generator to be a tuple {Mod, Fun, Args}, got: SomeModule/, + ~r/expected tuple {mod, fun, args}, got: SomeModule/, fn -> prepare_for_start_module_opts( subscription: "projects/foo/subscriptions/bar", diff --git a/test/broadway_cloud_pub_sub/streaming/acknowledger_test.exs b/test/broadway_cloud_pub_sub/streaming/acknowledger_test.exs new file mode 100644 index 0000000..23f6ae9 --- /dev/null +++ b/test/broadway_cloud_pub_sub/streaming/acknowledger_test.exs @@ -0,0 +1,200 @@ +defmodule BroadwayCloudPubSub.Streaming.AcknowledgerTest do + use ExUnit.Case, async: true + + alias Broadway.Message + alias BroadwayCloudPubSub.Streaming.Acknowledger + + # Stub StreamManager: just sends messages to the test process instead of + # calling gRPC. We monkey-patch via :persistent_term — the ack_ref maps to + # {stub_pid, config} where stub_pid is our test process. + # (Alias: Acknowledger refers to BroadwayCloudPubSub.Streaming.Acknowledger) + # + # Acknowledger calls: + # StreamManager.acknowledge(pid, ack_ids) → GenServer.cast({:acknowledge, ack_ids}) + # StreamManager.modify_deadline(pid, ack_ids, deadline) → GenServer.cast(...) + # + # We spin up a tiny GenServer that forwards casts to the test process. + + defmodule StubManager do + use GenServer + + def start_link(test_pid) do + GenServer.start_link(__MODULE__, test_pid) + end + + def init(test_pid), do: {:ok, test_pid} + + def handle_cast({:acknowledge, ack_ids}, test_pid) do + send(test_pid, {:acknowledge, ack_ids}) + {:noreply, test_pid} + end + + def handle_cast({:modify_deadline, ack_ids, deadline}, test_pid) do + send(test_pid, {:modify_deadline, ack_ids, deadline}) + {:noreply, test_pid} + end + end + + setup do + {:ok, stub_pid} = StubManager.start_link(self()) + ack_ref = make_ref() + + config = %{on_success: :ack, on_failure: :noop} + :persistent_term.put(ack_ref, {stub_pid, config}) + + on_exit(fn -> :persistent_term.erase(ack_ref) end) + + {:ok, ack_ref: ack_ref, stub_pid: stub_pid} + end + + defp build_message(ack_id, ack_ref, overrides \\ %{}) do + base = %{ack_id: ack_id} + ack_data = Map.merge(base, overrides) + %Message{data: "data_#{ack_id}", acknowledger: {Acknowledger, ack_ref, ack_data}} + end + + describe "builder/1" do + test "returns a function that builds acknowledger tuples", %{ack_ref: ack_ref} do + builder = Acknowledger.builder(ack_ref) + {mod, ref, data} = builder.("ack-123") + + assert mod == Acknowledger + assert ref == ack_ref + assert data == %{ack_id: "ack-123"} + end + end + + describe "configure/3" do + test "raises on unknown option", %{ack_ref: ack_ref} do + assert_raise NimbleOptions.ValidationError, ~r/unknown options/, fn -> + Acknowledger.configure(ack_ref, %{ack_id: "x"}, on_other: :ack) + end + end + + test "merges on_success into ack_data", %{ack_ref: ack_ref} do + {:ok, data} = Acknowledger.configure(ack_ref, %{ack_id: "x"}, on_success: :noop) + assert data == %{ack_id: "x", on_success: :noop, on_failure: :noop} + end + + test "normalises :nack on_failure to {:nack, 0}", %{ack_ref: ack_ref} do + {:ok, data} = Acknowledger.configure(ack_ref, %{ack_id: "x"}, on_failure: :nack) + assert data.on_failure == {:nack, 0} + end + + test "accepts {:nack, N} on_failure", %{ack_ref: ack_ref} do + {:ok, data} = + Acknowledger.configure(ack_ref, %{ack_id: "x"}, on_failure: {:nack, 60}) + + assert data.on_failure == {:nack, 60} + end + end + + describe "ack/3 — success path" do + test "acks successful messages by default", %{ack_ref: ack_ref} do + msgs = [build_message("id-1", ack_ref), build_message("id-2", ack_ref)] + Acknowledger.ack(ack_ref, msgs, []) + + assert_receive {:acknowledge, ack_ids} + assert Enum.sort(ack_ids) == ["id-1", "id-2"] + end + + test "does not ack failed messages when on_failure is :noop (default)", %{ack_ref: ack_ref} do + success = [build_message("ok-1", ack_ref)] + failure = [build_message("fail-1", ack_ref)] + + Acknowledger.ack(ack_ref, success, failure) + + assert_receive {:acknowledge, ["ok-1"]} + refute_receive {:acknowledge, _} + refute_receive {:modify_deadline, _, _} + end + + test "does not send anything when on_success is :noop", %{ + ack_ref: ack_ref, + stub_pid: stub_pid + } do + :persistent_term.put(ack_ref, {stub_pid, %{on_success: :noop, on_failure: :noop}}) + msgs = [build_message("id-1", ack_ref)] + Acknowledger.ack(ack_ref, msgs, []) + + refute_receive {:acknowledge, _} + refute_receive {:modify_deadline, _, _} + end + end + + describe "ack/3 — failure path" do + test "nacks failed messages when on_failure is :nack", %{ack_ref: ack_ref, stub_pid: stub_pid} do + :persistent_term.put(ack_ref, {stub_pid, %{on_success: :ack, on_failure: {:nack, 0}}}) + failure = [build_message("fail-1", ack_ref), build_message("fail-2", ack_ref)] + + Acknowledger.ack(ack_ref, [], failure) + + assert_receive {:modify_deadline, ack_ids, 0} + assert Enum.sort(ack_ids) == ["fail-1", "fail-2"] + end + + test "nacks with custom deadline when on_failure is {:nack, N}", %{ + ack_ref: ack_ref, + stub_pid: stub_pid + } do + :persistent_term.put(ack_ref, {stub_pid, %{on_success: :ack, on_failure: {:nack, 30}}}) + failure = [build_message("fail-1", ack_ref)] + + Acknowledger.ack(ack_ref, [], failure) + + assert_receive {:modify_deadline, ["fail-1"], 30} + end + + test "acks failed messages when on_failure is :ack", %{ack_ref: ack_ref, stub_pid: stub_pid} do + :persistent_term.put(ack_ref, {stub_pid, %{on_success: :noop, on_failure: :ack}}) + failure = [build_message("fail-1", ack_ref)] + + Acknowledger.ack(ack_ref, [], failure) + + assert_receive {:acknowledge, ["fail-1"]} + end + end + + describe "ack/3 — per-message overrides" do + test "respects per-message on_success override", %{ack_ref: ack_ref} do + noop_msg = build_message("noop-id", ack_ref, %{on_success: :noop}) + ack_msg = build_message("ack-id", ack_ref) + + Acknowledger.ack(ack_ref, [noop_msg, ack_msg], []) + + assert_receive {:acknowledge, ["ack-id"]} + # noop-id must not appear in any acknowledge message + refute_receive {:acknowledge, _} + end + + test "respects per-message on_failure override", %{ack_ref: ack_ref} do + nack_msg = build_message("nack-id", ack_ref, %{on_failure: {:nack, 10}}) + noop_msg = build_message("noop-id", ack_ref) + + Acknowledger.ack(ack_ref, [], [nack_msg, noop_msg]) + + assert_receive {:modify_deadline, ["nack-id"], 10} + refute_receive {:modify_deadline, _, _} + end + end + + describe "ack/3 — batching" do + test "chunks ack_ids at #{Acknowledger |> inspect()}'s @max_ack_ids_per_request" do + # Acknowledger chunks at 2500 + ack_ref = make_ref() + {:ok, stub_pid} = StubManager.start_link(self()) + :persistent_term.put(ack_ref, {stub_pid, %{on_success: :ack, on_failure: :noop}}) + + on_exit(fn -> :persistent_term.erase(ack_ref) end) + + msgs = Enum.map(1..3000, &build_message("id-#{&1}", ack_ref)) + Acknowledger.ack(ack_ref, msgs, []) + + assert_receive {:acknowledge, batch1} + assert_receive {:acknowledge, batch2} + assert length(batch1) + length(batch2) == 3000 + assert length(batch1) == 2500 + assert length(batch2) == 500 + end + end +end diff --git a/test/broadway_cloud_pub_sub/streaming/options_test.exs b/test/broadway_cloud_pub_sub/streaming/options_test.exs new file mode 100644 index 0000000..b26a0d5 --- /dev/null +++ b/test/broadway_cloud_pub_sub/streaming/options_test.exs @@ -0,0 +1,252 @@ +defmodule BroadwayCloudPubSub.Streaming.OptionsTest do + use ExUnit.Case, async: true + + alias BroadwayCloudPubSub.Streaming.Options + + defp validate(opts) do + # Inject required broadway key for the schema + opts = Keyword.put_new(opts, :broadway, name: :TestPipeline) + NimbleOptions.validate(opts, Options.definition()) + end + + describe "subscription" do + test "is required" do + assert {:error, err} = validate([]) + assert Exception.message(err) =~ "required" + assert Exception.message(err) =~ "subscription" + end + + test "accepts a valid subscription path" do + assert {:ok, opts} = + validate(subscription: "projects/my-project/subscriptions/my-sub") + + assert opts[:subscription] == "projects/my-project/subscriptions/my-sub" + end + + test "rejects an empty string" do + assert {:error, err} = validate(subscription: "") + assert Exception.message(err) =~ "non-empty string" + end + + test "rejects a non-string value" do + assert {:error, err} = validate(subscription: 123) + assert Exception.message(err) =~ "non-empty string" + end + end + + describe "max_outstanding_messages" do + test "defaults to 1000" do + {:ok, opts} = validate(subscription: "projects/p/subscriptions/s") + assert opts[:max_outstanding_messages] == 1_000 + end + + test "accepts a positive integer" do + {:ok, opts} = + validate(subscription: "projects/p/subscriptions/s", max_outstanding_messages: 500) + + assert opts[:max_outstanding_messages] == 500 + end + + test "rejects zero" do + assert {:error, _} = + validate(subscription: "projects/p/subscriptions/s", max_outstanding_messages: 0) + end + end + + describe "stream_ack_deadline_seconds" do + test "defaults to 60" do + {:ok, opts} = validate(subscription: "projects/p/subscriptions/s") + assert opts[:stream_ack_deadline_seconds] == 60 + end + + test "accepts value within range" do + {:ok, opts} = + validate(subscription: "projects/p/subscriptions/s", stream_ack_deadline_seconds: 120) + + assert opts[:stream_ack_deadline_seconds] == 120 + end + + test "rejects value below 10" do + assert {:error, err} = + validate( + subscription: "projects/p/subscriptions/s", + stream_ack_deadline_seconds: 5 + ) + + assert Exception.message(err) =~ "between 10 and 600" + end + + test "rejects value above 600" do + assert {:error, err} = + validate( + subscription: "projects/p/subscriptions/s", + stream_ack_deadline_seconds: 601 + ) + + assert Exception.message(err) =~ "between 10 and 600" + end + end + + describe "on_success / on_failure" do + test "on_success defaults to :ack" do + {:ok, opts} = validate(subscription: "projects/p/subscriptions/s") + assert opts[:on_success] == :ack + end + + test "on_failure defaults to :noop" do + {:ok, opts} = validate(subscription: "projects/p/subscriptions/s") + assert opts[:on_failure] == :noop + end + + test "accepts :ack" do + {:ok, opts} = + validate(subscription: "projects/p/subscriptions/s", on_success: :ack) + + assert opts[:on_success] == :ack + end + + test "accepts :noop" do + {:ok, opts} = + validate(subscription: "projects/p/subscriptions/s", on_success: :noop) + + assert opts[:on_success] == :noop + end + + test "normalises :nack to {:nack, 0}" do + {:ok, opts} = + validate(subscription: "projects/p/subscriptions/s", on_success: :nack) + + assert opts[:on_success] == {:nack, 0} + end + + test "accepts {:nack, integer} within range" do + {:ok, opts} = + validate(subscription: "projects/p/subscriptions/s", on_failure: {:nack, 30}) + + assert opts[:on_failure] == {:nack, 30} + end + + test "rejects invalid on_success value" do + assert {:error, err} = + validate(subscription: "projects/p/subscriptions/s", on_success: :bad) + + assert Exception.message(err) =~ "on_success" + end + end + + describe "on_shutdown" do + test "defaults to {:nack, 5}" do + {:ok, opts} = validate(subscription: "projects/p/subscriptions/s") + assert opts[:on_shutdown] == {:nack, 5} + end + + test "accepts :noop" do + {:ok, opts} = + validate(subscription: "projects/p/subscriptions/s", on_shutdown: :noop) + + assert opts[:on_shutdown] == :noop + end + + test "normalises :nack to {:nack, 0}" do + {:ok, opts} = + validate(subscription: "projects/p/subscriptions/s", on_shutdown: :nack) + + assert opts[:on_shutdown] == {:nack, 0} + end + + test "accepts {:nack, N}" do + {:ok, opts} = + validate(subscription: "projects/p/subscriptions/s", on_shutdown: {:nack, 10}) + + assert opts[:on_shutdown] == {:nack, 10} + end + + test "rejects invalid shutdown option" do + assert {:error, err} = + validate(subscription: "projects/p/subscriptions/s", on_shutdown: :ack) + + assert Exception.message(err) =~ "on_shutdown" + end + end + + describe "backoff_type" do + test "defaults to :rand_exp" do + {:ok, opts} = validate(subscription: "projects/p/subscriptions/s") + assert opts[:backoff_type] == :rand_exp + end + + test "accepts all valid types" do + for type <- [:rand_exp, :exp, :rand, :stop] do + assert {:ok, _} = + validate(subscription: "projects/p/subscriptions/s", backoff_type: type) + end + end + + test "rejects unknown type" do + assert {:error, _} = + validate(subscription: "projects/p/subscriptions/s", backoff_type: :linear) + end + end + + describe "grpc_endpoint" do + test "defaults to pubsub.googleapis.com:443" do + {:ok, opts} = validate(subscription: "projects/p/subscriptions/s") + assert opts[:grpc_endpoint] == "pubsub.googleapis.com:443" + end + + test "accepts custom endpoint" do + {:ok, opts} = + validate(subscription: "projects/p/subscriptions/s", grpc_endpoint: "localhost:8085") + + assert opts[:grpc_endpoint] == "localhost:8085" + end + + test "rejects empty string" do + assert {:error, err} = + validate(subscription: "projects/p/subscriptions/s", grpc_endpoint: "") + + assert Exception.message(err) =~ "non-empty string" + end + end + + describe "type_ack_option/2" do + test "converts :nack atom to {:nack, 0}" do + assert {:ok, {:nack, 0}} = Options.type_ack_option(:nack, []) + end + + test "passes through :ack" do + assert {:ok, :ack} = Options.type_ack_option(:ack, []) + end + + test "passes through :noop" do + assert {:ok, :noop} = Options.type_ack_option(:noop, []) + end + + test "passes through {:nack, N} within range" do + assert {:ok, {:nack, 300}} = Options.type_ack_option({:nack, 300}, []) + end + + test "rejects {:nack, N} outside range" do + assert {:error, _} = Options.type_ack_option({:nack, 601}, name: :on_success) + end + end + + describe "type_shutdown_option/2" do + test "converts :nack to {:nack, 0}" do + assert {:ok, {:nack, 0}} = Options.type_shutdown_option(:nack, []) + end + + test "accepts :noop" do + assert {:ok, :noop} = Options.type_shutdown_option(:noop, []) + end + + test "accepts {:nack, N}" do + assert {:ok, {:nack, 5}} = Options.type_shutdown_option({:nack, 5}, []) + end + + test "rejects :ack" do + assert {:error, _} = + Options.type_shutdown_option(:ack, name: :on_shutdown) + end + end +end diff --git a/test/broadway_cloud_pub_sub/streaming/producer_integration_test.exs b/test/broadway_cloud_pub_sub/streaming/producer_integration_test.exs new file mode 100644 index 0000000..1100ae5 --- /dev/null +++ b/test/broadway_cloud_pub_sub/streaming/producer_integration_test.exs @@ -0,0 +1,194 @@ +defmodule BroadwayCloudPubSub.Streaming.ProducerIntegrationTest do + @moduledoc """ + Integration tests for `StreamingProducer` against the Cloud Pub/Sub emulator. + + These tests require the emulator to be running on `PUBSUB_EMULATOR_HOST` + (default `localhost:8085`). Run with: + + mix test --only integration + + Or with env: + + PUBSUB_EMULATOR_HOST=localhost:8085 mix test --only integration + """ + + use ExUnit.Case, async: false + + @moduletag :integration + @moduletag timeout: 30_000 + + alias BroadwayCloudPubSub.PubSubEmulator + + # No-op token generator for the emulator (no auth required) + def noop_token, do: {:ok, "emulator-no-auth"} + + # Minimal Broadway pipeline that sends received messages to the test process + defmodule TestPipeline do + use Broadway + + def start_link(opts) do + test_pid = Keyword.fetch!(opts, :test_pid) + subscription = Keyword.fetch!(opts, :subscription) + emulator_host = Keyword.fetch!(opts, :emulator_host) + name = Keyword.fetch!(opts, :name) + + Broadway.start_link(__MODULE__, + name: name, + producer: [ + module: + {BroadwayCloudPubSub.Streaming.Producer, + subscription: subscription, + token_generator: + {BroadwayCloudPubSub.Streaming.ProducerIntegrationTest, :noop_token, []}, + grpc_endpoint: emulator_host, + use_ssl: false, + max_outstanding_messages: 100, + on_failure: {:nack, 0}}, + concurrency: 1 + ], + processors: [ + default: [concurrency: 2] + ], + context: %{test_pid: test_pid} + ) + end + + @impl Broadway + def handle_message(:default, message, %{test_pid: test_pid}) do + require Logger + + Logger.debug( + "[TestPipeline] handle_message data=#{inspect(message.data)} ack_ref=#{inspect(elem(message.acknowledger, 1))}" + ) + + send(test_pid, {:broadway_message, message.data, message.metadata}) + message + end + + @impl Broadway + def handle_failed(messages, %{test_pid: test_pid}) do + Enum.each(messages, fn msg -> + send(test_pid, {:broadway_failed, msg.data}) + end) + + messages + end + end + + setup_all do + {:ok, _} = DynamicSupervisor.start_link(strategy: :one_for_one, name: GRPC.Client.Supervisor) + PubSubEmulator.start() + :ok + end + + setup do + topic_name = "broadway-integration-#{:erlang.unique_integer([:positive])}" + sub_name = "broadway-integration-sub-#{:erlang.unique_integer([:positive])}" + + {_topic, subscription} = + PubSubEmulator.setup_topic_and_subscription(topic_name, sub_name, ack_deadline_seconds: 60) + + pipeline_name = :"TestPipeline#{:erlang.unique_integer([:positive])}" + + {:ok, pid} = + TestPipeline.start_link( + name: pipeline_name, + test_pid: self(), + subscription: subscription, + emulator_host: PubSubEmulator.host() + ) + + on_exit(fn -> + ref = Process.monitor(pid) + + try do + Broadway.stop(pid) + catch + :exit, _ -> :ok + end + + receive do + {:DOWN, ^ref, :process, ^pid, _} -> :ok + after + 5_000 -> :ok + end + end) + + # Give the pipeline a moment to connect to the emulator + Process.sleep(500) + + {:ok, + topic: topic_name, + sub: sub_name, + subscription: subscription, + pipeline: pid, + pipeline_name: pipeline_name} + end + + describe "message delivery" do + test "receives a single published message", %{topic: topic} do + {:ok, [_msg_id]} = PubSubEmulator.publish(topic, ["hello world"]) + + assert_receive {:broadway_message, "hello world", _metadata}, 5_000 + end + + test "receives multiple published messages", %{topic: topic} do + payloads = Enum.map(1..5, &"message-#{&1}") + {:ok, _msg_ids} = PubSubEmulator.publish(topic, payloads) + + received = + Enum.map(1..5, fn _ -> + receive do + {:broadway_message, data, _meta} -> data + after + 5_000 -> flunk("Timed out waiting for message") + end + end) + + assert Enum.sort(received) == Enum.sort(payloads) + end + + test "message metadata contains messageId and publishTime", %{topic: topic} do + {:ok, [_msg_id]} = PubSubEmulator.publish(topic, ["meta-test"]) + + assert_receive {:broadway_message, "meta-test", metadata}, 5_000 + assert is_binary(metadata.messageId) + assert metadata.messageId != "" + # publishTime may be nil on some emulator versions — check the key exists + assert Map.has_key?(metadata, :publishTime) + assert Map.has_key?(metadata, :attributes) + end + + test "handles large batches without dropping messages", %{topic: topic} do + count = 50 + payloads = Enum.map(1..count, &"bulk-msg-#{&1}") + {:ok, _msg_ids} = PubSubEmulator.publish(topic, payloads) + + received = + Enum.map(1..count, fn _ -> + receive do + {:broadway_message, data, _meta} -> data + after + 10_000 -> flunk("Timed out waiting for bulk message") + end + end) + + assert length(received) == count + assert Enum.sort(received) == Enum.sort(payloads) + end + end + + describe "acknowledgement" do + test "acked messages are not redelivered", %{topic: topic, sub: sub} do + {:ok, [_id]} = PubSubEmulator.publish(topic, ["ack-me"]) + + assert_receive {:broadway_message, "ack-me", _}, 5_000 + + # Wait for ack to be processed, then confirm no pending messages remain + Process.sleep(500) + + {:ok, messages} = PubSubEmulator.pull(sub, max_messages: 5) + assert messages == [] + end + end +end diff --git a/test/support/pubsub_emulator.ex b/test/support/pubsub_emulator.ex new file mode 100644 index 0000000..ba3395a --- /dev/null +++ b/test/support/pubsub_emulator.ex @@ -0,0 +1,178 @@ +defmodule BroadwayCloudPubSub.PubSubEmulator do + @moduledoc """ + Helpers for integration tests against the Cloud Pub/Sub emulator. + + The emulator must be running on `PUBSUB_EMULATOR_HOST` (default `localhost:8085`). + It exposes both HTTP/REST and gRPC on the same port, without TLS. + + ## Usage + + @moduletag :integration + + setup do + BroadwayCloudPubSub.PubSubEmulator.setup_topic_and_subscription( + "my-test-topic", + "my-test-sub" + ) + end + """ + + @default_host "localhost:8085" + @project "test-project" + @finch_name BroadwayCloudPubSub.PubSubEmulator.Finch + + @doc "Returns the emulator host:port (from env or default)." + def host do + System.get_env("PUBSUB_EMULATOR_HOST", @default_host) + end + + @doc "Returns the test GCP project ID." + def project, do: @project + + @doc "Returns the full subscription name." + def subscription(sub_name) do + "projects/#{@project}/subscriptions/#{sub_name}" + end + + @doc "Returns the full topic name." + def topic(topic_name) do + "projects/#{@project}/topics/#{topic_name}" + end + + @doc """ + Starts the internal Finch pool used for emulator REST calls. + Call this once in your `setup` or `setup_all`. + """ + def start do + {:ok, _} = + Finch.start_link( + name: @finch_name, + pools: %{ + :default => [size: 5] + } + ) + + :ok + end + + @doc """ + Creates a topic, then a subscription bound to it. + Deletes them first if they already exist (idempotent). + Returns `{full_topic, full_sub}` as full resource paths. + """ + def setup_topic_and_subscription(topic_name, sub_name, opts \\ []) do + ack_deadline = Keyword.get(opts, :ack_deadline_seconds, 60) + full_topic = topic(topic_name) + full_sub = subscription(sub_name) + + # Idempotent: delete if they exist (ignore errors) + delete_subscription(full_sub) + delete_topic(full_topic) + + :ok = create_topic(full_topic) + :ok = create_subscription(full_sub, full_topic, ack_deadline) + + {full_topic, full_sub} + end + + @doc "Publish messages via the emulator REST API. `messages` is a list of string payloads." + def publish(topic_name, messages) when is_list(messages) do + full_topic = topic(topic_name) + + body = + Jason.encode!(%{ + messages: + Enum.map(messages, fn msg -> + %{data: Base.encode64(msg)} + end) + }) + + url = "http://#{host()}/v1/#{full_topic}:publish" + + case request(:post, url, body) do + {:ok, 200, response_body} -> + decoded = Jason.decode!(response_body) + {:ok, decoded["messageIds"]} + + {:ok, status, body} -> + {:error, {:http_error, status, body}} + + {:error, reason} -> + {:error, reason} + end + end + + @doc "Pulls messages synchronously via the REST API (for post-ack verification)." + def pull(sub_name, opts \\ []) do + max = Keyword.get(opts, :max_messages, 10) + full_sub = subscription(sub_name) + + body = Jason.encode!(%{maxMessages: max, returnImmediately: true}) + url = "http://#{host()}/v1/#{full_sub}:pull" + + case request(:post, url, body) do + {:ok, 200, response_body} -> + decoded = Jason.decode!(response_body) + {:ok, Map.get(decoded, "receivedMessages", [])} + + {:ok, status, body} -> + {:error, {:http_error, status, body}} + + {:error, reason} -> + {:error, reason} + end + end + + # --- Private REST helpers --- + + defp create_topic(full_topic) do + url = "http://#{host()}/v1/#{full_topic}" + + case request(:put, url, "{}") do + {:ok, status, _} when status in [200, 409] -> :ok + {:ok, status, body} -> {:error, {:http_error, status, body}} + {:error, reason} -> {:error, reason} + end + end + + defp create_subscription(full_sub, full_topic, ack_deadline) do + url = "http://#{host()}/v1/#{full_sub}" + + body = + Jason.encode!(%{ + topic: full_topic, + ackDeadlineSeconds: ack_deadline + }) + + case request(:put, url, body) do + {:ok, status, _} when status in [200, 409] -> :ok + {:ok, status, body} -> {:error, {:http_error, status, body}} + {:error, reason} -> {:error, reason} + end + end + + defp delete_topic(full_topic) do + url = "http://#{host()}/v1/#{full_topic}" + request(:delete, url, "") + :ok + end + + defp delete_subscription(full_sub) do + url = "http://#{host()}/v1/#{full_sub}" + request(:delete, url, "") + :ok + end + + defp request(method, url, body) do + headers = [{"content-type", "application/json"}] + req = Finch.build(method, url, headers, body) + + case Finch.request(req, @finch_name) do + {:ok, %Finch.Response{status: status, body: resp_body}} -> + {:ok, status, resp_body} + + {:error, reason} -> + {:error, reason} + end + end +end diff --git a/test/test_helper.exs b/test/test_helper.exs index 869559e..7f28561 100644 --- a/test/test_helper.exs +++ b/test/test_helper.exs @@ -1 +1 @@ -ExUnit.start() +ExUnit.start(exclude: [:integration]) From e7e72ac4afae5091f4e22ee13120ad5200b5e87c Mon Sep 17 00:00:00 2001 From: Rock Date: Fri, 27 Mar 2026 16:16:52 +0100 Subject: [PATCH 02/29] feat: add stream reader and improve stream manager reliability Extract the stream reading logic into a dedicated StreamReader module, separating concerns between connection management and message reading. Add error classification for gRPC response codes and improve the overall reliability of the stream manager. Key changes: - Extract StreamReader module from StreamManager - Add ErrorClassifier for categorizing gRPC errors - Improve stream manager test coverage - Clean up module interfaces and responsibilities --- lib/broadway_cloud_pub_sub/backoff.ex | 2 +- .../streaming/error_classifier.ex | 81 ++ .../streaming/options.ex | 15 +- .../streaming/producer.ex | 39 +- .../streaming/stream_manager.ex | 825 ++++++++++-------- .../streaming/stream_reader.ex | 136 +++ mix.exs | 5 +- mix.lock | 2 + .../streaming/error_classifier_test.exs | 83 ++ .../streaming/producer_integration_test.exs | 2 +- .../streaming/stream_manager_test.exs | 595 +++++++++++++ 11 files changed, 1395 insertions(+), 390 deletions(-) create mode 100644 lib/broadway_cloud_pub_sub/streaming/error_classifier.ex create mode 100644 lib/broadway_cloud_pub_sub/streaming/stream_reader.ex create mode 100644 test/broadway_cloud_pub_sub/streaming/error_classifier_test.exs create mode 100644 test/broadway_cloud_pub_sub/streaming/stream_manager_test.exs diff --git a/lib/broadway_cloud_pub_sub/backoff.ex b/lib/broadway_cloud_pub_sub/backoff.ex index f312cc9..ceafff0 100644 --- a/lib/broadway_cloud_pub_sub/backoff.ex +++ b/lib/broadway_cloud_pub_sub/backoff.ex @@ -63,7 +63,7 @@ defmodule BroadwayCloudPubSub.Backoff do def backoff(%__MODULE__{type: :rand_exp, min: _min, max: max, state: {prev, lower, seed}} = b) do next_min = min(prev, lower) - next_max = min(prev * 3, max) + next_max = min(prev * 2, max) {timeout, seed} = rand(next_min, next_max, seed) {timeout, %{b | state: {min(next_max, max), lower, seed}}} end diff --git a/lib/broadway_cloud_pub_sub/streaming/error_classifier.ex b/lib/broadway_cloud_pub_sub/streaming/error_classifier.ex new file mode 100644 index 0000000..7b4434a --- /dev/null +++ b/lib/broadway_cloud_pub_sub/streaming/error_classifier.ex @@ -0,0 +1,81 @@ +defmodule BroadwayCloudPubSub.Streaming.ErrorClassifier do + @moduledoc false + + # Classifies gRPC errors into :retryable or :terminal categories, + # matching the behaviour of the official Google Cloud Go and Python + # Pub/Sub client libraries. + # + # ## Retryable errors (reconnect the stream) + # + # These are transient conditions where the subscription is still valid and + # reconnecting will eventually succeed: + # + # - DEADLINE_EXCEEDED (4) — server-side idle timeout (the primary issue) + # - INTERNAL (13) — transient server error + # - ABORTED (10) — concurrent modification, retry + # - UNAVAILABLE (14) — server temporarily unavailable or being drained + # EXCEPT "Server shutdownNow invoked" (permanent) + # - UNKNOWN (2) — includes HTTP/2 GOAWAY frames on connection drain + # - RESOURCE_EXHAUSTED (8)— quota temporarily exceeded, retry with backoff + # - Non-gRPC errors — connection resets, EOF, transport errors + # + # ## Terminal errors (stop the GenServer, let Broadway restart it) + # + # These indicate a permanent misconfiguration or missing resource where + # reconnecting without a config change would loop forever: + # + # - NOT_FOUND (5) — subscription does not exist + # - PERMISSION_DENIED (7) — service account lacks Subscriber role + # - INVALID_ARGUMENT (3) — bad subscription name or flow-control params + # - UNAUTHENTICATED (16) — invalid or expired credentials + # - CANCELLED (1) — deliberate cancellation (not self-initiated) + # + # ## Reference + # + # Go: defaultRetryer.Retry() in pubsub/service.go + # Python: _RETRYABLE_STREAM_ERRORS / _TERMINATING_STREAM_ERRORS in bidi.py + + @terminal_status_codes MapSet.new([ + # NOT_FOUND — subscription does not exist + 5, + # PERMISSION_DENIED — no IAM access + 7, + # INVALID_ARGUMENT — bad config / subscription name + 3, + # UNAUTHENTICATED — bad or expired credentials + 16, + # CANCELLED — external cancellation (self-cancellation is handled separately) + 1 + ]) + + # UNAVAILABLE (14) with this message means an intentional server shutdown: + # retrying would connect to the same dying backend. Treat as terminal. + @shutdown_now_message "Server shutdownNow invoked" + + @type classification :: :retryable | :terminal + + @doc """ + Returns `:retryable` or `:terminal` for the given error. + + Any error not listed as terminal is classified as retryable, following the + principle that it is safer to retry unknown errors than to permanently + stop processing messages. + """ + @spec classify(term()) :: classification() + def classify(%GRPC.RPCError{status: status, message: message}) do + cond do + MapSet.member?(@terminal_status_codes, status) -> + :terminal + + # UNAVAILABLE with shutdown message is permanent + status == 14 and String.contains?(message || "", @shutdown_now_message) -> + :terminal + + true -> + :retryable + end + end + + # Non-gRPC errors (transport failures, connection resets, etc.) are retryable + def classify(_other), do: :retryable +end diff --git a/lib/broadway_cloud_pub_sub/streaming/options.ex b/lib/broadway_cloud_pub_sub/streaming/options.ex index 85197fa..47ff311 100644 --- a/lib/broadway_cloud_pub_sub/streaming/options.ex +++ b/lib/broadway_cloud_pub_sub/streaming/options.ex @@ -10,6 +10,7 @@ defmodule BroadwayCloudPubSub.Streaming.Options do @default_lease_extension_percent 0.6 @default_backoff_min 1_000 @default_backoff_max 30_000 + @default_keepalive_interval_ms 30_000 definition = [ # Handled by Broadway. @@ -147,6 +148,18 @@ defmodule BroadwayCloudPubSub.Streaming.Options do default: @default_backoff_max, doc: "Maximum reconnection backoff in milliseconds. Defaults to 30000." ], + keepalive_interval_ms: [ + type: :pos_integer, + default: @default_keepalive_interval_ms, + doc: """ + Interval in milliseconds at which HTTP/2 PING frames are sent on the gRPC + connection to keep it alive. This prevents Google Cloud's load balancer + from closing idle connections (which it does after roughly 20 seconds by + default). Matches the 30-second keepalive interval used by the official + Python and Go Pub/Sub client libraries. Only applies to the `:gun` adapter. + Defaults to 30000. + """ + ], adapter: [ type: {:in, [:gun, :mint]}, default: :gun, @@ -158,7 +171,7 @@ defmodule BroadwayCloudPubSub.Streaming.Options do * `:mint` — Uses the Mint HTTP/2 client. Mint may be preferable in deployment environments where Gun is not available or not desired. - Both adapters are provided by the `grpc` dependency. The adapter choice + Both adapters are provided by the `grpc_client` dependency. The adapter choice does not affect the public API or message semantics. """ ], diff --git a/lib/broadway_cloud_pub_sub/streaming/producer.ex b/lib/broadway_cloud_pub_sub/streaming/producer.ex index a8c6afa..0cf73f8 100644 --- a/lib/broadway_cloud_pub_sub/streaming/producer.ex +++ b/lib/broadway_cloud_pub_sub/streaming/producer.ex @@ -52,10 +52,14 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do ## Differences from BroadwayCloudPubSub.Producer - * **Push-based**: Messages arrive via a persistent gRPC stream rather than - being polled. `handle_demand` is a no-op. + * **Push-based with demand signaling**: Messages arrive via a persistent gRPC + stream. The producer tracks GenStage demand from downstream consumers and + signals StreamManager when capacity is available. StreamManager buffers + messages internally when demand is zero, preventing unbounded mailbox growth. * **Flow control**: Controlled by `max_outstanding_messages` / `max_outstanding_bytes` on the gRPC stream rather than by `max_number_of_messages` per HTTP request. + This is the primary backpressure mechanism — the Pub/Sub server will not push + more than `max_outstanding_messages` unacked messages. * **Shutdown**: By default, unprocessed messages are returned to Pub/Sub with a short delay (`on_shutdown: {:nack, 5}`), analogous to AMQP channel close behavior. @@ -89,10 +93,22 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do Measurements: `%{reason: term()}` + * `[:broadway_cloud_pub_sub, :stream, :terminal_error]` - Emitted when a + non-retryable gRPC error is received (e.g. NOT_FOUND, PERMISSION_DENIED). + The StreamManager will stop after this event is emitted. + + Measurements: `%{reason: term()}` + + * `[:broadway_cloud_pub_sub, :stream, :ack_buffered]` - Emitted when an + ack/nack request is buffered because the gRPC stream is temporarily + unavailable (e.g. during reconnection). + + Measurements: `%{buffer_size: non_neg_integer()}` + All events include the following metadata: + * `:name` - the Broadway topology name * `:subscription` - the full subscription name - * `:config` - the producer configuration """ @@ -147,20 +163,21 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do ack_config = %{on_success: config.on_success, on_failure: config.on_failure} :persistent_term.put(ack_ref, {manager_pid, ack_config}) - buffer_size = config.max_outstanding_messages * 5 - - {:producer, %{manager_pid: manager_pid, ack_ref: ack_ref, config: config, draining: false}, - buffer_size: buffer_size} + {:producer, + %{manager_pid: manager_pid, ack_ref: ack_ref, config: config, draining: false, demand: 0}} end @impl GenStage - def handle_demand(_demand, state) do - {:noreply, [], state} + def handle_demand(incoming_demand, %{demand: demand} = state) do + new_demand = demand + incoming_demand + StreamManager.notify_demand(state.manager_pid, new_demand) + {:noreply, [], %{state | demand: new_demand}} end @impl GenStage - def handle_info({:stream_messages, messages}, state) do - {:noreply, messages, state} + def handle_info({:stream_messages, messages}, %{demand: demand} = state) do + new_demand = max(demand - length(messages), 0) + {:noreply, messages, %{state | demand: new_demand}} end @impl GenStage diff --git a/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex b/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex index eae1976..4749403 100644 --- a/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex +++ b/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex @@ -11,32 +11,65 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do # lease management and shutdown nacking # - Extend message leases periodically via modifyAckDeadline # - Buffer ack/nack requests during reconnection and replay on connect + # - Buffer incoming messages when the producer has no pending demand + # (demand-based backpressure via notify_demand/1) + # - Send keep-alive pings every 30s to prevent server idle timeout + # (matches the Go pingTicker and Python Heartbeater behaviour) # - # Implementation note on gRPC adapter support: - # This module supports two gRPC HTTP/2 adapters: Gun and Mint. + # Backpressure design: + # The producer calls notify_demand/2 whenever Broadway signals new demand, + # passing the total accumulated demand count. StreamManager keeps a + # `pending_demand` integer. When `pending_demand` is 0, incoming messages + # are stored in `message_buffer` instead of being forwarded. On each + # notify_demand/2 or incoming gRPC batch, up to `pending_demand` messages + # are flushed to the producer with the rest remaining buffered. The buffer + # is naturally bounded by `max_outstanding_messages` (the Pub/Sub server + # will not push more unacked messages than that limit). # - # Gun (default): GRPC.Client.Adapters.Gun calls :gun.post/3 from the calling - # process, making it the gun message owner. Raw {:gun_response,...}, - # {:gun_data,...} etc. messages arrive in this GenServer's mailbox and are - # handled in handle_info/2. Manual gRPC frame decoding is done in-process - # via recv_buffer. + # gRPC streaming: + # A dedicated `StreamReader` process owns the gRPC stream for both the + # Gun and Mint adapters. The reader calls `GRPC.Stub.recv/2` and forwards + # decoded messages back as `{:stream_messages, msgs}`. See `StreamReader` + # for a detailed explanation of why a separate process is needed and how + # both adapters behave identically from this module's perspective. # - # Mint: GRPC.Client.Adapters.Mint handles all framing internally via its - # ConnectionProcess and StreamResponseProcess. A linked reader process calls - # GRPC.Stub.recv/2 to enumerate decoded messages and forwards them back as - # {:mint_messages, msgs}. Only {:elixir_grpc, :connection_down, conn_pid} - # arrives in this process directly from Mint. - # The GenServer traps exits so reader process crashes are handled gracefully. + # Keep-alive pings: + # Google's servers close idle StreamingPull connections after ~60 seconds + # of inactivity. Both the official Go (pingTicker) and Python (Heartbeater) + # libraries send an empty StreamingPullRequest every 30 seconds to prevent + # this. We do the same via the :send_keepalive timer. The timer is started + # when the stream opens and cancelled when it closes. + # + # Reconnect deduplication: + # Multiple events can arrive close together on a disconnect — e.g. + # {:stream_error} followed by {:stream_closed} or an {:EXIT} signal. + # Without deduplication, each would schedule a separate :connect message, + # causing two concurrent connection attempts. We track the pending reconnect + # timer ref in `reconnect_ref` and skip scheduling if one is already set. + # + # Error classification: + # gRPC errors are classified as :retryable (reconnect) or :terminal (stop). + # Terminal errors (NOT_FOUND, PERMISSION_DENIED, etc.) indicate a permanent + # misconfiguration; retrying forever would be counterproductive. The GenServer + # stops with {:terminal_error, reason} and Broadway's supervision restarts it, + # which will surface the error via normal OTP crash reporting. + # + # Skip-backoff optimisation: + # If a stream error arrives quickly after the stream opened, we apply the + # full exponential backoff. If the stream was alive for >30s before failing + # (meaning the server already had time to send a DEADLINE_EXCEEDED), we skip + # the backoff sleep and reconnect immediately — matching the Go optimisation. use GenServer require Logger alias BroadwayCloudPubSub.{Backoff, MessageBuilder} - alias Google.Pubsub.V1.{StreamingPullRequest, StreamingPullResponse} - alias Google.Pubsub.V1.Subscriber.Stub + alias BroadwayCloudPubSub.Streaming.{ErrorClassifier, StreamReader} + alias Google.Pubsub.V1.StreamingPullRequest - # Maximum acks to buffer while reconnecting - @max_ack_buffer 10_000 + # Default keep-alive interval — matches Go's pingTicker and Python's Heartbeater. + # The server's inactivity timeout is ~60s; pinging at half that prevents closure. + @default_keepalive_ms 30_000 defstruct [ :producer_pid, @@ -44,16 +77,31 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do :channel, :grpc_stream, :conn_pid, - :stream_ref, - # Mint-only: pid of the linked reader process that enumerates GRPC.Stub.recv/2 + # Pid of the linked StreamReader process that enumerates GRPC.Stub.recv/2 :reader_pid, :backoff, :lease_timer, :lease_extension_interval_ms, :receiving, - # Gun-only: binary buffer for reassembling gRPC length-prefixed frames - recv_buffer: <<>>, + # Timer ref for the pending :connect message. Non-nil means a reconnect is + # already scheduled — prevents double-scheduling from multiple close signals. + :reconnect_ref, + # Timer ref for the periodic :send_keepalive message. + :keepalive_timer, + # Monotonic timestamp (ms) of when the current stream was opened. + # Used for the skip-backoff optimisation: if the stream ran >30s before + # failing, we skip the backoff sleep and reconnect immediately. + :stream_opened_at, outstanding: MapSet.new(), + # Messages buffered while the producer has no pending demand. + # Naturally bounded by max_outstanding_messages (server-side flow control). + message_buffer: [], + # How many messages the producer can currently accept. + # Refreshed on each notify_demand/2; decremented when messages are flushed. + pending_demand: 0, + # Ack/nack requests buffered while the gRPC stream is down (reconnecting). + # Replayed in FIFO order on successful reconnect. Naturally bounded by + # max_outstanding_messages — no more acks can arrive than messages delivered. ack_buffer: [], ack_buffer_size: 0 ] @@ -125,6 +173,18 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do GenServer.call(pid, :close, 10_000) end + @doc """ + Signals the current demand from the producer. The `amount` is the producer's + total accumulated demand (not a delta). The StreamManager uses it as an upper + bound for how many buffered messages to flush immediately. + + Called by `Streaming.Producer.handle_demand/2`. + """ + @spec notify_demand(pid(), non_neg_integer()) :: :ok + def notify_demand(pid, amount) when is_integer(amount) and amount >= 0 do + GenServer.cast(pid, {:demand_available, amount}) + end + # --- GenServer callbacks --- @impl GenServer @@ -148,7 +208,8 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do config: config, backoff: backoff, lease_extension_interval_ms: lease_extension_interval_ms, - receiving: true + receiving: true, + pending_demand: 0 } # Delay connecting until producer tells us its pid via set_producer/2 @@ -157,156 +218,103 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do @impl GenServer def handle_info(:connect, state) do + # Clear the reconnect_ref — we are now executing the scheduled connect. + state = %{state | reconnect_ref: nil} + case connect(state) do {:ok, new_state} -> {:noreply, new_state} {:error, reason, new_state} -> - log_connection_failure(reason) emit_telemetry(:connection_failure, %{reason: reason}, state.config) {:noreply, schedule_reconnect(new_state)} end end - # --- Raw Gun protocol messages --- - - # Initial HTTP/2 response headers (200 OK) — connection established - def handle_info({:gun_response, conn_pid, stream_ref, :nofin, 200, _headers}, state) - when state.conn_pid == conn_pid and state.stream_ref == stream_ref do - {:noreply, state} - end - - # Non-200 initial response — treat as error - def handle_info({:gun_response, conn_pid, stream_ref, _fin, status, _headers}, state) - when state.conn_pid == conn_pid and state.stream_ref == stream_ref do - Logger.error("[StreamManager] gRPC stream got HTTP status #{status}") - {:noreply, schedule_reconnect(reset_connection(state))} - end - - # Catch-all for gun_response not matching our stream (e.g. different conn/ref) - def handle_info({:gun_response, conn_pid, stream_ref, fin, status, _headers}, state) do - Logger.debug( - "[StreamManager] Ignoring stale gun_response: conn=#{inspect(conn_pid)} ref=#{inspect(stream_ref)} fin=#{inspect(fin)} status=#{status} (state conn=#{inspect(state.conn_pid)} ref=#{inspect(state.stream_ref)})" - ) - - {:noreply, state} - end - - # Data chunk(s) from the server - def handle_info({:gun_data, conn_pid, stream_ref, fin, data}, state) - when state.conn_pid == conn_pid and state.stream_ref == stream_ref do - buffer = state.recv_buffer <> data - {messages, remaining_buffer} = decode_grpc_messages(buffer) + # The StreamReader successfully opened the gRPC stream and sends us the + # stream struct so we can call send_request for acks and lease extensions. + def handle_info({:stream_opened, reader_pid, grpc_stream}, %{reader_pid: reader_pid} = state) do + conn_pid = grpc_stream.channel.adapter_payload.conn_pid + backoff = Backoff.reset(state.backoff) - state = %{state | recv_buffer: remaining_buffer} + pre_flush_state = %{ + state + | grpc_stream: grpc_stream, + conn_pid: conn_pid, + backoff: backoff, + stream_opened_at: now_ms() + } - state = - if state.receiving and messages != [] do - broadway_messages = Enum.map(messages, &build_broadway_message(&1, state)) - ack_ids = Enum.map(messages, & &1.ack_id) - new_outstanding = Enum.reduce(ack_ids, state.outstanding, &MapSet.put(&2, &1)) - emit_telemetry(:receive_messages, %{count: length(broadway_messages)}, state.config) - send(state.producer_pid, {:stream_messages, broadway_messages}) - %{state | outstanding: new_outstanding} - else - state - end + case flush_ack_buffer(pre_flush_state) do + {:ok, state} -> + state = schedule_lease_timer(state) + state = schedule_keepalive_timer(state) + emit_telemetry(:connect, %{}, state.config) + {:noreply, state} - if fin == :fin do - {:noreply, schedule_reconnect(reset_connection(state))} - else - {:noreply, state} + {:error, reason, state} -> + {:noreply, schedule_reconnect(reset_connection(state, {:send_failed, reason}))} end end - # Catch-all for gun_data not matching our stream - def handle_info({:gun_data, conn_pid, stream_ref, _fin, _data} = _msg, state) do - Logger.debug( - "[StreamManager] Ignoring stale gun_data: conn=#{inspect(conn_pid)} ref=#{inspect(stream_ref)} (state conn=#{inspect(state.conn_pid)} ref=#{inspect(state.stream_ref)})" - ) - + # Stale :stream_opened from a previous reader (race during reconnect) — ignore. + def handle_info({:stream_opened, _pid, _stream}, state) do {:noreply, state} end - # Trailers — stream ended normally - def handle_info({:gun_trailers, conn_pid, stream_ref, trailers}, state) - when state.conn_pid == conn_pid and state.stream_ref == stream_ref do - grpc_status = trailers |> List.keyfind("grpc-status", 0) |> elem(1) - grpc_message = trailers |> List.keyfind("grpc-message", 0, {"", ""}) |> elem(1) - - case grpc_status do - "0" -> - :ok - - status -> - Logger.warning( - "[StreamManager] gRPC stream closed with status #{status}: #{grpc_message}" - ) - end - - emit_telemetry(:disconnect, %{reason: :stream_closed}, state.config) - {:noreply, schedule_reconnect(reset_connection(state))} - end - - # Stream-level error - def handle_info({:gun_error, conn_pid, stream_ref, reason}, state) - when state.conn_pid == conn_pid and state.stream_ref == stream_ref do - Logger.warning("[StreamManager] gRPC stream error: #{inspect(reason)}") - emit_telemetry(:disconnect, %{reason: reason}, state.config) - {:noreply, schedule_reconnect(reset_connection(state))} - end - - # Connection-level error/down - def handle_info({:gun_down, conn_pid, _protocol, reason, _killed_streams}, state) - when state.conn_pid == conn_pid do - Logger.warning("[StreamManager] gRPC connection down: #{inspect(reason)}") - emit_telemetry(:disconnect, %{reason: reason}, state.config) - {:noreply, schedule_reconnect(reset_connection(state))} - end - - # --- Mint adapter messages --- - - # Decoded messages forwarded from the Mint reader process - def handle_info({:mint_messages, messages}, state) do + # Decoded messages forwarded from the StreamReader + def handle_info({:stream_messages, messages}, state) do if state.receiving and messages != [] do broadway_messages = Enum.map(messages, &build_broadway_message(&1, state)) ack_ids = Enum.map(messages, & &1.ack_id) new_outstanding = Enum.reduce(ack_ids, state.outstanding, &MapSet.put(&2, &1)) emit_telemetry(:receive_messages, %{count: length(broadway_messages)}, state.config) - send(state.producer_pid, {:stream_messages, broadway_messages}) - {:noreply, %{state | outstanding: new_outstanding}} + {:noreply, deliver_messages(%{state | outstanding: new_outstanding}, broadway_messages)} else {:noreply, state} end end - # Error reported by the Mint reader process (stream-level gRPC error) - def handle_info({:mint_stream_error, error}, state) do - Logger.warning("[StreamManager] Mint stream error: #{inspect(error)}") - emit_telemetry(:disconnect, %{reason: error}, state.config) - {:noreply, schedule_reconnect(reset_connection(state))} + # Stream-level gRPC error reported by the StreamReader. + # Classify: retryable errors trigger reconnect; terminal errors stop the GenServer. + def handle_info({:stream_error, error}, state) do + case ErrorClassifier.classify(error) do + :terminal -> + Logger.error("Terminal Cloud Pub/Sub gRPC error — stopping: #{inspect(error)}") + + emit_telemetry(:terminal_error, %{reason: error}, state.config) + {:stop, {:terminal_error, error}, close_stream(state)} + + :retryable -> + emit_telemetry(:disconnect, %{reason: error}, state.config) + {:noreply, schedule_reconnect(reset_connection(state, error))} + end end - # Connection-down message sent by the Mint adapter's ConnectionProcess - def handle_info({:elixir_grpc, :connection_down, conn_pid}, state) - when state.conn_pid == conn_pid do - Logger.warning("[StreamManager] Mint gRPC connection down") - emit_telemetry(:disconnect, %{reason: :connection_down}, state.config) - {:noreply, schedule_reconnect(reset_connection(state))} + # Server closed the stream normally (StreamReader enumeration exhausted) + def handle_info({:stream_closed}, state) do + emit_telemetry(:disconnect, %{reason: :stream_closed}, state.config) + {:noreply, schedule_reconnect(reset_connection(state, :stream_closed))} end - # Mint reader process exited normally — stream ended, reconnect + # StreamReader process exited normally — stream ended cleanly. + # {:stream_closed} is sent before the exit, so this is a duplicate signal. + # We only reconnect if grpc_stream is still set (meaning the stream_closed + # message wasn't processed first). def handle_info({:EXIT, pid, :normal}, %{reader_pid: pid} = state) do - Logger.info("[StreamManager] Mint reader stream ended normally") - emit_telemetry(:disconnect, %{reason: :stream_closed}, state.config) - {:noreply, schedule_reconnect(reset_connection(state))} + if state.grpc_stream do + emit_telemetry(:disconnect, %{reason: :stream_closed}, state.config) + {:noreply, schedule_reconnect(reset_connection(state, :stream_closed))} + else + # Already handled by {:stream_closed} — just clear the reader_pid + {:noreply, %{state | reader_pid: nil}} + end end - # Mint reader process crashed — reconnect + # StreamReader process crashed — reconnect def handle_info({:EXIT, pid, reason}, %{reader_pid: pid} = state) do - Logger.warning("[StreamManager] Mint reader crashed: #{inspect(reason)}") emit_telemetry(:disconnect, %{reason: reason}, state.config) - {:noreply, schedule_reconnect(reset_connection(state))} + {:noreply, schedule_reconnect(reset_connection(state, reason))} end # Catch-all for other EXIT signals (e.g. from the supervisor during shutdown) @@ -323,18 +331,60 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do deadline = state.config.stream_ack_deadline_seconds deadlines = List.duplicate(deadline, length(ack_ids)) - send_on_stream(state.grpc_stream, %StreamingPullRequest{ - modify_deadline_ack_ids: ack_ids, - modify_deadline_seconds: deadlines - }) + case send_on_stream(state.grpc_stream, %StreamingPullRequest{ + modify_deadline_ack_ids: ack_ids, + modify_deadline_seconds: deadlines + }) do + :ok -> + timer = Process.send_after(self(), :extend_leases, state.lease_extension_interval_ms) + {:noreply, %{state | lease_timer: timer}} - timer = Process.send_after(self(), :extend_leases, state.lease_extension_interval_ms) - {:noreply, %{state | lease_timer: timer}} + {:error, reason} -> + {:noreply, schedule_reconnect(reset_connection(state, {:send_failed, reason}))} + end end end - def handle_info(msg, state) do - Logger.warning("[StreamManager] Unhandled message: #{inspect(msg)}") + # Periodic keep-alive ping: send an empty StreamingPullRequest to prevent the + # server from closing an idle stream. Matches Go's pingTicker (30s) and Python's + # Heartbeater (30s). The server's inactivity timeout is ~60s; pinging at half + # that gives a comfortable margin. + def handle_info(:send_keepalive, %{grpc_stream: nil} = state) do + # Stream is disconnected — don't ping, but reschedule for when it reconnects. + # (Timer will be cancelled and restarted by close_stream/schedule_keepalive_timer.) + {:noreply, state} + end + + def handle_info(:send_keepalive, state) do + case send_on_stream(state.grpc_stream, %StreamingPullRequest{}) do + :ok -> + timer = schedule_keepalive_after(state.config) + {:noreply, %{state | keepalive_timer: timer}} + + {:error, reason} -> + {:noreply, schedule_reconnect(reset_connection(state, {:send_failed, reason}))} + end + end + + # Mint adapter signals connection loss to its parent process. + # When the test (or the real stack) routes this signal to StreamManager, + # treat it the same as a stream error: reset and reconnect. + def handle_info({:elixir_grpc, :connection_down, conn_pid}, %{conn_pid: conn_pid} = state) do + emit_telemetry(:disconnect, %{reason: :connection_down}, state.config) + {:noreply, schedule_reconnect(reset_connection(state, :connection_down))} + end + + # Gun adapter signals connection loss via :gun_down messages. + # Guard on the stored conn_pid to ignore stale/other connections. + def handle_info( + {:gun_down, conn_pid, _protocol, _reason, _killed_streams}, + %{conn_pid: conn_pid} = state + ) do + emit_telemetry(:disconnect, %{reason: :connection_down}, state.config) + {:noreply, schedule_reconnect(reset_connection(state, :connection_down))} + end + + def handle_info(_msg, state) do {:noreply, state} end @@ -343,16 +393,18 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do new_outstanding = Enum.reduce(ack_ids, state.outstanding, &MapSet.delete(&2, &1)) state = %{state | outstanding: new_outstanding} - state = - if state.grpc_stream do - send_on_stream(state.grpc_stream, %StreamingPullRequest{ack_ids: ack_ids}) - emit_telemetry(:ack, %{count: length(ack_ids)}, state.config) - state - else - buffer_ack_request(state, {:ack, ack_ids}) - end + if state.grpc_stream do + case send_on_stream(state.grpc_stream, %StreamingPullRequest{ack_ids: ack_ids}) do + :ok -> + emit_telemetry(:ack, %{count: length(ack_ids)}, state.config) + {:noreply, state} - {:noreply, state} + {:error, reason} -> + {:noreply, schedule_reconnect(reset_connection(state, {:send_failed, reason}))} + end + else + {:noreply, buffer_ack_request(state, {:ack, ack_ids})} + end end def handle_cast({:modify_deadline, ack_ids, deadline_seconds}, state) do @@ -366,19 +418,31 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do deadlines = List.duplicate(deadline_seconds, length(ack_ids)) state = %{state | outstanding: new_outstanding} - state = - if state.grpc_stream do - send_on_stream(state.grpc_stream, %StreamingPullRequest{ - modify_deadline_ack_ids: ack_ids, - modify_deadline_seconds: deadlines - }) + if state.grpc_stream do + case send_on_stream(state.grpc_stream, %StreamingPullRequest{ + modify_deadline_ack_ids: ack_ids, + modify_deadline_seconds: deadlines + }) do + :ok -> + {:noreply, state} - state - else - buffer_ack_request(state, {:modify_deadline, ack_ids, deadline_seconds}) + {:error, reason} -> + {:noreply, schedule_reconnect(reset_connection(state, {:send_failed, reason}))} end + else + {:noreply, buffer_ack_request(state, {:modify_deadline, ack_ids, deadline_seconds})} + end + end - {:noreply, state} + # The producer signals its current total demand. Update pending_demand and + # flush up to that many buffered messages to the producer. + def handle_cast({:demand_available, amount}, %{message_buffer: []} = state) do + {:noreply, %{state | pending_demand: amount}} + end + + def handle_cast({:demand_available, amount}, state) do + state = %{state | pending_demand: amount} + {:noreply, flush_demand(state)} end @impl GenServer @@ -397,15 +461,22 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do end def handle_call(:close, _from, state) do - state = flush_ack_buffer(state) + # Best-effort: flush buffered acks before closing. Errors are ignored + # because we're shutting down regardless. + state = + case flush_ack_buffer(state) do + {:ok, s} -> s + {:error, _reason, s} -> %{s | ack_buffer: [], ack_buffer_size: 0} + end + state = close_stream(state) {:reply, :ok, state} end @impl GenServer - def terminate(reason, state) do - Logger.debug("[StreamManager] terminate: reason=#{inspect(reason)}") + def terminate(_reason, state) do cancel_lease_timer(state) + cancel_keepalive_timer(state) close_stream(state) :ok end @@ -417,87 +488,52 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do {:ok, channel} <- open_channel(config, token) do connect_stream(channel, state) end - end - - # Second phase of connect: open the gRPC stream on the already-open channel. - # Separated so that if open_stream raises, we can disconnect the channel - # before propagating the error (preventing stale Gun messages in our mailbox). - defp connect_stream(channel, %{config: config} = state) do - {:ok, grpc_stream, conn_pid, stream_ref} = open_stream(channel, state) - backoff = Backoff.reset(state.backoff) - - state = - flush_ack_buffer(%{ - state - | channel: channel, - grpc_stream: grpc_stream, - conn_pid: conn_pid, - stream_ref: stream_ref, - recv_buffer: <<>>, - backoff: backoff - }) - - state = maybe_start_reader(state) - state = schedule_lease_timer(state) - emit_telemetry(:connect, %{}, config) - {:ok, state} rescue e -> - # open_stream may raise (Stub.streaming_pull / send_request don't - # return error tuples). If it raised, no gRPC stream was successfully - # opened, so only disconnect the channel to prevent its Gun/Mint - # connection from delivering stale messages to our mailbox. + {:error, {:connect_failed, Exception.message(e)}, state} + end + + # Opens the gRPC channel and spawns the StreamReader, which will open the + # stream and send {:stream_opened, reader_pid, grpc_stream} back to us. + # The actual grpc_stream struct is stored on {:stream_opened} receipt, not here. + defp connect_stream(channel, state) do + reader_pid = StreamReader.start_link(self(), channel, state.config) + + {:ok, + %{ + state + | channel: channel, + reader_pid: reader_pid, + grpc_stream: nil, + conn_pid: nil + }} + rescue + e -> try do GRPC.Stub.disconnect(channel) catch _, _ -> :ok end - {:error, {:open_stream_raised, Exception.message(e)}, state} + {:error, {:connect_failed, Exception.message(e)}, state} end - # For Mint: spawn a linked reader process that enumerates GRPC.Stub.recv/2 and - # forwards decoded messages back to the StreamManager. - defp maybe_start_reader(%{config: %{adapter: :mint}, grpc_stream: grpc_stream} = state) do - manager = self() - - pid = - spawn_link(fn -> - {:ok, enum} = GRPC.Stub.recv(grpc_stream) - - enum - |> Stream.each(fn - {:ok, %StreamingPullResponse{received_messages: msgs}} when msgs != [] -> - send(manager, {:mint_messages, msgs}) - - {:ok, %StreamingPullResponse{}} -> - # Heartbeat / empty response — nothing to forward - :ok - - {:error, error} -> - send(manager, {:mint_stream_error, error}) - end) - |> Stream.run() - - # Stream exhausted normally. The reader exits :normal and StreamManager - # will receive {:EXIT, reader_pid, :normal} due to trap_exit. - end) - - %{state | reader_pid: pid} - end - - defp maybe_start_reader(state), do: state - - defp adapter_module(:gun), do: GRPC.Client.Adapters.Gun - defp adapter_module(:mint), do: GRPC.Client.Adapters.Mint - defp open_channel( - %{grpc_endpoint: endpoint, use_ssl: use_ssl, adapter: adapter} = _config, + %{grpc_endpoint: endpoint, use_ssl: use_ssl, adapter: adapter} = config, token ) do + adapter_mod = + case adapter do + :gun -> GRPC.Client.Adapters.Gun + :mint -> GRPC.Client.Adapters.Mint + end + + keepalive_interval_ms = Map.get(config, :keepalive_interval_ms, 30_000) + base_opts = [ - adapter: adapter_module(adapter), - headers: [{"authorization", "Bearer #{token}"}] + adapter: adapter_mod, + headers: [{"authorization", "Bearer #{token}"}], + adapter_opts: [http2_opts: %{keepalive: keepalive_interval_ms, settings_timeout: :infinity}] ] opts = @@ -514,135 +550,96 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do end end - defp open_stream(channel, state) do - config = state.config - client_id = Map.fetch!(config, :client_id) - - initial_request = %StreamingPullRequest{ - subscription: config.subscription, - stream_ack_deadline_seconds: config.stream_ack_deadline_seconds, - max_outstanding_messages: config.max_outstanding_messages, - max_outstanding_bytes: config.max_outstanding_bytes, - client_id: client_id - } - - grpc_stream = Stub.streaming_pull(channel, []) - grpc_stream = GRPC.Stub.send_request(grpc_stream, initial_request) - - # Both adapters store the connection process pid in adapter_payload.conn_pid, - # but the stream_ref field only exists for Gun. - conn_pid = grpc_stream.channel.adapter_payload.conn_pid - - case config.adapter do - :gun -> - stream_ref = grpc_stream.payload.stream_ref - {:ok, grpc_stream, conn_pid, stream_ref} - - :mint -> - {:ok, grpc_stream, conn_pid, nil} - end - end - - # Decode one or more GRPC length-prefixed messages from the buffer. - # Returns {[StreamingPullResponse.received_messages], remaining_buffer} - defp decode_grpc_messages(buffer) do - decode_grpc_messages(buffer, []) - end - - defp decode_grpc_messages(buffer, acc) do - case GRPC.Message.get_message(buffer) do - {{_flag, encoded}, rest} -> - case StreamingPullResponse.decode(encoded) do - %StreamingPullResponse{received_messages: msgs} when msgs != [] -> - decode_grpc_messages(rest, Enum.reverse(msgs, acc)) - - %StreamingPullResponse{} -> - # Heartbeat/empty response - decode_grpc_messages(rest, acc) - end - - false -> - {Enum.reverse(acc), buffer} - end - end - defp send_on_stream(grpc_stream, request) do - try do - GRPC.Stub.send_request(grpc_stream, request) - catch - kind, reason -> - Logger.warning("[StreamManager] Failed to send on stream: #{kind} #{inspect(reason)}") - end + GRPC.Stub.send_request(grpc_stream, request) + :ok + catch + kind, reason -> {:error, {kind, reason}} end - defp reset_connection(state) do - close_stream(state) - end + defp reset_connection(state, reason) do + # Drop buffered (not-yet-delivered) messages on disconnect. Their ack_ids are + # in `outstanding`, so extract and remove them before closing the stream to + # avoid pointless lease-extension attempts for messages that will be redelivered. + buffered_ack_ids = + state.message_buffer + |> Enum.map(fn %Broadway.Message{acknowledger: {_, _, %{ack_id: id}}} -> id end) - defp close_stream(%{grpc_stream: nil} = state), do: stop_reader(state) + new_outstanding = + Enum.reduce(buffered_ack_ids, state.outstanding, &MapSet.delete(&2, &1)) + + # Preserve `pending_demand` across reconnection. The producer's demand counter + # survives the disconnect and it won't re-signal demand it already sent. + # Clearing pending_demand here would cause a demand deadlock: the producer has + # pending demand but thinks it already notified us, while we lost the count. + # Buffered messages are dropped (the server will redeliver them), but demand + # state must carry over so the reconnected stream can deliver immediately. + # + # Record the disconnect reason so schedule_reconnect can apply the skip-backoff + # optimisation: if the stream was alive for >30s, reconnect without delay. + close_stream( + %{ + state + | message_buffer: [], + outstanding: new_outstanding, + stream_opened_at: state.stream_opened_at, + # carry stream_opened_at through for the skip-backoff check + reconnect_ref: state.reconnect_ref + }, + reason + ) + end - defp close_stream(%{grpc_stream: grpc_stream, channel: channel, conn_pid: conn_pid} = state) do - state = stop_reader(state) + # Overload that does not carry a reason (used by close_stream directly) + defp close_stream(%{reader_pid: nil, grpc_stream: nil} = state), do: state - # Cancel the stream (sends RST_STREAM) so Gun stops forwarding data for - # this stream_ref. end_stream/1 only half-closes the client side and leaves - # the server free to keep sending. - try do - GRPC.Stub.cancel(grpc_stream) - catch - _, _ -> :ok + defp close_stream(%{reader_pid: reader_pid, grpc_stream: grpc_stream, channel: channel} = state) do + # Stop the reader first so it doesn't send more messages while we clean up. + # Unlink before killing to prevent the EXIT signal from triggering reconnect. + if is_pid(reader_pid) do + Process.unlink(reader_pid) + Process.exit(reader_pid, :kill) end - if channel do + if grpc_stream do try do - GRPC.Stub.disconnect(channel) + GRPC.Stub.cancel(grpc_stream) catch _, _ -> :ok end end - # Force-kill the underlying Gun process synchronously. :gun.shutdown (called - # by GRPC.Stub.disconnect) is an async cast with a 15-second graceful close - # period during which Gun continues delivering messages to our mailbox. - # Killing it immediately eliminates that race window. - if is_pid(conn_pid), do: Process.exit(conn_pid, :kill) - - # Drain any gun messages from conn_pid that were already in our mailbox - # before the process died. - flush_gun_messages(conn_pid) - - %{state | grpc_stream: nil, channel: nil, conn_pid: nil, stream_ref: nil, recv_buffer: <<>>} - end - - # Kills the Mint reader process (if any) and removes it from state. - # Unlinks before killing so the EXIT signal does not trigger reconnect logic. - defp stop_reader(%{reader_pid: pid} = state) when is_pid(pid) do - Process.unlink(pid) - Process.exit(pid, :kill) - %{state | reader_pid: nil} - end - - defp stop_reader(state), do: state - - # Drains any Gun messages from a specific conn_pid that are already sitting - # in our mailbox. We pin on ^conn_pid so we never accidentally consume - # messages from a newly-opened connection. The after 0 makes this - # non-blocking — it only removes messages already present. - defp flush_gun_messages(conn_pid) when is_pid(conn_pid) do - receive do - {:gun_up, ^conn_pid, _} -> flush_gun_messages(conn_pid) - {:gun_down, ^conn_pid, _, _, _} -> flush_gun_messages(conn_pid) - {:gun_response, ^conn_pid, _, _, _, _} -> flush_gun_messages(conn_pid) - {:gun_data, ^conn_pid, _, _, _} -> flush_gun_messages(conn_pid) - {:gun_trailers, ^conn_pid, _, _} -> flush_gun_messages(conn_pid) - {:gun_error, ^conn_pid, _, _} -> flush_gun_messages(conn_pid) - {:gun_error, ^conn_pid, _} -> flush_gun_messages(conn_pid) - after - 0 -> :ok + if channel do + # Only call disconnect if the underlying connection process is alive. + # When the server closes the channel (e.g. after DEADLINE_EXCEEDED), the + # adapter's connection process may already be gone. Calling disconnect on a + # dead channel causes a FunctionClauseError inside grpc's GenServer. + conn_alive? = + case state.conn_pid do + pid when is_pid(pid) -> Process.alive?(pid) + _ -> true + end + + if conn_alive? do + try do + GRPC.Stub.disconnect(channel) + catch + _, _ -> :ok + end + end end + + # Cancel the keep-alive timer — it will be restarted when the new stream opens. + state = cancel_keepalive_timer(state) + + %{state | reader_pid: nil, grpc_stream: nil, channel: nil, conn_pid: nil} end - defp flush_gun_messages(_), do: :ok + # close_stream with a reason — delegates to the main close_stream but also + # stores the reason for the skip-backoff optimisation in schedule_reconnect. + defp close_stream(state, _reason) do + close_stream(state) + end # --- Private: backoff --- @@ -650,13 +647,42 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do raise "StreamManager failed to connect and backoff is :stop — crashing" end - defp schedule_reconnect(%{backoff: backoff} = state) do + # Deduplication: if a :connect message is already pending, do not schedule + # another one. This prevents the double-reconnect race where {:stream_error} + # and {:stream_closed} (or {:EXIT}) both arrive within a single disconnect. + defp schedule_reconnect(%{reconnect_ref: ref} = state) when not is_nil(ref) do + state + end + + defp schedule_reconnect(%{backoff: backoff, stream_opened_at: opened_at} = state) do {timeout, new_backoff} = Backoff.backoff(backoff) - Logger.info("[StreamManager] Reconnecting in #{timeout}ms") - Process.send_after(self(), :connect, timeout) - %{state | backoff: new_backoff} + + # Skip-backoff optimisation (matches Go's behaviour): + # If the stream was alive for more than 30 seconds before failing, the server + # had time to process a DEADLINE_EXCEEDED (or similar timeout). Adding a + # backoff delay on top of the already-long blocking period compounds the + # reconnect latency unnecessarily. Reconnect immediately instead. + effective_timeout = + if skip_backoff?(opened_at) do + 0 + else + timeout + end + + ref = Process.send_after(self(), :connect, effective_timeout) + %{state | backoff: new_backoff, reconnect_ref: ref} end + # Returns true if the stream was open long enough that we should skip the + # exponential backoff sleep. Threshold: 30 seconds (same as Go). + defp skip_backoff?(nil), do: false + + defp skip_backoff?(opened_at) do + now_ms() - opened_at >= 30_000 + end + + defp now_ms, do: System.monotonic_time(:millisecond) + # --- Private: lease management --- defp schedule_lease_timer(state) do @@ -672,41 +698,94 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do %{state | lease_timer: nil} end + # --- Private: keep-alive --- + + defp schedule_keepalive_timer(state) do + state = cancel_keepalive_timer(state) + timer = schedule_keepalive_after(state.config) + %{state | keepalive_timer: timer} + end + + defp schedule_keepalive_after(config) do + interval = Map.get(config, :keepalive_interval_ms, @default_keepalive_ms) + Process.send_after(self(), :send_keepalive, interval) + end + + defp cancel_keepalive_timer(%{keepalive_timer: nil} = state), do: state + + defp cancel_keepalive_timer(%{keepalive_timer: timer} = state) do + Process.cancel_timer(timer) + %{state | keepalive_timer: nil} + end + # --- Private: ack buffering --- defp buffer_ack_request(%{ack_buffer: buffer, ack_buffer_size: size} = state, request) do - if size < @max_ack_buffer do - %{state | ack_buffer: [request | buffer], ack_buffer_size: size + 1} - else - Logger.warning("[StreamManager] Ack buffer full, dropping oldest ack request") - %{state | ack_buffer: [request | Enum.drop(buffer, -1)]} - end + emit_telemetry(:ack_buffered, %{buffer_size: size + 1}, state.config) + %{state | ack_buffer: [request | buffer], ack_buffer_size: size + 1} end - defp flush_ack_buffer(%{ack_buffer: [], grpc_stream: _} = state), do: state + defp flush_ack_buffer(%{ack_buffer: [], grpc_stream: _} = state), do: {:ok, state} - defp flush_ack_buffer(%{ack_buffer: _buffer, grpc_stream: nil} = state), do: state + defp flush_ack_buffer(%{ack_buffer: _buffer, grpc_stream: nil} = state), do: {:ok, state} defp flush_ack_buffer(%{ack_buffer: buffer, grpc_stream: grpc_stream} = state) do - buffer - |> Enum.reverse() - |> Enum.each(fn - {:ack, ack_ids} -> - send_on_stream(grpc_stream, %StreamingPullRequest{ack_ids: ack_ids}) + result = + buffer + |> Enum.reverse() + |> Enum.reduce_while(:ok, fn entry, :ok -> + request = + case entry do + {:ack, ack_ids} -> + %StreamingPullRequest{ack_ids: ack_ids} + + {:modify_deadline, ack_ids, deadline_seconds} -> + deadlines = List.duplicate(deadline_seconds, length(ack_ids)) + + %StreamingPullRequest{ + modify_deadline_ack_ids: ack_ids, + modify_deadline_seconds: deadlines + } + end + + case send_on_stream(grpc_stream, request) do + :ok -> {:cont, :ok} + {:error, reason} -> {:halt, {:error, reason}} + end + end) - {:modify_deadline, ack_ids, deadline_seconds} -> - deadlines = List.duplicate(deadline_seconds, length(ack_ids)) + case result do + :ok -> {:ok, %{state | ack_buffer: [], ack_buffer_size: 0}} + {:error, reason} -> {:error, reason, state} + end + end - send_on_stream(grpc_stream, %StreamingPullRequest{ - modify_deadline_ack_ids: ack_ids, - modify_deadline_seconds: deadlines - }) - end) + # --- Private: message building --- - %{state | ack_buffer: [], ack_buffer_size: 0} + # Buffer incoming messages, then flush up to pending_demand to the producer. + # Returns updated state. + defp deliver_messages(state, messages) do + # Prepend for O(1); reversed on flush. + new_buffer = Enum.reduce(messages, state.message_buffer, fn msg, acc -> [msg | acc] end) + flush_demand(%{state | message_buffer: new_buffer}) end - # --- Private: message building --- + # Flush up to `pending_demand` messages from the buffer to the producer. + # If the buffer is empty or pending_demand is 0, this is a no-op. + defp flush_demand(%{pending_demand: 0} = state), do: state + defp flush_demand(%{message_buffer: []} = state), do: state + + defp flush_demand(state) do + all_messages = Enum.reverse(state.message_buffer) + to_send = min(state.pending_demand, length(all_messages)) + {batch, rest} = Enum.split(all_messages, to_send) + + send(state.producer_pid, {:stream_messages, batch}) + + # Store remainder back in reversed (prepend-friendly) order + reversed_rest = Enum.reverse(rest) + %{state | message_buffer: reversed_rest, pending_demand: state.pending_demand - to_send} + end defp build_broadway_message( %{ack_id: ack_id, message: pubsub_msg, delivery_attempt: delivery_attempt}, @@ -754,8 +833,8 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do defp emit_telemetry(event, measurements, config) do metadata = %{ - subscription: config.subscription, - config: config + name: config.broadway[:name], + subscription: config.subscription } :telemetry.execute( @@ -764,8 +843,4 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do metadata ) end - - defp log_connection_failure(reason) do - Logger.error("[StreamManager] Failed to connect: #{inspect(reason)}") - end end diff --git a/lib/broadway_cloud_pub_sub/streaming/stream_reader.ex b/lib/broadway_cloud_pub_sub/streaming/stream_reader.ex new file mode 100644 index 0000000..8ba3656 --- /dev/null +++ b/lib/broadway_cloud_pub_sub/streaming/stream_reader.ex @@ -0,0 +1,136 @@ +defmodule BroadwayCloudPubSub.Streaming.StreamReader do + @moduledoc false + + # A short-lived process that owns the gRPC bidirectional streaming connection + # and forwards decoded messages to the StreamManager. + # + # ## Why a separate process? + # + # The `grpc_client` library's `GRPC.Stub.recv/2` returns a blocking `Enumerable`. + # A GenServer cannot block on enumeration (it would stop processing casts, + # calls, and timers). By spawning a dedicated reader process, the GenServer + # remains fully responsive while streaming runs concurrently. + # + # ## Unified adapter abstraction + # + # This module uses only the public `GRPC.Stub` API: + # + # 1. `Stub.streaming_pull(channel)` — opens the bidirectional stream + # 2. `GRPC.Stub.send_request(stream, initial_request)` — sends the initial + # StreamingPullRequest (subscription name, flow control settings, etc.) + # 3. `GRPC.Stub.recv(stream)` — returns an `{:ok, Enumerable.t()}` of + # decoded `{:ok, StreamingPullResponse.t()}` items + # + # Both the Gun and Mint adapters implement this interface identically from the + # caller's perspective: + # + # - **Gun**: `:gun.post` is called from this process, so Gun sends all + # `{:gun_response, :gun_data, ...}` messages to this process's mailbox. + # `GRPC.Stub.recv` returns a `Stream.unfold/2` backed by `:gun.await/3`, + # which is a selective receive that processes those mailbox messages. + # + # - **Mint**: `GRPC.Client.Adapters.Mint.ConnectionProcess` owns the TCP + # connection. A `StreamResponseProcess` is started per stream. Decoded + # messages are enqueued there and served to the caller via + # `GenServer.call(:get_response, :infinity)`. + # + # In both cases the library handles gRPC frame decoding (5-byte + # length-prefixed framing + codec decode) and delivers decoded protobuf + # structs to the caller. + # + # ## Message protocol with StreamManager + # + # After the stream is opened, this process sends the grpc_stream back: + # + # `{:stream_opened, pid, grpc_stream}` + # + # Then it forwards received messages and lifecycle events: + # + # `{:stream_messages, [ReceivedMessage.t()]}` — one or more decoded messages + # `{:stream_error, error}` — stream-level gRPC error + # `{:stream_closed}` — server closed stream normally + # + # On exit (normal or crash), the StreamManager detects it via the linked + # process `{:EXIT, pid, reason}` signal (StreamManager traps exits). + # + # ## Sending on the stream (acks, deadline modifications) + # + # After receiving `{:stream_opened, _pid, grpc_stream}`, the StreamManager + # calls `GRPC.Stub.send_request(grpc_stream, request)` directly from the + # GenServer process. + # + # - **Gun**: `:gun.data/4` is a fire-and-forget `gen_statem:cast`. It can be + # called from any process regardless of who opened the stream. + # - **Mint**: `ConnectionProcess.stream_request_body/3` is also a GenServer + # cast, callable from any process. + # + # Both are safe to call from the StreamManager GenServer concurrently with + # the reader process enumerating the receive stream. + + alias Google.Pubsub.V1.{StreamingPullRequest, StreamingPullResponse} + alias Google.Pubsub.V1.Subscriber.Stub + + @doc """ + Spawns a linked reader process. The reader opens the gRPC stream and sends + the stream struct back to `manager` via `{:stream_opened, self(), grpc_stream}` + before beginning enumeration. + + Returns the reader pid. + """ + @spec start_link(pid(), GRPC.Channel.t(), map()) :: pid() + def start_link(manager, channel, config) do + spawn_link(fn -> run(manager, channel, config) end) + end + + # --- Private --- + + defp run(manager, channel, config) do + client_id = Map.fetch!(config, :client_id) + + initial_request = %StreamingPullRequest{ + subscription: config.subscription, + stream_ack_deadline_seconds: config.stream_ack_deadline_seconds, + max_outstanding_messages: config.max_outstanding_messages, + max_outstanding_bytes: config.max_outstanding_bytes, + client_id: client_id + } + + grpc_stream = Stub.streaming_pull(channel, []) + grpc_stream = GRPC.Stub.send_request(grpc_stream, initial_request) + + # Notify the manager that the stream is open. The manager needs the + # grpc_stream struct to call GRPC.Stub.send_request for acks and deadline + # modifications on the bidirectional stream. + send(manager, {:stream_opened, self(), grpc_stream}) + + case GRPC.Stub.recv(grpc_stream, timeout: :infinity) do + {:ok, enum} -> + enumerate(enum, manager) + + {:error, error} -> + send(manager, {:stream_error, error}) + end + end + + defp enumerate(enum, manager) do + enum + |> Stream.each(fn + {:ok, %StreamingPullResponse{received_messages: msgs}} when msgs != [] -> + send(manager, {:stream_messages, msgs}) + + {:ok, %StreamingPullResponse{}} -> + # Heartbeat / empty response — nothing to forward + :ok + + {:error, error} -> + send(manager, {:stream_error, error}) + end) + |> Stream.run() + + # Stream exhausted normally — notify manager before exit. + # StreamManager will also receive {:EXIT, reader_pid, :normal} and + # schedule reconnect, but sending {:stream_closed} allows distinguishing + # normal closes from crashes in logs/telemetry. + send(manager, {:stream_closed}) + end +end diff --git a/mix.exs b/mix.exs index b8fac01..65a7134 100644 --- a/mix.exs +++ b/mix.exs @@ -38,7 +38,10 @@ defmodule BroadwayCloudPubSub.MixProject do {:nimble_options, "~> 0.3.7 or ~> 0.4 or ~> 1.0"}, {:telemetry, "~> 0.4.3 or ~> 1.0"}, {:goth, "~> 1.3", optional: true}, - {:grpc, "~> 0.9 or ~> 0.10 or ~> 0.11", optional: true}, + # TODO: Replace with Hex versions when grpc 1.0 is released + {:grpc_core, + github: "elixir-grpc/grpc", sparse: "grpc_core", optional: true, override: true}, + {:grpc_client, github: "elixir-grpc/grpc", sparse: "grpc_client", optional: true}, {:protobuf, "~> 0.12 or ~> 0.13 or ~> 0.14 or ~> 0.15 or ~> 0.16", optional: true}, {:ex_doc, "~> 0.23", only: :docs}, {:bypass, "~> 2.1", only: :test} diff --git a/mix.lock b/mix.lock index f4956b1..47fc13f 100644 --- a/mix.lock +++ b/mix.lock @@ -13,6 +13,8 @@ "googleapis": {:hex, :googleapis, "0.1.0", "13770f3f75f5b863fb9acf41633c7bc71bad788f3f553b66481a096d083ee20e", [:mix], [{:protobuf, "~> 0.12", [hex: :protobuf, repo: "hexpm", optional: false]}], "hexpm", "1989a7244fd17d3eb5f3de311a022b656c3736b39740db46506157c4604bd212"}, "goth": {:hex, :goth, "1.4.5", "ee37f96e3519bdecd603f20e7f10c758287088b6d77c0147cd5ee68cf224aade", [:mix], [{:finch, "~> 0.17", [hex: :finch, repo: "hexpm", optional: false]}, {:jason, "~> 1.1", [hex: :jason, repo: "hexpm", optional: false]}, {:jose, "~> 1.11", [hex: :jose, repo: "hexpm", optional: false]}], "hexpm", "0fc2dce5bd710651ed179053d0300ce3a5d36afbdde11e500d57f05f398d5ed5"}, "grpc": {:hex, :grpc, "0.11.5", "5dbde9420718b58712779ad98fff1ef50349ca0fa7cc0858ae0f826015068654", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:cowboy, "~> 2.10", [hex: :cowboy, repo: "hexpm", optional: false]}, {:cowlib, "~> 2.12", [hex: :cowlib, repo: "hexpm", optional: false]}, {:flow, "~> 1.2", [hex: :flow, repo: "hexpm", optional: false]}, {:googleapis, "~> 0.1.0", [hex: :googleapis, repo: "hexpm", optional: false]}, {:gun, "~> 2.0", [hex: :gun, repo: "hexpm", optional: false]}, {:jason, ">= 0.0.0", [hex: :jason, repo: "hexpm", optional: false]}, {:mint, "~> 1.5", [hex: :mint, repo: "hexpm", optional: false]}, {:protobuf, "~> 0.14", [hex: :protobuf, repo: "hexpm", optional: false]}, {:telemetry, "~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "0a5d8673ef16649bef0903bca01c161acfc148e4d269133b6834b2af1f07f45e"}, + "grpc_client": {:git, "https://github.com/elixir-grpc/grpc.git", "abc5e1e7d4fdb9db4eedd830a8cc5c5414a7dabd", [sparse: "grpc_client"]}, + "grpc_core": {:git, "https://github.com/elixir-grpc/grpc.git", "abc5e1e7d4fdb9db4eedd830a8cc5c5414a7dabd", [sparse: "grpc_core"]}, "gun": {:hex, :gun, "2.2.0", "b8f6b7d417e277d4c2b0dc3c07dfdf892447b087f1cc1caff9c0f556b884e33d", [:make, :rebar3], [{:cowlib, ">= 2.15.0 and < 3.0.0", [hex: :cowlib, repo: "hexpm", optional: false]}], "hexpm", "76022700c64287feb4df93a1795cff6741b83fb37415c40c34c38d2a4645261a"}, "hpax": {:hex, :hpax, "1.0.3", "ed67ef51ad4df91e75cc6a1494f851850c0bd98ebc0be6e81b026e765ee535aa", [:mix], [], "hexpm", "8eab6e1cfa8d5918c2ce4ba43588e894af35dbd8e91e6e55c817bca5847df34a"}, "jason": {:hex, :jason, "1.4.4", "b9226785a9aa77b6857ca22832cffa5d5011a667207eb2a0ad56adb5db443b8a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "c5eb0cab91f094599f94d55bc63409236a8ec69a21a67814529e8d5f6cc90b3b"}, diff --git a/test/broadway_cloud_pub_sub/streaming/error_classifier_test.exs b/test/broadway_cloud_pub_sub/streaming/error_classifier_test.exs new file mode 100644 index 0000000..6e69fbc --- /dev/null +++ b/test/broadway_cloud_pub_sub/streaming/error_classifier_test.exs @@ -0,0 +1,83 @@ +defmodule BroadwayCloudPubSub.Streaming.ErrorClassifierTest do + use ExUnit.Case, async: true + + alias BroadwayCloudPubSub.Streaming.ErrorClassifier + + # Helper to build a GRPC.RPCError with just status + message + defp rpc_error(status, message \\ "") do + %GRPC.RPCError{status: status, message: message} + end + + describe "classify/1 — retryable gRPC status codes" do + test "DEADLINE_EXCEEDED (4) is retryable" do + assert ErrorClassifier.classify(rpc_error(4)) == :retryable + end + + test "INTERNAL (13) is retryable" do + assert ErrorClassifier.classify(rpc_error(13)) == :retryable + end + + test "ABORTED (10) is retryable" do + assert ErrorClassifier.classify(rpc_error(10)) == :retryable + end + + test "UNAVAILABLE (14) without shutdown message is retryable" do + assert ErrorClassifier.classify(rpc_error(14, "service unavailable")) == :retryable + assert ErrorClassifier.classify(rpc_error(14, "")) == :retryable + assert ErrorClassifier.classify(rpc_error(14)) == :retryable + end + + test "UNKNOWN (2) is retryable" do + assert ErrorClassifier.classify(rpc_error(2)) == :retryable + end + + test "RESOURCE_EXHAUSTED (8) is retryable" do + assert ErrorClassifier.classify(rpc_error(8)) == :retryable + end + end + + describe "classify/1 — terminal gRPC status codes" do + test "NOT_FOUND (5) is terminal" do + assert ErrorClassifier.classify(rpc_error(5)) == :terminal + end + + test "PERMISSION_DENIED (7) is terminal" do + assert ErrorClassifier.classify(rpc_error(7)) == :terminal + end + + test "INVALID_ARGUMENT (3) is terminal" do + assert ErrorClassifier.classify(rpc_error(3)) == :terminal + end + + test "UNAUTHENTICATED (16) is terminal" do + assert ErrorClassifier.classify(rpc_error(16)) == :terminal + end + + test "CANCELLED (1) is terminal" do + assert ErrorClassifier.classify(rpc_error(1)) == :terminal + end + + test "UNAVAILABLE (14) with 'Server shutdownNow invoked' is terminal" do + assert ErrorClassifier.classify(rpc_error(14, "Server shutdownNow invoked")) == :terminal + end + + test "UNAVAILABLE (14) with message containing shutdown string is terminal" do + assert ErrorClassifier.classify(rpc_error(14, "prefix Server shutdownNow invoked suffix")) == + :terminal + end + end + + describe "classify/1 — non-gRPC errors" do + test "plain atom is retryable" do + assert ErrorClassifier.classify(:closed) == :retryable + end + + test "arbitrary tuple is retryable" do + assert ErrorClassifier.classify({:error, :econnrefused}) == :retryable + end + + test "nil is retryable" do + assert ErrorClassifier.classify(nil) == :retryable + end + end +end diff --git a/test/broadway_cloud_pub_sub/streaming/producer_integration_test.exs b/test/broadway_cloud_pub_sub/streaming/producer_integration_test.exs index 1100ae5..53d75c9 100644 --- a/test/broadway_cloud_pub_sub/streaming/producer_integration_test.exs +++ b/test/broadway_cloud_pub_sub/streaming/producer_integration_test.exs @@ -76,7 +76,7 @@ defmodule BroadwayCloudPubSub.Streaming.ProducerIntegrationTest do end setup_all do - {:ok, _} = DynamicSupervisor.start_link(strategy: :one_for_one, name: GRPC.Client.Supervisor) + DynamicSupervisor.start_link(strategy: :one_for_one, name: GRPC.Client.Supervisor) PubSubEmulator.start() :ok end diff --git a/test/broadway_cloud_pub_sub/streaming/stream_manager_test.exs b/test/broadway_cloud_pub_sub/streaming/stream_manager_test.exs new file mode 100644 index 0000000..7bef2da --- /dev/null +++ b/test/broadway_cloud_pub_sub/streaming/stream_manager_test.exs @@ -0,0 +1,595 @@ +defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do + use ExUnit.Case, async: true + + alias BroadwayCloudPubSub.Streaming.StreamManager + + # Minimal config with enough keys to satisfy StreamManager.init/1 + # (mirrors what Options produces after validation + defaults). + defp base_config do + [ + subscription: "projects/test/subscriptions/test-sub", + max_outstanding_messages: 1_000, + max_outstanding_bytes: 104_857_600, + stream_ack_deadline_seconds: 60, + lease_extension_percent: 0.6, + backoff_type: :exp, + backoff_min: 1_000, + backoff_max: 30_000, + use_ssl: true, + adapter: :gun, + grpc_endpoint: "pubsub.googleapis.com:443", + keepalive_interval_ms: 30_000, + on_success: :ack, + on_failure: :noop, + client_id: "test-client-id", + token_generator: {__MODULE__, :noop_token, []}, + broadway: [name: __MODULE__] + ] + end + + def noop_token, do: {:ok, "test-token"} + + # Start a StreamManager, inject producer_pid so it doesn't try to connect. + defp start_manager(extra_opts \\ []) do + test_pid = self() + opts = Keyword.merge(base_config(), extra_opts) + {:ok, pid} = StreamManager.start_link(opts) + + # Inject state: set producer_pid to test process and skip the real connect. + # NOTE: pass test_pid explicitly — self() inside :sys.replace_state runs in + # the GenServer process context, not the test process. + :sys.replace_state(pid, fn state -> + %{state | producer_pid: test_pid} + end) + + pid + end + + # Inject a fake grpc_stream into state so ack paths see a connected stream. + defp inject_connected(pid) do + :sys.replace_state(pid, fn state -> + %{state | grpc_stream: :fake_stream} + end) + end + + # ============================================================ + # Demand signaling + # ============================================================ + + describe "notify_demand/2 — no buffered messages" do + test "stores pending_demand when message buffer is empty" do + pid = start_manager() + + :sys.replace_state(pid, fn s -> %{s | pending_demand: 0, message_buffer: []} end) + StreamManager.notify_demand(pid, 10) + + # Allow the async cast to be processed + :sys.get_state(pid) + + state = :sys.get_state(pid) + assert state.pending_demand == 10 + assert state.message_buffer == [] + end + end + + describe "notify_demand/2 — with buffered messages" do + test "flushes buffered messages to producer and decrements pending_demand" do + pid = start_manager() + + msgs = [ + %Broadway.Message{data: "msg1", acknowledger: {Broadway.NoopAcknowledger, nil, nil}}, + %Broadway.Message{data: "msg2", acknowledger: {Broadway.NoopAcknowledger, nil, nil}} + ] + + :sys.replace_state(pid, fn s -> + %{s | pending_demand: 0, message_buffer: Enum.reverse(msgs)} + end) + + StreamManager.notify_demand(pid, 10) + + assert_receive {:stream_messages, received} + assert Enum.map(received, & &1.data) == ["msg1", "msg2"] + + state = :sys.get_state(pid) + assert state.message_buffer == [] + assert state.pending_demand == 8 + end + + test "flushes only up to pending_demand, keeps remainder buffered" do + pid = start_manager() + + msgs = + for i <- 1..5 do + %Broadway.Message{ + data: "msg#{i}", + acknowledger: {Broadway.NoopAcknowledger, nil, nil} + } + end + + :sys.replace_state(pid, fn s -> + %{s | pending_demand: 0, message_buffer: Enum.reverse(msgs)} + end) + + StreamManager.notify_demand(pid, 2) + + assert_receive {:stream_messages, received} + assert length(received) == 2 + assert Enum.map(received, & &1.data) == ["msg1", "msg2"] + + state = :sys.get_state(pid) + assert length(state.message_buffer) == 3 + assert state.pending_demand == 0 + + StreamManager.notify_demand(pid, 10) + + assert_receive {:stream_messages, received2} + assert length(received2) == 3 + assert Enum.map(received2, & &1.data) == ["msg3", "msg4", "msg5"] + + state = :sys.get_state(pid) + assert state.message_buffer == [] + assert state.pending_demand == 7 + end + end + + describe "stream_messages → message delivery" do + test "messages are forwarded immediately when pending_demand > 0" do + pid = start_manager() + :sys.replace_state(pid, fn s -> %{s | pending_demand: 10} end) + + fake_msg = %Google.Pubsub.V1.ReceivedMessage{ + ack_id: "ack-1", + message: %Google.Pubsub.V1.PubsubMessage{ + message_id: "msg-1", + data: "hello", + attributes: %{}, + ordering_key: "", + publish_time: nil + }, + delivery_attempt: 1 + } + + send(pid, {:stream_messages, [fake_msg]}) + + assert_receive {:stream_messages, messages} + assert length(messages) == 1 + assert hd(messages).data == "hello" + + state = :sys.get_state(pid) + assert state.pending_demand == 9 + assert state.message_buffer == [] + end + + test "messages are buffered when pending_demand is 0" do + pid = start_manager() + :sys.replace_state(pid, fn s -> %{s | pending_demand: 0} end) + + fake_msg = %Google.Pubsub.V1.ReceivedMessage{ + ack_id: "ack-2", + message: %Google.Pubsub.V1.PubsubMessage{ + message_id: "msg-2", + data: "buffered", + attributes: %{}, + ordering_key: "", + publish_time: nil + }, + delivery_attempt: 1 + } + + send(pid, {:stream_messages, [fake_msg]}) + + refute_receive {:stream_messages, _}, 100 + + state = :sys.get_state(pid) + assert length(state.message_buffer) == 1 + end + + test "buffer is flushed in FIFO order on notify_demand" do + pid = start_manager() + :sys.replace_state(pid, fn s -> %{s | pending_demand: 0} end) + + for i <- 1..3 do + msg = %Google.Pubsub.V1.ReceivedMessage{ + ack_id: "ack-#{i}", + message: %Google.Pubsub.V1.PubsubMessage{ + message_id: "msg-#{i}", + data: "data-#{i}", + attributes: %{}, + ordering_key: "", + publish_time: nil + }, + delivery_attempt: 1 + } + + send(pid, {:stream_messages, [msg]}) + end + + :sys.get_state(pid) + + StreamManager.notify_demand(pid, 10) + + assert_receive {:stream_messages, messages} + assert Enum.map(messages, & &1.data) == ["data-1", "data-2", "data-3"] + end + end + + # ============================================================ + # Ack buffering — no cap + # ============================================================ + + describe "ack buffer — unbounded" do + test "buffers acks when grpc_stream is nil" do + pid = start_manager() + + :sys.replace_state(pid, fn s -> %{s | grpc_stream: nil} end) + + StreamManager.acknowledge(pid, ["ack-1", "ack-2"]) + + :sys.get_state(pid) + + state = :sys.get_state(pid) + assert state.ack_buffer_size == 1 + assert state.ack_buffer != [] + end + + test "buffer grows without dropping entries" do + pid = start_manager() + :sys.replace_state(pid, fn s -> %{s | grpc_stream: nil} end) + + count = 20_000 + + for i <- 1..count do + StreamManager.acknowledge(pid, ["ack-#{i}"]) + end + + :sys.get_state(pid) + + state = :sys.get_state(pid) + assert state.ack_buffer_size == count + end + + test "flushed buffer is replayed on reconnect (connect_stream)" do + pid = start_manager() + + :sys.replace_state(pid, fn s -> + %{ + s + | grpc_stream: nil, + ack_buffer: [{:ack, ["id-1"]}, {:ack, ["id-2"]}], + ack_buffer_size: 2 + } + end) + + inject_connected(pid) + + StreamManager.close(pid) + + state = :sys.get_state(pid) + assert state.ack_buffer == [] + assert state.ack_buffer_size == 0 + end + end + + # ============================================================ + # Ack buffer telemetry + # ============================================================ + + describe "ack buffer telemetry" do + test "emits :ack_buffered telemetry when buffering an ack" do + pid = start_manager() + :sys.replace_state(pid, fn s -> %{s | grpc_stream: nil} end) + + test_pid = self() + + :telemetry.attach( + "test-ack-buffered", + [:broadway_cloud_pub_sub, :stream, :ack_buffered], + fn _event, measurements, _metadata, _config -> + send(test_pid, {:telemetry, measurements}) + end, + nil + ) + + StreamManager.acknowledge(pid, ["ack-x"]) + + assert_receive {:telemetry, %{buffer_size: 1}} + + :telemetry.detach("test-ack-buffered") + end + end + + # ============================================================ + # receiving flag — draining + # ============================================================ + + describe "stop_receiving/1" do + test "messages are not forwarded after stop_receiving even when pending_demand > 0" do + pid = start_manager() + :sys.replace_state(pid, fn s -> %{s | pending_demand: 10} end) + + StreamManager.stop_receiving(pid) + + fake_msg = %Google.Pubsub.V1.ReceivedMessage{ + ack_id: "drain-ack", + message: %Google.Pubsub.V1.PubsubMessage{ + message_id: "drain-msg", + data: "should not arrive", + attributes: %{}, + ordering_key: "", + publish_time: nil + }, + delivery_attempt: 1 + } + + send(pid, {:stream_messages, [fake_msg]}) + + refute_receive {:stream_messages, _}, 200 + end + end + + # ============================================================ + # Keep-alive pings + # ============================================================ + + describe "keep-alive ping" do + test "triggers reconnect when send fails (fake stream)" do + # Use a very short keepalive interval so the test doesn't wait 30s. + # With a fake stream, send_on_stream will throw, which should trigger + # a reconnect instead of being silently swallowed. + pid = start_manager(keepalive_interval_ms: 10) + + :sys.replace_state(pid, fn s -> %{s | grpc_stream: :fake_stream} end) + + # Bootstrap the keepalive cycle — normally started by {:stream_opened}, + # but we injected the stream directly via replace_state. + send(pid, :send_keepalive) + + Process.sleep(30) + + assert Process.alive?(pid) + + # After a send failure, the stream is reset (grpc_stream: nil) and a + # reconnect is scheduled. + state = :sys.get_state(pid) + assert state.grpc_stream == nil + assert state.reconnect_ref != nil + end + + test "does not crash when stream is nil (reconnecting)" do + pid = start_manager(keepalive_interval_ms: 10) + + :sys.replace_state(pid, fn s -> %{s | grpc_stream: nil} end) + + Process.sleep(30) + + assert Process.alive?(pid) + end + + test "keepalive_timer is nil before stream opens" do + pid = start_manager() + state = :sys.get_state(pid) + assert state.keepalive_timer == nil + end + + test "keepalive_timer is set when stream is active" do + pid = start_manager(keepalive_interval_ms: 60_000) + + :sys.replace_state(pid, fn s -> + timer = Process.send_after(self(), :send_keepalive, 60_000) + + %{ + s + | grpc_stream: :fake_stream, + conn_pid: self(), + stream_opened_at: System.monotonic_time(:millisecond), + keepalive_timer: timer + } + end) + + state = :sys.get_state(pid) + assert state.keepalive_timer != nil + end + end + + # ============================================================ + # Reconnect deduplication + # ============================================================ + + describe "reconnect deduplication" do + test "only one reconnect is scheduled when stream_error and stream_closed both arrive" do + # Use high backoff so :connect doesn't actually fire during the test + pid = start_manager(backoff_min: 10_000, backoff_max: 30_000) + + send(pid, {:stream_error, %GRPC.RPCError{status: 4, message: "timeout"}}) + send(pid, {:stream_closed}) + + :sys.get_state(pid) + + state = :sys.get_state(pid) + first_ref = state.reconnect_ref + + # Ref must be set (at least one reconnect scheduled) + assert first_ref != nil + + # Send another close signal — ref must not change (dedup kicks in) + send(pid, {:stream_closed}) + :sys.get_state(pid) + + state2 = :sys.get_state(pid) + assert state2.reconnect_ref == first_ref + end + + test "reconnect_ref is cleared when :connect message fires" do + pid = start_manager(backoff_min: 10_000, backoff_max: 30_000) + + send(pid, {:stream_error, %GRPC.RPCError{status: 4, message: "timeout"}}) + :sys.get_state(pid) + + # Manually fire :connect (connect() will fail — no real gRPC — but that's fine) + send(pid, :connect) + :sys.get_state(pid) + + # GenServer should still be alive + assert Process.alive?(pid) + end + end + + # ============================================================ + # Error classification — terminal vs retryable + # ============================================================ + + describe "terminal gRPC errors stop the GenServer" do + test "NOT_FOUND (5) stops the GenServer" do + pid = start_manager() + ref = Process.monitor(pid) + Process.unlink(pid) + + send(pid, {:stream_error, %GRPC.RPCError{status: 5, message: "not found"}}) + + assert_receive {:DOWN, ^ref, :process, ^pid, {:terminal_error, _}}, 1_000 + end + + test "PERMISSION_DENIED (7) stops the GenServer" do + pid = start_manager() + ref = Process.monitor(pid) + Process.unlink(pid) + + send(pid, {:stream_error, %GRPC.RPCError{status: 7, message: "permission denied"}}) + + assert_receive {:DOWN, ^ref, :process, ^pid, {:terminal_error, _}}, 1_000 + end + + test "INVALID_ARGUMENT (3) stops the GenServer" do + pid = start_manager() + ref = Process.monitor(pid) + Process.unlink(pid) + + send(pid, {:stream_error, %GRPC.RPCError{status: 3, message: "bad argument"}}) + + assert_receive {:DOWN, ^ref, :process, ^pid, {:terminal_error, _}}, 1_000 + end + + test "UNAUTHENTICATED (16) stops the GenServer" do + pid = start_manager() + ref = Process.monitor(pid) + Process.unlink(pid) + + send(pid, {:stream_error, %GRPC.RPCError{status: 16, message: "unauthenticated"}}) + + assert_receive {:DOWN, ^ref, :process, ^pid, {:terminal_error, _}}, 1_000 + end + + test "UNAVAILABLE (14) with 'Server shutdownNow invoked' stops the GenServer" do + pid = start_manager() + ref = Process.monitor(pid) + Process.unlink(pid) + + send( + pid, + {:stream_error, %GRPC.RPCError{status: 14, message: "Server shutdownNow invoked"}} + ) + + assert_receive {:DOWN, ^ref, :process, ^pid, {:terminal_error, _}}, 1_000 + end + + test "terminal error emits :terminal_error telemetry before stopping" do + pid = start_manager() + test_pid = self() + Process.unlink(pid) + + :telemetry.attach( + "test-terminal-error-#{inspect(pid)}", + [:broadway_cloud_pub_sub, :stream, :terminal_error], + fn _event, measurements, _metadata, _config -> + send(test_pid, {:telemetry, :terminal_error, measurements}) + end, + nil + ) + + send(pid, {:stream_error, %GRPC.RPCError{status: 5, message: "not found"}}) + + assert_receive {:telemetry, :terminal_error, %{reason: _}}, 1_000 + + :telemetry.detach("test-terminal-error-#{inspect(pid)}") + end + end + + describe "retryable gRPC errors trigger reconnect" do + test "DEADLINE_EXCEEDED (4) schedules reconnect without stopping" do + pid = start_manager(backoff_min: 10_000, backoff_max: 30_000) + ref = Process.monitor(pid) + + send(pid, {:stream_error, %GRPC.RPCError{status: 4, message: "timeout"}}) + :sys.get_state(pid) + + refute_received {:DOWN, ^ref, :process, ^pid, _} + assert Process.alive?(pid) + + state = :sys.get_state(pid) + assert state.reconnect_ref != nil + end + + test "UNAVAILABLE (14) without shutdown message schedules reconnect" do + pid = start_manager(backoff_min: 10_000, backoff_max: 30_000) + ref = Process.monitor(pid) + + send(pid, {:stream_error, %GRPC.RPCError{status: 14, message: "service temporarily down"}}) + :sys.get_state(pid) + + refute_received {:DOWN, ^ref, :process, ^pid, _} + assert Process.alive?(pid) + end + end + + # ============================================================ + # Skip-backoff optimisation + # ============================================================ + + describe "skip-backoff on long-lived stream" do + test "reconnects immediately (0ms) when stream was open for >30 seconds" do + # Use a non-routable endpoint so the connect attempt fails at TCP level + # (not by reaching a real server that returns a terminal auth error). + pid = start_manager(backoff_min: 5_000, backoff_max: 30_000, grpc_endpoint: "localhost:1") + + # Pretend the stream opened 31 seconds ago + opened_at = System.monotonic_time(:millisecond) - 31_000 + + :sys.replace_state(pid, fn s -> %{s | stream_opened_at: opened_at} end) + + send(pid, {:stream_closed}) + :sys.get_state(pid) + + state = :sys.get_state(pid) + assert state.reconnect_ref != nil + + # With 0ms effective timeout, :connect should arrive almost immediately. + # Give it 500ms — even with scheduling jitter this is far below the 5s backoff. + # The connect attempt will fail (no gRPC), but the GenServer remains alive. + Process.sleep(200) + assert Process.alive?(pid) + end + + test "applies full backoff when stream was open for <30 seconds" do + pid = start_manager(backoff_min: 5_000, backoff_max: 30_000) + + # Stream opened 1 second ago — should NOT skip backoff + opened_at = System.monotonic_time(:millisecond) - 1_000 + + :sys.replace_state(pid, fn s -> %{s | stream_opened_at: opened_at} end) + + send(pid, {:stream_closed}) + :sys.get_state(pid) + + state = :sys.get_state(pid) + assert state.reconnect_ref != nil + + # With 5s minimum backoff, no :connect should arrive within 500ms + Process.sleep(300) + + # State should still show the same reconnect_ref (connect hasn't fired) + state2 = :sys.get_state(pid) + assert state2.reconnect_ref == state.reconnect_ref + assert Process.alive?(pid) + end + end +end From d30d9d7eade7cc01f63883dd32796ad728064b66 Mon Sep 17 00:00:00 2001 From: Rock Date: Mon, 30 Mar 2026 10:36:27 +0200 Subject: [PATCH 03/29] feat: add unary RPC acknowledgment pipeline Implement the acknowledgment pipeline using unary RPCs for batched message acknowledgment and deadline modification. This replaces stream-based acks with a more reliable unary RPC approach. Key additions: - AckBatcher for batching acknowledge/nack/deadline operations - AckResult for tracking acknowledgment outcomes - AckTimeDistribution for adaptive ack deadline estimation - UnaryRpcClient for making acknowledge and modifyAckDeadline RPCs - UnaryAckSupervisor for managing concurrent ack tasks - Stress test suite for high-throughput scenarios --- lib/broadway_cloud_pub_sub/backoff.ex | 15 +- .../streaming/ack_batcher.ex | 243 +++ .../streaming/ack_result.ex | 135 ++ .../streaming/ack_time_distribution.ex | 131 ++ .../streaming/acknowledger.ex | 42 +- .../streaming/error_classifier.ex | 41 +- .../streaming/options.ex | 119 +- .../streaming/producer.ex | 379 +++- .../streaming/stream_manager.ex | 566 ++--- .../streaming/stream_reader.ex | 57 +- .../streaming/unary_ack_supervisor.ex | 72 + .../streaming/unary_rpc_client.ex | 411 ++++ test/broadway_cloud_pub_sub/backoff_test.exs | 4 +- .../streaming/ack_batcher_test.exs | 388 ++++ .../streaming/ack_result_test.exs | 232 +++ .../streaming/ack_time_distribution_test.exs | 142 ++ .../streaming/error_classifier_test.exs | 26 +- .../streaming/options_test.exs | 26 + .../streaming/stream_manager_test.exs | 581 ++++-- .../streaming/stress_test.exs | 1839 +++++++++++++++++ .../streaming/unary_rpc_client_test.exs | 253 +++ test/test_helper.exs | 2 +- 22 files changed, 5051 insertions(+), 653 deletions(-) create mode 100644 lib/broadway_cloud_pub_sub/streaming/ack_batcher.ex create mode 100644 lib/broadway_cloud_pub_sub/streaming/ack_result.ex create mode 100644 lib/broadway_cloud_pub_sub/streaming/ack_time_distribution.ex create mode 100644 lib/broadway_cloud_pub_sub/streaming/unary_ack_supervisor.ex create mode 100644 lib/broadway_cloud_pub_sub/streaming/unary_rpc_client.ex create mode 100644 test/broadway_cloud_pub_sub/streaming/ack_batcher_test.exs create mode 100644 test/broadway_cloud_pub_sub/streaming/ack_result_test.exs create mode 100644 test/broadway_cloud_pub_sub/streaming/ack_time_distribution_test.exs create mode 100644 test/broadway_cloud_pub_sub/streaming/stress_test.exs create mode 100644 test/broadway_cloud_pub_sub/streaming/unary_rpc_client_test.exs diff --git a/lib/broadway_cloud_pub_sub/backoff.ex b/lib/broadway_cloud_pub_sub/backoff.ex index ceafff0..38fc742 100644 --- a/lib/broadway_cloud_pub_sub/backoff.ex +++ b/lib/broadway_cloud_pub_sub/backoff.ex @@ -5,8 +5,9 @@ defmodule BroadwayCloudPubSub.Backoff do # Supports :rand_exp (randomized exponential), :exp (pure exponential), # :rand (pure random), and :stop (no reconnect). - @default_min 1_000 - @default_max 30_000 + # Aligned with Options defaults (backoff_min: 100, backoff_max: 60_000). + @default_min 100 + @default_max 60_000 @type type :: :rand_exp | :exp | :rand | :stop @@ -27,8 +28,8 @@ defmodule BroadwayCloudPubSub.Backoff do ## Options * `:type` - `:rand_exp` (default), `:exp`, `:rand`, or `:stop` - * `:min` - minimum backoff in milliseconds (default: 1000) - * `:max` - maximum backoff in milliseconds (default: 30000) + * `:min` - minimum backoff in milliseconds (default: 100) + * `:max` - maximum backoff in milliseconds (default: 60000) """ @spec new(keyword()) :: t() | nil @@ -102,8 +103,10 @@ defmodule BroadwayCloudPubSub.Backoff do end defp rand(min, max, seed) do - {value, new_seed} = :rand.uniform_s(max - min, seed) - {value + min, new_seed} + # :rand.uniform_s(N) returns a value in [1, N], so we use (max - min + 1) + # and subtract 1 to get the correct range [min, max]. + {value, new_seed} = :rand.uniform_s(max - min + 1, seed) + {value - 1 + min, new_seed} end defp seed do diff --git a/lib/broadway_cloud_pub_sub/streaming/ack_batcher.ex b/lib/broadway_cloud_pub_sub/streaming/ack_batcher.ex new file mode 100644 index 0000000..00f4a1c --- /dev/null +++ b/lib/broadway_cloud_pub_sub/streaming/ack_batcher.ex @@ -0,0 +1,243 @@ +defmodule BroadwayCloudPubSub.Streaming.AckBatcher do + @moduledoc false + + # GenServer that accumulates ack and modifyAckDeadline requests and flushes + # them to UnaryRpcClient on a configurable timer or size threshold. + # + # ## Modack grouping + # + # ModifyAckDeadline requests carry a single deadline value for all ack IDs in + # the request. We group modack requests by deadline value so that one unary RPC + # is sent per unique deadline per flush cycle. + # + # ## Flush triggers + # + # 1. Timer fires (every ack_batch_interval_ms) + # 2. Accumulated ack count reaches ack_batch_max_size + # 3. Explicit `flush/1` call (used during graceful shutdown) + # + # ## Relationship to UnaryRpcClient + # + # AckBatcher and UnaryRpcClient are siblings under UnaryAckSupervisor. The + # batcher looks up the RPC client by its registered name derived from the + # Broadway pipeline name. + + use GenServer + + alias BroadwayCloudPubSub.Streaming.UnaryRpcClient + + defstruct [ + :rpc_client, + :batch_interval_ms, + :batch_max_size, + :timer_ref, + # Accumulated ack_ids waiting to be flushed. + ack_ids: [], + ack_count: 0, + # Accumulated modacks: %{deadline_seconds => [ack_id]} + modack_ids: %{} + ] + + @spec start_link(keyword()) :: GenServer.on_start() + def start_link(opts) do + {name, opts} = Keyword.pop(opts, :name) + + if name do + GenServer.start_link(__MODULE__, opts, name: name) + else + GenServer.start_link(__MODULE__, opts) + end + end + + @doc """ + Queues ack_ids for acknowledgement. Fire-and-forget. + """ + @spec ack(GenServer.server(), [String.t()]) :: :ok + def ack(pid, ack_ids) when is_list(ack_ids) and ack_ids != [] do + GenServer.cast(pid, {:ack, ack_ids}) + end + + def ack(_pid, []), do: :ok + + @doc """ + Queues ack_ids for a modifyAckDeadline request. Fire-and-forget. + """ + @spec modack(GenServer.server(), [String.t()], non_neg_integer()) :: :ok + def modack(pid, ack_ids, deadline_seconds) when is_list(ack_ids) and ack_ids != [] do + GenServer.cast(pid, {:modack, ack_ids, deadline_seconds}) + end + + def modack(_pid, [], _deadline), do: :ok + + @doc """ + Flushes all pending batches synchronously. Used during graceful shutdown to + ensure no acks are dropped before the process terminates. + """ + @spec flush(GenServer.server()) :: :ok + def flush(pid) do + GenServer.call(pid, :flush, 15_000) + end + + # --- GenServer callbacks --- + + @impl GenServer + def init(opts) do + config = Map.new(opts) + + state = %__MODULE__{ + rpc_client: config.rpc_client, + batch_interval_ms: config.ack_batch_interval_ms, + batch_max_size: config.ack_batch_max_size + } + + {:ok, schedule_flush(state)} + end + + @impl GenServer + def handle_cast({:ack, ack_ids}, state) do + new_ids = ack_ids ++ state.ack_ids + new_count = state.ack_count + length(ack_ids) + state = %{state | ack_ids: new_ids, ack_count: new_count} + + state = + if new_count >= state.batch_max_size do + # Size-triggered flush: reschedule the timer so periodic flushing + # continues. Without rescheduling, timer_ref is left nil after + # do_flush cancels it and no further periodic flushes ever occur. + do_flush(state) + else + state + end + + {:noreply, state} + end + + def handle_cast({:modack, ack_ids, deadline_seconds}, state) do + new_modack_ids = + Map.update(state.modack_ids, deadline_seconds, ack_ids, &(ack_ids ++ &1)) + + total_modack_count = new_modack_ids |> Map.values() |> Enum.map(&length/1) |> Enum.sum() + state = %{state | modack_ids: new_modack_ids} + + state = + if state.ack_count + total_modack_count >= state.batch_max_size do + do_flush(state) + else + state + end + + {:noreply, state} + end + + @impl GenServer + def handle_call(:flush, _from, state) do + state = do_flush(state) + {:reply, :ok, state} + end + + @impl GenServer + def handle_info(:flush_timer, state) do + state = do_flush(state) + {:noreply, schedule_flush(state)} + end + + defp do_flush(state) do + state = cancel_timer(state) + + # Check if RPC client is available. If not, keep all state and retry on the + # next timer tick to avoid a noproc crash while UnaryRpcClient is restarting. + case GenServer.whereis(state.rpc_client) do + nil -> + :telemetry.execute( + [:broadway_cloud_pub_sub, :stream, :flush_deferred], + %{ack_count: state.ack_count, modack_groups: map_size(state.modack_ids)}, + %{} + ) + + schedule_flush(state) + + _pid -> + # Each step runs independently — a failure in flush_acks does not + # prevent flush_modacks from running. + state + |> flush_acks() + |> flush_modacks() + |> schedule_flush() + end + end + + defp flush_acks(%{ack_count: 0} = state), do: state + + defp flush_acks(state) do + case UnaryRpcClient.acknowledge(state.rpc_client, state.ack_ids) do + {:ok, []} -> + %{state | ack_ids: [], ack_count: 0} + + {:ok, remaining_ids} -> + # Partial success — retain only the failed ack_ids for next flush + %{state | ack_ids: remaining_ids, ack_count: length(remaining_ids)} + + {:error, {_rpc_error, transient_ids}} when is_list(transient_ids) -> + # Per-ack-ID partial failure: permanent ids already dropped by + # UnaryRpcClient. Retain only the transient ids for retry. + %{state | ack_ids: transient_ids, ack_count: length(transient_ids)} + + {:error, _reason} -> + # Total failure — retain all ack_ids + state + end + end + + defp flush_modacks(%{modack_ids: modacks} = state) when map_size(modacks) == 0, do: state + + defp flush_modacks(state) do + # Each deadline group is attempted independently — failure in one group does + # not prevent the others from being flushed. + remaining_modacks = + Enum.reduce(state.modack_ids, %{}, fn {deadline, ids}, remaining -> + case UnaryRpcClient.modify_ack_deadline(state.rpc_client, ids, deadline) do + {:ok, []} -> + remaining + + {:ok, remaining_ids} -> + # Partial success — retain only the failed ids for this deadline + Map.put(remaining, deadline, remaining_ids) + + {:error, {_rpc_error, transient_ids}} when is_list(transient_ids) -> + # Per-ack-ID partial failure: retain only transient ids. + if transient_ids == [] do + remaining + else + Map.put(remaining, deadline, transient_ids) + end + + {:error, _reason} -> + # Total failure for this deadline — retain all ids + Map.put(remaining, deadline, ids) + end + end) + + %{state | modack_ids: remaining_modacks} + end + + defp schedule_flush(state) do + state = cancel_timer(state) + ref = Process.send_after(self(), :flush_timer, state.batch_interval_ms) + %{state | timer_ref: ref} + end + + defp cancel_timer(%{timer_ref: nil} = state), do: state + + defp cancel_timer(%{timer_ref: ref} = state) do + Process.cancel_timer(ref) + # Drain any :flush_timer message that was already delivered to the mailbox + # before cancel_timer ran, to prevent an extra flush after the cancel. + receive do + :flush_timer -> :ok + after + 0 -> :ok + end + + %{state | timer_ref: nil} + end +end diff --git a/lib/broadway_cloud_pub_sub/streaming/ack_result.ex b/lib/broadway_cloud_pub_sub/streaming/ack_result.ex new file mode 100644 index 0000000..8ab3e57 --- /dev/null +++ b/lib/broadway_cloud_pub_sub/streaming/ack_result.ex @@ -0,0 +1,135 @@ +defmodule BroadwayCloudPubSub.Streaming.AckResult do + @moduledoc false + + # Represents the outcome of an ack or nack operation in exactly-once delivery mode. + # + # In exactly-once mode the server guarantees at-most-once delivery only when the + # ack is durably committed. To surface ack failures to callers, each ack/nack + # operation resolves to an AckResult describing whether the operation succeeded + # or failed and why. + # + # This matches Go's AckResult type in pubsub/message.go: + # + # type AckResult struct { + # ready chan struct{} + # status AcknowledgeStatus + # err error + # } + # + # ## Per-ack-ID error parsing (exactly-once) + # + # Google's Pub/Sub API returns per-ack-ID errors in the gRPC response metadata + # when an Acknowledge or ModifyAckDeadline RPC partially fails. The errors are + # encoded in a `google.rpc.Status` detail of type `google.rpc.ErrorInfo`: + # + # - `reason` field: either "TRANSIENT_
" (retry) or "PERMANENT_
" + # - `metadata` map: keys are ack_ids, values are the per-ack-id error reason + # + # Transient errors should be retried; permanent errors indicate an invalid ack ID + # and should be resolved immediately with the appropriate status. + # + # ## Status values + # + # | Status | Meaning | + # |-----------------------|--------------------------------------------------| + # | :success | Ack/nack committed by the server | + # | :permission_denied | Service account lacks Subscriber role | + # | :failed_precondition | Subscription not in exactly-once mode | + # | :invalid_ack_id | Ack ID is invalid or has already expired | + # | :other | Unrecognised permanent failure | + + @type status :: + :success + | :permission_denied + | :failed_precondition + | :invalid_ack_id + | :other + + @type t :: %__MODULE__{ + ack_id: String.t(), + status: status(), + error: term() | nil + } + + defstruct [:ack_id, :status, :error] + + @doc """ + Returns a successful AckResult for the given ack_id. + """ + @spec success(String.t()) :: t() + def success(ack_id), do: %__MODULE__{ack_id: ack_id, status: :success, error: nil} + + @doc """ + Returns a failed AckResult for the given ack_id with the given status and error. + """ + @spec failure(String.t(), status(), term()) :: t() + def failure(ack_id, status, error), + do: %__MODULE__{ack_id: ack_id, status: status, error: error} + + @doc """ + Parses per-ack-ID error information from a `GRPC.RPCError`'s details list. + + Google's Pub/Sub API encodes per-ack-ID errors in `google.rpc.ErrorInfo` details + when an Acknowledge or ModifyAckDeadline RPC partially fails. The `metadata` map + in `ErrorInfo` maps ack_id => error_reason, where the reason has either a + "TRANSIENT_" or "PERMANENT_" prefix. + + Returns a map of `ack_id => :transient | {:permanent, reason_string}`. + + If no per-ack-ID errors are present (e.g. the whole RPC failed), returns an + empty map. + """ + @spec parse_error_details([Google.Protobuf.Any.t()] | nil) :: + %{String.t() => :transient | {:permanent, String.t()}} + def parse_error_details(nil), do: %{} + def parse_error_details([]), do: %{} + + def parse_error_details(details) when is_list(details) do + Enum.reduce(details, %{}, fn any_proto, acc -> + case decode_error_info(any_proto) do + {:ok, %Google.Rpc.ErrorInfo{metadata: metadata}} when map_size(metadata) > 0 -> + Enum.reduce(metadata, acc, fn {ack_id, reason}, inner_acc -> + Map.put(inner_acc, ack_id, classify_reason(reason)) + end) + + _ -> + acc + end + end) + end + + @doc """ + Returns true if the given reason string indicates a transient (retryable) error. + + Transient errors from Google Pub/Sub have a "TRANSIENT_" prefix in the reason + field, e.g. "TRANSIENT_FAILURE_INVALID_ACK_ID". + """ + @spec transient_reason?(String.t()) :: boolean() + def transient_reason?(reason) when is_binary(reason), + do: String.starts_with?(reason, "TRANSIENT_") + + # --- Private --- + + defp decode_error_info(%Google.Protobuf.Any{type_url: type_url, value: value}) + when is_binary(value) do + if type_url == "type.googleapis.com/google.rpc.ErrorInfo" do + try do + {:ok, Google.Rpc.ErrorInfo.decode(value)} + rescue + _ -> :error + end + else + :error + end + end + + defp decode_error_info(_), do: :error + + defp classify_reason(reason) when is_binary(reason) do + if String.starts_with?(reason, "TRANSIENT_") do + :transient + else + {:permanent, reason} + end + end +end diff --git a/lib/broadway_cloud_pub_sub/streaming/ack_time_distribution.ex b/lib/broadway_cloud_pub_sub/streaming/ack_time_distribution.ex new file mode 100644 index 0000000..ff11e99 --- /dev/null +++ b/lib/broadway_cloud_pub_sub/streaming/ack_time_distribution.ex @@ -0,0 +1,131 @@ +defmodule BroadwayCloudPubSub.Streaming.AckTimeDistribution do + @moduledoc false + + # Fixed-bucket histogram for tracking message processing durations. + # + # One bucket per second in the range [0, 600]. Record is O(1) (tuple element + # increment). Percentile is O(601) linear scan = effectively O(1). Counters + # are monotonically growing — data is never evicted. + # + # This matches the algorithm used by all five official Google Cloud Pub/Sub + # client libraries: + # - Go: distribution/distribution.go (type D struct { buckets []uint64 }) + # - Python: histogram.py (601-bucket array) + # - Java: Distribution class with fixed buckets + # - Node.js: histogram.ts with bucket array + # - Ruby: windowed approach with bucket concept + # + # Before enough samples are collected (< @min_samples), the distribution + # returns the configured `default_deadline` so behaviour is identical to the + # old fixed-deadline strategy during cold start. + # + # All duration values are clamped to the valid Pub/Sub ack deadline range: + # 10–600 seconds (matching the server-enforced limits). + + @min_deadline_seconds 10 + @max_deadline_seconds 600 + # 601 buckets: indices 0..600 + @num_buckets @max_deadline_seconds + 1 + @min_samples 10 + + @typedoc "An AckTimeDistribution struct." + @opaque t :: %__MODULE__{ + buckets: tuple(), + total: non_neg_integer(), + default_deadline: pos_integer() + } + + defstruct buckets: nil, total: 0, default_deadline: 60 + + @doc """ + Creates a new distribution with the given default deadline (seconds). + + The `default_deadline` is returned by `percentile/2` until at least + #{@min_samples} samples have been recorded (cold-start fallback). + + The deadline is clamped to the valid Pub/Sub ack deadline range + (#{@min_deadline_seconds}–#{@max_deadline_seconds} seconds). + """ + @spec new(pos_integer()) :: t() + def new(default_deadline) when is_integer(default_deadline) and default_deadline > 0 do + clamped = clamp(default_deadline) + + %__MODULE__{ + buckets: :erlang.make_tuple(@num_buckets, 0), + default_deadline: clamped + } + end + + @doc """ + Records a new processing duration (in seconds). O(1). + + The value is clamped to the valid Pub/Sub ack deadline range + (#{@min_deadline_seconds}–#{@max_deadline_seconds} seconds). + Unlike the previous circular-buffer implementation, data is never evicted — + counters grow monotonically. + """ + @spec record(t(), integer()) :: t() + def record(%__MODULE__{buckets: buckets, total: total} = dist, duration_seconds) do + idx = clamp(duration_seconds) + new_buckets = put_elem(buckets, idx, elem(buckets, idx) + 1) + %{dist | buckets: new_buckets, total: total + 1} + end + + @doc """ + Returns the p-th percentile of recorded processing times (in seconds). O(601). + + `p` should be in the range `[0.0, 1.0]`. For example, `0.99` for the 99th + percentile. + + Returns `default_deadline` if fewer than #{@min_samples} samples have been + recorded (cold-start fallback). + + The result is always in the valid Pub/Sub ack deadline range + (#{@min_deadline_seconds}–#{@max_deadline_seconds} seconds) because all + recorded values are clamped on `record/2`. + """ + @spec percentile(t(), float()) :: pos_integer() + def percentile(%__MODULE__{total: total, default_deadline: default}, _p) + when total < @min_samples do + default + end + + def percentile(%__MODULE__{buckets: buckets, total: total}, p) + when is_number(p) and p >= 0.0 and p <= 1.0 do + target = max(1, ceil(p * total)) + find_percentile_bucket(buckets, target, 0, 0) + end + + @doc """ + Returns the total number of samples recorded (monotonically increasing). + """ + @spec sample_count(t()) :: non_neg_integer() + def sample_count(%__MODULE__{total: total}), do: total + + # Linear scan over the 601 buckets to find the bucket where the cumulative + # count first reaches or exceeds `target`. Returns that bucket's index, which + # is also the clamped duration in seconds. + defp find_percentile_bucket(_buckets, _target, idx, _cumulative) + when idx >= @num_buckets do + @max_deadline_seconds + end + + defp find_percentile_bucket(buckets, target, idx, cumulative) do + new_cumulative = cumulative + elem(buckets, idx) + + if new_cumulative >= target do + # idx is already in [@min_deadline_seconds, @max_deadline_seconds] + # because we only ever write to clamped indices in record/2. + idx + else + find_percentile_bucket(buckets, target, idx + 1, new_cumulative) + end + end + + # Clamp a duration value to the valid Pub/Sub ack deadline range. + defp clamp(value) do + value + |> max(@min_deadline_seconds) + |> min(@max_deadline_seconds) + end +end diff --git a/lib/broadway_cloud_pub_sub/streaming/acknowledger.ex b/lib/broadway_cloud_pub_sub/streaming/acknowledger.ex index adba412..49ff317 100644 --- a/lib/broadway_cloud_pub_sub/streaming/acknowledger.ex +++ b/lib/broadway_cloud_pub_sub/streaming/acknowledger.ex @@ -38,16 +38,24 @@ defmodule BroadwayCloudPubSub.Streaming.Acknowledger do @impl Acknowledger def ack(ack_ref, successful, failed) do - {manager_pid, config} = :persistent_term.get(ack_ref) - - success_actions = group_actions_ack_ids(successful, :on_success, config) - failure_actions = group_actions_ack_ids(failed, :on_failure, config) - - success_actions - |> Map.merge(failure_actions, fn _, a, b -> a ++ b end) - |> dispatch_acks(manager_pid) - - :ok + # persistent_term stores the manager's registered *name*, not its PID, + # so this survives a StreamManager restart. Use the 2-arity form with a nil + # default so that if the pipeline shuts down and the key is erased before + # this callback runs we silently skip the dispatch rather than raising. + case :persistent_term.get(ack_ref, nil) do + nil -> + :ok + + {manager_server, config} -> + success_actions = group_actions_ack_ids(successful, :on_success, config) + failure_actions = group_actions_ack_ids(failed, :on_failure, config) + + success_actions + |> Map.merge(failure_actions, fn _, a, b -> a ++ b end) + |> dispatch_acks(manager_server) + + :ok + end end @impl Acknowledger @@ -72,21 +80,21 @@ defmodule BroadwayCloudPubSub.Streaming.Acknowledger do defp extract_ack_id(%{acknowledger: {_, _, %{ack_id: ack_id}}}), do: ack_id - defp dispatch_acks(actions_and_ids, manager_pid) do + defp dispatch_acks(actions_and_ids, manager_server) do Enum.each(actions_and_ids, fn {action, ack_ids} -> ack_ids |> Enum.chunk_every(@max_ack_ids_per_request) - |> Enum.each(&apply_action(action, &1, manager_pid)) + |> Enum.each(&apply_action(action, &1, manager_server)) end) end - defp apply_action(:noop, _ack_ids, _manager_pid), do: :ok + defp apply_action(:noop, _ack_ids, _manager_server), do: :ok - defp apply_action(:ack, ack_ids, manager_pid) do - BroadwayCloudPubSub.Streaming.StreamManager.acknowledge(manager_pid, ack_ids) + defp apply_action(:ack, ack_ids, manager_server) do + BroadwayCloudPubSub.Streaming.StreamManager.acknowledge(manager_server, ack_ids) end - defp apply_action({:nack, deadline}, ack_ids, manager_pid) do - BroadwayCloudPubSub.Streaming.StreamManager.modify_deadline(manager_pid, ack_ids, deadline) + defp apply_action({:nack, deadline}, ack_ids, manager_server) do + BroadwayCloudPubSub.Streaming.StreamManager.modify_deadline(manager_server, ack_ids, deadline) end end diff --git a/lib/broadway_cloud_pub_sub/streaming/error_classifier.ex b/lib/broadway_cloud_pub_sub/streaming/error_classifier.ex index 7b4434a..2cd4c9c 100644 --- a/lib/broadway_cloud_pub_sub/streaming/error_classifier.ex +++ b/lib/broadway_cloud_pub_sub/streaming/error_classifier.ex @@ -1,9 +1,7 @@ defmodule BroadwayCloudPubSub.Streaming.ErrorClassifier do @moduledoc false - # Classifies gRPC errors into :retryable or :terminal categories, - # matching the behaviour of the official Google Cloud Go and Python - # Pub/Sub client libraries. + # Classifies gRPC errors into :retryable or :terminal categories. # # ## Retryable errors (reconnect the stream) # @@ -13,8 +11,13 @@ defmodule BroadwayCloudPubSub.Streaming.ErrorClassifier do # - DEADLINE_EXCEEDED (4) — server-side idle timeout (the primary issue) # - INTERNAL (13) — transient server error # - ABORTED (10) — concurrent modification, retry - # - UNAVAILABLE (14) — server temporarily unavailable or being drained - # EXCEPT "Server shutdownNow invoked" (permanent) + # - UNAVAILABLE (14) — server temporarily unavailable or being drained. + # Note: "Server shutdownNow invoked" is a routine + # Google-side backend drain — the LB will route to + # another backend immediately. + # - UNAUTHENTICATED (16) — expired OAuth2 token. Reconnect fetches a fresh + # token. Treating this as terminal would permanently + # stop the producer on routine token rotation. # - UNKNOWN (2) — includes HTTP/2 GOAWAY frames on connection drain # - RESOURCE_EXHAUSTED (8)— quota temporarily exceeded, retry with backoff # - Non-gRPC errors — connection resets, EOF, transport errors @@ -27,13 +30,7 @@ defmodule BroadwayCloudPubSub.Streaming.ErrorClassifier do # - NOT_FOUND (5) — subscription does not exist # - PERMISSION_DENIED (7) — service account lacks Subscriber role # - INVALID_ARGUMENT (3) — bad subscription name or flow-control params - # - UNAUTHENTICATED (16) — invalid or expired credentials # - CANCELLED (1) — deliberate cancellation (not self-initiated) - # - # ## Reference - # - # Go: defaultRetryer.Retry() in pubsub/service.go - # Python: _RETRYABLE_STREAM_ERRORS / _TERMINATING_STREAM_ERRORS in bidi.py @terminal_status_codes MapSet.new([ # NOT_FOUND — subscription does not exist @@ -42,16 +39,10 @@ defmodule BroadwayCloudPubSub.Streaming.ErrorClassifier do 7, # INVALID_ARGUMENT — bad config / subscription name 3, - # UNAUTHENTICATED — bad or expired credentials - 16, # CANCELLED — external cancellation (self-cancellation is handled separately) 1 ]) - # UNAVAILABLE (14) with this message means an intentional server shutdown: - # retrying would connect to the same dying backend. Treat as terminal. - @shutdown_now_message "Server shutdownNow invoked" - @type classification :: :retryable | :terminal @doc """ @@ -62,17 +53,11 @@ defmodule BroadwayCloudPubSub.Streaming.ErrorClassifier do stop processing messages. """ @spec classify(term()) :: classification() - def classify(%GRPC.RPCError{status: status, message: message}) do - cond do - MapSet.member?(@terminal_status_codes, status) -> - :terminal - - # UNAVAILABLE with shutdown message is permanent - status == 14 and String.contains?(message || "", @shutdown_now_message) -> - :terminal - - true -> - :retryable + def classify(%GRPC.RPCError{status: status}) do + if MapSet.member?(@terminal_status_codes, status) do + :terminal + else + :retryable end end diff --git a/lib/broadway_cloud_pub_sub/streaming/options.ex b/lib/broadway_cloud_pub_sub/streaming/options.ex index 47ff311..5c4c24e 100644 --- a/lib/broadway_cloud_pub_sub/streaming/options.ex +++ b/lib/broadway_cloud_pub_sub/streaming/options.ex @@ -7,10 +7,14 @@ defmodule BroadwayCloudPubSub.Streaming.Options do @default_max_outstanding_messages 1_000 @default_max_outstanding_bytes 100 * 1024 * 1024 @default_stream_ack_deadline_seconds 60 - @default_lease_extension_percent 0.6 - @default_backoff_min 1_000 - @default_backoff_max 30_000 + # 60 minutes — matches Go's MaxExtension default. + @default_max_extension_ms 60 * 60 * 1_000 + # gax defaults: https://github.com/googleapis/gax-go/blob/main/v2/call_option.go + @default_backoff_min 100 + @default_backoff_max 60_000 @default_keepalive_interval_ms 30_000 + @default_ack_batch_interval_ms 100 + @default_ack_batch_max_size 2_500 definition = [ # Handled by Broadway. @@ -54,15 +58,17 @@ defmodule BroadwayCloudPubSub.Streaming.Options do The producer will extend leases automatically before this deadline. """ ], - lease_extension_percent: [ - type: - {:custom, __MODULE__, :type_float_between_0_and_1, [[{:name, :lease_extension_percent}]]}, - default: @default_lease_extension_percent, + max_extension_ms: [ + type: :pos_integer, + default: @default_max_extension_ms, doc: """ - The fraction of `stream_ack_deadline_seconds` at which leases are - extended. For example, with a deadline of 60s and a percent of 0.6, - leases are extended every 36s. Must be between 0.0 and 1.0 exclusive. - Defaults to 0.6. + The maximum total time in milliseconds that a message's ack deadline will + be extended from the moment of initial receipt. After this duration, the + message is dropped from lease management and the server will redeliver it. + + This prevents a stuck consumer from holding messages indefinitely. + Matches the Go client's `MaxExtension` default of 60 minutes. + Defaults to #{div(@default_max_extension_ms, 60_000)} minutes. """ ], client_id: [ @@ -141,12 +147,29 @@ defmodule BroadwayCloudPubSub.Streaming.Options do backoff_min: [ type: :pos_integer, default: @default_backoff_min, - doc: "Minimum reconnection backoff in milliseconds. Defaults to 1000." + doc: + "Minimum reconnection backoff in milliseconds. Matches the gax default of 100ms. Defaults to 100." ], backoff_max: [ type: :pos_integer, default: @default_backoff_max, - doc: "Maximum reconnection backoff in milliseconds. Defaults to 30000." + doc: + "Maximum reconnection backoff in milliseconds. Matches the gax default of 60s. Defaults to 60000." + ], + retry_deadline_ms: [ + type: :pos_integer, + default: 60_000, + doc: """ + Maximum total time in milliseconds that the unary RPC client (UnaryRpcClient) + will spend retrying a single acknowledge or modifyAckDeadline request before + giving up and dropping the ack_ids. + + Each retry attempt uses jittered exponential backoff starting at 100ms and + capped at 60s. The default of 60,000ms (60 seconds) matches standard gax + retry behaviour. When exactly-once delivery is enabled (auto-detected from + subscription properties), this value should be increased to 600,000ms (600 + seconds) to match the Go client's extended retry deadline for exactly-once acks. + """ ], keepalive_interval_ms: [ type: :pos_integer, @@ -160,6 +183,30 @@ defmodule BroadwayCloudPubSub.Streaming.Options do Defaults to 30000. """ ], + ack_batch_interval_ms: [ + type: + {:custom, __MODULE__, :type_integer_in_range, + [[{:name, :ack_batch_interval_ms}, {:min, 10}, {:max, 5_000}]]}, + default: @default_ack_batch_interval_ms, + doc: """ + Interval in milliseconds at which batched ack and modifyAckDeadline + requests are flushed to the Pub/Sub service via unary RPCs. + Lower values reduce end-to-end ack latency; higher values improve + batching efficiency. Defaults to 100. + """ + ], + ack_batch_max_size: [ + type: + {:custom, __MODULE__, :type_integer_in_range, + [[{:name, :ack_batch_max_size}, {:min, 1}, {:max, 10_000}]]}, + default: @default_ack_batch_max_size, + doc: """ + Maximum number of ack_ids to accumulate before triggering an + immediate flush, regardless of the timer. Each unary RPC carries + at most 2,500 ack_ids (the Google API limit), so values above 2,500 + result in multiple RPCs per flush. Defaults to 2500. + """ + ], adapter: [ type: {:in, [:gun, :mint]}, default: :gun, @@ -192,6 +239,43 @@ defmodule BroadwayCloudPubSub.Streaming.Options do Defaults to `true`. """ ], + drain_timeout_ms: [ + type: :pos_integer, + default: 30_000, + doc: """ + Maximum time in milliseconds to wait for in-flight messages to be + processed and acknowledged during graceful shutdown. After this timeout, + any remaining outstanding messages are nacked (per the `on_shutdown` + setting) and the connection is force-closed. + + This drain phase mirrors Go's `iterator.stop()` which waits for the + `drained` channel to close (all outstanding messages acked) before + calling `CloseSend`. Defaults to 30 seconds. + """ + ], + enable_message_ordering: [ + type: :boolean, + default: false, + doc: """ + When `true`, messages with the same `ordering_key` are routed to the + same Broadway processor and processed sequentially. This guarantees + in-order delivery for ordered subscriptions. + + Ordering is enforced via Broadway's built-in `:partition_by` option, + which assigns messages with the same `orderingKey` metadata to the + same processor partition. The subscription itself must also have + message ordering enabled in Google Cloud Pub/Sub. + + When `false` (default), messages are distributed across processors + without regard to ordering key, matching the unordered behaviour of a + standard Pub/Sub subscription. + + Note: the server will also report whether the subscription has ordering + enabled in each `StreamingPullResponse.subscription_properties`. This + client-side option controls whether to enforce it in the Broadway + processing topology. + """ + ], # Testing options test_pid: [type: :pid, doc: false], @@ -241,15 +325,6 @@ defmodule BroadwayCloudPubSub.Streaming.Options do "expected :#{name} to be an integer between #{min} and #{max}, got: #{inspect(value)}"} end - def type_float_between_0_and_1(value, _) when is_float(value) and value > 0.0 and value < 1.0 do - {:ok, value} - end - - def type_float_between_0_and_1(value, [{:name, name}]) do - {:error, - "expected :#{name} to be a float between 0.0 and 1.0 exclusive, got: #{inspect(value)}"} - end - def type_ack_option(:ack, _), do: {:ok, :ack} def type_ack_option(:noop, _), do: {:ok, :noop} def type_ack_option(:nack, _), do: {:ok, {:nack, 0}} diff --git a/lib/broadway_cloud_pub_sub/streaming/producer.ex b/lib/broadway_cloud_pub_sub/streaming/producer.ex index 0cf73f8..38fea75 100644 --- a/lib/broadway_cloud_pub_sub/streaming/producer.ex +++ b/lib/broadway_cloud_pub_sub/streaming/producer.ex @@ -5,10 +5,22 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do ## Overview - `StreamingProducer` opens a persistent bidirectional gRPC stream to the - Pub/Sub service and receives messages as the server pushes them. This is - more efficient than the default HTTP Pull approach (`BroadwayCloudPubSub.Producer`) - for workloads that require low latency or high throughput. + This producer opens a persistent bidirectional gRPC stream to the Pub/Sub + service and receives messages as the server pushes them. This is more + efficient than the HTTP pull approach (`BroadwayCloudPubSub.Producer`) for + workloads that require low latency or high throughput. + + The architecture has three layers: + + 1. **StreamManager** (GenServer) — owns the gRPC stream and connection + lifecycle, manages lease extensions, and dispatches messages to the + Producer when downstream demand is available. + 2. **Producer** (GenStage) — receives messages from StreamManager and + forwards them to Broadway processors. Tracks demand from downstream + stages. + 3. **UnaryAckSupervisor** — supervises AckBatcher and UnaryRpcClient, which + batch and send acknowledgement and deadline-modification requests via + separate unary RPCs (not on the streaming connection). ## Usage @@ -28,93 +40,205 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do #{NimbleOptions.docs(BroadwayCloudPubSub.Streaming.Options.definition())} - ### Required options + ## Acknowledgements - * `:subscription` - The full subscription name, e.g. - `"projects/my-project/subscriptions/my-subscription"`. + Use `:on_success` and `:on_failure` to control how messages are acknowledged + with Pub/Sub. Both can also be changed per-message via + `Broadway.Message.configure_ack/2`. - ### Auth options + Supported values: - * `:goth` - The `Goth` module to use for authentication (e.g. `MyApp.Goth`). - * `:token_generator` - Custom MFArgs token generator as an alternative to `:goth`. + * `:ack` — acknowledge the message; Pub/Sub removes it from the subscription. + * `:noop` — do nothing; the message is redelivered after the subscription's + `ackDeadlineSeconds` expires. + * `:nack` — equivalent to `{:nack, 0}`; makes the message immediately + available for redelivery. + * `{:nack, seconds}` — sets `ackDeadlineSeconds` to `seconds` for the + message, controlling when it becomes available for redelivery (0–600). - ### Flow control + Acks and deadline modifications are batched by **AckBatcher** and flushed to + Pub/Sub via unary RPCs at a configurable interval (`:ack_batch_interval_ms`, + default 100ms) or when the batch reaches `:ack_batch_max_size` (default 2500). + Batching is done on a separate unary connection, independently of the + streaming connection. - * `:max_outstanding_messages` - Maximum number of unacked messages the server - will push. Defaults to 1000. - * `:max_outstanding_bytes` - Maximum total size of unacked messages. Defaults - to 100 MiB. + ## Flow control - ### Shutdown + Flow control is managed at the gRPC stream level via `:max_outstanding_messages` + and `:max_outstanding_bytes`. The Pub/Sub server will not push more messages + than these limits allow. This is the primary backpressure mechanism. - * `:on_shutdown` - What to do with unprocessed messages on shutdown. - Defaults to `{:nack, 5}` (redeliver after 5 seconds). + StreamManager also tracks GenStage demand from the Producer and buffers + messages internally when demand is zero, preventing unbounded mailbox growth. - ## Differences from BroadwayCloudPubSub.Producer + ## Lease management - * **Push-based with demand signaling**: Messages arrive via a persistent gRPC - stream. The producer tracks GenStage demand from downstream consumers and - signals StreamManager when capacity is available. StreamManager buffers - messages internally when demand is zero, preventing unbounded mailbox growth. - * **Flow control**: Controlled by `max_outstanding_messages` / `max_outstanding_bytes` - on the gRPC stream rather than by `max_number_of_messages` per HTTP request. - This is the primary backpressure mechanism — the Pub/Sub server will not push - more than `max_outstanding_messages` unacked messages. - * **Shutdown**: By default, unprocessed messages are returned to Pub/Sub with a - short delay (`on_shutdown: {:nack, 5}`), analogous to AMQP channel close behavior. + The producer automatically extends message acknowledgement deadlines before + they expire. Leases are extended by sending `modifyAckDeadline` requests via + the AckBatcher. The extension interval is derived from the effective ack + deadline with randomized jitter to spread out RPC calls. + + Messages are tracked until they are acknowledged, nacked, or until + `:max_extension_ms` elapses (default 60 minutes), after which the server + redelivers them. This prevents a stuck consumer from holding messages + indefinitely. + + ## Exactly-once delivery + + When the subscription has exactly-once delivery enabled, the server signals + this via `StreamingPullResponse.subscription_properties`. The producer + detects this automatically and enforces a minimum lease extension interval + (the server requires at least 60 seconds between extensions for exactly-once + subscriptions). + + For exactly-once subscriptions, increase `:retry_deadline_ms` to 600,000ms + (10 minutes) to allow the unary RPC client enough time to retry transient + ack failures — the server requires successful ack receipt before guaranteeing + exactly-once semantics. + + ## Message ordering + + Set `enable_message_ordering: true` to route messages with the same + `ordering_key` to the same Broadway processor, ensuring sequential processing + per key. The subscription must also have message ordering enabled in Pub/Sub. + + Ordering is enforced via Broadway's `:partition_by` option, which is + automatically injected into all processor groups when this option is set. + + ## Graceful shutdown + + On shutdown, the producer: + + 1. Nacks all buffered messages (received but not yet dispatched to + processors) per the `:on_shutdown` option (default `{:nack, 5}`). + 2. Stops the gRPC stream to prevent new messages from arriving. + 3. Waits up to `:drain_timeout_ms` (default 30s) for in-flight messages + (dispatched to processors but not yet acked/nacked) to be processed. + 4. Force-closes the stream after the drain timeout. + + ## Error handling + + gRPC stream errors are classified as retryable or terminal: + + * **Retryable** (e.g. `DEADLINE_EXCEEDED`, `UNAVAILABLE`, `UNAUTHENTICATED`) — + the stream is closed and reconnected after a backoff delay. A new OAuth2 + token is fetched on each reconnect. + * **Terminal** (e.g. `NOT_FOUND`, `PERMISSION_DENIED`, `INVALID_ARGUMENT`) — + the StreamManager stops and Broadway's supervision restarts the pipeline. + + Reconnect backoff is configurable via `:backoff_type`, `:backoff_min`, and + `:backoff_max`. The default is randomized exponential (`:rand_exp`) starting + at 100ms and capped at 60s. ## Telemetry This producer emits the following [Telemetry](https://github.com/beam-telemetry/telemetry) - events: + events. All events include metadata `%{name: broadway_name, subscription: subscription}`. - * `[:broadway_cloud_pub_sub, :stream, :connect]` - Emitted when a gRPC - StreamingPull connection is successfully established. + ### Stream events (prefix: `[:broadway_cloud_pub_sub, :stream, ...]`) + + * `:connect` — gRPC StreamingPull stream successfully established. Measurements: `%{}` - * `[:broadway_cloud_pub_sub, :stream, :disconnect]` - Emitted when the - gRPC stream is closed or encounters an error. + * `:disconnect` — gRPC stream closed or errored. Measurements: `%{reason: term()}` - * `[:broadway_cloud_pub_sub, :stream, :receive_messages]` - Emitted when - messages are received from the gRPC stream and forwarded to the producer. + * `:receive_messages` — messages received from the stream and forwarded to + the producer. Measurements: `%{count: pos_integer()}` - * `[:broadway_cloud_pub_sub, :stream, :ack]` - Emitted when messages are - acknowledged on the gRPC stream. + * `:ack` — acknowledge request dispatched to AckBatcher. Measurements: `%{count: pos_integer()}` - * `[:broadway_cloud_pub_sub, :stream, :connection_failure]` - Emitted when - a connection attempt fails before the stream is established. + * `:terminal_error` — non-retryable gRPC error received. StreamManager stops + after this event. Measurements: `%{reason: term()}` - * `[:broadway_cloud_pub_sub, :stream, :terminal_error]` - Emitted when a - non-retryable gRPC error is received (e.g. NOT_FOUND, PERMISSION_DENIED). - The StreamManager will stop after this event is emitted. + * `:connection_failure` — connection attempt failed before the stream was + established (unary RPC client). Measurements: `%{reason: term()}` - * `[:broadway_cloud_pub_sub, :stream, :ack_buffered]` - Emitted when an - ack/nack request is buffered because the gRPC stream is temporarily - unavailable (e.g. during reconnection). + * `:ack_failure` — an acknowledge RPC failed after retries (unary RPC client). + + Measurements: `%{count: pos_integer()}` + + * `:modack_failure` — a modifyAckDeadline RPC failed after retries (unary RPC client). + + Measurements: `%{count: pos_integer()}` + + * `:permanent_failure` — one or more ack_ids were permanently rejected by + the server (e.g. ack_id expired). These are dropped and not retried. + + Measurements: `%{count: pos_integer()}` + + * `:keepalive` — HTTP/2 PING frame sent on the gRPC connection to keep it + alive. Only emitted when using the `:gun` adapter. + + Measurements: `%{}` + + * `:extend_leases` — lease extension cycle ran; modack requests dispatched + for outstanding messages. + + Measurements: `%{count: non_neg_integer()}` + + * `:drain_timeout` — graceful shutdown drain timed out before all in-flight + messages were processed. + + Measurements: `%{}` + + * `:drain_complete` — all in-flight messages were processed before the drain + timeout; stream closed cleanly. + + Measurements: `%{}` + + * `:reconnect` — reconnect scheduled after a retryable stream error. - Measurements: `%{buffer_size: non_neg_integer()}` + Measurements: `%{delay: non_neg_integer()}` - All events include the following metadata: + * `:flush_deferred` — AckBatcher flush deferred because UnaryRpcClient was + not yet available (e.g. restarting). - * `:name` - the Broadway topology name - * `:subscription` - the full subscription name + Measurements: `%{ack_count: non_neg_integer(), modack_groups: non_neg_integer()}` + + ## Pub/Sub Emulator + + To use with the local Pub/Sub emulator: + + {BroadwayCloudPubSub.Streaming.Producer, + subscription: "projects/my-project/subscriptions/my-subscription", + grpc_endpoint: "localhost:8085", + use_ssl: false, + token_generator: {MyApp, :emulator_token, []}} + + ## Differences from `BroadwayCloudPubSub.Producer` + + * **Push-based**: Messages arrive via a persistent gRPC stream rather than + being fetched on demand via HTTP pull requests. + * **Flow control**: Controlled by `:max_outstanding_messages` and + `:max_outstanding_bytes` on the gRPC stream level, rather than by + `:max_number_of_messages` per pull request. + * **Shutdown behaviour**: By default, unprocessed messages are returned to + Pub/Sub with a short delay (`on_shutdown: {:nack, 5}`) so they are + redelivered quickly on rolling deploys. The pull producer does not nack + on shutdown. + * **Ack path**: Acks are batched and sent via a separate unary RPC + connection managed by AckBatcher and UnaryRpcClient, not on the streaming + connection itself. + * **Lease extension**: The streaming producer automatically extends message + deadlines to prevent redelivery while messages are being processed. The + pull producer relies on the subscription-level ack deadline only. """ use GenStage - alias BroadwayCloudPubSub.Streaming.{StreamManager, Options} + alias BroadwayCloudPubSub.Streaming.{StreamManager, UnaryAckSupervisor, Options} @behaviour Broadway.Producer @@ -131,20 +255,60 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do |> assign_client_id() |> assign_token_generator() - # Broadway will start the returned child specs under its supervisor. - # We use the Broadway pipeline name as the StreamManager's registered name. broadway_name = broadway_opts[:name] - manager_name = Module.concat(broadway_name, StreamManager) - manager_opts = Keyword.put(opts, :name, manager_name) + # Config forwarded to UnaryRpcClient and AckBatcher via the supervisor. + # These keys are a subset of the full opts — only what the unary path needs. + unary_config = + opts + |> Keyword.take([ + :subscription, + :token_generator, + :grpc_endpoint, + :use_ssl, + :adapter, + :backoff_type, + :backoff_min, + :backoff_max, + :ack_batch_interval_ms, + :ack_batch_max_size, + :retry_deadline_ms + ]) + |> Keyword.put(:broadway_name, broadway_name) + + sup_name = Module.concat(broadway_name, UnaryAckSupervisor) + + unary_sup_spec = %{ + id: sup_name, + start: + {UnaryAckSupervisor, :start_link, + [[name: sup_name, broadway_name: broadway_name, config: unary_config]]}, + restart: :permanent, + type: :supervisor + } + + # Pass broadway_name so StreamManager can derive the AckBatcher registered name. + stream_manager_name = Module.concat(broadway_name, StreamManager) + + manager_opts = + opts + |> Keyword.put(:name, stream_manager_name) + |> Keyword.put(:broadway_name, broadway_name) - child_spec = %{ - id: StreamManager, + manager_spec = %{ + id: stream_manager_name, start: {StreamManager, :start_link, [manager_opts]}, restart: :permanent } - {[child_spec], put_in(broadway_opts, [:producer, :module], {producer_module, opts})} + options = + broadway_opts + |> put_in([:producer, :module], {producer_module, opts}) + |> maybe_inject_partition_by(opts) + + # UnaryAckSupervisor is listed first so it starts before StreamManager, + # ensuring AckBatcher is alive when the first acks are dispatched. + {[unary_sup_spec, manager_spec], options} end @impl GenStage @@ -159,12 +323,21 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do # Tell the StreamManager our pid so it can forward messages to us :ok = StreamManager.set_producer(manager_pid, self()) - # Store ack config in persistent_term for acknowledger lookup + # Store the manager's *registered name* (not its PID) in persistent_term so + # the Acknowledger can route acks even after a StreamManager restart. PIDs + # become stale on restart; names always resolve to the current process. ack_config = %{on_success: config.on_success, on_failure: config.on_failure} - :persistent_term.put(ack_ref, {manager_pid, ack_config}) + :persistent_term.put(ack_ref, {manager_name, ack_config}) {:producer, - %{manager_pid: manager_pid, ack_ref: ack_ref, config: config, draining: false, demand: 0}} + %{ + manager_pid: manager_pid, + manager_name: manager_name, + ack_ref: ack_ref, + config: config, + draining: false, + demand: 0 + }} end @impl GenStage @@ -185,33 +358,39 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do @impl Broadway.Producer def prepare_for_draining(state) do - StreamManager.stop_receiving(state.manager_pid) - {:noreply, [], %{state | draining: true}} - end - - @impl GenStage - def terminate(_reason, state) do %{manager_pid: manager_pid, config: config} = state - if Process.alive?(manager_pid) do - # Nack outstanding messages per on_shutdown option - outstanding = StreamManager.get_outstanding(manager_pid) + # 1. Get buffered messages (not yet dispatched to processors) and nack them + # immediately per on_shutdown config. These messages will be redelivered + # by the server after the configured delay. + buffered = StreamManager.get_buffered(manager_pid) - case {config[:on_shutdown], outstanding} do - {_, []} -> - :ok + case {config[:on_shutdown], buffered} do + {_, []} -> + :ok - {:noop, _} -> - :ok + {:noop, _} -> + :ok - {{:nack, delay_seconds}, ack_ids} -> - StreamManager.modify_deadline(manager_pid, ack_ids, delay_seconds) + {{:nack, delay_seconds}, ack_ids} -> + StreamManager.modify_deadline(manager_pid, ack_ids, delay_seconds) + end - {:nack, ack_ids} -> - StreamManager.modify_deadline(manager_pid, ack_ids, 0) - end + # 2. Stop receiving new messages and begin the drain phase. StreamManager + # will close the reader, start a drain timer, and close the stream once + # all outstanding (in-flight) messages have been acked/nacked. + StreamManager.stop_receiving(manager_pid) - # Flush buffered acks and close stream + {:noreply, [], %{state | draining: true}} + end + + @impl GenStage + def terminate(_reason, state) do + %{manager_pid: manager_pid} = state + + if Process.alive?(manager_pid) do + # The drain phase in prepare_for_draining already handled buffered and + # outstanding messages. Just ensure the stream is closed cleanly. StreamManager.close(manager_pid) end @@ -226,6 +405,17 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do defp validate_options!(opts) do case NimbleOptions.validate(opts, Options.definition()) do {:ok, validated} -> + # Cross-field validation: backoff_min must not exceed backoff_max. + # NimbleOptions validates each field independently but cannot express + # relationships between fields. + min = validated[:backoff_min] + max = validated[:backoff_max] + + if min > max do + raise ArgumentError, + "invalid Streaming.Producer options: :backoff_min (#{min}) must be <= :backoff_max (#{max})" + end + validated {:error, err} -> @@ -247,4 +437,31 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do Keyword.put(opts, :token_generator, generator) end end + + # When enable_message_ordering is true, inject a :partition_by function into + # each processor group so that messages with the same ordering_key are always + # routed to the same processor (ensuring sequential processing per key). + # + # Broadway's :partition_by option accepts a function that takes a Broadway.Message + # and returns a partition key. Broadway hashes the key and routes all messages + # with the same hash to the same processor stage. Messages with an empty or nil + # ordering_key are all routed to partition 0 (unordered messages interleave freely). + defp maybe_inject_partition_by(broadway_opts, opts) do + if opts[:enable_message_ordering] do + partition_fn = fn %Broadway.Message{metadata: %{orderingKey: key}} -> + key or "" + end + + processors = + broadway_opts + |> Keyword.get(:processors, []) + |> Enum.map(fn {name, proc_opts} -> + {name, Keyword.put_new(proc_opts, :partition_by, partition_fn)} + end) + + Keyword.put(broadway_opts, :processors, processors) + else + broadway_opts + end + end end diff --git a/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex b/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex index 4749403..79815e0 100644 --- a/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex +++ b/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex @@ -5,16 +5,14 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do # Responsibilities: # - Connect and reconnect with exponential backoff # - Receive messages from the stream and forward them to the producer - # - Accept ack/modifyAckDeadline requests from StreamingAcknowledger and - # send them on the stream + # - Route ack/modifyAckDeadline requests to AckBatcher, which sends them + # as unary RPCs via UnaryRpcClient (independent of this stream) # - Track outstanding (delivered but not acked) message ack_ids for # lease management and shutdown nacking - # - Extend message leases periodically via modifyAckDeadline - # - Buffer ack/nack requests during reconnection and replay on connect + # - Extend message leases periodically via modifyAckDeadline (through AckBatcher) # - Buffer incoming messages when the producer has no pending demand # (demand-based backpressure via notify_demand/1) # - Send keep-alive pings every 30s to prevent server idle timeout - # (matches the Go pingTicker and Python Heartbeater behaviour) # # Backpressure design: # The producer calls notify_demand/2 whenever Broadway signals new demand, @@ -30,47 +28,52 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do # A dedicated `StreamReader` process owns the gRPC stream for both the # Gun and Mint adapters. The reader calls `GRPC.Stub.recv/2` and forwards # decoded messages back as `{:stream_messages, msgs}`. See `StreamReader` - # for a detailed explanation of why a separate process is needed and how - # both adapters behave identically from this module's perspective. + # for a detailed explanation of why a separate process is needed. # # Keep-alive pings: # Google's servers close idle StreamingPull connections after ~60 seconds - # of inactivity. Both the official Go (pingTicker) and Python (Heartbeater) - # libraries send an empty StreamingPullRequest every 30 seconds to prevent - # this. We do the same via the :send_keepalive timer. The timer is started - # when the stream opens and cancelled when it closes. + # of inactivity. We send an empty StreamingPullRequest every 30 seconds to + # prevent this via the :send_keepalive timer. # # Reconnect deduplication: # Multiple events can arrive close together on a disconnect — e.g. # {:stream_error} followed by {:stream_closed} or an {:EXIT} signal. - # Without deduplication, each would schedule a separate :connect message, - # causing two concurrent connection attempts. We track the pending reconnect - # timer ref in `reconnect_ref` and skip scheduling if one is already set. + # We track the pending reconnect timer ref in `reconnect_ref` and skip + # scheduling if one is already set. # # Error classification: # gRPC errors are classified as :retryable (reconnect) or :terminal (stop). # Terminal errors (NOT_FOUND, PERMISSION_DENIED, etc.) indicate a permanent - # misconfiguration; retrying forever would be counterproductive. The GenServer - # stops with {:terminal_error, reason} and Broadway's supervision restarts it, - # which will surface the error via normal OTP crash reporting. - # - # Skip-backoff optimisation: - # If a stream error arrives quickly after the stream opened, we apply the - # full exponential backoff. If the stream was alive for >30s before failing - # (meaning the server already had time to send a DEADLINE_EXCEEDED), we skip - # the backoff sleep and reconnect immediately — matching the Go optimisation. + # misconfiguration. The GenServer stops and Broadway's supervision restarts it. use GenServer require Logger alias BroadwayCloudPubSub.{Backoff, MessageBuilder} - alias BroadwayCloudPubSub.Streaming.{ErrorClassifier, StreamReader} + + alias BroadwayCloudPubSub.Streaming.{ + AckBatcher, + AckTimeDistribution, + ErrorClassifier, + StreamReader + } + alias Google.Pubsub.V1.StreamingPullRequest - # Default keep-alive interval — matches Go's pingTicker and Python's Heartbeater. - # The server's inactivity timeout is ~60s; pinging at half that prevents closure. + # Default keep-alive interval. The server's inactivity timeout is ~60s; + # pinging at half that prevents closure. @default_keepalive_ms 30_000 + @default_drain_timeout_ms 30_000 + + # Grace period (seconds) subtracted from the adaptive deadline to compute the + # lease extension interval. Ensures the modack reaches the server before the + # current deadline expires. + @grace_period_seconds 5 + + # Minimum ack deadline for exactly-once delivery mode. + @min_deadline_exactly_once_seconds 60 + defstruct [ :producer_pid, :config, @@ -81,29 +84,46 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do :reader_pid, :backoff, :lease_timer, - :lease_extension_interval_ms, + # Distribution for tracking message processing times, used to compute the + # adaptive p99 ack deadline. + :ack_time_dist, :receiving, # Timer ref for the pending :connect message. Non-nil means a reconnect is # already scheduled — prevents double-scheduling from multiple close signals. :reconnect_ref, # Timer ref for the periodic :send_keepalive message. :keepalive_timer, - # Monotonic timestamp (ms) of when the current stream was opened. - # Used for the skip-backoff optimisation: if the stream ran >30s before - # failing, we skip the backoff sleep and reconnect immediately. - :stream_opened_at, - outstanding: MapSet.new(), + # Registered name of the AckBatcher (not PID) so we survive UnaryAckSupervisor + # restarts within a supervision cycle. + :ack_batcher, + # Whether the producer has asked us to stop (prepare_for_draining called). + # When true, new incoming messages from the stream are ignored and we close + # the reader immediately. + draining: false, + # Timer ref for the drain timeout. Non-nil means we are waiting for in-flight + # messages to be acked before closing the stream. + drain_timer: nil, + # Whether the subscription has message ordering enabled, as reported by the + # server in StreamingPullResponse.subscription_properties. + # Updated dynamically on each response that includes subscription_properties. + ordering_enabled: false, + # Whether the subscription has exactly-once delivery enabled, as reported by the + # server in StreamingPullResponse.subscription_properties. + # When true, the minimum ack deadline extension is raised from 10s to 60s. + # Updated dynamically on each response that includes subscription_properties. + exactly_once_enabled: false, + # Map of ack_id => %{received_at: monotonic_ms, max_expiry: monotonic_ms} + # for outstanding (delivered but not yet acked) messages. + # received_at is used to compute processing duration for the adaptive p99 deadline. + # max_expiry marks the absolute wall time beyond which we stop extending the lease. + outstanding: %{}, # Messages buffered while the producer has no pending demand. + # Stored as an Erlang :queue for O(1) enqueue and O(1) dequeue. # Naturally bounded by max_outstanding_messages (server-side flow control). - message_buffer: [], + message_buffer: :queue.new(), # How many messages the producer can currently accept. # Refreshed on each notify_demand/2; decremented when messages are flushed. - pending_demand: 0, - # Ack/nack requests buffered while the gRPC stream is down (reconnecting). - # Replayed in FIFO order on successful reconnect. Naturally bounded by - # max_outstanding_messages — no more acks can arrive than messages delivered. - ack_buffer: [], - ack_buffer_size: 0 + pending_demand: 0 ] # --- Public API --- @@ -173,6 +193,19 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do GenServer.call(pid, :close, 10_000) end + @doc """ + Returns the ack_ids of messages that are buffered in StreamManager but have + not yet been dispatched to Broadway processors. These are messages received + from the gRPC stream that are waiting for demand. + + Called from the producer's `prepare_for_draining/1` to nack buffered messages + during graceful shutdown before they are delivered to the pipeline. + """ + @spec get_buffered(pid()) :: [String.t()] + def get_buffered(pid) do + GenServer.call(pid, :get_buffered) + end + @doc """ Signals the current demand from the producer. The `amount` is the producer's total accumulated demand (not a delta). The StreamManager uses it as an upper @@ -199,15 +232,14 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do max: config.backoff_max ) - deadline_s = config.stream_ack_deadline_seconds - extension_percent = config.lease_extension_percent - lease_extension_interval_ms = round(deadline_s * extension_percent * 1000) + ack_batcher = Module.concat(config.broadway_name, AckBatcher) state = %__MODULE__{ producer_pid: nil, config: config, backoff: backoff, - lease_extension_interval_ms: lease_extension_interval_ms, + ack_time_dist: AckTimeDistribution.new(config.stream_ack_deadline_seconds), + ack_batcher: ack_batcher, receiving: true, pending_demand: 0 } @@ -218,7 +250,6 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do @impl GenServer def handle_info(:connect, state) do - # Clear the reconnect_ref — we are now executing the scheduled connect. state = %{state | reconnect_ref: nil} case connect(state) do @@ -237,24 +268,17 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do conn_pid = grpc_stream.channel.adapter_payload.conn_pid backoff = Backoff.reset(state.backoff) - pre_flush_state = %{ + state = %{ state | grpc_stream: grpc_stream, conn_pid: conn_pid, - backoff: backoff, - stream_opened_at: now_ms() + backoff: backoff } - case flush_ack_buffer(pre_flush_state) do - {:ok, state} -> - state = schedule_lease_timer(state) - state = schedule_keepalive_timer(state) - emit_telemetry(:connect, %{}, state.config) - {:noreply, state} - - {:error, reason, state} -> - {:noreply, schedule_reconnect(reset_connection(state, {:send_failed, reason}))} - end + state = schedule_lease_timer(state) + state = schedule_keepalive_timer(state) + emit_telemetry(:connect, %{}, state.config) + {:noreply, state} end # Stale :stream_opened from a previous reader (race during reconnect) — ignore. @@ -267,7 +291,23 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do if state.receiving and messages != [] do broadway_messages = Enum.map(messages, &build_broadway_message(&1, state)) ack_ids = Enum.map(messages, & &1.ack_id) - new_outstanding = Enum.reduce(ack_ids, state.outstanding, &MapSet.put(&2, &1)) + + now = now_ms() + max_extension_ms = state.config.max_extension_ms + + new_outstanding = + Enum.reduce(ack_ids, state.outstanding, fn ack_id, acc -> + Map.put(acc, ack_id, %{received_at: now, max_expiry: now + max_extension_ms}) + end) + + # Receipt modack: immediately extend the ack deadline with the current adaptive + # p99 value. This synchronises the server-side timer with the client-side timer, + # compensating for network latency between when the server sent the message and + # when we received it. Matches Go's receiptTicker and Python's receipt modack. + # Sent as a unary RPC via AckBatcher — independent of the bidi stream. + adaptive_deadline = AckTimeDistribution.percentile(state.ack_time_dist, 0.99) + AckBatcher.modack(state.ack_batcher, ack_ids, adaptive_deadline) + emit_telemetry(:receive_messages, %{count: length(broadway_messages)}, state.config) {:noreply, deliver_messages(%{state | outstanding: new_outstanding}, broadway_messages)} else @@ -275,6 +315,21 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do end end + # Subscription properties update forwarded from the StreamReader. + # The server sends these in StreamingPullResponse.subscription_properties on + # any response (including heartbeats) when the subscription's settings change. + def handle_info( + {:subscription_properties, + %{ + message_ordering_enabled: ordering_enabled, + exactly_once_delivery_enabled: exactly_once_enabled + } = _props}, + state + ) do + {:noreply, + %{state | ordering_enabled: ordering_enabled, exactly_once_enabled: exactly_once_enabled}} + end + # Stream-level gRPC error reported by the StreamReader. # Classify: retryable errors trigger reconnect; terminal errors stop the GenServer. def handle_info({:stream_error, error}, state) do @@ -323,43 +378,60 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do end def handle_info(:extend_leases, state) do - if MapSet.size(state.outstanding) == 0 or state.grpc_stream == nil do - timer = Process.send_after(self(), :extend_leases, state.lease_extension_interval_ms) - {:noreply, %{state | lease_timer: timer}} - else - ack_ids = MapSet.to_list(state.outstanding) - deadline = state.config.stream_ack_deadline_seconds - deadlines = List.duplicate(deadline, length(ack_ids)) - - case send_on_stream(state.grpc_stream, %StreamingPullRequest{ - modify_deadline_ack_ids: ack_ids, - modify_deadline_seconds: deadlines - }) do - :ok -> - timer = Process.send_after(self(), :extend_leases, state.lease_extension_interval_ms) - {:noreply, %{state | lease_timer: timer}} - - {:error, reason} -> - {:noreply, schedule_reconnect(reset_connection(state, {:send_failed, reason}))} + now = now_ms() + adaptive_deadline = AckTimeDistribution.percentile(state.ack_time_dist, 0.99) + + # When exactly-once delivery is enabled, enforce a higher minimum deadline of 60s. + effective_deadline = + if state.exactly_once_enabled do + max(adaptive_deadline, @min_deadline_exactly_once_seconds) + else + adaptive_deadline end + + # Partition into still-valid (before max_expiry) and expired (past max_expiry). + # Expired messages are dropped from lease management — the server will redeliver them. + {valid, expired} = + Map.split_with(state.outstanding, fn {_id, info} -> info.max_expiry > now end) + + if map_size(expired) > 0 do + emit_telemetry(:lease_expired, %{count: map_size(expired)}, state.config) + end + + emit_telemetry( + :extend_leases, + %{count: map_size(valid), deadline: effective_deadline}, + state.config + ) + + if map_size(valid) > 0 do + AckBatcher.modack(state.ack_batcher, Map.keys(valid), effective_deadline) end + + # Schedule next tick: (effective_deadline - grace_period) with jitter, minimum 1s. + # Jitter factor in [0.8, 0.9) prevents all StreamManagers from extending in lockstep. + base_interval_ms = max(1_000, (effective_deadline - @grace_period_seconds) * 1_000) + jitter_factor = 0.8 + :rand.uniform() * 0.1 + next_interval_ms = round(base_interval_ms * jitter_factor) + timer = Process.send_after(self(), :extend_leases, next_interval_ms) + {:noreply, %{state | outstanding: valid, lease_timer: timer}} end # Periodic keep-alive ping: send an empty StreamingPullRequest to prevent the - # server from closing an idle stream. Matches Go's pingTicker (30s) and Python's - # Heartbeater (30s). The server's inactivity timeout is ~60s; pinging at half - # that gives a comfortable margin. + # server from closing an idle stream. The server's inactivity timeout is ~60s. def handle_info(:send_keepalive, %{grpc_stream: nil} = state) do - # Stream is disconnected — don't ping, but reschedule for when it reconnects. - # (Timer will be cancelled and restarted by close_stream/schedule_keepalive_timer.) {:noreply, state} end def handle_info(:send_keepalive, state) do - case send_on_stream(state.grpc_stream, %StreamingPullRequest{}) do - :ok -> + adaptive_deadline = AckTimeDistribution.percentile(state.ack_time_dist, 0.99) + keepalive_request = %StreamingPullRequest{stream_ack_deadline_seconds: adaptive_deadline} + + case send_on_stream(state.grpc_stream, keepalive_request) do + {:ok, stream} -> + emit_telemetry(:keepalive, %{deadline: adaptive_deadline}, state.config) timer = schedule_keepalive_after(state.config) - {:noreply, %{state | keepalive_timer: timer}} + {:noreply, %{state | grpc_stream: stream, keepalive_timer: timer}} {:error, reason} -> {:noreply, schedule_reconnect(reset_connection(state, {:send_failed, reason}))} @@ -367,15 +439,12 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do end # Mint adapter signals connection loss to its parent process. - # When the test (or the real stack) routes this signal to StreamManager, - # treat it the same as a stream error: reset and reconnect. def handle_info({:elixir_grpc, :connection_down, conn_pid}, %{conn_pid: conn_pid} = state) do emit_telemetry(:disconnect, %{reason: :connection_down}, state.config) {:noreply, schedule_reconnect(reset_connection(state, :connection_down))} end # Gun adapter signals connection loss via :gun_down messages. - # Guard on the stored conn_pid to ignore stale/other connections. def handle_info( {:gun_down, conn_pid, _protocol, _reason, _killed_streams}, %{conn_pid: conn_pid} = state @@ -384,62 +453,77 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do {:noreply, schedule_reconnect(reset_connection(state, :connection_down))} end + def handle_info(:drain_timeout, state) do + emit_telemetry(:drain_timeout, %{}, state.config) + state = close_stream(%{state | drain_timer: nil}) + {:noreply, state} + end + def handle_info(_msg, state) do {:noreply, state} end @impl GenServer def handle_cast({:acknowledge, ack_ids}, state) do - new_outstanding = Enum.reduce(ack_ids, state.outstanding, &MapSet.delete(&2, &1)) - state = %{state | outstanding: new_outstanding} + now = now_ms() + + # Record processing times for the adaptive p99 deadline calculation. + ack_time_dist = + Enum.reduce(ack_ids, state.ack_time_dist, fn ack_id, dist -> + case Map.get(state.outstanding, ack_id) do + %{received_at: received_at} -> + duration_s = max(1, div(now - received_at, 1_000)) + AckTimeDistribution.record(dist, duration_s) + + nil -> + dist + end + end) - if state.grpc_stream do - case send_on_stream(state.grpc_stream, %StreamingPullRequest{ack_ids: ack_ids}) do - :ok -> - emit_telemetry(:ack, %{count: length(ack_ids)}, state.config) - {:noreply, state} + new_outstanding = Enum.reduce(ack_ids, state.outstanding, &Map.delete(&2, &1)) + state = %{state | outstanding: new_outstanding, ack_time_dist: ack_time_dist} - {:error, reason} -> - {:noreply, schedule_reconnect(reset_connection(state, {:send_failed, reason}))} - end - else - {:noreply, buffer_ack_request(state, {:ack, ack_ids})} - end + AckBatcher.ack(state.ack_batcher, ack_ids) + emit_telemetry(:ack, %{count: length(ack_ids)}, state.config) + + {:noreply, maybe_complete_drain(state)} end def handle_cast({:modify_deadline, ack_ids, deadline_seconds}, state) do - new_outstanding = + now = now_ms() + + # On nack (deadline == 0), record processing times and remove from outstanding + # so they are not lease-extended further. On non-zero deadline changes, + # keep the ack_ids in outstanding unchanged. + {new_outstanding, ack_time_dist} = if deadline_seconds == 0 do - Enum.reduce(ack_ids, state.outstanding, &MapSet.delete(&2, &1)) + dist = + Enum.reduce(ack_ids, state.ack_time_dist, fn ack_id, acc -> + case Map.get(state.outstanding, ack_id) do + %{received_at: received_at} -> + duration_s = max(1, div(now - received_at, 1_000)) + AckTimeDistribution.record(acc, duration_s) + + nil -> + acc + end + end) + + outstanding = Enum.reduce(ack_ids, state.outstanding, &Map.delete(&2, &1)) + {outstanding, dist} else - state.outstanding + {state.outstanding, state.ack_time_dist} end - deadlines = List.duplicate(deadline_seconds, length(ack_ids)) - state = %{state | outstanding: new_outstanding} + state = %{state | outstanding: new_outstanding, ack_time_dist: ack_time_dist} - if state.grpc_stream do - case send_on_stream(state.grpc_stream, %StreamingPullRequest{ - modify_deadline_ack_ids: ack_ids, - modify_deadline_seconds: deadlines - }) do - :ok -> - {:noreply, state} - - {:error, reason} -> - {:noreply, schedule_reconnect(reset_connection(state, {:send_failed, reason}))} - end - else - {:noreply, buffer_ack_request(state, {:modify_deadline, ack_ids, deadline_seconds})} - end + AckBatcher.modack(state.ack_batcher, ack_ids, deadline_seconds) + + {:noreply, maybe_complete_drain(state)} end # The producer signals its current total demand. Update pending_demand and # flush up to that many buffered messages to the producer. - def handle_cast({:demand_available, amount}, %{message_buffer: []} = state) do - {:noreply, %{state | pending_demand: amount}} - end - def handle_cast({:demand_available, amount}, state) do state = %{state | pending_demand: amount} {:noreply, flush_demand(state)} @@ -453,21 +537,29 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do end def handle_call(:stop_receiving, _from, state) do - {:reply, :ok, %{state | receiving: false}} + # Close the reader so no new messages arrive; keep the channel open for AckBatcher. + state = close_reader(state) + state = start_drain_timer(state) + {:reply, :ok, %{state | receiving: false, draining: true}} end def handle_call(:get_outstanding, _from, state) do - {:reply, MapSet.to_list(state.outstanding), state} + {:reply, Map.keys(state.outstanding), state} + end + + def handle_call(:get_buffered, _from, state) do + ack_ids = + state.message_buffer + |> :queue.to_list() + |> Enum.map(fn %Broadway.Message{acknowledger: {_, _, %{ack_id: id}}} -> id end) + + {:reply, ack_ids, state} end def handle_call(:close, _from, state) do - # Best-effort: flush buffered acks before closing. Errors are ignored - # because we're shutting down regardless. - state = - case flush_ack_buffer(state) do - {:ok, s} -> s - {:error, _reason, s} -> %{s | ack_buffer: [], ack_buffer_size: 0} - end + # Best-effort flush before closing. Guard against AckBatcher already being + # dead during pipeline shutdown (Broadway stops children in reverse start order). + flush_batcher_if_alive(state.ack_batcher) state = close_stream(state) {:reply, :ok, state} @@ -475,9 +567,12 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do @impl GenServer def terminate(_reason, state) do - cancel_lease_timer(state) - cancel_keepalive_timer(state) - close_stream(state) + state + |> cancel_lease_timer() + |> cancel_keepalive_timer() + |> cancel_drain_timer() + |> close_stream() + :ok end @@ -495,18 +590,17 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do # Opens the gRPC channel and spawns the StreamReader, which will open the # stream and send {:stream_opened, reader_pid, grpc_stream} back to us. - # The actual grpc_stream struct is stored on {:stream_opened} receipt, not here. defp connect_stream(channel, state) do - reader_pid = StreamReader.start_link(self(), channel, state.config) - - {:ok, - %{ - state - | channel: channel, - reader_pid: reader_pid, - grpc_stream: nil, - conn_pid: nil - }} + with {:ok, reader_pid} <- StreamReader.start_link(self(), channel, state.config) do + {:ok, + %{ + state + | channel: channel, + reader_pid: reader_pid, + grpc_stream: nil, + conn_pid: nil + }} + end rescue e -> try do @@ -551,39 +645,35 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do end defp send_on_stream(grpc_stream, request) do - GRPC.Stub.send_request(grpc_stream, request) - :ok + case GRPC.Stub.send_request(grpc_stream, request) do + %GRPC.Client.Stream{} = stream -> {:ok, stream} + {:error, reason} -> {:error, reason} + end catch kind, reason -> {:error, {kind, reason}} end defp reset_connection(state, reason) do - # Drop buffered (not-yet-delivered) messages on disconnect. Their ack_ids are - # in `outstanding`, so extract and remove them before closing the stream to - # avoid pointless lease-extension attempts for messages that will be redelivered. + # Drop buffered (not-yet-delivered) messages on disconnect — their ack_ids + # are in `outstanding`, so remove them to avoid pointless lease-extension + # attempts for messages that will be redelivered. buffered_ack_ids = state.message_buffer + |> :queue.to_list() |> Enum.map(fn %Broadway.Message{acknowledger: {_, _, %{ack_id: id}}} -> id end) new_outstanding = - Enum.reduce(buffered_ack_ids, state.outstanding, &MapSet.delete(&2, &1)) + Enum.reduce(buffered_ack_ids, state.outstanding, &Map.delete(&2, &1)) # Preserve `pending_demand` across reconnection. The producer's demand counter # survives the disconnect and it won't re-signal demand it already sent. - # Clearing pending_demand here would cause a demand deadlock: the producer has - # pending demand but thinks it already notified us, while we lost the count. - # Buffered messages are dropped (the server will redeliver them), but demand - # state must carry over so the reconnected stream can deliver immediately. - # - # Record the disconnect reason so schedule_reconnect can apply the skip-backoff - # optimisation: if the stream was alive for >30s, reconnect without delay. + # Clearing it would cause a demand deadlock: the producer has pending demand + # but thinks it already notified us, while we lost the count. close_stream( %{ state - | message_buffer: [], + | message_buffer: :queue.new(), outstanding: new_outstanding, - stream_opened_at: state.stream_opened_at, - # carry stream_opened_at through for the skip-backoff check reconnect_ref: state.reconnect_ref }, reason @@ -594,7 +684,6 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do defp close_stream(%{reader_pid: nil, grpc_stream: nil} = state), do: state defp close_stream(%{reader_pid: reader_pid, grpc_stream: grpc_stream, channel: channel} = state) do - # Stop the reader first so it doesn't send more messages while we clean up. # Unlink before killing to prevent the EXIT signal from triggering reconnect. if is_pid(reader_pid) do Process.unlink(reader_pid) @@ -612,8 +701,8 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do if channel do # Only call disconnect if the underlying connection process is alive. # When the server closes the channel (e.g. after DEADLINE_EXCEEDED), the - # adapter's connection process may already be gone. Calling disconnect on a - # dead channel causes a FunctionClauseError inside grpc's GenServer. + # adapter's connection process may already be gone. Calling disconnect on + # a dead channel causes a FunctionClauseError inside grpc's GenServer. conn_alive? = case state.conn_pid do pid when is_pid(pid) -> Process.alive?(pid) @@ -631,12 +720,9 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do # Cancel the keep-alive timer — it will be restarted when the new stream opens. state = cancel_keepalive_timer(state) - %{state | reader_pid: nil, grpc_stream: nil, channel: nil, conn_pid: nil} end - # close_stream with a reason — delegates to the main close_stream but also - # stores the reason for the skip-backoff optimisation in schedule_reconnect. defp close_stream(state, _reason) do close_stream(state) end @@ -647,47 +733,32 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do raise "StreamManager failed to connect and backoff is :stop — crashing" end - # Deduplication: if a :connect message is already pending, do not schedule - # another one. This prevents the double-reconnect race where {:stream_error} - # and {:stream_closed} (or {:EXIT}) both arrive within a single disconnect. + # Deduplication: if a :connect is already pending, skip to prevent the + # double-reconnect race where {:stream_error} and {:stream_closed} (or {:EXIT}) + # both arrive within a single disconnect. defp schedule_reconnect(%{reconnect_ref: ref} = state) when not is_nil(ref) do state end - defp schedule_reconnect(%{backoff: backoff, stream_opened_at: opened_at} = state) do + defp schedule_reconnect(%{backoff: backoff} = state) do {timeout, new_backoff} = Backoff.backoff(backoff) - - # Skip-backoff optimisation (matches Go's behaviour): - # If the stream was alive for more than 30 seconds before failing, the server - # had time to process a DEADLINE_EXCEEDED (or similar timeout). Adding a - # backoff delay on top of the already-long blocking period compounds the - # reconnect latency unnecessarily. Reconnect immediately instead. - effective_timeout = - if skip_backoff?(opened_at) do - 0 - else - timeout - end - - ref = Process.send_after(self(), :connect, effective_timeout) + emit_telemetry(:reconnect, %{delay: timeout}, state.config) + ref = Process.send_after(self(), :connect, timeout) %{state | backoff: new_backoff, reconnect_ref: ref} end - # Returns true if the stream was open long enough that we should skip the - # exponential backoff sleep. Threshold: 30 seconds (same as Go). - defp skip_backoff?(nil), do: false - - defp skip_backoff?(opened_at) do - now_ms() - opened_at >= 30_000 - end - defp now_ms, do: System.monotonic_time(:millisecond) # --- Private: lease management --- defp schedule_lease_timer(state) do cancel_lease_timer(state) - timer = Process.send_after(self(), :extend_leases, state.lease_extension_interval_ms) + # Initial interval: (configured deadline - grace period) with jitter, minimum 1s. + deadline_s = state.config.stream_ack_deadline_seconds + base_ms = max(1_000, (deadline_s - @grace_period_seconds) * 1_000) + jitter_factor = 0.8 + :rand.uniform() * 0.1 + interval_ms = round(base_ms * jitter_factor) + timer = Process.send_after(self(), :extend_leases, interval_ms) %{state | lease_timer: timer} end @@ -718,80 +789,83 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do %{state | keepalive_timer: nil} end - # --- Private: ack buffering --- + # --- Private: drain --- - defp buffer_ack_request(%{ack_buffer: buffer, ack_buffer_size: size} = state, request) do - emit_telemetry(:ack_buffered, %{buffer_size: size + 1}, state.config) - %{state | ack_buffer: [request | buffer], ack_buffer_size: size + 1} - end + # Kill the reader so no new messages arrive from the gRPC stream. + # The channel stays open for AckBatcher's unary ack/modack RPCs. + defp close_reader(%{reader_pid: nil} = state), do: state - defp flush_ack_buffer(%{ack_buffer: [], grpc_stream: _} = state), do: {:ok, state} + defp close_reader(%{reader_pid: reader_pid} = state) do + Process.unlink(reader_pid) + Process.exit(reader_pid, :kill) + %{state | reader_pid: nil} + end - defp flush_ack_buffer(%{ack_buffer: _buffer, grpc_stream: nil} = state), do: {:ok, state} + defp start_drain_timer(state) do + timeout = Map.get(state.config, :drain_timeout_ms, @default_drain_timeout_ms) + timer = Process.send_after(self(), :drain_timeout, timeout) + %{state | drain_timer: timer} + end - defp flush_ack_buffer(%{ack_buffer: buffer, grpc_stream: grpc_stream} = state) do - result = - buffer - |> Enum.reverse() - |> Enum.reduce_while(:ok, fn entry, :ok -> - request = - case entry do - {:ack, ack_ids} -> - %StreamingPullRequest{ack_ids: ack_ids} + defp cancel_drain_timer(%{drain_timer: nil} = state), do: state - {:modify_deadline, ack_ids, deadline_seconds} -> - deadlines = List.duplicate(deadline_seconds, length(ack_ids)) + defp cancel_drain_timer(%{drain_timer: timer} = state) do + Process.cancel_timer(timer) + %{state | drain_timer: nil} + end - %StreamingPullRequest{ - modify_deadline_ack_ids: ack_ids, - modify_deadline_seconds: deadlines - } - end + # After each ack/nack, check if we are draining and all outstanding messages + # have been resolved. If so, cancel the drain timer and close the stream. + defp maybe_complete_drain(%{draining: true, outstanding: outstanding} = state) + when map_size(outstanding) == 0 do + state = cancel_drain_timer(state) - case send_on_stream(grpc_stream, request) do - :ok -> {:cont, :ok} - {:error, reason} -> {:halt, {:error, reason}} - end - end) + # Guard against AckBatcher already being dead during pipeline shutdown. + flush_batcher_if_alive(state.ack_batcher) - case result do - :ok -> {:ok, %{state | ack_buffer: [], ack_buffer_size: 0}} - {:error, reason} -> {:error, reason, state} - end + emit_telemetry(:drain_complete, %{}, state.config) + close_stream(state) end + defp maybe_complete_drain(state), do: state + # --- Private: message building --- # Buffer incoming messages, then flush up to pending_demand to the producer. - # Returns updated state. defp deliver_messages(state, messages) do - # Prepend for O(1); reversed on flush. - new_buffer = Enum.reduce(messages, state.message_buffer, fn msg, acc -> [msg | acc] end) + new_buffer = Enum.reduce(messages, state.message_buffer, &:queue.in(&1, &2)) flush_demand(%{state | message_buffer: new_buffer}) end # Flush up to `pending_demand` messages from the buffer to the producer. - # If the buffer is empty or pending_demand is 0, this is a no-op. defp flush_demand(%{pending_demand: 0} = state), do: state - defp flush_demand(%{message_buffer: []} = state), do: state defp flush_demand(state) do - all_messages = Enum.reverse(state.message_buffer) - to_send = min(state.pending_demand, length(all_messages)) - {batch, rest} = Enum.split(all_messages, to_send) + if :queue.is_empty(state.message_buffer) do + state + else + {remaining, demand_left, batch_reversed} = + flush_demand_loop(state.message_buffer, state.pending_demand, []) + + send(state.producer_pid, {:stream_messages, Enum.reverse(batch_reversed)}) + %{state | message_buffer: remaining, pending_demand: demand_left} + end + end - send(state.producer_pid, {:stream_messages, batch}) + defp flush_demand_loop(queue, 0, acc), do: {queue, 0, acc} - # Store remainder back in reversed (prepend-friendly) order - reversed_rest = Enum.reverse(rest) - %{state | message_buffer: reversed_rest, pending_demand: state.pending_demand - to_send} + defp flush_demand_loop(queue, n, acc) do + case :queue.out(queue) do + {{:value, msg}, rest} -> flush_demand_loop(rest, n - 1, [msg | acc]) + {:empty, _} -> {queue, n, acc} + end end defp build_broadway_message( %{ack_id: ack_id, message: pubsub_msg, delivery_attempt: delivery_attempt}, state ) do - # ack_ref is the Broadway pipeline name — the key used in :persistent_term by the producer + # ack_ref is the Broadway pipeline name, used as the persistent_term key. ack_ref = state.config.broadway[:name] acknowledger = BroadwayCloudPubSub.Streaming.Acknowledger.builder(ack_ref).(ack_id) @@ -829,6 +903,18 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do apply(mod, fun, args) end + # Flush AckBatcher if its process is currently alive. Guards against the + # batcher being down during pipeline shutdown (Broadway stops children in + # reverse start order). + defp flush_batcher_if_alive(nil), do: :ok + + defp flush_batcher_if_alive(batcher) do + case GenServer.whereis(batcher) do + nil -> :ok + pid -> AckBatcher.flush(pid) + end + end + # --- Private: telemetry --- defp emit_telemetry(event, measurements, config) do diff --git a/lib/broadway_cloud_pub_sub/streaming/stream_reader.ex b/lib/broadway_cloud_pub_sub/streaming/stream_reader.ex index 8ba3656..8484276 100644 --- a/lib/broadway_cloud_pub_sub/streaming/stream_reader.ex +++ b/lib/broadway_cloud_pub_sub/streaming/stream_reader.ex @@ -11,33 +11,6 @@ defmodule BroadwayCloudPubSub.Streaming.StreamReader do # calls, and timers). By spawning a dedicated reader process, the GenServer # remains fully responsive while streaming runs concurrently. # - # ## Unified adapter abstraction - # - # This module uses only the public `GRPC.Stub` API: - # - # 1. `Stub.streaming_pull(channel)` — opens the bidirectional stream - # 2. `GRPC.Stub.send_request(stream, initial_request)` — sends the initial - # StreamingPullRequest (subscription name, flow control settings, etc.) - # 3. `GRPC.Stub.recv(stream)` — returns an `{:ok, Enumerable.t()}` of - # decoded `{:ok, StreamingPullResponse.t()}` items - # - # Both the Gun and Mint adapters implement this interface identically from the - # caller's perspective: - # - # - **Gun**: `:gun.post` is called from this process, so Gun sends all - # `{:gun_response, :gun_data, ...}` messages to this process's mailbox. - # `GRPC.Stub.recv` returns a `Stream.unfold/2` backed by `:gun.await/3`, - # which is a selective receive that processes those mailbox messages. - # - # - **Mint**: `GRPC.Client.Adapters.Mint.ConnectionProcess` owns the TCP - # connection. A `StreamResponseProcess` is started per stream. Decoded - # messages are enqueued there and served to the caller via - # `GenServer.call(:get_response, :infinity)`. - # - # In both cases the library handles gRPC frame decoding (5-byte - # length-prefixed framing + codec decode) and delivers decoded protobuf - # structs to the caller. - # # ## Message protocol with StreamManager # # After the stream is opened, this process sends the grpc_stream back: @@ -57,15 +30,9 @@ defmodule BroadwayCloudPubSub.Streaming.StreamReader do # # After receiving `{:stream_opened, _pid, grpc_stream}`, the StreamManager # calls `GRPC.Stub.send_request(grpc_stream, request)` directly from the - # GenServer process. - # - # - **Gun**: `:gun.data/4` is a fire-and-forget `gen_statem:cast`. It can be - # called from any process regardless of who opened the stream. - # - **Mint**: `ConnectionProcess.stream_request_body/3` is also a GenServer - # cast, callable from any process. - # - # Both are safe to call from the StreamManager GenServer concurrently with - # the reader process enumerating the receive stream. + # GenServer process. Both the Gun and Mint adapters implement this as a + # fire-and-forget cast, safe to call from any process concurrently with the + # reader process enumerating the receive stream. alias Google.Pubsub.V1.{StreamingPullRequest, StreamingPullResponse} alias Google.Pubsub.V1.Subscriber.Stub @@ -79,7 +46,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamReader do """ @spec start_link(pid(), GRPC.Channel.t(), map()) :: pid() def start_link(manager, channel, config) do - spawn_link(fn -> run(manager, channel, config) end) + Task.start_link(fn -> run(manager, channel, config) end) end # --- Private --- @@ -115,12 +82,18 @@ defmodule BroadwayCloudPubSub.Streaming.StreamReader do defp enumerate(enum, manager) do enum |> Stream.each(fn - {:ok, %StreamingPullResponse{received_messages: msgs}} when msgs != [] -> - send(manager, {:stream_messages, msgs}) + {:ok, %StreamingPullResponse{received_messages: msgs, subscription_properties: props}} -> + # Forward subscription_properties whenever the server sends them. + # The server may send this on any response (including heartbeats) to + # signal that the subscription's ordering or exactly-once settings have + # changed. StreamManager stores the latest value in state. + if props != nil do + send(manager, {:subscription_properties, props}) + end - {:ok, %StreamingPullResponse{}} -> - # Heartbeat / empty response — nothing to forward - :ok + if msgs != [] do + send(manager, {:stream_messages, msgs}) + end {:error, error} -> send(manager, {:stream_error, error}) diff --git a/lib/broadway_cloud_pub_sub/streaming/unary_ack_supervisor.ex b/lib/broadway_cloud_pub_sub/streaming/unary_ack_supervisor.ex new file mode 100644 index 0000000..1290635 --- /dev/null +++ b/lib/broadway_cloud_pub_sub/streaming/unary_ack_supervisor.ex @@ -0,0 +1,72 @@ +defmodule BroadwayCloudPubSub.Streaming.UnaryAckSupervisor do + @moduledoc false + + # Supervisor that owns the AckBatcher and UnaryRpcClient for a single Broadway + # Streaming.Producer pipeline. + # + # Uses :one_for_one so each child restarts independently. AckBatcher accumulates + # pending ack_ids in its state; restarting it when UnaryRpcClient crashes would + # permanently lose those buffered acks (messages would be redelivered by the + # server after deadline expiry, causing duplicate processing). + # + # AckBatcher references UnaryRpcClient by its registered name derived from the + # Broadway pipeline name, so it survives UnaryRpcClient restarts automatically. + # Any flush attempted while UnaryRpcClient is down is deferred to the next + # timer tick. + # + # Started by prepare_for_start/2 as a Broadway supervisor child before + # StreamManager, guaranteeing the batcher and RPC client are available when + # StreamManager first processes ack requests. + + use Supervisor + + alias BroadwayCloudPubSub.Streaming.{AckBatcher, UnaryRpcClient} + + @spec start_link(keyword()) :: Supervisor.on_start() + def start_link(opts) do + {name, opts} = Keyword.pop(opts, :name) + + if name do + Supervisor.start_link(__MODULE__, opts, name: name) + else + Supervisor.start_link(__MODULE__, opts) + end + end + + @impl Supervisor + def init(opts) do + broadway_name = Keyword.fetch!(opts, :broadway_name) + config = Keyword.fetch!(opts, :config) + + rpc_client_name = Module.concat(broadway_name, UnaryRpcClient) + batcher_name = Module.concat(broadway_name, AckBatcher) + + rpc_client_opts = + config + |> Keyword.put(:name, rpc_client_name) + + batcher_opts = + config + |> Keyword.put(:name, batcher_name) + |> Keyword.put(:rpc_client, rpc_client_name) + + children = [ + %{ + id: UnaryRpcClient, + start: {UnaryRpcClient, :start_link, [rpc_client_opts]}, + restart: :permanent + }, + %{ + id: AckBatcher, + start: {AckBatcher, :start_link, [batcher_opts]}, + restart: :permanent, + # AckBatcher.flush/1 uses GenServer.call with a 15_000ms timeout. + # The OTP default shutdown of 5_000ms would kill AckBatcher before + # an in-progress flush can complete, dropping buffered acks. + shutdown: 20_000 + } + ] + + Supervisor.init(children, strategy: :one_for_one) + end +end diff --git a/lib/broadway_cloud_pub_sub/streaming/unary_rpc_client.ex b/lib/broadway_cloud_pub_sub/streaming/unary_rpc_client.ex new file mode 100644 index 0000000..c3d210d --- /dev/null +++ b/lib/broadway_cloud_pub_sub/streaming/unary_rpc_client.ex @@ -0,0 +1,411 @@ +defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClient do + @moduledoc false + + # GenServer that owns a dedicated gRPC channel for unary Acknowledge and + # ModifyAckDeadline RPCs. + # + # Acks and modacks are delivered independently of the StreamingPull stream state — + # if the stream is reconnecting, acks can still be sent and will succeed. A + # separate channel avoids HOL-blocking the message stream with ack traffic. + # + # ## Error handling + # + # Each RPC is attempted exactly once on the current channel: + # + # - Success → return :ok, emit telemetry + # - Retryable error → schedule async channel reconnect via send(self(), :reconnect), + # return {:error, reason}. The caller (AckBatcher) retains the + # ack_ids and retries on the next flush timer tick. + # - Terminal error → stop the GenServer via {:stop, reason, reply, state} so the + # supervisor restarts it fresh. The caller receives {:error, reason} + # first (from the reply in the stop tuple) so it can retain ack_ids. + # + # Retry timing and back-pressure belong to the caller (AckBatcher), not here. + # + # ## API + # + # acknowledge/2 and modify_ack_deadline/3 are synchronous calls that return + # {:ok, remaining_ack_ids} on completion. remaining_ack_ids is the list of ack_ids + # that were NOT successfully delivered (empty list on full success). On a hard + # process error (noproc, timeout) they return {:error, reason}. + + use GenServer + + alias BroadwayCloudPubSub.{Backoff} + alias BroadwayCloudPubSub.Streaming.{AckResult, ErrorClassifier} + alias Google.Pubsub.V1.{AcknowledgeRequest, ModifyAckDeadlineRequest, Subscriber} + + require Logger + + @max_ack_ids_per_request 2_500 + + defstruct [ + :config, + :channel, + :backoff, + # True when a :reconnect message is already queued in the mailbox. + # Prevents multiple concurrent reconnect attempts from stacking up. + reconnect_pending: false + ] + + # --- Public API --- + + @spec start_link(keyword()) :: GenServer.on_start() + def start_link(opts) do + {name, opts} = Keyword.pop(opts, :name) + + if name do + GenServer.start_link(__MODULE__, opts, name: name) + else + GenServer.start_link(__MODULE__, opts) + end + end + + @doc """ + Sends acknowledge requests for the given ack_ids. + + Large lists are chunked to @max_ack_ids_per_request. Returns + {:ok, remaining_ack_ids} where remaining_ack_ids is the list of ack_ids + that could not be delivered (empty on full success). Returns {:error, reason} + only on a hard process failure (noproc, timeout, etc.). + """ + @spec acknowledge(GenServer.server(), [String.t()]) :: {:ok, [String.t()]} | {:error, term()} + def acknowledge(pid, ack_ids) when is_list(ack_ids) do + ack_ids + |> Enum.chunk_every(@max_ack_ids_per_request) + |> Enum.reduce({:ok, []}, fn + chunk, {:ok, failed_so_far} -> + case GenServer.call(pid, {:acknowledge, chunk}, 30_000) do + :ok -> {:ok, failed_so_far} + {:error, _reason} -> {:ok, failed_so_far ++ chunk} + end + + _chunk, {:error, _} = err -> + # Hard process error — don't attempt remaining chunks + err + end) + catch + :exit, reason -> + {:error, {:call_failed, reason}} + end + + @doc """ + Sends modifyAckDeadline requests for the given ack_ids and deadline. + + Same chunking and return semantics as acknowledge/2. + """ + @spec modify_ack_deadline(GenServer.server(), [String.t()], non_neg_integer()) :: + {:ok, [String.t()]} | {:error, term()} + def modify_ack_deadline(pid, ack_ids, deadline_seconds) when is_list(ack_ids) do + ack_ids + |> Enum.chunk_every(@max_ack_ids_per_request) + |> Enum.reduce({:ok, []}, fn + chunk, {:ok, failed_so_far} -> + case GenServer.call(pid, {:modify_ack_deadline, chunk, deadline_seconds}, 30_000) do + :ok -> {:ok, failed_so_far} + {:error, _reason} -> {:ok, failed_so_far ++ chunk} + end + + _chunk, {:error, _} = err -> + err + end) + catch + :exit, reason -> + {:error, {:call_failed, reason}} + end + + # --- GenServer callbacks --- + + @impl GenServer + def init(opts) do + # Trap exits so that when the Mint/Gun ConnectionProcess linked by + # GRPC.Stub.connect exits normally (e.g. on disconnect/shutdown), the + # {:EXIT, pid, :normal} signal is delivered as a handle_info message + # instead of killing this GenServer. + Process.flag(:trap_exit, true) + config = Map.new(opts) + + backoff = + Backoff.new( + type: config.backoff_type, + min: config.backoff_min, + max: config.backoff_max + ) + + state = %__MODULE__{config: config, backoff: backoff} + + # Open initial channel immediately. + case open_channel(state) do + {:ok, channel} -> + {:ok, %{state | channel: channel}} + + {:error, reason} -> + emit_telemetry(:connection_failure, %{reason: reason}, config) + {:ok, state} + end + end + + @impl GenServer + def handle_call({:acknowledge, ack_ids}, _from, state) do + state = ensure_channel(state) + + case state.channel do + nil -> + {:reply, {:error, :no_channel}, state} + + channel -> + request = %AcknowledgeRequest{ + subscription: state.config.subscription, + ack_ids: ack_ids + } + + case Subscriber.Stub.acknowledge(channel, request, timeout: 30_000) do + {:ok, _} -> + emit_telemetry(:ack, %{count: length(ack_ids)}, state.config) + {:reply, :ok, state} + + {:error, error} -> + case ErrorClassifier.classify(error) do + :retryable -> + # Parse per-ack-ID errors from the gRPC error details. + # For exactly-once subscriptions, the server can return a + # retryable RPC error that contains per-ack-ID permanent + # failures embedded in google.rpc.ErrorInfo details. + # Permanent failures are dropped; transient ones are returned + # to AckBatcher for retry on the next flush. + per_ack_errors = AckResult.parse_error_details(Map.get(error, :details)) + {transient_ids, permanent_ids} = split_by_ack_result(ack_ids, per_ack_errors) + + if permanent_ids != [] do + emit_telemetry( + :permanent_failure, + %{count: length(permanent_ids)}, + state.config + ) + end + + emit_telemetry( + :ack_failure, + %{count: length(transient_ids), reason: error}, + state.config + ) + + state = schedule_reconnect(state) + {:reply, {:error, {error, transient_ids}}, state} + + :terminal -> + Logger.error( + "[UnaryRpcClient] Terminal error on ack (#{length(ack_ids)} ids): #{inspect(error)}" + ) + + # Reply first so caller can retain ack_ids, then stop so supervisor restarts fresh. + {:stop, {:terminal_error, error}, {:error, error}, state} + end + end + end + end + + def handle_call({:modify_ack_deadline, ack_ids, deadline_seconds}, _from, state) do + state = ensure_channel(state) + + case state.channel do + nil -> + {:reply, {:error, :no_channel}, state} + + channel -> + request = %ModifyAckDeadlineRequest{ + subscription: state.config.subscription, + ack_ids: ack_ids, + ack_deadline_seconds: deadline_seconds + } + + case Subscriber.Stub.modify_ack_deadline(channel, request, timeout: 30_000) do + {:ok, _} -> + emit_telemetry(:modack, %{count: length(ack_ids)}, state.config) + {:reply, :ok, state} + + {:error, error} -> + case ErrorClassifier.classify(error) do + :retryable -> + per_ack_errors = AckResult.parse_error_details(Map.get(error, :details)) + {transient_ids, permanent_ids} = split_by_ack_result(ack_ids, per_ack_errors) + + if permanent_ids != [] do + emit_telemetry( + :permanent_failure, + %{count: length(permanent_ids)}, + state.config + ) + end + + emit_telemetry( + :modack_failure, + %{count: length(transient_ids), deadline: deadline_seconds, reason: error}, + state.config + ) + + state = schedule_reconnect(state) + {:reply, {:error, {error, transient_ids}}, state} + + :terminal -> + Logger.error( + "[UnaryRpcClient] Terminal error on modack (#{length(ack_ids)} ids, deadline=#{deadline_seconds}s): #{inspect(error)}" + ) + + {:stop, {:terminal_error, error}, {:error, error}, state} + end + end + end + end + + @impl GenServer + def handle_info(:reconnect, state) do + # Clear the pending flag before attempting so new errors during reconnect + # can queue a fresh :reconnect if needed. + state = %{state | reconnect_pending: false} + state = disconnect_channel(state) + + case open_channel(state) do + {:ok, channel} -> + emit_telemetry(:connect, %{}, state.config) + backoff = Backoff.reset(state.backoff) + {:noreply, %{state | channel: channel, backoff: backoff}} + + {:error, reason} -> + emit_telemetry(:connection_failure, %{reason: reason}, state.config) + {delay, new_backoff} = Backoff.backoff(state.backoff) + Process.send_after(self(), :reconnect, delay || state.config.backoff_min) + {:noreply, %{state | channel: nil, backoff: new_backoff, reconnect_pending: true}} + end + end + + # The Mint/Gun ConnectionProcess spawned by GRPC.Stub.connect is linked to + # this GenServer. With trap_exit enabled (set in init/1), its normal exit on + # disconnect/shutdown is delivered here rather than killing us. + # + # :normal — peer disconnected cleanly; nil out the channel so + # ensure_channel/1 will reopen it on the next request. + # other — unexpected crash; schedule a reconnect. + def handle_info({:EXIT, _pid, :normal}, state) do + {:noreply, %{state | channel: nil}} + end + + def handle_info({:EXIT, _pid, _reason}, state) do + state = schedule_reconnect(%{state | channel: nil}) + {:noreply, state} + end + + def handle_info(_msg, state), do: {:noreply, state} + + @impl GenServer + def terminate(_reason, %{channel: channel}) when not is_nil(channel) do + try do + GRPC.Stub.disconnect(channel) + catch + _, _ -> :ok + end + + :ok + end + + def terminate(_reason, _state), do: :ok + + # --- Private --- + + defp ensure_channel(%{channel: nil} = state) do + case open_channel(state) do + {:ok, channel} -> %{state | channel: channel} + {:error, _} -> state + end + end + + defp ensure_channel(state), do: state + + defp schedule_reconnect(%{channel: nil} = state), do: state + # Skip if a :reconnect is already queued — prevents churn when multiple + # retryable RPC errors arrive before :reconnect is processed. + defp schedule_reconnect(%{reconnect_pending: true} = state), do: state + + defp schedule_reconnect(state) do + send(self(), :reconnect) + %{state | reconnect_pending: true} + end + + defp disconnect_channel(%{channel: nil} = state), do: state + + defp disconnect_channel(%{channel: channel} = state) do + try do + GRPC.Stub.disconnect(channel) + catch + _, _ -> :ok + end + + %{state | channel: nil} + end + + defp open_channel(%{config: config}) do + token_result = + case config.token_generator do + {mod, fun, args} -> apply(mod, fun, args) + end + + with {:ok, token} <- token_result do + adapter_mod = + case config.adapter do + :gun -> GRPC.Client.Adapters.Gun + :mint -> GRPC.Client.Adapters.Mint + end + + base_opts = [ + adapter: adapter_mod, + headers: [{"authorization", "Bearer #{token}"}], + adapter_opts: [http2_opts: %{settings_timeout: :infinity}] + ] + + opts = + if config.use_ssl do + cred = GRPC.Credential.new(ssl: [cacerts: :public_key.cacerts_get()]) + Keyword.put(base_opts, :cred, cred) + else + base_opts + end + + case GRPC.Stub.connect(config.grpc_endpoint, opts) do + {:ok, channel} -> {:ok, channel} + {:error, reason} -> {:error, {:connect_failed, reason}} + end + end + end + + defp emit_telemetry(event, measurements, config) do + metadata = %{ + name: config.broadway_name, + subscription: config.subscription + } + + :telemetry.execute( + [:broadway_cloud_pub_sub, :unary, event], + measurements, + metadata + ) + end + + # Splits ack_ids into {transient, permanent} based on per-ack-ID error details + # parsed from the gRPC error. If there are no per-ack-ID details (the common + # case), all ids are treated as transient so the caller retries them all. + defp split_by_ack_result(ack_ids, per_ack_errors) when map_size(per_ack_errors) == 0 do + {ack_ids, []} + end + + defp split_by_ack_result(ack_ids, per_ack_errors) do + {transient, permanent} = + Enum.reduce(ack_ids, {[], []}, fn ack_id, {t, p} -> + case Map.get(per_ack_errors, ack_id) do + {:permanent, _reason} -> {t, [ack_id | p]} + _ -> {[ack_id | t], p} + end + end) + + {Enum.reverse(transient), Enum.reverse(permanent)} + end +end diff --git a/test/broadway_cloud_pub_sub/backoff_test.exs b/test/broadway_cloud_pub_sub/backoff_test.exs index 9eefe9f..5ee4b62 100644 --- a/test/broadway_cloud_pub_sub/backoff_test.exs +++ b/test/broadway_cloud_pub_sub/backoff_test.exs @@ -25,8 +25,8 @@ defmodule BroadwayCloudPubSub.BackoffTest do test "uses default min and max when not provided" do b = Backoff.new() - assert b.min == 1_000 - assert b.max == 30_000 + assert b.min == 100 + assert b.max == 60_000 end test "accepts custom min and max" do diff --git a/test/broadway_cloud_pub_sub/streaming/ack_batcher_test.exs b/test/broadway_cloud_pub_sub/streaming/ack_batcher_test.exs new file mode 100644 index 0000000..4e5421e --- /dev/null +++ b/test/broadway_cloud_pub_sub/streaming/ack_batcher_test.exs @@ -0,0 +1,388 @@ +defmodule BroadwayCloudPubSub.Streaming.AckBatcherTest do + use ExUnit.Case, async: true + + alias BroadwayCloudPubSub.Streaming.AckBatcher + + # A spy GenServer that records every call it receives and forwards them to + # the test process so we can assert on them. Returns :ok to all calls so + # AckBatcher sees {:ok, []} (full success) for every flush. + defmodule SpyRpcClient do + use GenServer + + def start_link(opts) do + {name, opts} = Keyword.pop(opts, :name) + {test_pid, _} = Keyword.pop(opts, :test_pid) + + if name do + GenServer.start_link(__MODULE__, test_pid, name: name) + else + GenServer.start_link(__MODULE__, test_pid) + end + end + + def init(test_pid), do: {:ok, test_pid} + + # Spy on call-based API — notify test process and return :ok so + # AckBatcher accumulates {:ok, []} (all delivered, nothing retained). + def handle_call({:acknowledge, _ack_ids} = msg, _from, test_pid) do + send(test_pid, {:rpc, msg}) + {:reply, :ok, test_pid} + end + + def handle_call({:modify_ack_deadline, _ids, _deadline} = msg, _from, test_pid) do + send(test_pid, {:rpc, msg}) + {:reply, :ok, test_pid} + end + + def handle_call(:ping, _from, state), do: {:reply, :ok, state} + end + + # A spy that returns {:error, :unavailable} for the first call, then :ok. + defmodule FlakyRpcClient do + use GenServer + + def start_link(test_pid) do + GenServer.start_link(__MODULE__, {test_pid, 0}) + end + + def init(state), do: {:ok, state} + + def handle_call({:acknowledge, ack_ids}, _from, {test_pid, call_count}) do + send(test_pid, {:rpc, {:acknowledge, ack_ids}, call_count}) + reply = if call_count == 0, do: {:error, :unavailable}, else: :ok + {:reply, reply, {test_pid, call_count + 1}} + end + + def handle_call({:modify_ack_deadline, _ids, _deadline} = msg, _from, {test_pid, count}) do + send(test_pid, {:rpc, msg, count}) + {:reply, :ok, {test_pid, count + 1}} + end + end + + # A spy that always fails modack for deadline=30 but succeeds for deadline=60. + defmodule SelectiveFlakyRpc do + use GenServer + + def start_link(test_pid) do + GenServer.start_link(__MODULE__, {test_pid, 0}) + end + + def init(state), do: {:ok, state} + + def handle_call({:acknowledge, _ids}, _from, {test_pid, count}) do + {:reply, :ok, {test_pid, count + 1}} + end + + def handle_call({:modify_ack_deadline, ids, deadline}, _from, {test_pid, count}) do + send(test_pid, {:rpc, {:modack, ids, deadline}, count}) + reply = if deadline == 30 and count == 0, do: {:error, :unavailable}, else: :ok + {:reply, reply, {test_pid, count + 1}} + end + end + + # Start a spy RPC client + AckBatcher pair. + # Returns {batcher_pid, rpc_client_pid}. + defp start_batcher(extra_opts \\ []) do + test_pid = self() + + {:ok, rpc_pid} = SpyRpcClient.start_link(test_pid: test_pid) + + opts = + Keyword.merge( + [ + rpc_client: rpc_pid, + ack_batch_interval_ms: 50, + ack_batch_max_size: 10 + ], + extra_opts + ) + + {:ok, batcher} = AckBatcher.start_link(opts) + {batcher, rpc_pid} + end + + # ============================================================ + # ack/2 + # ============================================================ + + describe "ack/2" do + test "queues ack_ids and flushes them on the next timer tick" do + {batcher, _rpc} = start_batcher() + + AckBatcher.ack(batcher, ["id-1", "id-2"]) + + # Timer fires after 50ms + assert_receive {:rpc, {:acknowledge, ids}}, 200 + assert Enum.sort(ids) == ["id-1", "id-2"] + end + + test "no-op when list is empty" do + {batcher, _rpc} = start_batcher() + + AckBatcher.ack(batcher, []) + + refute_receive {:rpc, _}, 100 + end + + test "accumulates multiple ack calls before flush" do + # Long interval so the timer doesn't fire mid-test + {batcher, _rpc} = start_batcher(ack_batch_interval_ms: 10_000) + + AckBatcher.ack(batcher, ["id-1"]) + AckBatcher.ack(batcher, ["id-2"]) + AckBatcher.ack(batcher, ["id-3"]) + + AckBatcher.flush(batcher) + + assert_receive {:rpc, {:acknowledge, ids}}, 500 + assert Enum.sort(ids) == ["id-1", "id-2", "id-3"] + end + end + + # ============================================================ + # modack/3 + # ============================================================ + + describe "modack/3" do + test "queues modack_ids and flushes them on the next timer tick" do + {batcher, _rpc} = start_batcher() + + AckBatcher.modack(batcher, ["id-a"], 30) + + assert_receive {:rpc, {:modify_ack_deadline, ids, 30}}, 200 + assert ids == ["id-a"] + end + + test "groups modacks by deadline — one RPC per unique deadline per flush" do + {batcher, _rpc} = start_batcher(ack_batch_interval_ms: 10_000) + + AckBatcher.modack(batcher, ["id-1", "id-2"], 30) + AckBatcher.modack(batcher, ["id-3"], 60) + AckBatcher.modack(batcher, ["id-4"], 30) + + AckBatcher.flush(batcher) + + # We expect exactly two :modify_ack_deadline messages — one per deadline + rpcs = collect_rpcs(2, 500) + + {ids_30, deadline_30} = find_modack(rpcs, 30) + {ids_60, deadline_60} = find_modack(rpcs, 60) + + assert deadline_30 == 30 + assert Enum.sort(ids_30) == ["id-1", "id-2", "id-4"] + + assert deadline_60 == 60 + assert ids_60 == ["id-3"] + end + + test "no-op when list is empty" do + {batcher, _rpc} = start_batcher() + + AckBatcher.modack(batcher, [], 30) + + refute_receive {:rpc, _}, 100 + end + end + + # ============================================================ + # flush/1 + # ============================================================ + + describe "flush/1" do + test "flush/1 sends all pending acks and modacks synchronously" do + {batcher, _rpc} = start_batcher(ack_batch_interval_ms: 10_000) + + AckBatcher.ack(batcher, ["ack-1"]) + AckBatcher.modack(batcher, ["mod-1"], 30) + + :ok = AckBatcher.flush(batcher) + + assert_receive {:rpc, {:acknowledge, _}}, 500 + assert_receive {:rpc, {:modify_ack_deadline, _, 30}}, 500 + end + + test "flush/1 is a no-op when nothing is queued" do + {batcher, _rpc} = start_batcher(ack_batch_interval_ms: 10_000) + + :ok = AckBatcher.flush(batcher) + + refute_receive {:rpc, _}, 100 + end + + test "flush/1 resets the state — subsequent timer does not re-send" do + {batcher, _rpc} = start_batcher(ack_batch_interval_ms: 10_000) + + AckBatcher.ack(batcher, ["id-1"]) + :ok = AckBatcher.flush(batcher) + + # Drain the flushed message + assert_receive {:rpc, {:acknowledge, _}}, 500 + + # Should receive no further RPC from a duplicate flush + refute_receive {:rpc, _}, 100 + end + end + + # ============================================================ + # Max batch size — size-triggered flush + # ============================================================ + + describe "size-triggered flush" do + test "flushes immediately when ack_count reaches ack_batch_max_size" do + # batch_max_size = 3, long timer so only size triggers the flush + {batcher, _rpc} = start_batcher(ack_batch_interval_ms: 10_000, ack_batch_max_size: 3) + + AckBatcher.ack(batcher, ["id-1", "id-2", "id-3"]) + + # Should flush without waiting for the timer + assert_receive {:rpc, {:acknowledge, ids}}, 200 + assert length(ids) == 3 + end + + test "flushes when combined ack + modack count reaches max_size" do + {batcher, _rpc} = start_batcher(ack_batch_interval_ms: 10_000, ack_batch_max_size: 3) + + AckBatcher.ack(batcher, ["id-1"]) + AckBatcher.modack(batcher, ["id-2", "id-3"], 30) + + # Combined count == 3 — should trigger flush + assert_receive {:rpc, _}, 200 + end + end + + # ============================================================ + # Timer behaviour + # ============================================================ + + describe "timer" do + test "timer fires automatically without explicit flush" do + {batcher, _rpc} = start_batcher(ack_batch_interval_ms: 30) + + AckBatcher.ack(batcher, ["timer-id"]) + + assert_receive {:rpc, {:acknowledge, ["timer-id"]}}, 300 + end + + test "timer resets after each flush — sends again on next tick" do + {batcher, _rpc} = start_batcher(ack_batch_interval_ms: 30) + + AckBatcher.ack(batcher, ["tick-1"]) + assert_receive {:rpc, {:acknowledge, _}}, 300 + + AckBatcher.ack(batcher, ["tick-2"]) + assert_receive {:rpc, {:acknowledge, _}}, 300 + end + end + + # ============================================================ + # Partial failure handling — acks retained on RPC failure + # ============================================================ + + describe "partial failure handling" do + test "ack_ids are retained in state when flush fails and retried on next tick" do + test_pid = self() + {:ok, flaky} = FlakyRpcClient.start_link(test_pid) + + {:ok, batcher} = + AckBatcher.start_link( + rpc_client: flaky, + ack_batch_interval_ms: 40, + ack_batch_max_size: 100 + ) + + AckBatcher.ack(batcher, ["id-1", "id-2"]) + + # First timer tick — RPC fails, ack_ids retained + assert_receive {:rpc, {:acknowledge, first_ids}, 0}, 300 + assert Enum.sort(first_ids) == ["id-1", "id-2"] + + # Second timer tick — RPC succeeds, ack_ids cleared + assert_receive {:rpc, {:acknowledge, retry_ids}, 1}, 300 + assert Enum.sort(retry_ids) == ["id-1", "id-2"] + + # After successful flush, state should be clear — no further RPCs + refute_receive {:rpc, {:acknowledge, _}, _}, 100 + end + + test "modack_ids for a failing deadline group are retained independently" do + test_pid = self() + + {:ok, selective} = SelectiveFlakyRpc.start_link(test_pid) + + {:ok, batcher} = + AckBatcher.start_link( + rpc_client: selective, + ack_batch_interval_ms: 40, + ack_batch_max_size: 100 + ) + + AckBatcher.modack(batcher, ["id-30"], 30) + AckBatcher.modack(batcher, ["id-60"], 60) + + # First tick: deadline=30 fails, deadline=60 succeeds + # Both are attempted in the same flush (Enum.reduce over modack_ids) + assert_receive {:rpc, {:modack, _, 30}, 0}, 300 + assert_receive {:rpc, {:modack, _, 60}, _}, 300 + + # Second tick: deadline=30 is retried (count=1 now → succeeds), deadline=60 is gone + assert_receive {:rpc, {:modack, ["id-30"], 30}, _}, 300 + refute_receive {:rpc, {:modack, ["id-60"], 60}, _}, 100 + end + end + + # ============================================================ + # RPC client unavailable — defer flush + # ============================================================ + + describe "RPC client unavailability" do + test "flush is deferred gracefully when rpc_client process is not registered" do + # Use a name that is never registered so GenServer.whereis returns nil + fake_name = Module.concat(__MODULE__, "NeverRegistered#{System.unique_integer()}") + + {:ok, batcher} = + AckBatcher.start_link( + rpc_client: fake_name, + ack_batch_interval_ms: 50, + ack_batch_max_size: 100 + ) + + AckBatcher.ack(batcher, ["id-orphan"]) + + # Flush should not crash the batcher even though rpc_client is not alive + :ok = AckBatcher.flush(batcher) + + assert Process.alive?(batcher) + + # Ack_ids must still be retained (not silently dropped) + state = :sys.get_state(batcher) + assert state.ack_count == 1 + assert state.ack_ids == ["id-orphan"] + end + end + + # ============================================================ + # Helpers + # ============================================================ + + # Collect exactly `count` {:rpc, _} messages from the mailbox within `timeout` ms. + defp collect_rpcs(count, timeout) do + Enum.map(1..count, fn _ -> + receive do + {:rpc, msg} -> msg + after + timeout -> flunk("Expected #{count} RPC messages but timed out") + end + end) + end + + defp find_modack(rpcs, deadline) do + result = + Enum.find_value(rpcs, fn + {:modify_ack_deadline, ids, ^deadline} -> {ids, deadline} + _ -> nil + end) + + assert result, "Expected :modify_ack_deadline with deadline #{deadline}" + result + end +end diff --git a/test/broadway_cloud_pub_sub/streaming/ack_result_test.exs b/test/broadway_cloud_pub_sub/streaming/ack_result_test.exs new file mode 100644 index 0000000..b5950c9 --- /dev/null +++ b/test/broadway_cloud_pub_sub/streaming/ack_result_test.exs @@ -0,0 +1,232 @@ +defmodule BroadwayCloudPubSub.Streaming.AckResultTest do + use ExUnit.Case, async: true + + alias BroadwayCloudPubSub.Streaming.AckResult + + # ============================================================ + # Constructors + # ============================================================ + + describe "success/1" do + test "returns an AckResult with status :success and nil error" do + result = AckResult.success("ack-123") + assert %AckResult{ack_id: "ack-123", status: :success, error: nil} = result + end + end + + describe "failure/3" do + test "returns an AckResult with the given status and error" do + result = AckResult.failure("ack-456", :invalid_ack_id, "expired") + assert %AckResult{ack_id: "ack-456", status: :invalid_ack_id, error: "expired"} = result + end + + test "supports all documented status atoms" do + for status <- [:permission_denied, :failed_precondition, :invalid_ack_id, :other] do + result = AckResult.failure("ack-x", status, nil) + assert result.status == status + end + end + end + + # ============================================================ + # transient_reason?/1 + # ============================================================ + + describe "transient_reason?/1" do + test "returns true for TRANSIENT_ prefix" do + assert AckResult.transient_reason?("TRANSIENT_FAILURE_INVALID_ACK_ID") == true + end + + test "returns true for any TRANSIENT_ variant" do + assert AckResult.transient_reason?("TRANSIENT_SOMETHING_ELSE") == true + end + + test "returns false for PERMANENT_ prefix" do + refute AckResult.transient_reason?("PERMANENT_FAILURE_INVALID_ACK_ID") + end + + test "returns false for empty string" do + refute AckResult.transient_reason?("") + end + + test "returns false for unrecognised reason" do + refute AckResult.transient_reason?("UNKNOWN_REASON") + end + end + + # ============================================================ + # parse_error_details/1 + # ============================================================ + + describe "parse_error_details/1 — nil / empty" do + test "returns empty map for nil" do + assert AckResult.parse_error_details(nil) == %{} + end + + test "returns empty map for empty list" do + assert AckResult.parse_error_details([]) == %{} + end + end + + describe "parse_error_details/1 — unrecognised Any type_url" do + test "ignores Any entries with a non-ErrorInfo type_url" do + other_any = %Google.Protobuf.Any{ + type_url: "type.googleapis.com/google.rpc.Status", + value: <<>> + } + + assert AckResult.parse_error_details([other_any]) == %{} + end + end + + describe "parse_error_details/1 — transient per-ack-id errors" do + test "returns :transient for ack_ids with TRANSIENT_ reason" do + error_info = %Google.Rpc.ErrorInfo{ + reason: "TRANSIENT_FAILURE_INVALID_ACK_ID", + domain: "pubsub.googleapis.com", + metadata: %{ + "ack-1" => "TRANSIENT_FAILURE_INVALID_ACK_ID", + "ack-2" => "TRANSIENT_FAILURE_INVALID_ACK_ID" + } + } + + any_proto = build_error_info_any(error_info) + result = AckResult.parse_error_details([any_proto]) + + assert result == %{ + "ack-1" => :transient, + "ack-2" => :transient + } + end + + test "returns :transient for all TRANSIENT_ variants" do + error_info = %Google.Rpc.ErrorInfo{ + reason: "TRANSIENT_SOMETHING", + domain: "pubsub.googleapis.com", + metadata: %{"ack-x" => "TRANSIENT_SOMETHING"} + } + + any_proto = build_error_info_any(error_info) + result = AckResult.parse_error_details([any_proto]) + + assert result["ack-x"] == :transient + end + end + + describe "parse_error_details/1 — permanent per-ack-id errors" do + test "returns {:permanent, reason} for ack_ids with PERMANENT_ reason" do + error_info = %Google.Rpc.ErrorInfo{ + reason: "PERMANENT_FAILURE_INVALID_ACK_ID", + domain: "pubsub.googleapis.com", + metadata: %{"ack-3" => "PERMANENT_FAILURE_INVALID_ACK_ID"} + } + + any_proto = build_error_info_any(error_info) + result = AckResult.parse_error_details([any_proto]) + + assert result == %{"ack-3" => {:permanent, "PERMANENT_FAILURE_INVALID_ACK_ID"}} + end + + test "returns {:permanent, reason} for unrecognised (non-TRANSIENT_) reason" do + error_info = %Google.Rpc.ErrorInfo{ + reason: "SOMETHING_COMPLETELY_DIFFERENT", + domain: "pubsub.googleapis.com", + metadata: %{"ack-4" => "SOMETHING_COMPLETELY_DIFFERENT"} + } + + any_proto = build_error_info_any(error_info) + result = AckResult.parse_error_details([any_proto]) + + assert result["ack-4"] == {:permanent, "SOMETHING_COMPLETELY_DIFFERENT"} + end + end + + describe "parse_error_details/1 — mixed transient and permanent" do + test "correctly classifies each ack_id independently" do + error_info = %Google.Rpc.ErrorInfo{ + reason: "MIXED", + domain: "pubsub.googleapis.com", + metadata: %{ + "ack-transient" => "TRANSIENT_FAILURE_INVALID_ACK_ID", + "ack-permanent" => "PERMANENT_FAILURE_INVALID_ACK_ID" + } + } + + any_proto = build_error_info_any(error_info) + result = AckResult.parse_error_details([any_proto]) + + assert result["ack-transient"] == :transient + assert result["ack-permanent"] == {:permanent, "PERMANENT_FAILURE_INVALID_ACK_ID"} + end + end + + describe "parse_error_details/1 — multiple Any details" do + test "merges results from multiple ErrorInfo entries" do + error_info_1 = %Google.Rpc.ErrorInfo{ + reason: "TRANSIENT_FAILURE_INVALID_ACK_ID", + domain: "pubsub.googleapis.com", + metadata: %{"ack-a" => "TRANSIENT_FAILURE_INVALID_ACK_ID"} + } + + error_info_2 = %Google.Rpc.ErrorInfo{ + reason: "PERMANENT_FAILURE_INVALID_ACK_ID", + domain: "pubsub.googleapis.com", + metadata: %{"ack-b" => "PERMANENT_FAILURE_INVALID_ACK_ID"} + } + + details = [ + build_error_info_any(error_info_1), + build_error_info_any(error_info_2) + ] + + result = AckResult.parse_error_details(details) + + assert result["ack-a"] == :transient + assert result["ack-b"] == {:permanent, "PERMANENT_FAILURE_INVALID_ACK_ID"} + end + + test "skips non-ErrorInfo entries between valid ones" do + error_info = %Google.Rpc.ErrorInfo{ + reason: "TRANSIENT_FAILURE", + domain: "pubsub.googleapis.com", + metadata: %{"ack-z" => "TRANSIENT_FAILURE"} + } + + other_any = %Google.Protobuf.Any{ + type_url: "type.googleapis.com/google.rpc.Status", + value: <<1, 2, 3>> + } + + result = AckResult.parse_error_details([other_any, build_error_info_any(error_info)]) + assert result == %{"ack-z" => :transient} + end + end + + describe "parse_error_details/1 — empty metadata" do + test "returns empty map when ErrorInfo metadata is empty" do + error_info = %Google.Rpc.ErrorInfo{ + reason: "SOME_ERROR", + domain: "pubsub.googleapis.com", + metadata: %{} + } + + any_proto = build_error_info_any(error_info) + result = AckResult.parse_error_details([any_proto]) + + assert result == %{} + end + end + + # ============================================================ + # Helpers + # ============================================================ + + # Encodes a Google.Rpc.ErrorInfo struct into a Google.Protobuf.Any + # with the correct type_url, matching what the Pub/Sub server sends. + defp build_error_info_any(%Google.Rpc.ErrorInfo{} = error_info) do + %Google.Protobuf.Any{ + type_url: "type.googleapis.com/google.rpc.ErrorInfo", + value: Google.Rpc.ErrorInfo.encode(error_info) + } + end +end diff --git a/test/broadway_cloud_pub_sub/streaming/ack_time_distribution_test.exs b/test/broadway_cloud_pub_sub/streaming/ack_time_distribution_test.exs new file mode 100644 index 0000000..2e9aa0f --- /dev/null +++ b/test/broadway_cloud_pub_sub/streaming/ack_time_distribution_test.exs @@ -0,0 +1,142 @@ +defmodule BroadwayCloudPubSub.Streaming.AckTimeDistributionTest do + use ExUnit.Case, async: true + + alias BroadwayCloudPubSub.Streaming.AckTimeDistribution + + describe "new/1" do + test "creates a distribution with the given default deadline" do + dist = AckTimeDistribution.new(60) + assert AckTimeDistribution.sample_count(dist) == 0 + # Before enough samples, returns the default + assert AckTimeDistribution.percentile(dist, 0.99) == 60 + end + + test "clamps default deadline to minimum 10s" do + dist = AckTimeDistribution.new(5) + assert AckTimeDistribution.percentile(dist, 0.99) == 10 + end + + test "clamps default deadline to maximum 600s" do + dist = AckTimeDistribution.new(9999) + assert AckTimeDistribution.percentile(dist, 0.99) == 600 + end + end + + describe "record/2" do + test "records a sample and increments count" do + dist = AckTimeDistribution.new(60) + dist = AckTimeDistribution.record(dist, 30) + assert AckTimeDistribution.sample_count(dist) == 1 + end + + test "clamps recorded value to minimum 10s" do + dist = AckTimeDistribution.new(60) + + # Fill to min_samples (10) with clamped values + dist = Enum.reduce(1..10, dist, fn _, d -> AckTimeDistribution.record(d, 2) end) + # All 10 samples should be clamped to 10s + assert AckTimeDistribution.percentile(dist, 0.99) == 10 + end + + test "clamps recorded value to maximum 600s" do + dist = AckTimeDistribution.new(60) + dist = Enum.reduce(1..10, dist, fn _, d -> AckTimeDistribution.record(d, 9999) end) + assert AckTimeDistribution.percentile(dist, 0.99) == 600 + end + + test "counters grow monotonically — no eviction beyond 1000 samples" do + dist = AckTimeDistribution.new(60) + # Add 1500 samples — all should be counted (no 1000-cap eviction) + dist = Enum.reduce(1..1500, dist, fn _, d -> AckTimeDistribution.record(d, 30) end) + assert AckTimeDistribution.sample_count(dist) == 1500 + end + end + + describe "percentile/2" do + test "returns default before 10 samples are collected (cold start)" do + dist = AckTimeDistribution.new(60) + dist = Enum.reduce(1..9, dist, fn _, d -> AckTimeDistribution.record(d, 120) end) + # 9 samples — still returns default + assert AckTimeDistribution.percentile(dist, 0.99) == 60 + end + + test "uses real data after 10 samples" do + dist = AckTimeDistribution.new(60) + dist = Enum.reduce(1..10, dist, fn _, d -> AckTimeDistribution.record(d, 120) end) + # 10 samples of 120s → p99 should be 120 + assert AckTimeDistribution.percentile(dist, 0.99) == 120 + end + + test "p99 is correct for a uniform distribution" do + dist = AckTimeDistribution.new(60) + # 100 samples: 1s, 2s, ..., 100s (each clamped to min 10s) + dist = Enum.reduce(1..100, dist, fn i, d -> AckTimeDistribution.record(d, i) end) + # p99 of the clamped distribution — 99th bucket >= 99th value + p99 = AckTimeDistribution.percentile(dist, 0.99) + assert p99 >= 95 and p99 <= 100 + end + + test "p50 (median) is correct for a uniform distribution" do + dist = AckTimeDistribution.new(60) + dist = Enum.reduce(1..100, dist, fn i, d -> AckTimeDistribution.record(d, i) end) + p50 = AckTimeDistribution.percentile(dist, 0.50) + # Median of 1..100 clamped to 10..100 → around 50 + assert p50 >= 45 and p50 <= 55 + end + + test "all same values returns that value" do + dist = AckTimeDistribution.new(60) + dist = Enum.reduce(1..20, dist, fn _, d -> AckTimeDistribution.record(d, 45) end) + assert AckTimeDistribution.percentile(dist, 0.99) == 45 + assert AckTimeDistribution.percentile(dist, 0.50) == 45 + end + + test "result is always clamped to 10-600 range" do + dist = AckTimeDistribution.new(60) + dist = Enum.reduce(1..10, dist, fn _, d -> AckTimeDistribution.record(d, 300) end) + p99 = AckTimeDistribution.percentile(dist, 0.99) + assert p99 >= 10 and p99 <= 600 + end + + test "p0 returns the minimum value in the distribution" do + dist = AckTimeDistribution.new(60) + # Records 10s..200s; p0 should return the smallest bucket with data = 10 + dist = Enum.reduce(1..20, dist, fn i, d -> AckTimeDistribution.record(d, i * 10) end) + p0 = AckTimeDistribution.percentile(dist, 0.0) + assert p0 == 10 + end + + test "p100 returns the maximum value in the distribution" do + dist = AckTimeDistribution.new(60) + dist = Enum.reduce(1..20, dist, fn i, d -> AckTimeDistribution.record(d, i * 10) end) + p100 = AckTimeDistribution.percentile(dist, 1.0) + # 20 * 10 = 200s + assert p100 == 200 + end + + test "monotonic: adding more data at a higher value raises p99" do + dist = AckTimeDistribution.new(60) + # 100 samples at 30s + dist = Enum.reduce(1..100, dist, fn _, d -> AckTimeDistribution.record(d, 30) end) + p99_before = AckTimeDistribution.percentile(dist, 0.99) + # Add 10 samples at 300s — now 110 total; p99 = 109th sample = 300s + dist = Enum.reduce(1..10, dist, fn _, d -> AckTimeDistribution.record(d, 300) end) + p99_after = AckTimeDistribution.percentile(dist, 0.99) + assert p99_before == 30 + assert p99_after == 300 + end + end + + describe "sample_count/1" do + test "zero for new distribution" do + dist = AckTimeDistribution.new(60) + assert AckTimeDistribution.sample_count(dist) == 0 + end + + test "grows monotonically without an upper cap" do + dist = AckTimeDistribution.new(60) + dist = Enum.reduce(1..2000, dist, fn _, d -> AckTimeDistribution.record(d, 30) end) + assert AckTimeDistribution.sample_count(dist) == 2000 + end + end +end diff --git a/test/broadway_cloud_pub_sub/streaming/error_classifier_test.exs b/test/broadway_cloud_pub_sub/streaming/error_classifier_test.exs index 6e69fbc..6948150 100644 --- a/test/broadway_cloud_pub_sub/streaming/error_classifier_test.exs +++ b/test/broadway_cloud_pub_sub/streaming/error_classifier_test.exs @@ -27,6 +27,15 @@ defmodule BroadwayCloudPubSub.Streaming.ErrorClassifierTest do assert ErrorClassifier.classify(rpc_error(14)) == :retryable end + test "UNAVAILABLE (14) with 'Server shutdownNow invoked' is retryable" do + assert ErrorClassifier.classify(rpc_error(14, "Server shutdownNow invoked")) == :retryable + end + + test "UNAVAILABLE (14) with message containing shutdown string is retryable" do + assert ErrorClassifier.classify(rpc_error(14, "prefix Server shutdownNow invoked suffix")) == + :retryable + end + test "UNKNOWN (2) is retryable" do assert ErrorClassifier.classify(rpc_error(2)) == :retryable end @@ -34,6 +43,10 @@ defmodule BroadwayCloudPubSub.Streaming.ErrorClassifierTest do test "RESOURCE_EXHAUSTED (8) is retryable" do assert ErrorClassifier.classify(rpc_error(8)) == :retryable end + + test "UNAUTHENTICATED (16) is retryable" do + assert ErrorClassifier.classify(rpc_error(16)) == :retryable + end end describe "classify/1 — terminal gRPC status codes" do @@ -49,22 +62,9 @@ defmodule BroadwayCloudPubSub.Streaming.ErrorClassifierTest do assert ErrorClassifier.classify(rpc_error(3)) == :terminal end - test "UNAUTHENTICATED (16) is terminal" do - assert ErrorClassifier.classify(rpc_error(16)) == :terminal - end - test "CANCELLED (1) is terminal" do assert ErrorClassifier.classify(rpc_error(1)) == :terminal end - - test "UNAVAILABLE (14) with 'Server shutdownNow invoked' is terminal" do - assert ErrorClassifier.classify(rpc_error(14, "Server shutdownNow invoked")) == :terminal - end - - test "UNAVAILABLE (14) with message containing shutdown string is terminal" do - assert ErrorClassifier.classify(rpc_error(14, "prefix Server shutdownNow invoked suffix")) == - :terminal - end end describe "classify/1 — non-gRPC errors" do diff --git a/test/broadway_cloud_pub_sub/streaming/options_test.exs b/test/broadway_cloud_pub_sub/streaming/options_test.exs index b26a0d5..1f8199f 100644 --- a/test/broadway_cloud_pub_sub/streaming/options_test.exs +++ b/test/broadway_cloud_pub_sub/streaming/options_test.exs @@ -249,4 +249,30 @@ defmodule BroadwayCloudPubSub.Streaming.OptionsTest do Options.type_shutdown_option(:ack, name: :on_shutdown) end end + + describe "enable_message_ordering" do + test "defaults to false" do + {:ok, opts} = validate(subscription: "projects/p/subscriptions/s") + assert opts[:enable_message_ordering] == false + end + + test "accepts true" do + {:ok, opts} = + validate(subscription: "projects/p/subscriptions/s", enable_message_ordering: true) + + assert opts[:enable_message_ordering] == true + end + + test "accepts false explicitly" do + {:ok, opts} = + validate(subscription: "projects/p/subscriptions/s", enable_message_ordering: false) + + assert opts[:enable_message_ordering] == false + end + + test "rejects non-boolean" do + assert {:error, _} = + validate(subscription: "projects/p/subscriptions/s", enable_message_ordering: 1) + end + end end diff --git a/test/broadway_cloud_pub_sub/streaming/stream_manager_test.exs b/test/broadway_cloud_pub_sub/streaming/stream_manager_test.exs index 7bef2da..01af9e7 100644 --- a/test/broadway_cloud_pub_sub/streaming/stream_manager_test.exs +++ b/test/broadway_cloud_pub_sub/streaming/stream_manager_test.exs @@ -1,17 +1,19 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do use ExUnit.Case, async: true - alias BroadwayCloudPubSub.Streaming.StreamManager + import ExUnit.CaptureLog + + alias BroadwayCloudPubSub.Streaming.{AckBatcher, StreamManager} # Minimal config with enough keys to satisfy StreamManager.init/1 # (mirrors what Options produces after validation + defaults). defp base_config do [ + broadway_name: __MODULE__, subscription: "projects/test/subscriptions/test-sub", max_outstanding_messages: 1_000, max_outstanding_bytes: 104_857_600, stream_ack_deadline_seconds: 60, - lease_extension_percent: 0.6, backoff_type: :exp, backoff_min: 1_000, backoff_max: 30_000, @@ -21,6 +23,10 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do keepalive_interval_ms: 30_000, on_success: :ack, on_failure: :noop, + on_shutdown: {:nack, 5}, + max_extension_ms: 3_600_000, + ack_batch_interval_ms: 100, + ack_batch_max_size: 2_500, client_id: "test-client-id", token_generator: {__MODULE__, :noop_token, []}, broadway: [name: __MODULE__] @@ -29,29 +35,49 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do def noop_token, do: {:ok, "test-token"} + # A minimal stub GenServer that silently accepts any cast (ack/modack). + # Used as the rpc_client for AckBatcher so no real gRPC calls are made. + defmodule StubRpcClient do + use GenServer + def start_link(name), do: GenServer.start_link(__MODULE__, :ok, name: name) + def init(:ok), do: {:ok, :ok} + def handle_cast(_msg, state), do: {:noreply, state} + def handle_call(_msg, _from, state), do: {:reply, :ok, state} + end + # Start a StreamManager, inject producer_pid so it doesn't try to connect. + # Also starts a real AckBatcher (backed by StubRpcClient) registered under + # the name that StreamManager derives from broadway_name. defp start_manager(extra_opts \\ []) do - test_pid = self() - opts = Keyword.merge(base_config(), extra_opts) + # Generate a unique broadway_name per test invocation to avoid registered-name + # collisions when multiple tests run concurrently or sequentially. + broadway_name = Module.concat(__MODULE__, "Run#{System.unique_integer([:positive])}") + + opts = + base_config() |> Keyword.put(:broadway_name, broadway_name) |> Keyword.merge(extra_opts) + + rpc_client_name = Module.concat(broadway_name, UnaryRpcClient) + batcher_name = Module.concat(broadway_name, AckBatcher) + + # Start stub RPC client so AckBatcher can call it + {:ok, _stub} = StubRpcClient.start_link(rpc_client_name) + + # Start a real AckBatcher registered under the name StreamManager will use + {:ok, _batcher} = + AckBatcher.start_link( + name: batcher_name, + rpc_client: rpc_client_name, + ack_batch_interval_ms: Keyword.get(opts, :ack_batch_interval_ms, 100), + ack_batch_max_size: Keyword.get(opts, :ack_batch_max_size, 2_500) + ) + {:ok, pid} = StreamManager.start_link(opts) - # Inject state: set producer_pid to test process and skip the real connect. - # NOTE: pass test_pid explicitly — self() inside :sys.replace_state runs in - # the GenServer process context, not the test process. - :sys.replace_state(pid, fn state -> - %{state | producer_pid: test_pid} - end) + StreamManager.set_producer(pid, self()) pid end - # Inject a fake grpc_stream into state so ack paths see a connected stream. - defp inject_connected(pid) do - :sys.replace_state(pid, fn state -> - %{state | grpc_stream: :fake_stream} - end) - end - # ============================================================ # Demand signaling # ============================================================ @@ -60,15 +86,14 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do test "stores pending_demand when message buffer is empty" do pid = start_manager() - :sys.replace_state(pid, fn s -> %{s | pending_demand: 0, message_buffer: []} end) + StreamManager.notify_demand(pid, 0) StreamManager.notify_demand(pid, 10) # Allow the async cast to be processed - :sys.get_state(pid) - state = :sys.get_state(pid) + assert state.pending_demand == 10 - assert state.message_buffer == [] + assert :queue.is_empty(state.message_buffer) end end @@ -82,7 +107,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do ] :sys.replace_state(pid, fn s -> - %{s | pending_demand: 0, message_buffer: Enum.reverse(msgs)} + %{s | pending_demand: 0, message_buffer: :queue.from_list(msgs)} end) StreamManager.notify_demand(pid, 10) @@ -91,7 +116,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do assert Enum.map(received, & &1.data) == ["msg1", "msg2"] state = :sys.get_state(pid) - assert state.message_buffer == [] + assert :queue.is_empty(state.message_buffer) assert state.pending_demand == 8 end @@ -107,7 +132,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do end :sys.replace_state(pid, fn s -> - %{s | pending_demand: 0, message_buffer: Enum.reverse(msgs)} + %{s | pending_demand: 0, message_buffer: :queue.from_list(msgs)} end) StreamManager.notify_demand(pid, 2) @@ -117,7 +142,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do assert Enum.map(received, & &1.data) == ["msg1", "msg2"] state = :sys.get_state(pid) - assert length(state.message_buffer) == 3 + assert :queue.len(state.message_buffer) == 3 assert state.pending_demand == 0 StreamManager.notify_demand(pid, 10) @@ -127,7 +152,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do assert Enum.map(received2, & &1.data) == ["msg3", "msg4", "msg5"] state = :sys.get_state(pid) - assert state.message_buffer == [] + assert :queue.is_empty(state.message_buffer) assert state.pending_demand == 7 end end @@ -135,7 +160,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do describe "stream_messages → message delivery" do test "messages are forwarded immediately when pending_demand > 0" do pid = start_manager() - :sys.replace_state(pid, fn s -> %{s | pending_demand: 10} end) + StreamManager.notify_demand(pid, 10) fake_msg = %Google.Pubsub.V1.ReceivedMessage{ ack_id: "ack-1", @@ -157,12 +182,12 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do state = :sys.get_state(pid) assert state.pending_demand == 9 - assert state.message_buffer == [] + assert :queue.is_empty(state.message_buffer) end test "messages are buffered when pending_demand is 0" do pid = start_manager() - :sys.replace_state(pid, fn s -> %{s | pending_demand: 0} end) + StreamManager.notify_demand(pid, 0) fake_msg = %Google.Pubsub.V1.ReceivedMessage{ ack_id: "ack-2", @@ -181,12 +206,12 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do refute_receive {:stream_messages, _}, 100 state = :sys.get_state(pid) - assert length(state.message_buffer) == 1 + assert :queue.len(state.message_buffer) == 1 end test "buffer is flushed in FIFO order on notify_demand" do pid = start_manager() - :sys.replace_state(pid, fn s -> %{s | pending_demand: 0} end) + StreamManager.notify_demand(pid, 0) for i <- 1..3 do msg = %Google.Pubsub.V1.ReceivedMessage{ @@ -213,91 +238,6 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do end end - # ============================================================ - # Ack buffering — no cap - # ============================================================ - - describe "ack buffer — unbounded" do - test "buffers acks when grpc_stream is nil" do - pid = start_manager() - - :sys.replace_state(pid, fn s -> %{s | grpc_stream: nil} end) - - StreamManager.acknowledge(pid, ["ack-1", "ack-2"]) - - :sys.get_state(pid) - - state = :sys.get_state(pid) - assert state.ack_buffer_size == 1 - assert state.ack_buffer != [] - end - - test "buffer grows without dropping entries" do - pid = start_manager() - :sys.replace_state(pid, fn s -> %{s | grpc_stream: nil} end) - - count = 20_000 - - for i <- 1..count do - StreamManager.acknowledge(pid, ["ack-#{i}"]) - end - - :sys.get_state(pid) - - state = :sys.get_state(pid) - assert state.ack_buffer_size == count - end - - test "flushed buffer is replayed on reconnect (connect_stream)" do - pid = start_manager() - - :sys.replace_state(pid, fn s -> - %{ - s - | grpc_stream: nil, - ack_buffer: [{:ack, ["id-1"]}, {:ack, ["id-2"]}], - ack_buffer_size: 2 - } - end) - - inject_connected(pid) - - StreamManager.close(pid) - - state = :sys.get_state(pid) - assert state.ack_buffer == [] - assert state.ack_buffer_size == 0 - end - end - - # ============================================================ - # Ack buffer telemetry - # ============================================================ - - describe "ack buffer telemetry" do - test "emits :ack_buffered telemetry when buffering an ack" do - pid = start_manager() - :sys.replace_state(pid, fn s -> %{s | grpc_stream: nil} end) - - test_pid = self() - - :telemetry.attach( - "test-ack-buffered", - [:broadway_cloud_pub_sub, :stream, :ack_buffered], - fn _event, measurements, _metadata, _config -> - send(test_pid, {:telemetry, measurements}) - end, - nil - ) - - StreamManager.acknowledge(pid, ["ack-x"]) - - assert_receive {:telemetry, %{buffer_size: 1}} - - :telemetry.detach("test-ack-buffered") - end - end - # ============================================================ # receiving flag — draining # ============================================================ @@ -305,8 +245,8 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do describe "stop_receiving/1" do test "messages are not forwarded after stop_receiving even when pending_demand > 0" do pid = start_manager() - :sys.replace_state(pid, fn s -> %{s | pending_demand: 10} end) + StreamManager.notify_demand(pid, 10) StreamManager.stop_receiving(pid) fake_msg = %Google.Pubsub.V1.ReceivedMessage{ @@ -336,23 +276,28 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do # Use a very short keepalive interval so the test doesn't wait 30s. # With a fake stream, send_on_stream will throw, which should trigger # a reconnect instead of being silently swallowed. - pid = start_manager(keepalive_interval_ms: 10) + logs = + capture_log(fn -> + pid = start_manager(keepalive_interval_ms: 10) - :sys.replace_state(pid, fn s -> %{s | grpc_stream: :fake_stream} end) + :sys.replace_state(pid, fn s -> %{s | grpc_stream: :fake_stream} end) - # Bootstrap the keepalive cycle — normally started by {:stream_opened}, - # but we injected the stream directly via replace_state. - send(pid, :send_keepalive) + # Bootstrap the keepalive cycle — normally started by {:stream_opened}, + # but we injected the stream directly via replace_state. + send(pid, :send_keepalive) - Process.sleep(30) + :sys.get_state(pid) - assert Process.alive?(pid) + assert Process.alive?(pid) - # After a send failure, the stream is reset (grpc_stream: nil) and a - # reconnect is scheduled. - state = :sys.get_state(pid) - assert state.grpc_stream == nil - assert state.reconnect_ref != nil + # After a send failure, the stream is reset (grpc_stream: nil) and a + # reconnect is scheduled. + state = :sys.get_state(pid) + assert state.grpc_stream == nil + assert state.reconnect_ref != nil + end) + + assert logs =~ "GRPC.Client.Connection stopping as requested" end test "does not crash when stream is nil (reconnecting)" do @@ -360,7 +305,9 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do :sys.replace_state(pid, fn s -> %{s | grpc_stream: nil} end) - Process.sleep(30) + send(pid, :send_keepalive) + + :sys.get_state(pid) assert Process.alive?(pid) end @@ -381,7 +328,6 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do s | grpc_stream: :fake_stream, conn_pid: self(), - stream_opened_at: System.monotonic_time(:millisecond), keepalive_timer: timer } end) @@ -440,77 +386,111 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do describe "terminal gRPC errors stop the GenServer" do test "NOT_FOUND (5) stops the GenServer" do - pid = start_manager() - ref = Process.monitor(pid) - Process.unlink(pid) + logs = + capture_log(fn -> + pid = start_manager() + ref = Process.monitor(pid) + Process.unlink(pid) + + send(pid, {:stream_error, %GRPC.RPCError{status: 5, message: "not found"}}) - send(pid, {:stream_error, %GRPC.RPCError{status: 5, message: "not found"}}) + assert_receive {:DOWN, ^ref, :process, ^pid, {:terminal_error, _}}, 1_000 + end) - assert_receive {:DOWN, ^ref, :process, ^pid, {:terminal_error, _}}, 1_000 + assert logs =~ + "Terminal Cloud Pub/Sub gRPC error — stopping: %GRPC.RPCError{status: 5, message: \"not found\", details: nil}" end test "PERMISSION_DENIED (7) stops the GenServer" do - pid = start_manager() - ref = Process.monitor(pid) - Process.unlink(pid) + logs = + capture_log(fn -> + pid = start_manager() + ref = Process.monitor(pid) + Process.unlink(pid) + + send(pid, {:stream_error, %GRPC.RPCError{status: 7, message: "permission denied"}}) - send(pid, {:stream_error, %GRPC.RPCError{status: 7, message: "permission denied"}}) + assert_receive {:DOWN, ^ref, :process, ^pid, {:terminal_error, _}}, 1_000 + end) - assert_receive {:DOWN, ^ref, :process, ^pid, {:terminal_error, _}}, 1_000 + assert logs =~ + "Terminal Cloud Pub/Sub gRPC error — stopping: %GRPC.RPCError{status: 7, message: \"permission denied\", details: nil}" end test "INVALID_ARGUMENT (3) stops the GenServer" do - pid = start_manager() - ref = Process.monitor(pid) - Process.unlink(pid) + logs = + capture_log(fn -> + pid = start_manager() + ref = Process.monitor(pid) + Process.unlink(pid) + + send(pid, {:stream_error, %GRPC.RPCError{status: 3, message: "bad argument"}}) - send(pid, {:stream_error, %GRPC.RPCError{status: 3, message: "bad argument"}}) + assert_receive {:DOWN, ^ref, :process, ^pid, {:terminal_error, _}}, 1_000 + end) - assert_receive {:DOWN, ^ref, :process, ^pid, {:terminal_error, _}}, 1_000 + assert logs =~ + "Terminal Cloud Pub/Sub gRPC error — stopping: %GRPC.RPCError{status: 3, message: \"bad argument\", details: nil}" end - test "UNAUTHENTICATED (16) stops the GenServer" do - pid = start_manager() + test "UNAUTHENTICATED (16) schedules reconnect without stopping" do + pid = start_manager(backoff_min: 10_000, backoff_max: 30_000) ref = Process.monitor(pid) - Process.unlink(pid) send(pid, {:stream_error, %GRPC.RPCError{status: 16, message: "unauthenticated"}}) + :sys.get_state(pid) - assert_receive {:DOWN, ^ref, :process, ^pid, {:terminal_error, _}}, 1_000 + refute_received {:DOWN, ^ref, :process, ^pid, _} + assert Process.alive?(pid) + + state = :sys.get_state(pid) + assert state.reconnect_ref != nil end - test "UNAVAILABLE (14) with 'Server shutdownNow invoked' stops the GenServer" do - pid = start_manager() + test "UNAVAILABLE (14) with 'Server shutdownNow invoked' schedules reconnect without stopping" do + pid = start_manager(backoff_min: 10_000, backoff_max: 30_000) ref = Process.monitor(pid) - Process.unlink(pid) send( pid, {:stream_error, %GRPC.RPCError{status: 14, message: "Server shutdownNow invoked"}} ) - assert_receive {:DOWN, ^ref, :process, ^pid, {:terminal_error, _}}, 1_000 - end - - test "terminal error emits :terminal_error telemetry before stopping" do - pid = start_manager() - test_pid = self() - Process.unlink(pid) - - :telemetry.attach( - "test-terminal-error-#{inspect(pid)}", - [:broadway_cloud_pub_sub, :stream, :terminal_error], - fn _event, measurements, _metadata, _config -> - send(test_pid, {:telemetry, :terminal_error, measurements}) - end, - nil - ) + :sys.get_state(pid) - send(pid, {:stream_error, %GRPC.RPCError{status: 5, message: "not found"}}) + refute_received {:DOWN, ^ref, :process, ^pid, _} + assert Process.alive?(pid) - assert_receive {:telemetry, :terminal_error, %{reason: _}}, 1_000 + state = :sys.get_state(pid) + assert state.reconnect_ref != nil + end - :telemetry.detach("test-terminal-error-#{inspect(pid)}") + test "terminal error emits :terminal_error telemetry before stopping" do + logs = + capture_log(fn -> + pid = start_manager() + test_pid = self() + telemetry_name = "test-terminal-error-#{inspect(pid)}" + Process.unlink(pid) + + :telemetry.attach( + telemetry_name, + [:broadway_cloud_pub_sub, :stream, :terminal_error], + fn _event, measurements, _metadata, _config -> + send(test_pid, {:telemetry, :terminal_error, measurements}) + end, + nil + ) + + send(pid, {:stream_error, %GRPC.RPCError{status: 5, message: "not found"}}) + + assert_receive {:telemetry, :terminal_error, %{reason: _}}, 1_000 + + :telemetry.detach(telemetry_name) + end) + + assert logs =~ + "Terminal Cloud Pub/Sub gRPC error — stopping: %GRPC.RPCError{status: 5, message: \"not found\", details: nil}" end end @@ -542,54 +522,253 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do end # ============================================================ - # Skip-backoff optimisation + # Phase 5: Ordering key support — subscription_properties # ============================================================ - describe "skip-backoff on long-lived stream" do - test "reconnects immediately (0ms) when stream was open for >30 seconds" do - # Use a non-routable endpoint so the connect attempt fails at TCP level - # (not by reaching a real server that returns a terminal auth error). - pid = start_manager(backoff_min: 5_000, backoff_max: 30_000, grpc_endpoint: "localhost:1") + describe "subscription_properties — ordering_enabled" do + test "ordering_enabled defaults to false" do + pid = start_manager() + state = :sys.get_state(pid) + assert state.ordering_enabled == false + end - # Pretend the stream opened 31 seconds ago - opened_at = System.monotonic_time(:millisecond) - 31_000 + test "updates ordering_enabled to true on {:subscription_properties, ...}" do + pid = start_manager() - :sys.replace_state(pid, fn s -> %{s | stream_opened_at: opened_at} end) + props = %Google.Pubsub.V1.StreamingPullResponse.SubscriptionProperties{ + message_ordering_enabled: true, + exactly_once_delivery_enabled: false + } - send(pid, {:stream_closed}) + send(pid, {:subscription_properties, props}) + # Sync: ensure the message is processed :sys.get_state(pid) state = :sys.get_state(pid) - assert state.reconnect_ref != nil + assert state.ordering_enabled == true + end - # With 0ms effective timeout, :connect should arrive almost immediately. - # Give it 500ms — even with scheduling jitter this is far below the 5s backoff. - # The connect attempt will fail (no gRPC), but the GenServer remains alive. - Process.sleep(200) - assert Process.alive?(pid) + test "updates ordering_enabled to false when server reports false" do + pid = start_manager() + + # First set to true + send( + pid, + {:subscription_properties, + %Google.Pubsub.V1.StreamingPullResponse.SubscriptionProperties{ + message_ordering_enabled: true, + exactly_once_delivery_enabled: false + }} + ) + + :sys.get_state(pid) + assert :sys.get_state(pid).ordering_enabled == true + + # Then server sends false (can happen mid-stream) + send( + pid, + {:subscription_properties, + %Google.Pubsub.V1.StreamingPullResponse.SubscriptionProperties{ + message_ordering_enabled: false, + exactly_once_delivery_enabled: false + }} + ) + + :sys.get_state(pid) + assert :sys.get_state(pid).ordering_enabled == false end - test "applies full backoff when stream was open for <30 seconds" do - pid = start_manager(backoff_min: 5_000, backoff_max: 30_000) + test "ignores other messages — ordering_enabled unchanged" do + pid = start_manager() - # Stream opened 1 second ago — should NOT skip backoff - opened_at = System.monotonic_time(:millisecond) - 1_000 + # Unrelated message + send(pid, {:some_other_event, :ignored}) + :sys.get_state(pid) - :sys.replace_state(pid, fn s -> %{s | stream_opened_at: opened_at} end) + state = :sys.get_state(pid) + assert state.ordering_enabled == false + end + end + + # ============================================================ + # Phase 6: Exactly-once delivery — subscription_properties + # ============================================================ + + describe "subscription_properties — exactly_once_enabled" do + test "exactly_once_enabled defaults to false" do + pid = start_manager() + state = :sys.get_state(pid) + assert state.exactly_once_enabled == false + end + + test "updates exactly_once_enabled to true when server reports true" do + pid = start_manager() + + props = %Google.Pubsub.V1.StreamingPullResponse.SubscriptionProperties{ + message_ordering_enabled: false, + exactly_once_delivery_enabled: true + } + + send(pid, {:subscription_properties, props}) + :sys.get_state(pid) + + assert :sys.get_state(pid).exactly_once_enabled == true + end + + test "updates exactly_once_enabled back to false when server reports false" do + pid = start_manager() + + send( + pid, + {:subscription_properties, + %Google.Pubsub.V1.StreamingPullResponse.SubscriptionProperties{ + message_ordering_enabled: false, + exactly_once_delivery_enabled: true + }} + ) - send(pid, {:stream_closed}) :sys.get_state(pid) + assert :sys.get_state(pid).exactly_once_enabled == true + send( + pid, + {:subscription_properties, + %Google.Pubsub.V1.StreamingPullResponse.SubscriptionProperties{ + message_ordering_enabled: false, + exactly_once_delivery_enabled: false + }} + ) + + :sys.get_state(pid) + assert :sys.get_state(pid).exactly_once_enabled == false + end + + test "ordering_enabled and exactly_once_enabled are updated together" do + pid = start_manager() + + send( + pid, + {:subscription_properties, + %Google.Pubsub.V1.StreamingPullResponse.SubscriptionProperties{ + message_ordering_enabled: true, + exactly_once_delivery_enabled: true + }} + ) + + :sys.get_state(pid) state = :sys.get_state(pid) - assert state.reconnect_ref != nil + assert state.ordering_enabled == true + assert state.exactly_once_enabled == true + end + end + + # ============================================================ + # Phase 6: Exactly-once delivery — extend_leases minimum deadline + # ============================================================ - # With 5s minimum backoff, no :connect should arrive within 500ms - Process.sleep(300) + describe "extend_leases — exactly_once_enabled deadline enforcement" do + # Inject outstanding messages and trigger :extend_leases, then capture + # the modack call via the StubRpcClient + process mailbox inspection. + # We can't easily intercept AckBatcher calls here, so we validate behaviour + # by inspecting how long until the next :extend_leases fires. - # State should still show the same reconnect_ref (connect hasn't fired) - state2 = :sys.get_state(pid) - assert state2.reconnect_ref == state.reconnect_ref - assert Process.alive?(pid) + test "uses adaptive deadline (no 60s floor) when exactly_once_enabled is false" do + pid = start_manager() + + # With <10 samples the distribution returns the default deadline (60s). + # We verify the next timer is scheduled within a reasonable range. + now_ms = System.monotonic_time(:millisecond) + + # Inject one outstanding message and a lease timer that fires immediately. + :sys.replace_state(pid, fn s -> + %{ + s + | exactly_once_enabled: false, + outstanding: %{ + "ack-normal" => %{ + received_at: now_ms - 5_000, + max_expiry: now_ms + 3_600_000 + } + } + } + end) + + # Fire the :extend_leases handler directly + send(pid, :extend_leases) + :sys.get_state(pid) + + state = :sys.get_state(pid) + # Lease timer should be re-scheduled with a positive ref + assert state.lease_timer != nil + end + + test "enforces 60s minimum deadline when exactly_once_enabled is true" do + pid = start_manager() + now_ms = System.monotonic_time(:millisecond) + + # With exactly_once_enabled: true and an adaptive deadline of e.g. 10s + # (cold start default clamped to the min), effective_deadline must be + # at least 60. We validate by measuring the scheduled next interval: + # interval = (effective_deadline - 5) * 1000 * jitter_factor + # With effective_deadline=60: interval in [(60-5)*1000*0.8, (60-5)*1000*0.9] + # = [44_000, 49_500] + :sys.replace_state(pid, fn s -> + %{ + s + | exactly_once_enabled: true, + outstanding: %{ + "ack-eo" => %{ + received_at: now_ms - 1_000, + max_expiry: now_ms + 3_600_000 + } + } + } + end) + + send(pid, :extend_leases) + :sys.get_state(pid) + + state = :sys.get_state(pid) + # The timer must be set and represent a deadline >= 60s. + # We read back the remaining time from the timer ref. + assert state.lease_timer != nil + remaining_ms = Process.read_timer(state.lease_timer) + # The next extension should fire well before the 60s deadline expires. + # We assert >= 40_000 as a lower bound with tolerance for scheduling jitter. + assert remaining_ms >= 40_000, + "Expected next lease timer >= 40s for exactly-once, got #{remaining_ms}ms" + end + + test "uses normal interval (much shorter) when exactly_once_enabled is false" do + pid = start_manager(stream_ack_deadline_seconds: 20) + now_ms = System.monotonic_time(:millisecond) + + :sys.replace_state(pid, fn s -> + %{ + s + | exactly_once_enabled: false, + outstanding: %{ + "ack-normal" => %{ + received_at: now_ms - 1_000, + max_expiry: now_ms + 3_600_000 + } + } + } + end) + + send(pid, :extend_leases) + :sys.get_state(pid) + + state = :sys.get_state(pid) + assert state.lease_timer != nil + remaining_ms = Process.read_timer(state.lease_timer) + + # With stream_ack_deadline_seconds=20 and adaptive deadline defaulting + # to 60 (cold start default), effective = 60s (no exactly_once floor). + # Interval = (60 - 5) * 1000 * jitter ≈ [44_000, 49_500). + # The key check: it should NOT be forced to >= 44_000 due to exactly_once + # — we simply verify it's a valid positive number. + assert is_integer(remaining_ms) and remaining_ms > 0 end end end diff --git a/test/broadway_cloud_pub_sub/streaming/stress_test.exs b/test/broadway_cloud_pub_sub/streaming/stress_test.exs new file mode 100644 index 0000000..f0be870 --- /dev/null +++ b/test/broadway_cloud_pub_sub/streaming/stress_test.exs @@ -0,0 +1,1839 @@ +defmodule BroadwayCloudPubSub.Streaming.StressTest do + @moduledoc """ + Stress tests for the Streaming Producer against the Pub/Sub emulator. + + Covers: + 1. High-volume burst (1000+ messages, fast processing) + 2. Rapid sequential batches + 3. Message completeness verification under load + 4. Streaming vs Pull producer comparison + 5. Both :gun and :mint adapters + 6. Pipeline stop/restart during message flow + 7. Concurrent publishers while pipeline processes + 8. High concurrency processors with near-zero processing time + + Run with: + + mix test test/broadway_cloud_pub_sub/streaming/stress_test.exs --only stress + + Requires the Pub/Sub emulator running on localhost:8085. + """ + + use ExUnit.Case, async: false + + @moduletag :stress + @moduletag timeout: 120_000 + + require Logger + + alias BroadwayCloudPubSub.PubSubEmulator + + # --------------------------------------------------------------------------- + # Token generators + # --------------------------------------------------------------------------- + + def noop_token, do: {:ok, "emulator-no-auth"} + + # --------------------------------------------------------------------------- + # Test Pipelines + # --------------------------------------------------------------------------- + + defmodule StreamingPipeline do + @moduledoc "Streaming pipeline that sends received data to test process." + use Broadway + + def start_link(opts) do + test_pid = Keyword.fetch!(opts, :test_pid) + subscription = Keyword.fetch!(opts, :subscription) + emulator_host = Keyword.fetch!(opts, :emulator_host) + name = Keyword.fetch!(opts, :name) + adapter = Keyword.get(opts, :adapter, :gun) + max_outstanding = Keyword.get(opts, :max_outstanding, 1000) + processor_concurrency = Keyword.get(opts, :processor_concurrency, 4) + + Broadway.start_link(__MODULE__, + name: name, + producer: [ + module: + {BroadwayCloudPubSub.Streaming.Producer, + subscription: subscription, + token_generator: {BroadwayCloudPubSub.Streaming.StressTest, :noop_token, []}, + grpc_endpoint: emulator_host, + use_ssl: false, + adapter: adapter, + max_outstanding_messages: max_outstanding, + on_failure: {:nack, 0}}, + concurrency: 1 + ], + processors: [ + default: [concurrency: processor_concurrency] + ], + context: %{test_pid: test_pid} + ) + end + + @impl Broadway + def handle_message(:default, message, %{test_pid: test_pid}) do + send(test_pid, {:msg, message.data}) + message + end + + @impl Broadway + def handle_failed(messages, %{test_pid: test_pid}) do + Enum.each(messages, fn msg -> + send(test_pid, {:failed, msg.data}) + end) + + messages + end + end + + defmodule PullPipeline do + @moduledoc "Pull-based pipeline for comparison." + use Broadway + + def start_link(opts) do + test_pid = Keyword.fetch!(opts, :test_pid) + subscription = Keyword.fetch!(opts, :subscription) + emulator_host = Keyword.fetch!(opts, :emulator_host) + name = Keyword.fetch!(opts, :name) + processor_concurrency = Keyword.get(opts, :processor_concurrency, 4) + + Broadway.start_link(__MODULE__, + name: name, + producer: [ + module: + {BroadwayCloudPubSub.Producer, + subscription: subscription, + token_generator: {BroadwayCloudPubSub.Streaming.StressTest, :noop_token, []}, + base_url: "http://#{emulator_host}", + receive_interval: 50, + max_number_of_messages: 100, + on_failure: {:nack, 0}}, + concurrency: 1 + ], + processors: [ + default: [concurrency: processor_concurrency] + ], + context: %{test_pid: test_pid} + ) + end + + @impl Broadway + def handle_message(:default, message, %{test_pid: test_pid}) do + send(test_pid, {:msg, message.data}) + message + end + + @impl Broadway + def handle_failed(messages, %{test_pid: test_pid}) do + Enum.each(messages, fn msg -> + send(test_pid, {:failed, msg.data}) + end) + + messages + end + end + + defmodule SlowPipeline do + @moduledoc "Pipeline with configurable processing delay." + use Broadway + + def start_link(opts) do + test_pid = Keyword.fetch!(opts, :test_pid) + subscription = Keyword.fetch!(opts, :subscription) + emulator_host = Keyword.fetch!(opts, :emulator_host) + name = Keyword.fetch!(opts, :name) + delay_ms = Keyword.get(opts, :delay_ms, 0) + adapter = Keyword.get(opts, :adapter, :gun) + max_outstanding = Keyword.get(opts, :max_outstanding, 1000) + processor_concurrency = Keyword.get(opts, :processor_concurrency, 4) + + Broadway.start_link(__MODULE__, + name: name, + producer: [ + module: + {BroadwayCloudPubSub.Streaming.Producer, + subscription: subscription, + token_generator: {BroadwayCloudPubSub.Streaming.StressTest, :noop_token, []}, + grpc_endpoint: emulator_host, + use_ssl: false, + adapter: adapter, + max_outstanding_messages: max_outstanding, + on_failure: {:nack, 0}}, + concurrency: 1 + ], + processors: [ + default: [concurrency: processor_concurrency] + ], + context: %{test_pid: test_pid, delay_ms: delay_ms} + ) + end + + @impl Broadway + def handle_message(:default, message, %{test_pid: test_pid, delay_ms: delay_ms}) do + if delay_ms > 0, do: Process.sleep(delay_ms) + send(test_pid, {:msg, message.data}) + message + end + + @impl Broadway + def handle_failed(messages, %{test_pid: test_pid}) do + Enum.each(messages, fn msg -> + send(test_pid, {:failed, msg.data}) + end) + + messages + end + end + + # --------------------------------------------------------------------------- + # Helpers + # --------------------------------------------------------------------------- + + setup_all do + # GRPC.Client.Supervisor is started automatically by the grpc_client OTP application + PubSubEmulator.start() + :ok + end + + defp unique_name, do: :"StressPipeline#{:erlang.unique_integer([:positive])}" + + defp setup_infra(prefix) do + topic = "#{prefix}-#{:erlang.unique_integer([:positive])}" + sub = "#{prefix}-sub-#{:erlang.unique_integer([:positive])}" + {_full_topic, full_sub} = PubSubEmulator.setup_topic_and_subscription(topic, sub) + {topic, sub, full_sub} + end + + defp stop_pipeline(pid) do + ref = Process.monitor(pid) + + try do + Broadway.stop(pid) + catch + :exit, _ -> :ok + end + + receive do + {:DOWN, ^ref, :process, ^pid, _} -> :ok + after + 10_000 -> :ok + end + end + + defp collect_messages(expected_count, timeout_ms) do + deadline = System.monotonic_time(:millisecond) + timeout_ms + collect_messages_loop(expected_count, deadline, []) + end + + defp collect_messages_loop(0, _deadline, acc), do: {:ok, Enum.reverse(acc)} + + defp collect_messages_loop(remaining, deadline, acc) do + now = System.monotonic_time(:millisecond) + wait = max(deadline - now, 0) + + receive do + {:msg, data} -> + collect_messages_loop(remaining - 1, deadline, [data | acc]) + after + wait -> + {:partial, Enum.reverse(acc), remaining} + end + end + + defp publish_in_batches(topic, total, batch_size) do + payloads = Enum.map(1..total, &"msg-#{&1}") + + payloads + |> Enum.chunk_every(batch_size) + |> Enum.each(fn batch -> + {:ok, _ids} = PubSubEmulator.publish(topic, batch) + end) + + payloads + end + + # --------------------------------------------------------------------------- + # Scenario 1: High-volume burst — 1000 messages, fast processing, Gun adapter + # --------------------------------------------------------------------------- + + describe "Scenario 1: High-volume burst (Gun)" do + test "receives all 1000 messages without loss" do + {topic, _sub, full_sub} = setup_infra("burst-gun") + name = unique_name() + + {:ok, pid} = + StreamingPipeline.start_link( + name: name, + test_pid: self(), + subscription: full_sub, + emulator_host: PubSubEmulator.host(), + adapter: :gun, + max_outstanding: 1000, + processor_concurrency: 8 + ) + + Process.sleep(500) + + expected = publish_in_batches(topic, 1000, 200) + + case collect_messages(1000, 60_000) do + {:ok, received} -> + assert length(received) == 1000 + assert Enum.sort(received) == Enum.sort(expected) + Logger.info("[Stress 1/Gun] All 1000 messages received.") + + {:partial, received, remaining} -> + Logger.warning( + "[Stress 1/Gun] Only #{length(received)}/1000 received, #{remaining} missing" + ) + + flunk("Missing #{remaining} messages out of 1000") + end + + stop_pipeline(pid) + end + end + + # --------------------------------------------------------------------------- + # Scenario 2: High-volume burst — 1000 messages, Mint adapter + # --------------------------------------------------------------------------- + + describe "Scenario 2: High-volume burst (Mint)" do + test "receives all 1000 messages without loss" do + {topic, _sub, full_sub} = setup_infra("burst-mint") + name = unique_name() + + {:ok, pid} = + StreamingPipeline.start_link( + name: name, + test_pid: self(), + subscription: full_sub, + emulator_host: PubSubEmulator.host(), + adapter: :mint, + max_outstanding: 1000, + processor_concurrency: 8 + ) + + Process.sleep(500) + + expected = publish_in_batches(topic, 1000, 200) + + case collect_messages(1000, 60_000) do + {:ok, received} -> + assert length(received) == 1000 + assert Enum.sort(received) == Enum.sort(expected) + Logger.info("[Stress 2/Mint] All 1000 messages received.") + + {:partial, received, remaining} -> + Logger.warning( + "[Stress 2/Mint] Only #{length(received)}/1000 received, #{remaining} missing" + ) + + flunk("Missing #{remaining} messages out of 1000") + end + + stop_pipeline(pid) + end + end + + # --------------------------------------------------------------------------- + # Scenario 3: Rapid sequential batches — 5 bursts of 200, no pause + # --------------------------------------------------------------------------- + + describe "Scenario 3: Rapid sequential batches" do + test "handles 5 rapid bursts of 200 messages (Gun)" do + {topic, _sub, full_sub} = setup_infra("rapid-gun") + name = unique_name() + + {:ok, pid} = + StreamingPipeline.start_link( + name: name, + test_pid: self(), + subscription: full_sub, + emulator_host: PubSubEmulator.host(), + adapter: :gun, + max_outstanding: 1000, + processor_concurrency: 8 + ) + + Process.sleep(500) + + # Publish 5 bursts of 200 messages back-to-back + all_expected = + Enum.flat_map(1..5, fn batch_num -> + payloads = Enum.map(1..200, &"batch#{batch_num}-msg-#{&1}") + {:ok, _ids} = PubSubEmulator.publish(topic, payloads) + payloads + end) + + case collect_messages(1000, 60_000) do + {:ok, received} -> + assert length(received) == 1000 + assert Enum.sort(received) == Enum.sort(all_expected) + Logger.info("[Stress 3] All 1000 messages from 5 bursts received.") + + {:partial, received, remaining} -> + Logger.warning("[Stress 3] #{length(received)}/1000 received, #{remaining} missing") + + flunk("Missing #{remaining} messages after rapid sequential batches") + end + + stop_pipeline(pid) + end + end + + # --------------------------------------------------------------------------- + # Scenario 4: Demand pressure — low max_outstanding, high message volume + # --------------------------------------------------------------------------- + + describe "Scenario 4: Demand pressure with low max_outstanding" do + test "handles 500 messages with max_outstanding=10 (Gun)" do + {topic, _sub, full_sub} = setup_infra("demand-gun") + name = unique_name() + + {:ok, pid} = + StreamingPipeline.start_link( + name: name, + test_pid: self(), + subscription: full_sub, + emulator_host: PubSubEmulator.host(), + adapter: :gun, + # Very low outstanding — forces heavy demand cycling + max_outstanding: 10, + processor_concurrency: 2 + ) + + Process.sleep(500) + + expected = publish_in_batches(topic, 500, 50) + + case collect_messages(500, 60_000) do + {:ok, received} -> + assert length(received) == 500 + assert Enum.sort(received) == Enum.sort(expected) + Logger.info("[Stress 4] All 500 messages received with max_outstanding=10.") + + {:partial, received, remaining} -> + Logger.warning("[Stress 4] #{length(received)}/500 received, #{remaining} missing") + + flunk("Missing #{remaining} messages with constrained outstanding") + end + + stop_pipeline(pid) + end + + test "handles 500 messages with max_outstanding=10 (Mint)" do + {topic, _sub, full_sub} = setup_infra("demand-mint") + name = unique_name() + + {:ok, pid} = + StreamingPipeline.start_link( + name: name, + test_pid: self(), + subscription: full_sub, + emulator_host: PubSubEmulator.host(), + adapter: :mint, + max_outstanding: 10, + processor_concurrency: 2 + ) + + Process.sleep(500) + + expected = publish_in_batches(topic, 500, 50) + + case collect_messages(500, 60_000) do + {:ok, received} -> + assert length(received) == 500 + assert Enum.sort(received) == Enum.sort(expected) + Logger.info("[Stress 4/Mint] All 500 messages received with max_outstanding=10.") + + {:partial, received, remaining} -> + Logger.warning("[Stress 4/Mint] #{length(received)}/500 received, #{remaining} missing") + + flunk("Missing #{remaining} messages with constrained outstanding (Mint)") + end + + stop_pipeline(pid) + end + end + + # --------------------------------------------------------------------------- + # Scenario 5: Streaming vs Pull producer comparison + # --------------------------------------------------------------------------- + + describe "Scenario 5: Streaming vs Pull comparison" do + test "both producers receive all 200 messages" do + # -- Streaming (Gun) -- + {s_topic, _s_sub, s_full_sub} = setup_infra("cmp-stream") + s_name = unique_name() + + {:ok, s_pid} = + StreamingPipeline.start_link( + name: s_name, + test_pid: self(), + subscription: s_full_sub, + emulator_host: PubSubEmulator.host(), + adapter: :gun, + max_outstanding: 500, + processor_concurrency: 4 + ) + + Process.sleep(500) + + s_expected = publish_in_batches(s_topic, 200, 100) + s_start = System.monotonic_time(:millisecond) + + s_result = collect_messages(200, 30_000) + s_elapsed = System.monotonic_time(:millisecond) - s_start + + stop_pipeline(s_pid) + + # -- Pull -- + {p_topic, _p_sub, p_full_sub} = setup_infra("cmp-pull") + p_name = unique_name() + + {:ok, p_pid} = + PullPipeline.start_link( + name: p_name, + test_pid: self(), + subscription: p_full_sub, + emulator_host: PubSubEmulator.host(), + processor_concurrency: 4 + ) + + Process.sleep(500) + + p_expected = publish_in_batches(p_topic, 200, 100) + p_start = System.monotonic_time(:millisecond) + + p_result = collect_messages(200, 30_000) + p_elapsed = System.monotonic_time(:millisecond) - p_start + + stop_pipeline(p_pid) + + # Assert both received everything + case s_result do + {:ok, s_received} -> + assert length(s_received) == 200 + assert Enum.sort(s_received) == Enum.sort(s_expected) + + {:partial, s_received, s_remaining} -> + flunk("Streaming: only #{length(s_received)}/200, missing #{s_remaining}") + end + + case p_result do + {:ok, p_received} -> + assert length(p_received) == 200 + assert Enum.sort(p_received) == Enum.sort(p_expected) + + {:partial, p_received, p_remaining} -> + flunk("Pull: only #{length(p_received)}/200, missing #{p_remaining}") + end + + Logger.info("[Stress 5] Streaming: #{s_elapsed}ms, Pull: #{p_elapsed}ms for 200 messages") + end + end + + # --------------------------------------------------------------------------- + # Scenario 6: Pipeline stop/restart during message flow + # --------------------------------------------------------------------------- + + describe "Scenario 6: Stop and restart during message flow" do + test "no messages lost after restart" do + {topic, _sub, full_sub} = setup_infra("restart") + name = unique_name() + + # Start pipeline + {:ok, pid1} = + StreamingPipeline.start_link( + name: name, + test_pid: self(), + subscription: full_sub, + emulator_host: PubSubEmulator.host(), + adapter: :gun, + max_outstanding: 100, + processor_concurrency: 4 + ) + + Process.sleep(500) + + # Publish first batch + batch1 = Enum.map(1..100, &"phase1-#{&1}") + {:ok, _} = PubSubEmulator.publish(topic, batch1) + + # Collect some messages, then stop + Process.sleep(2000) + stop_pipeline(pid1) + + # Drain whatever arrived from first pipeline + phase1_received = drain_mailbox() + + # Publish second batch while pipeline is down + batch2 = Enum.map(1..100, &"phase2-#{&1}") + {:ok, _} = PubSubEmulator.publish(topic, batch2) + + # Restart with a DIFFERENT name (since the old name is taken by the stopped process) + name2 = unique_name() + + {:ok, pid2} = + StreamingPipeline.start_link( + name: name2, + test_pid: self(), + subscription: full_sub, + emulator_host: PubSubEmulator.host(), + adapter: :gun, + max_outstanding: 500, + processor_concurrency: 4 + ) + + Process.sleep(500) + + # All messages not yet acked + batch2 should arrive + all_expected = MapSet.new(batch1 ++ batch2) + all_received_from_restart = collect_remaining(all_expected, phase1_received, 30_000) + + stop_pipeline(pid2) + + missing = MapSet.difference(all_expected, all_received_from_restart) + + if MapSet.size(missing) > 0 do + Logger.warning( + "[Stress 6] Missing #{MapSet.size(missing)} messages: #{inspect(Enum.take(MapSet.to_list(missing), 10))}" + ) + + flunk("Lost #{MapSet.size(missing)} messages across stop/restart") + end + + Logger.info("[Stress 6] All 200 messages recovered after pipeline restart.") + end + end + + # --------------------------------------------------------------------------- + # Scenario 7: Concurrent publishers + # --------------------------------------------------------------------------- + + describe "Scenario 7: Concurrent publishers" do + test "handles messages from 5 concurrent publishers (Gun)" do + {topic, _sub, full_sub} = setup_infra("conc-pub") + name = unique_name() + + {:ok, pid} = + StreamingPipeline.start_link( + name: name, + test_pid: self(), + subscription: full_sub, + emulator_host: PubSubEmulator.host(), + adapter: :gun, + max_outstanding: 1000, + processor_concurrency: 8 + ) + + Process.sleep(500) + + # 5 tasks each publishing 200 messages concurrently + tasks = + Enum.map(1..5, fn pub_id -> + Task.async(fn -> + payloads = Enum.map(1..200, &"pub#{pub_id}-msg-#{&1}") + {:ok, _} = PubSubEmulator.publish(topic, payloads) + payloads + end) + end) + + all_expected = + tasks + |> Task.await_many(30_000) + |> List.flatten() + + case collect_messages(1000, 60_000) do + {:ok, received} -> + assert length(received) == 1000 + assert Enum.sort(received) == Enum.sort(all_expected) + Logger.info("[Stress 7] All 1000 messages from 5 concurrent publishers received.") + + {:partial, received, remaining} -> + Logger.warning("[Stress 7] #{length(received)}/1000 received, #{remaining} missing") + + flunk("Missing #{remaining} messages from concurrent publishers") + end + + stop_pipeline(pid) + end + end + + # --------------------------------------------------------------------------- + # Scenario 8: High concurrency processors, near-zero processing time + # --------------------------------------------------------------------------- + + describe "Scenario 8: High processor concurrency, zero delay" do + test "16 processors handle 2000 messages with zero processing time (Gun)" do + {topic, _sub, full_sub} = setup_infra("fast-gun") + name = unique_name() + + {:ok, pid} = + StreamingPipeline.start_link( + name: name, + test_pid: self(), + subscription: full_sub, + emulator_host: PubSubEmulator.host(), + adapter: :gun, + max_outstanding: 2000, + processor_concurrency: 16 + ) + + Process.sleep(500) + + expected = publish_in_batches(topic, 2000, 200) + + start_time = System.monotonic_time(:millisecond) + + case collect_messages(2000, 90_000) do + {:ok, received} -> + elapsed = System.monotonic_time(:millisecond) - start_time + assert length(received) == 2000 + assert Enum.sort(received) == Enum.sort(expected) + + Logger.info( + "[Stress 8/Gun] 2000 messages with 16 processors in #{elapsed}ms (#{Float.round(2000 / (elapsed / 1000), 1)} msgs/sec)" + ) + + {:partial, received, remaining} -> + elapsed = System.monotonic_time(:millisecond) - start_time + + Logger.warning( + "[Stress 8/Gun] #{length(received)}/2000 in #{elapsed}ms, #{remaining} missing" + ) + + flunk("Missing #{remaining} messages with high concurrency") + end + + stop_pipeline(pid) + end + + test "16 processors handle 2000 messages with zero processing time (Mint)" do + {topic, _sub, full_sub} = setup_infra("fast-mint") + name = unique_name() + + {:ok, pid} = + StreamingPipeline.start_link( + name: name, + test_pid: self(), + subscription: full_sub, + emulator_host: PubSubEmulator.host(), + adapter: :mint, + max_outstanding: 2000, + processor_concurrency: 16 + ) + + Process.sleep(500) + + expected = publish_in_batches(topic, 2000, 200) + + start_time = System.monotonic_time(:millisecond) + + case collect_messages(2000, 90_000) do + {:ok, received} -> + elapsed = System.monotonic_time(:millisecond) - start_time + assert length(received) == 2000 + assert Enum.sort(received) == Enum.sort(expected) + + Logger.info( + "[Stress 8/Mint] 2000 messages with 16 processors in #{elapsed}ms (#{Float.round(2000 / (elapsed / 1000), 1)} msgs/sec)" + ) + + {:partial, received, remaining} -> + elapsed = System.monotonic_time(:millisecond) - start_time + + Logger.warning( + "[Stress 8/Mint] #{length(received)}/2000 in #{elapsed}ms, #{remaining} missing" + ) + + flunk("Missing #{remaining} messages with high concurrency (Mint)") + end + + stop_pipeline(pid) + end + end + + # --------------------------------------------------------------------------- + # Scenario 9: No duplicate messages + # --------------------------------------------------------------------------- + + describe "Scenario 9: No duplicate delivery" do + test "500 messages arrive exactly once (Gun)" do + {topic, _sub, full_sub} = setup_infra("nodup-gun") + name = unique_name() + + {:ok, pid} = + StreamingPipeline.start_link( + name: name, + test_pid: self(), + subscription: full_sub, + emulator_host: PubSubEmulator.host(), + adapter: :gun, + max_outstanding: 500, + processor_concurrency: 4 + ) + + Process.sleep(500) + + expected = publish_in_batches(topic, 500, 100) + + case collect_messages(500, 30_000) do + {:ok, received} -> + assert length(received) == 500 + + unique = Enum.uniq(received) + + if length(unique) != length(received) do + dupes = received -- unique + Logger.warning("[Stress 9] Duplicates found: #{inspect(Enum.take(dupes, 10))}") + flunk("Found #{length(received) - length(unique)} duplicate messages") + end + + assert Enum.sort(received) == Enum.sort(expected) + Logger.info("[Stress 9] 500 messages, zero duplicates.") + + {:partial, received, remaining} -> + flunk("Only #{length(received)}/500 received, #{remaining} missing") + end + + # Wait a bit for any late duplicates + Process.sleep(2000) + late_dupes = drain_mailbox() + + if length(late_dupes) > 0 do + Logger.warning( + "[Stress 9] #{length(late_dupes)} late duplicate(s) arrived after collect!" + ) + + flunk("#{length(late_dupes)} late duplicates detected") + end + + stop_pipeline(pid) + end + end + + # --------------------------------------------------------------------------- + # Scenario 10: Messages published BEFORE pipeline starts + # --------------------------------------------------------------------------- + + describe "Scenario 10: Pre-published messages" do + test "receives messages that were published before pipeline started" do + {topic, _sub, full_sub} = setup_infra("prepub") + + # Publish BEFORE pipeline exists + expected = publish_in_batches(topic, 300, 100) + + # Wait to make sure messages are committed in emulator + Process.sleep(500) + + name = unique_name() + + {:ok, pid} = + StreamingPipeline.start_link( + name: name, + test_pid: self(), + subscription: full_sub, + emulator_host: PubSubEmulator.host(), + adapter: :gun, + max_outstanding: 500, + processor_concurrency: 4 + ) + + case collect_messages(300, 30_000) do + {:ok, received} -> + assert length(received) == 300 + assert Enum.sort(received) == Enum.sort(expected) + Logger.info("[Stress 10] All 300 pre-published messages received.") + + {:partial, received, remaining} -> + flunk("Only #{length(received)}/300 pre-published messages, #{remaining} missing") + end + + stop_pipeline(pid) + end + end + + # --------------------------------------------------------------------------- + # Scenario 11: Slow processing with message backlog + # --------------------------------------------------------------------------- + + describe "Scenario 11: Slow processing (simulated delay)" do + test "handles 100 messages with 50ms processing delay" do + {topic, _sub, full_sub} = setup_infra("slow-proc") + name = unique_name() + + {:ok, pid} = + SlowPipeline.start_link( + name: name, + test_pid: self(), + subscription: full_sub, + emulator_host: PubSubEmulator.host(), + adapter: :gun, + max_outstanding: 100, + processor_concurrency: 4, + delay_ms: 50 + ) + + Process.sleep(500) + + expected = publish_in_batches(topic, 100, 50) + + start_time = System.monotonic_time(:millisecond) + + case collect_messages(100, 60_000) do + {:ok, received} -> + elapsed = System.monotonic_time(:millisecond) - start_time + assert length(received) == 100 + assert Enum.sort(received) == Enum.sort(expected) + + Logger.info("[Stress 11] 100 messages with 50ms delay in #{elapsed}ms") + + {:partial, received, remaining} -> + flunk("Only #{length(received)}/100, #{remaining} missing with slow processing") + end + + stop_pipeline(pid) + end + end + + # --------------------------------------------------------------------------- + # Private helpers for Scenario 6 + # --------------------------------------------------------------------------- + + defp drain_mailbox do + drain_mailbox_loop([]) + end + + defp drain_mailbox_loop(acc) do + receive do + {:msg, data} -> drain_mailbox_loop([data | acc]) + {:failed, data} -> drain_mailbox_loop([data | acc]) + after + 100 -> Enum.reverse(acc) + end + end + + defp collect_remaining(all_expected, already_received, timeout_ms) do + received_set = MapSet.new(already_received) + remaining = MapSet.difference(all_expected, received_set) + remaining_count = MapSet.size(remaining) + + if remaining_count == 0 do + received_set + else + deadline = System.monotonic_time(:millisecond) + timeout_ms + collect_remaining_loop(received_set, all_expected, deadline) + end + end + + defp collect_remaining_loop(received_set, all_expected, deadline) do + if MapSet.equal?(received_set, all_expected) do + received_set + else + now = System.monotonic_time(:millisecond) + wait = max(deadline - now, 0) + + receive do + {:msg, data} -> + collect_remaining_loop(MapSet.put(received_set, data), all_expected, deadline) + after + wait -> received_set + end + end + end + + # =========================================================================== + # AGGRESSIVE SCENARIOS — Trying to break the producer + # =========================================================================== + + # --------------------------------------------------------------------------- + # Scenario 12: Extreme backpressure — max_outstanding = 1 + # Forces the producer to deliver ONE message at a time. + # --------------------------------------------------------------------------- + + describe "Scenario 12: Extreme backpressure (max_outstanding=1)" do + test "100 messages with max_outstanding=1 (Gun)" do + {topic, _sub, full_sub} = setup_infra("extreme-bp-gun") + name = unique_name() + + {:ok, pid} = + StreamingPipeline.start_link( + name: name, + test_pid: self(), + subscription: full_sub, + emulator_host: PubSubEmulator.host(), + adapter: :gun, + max_outstanding: 1, + processor_concurrency: 1 + ) + + Process.sleep(500) + + expected = publish_in_batches(topic, 100, 20) + + case collect_messages(100, 60_000) do + {:ok, received} -> + assert length(received) == 100 + assert Enum.sort(received) == Enum.sort(expected) + Logger.info("[Stress 12/Gun] 100 messages with max_outstanding=1 — all received.") + + {:partial, received, remaining} -> + flunk("max_outstanding=1 (Gun): only #{length(received)}/100, missing #{remaining}") + end + + stop_pipeline(pid) + end + + test "100 messages with max_outstanding=1 (Mint)" do + {topic, _sub, full_sub} = setup_infra("extreme-bp-mint") + name = unique_name() + + {:ok, pid} = + StreamingPipeline.start_link( + name: name, + test_pid: self(), + subscription: full_sub, + emulator_host: PubSubEmulator.host(), + adapter: :mint, + max_outstanding: 1, + processor_concurrency: 1 + ) + + Process.sleep(500) + + expected = publish_in_batches(topic, 100, 20) + + case collect_messages(100, 60_000) do + {:ok, received} -> + assert length(received) == 100 + assert Enum.sort(received) == Enum.sort(expected) + Logger.info("[Stress 12/Mint] 100 messages with max_outstanding=1 — all received.") + + {:partial, received, remaining} -> + flunk("max_outstanding=1 (Mint): only #{length(received)}/100, missing #{remaining}") + end + + stop_pipeline(pid) + end + end + + # --------------------------------------------------------------------------- + # Scenario 13: Massive burst — 5000 messages + # Tests the upper end of throughput. + # --------------------------------------------------------------------------- + + describe "Scenario 13: Massive burst (5000 messages)" do + test "receives all 5000 messages (Gun)" do + {topic, _sub, full_sub} = setup_infra("massive-gun") + name = unique_name() + + {:ok, pid} = + StreamingPipeline.start_link( + name: name, + test_pid: self(), + subscription: full_sub, + emulator_host: PubSubEmulator.host(), + adapter: :gun, + max_outstanding: 5000, + processor_concurrency: 16 + ) + + Process.sleep(500) + + expected = publish_in_batches(topic, 5000, 500) + + start_time = System.monotonic_time(:millisecond) + + case collect_messages(5000, 90_000) do + {:ok, received} -> + elapsed = System.monotonic_time(:millisecond) - start_time + assert length(received) == 5000 + assert Enum.sort(received) == Enum.sort(expected) + + Logger.info( + "[Stress 13/Gun] 5000 messages in #{elapsed}ms " <> + "(#{Float.round(5000 / (elapsed / 1000), 1)} msgs/sec)" + ) + + {:partial, received, remaining} -> + elapsed = System.monotonic_time(:millisecond) - start_time + + flunk( + "Massive burst (Gun): #{length(received)}/5000 in #{elapsed}ms, #{remaining} missing" + ) + end + + stop_pipeline(pid) + end + + test "receives all 5000 messages (Mint)" do + {topic, _sub, full_sub} = setup_infra("massive-mint") + name = unique_name() + + {:ok, pid} = + StreamingPipeline.start_link( + name: name, + test_pid: self(), + subscription: full_sub, + emulator_host: PubSubEmulator.host(), + adapter: :mint, + max_outstanding: 5000, + processor_concurrency: 16 + ) + + Process.sleep(500) + + expected = publish_in_batches(topic, 5000, 500) + + start_time = System.monotonic_time(:millisecond) + + case collect_messages(5000, 90_000) do + {:ok, received} -> + elapsed = System.monotonic_time(:millisecond) - start_time + assert length(received) == 5000 + assert Enum.sort(received) == Enum.sort(expected) + + Logger.info( + "[Stress 13/Mint] 5000 messages in #{elapsed}ms " <> + "(#{Float.round(5000 / (elapsed / 1000), 1)} msgs/sec)" + ) + + {:partial, received, remaining} -> + elapsed = System.monotonic_time(:millisecond) - start_time + + flunk( + "Massive burst (Mint): #{length(received)}/5000 in #{elapsed}ms, #{remaining} missing" + ) + end + + stop_pipeline(pid) + end + end + + # --------------------------------------------------------------------------- + # Scenario 14: Kill the gRPC connection mid-stream + # Simulates network failure while messages are actively flowing. + # --------------------------------------------------------------------------- + + describe "Scenario 14: Kill connection during active message flow" do + test "recovers after connection kill (Gun)" do + {topic, _sub, full_sub} = setup_infra("connkill-gun") + name = unique_name() + + {:ok, pid} = + StreamingPipeline.start_link( + name: name, + test_pid: self(), + subscription: full_sub, + emulator_host: PubSubEmulator.host(), + adapter: :gun, + max_outstanding: 500, + processor_concurrency: 4 + ) + + Process.sleep(500) + + # Publish initial batch + batch1 = Enum.map(1..200, &"before-kill-#{&1}") + {:ok, _} = PubSubEmulator.publish(topic, batch1) + + # Wait for batch1 to be fully consumed + case collect_messages(200, 15_000) do + {:ok, _} -> + Logger.info("[Stress 14/Gun] batch1 fully consumed before kill") + + {:partial, received, remaining} -> + Logger.info( + "[Stress 14/Gun] batch1 partially consumed: #{length(received)}/200, #{remaining} remaining" + ) + end + + # Kill the Gun connection process and simulate gun_down so StreamManager + # detects the disconnect and reconnects. + stream_manager = Module.concat(name, BroadwayCloudPubSub.Streaming.StreamManager) + sm_state = :sys.get_state(stream_manager) + + case sm_state do + %{conn_pid: conn_pid} when is_pid(conn_pid) -> + Logger.info("[Stress 14/Gun] Killing conn_pid: #{inspect(conn_pid)}") + Process.exit(conn_pid, :kill) + + # Simulate the gun_down message that Gun would normally send + # send(stream_manager, {:gun_down, conn_pid, :http2, :killed, []}) + + _ -> + Logger.warning("[Stress 14/Gun] No conn_pid found in state") + end + + # Wait for reconnect (backoff_min is 1000ms by default, plus connection setup) + Process.sleep(5000) + + # Inspect StreamManager state to verify reconnection + sm_state_after = :sys.get_state(stream_manager) + + Logger.info( + "[Stress 14/Gun] After reconnect: grpc_stream=#{sm_state_after.grpc_stream != nil}, " <> + "pending_demand=#{sm_state_after.pending_demand}, " <> + "buffer_size=#{:queue.len(sm_state_after.message_buffer)}, " <> + "outstanding=#{map_size(sm_state_after.outstanding)}" + ) + + # Publish more AFTER reconnect + batch2 = Enum.map(1..200, &"after-kill-#{&1}") + {:ok, _} = PubSubEmulator.publish(topic, batch2) + + case collect_messages(200, 30_000) do + {:ok, received} -> + assert length(received) == 200 + Logger.info("[Stress 14/Gun] All 200 post-kill messages received.") + + {:partial, received, remaining} -> + # Log final state for diagnostics + sm_final = :sys.get_state(stream_manager) + + Logger.warning( + "[Stress 14/Gun] Post-kill: #{length(received)}/200, #{remaining} missing. " <> + "stream=#{sm_final.grpc_stream != nil}, demand=#{sm_final.pending_demand}, " <> + "buffer=#{:queue.len(sm_final.message_buffer)}" + ) + + flunk("Lost #{remaining} messages after connection kill") + end + + stop_pipeline(pid) + end + + test "recovers after connection kill — gun_down only (Gun)" do + {topic, _sub, full_sub} = setup_infra("connkill-gun2") + name = unique_name() + + {:ok, pid} = + StreamingPipeline.start_link( + name: name, + test_pid: self(), + subscription: full_sub, + emulator_host: PubSubEmulator.host(), + adapter: :gun, + max_outstanding: 500, + processor_concurrency: 4 + ) + + Process.sleep(500) + + batch1 = Enum.map(1..200, &"before-kill-#{&1}") + {:ok, _} = PubSubEmulator.publish(topic, batch1) + + case collect_messages(200, 15_000) do + {:ok, _} -> + Logger.info("[Stress 14/Gun2] batch1 fully consumed before kill") + + {:partial, received, remaining} -> + Logger.info( + "[Stress 14/Gun2] batch1 partially consumed: #{length(received)}/200, #{remaining} remaining" + ) + end + + # Mirror the Mint test exactly: get conn_pid, kill the process, then send the + # adapter-level disconnect signal so StreamManager detects it via the new handler. + stream_manager = Module.concat(name, BroadwayCloudPubSub.Streaming.StreamManager) + sm_state = :sys.get_state(stream_manager) + + case sm_state do + %{conn_pid: conn_pid} when is_pid(conn_pid) -> + Logger.info("[Stress 14/Gun2] Killing conn_pid: #{inspect(conn_pid)}") + Process.exit(conn_pid, :kill) + # Send the gun_down signal directly to StreamManager — mirrors the Mint + # test which sends {:elixir_grpc, :connection_down, conn_pid}. + send(stream_manager, {:gun_down, conn_pid, :http2, :killed, []}) + + _ -> + Logger.warning("[Stress 14/Gun2] No conn_pid found in state") + end + + Process.sleep(5000) + + sm_state_after = :sys.get_state(stream_manager) + + Logger.info( + "[Stress 14/Gun2] After reconnect: grpc_stream=#{sm_state_after.grpc_stream != nil}, " <> + "pending_demand=#{sm_state_after.pending_demand}, " <> + "buffer_size=#{:queue.len(sm_state_after.message_buffer)}, " <> + "outstanding=#{map_size(sm_state_after.outstanding)}" + ) + + batch2 = Enum.map(1..200, &"after-kill-#{&1}") + {:ok, _} = PubSubEmulator.publish(topic, batch2) + + case collect_messages(200, 30_000) do + {:ok, received} -> + assert length(received) == 200 + Logger.info("[Stress 14/Gun2] All 200 post-kill messages received.") + + {:partial, received, remaining} -> + sm_final = :sys.get_state(stream_manager) + + Logger.warning( + "[Stress 14/Gun2] Post-kill: #{length(received)}/200, #{remaining} missing. " <> + "stream=#{sm_final.grpc_stream != nil}, demand=#{sm_final.pending_demand}, " <> + "buffer=#{:queue.len(sm_final.message_buffer)}" + ) + + flunk("Lost #{remaining} messages after connection kill (Gun2)") + end + + stop_pipeline(pid) + end + + test "recovers after connection kill (Mint)" do + {topic, _sub, full_sub} = setup_infra("connkill-mint") + name = unique_name() + + {:ok, pid} = + StreamingPipeline.start_link( + name: name, + test_pid: self(), + subscription: full_sub, + emulator_host: PubSubEmulator.host(), + adapter: :mint, + max_outstanding: 500, + processor_concurrency: 4 + ) + + Process.sleep(500) + + batch1 = Enum.map(1..200, &"before-kill-#{&1}") + {:ok, _} = PubSubEmulator.publish(topic, batch1) + + # Wait for batch1 to be fully consumed + case collect_messages(200, 15_000) do + {:ok, _} -> + Logger.info("[Stress 14/Mint] batch1 fully consumed before kill") + + {:partial, received, remaining} -> + Logger.info( + "[Stress 14/Mint] batch1 partially consumed: #{length(received)}/200, #{remaining} remaining" + ) + end + + # For Mint, kill the conn_pid and simulate the connection_down message + stream_manager = Module.concat(name, BroadwayCloudPubSub.Streaming.StreamManager) + sm_state = :sys.get_state(stream_manager) + + case sm_state do + %{conn_pid: conn_pid} when is_pid(conn_pid) -> + Logger.info("[Stress 14/Mint] Killing conn_pid: #{inspect(conn_pid)}") + Process.exit(conn_pid, :kill) + # Simulate the Mint connection down event + send(stream_manager, {:elixir_grpc, :connection_down, conn_pid}) + + _ -> + Logger.warning("[Stress 14/Mint] No conn_pid found in state") + end + + # Wait for reconnect + Process.sleep(5000) + + # Inspect StreamManager state to verify reconnection + sm_state_after = :sys.get_state(stream_manager) + + Logger.info( + "[Stress 14/Mint] After reconnect: grpc_stream=#{sm_state_after.grpc_stream != nil}, " <> + "pending_demand=#{sm_state_after.pending_demand}, " <> + "buffer_size=#{:queue.len(sm_state_after.message_buffer)}, " <> + "outstanding=#{map_size(sm_state_after.outstanding)}" + ) + + batch2 = Enum.map(1..200, &"after-kill-#{&1}") + {:ok, _} = PubSubEmulator.publish(topic, batch2) + + case collect_messages(200, 30_000) do + {:ok, received} -> + assert length(received) == 200 + Logger.info("[Stress 14/Mint] All 200 post-kill messages received.") + + {:partial, received, remaining} -> + sm_final = :sys.get_state(stream_manager) + + Logger.warning( + "[Stress 14/Mint] Post-kill: #{length(received)}/200, #{remaining} missing. " <> + "stream=#{sm_final.grpc_stream != nil}, demand=#{sm_final.pending_demand}, " <> + "buffer=#{:queue.len(sm_final.message_buffer)}" + ) + + flunk("Lost #{remaining} messages after connection kill (Mint)") + end + + stop_pipeline(pid) + end + end + + # --------------------------------------------------------------------------- + # Scenario 15: Publish continuously while processing + # Simulates a steady stream of publishes during active consumption. + # --------------------------------------------------------------------------- + + describe "Scenario 15: Continuous publish during processing" do + test "handles steady publish stream (Gun)" do + {topic, _sub, full_sub} = setup_infra("continuous-gun") + name = unique_name() + + {:ok, pid} = + StreamingPipeline.start_link( + name: name, + test_pid: self(), + subscription: full_sub, + emulator_host: PubSubEmulator.host(), + adapter: :gun, + max_outstanding: 500, + processor_concurrency: 8 + ) + + Process.sleep(500) + + # Publish 20 batches of 50 with small delays between batches + # to simulate a steady incoming stream + total_msgs = 1000 + + publisher_task = + Task.async(fn -> + Enum.flat_map(1..20, fn batch_num -> + payloads = Enum.map(1..50, &"stream-b#{batch_num}-#{&1}") + {:ok, _} = PubSubEmulator.publish(topic, payloads) + # Small delay to simulate realistic publishing rate + Process.sleep(50) + payloads + end) + end) + + all_expected = Task.await(publisher_task, 30_000) + + case collect_messages(total_msgs, 60_000) do + {:ok, received} -> + assert length(received) == total_msgs + assert Enum.sort(received) == Enum.sort(all_expected) + + Logger.info( + "[Stress 15/Gun] All #{total_msgs} continuously-published messages received." + ) + + {:partial, received, remaining} -> + flunk( + "Continuous publish (Gun): #{length(received)}/#{total_msgs}, #{remaining} missing" + ) + end + + stop_pipeline(pid) + end + end + + # --------------------------------------------------------------------------- + # Scenario 16: Extreme processor concurrency — 32 processors, zero delay + # Tests for race conditions in demand handling. + # --------------------------------------------------------------------------- + + describe "Scenario 16: Extreme processor concurrency (32)" do + test "32 processors handle 3000 messages (Gun)" do + {topic, _sub, full_sub} = setup_infra("extreme-proc-gun") + name = unique_name() + + {:ok, pid} = + StreamingPipeline.start_link( + name: name, + test_pid: self(), + subscription: full_sub, + emulator_host: PubSubEmulator.host(), + adapter: :gun, + max_outstanding: 3000, + processor_concurrency: 32 + ) + + Process.sleep(500) + + expected = publish_in_batches(topic, 3000, 300) + + start_time = System.monotonic_time(:millisecond) + + case collect_messages(3000, 90_000) do + {:ok, received} -> + elapsed = System.monotonic_time(:millisecond) - start_time + assert length(received) == 3000 + assert Enum.sort(received) == Enum.sort(expected) + + Logger.info( + "[Stress 16/Gun] 3000 msgs, 32 processors in #{elapsed}ms " <> + "(#{Float.round(3000 / (elapsed / 1000), 1)} msgs/sec)" + ) + + {:partial, received, remaining} -> + flunk("32 processors (Gun): #{length(received)}/3000, #{remaining} missing") + end + + stop_pipeline(pid) + end + + test "32 processors handle 3000 messages (Mint)" do + {topic, _sub, full_sub} = setup_infra("extreme-proc-mint") + name = unique_name() + + {:ok, pid} = + StreamingPipeline.start_link( + name: name, + test_pid: self(), + subscription: full_sub, + emulator_host: PubSubEmulator.host(), + adapter: :mint, + max_outstanding: 3000, + processor_concurrency: 32 + ) + + Process.sleep(500) + + expected = publish_in_batches(topic, 3000, 300) + + start_time = System.monotonic_time(:millisecond) + + case collect_messages(3000, 90_000) do + {:ok, received} -> + elapsed = System.monotonic_time(:millisecond) - start_time + assert length(received) == 3000 + assert Enum.sort(received) == Enum.sort(expected) + + Logger.info( + "[Stress 16/Mint] 3000 msgs, 32 processors in #{elapsed}ms " <> + "(#{Float.round(3000 / (elapsed / 1000), 1)} msgs/sec)" + ) + + {:partial, received, remaining} -> + flunk("32 processors (Mint): #{length(received)}/3000, #{remaining} missing") + end + + stop_pipeline(pid) + end + end + + # --------------------------------------------------------------------------- + # Scenario 17: Rapid stop/start cycles + # Tests that the producer cleans up properly when repeatedly started/stopped. + # --------------------------------------------------------------------------- + + describe "Scenario 17: Rapid stop/start cycles" do + test "survives 5 rapid start/stop cycles, then processes all messages" do + {topic, _sub, full_sub} = setup_infra("rapid-restart") + + # Start and stop 5 times rapidly + Enum.each(1..5, fn cycle -> + name = unique_name() + + {:ok, cycle_pid} = + StreamingPipeline.start_link( + name: name, + test_pid: self(), + subscription: full_sub, + emulator_host: PubSubEmulator.host(), + adapter: :gun, + max_outstanding: 100, + processor_concurrency: 2 + ) + + Process.sleep(200) + stop_pipeline(cycle_pid) + Logger.info("[Stress 17] Cycle #{cycle}/5 completed.") + end) + + # Drain any stale messages from cycles + drain_mailbox() + + # Now publish messages and start a final pipeline + expected = publish_in_batches(topic, 200, 100) + Process.sleep(200) + + final_name = unique_name() + + {:ok, final_pid} = + StreamingPipeline.start_link( + name: final_name, + test_pid: self(), + subscription: full_sub, + emulator_host: PubSubEmulator.host(), + adapter: :gun, + max_outstanding: 500, + processor_concurrency: 4 + ) + + case collect_messages(200, 30_000) do + {:ok, received} -> + assert length(received) == 200 + assert Enum.sort(received) == Enum.sort(expected) + Logger.info("[Stress 17] All 200 messages received after 5 rapid start/stop cycles.") + + {:partial, received, remaining} -> + flunk("After rapid restarts: #{length(received)}/200, #{remaining} missing") + end + + stop_pipeline(final_pid) + end + end + + # --------------------------------------------------------------------------- + # Scenario 18: Large message payloads + # Tests with big payloads (10KB each) to stress the gRPC frame parser. + # --------------------------------------------------------------------------- + + describe "Scenario 18: Large message payloads" do + test "handles 100 messages of ~10KB each (Gun)" do + {topic, _sub, full_sub} = setup_infra("large-payload-gun") + name = unique_name() + + {:ok, pid} = + StreamingPipeline.start_link( + name: name, + test_pid: self(), + subscription: full_sub, + emulator_host: PubSubEmulator.host(), + adapter: :gun, + max_outstanding: 100, + processor_concurrency: 4 + ) + + Process.sleep(500) + + # Each message is ~10KB + large_payloads = + Enum.map(1..100, fn i -> + padding = String.duplicate("X", 10_000) + "large-#{i}-#{padding}" + end) + + # Publish in smaller batches to avoid HTTP payload limits + large_payloads + |> Enum.chunk_every(10) + |> Enum.each(fn batch -> + {:ok, _} = PubSubEmulator.publish(topic, batch) + end) + + case collect_messages(100, 60_000) do + {:ok, received} -> + assert length(received) == 100 + # Verify content integrity — each should start with "large-N-" + Enum.each(received, fn msg -> + assert String.starts_with?(msg, "large-") + end) + + Logger.info("[Stress 18/Gun] All 100 large (~10KB) messages received intact.") + + {:partial, received, remaining} -> + flunk("Large payloads (Gun): #{length(received)}/100, #{remaining} missing") + end + + stop_pipeline(pid) + end + end + + # --------------------------------------------------------------------------- + # Scenario 19: Interleaved message failures + # Odd-numbered messages are failed; verify they get nacked properly + # and even-numbered ones succeed. + # --------------------------------------------------------------------------- + + defmodule FailingPipeline do + @moduledoc "Pipeline that fails odd-numbered messages." + use Broadway + + def start_link(opts) do + test_pid = Keyword.fetch!(opts, :test_pid) + subscription = Keyword.fetch!(opts, :subscription) + emulator_host = Keyword.fetch!(opts, :emulator_host) + name = Keyword.fetch!(opts, :name) + adapter = Keyword.get(opts, :adapter, :gun) + max_outstanding = Keyword.get(opts, :max_outstanding, 500) + processor_concurrency = Keyword.get(opts, :processor_concurrency, 4) + + Broadway.start_link(__MODULE__, + name: name, + producer: [ + module: + {BroadwayCloudPubSub.Streaming.Producer, + subscription: subscription, + token_generator: {BroadwayCloudPubSub.Streaming.StressTest, :noop_token, []}, + grpc_endpoint: emulator_host, + use_ssl: false, + adapter: adapter, + max_outstanding_messages: max_outstanding, + on_failure: {:nack, 0}}, + concurrency: 1 + ], + processors: [ + default: [concurrency: processor_concurrency] + ], + context: %{test_pid: test_pid} + ) + end + + @impl Broadway + def handle_message(:default, message, %{test_pid: test_pid}) do + data = message.data + + # Fail odd messages + if String.contains?(data, "-odd-") do + send(test_pid, {:will_fail, data}) + Broadway.Message.failed(message, :intentional_failure) + else + send(test_pid, {:msg, data}) + message + end + end + + @impl Broadway + def handle_failed(messages, %{test_pid: test_pid}) do + Enum.each(messages, fn msg -> + send(test_pid, {:failed, msg.data}) + end) + + messages + end + end + + describe "Scenario 19: Interleaved message failures" do + test "even messages succeed, odd messages are nacked and redelivered" do + {topic, _sub, full_sub} = setup_infra("fail-interleave") + name = unique_name() + + {:ok, pid} = + FailingPipeline.start_link( + name: name, + test_pid: self(), + subscription: full_sub, + emulator_host: PubSubEmulator.host(), + adapter: :gun, + max_outstanding: 200, + processor_concurrency: 4 + ) + + Process.sleep(500) + + # Publish 50 even and 50 odd messages + even_msgs = Enum.map(1..50, &"even-#{&1}") + odd_msgs = Enum.map(1..50, &"msg-odd-#{&1}") + all_msgs = Enum.shuffle(even_msgs ++ odd_msgs) + + {:ok, _} = PubSubEmulator.publish(topic, all_msgs) + + # Collect the 50 even messages that should succeed + case collect_messages(50, 30_000) do + {:ok, received} -> + assert length(received) == 50 + + Enum.each(received, fn msg -> + assert String.starts_with?(msg, "even-"), + "Expected only even messages, got: #{msg}" + end) + + Logger.info("[Stress 19] 50 even messages received, 50 odd messages properly failed.") + + {:partial, received, remaining} -> + Logger.warning( + "[Stress 19] Only #{length(received)}/50 even messages, #{remaining} missing" + ) + + # This is still acceptable — the test is about failure handling + flunk("Missing #{remaining} even messages") + end + + # Stop the pipeline BEFORE draining failure messages to prevent + # infinite nack/redeliver cycles on the odd messages + stop_pipeline(pid) + + # Check that we got failure notifications for odd messages + failed_msgs = drain_tagged_mailbox(:failed) + will_fail_msgs = drain_tagged_mailbox(:will_fail) + + Logger.info( + "[Stress 19] Failed callbacks: #{length(failed_msgs)}, will_fail signals: #{length(will_fail_msgs)}" + ) + + # At least some odd messages should have triggered :failed + assert length(failed_msgs) > 0 or length(will_fail_msgs) > 0, + "Expected at least some failure notifications for odd messages" + end + end + + # --------------------------------------------------------------------------- + # Scenario 20: Multiple concurrent pipelines on same subscription + # Tests competing consumers behavior. + # --------------------------------------------------------------------------- + + describe "Scenario 20: Multiple competing consumers" do + test "two pipelines on same subscription collectively receive all messages" do + {topic, _sub, full_sub} = setup_infra("competing") + + collector_pid = self() + + # Start two competing pipelines on the same subscription + name1 = unique_name() + name2 = unique_name() + + {:ok, pid1} = + StreamingPipeline.start_link( + name: name1, + test_pid: collector_pid, + subscription: full_sub, + emulator_host: PubSubEmulator.host(), + adapter: :gun, + max_outstanding: 250, + processor_concurrency: 4 + ) + + {:ok, pid2} = + StreamingPipeline.start_link( + name: name2, + test_pid: collector_pid, + subscription: full_sub, + emulator_host: PubSubEmulator.host(), + adapter: :gun, + max_outstanding: 250, + processor_concurrency: 4 + ) + + Process.sleep(1000) + + expected = publish_in_batches(topic, 500, 100) + + case collect_messages(500, 60_000) do + {:ok, received} -> + assert length(received) == 500 + assert Enum.sort(received) == Enum.sort(expected) + + Logger.info( + "[Stress 20] Two competing consumers collectively received all 500 messages." + ) + + {:partial, received, remaining} -> + Logger.warning("[Stress 20] #{length(received)}/500, #{remaining} missing") + flunk("Competing consumers: #{remaining} messages missing") + end + + stop_pipeline(pid1) + stop_pipeline(pid2) + end + end + + # --------------------------------------------------------------------------- + # Helper for Scenario 19 + # --------------------------------------------------------------------------- + + defp drain_tagged_mailbox(tag) do + drain_tagged_loop(tag, []) + end + + defp drain_tagged_loop(tag, acc) do + receive do + {^tag, data} -> drain_tagged_loop(tag, [data | acc]) + after + 200 -> Enum.reverse(acc) + end + end +end diff --git a/test/broadway_cloud_pub_sub/streaming/unary_rpc_client_test.exs b/test/broadway_cloud_pub_sub/streaming/unary_rpc_client_test.exs new file mode 100644 index 0000000..4bde16c --- /dev/null +++ b/test/broadway_cloud_pub_sub/streaming/unary_rpc_client_test.exs @@ -0,0 +1,253 @@ +defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClientTest do + use ExUnit.Case, async: true + + alias BroadwayCloudPubSub.Streaming.UnaryRpcClient + + # ============================================================ + # Chunking logic — pure caller-side, no GenServer needed + # ============================================================ + + describe "acknowledge/2 chunking (caller-side logic)" do + test "a list of 5001 ids is split into 3 chunks of at most 2500" do + ids = Enum.map(1..5_001, &"id-#{&1}") + # chunk_every(2500) => [2500, 2500, 1] + chunks = Enum.chunk_every(ids, 2_500) + assert length(chunks) == 3 + assert Enum.map(chunks, &length/1) == [2_500, 2_500, 1] + end + + test "a list of exactly 2500 ids is a single chunk" do + ids = Enum.map(1..2_500, &"id-#{&1}") + chunks = Enum.chunk_every(ids, 2_500) + assert length(chunks) == 1 + end + + test "an empty list produces no chunks" do + chunks = Enum.chunk_every([], 2_500) + assert chunks == [] + end + end + + describe "modify_ack_deadline/3 chunking (caller-side logic)" do + test "7500 ids produce 3 chunks" do + ids = Enum.map(1..7_500, &"id-#{&1}") + chunks = Enum.chunk_every(ids, 2_500) + assert length(chunks) == 3 + end + end + + # ============================================================ + # start_link / init — using a fast-failing fake token so + # channel open fails at auth rather than TCP-connect time. + # ============================================================ + + defp base_config_bad_token do + [ + broadway_name: __MODULE__, + subscription: "projects/test/subscriptions/test-sub", + token_generator: {__MODULE__, :fail_token, []}, + adapter: :gun, + grpc_endpoint: "pubsub.googleapis.com:443", + use_ssl: false, + backoff_type: :exp, + backoff_min: 100, + backoff_max: 60_000 + ] + end + + # Token generator that returns an error — causes open_channel to fail + # immediately without attempting a TCP connection. + def fail_token, do: {:error, :no_token} + def noop_token, do: {:ok, "test-token"} + + defp start_client_no_channel(extra_opts \\ []) do + opts = Keyword.merge(base_config_bad_token(), extra_opts) + {:ok, pid} = UnaryRpcClient.start_link(opts) + pid + end + + describe "start_link/1" do + test "starts successfully even when initial channel connect fails" do + pid = start_client_no_channel() + assert Process.alive?(pid) + end + + test "registers under :name when provided" do + name = Module.concat(__MODULE__, Named) + {:ok, _pid} = UnaryRpcClient.start_link(Keyword.put(base_config_bad_token(), :name, name)) + assert Process.whereis(name) != nil + end + + test "channel is nil when initial token fetch fails" do + pid = start_client_no_channel() + state = :sys.get_state(pid) + assert state.channel == nil + end + end + + # ============================================================ + # State structure + # ============================================================ + + describe "state structure" do + test "config map contains expected keys" do + pid = start_client_no_channel() + state = :sys.get_state(pid) + assert Map.has_key?(state.config, :broadway_name) + assert Map.has_key?(state.config, :subscription) + assert Map.has_key?(state.config, :token_generator) + assert Map.has_key?(state.config, :adapter) + assert state.config.broadway_name == __MODULE__ + end + + test "state has a :backoff field for reconnect exponential backoff" do + pid = start_client_no_channel() + state = :sys.get_state(pid) + assert Map.has_key?(state, :backoff) + end + end + + # ============================================================ + # Call-based API returns {:error, :no_channel} when channel is nil + # ============================================================ + + describe "calls with no channel" do + test "acknowledge/2 returns {:ok, all_ids} (retained) when channel is nil" do + pid = start_client_no_channel() + + result = UnaryRpcClient.acknowledge(pid, ["id-1", "id-2"]) + + # No channel → {:error, :no_channel} per chunk → accumulated into {:ok, all_ids} + assert {:ok, retained} = result + assert Enum.sort(retained) == ["id-1", "id-2"] + end + + test "modify_ack_deadline/3 returns {:ok, all_ids} (retained) when channel is nil" do + pid = start_client_no_channel() + + result = UnaryRpcClient.modify_ack_deadline(pid, ["id-1"], 30) + + assert {:ok, retained} = result + assert retained == ["id-1"] + end + + test "GenServer stays alive after calls with no channel" do + pid = start_client_no_channel() + + UnaryRpcClient.acknowledge(pid, ["id-1"]) + UnaryRpcClient.modify_ack_deadline(pid, ["id-2"], 30) + + assert Process.alive?(pid) + end + end + + # ============================================================ + # Partial-success tracking — caller-side reduce logic + # ============================================================ + + describe "partial-success reduce logic" do + test "all chunks succeed → {:ok, []}" do + # Simulate the reduce logic directly without a real GenServer + chunks = [["id-1", "id-2"], ["id-3"]] + + result = + Enum.reduce(chunks, {:ok, []}, fn + chunk, {:ok, failed_so_far} -> + # Simulate :ok from each chunk + case :ok do + :ok -> {:ok, failed_so_far} + {:error, _} -> {:ok, failed_so_far ++ chunk} + end + + _chunk, {:error, _} = err -> + err + end) + + assert result == {:ok, []} + end + + test "second chunk fails → {:ok, second_chunk_ids} retained" do + chunks = [["id-1", "id-2"], ["id-3", "id-4"]] + + result = + Enum.reduce(chunks, {:ok, []}, fn + ["id-1", "id-2"] = chunk, {:ok, failed} -> + # First chunk succeeds + _ = chunk + {:ok, failed} + + chunk, {:ok, failed} -> + # Second chunk fails + {:ok, failed ++ chunk} + + _chunk, {:error, _} = err -> + err + end) + + assert result == {:ok, ["id-3", "id-4"]} + end + + test "hard process error short-circuits remaining chunks" do + chunks = [["id-1"], ["id-2"], ["id-3"]] + visited = :atomics.new(1, []) + + result = + Enum.reduce(chunks, {:ok, []}, fn + _chunk, {:error, _} = err -> + err + + ["id-1"], {:ok, _failed} -> + :atomics.add(visited, 1, 1) + {:error, {:call_failed, :noproc}} + + chunk, {:ok, failed} -> + :atomics.add(visited, 1, 1) + {:ok, failed ++ chunk} + end) + + assert {:error, {:call_failed, :noproc}} = result + # Only the first chunk was visited before error short-circuited + assert :atomics.get(visited, 1) == 1 + end + end + + # ============================================================ + # Telemetry metadata uses broadway_name + # ============================================================ + + describe "telemetry metadata" do + test "config has :broadway_name key (used by emit_telemetry)" do + pid = start_client_no_channel() + state = :sys.get_state(pid) + + assert is_atom(state.config.broadway_name) + refute Map.has_key?(state.config, :broadway) + end + end + + # ============================================================ + # handle_info(:reconnect) — async reconnect path + # ============================================================ + + describe "handle_info(:reconnect)" do + test "reconnect message is handled without crash" do + pid = start_client_no_channel() + + # Send reconnect — will fail (bad token) but must not crash + send(pid, :reconnect) + :sys.get_state(pid) + + assert Process.alive?(pid) + end + + test "channel remains nil after reconnect with bad token" do + pid = start_client_no_channel() + + send(pid, :reconnect) + :sys.get_state(pid) + + state = :sys.get_state(pid) + assert state.channel == nil + end + end +end diff --git a/test/test_helper.exs b/test/test_helper.exs index 7f28561..8d3891f 100644 --- a/test/test_helper.exs +++ b/test/test_helper.exs @@ -1 +1 @@ -ExUnit.start(exclude: [:integration]) +ExUnit.start(exclude: [:integration, :stress]) From 7d3b28fbc03341679b22828deb62bb6c6756aada Mon Sep 17 00:00:00 2001 From: Rock Date: Tue, 31 Mar 2026 10:51:05 +0200 Subject: [PATCH 04/29] feat: implement stream draining and graceful shutdown Add support for graceful draining of streaming pull connections, ensuring in-flight messages are properly acknowledged before shutdown. Introduce partition-based message routing for multi-processor topologies. Key changes: - Implement prepare_for_draining callback in the producer - Add drain_timeout option for controlling shutdown behavior - Close inner gRPC streams during draining - Add partition_by support in the producer for message routing - Add gRPC test adapters for draining behavior verification --- .../streaming/options.ex | 38 +- .../streaming/producer.ex | 18 +- .../streaming/stream_manager.ex | 71 ++- .../streaming/unary_rpc_client.ex | 10 +- .../streaming/options_test.exs | 44 ++ .../streaming/stream_manager_test.exs | 542 ++++++++++-------- .../streaming/unary_rpc_client_test.exs | 5 +- test/support/grpc_dynamic_adapter.ex | 210 +++++++ test/support/grpc_test_adapter.ex | 51 ++ 9 files changed, 722 insertions(+), 267 deletions(-) create mode 100644 test/support/grpc_dynamic_adapter.ex create mode 100644 test/support/grpc_test_adapter.ex diff --git a/lib/broadway_cloud_pub_sub/streaming/options.ex b/lib/broadway_cloud_pub_sub/streaming/options.ex index 5c4c24e..9f45f2b 100644 --- a/lib/broadway_cloud_pub_sub/streaming/options.ex +++ b/lib/broadway_cloud_pub_sub/streaming/options.ex @@ -208,7 +208,7 @@ defmodule BroadwayCloudPubSub.Streaming.Options do """ ], adapter: [ - type: {:in, [:gun, :mint]}, + type: {:custom, __MODULE__, :type_adapter, [[{:name, :adapter}]]}, default: :gun, doc: """ The gRPC HTTP/2 adapter to use for the streaming connection. @@ -217,9 +217,11 @@ defmodule BroadwayCloudPubSub.Streaming.Options do and is the traditional adapter for the Elixir gRPC library. * `:mint` — Uses the Mint HTTP/2 client. Mint may be preferable in deployment environments where Gun is not available or not desired. + * Any module — A custom module implementing the `GRPC.Client.Adapter` + behaviour. Useful for test adapters and alternative implementations. - Both adapters are provided by the `grpc_client` dependency. The adapter choice - does not affect the public API or message semantics. + Both built-in adapters are provided by the `grpc_client` dependency. The + adapter choice does not affect the public API or message semantics. """ ], grpc_endpoint: [ @@ -278,8 +280,7 @@ defmodule BroadwayCloudPubSub.Streaming.Options do ], # Testing options - test_pid: [type: :pid, doc: false], - message_server: [type: :pid, doc: false] + test_pid: [type: :pid, doc: false] ] @definition NimbleOptions.new!(definition) @@ -353,4 +354,31 @@ defmodule BroadwayCloudPubSub.Streaming.Options do "expected :#{name} to be :nack, :noop, or {:nack, integer} where " <> "integer is between 0 and 600, got: #{inspect(value)}"} end + + def type_adapter(:gun, _), do: {:ok, GRPC.Client.Adapters.Gun} + def type_adapter(:mint, _), do: {:ok, GRPC.Client.Adapters.Mint} + + def type_adapter(mod, [{:name, name}]) when is_atom(mod) do + case Code.ensure_loaded(mod) do + {:module, ^mod} -> + if function_exported?(mod, :connect, 2) do + {:ok, mod} + else + {:error, + "expected :#{name} to be a module implementing GRPC.Client.Adapter, " <> + "but #{inspect(mod)} does not export connect/2"} + end + + {:error, _} -> + {:error, + "expected :#{name} to be :gun, :mint, or a module implementing GRPC.Client.Adapter, " <> + "but #{inspect(mod)} is not a loaded module"} + end + end + + def type_adapter(value, [{:name, name}]) do + {:error, + "expected :#{name} to be :gun, :mint, or a module implementing GRPC.Client.Adapter, " <> + "got: #{inspect(value)}"} + end end diff --git a/lib/broadway_cloud_pub_sub/streaming/producer.ex b/lib/broadway_cloud_pub_sub/streaming/producer.ex index 38fea75..c54cf81 100644 --- a/lib/broadway_cloud_pub_sub/streaming/producer.ex +++ b/lib/broadway_cloud_pub_sub/streaming/producer.ex @@ -448,15 +448,11 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do # ordering_key are all routed to partition 0 (unordered messages interleave freely). defp maybe_inject_partition_by(broadway_opts, opts) do if opts[:enable_message_ordering] do - partition_fn = fn %Broadway.Message{metadata: %{orderingKey: key}} -> - key or "" - end - processors = broadway_opts |> Keyword.get(:processors, []) |> Enum.map(fn {name, proc_opts} -> - {name, Keyword.put_new(proc_opts, :partition_by, partition_fn)} + {name, Keyword.put_new(proc_opts, :partition_by, &__MODULE__.partition_by/1)} end) Keyword.put(broadway_opts, :processors, processors) @@ -464,4 +460,16 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do broadway_opts end end + + def partition_by(%Broadway.Message{metadata: %{orderingKey: key}}) when is_binary(key) do + :erlang.phash2(key) + end + + def partition_by(%Broadway.Message{metadata: %{orderingKey: key}}) when is_integer(key) do + key + end + + def partition_by(_) do + :erlang.unique_integer([:positive]) + end end diff --git a/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex b/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex index 79815e0..d6a5137 100644 --- a/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex +++ b/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex @@ -349,7 +349,21 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do # Server closed the stream normally (StreamReader enumeration exhausted) def handle_info({:stream_closed}, state) do emit_telemetry(:disconnect, %{reason: :stream_closed}, state.config) - {:noreply, schedule_reconnect(reset_connection(state, :stream_closed))} + + # The stream ended naturally: the Mint ConnectionProcess already called + # StreamResponseProcess.done/1 and popped the request_ref from its state + # when it received the HTTP/2 END_STREAM frame. Calling GRPC.Stub.cancel + # now would make the ConnectionProcess try to send :done to the + # already-stopped StreamResponseProcess, crashing the ConnectionProcess. + # Nil out grpc_stream so close_stream/1 skips the cancel for this case. + state = %{state | grpc_stream: nil} + + if state.draining do + # Mid-drain: do not open a new stream; just clean up reader/channel. + {:noreply, reset_connection(state, :stream_closed)} + else + {:noreply, schedule_reconnect(reset_connection(state, :stream_closed))} + end end # StreamReader process exited normally — stream ended cleanly. @@ -359,7 +373,15 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do def handle_info({:EXIT, pid, :normal}, %{reader_pid: pid} = state) do if state.grpc_stream do emit_telemetry(:disconnect, %{reason: :stream_closed}, state.config) - {:noreply, schedule_reconnect(reset_connection(state, :stream_closed))} + + # Same rationale as {:stream_closed}: stream ended naturally, skip cancel. + state = %{state | grpc_stream: nil} + + if state.draining do + {:noreply, reset_connection(state, :stream_closed)} + else + {:noreply, schedule_reconnect(reset_connection(state, :stream_closed))} + end else # Already handled by {:stream_closed} — just clear the reader_pid {:noreply, %{state | reader_pid: nil}} @@ -582,6 +604,8 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do with {:ok, token} <- fetch_token(config), {:ok, channel} <- open_channel(config, token) do connect_stream(channel, state) + else + {:error, reason} -> {:error, reason, state} end rescue e -> @@ -616,18 +640,20 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do %{grpc_endpoint: endpoint, use_ssl: use_ssl, adapter: adapter} = config, token ) do - adapter_mod = - case adapter do - :gun -> GRPC.Client.Adapters.Gun - :mint -> GRPC.Client.Adapters.Mint - end - keepalive_interval_ms = Map.get(config, :keepalive_interval_ms, 30_000) + adapter_opts = [http2_opts: %{keepalive: keepalive_interval_ms, settings_timeout: :infinity}] + + adapter_opts = + case Map.get(config, :test_pid) do + nil -> adapter_opts + pid -> Keyword.put(adapter_opts, :test_pid, pid) + end + base_opts = [ - adapter: adapter_mod, + adapter: adapter, headers: [{"authorization", "Bearer #{token}"}], - adapter_opts: [http2_opts: %{keepalive: keepalive_interval_ms, settings_timeout: :infinity}] + adapter_opts: adapter_opts ] opts = @@ -691,10 +717,27 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do end if grpc_stream do - try do - GRPC.Stub.cancel(grpc_stream) - catch - _, _ -> :ok + # Guard against cancelling a stream whose StreamResponseProcess is already + # dead. The Mint ConnectionProcess calls StreamResponseProcess.done/1 + # (a synchronous GenServer.call) as part of handling {:cancel_request, ...}. + # If the StreamResponseProcess is already gone — because the reader was killed + # and the linked SRP died with it — the ConnectionProcess crashes with + # "no process" even though our try/catch protects the StreamManager. + # Checking liveness first lets us skip the cancel when there's nothing to + # cancel safely. For Gun-based streams, payload.stream_response_pid is nil + # so the guard is always true there. + srp_alive? = + case grpc_stream do + %{payload: %{stream_response_pid: pid}} when is_pid(pid) -> Process.alive?(pid) + _ -> true + end + + if srp_alive? do + try do + GRPC.Stub.cancel(grpc_stream) + catch + _, _ -> :ok + end end end diff --git a/lib/broadway_cloud_pub_sub/streaming/unary_rpc_client.ex b/lib/broadway_cloud_pub_sub/streaming/unary_rpc_client.ex index c3d210d..071d6ae 100644 --- a/lib/broadway_cloud_pub_sub/streaming/unary_rpc_client.ex +++ b/lib/broadway_cloud_pub_sub/streaming/unary_rpc_client.ex @@ -350,16 +350,12 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClient do end with {:ok, token} <- token_result do - adapter_mod = - case config.adapter do - :gun -> GRPC.Client.Adapters.Gun - :mint -> GRPC.Client.Adapters.Mint - end + adapter_opts = [http2_opts: %{settings_timeout: :infinity}] base_opts = [ - adapter: adapter_mod, + adapter: config.adapter, headers: [{"authorization", "Bearer #{token}"}], - adapter_opts: [http2_opts: %{settings_timeout: :infinity}] + adapter_opts: adapter_opts ] opts = diff --git a/test/broadway_cloud_pub_sub/streaming/options_test.exs b/test/broadway_cloud_pub_sub/streaming/options_test.exs index 1f8199f..09cc9f3 100644 --- a/test/broadway_cloud_pub_sub/streaming/options_test.exs +++ b/test/broadway_cloud_pub_sub/streaming/options_test.exs @@ -250,6 +250,50 @@ defmodule BroadwayCloudPubSub.Streaming.OptionsTest do end end + describe "adapter" do + test "defaults to GRPC.Client.Adapters.Gun" do + {:ok, opts} = validate(subscription: "projects/p/subscriptions/s") + assert opts[:adapter] == GRPC.Client.Adapters.Gun + end + + test ":gun resolves to GRPC.Client.Adapters.Gun" do + {:ok, opts} = validate(subscription: "projects/p/subscriptions/s", adapter: :gun) + assert opts[:adapter] == GRPC.Client.Adapters.Gun + end + + test ":mint resolves to GRPC.Client.Adapters.Mint" do + {:ok, opts} = validate(subscription: "projects/p/subscriptions/s", adapter: :mint) + assert opts[:adapter] == GRPC.Client.Adapters.Mint + end + + test "accepts a custom module that exports connect/2" do + {:ok, opts} = + validate( + subscription: "projects/p/subscriptions/s", + adapter: BroadwayCloudPubSub.Test.GrpcTestAdapter + ) + + assert opts[:adapter] == BroadwayCloudPubSub.Test.GrpcTestAdapter + end + + test "rejects a non-atom value" do + assert {:error, err} = + validate(subscription: "projects/p/subscriptions/s", adapter: "gun") + + assert Exception.message(err) =~ "adapter" + end + + test "rejects an atom that is not a loaded module" do + assert {:error, err} = + validate( + subscription: "projects/p/subscriptions/s", + adapter: VeryUnlikelyToExist.ModuleAtom + ) + + assert Exception.message(err) =~ "adapter" + end + end + describe "enable_message_ordering" do test "defaults to false" do {:ok, opts} = validate(subscription: "projects/p/subscriptions/s") diff --git a/test/broadway_cloud_pub_sub/streaming/stream_manager_test.exs b/test/broadway_cloud_pub_sub/streaming/stream_manager_test.exs index 01af9e7..c0ba2eb 100644 --- a/test/broadway_cloud_pub_sub/streaming/stream_manager_test.exs +++ b/test/broadway_cloud_pub_sub/streaming/stream_manager_test.exs @@ -4,6 +4,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do import ExUnit.CaptureLog alias BroadwayCloudPubSub.Streaming.{AckBatcher, StreamManager} + alias BroadwayCloudPubSub.Test.GrpcDynamicAdapter # Minimal config with enough keys to satisfy StreamManager.init/1 # (mirrors what Options produces after validation + defaults). @@ -18,8 +19,8 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do backoff_min: 1_000, backoff_max: 30_000, use_ssl: true, - adapter: :gun, - grpc_endpoint: "pubsub.googleapis.com:443", + adapter: GrpcDynamicAdapter, + grpc_endpoint: "localhost:1234", keepalive_interval_ms: 30_000, on_success: :ack, on_failure: :noop, @@ -28,12 +29,14 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do ack_batch_interval_ms: 100, ack_batch_max_size: 2_500, client_id: "test-client-id", - token_generator: {__MODULE__, :noop_token, []}, + # fail token by default, most of the tests don't need a live stream, and this avoids GRPC.Client.Connection stop logs + token_generator: {__MODULE__, :fail_token, []}, broadway: [name: __MODULE__] ] end def noop_token, do: {:ok, "test-token"} + def fail_token, do: {:error, :no_token} # A minimal stub GenServer that silently accepts any cast (ack/modack). # Used as the rpc_client for AckBatcher so no real gRPC calls are made. @@ -78,6 +81,44 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do pid end + # Synchronous barrier: drains all prior mailbox messages in StreamManager + # before returning. Safe to use instead of :sys.get_state/1 for sync purposes. + defp sync(pid), do: StreamManager.get_buffered(pid) + + # Build a minimal ReceivedMessage for sending into {:stream_messages, ...}. + defp received_message(ack_id, data) do + %Google.Pubsub.V1.ReceivedMessage{ + ack_id: ack_id, + message: %Google.Pubsub.V1.PubsubMessage{ + message_id: "msg-#{ack_id}", + data: data, + attributes: %{}, + ordering_key: "", + publish_time: nil + }, + delivery_attempt: 1 + } + end + + # Open a live dynamic adapter stream. + # Starts the manager with noop_token so the adapter actually connects, + # waits for the connection handshake, then calls fun.(pid, ctrl). + defp with_live_stream(extra_opts \\ [], fun) do + pid = + start_manager( + [ + adapter: GrpcDynamicAdapter, + test_pid: self(), + token_generator: {__MODULE__, :noop_token, []} + ] ++ extra_opts + ) + + assert_receive {:adapter_connected, ctrl}, 2_000 + # Wait for the initial StreamingPullRequest so the stream is fully open + assert_receive {:adapter_call, {:send_data, _}}, 2_000 + fun.(pid, ctrl) + end + # ============================================================ # Demand signaling # ============================================================ @@ -89,11 +130,12 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do StreamManager.notify_demand(pid, 0) StreamManager.notify_demand(pid, 10) - # Allow the async cast to be processed - state = :sys.get_state(pid) + # Send one real message — if demand > 0, it will be forwarded immediately + send(pid, {:stream_messages, [received_message("demand-probe", "probe")]}) + assert_receive {:stream_messages, [_]}, 500 - assert state.pending_demand == 10 - assert :queue.is_empty(state.message_buffer) + # Buffer is empty: all demand was consumed by the forwarded message + assert StreamManager.get_buffered(pid) == [] end end @@ -101,59 +143,53 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do test "flushes buffered messages to producer and decrements pending_demand" do pid = start_manager() - msgs = [ - %Broadway.Message{data: "msg1", acknowledger: {Broadway.NoopAcknowledger, nil, nil}}, - %Broadway.Message{data: "msg2", acknowledger: {Broadway.NoopAcknowledger, nil, nil}} - ] - - :sys.replace_state(pid, fn s -> - %{s | pending_demand: 0, message_buffer: :queue.from_list(msgs)} - end) + # Buffer two messages with demand=0 + StreamManager.notify_demand(pid, 0) + send(pid, {:stream_messages, [received_message("buf-1", "msg1")]}) + send(pid, {:stream_messages, [received_message("buf-2", "msg2")]}) + # Wait for both to be buffered (sync via get_buffered) + assert length(StreamManager.get_buffered(pid)) == 2 + # Now demand arrives — should flush both at once StreamManager.notify_demand(pid, 10) assert_receive {:stream_messages, received} assert Enum.map(received, & &1.data) == ["msg1", "msg2"] - state = :sys.get_state(pid) - assert :queue.is_empty(state.message_buffer) - assert state.pending_demand == 8 + # Buffer should be empty; remaining demand consumed 2 of 10 + assert StreamManager.get_buffered(pid) == [] end test "flushes only up to pending_demand, keeps remainder buffered" do pid = start_manager() - msgs = - for i <- 1..5 do - %Broadway.Message{ - data: "msg#{i}", - acknowledger: {Broadway.NoopAcknowledger, nil, nil} - } - end + # Buffer 5 messages with demand=0 + StreamManager.notify_demand(pid, 0) - :sys.replace_state(pid, fn s -> - %{s | pending_demand: 0, message_buffer: :queue.from_list(msgs)} - end) + for i <- 1..5 do + send(pid, {:stream_messages, [received_message("buf-#{i}", "msg#{i}")]}) + end + assert length(StreamManager.get_buffered(pid)) == 5 + + # Demand for 2 — should flush exactly 2 StreamManager.notify_demand(pid, 2) assert_receive {:stream_messages, received} assert length(received) == 2 assert Enum.map(received, & &1.data) == ["msg1", "msg2"] - state = :sys.get_state(pid) - assert :queue.len(state.message_buffer) == 3 - assert state.pending_demand == 0 + # 3 remain buffered + assert length(StreamManager.get_buffered(pid)) == 3 + # Demand for 10 — should flush the remaining 3 StreamManager.notify_demand(pid, 10) assert_receive {:stream_messages, received2} assert length(received2) == 3 assert Enum.map(received2, & &1.data) == ["msg3", "msg4", "msg5"] - state = :sys.get_state(pid) - assert :queue.is_empty(state.message_buffer) - assert state.pending_demand == 7 + assert StreamManager.get_buffered(pid) == [] end end @@ -162,51 +198,25 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do pid = start_manager() StreamManager.notify_demand(pid, 10) - fake_msg = %Google.Pubsub.V1.ReceivedMessage{ - ack_id: "ack-1", - message: %Google.Pubsub.V1.PubsubMessage{ - message_id: "msg-1", - data: "hello", - attributes: %{}, - ordering_key: "", - publish_time: nil - }, - delivery_attempt: 1 - } - - send(pid, {:stream_messages, [fake_msg]}) + send(pid, {:stream_messages, [received_message("ack-1", "hello")]}) assert_receive {:stream_messages, messages} assert length(messages) == 1 assert hd(messages).data == "hello" - state = :sys.get_state(pid) - assert state.pending_demand == 9 - assert :queue.is_empty(state.message_buffer) + # Buffer should be empty (demand consumed the message immediately) + assert StreamManager.get_buffered(pid) == [] end test "messages are buffered when pending_demand is 0" do pid = start_manager() StreamManager.notify_demand(pid, 0) - fake_msg = %Google.Pubsub.V1.ReceivedMessage{ - ack_id: "ack-2", - message: %Google.Pubsub.V1.PubsubMessage{ - message_id: "msg-2", - data: "buffered", - attributes: %{}, - ordering_key: "", - publish_time: nil - }, - delivery_attempt: 1 - } - - send(pid, {:stream_messages, [fake_msg]}) + send(pid, {:stream_messages, [received_message("ack-2", "buffered")]}) refute_receive {:stream_messages, _}, 100 - state = :sys.get_state(pid) - assert :queue.len(state.message_buffer) == 1 + assert length(StreamManager.get_buffered(pid)) == 1 end test "buffer is flushed in FIFO order on notify_demand" do @@ -214,22 +224,11 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do StreamManager.notify_demand(pid, 0) for i <- 1..3 do - msg = %Google.Pubsub.V1.ReceivedMessage{ - ack_id: "ack-#{i}", - message: %Google.Pubsub.V1.PubsubMessage{ - message_id: "msg-#{i}", - data: "data-#{i}", - attributes: %{}, - ordering_key: "", - publish_time: nil - }, - delivery_attempt: 1 - } - - send(pid, {:stream_messages, [msg]}) + send(pid, {:stream_messages, [received_message("ack-#{i}", "data-#{i}")]}) end - :sys.get_state(pid) + # Sync: ensure all 3 are buffered before we signal demand + assert length(StreamManager.get_buffered(pid)) == 3 StreamManager.notify_demand(pid, 10) @@ -249,19 +248,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do StreamManager.notify_demand(pid, 10) StreamManager.stop_receiving(pid) - fake_msg = %Google.Pubsub.V1.ReceivedMessage{ - ack_id: "drain-ack", - message: %Google.Pubsub.V1.PubsubMessage{ - message_id: "drain-msg", - data: "should not arrive", - attributes: %{}, - ordering_key: "", - publish_time: nil - }, - delivery_attempt: 1 - } - - send(pid, {:stream_messages, [fake_msg]}) + send(pid, {:stream_messages, [received_message("drain-ack", "should not arrive")]}) refute_receive {:stream_messages, _}, 200 end @@ -273,67 +260,56 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do describe "keep-alive ping" do test "triggers reconnect when send fails (fake stream)" do - # Use a very short keepalive interval so the test doesn't wait 30s. - # With a fake stream, send_on_stream will throw, which should trigger - # a reconnect instead of being silently swallowed. - logs = - capture_log(fn -> - pid = start_manager(keepalive_interval_ms: 10) - - :sys.replace_state(pid, fn s -> %{s | grpc_stream: :fake_stream} end) + # Use a very short keepalive interval and a high backoff so the automatic + # reconnect triggered by the failed send doesn't fire another :connect + # during the test assertions. + pid = start_manager(keepalive_interval_ms: 10, backoff_min: 60_000) - # Bootstrap the keepalive cycle — normally started by {:stream_opened}, - # but we injected the stream directly via replace_state. - send(pid, :send_keepalive) + :sys.replace_state(pid, fn s -> %{s | grpc_stream: :fake_stream} end) - :sys.get_state(pid) + # Bootstrap the keepalive cycle — normally started by {:stream_opened}, + # but we injected the stream directly via replace_state. + send(pid, :send_keepalive) - assert Process.alive?(pid) + # Sync barrier: let both the keepalive handler and any subsequent + # handle_info(:connect, ...) fully settle before reading state. + sync(pid) - # After a send failure, the stream is reset (grpc_stream: nil) and a - # reconnect is scheduled. - state = :sys.get_state(pid) - assert state.grpc_stream == nil - assert state.reconnect_ref != nil - end) + assert Process.alive?(pid) - assert logs =~ "GRPC.Client.Connection stopping as requested" + # After a send failure, the stream is reset (grpc_stream: nil) and a + # reconnect is scheduled. + state = :sys.get_state(pid) + assert state.grpc_stream == nil + assert state.reconnect_ref != nil end test "does not crash when stream is nil (reconnecting)" do + # grpc_stream is already nil after start_manager with fail_token — no replace_state needed. pid = start_manager(keepalive_interval_ms: 10) - :sys.replace_state(pid, fn s -> %{s | grpc_stream: nil} end) - send(pid, :send_keepalive) - :sys.get_state(pid) + sync(pid) assert Process.alive?(pid) end test "keepalive_timer is nil before stream opens" do pid = start_manager() + # No public API for keepalive_timer — :sys.get_state required state = :sys.get_state(pid) assert state.keepalive_timer == nil end test "keepalive_timer is set when stream is active" do - pid = start_manager(keepalive_interval_ms: 60_000) - - :sys.replace_state(pid, fn s -> - timer = Process.send_after(self(), :send_keepalive, 60_000) - - %{ - s - | grpc_stream: :fake_stream, - conn_pid: self(), - keepalive_timer: timer - } + # Use a live stream so {:stream_opened} fires, which calls schedule_keepalive_timer/1. + # A very long interval ensures it doesn't fire during the test. + with_live_stream([keepalive_interval_ms: 60_000], fn pid, _ctrl -> + # No public API for keepalive_timer — :sys.get_state required + state = :sys.get_state(pid) + assert state.keepalive_timer != nil end) - - state = :sys.get_state(pid) - assert state.keepalive_timer != nil end end @@ -349,8 +325,9 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do send(pid, {:stream_error, %GRPC.RPCError{status: 4, message: "timeout"}}) send(pid, {:stream_closed}) - :sys.get_state(pid) + sync(pid) + # No public API for reconnect_ref — :sys.get_state required state = :sys.get_state(pid) first_ref = state.reconnect_ref @@ -359,7 +336,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do # Send another close signal — ref must not change (dedup kicks in) send(pid, {:stream_closed}) - :sys.get_state(pid) + sync(pid) state2 = :sys.get_state(pid) assert state2.reconnect_ref == first_ref @@ -369,11 +346,11 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do pid = start_manager(backoff_min: 10_000, backoff_max: 30_000) send(pid, {:stream_error, %GRPC.RPCError{status: 4, message: "timeout"}}) - :sys.get_state(pid) + sync(pid) # Manually fire :connect (connect() will fail — no real gRPC — but that's fine) send(pid, :connect) - :sys.get_state(pid) + sync(pid) # GenServer should still be alive assert Process.alive?(pid) @@ -438,11 +415,12 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do ref = Process.monitor(pid) send(pid, {:stream_error, %GRPC.RPCError{status: 16, message: "unauthenticated"}}) - :sys.get_state(pid) + sync(pid) refute_received {:DOWN, ^ref, :process, ^pid, _} assert Process.alive?(pid) + # No public API for reconnect_ref — :sys.get_state required state = :sys.get_state(pid) assert state.reconnect_ref != nil end @@ -456,7 +434,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do {:stream_error, %GRPC.RPCError{status: 14, message: "Server shutdownNow invoked"}} ) - :sys.get_state(pid) + sync(pid) refute_received {:DOWN, ^ref, :process, ^pid, _} assert Process.alive?(pid) @@ -500,7 +478,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do ref = Process.monitor(pid) send(pid, {:stream_error, %GRPC.RPCError{status: 4, message: "timeout"}}) - :sys.get_state(pid) + sync(pid) refute_received {:DOWN, ^ref, :process, ^pid, _} assert Process.alive?(pid) @@ -514,7 +492,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do ref = Process.monitor(pid) send(pid, {:stream_error, %GRPC.RPCError{status: 14, message: "service temporarily down"}}) - :sys.get_state(pid) + sync(pid) refute_received {:DOWN, ^ref, :process, ^pid, _} assert Process.alive?(pid) @@ -528,6 +506,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do describe "subscription_properties — ordering_enabled" do test "ordering_enabled defaults to false" do pid = start_manager() + # No public API for ordering_enabled — :sys.get_state required state = :sys.get_state(pid) assert state.ordering_enabled == false end @@ -541,8 +520,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do } send(pid, {:subscription_properties, props}) - # Sync: ensure the message is processed - :sys.get_state(pid) + sync(pid) state = :sys.get_state(pid) assert state.ordering_enabled == true @@ -561,7 +539,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do }} ) - :sys.get_state(pid) + sync(pid) assert :sys.get_state(pid).ordering_enabled == true # Then server sends false (can happen mid-stream) @@ -574,7 +552,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do }} ) - :sys.get_state(pid) + sync(pid) assert :sys.get_state(pid).ordering_enabled == false end @@ -583,7 +561,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do # Unrelated message send(pid, {:some_other_event, :ignored}) - :sys.get_state(pid) + sync(pid) state = :sys.get_state(pid) assert state.ordering_enabled == false @@ -597,6 +575,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do describe "subscription_properties — exactly_once_enabled" do test "exactly_once_enabled defaults to false" do pid = start_manager() + # No public API for exactly_once_enabled — :sys.get_state required state = :sys.get_state(pid) assert state.exactly_once_enabled == false end @@ -610,7 +589,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do } send(pid, {:subscription_properties, props}) - :sys.get_state(pid) + sync(pid) assert :sys.get_state(pid).exactly_once_enabled == true end @@ -627,7 +606,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do }} ) - :sys.get_state(pid) + sync(pid) assert :sys.get_state(pid).exactly_once_enabled == true send( @@ -639,7 +618,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do }} ) - :sys.get_state(pid) + sync(pid) assert :sys.get_state(pid).exactly_once_enabled == false end @@ -655,7 +634,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do }} ) - :sys.get_state(pid) + sync(pid) state = :sys.get_state(pid) assert state.ordering_enabled == true assert state.exactly_once_enabled == true @@ -667,108 +646,203 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do # ============================================================ describe "extend_leases — exactly_once_enabled deadline enforcement" do - # Inject outstanding messages and trigger :extend_leases, then capture - # the modack call via the StubRpcClient + process mailbox inspection. - # We can't easily intercept AckBatcher calls here, so we validate behaviour - # by inspecting how long until the next :extend_leases fires. + # We use with_live_stream so the adapter actually connects, then push a + # real StreamingPullResponse to get a message into `outstanding` naturally. + # Then we control exactly_once_enabled via {:subscription_properties, ...}. + + defp push_one_message(ctrl, ack_id, data) do + response = %Google.Pubsub.V1.StreamingPullResponse{ + received_messages: [ + %Google.Pubsub.V1.ReceivedMessage{ + ack_id: ack_id, + message: %Google.Pubsub.V1.PubsubMessage{ + message_id: "msg-#{ack_id}", + data: data, + attributes: %{}, + ordering_key: "", + publish_time: nil + }, + delivery_attempt: 1 + } + ] + } + + GrpcDynamicAdapter.push_response(ctrl, response) + end test "uses adaptive deadline (no 60s floor) when exactly_once_enabled is false" do - pid = start_manager() + with_live_stream(fn pid, ctrl -> + # Put one message into outstanding by pushing a response with demand + StreamManager.notify_demand(pid, 1) + push_one_message(ctrl, "ack-normal", "data") + assert_receive {:stream_messages, [_]}, 2_000 + + # Ensure exactly_once is false (the default) + send( + pid, + {:subscription_properties, + %Google.Pubsub.V1.StreamingPullResponse.SubscriptionProperties{ + message_ordering_enabled: false, + exactly_once_delivery_enabled: false + }} + ) + + sync(pid) + + # Fire the :extend_leases handler directly + send(pid, :extend_leases) + sync(pid) + + # No public API for lease_timer — :sys.get_state required + state = :sys.get_state(pid) + assert state.lease_timer != nil + end) + end - # With <10 samples the distribution returns the default deadline (60s). - # We verify the next timer is scheduled within a reasonable range. - now_ms = System.monotonic_time(:millisecond) - - # Inject one outstanding message and a lease timer that fires immediately. - :sys.replace_state(pid, fn s -> - %{ - s - | exactly_once_enabled: false, - outstanding: %{ - "ack-normal" => %{ - received_at: now_ms - 5_000, - max_expiry: now_ms + 3_600_000 - } - } - } + test "enforces 60s minimum deadline when exactly_once_enabled is true" do + with_live_stream(fn pid, ctrl -> + # Put one message into outstanding + StreamManager.notify_demand(pid, 1) + push_one_message(ctrl, "ack-eo", "data") + assert_receive {:stream_messages, [_]}, 2_000 + + # Enable exactly-once + send( + pid, + {:subscription_properties, + %Google.Pubsub.V1.StreamingPullResponse.SubscriptionProperties{ + message_ordering_enabled: false, + exactly_once_delivery_enabled: true + }} + ) + + sync(pid) + + send(pid, :extend_leases) + sync(pid) + + # No public API for lease_timer — :sys.get_state required + state = :sys.get_state(pid) + assert state.lease_timer != nil + remaining_ms = Process.read_timer(state.lease_timer) + # With effective_deadline=60s: interval in [(60-5)*1000*0.8, (60-5)*1000*0.9] + # = [44_000, 49_500) + assert remaining_ms >= 40_000, + "Expected next lease timer >= 40s for exactly-once, got #{remaining_ms}ms" end) + end - # Fire the :extend_leases handler directly - send(pid, :extend_leases) - :sys.get_state(pid) + test "uses normal interval (much shorter) when exactly_once_enabled is false" do + with_live_stream([stream_ack_deadline_seconds: 20], fn pid, ctrl -> + # Put one message into outstanding + StreamManager.notify_demand(pid, 1) + push_one_message(ctrl, "ack-normal2", "data") + assert_receive {:stream_messages, [_]}, 2_000 + + # Ensure exactly_once is false + send( + pid, + {:subscription_properties, + %Google.Pubsub.V1.StreamingPullResponse.SubscriptionProperties{ + message_ordering_enabled: false, + exactly_once_delivery_enabled: false + }} + ) + + sync(pid) + + send(pid, :extend_leases) + sync(pid) + + state = :sys.get_state(pid) + assert state.lease_timer != nil + remaining_ms = Process.read_timer(state.lease_timer) + + # With stream_ack_deadline_seconds=20 and adaptive deadline defaulting + # to 60 (cold start default), effective = 60s (no exactly_once floor). + # The key check: it's a valid positive number. + assert is_integer(remaining_ms) and remaining_ms > 0 + end) + end + end - state = :sys.get_state(pid) - # Lease timer should be re-scheduled with a positive ref - assert state.lease_timer != nil + # ============================================================ + # Dynamic adapter — real connection flow without a real server + # ============================================================ + + describe "with GrpcDynamicAdapter" do + test "adapter receives connect call and sends initial StreamingPullRequest" do + pid = + start_manager( + adapter: GrpcDynamicAdapter, + test_pid: self(), + token_generator: {__MODULE__, :noop_token, []} + ) + + # Adapter signals connection to the test process + assert_receive {:adapter_connected, _ctrl}, 2_000 + + # StreamReader sends the initial StreamingPullRequest via send_data + assert_receive {:adapter_call, {:send_data, _initial_request}}, 2_000 + + assert Process.alive?(pid) end - test "enforces 60s minimum deadline when exactly_once_enabled is true" do - pid = start_manager() - now_ms = System.monotonic_time(:millisecond) - - # With exactly_once_enabled: true and an adaptive deadline of e.g. 10s - # (cold start default clamped to the min), effective_deadline must be - # at least 60. We validate by measuring the scheduled next interval: - # interval = (effective_deadline - 5) * 1000 * jitter_factor - # With effective_deadline=60: interval in [(60-5)*1000*0.8, (60-5)*1000*0.9] - # = [44_000, 49_500] - :sys.replace_state(pid, fn s -> - %{ - s - | exactly_once_enabled: true, - outstanding: %{ - "ack-eo" => %{ - received_at: now_ms - 1_000, - max_expiry: now_ms + 3_600_000 - } - } - } - end) + test "messages pushed into the stream are forwarded to the producer" do + pid = + start_manager( + adapter: GrpcDynamicAdapter, + test_pid: self(), + token_generator: {__MODULE__, :noop_token, []} + ) - send(pid, :extend_leases) - :sys.get_state(pid) + assert_receive {:adapter_connected, ctrl}, 2_000 + # Wait for send_data (initial request) so stream is fully open + assert_receive {:adapter_call, {:send_data, _}}, 2_000 - state = :sys.get_state(pid) - # The timer must be set and represent a deadline >= 60s. - # We read back the remaining time from the timer ref. - assert state.lease_timer != nil - remaining_ms = Process.read_timer(state.lease_timer) - # The next extension should fire well before the 60s deadline expires. - # We assert >= 40_000 as a lower bound with tolerance for scheduling jitter. - assert remaining_ms >= 40_000, - "Expected next lease timer >= 40s for exactly-once, got #{remaining_ms}ms" + StreamManager.notify_demand(pid, 10) + + response = %Google.Pubsub.V1.StreamingPullResponse{ + received_messages: [ + %Google.Pubsub.V1.ReceivedMessage{ + ack_id: "dyn-ack-1", + message: %Google.Pubsub.V1.PubsubMessage{ + message_id: "dyn-msg-1", + data: "hello-dynamic", + attributes: %{}, + ordering_key: "", + publish_time: nil + }, + delivery_attempt: 1 + } + ] + } + + GrpcDynamicAdapter.push_response(ctrl, response) + + assert_receive {:stream_messages, messages}, 2_000 + assert length(messages) == 1 + assert hd(messages).data == "hello-dynamic" end - test "uses normal interval (much shorter) when exactly_once_enabled is false" do - pid = start_manager(stream_ack_deadline_seconds: 20) - now_ms = System.monotonic_time(:millisecond) - - :sys.replace_state(pid, fn s -> - %{ - s - | exactly_once_enabled: false, - outstanding: %{ - "ack-normal" => %{ - received_at: now_ms - 1_000, - max_expiry: now_ms + 3_600_000 - } - } - } - end) + test "end_stream and cancel notifications reach test_pid" do + _pid = + start_manager( + adapter: GrpcDynamicAdapter, + test_pid: self(), + token_generator: {__MODULE__, :noop_token, []} + ) - send(pid, :extend_leases) - :sys.get_state(pid) + assert_receive {:adapter_connected, ctrl}, 2_000 + assert_receive {:adapter_call, {:send_data, _}}, 2_000 - state = :sys.get_state(pid) - assert state.lease_timer != nil - remaining_ms = Process.read_timer(state.lease_timer) - - # With stream_ack_deadline_seconds=20 and adaptive deadline defaulting - # to 60 (cold start default), effective = 60s (no exactly_once floor). - # Interval = (60 - 5) * 1000 * jitter ≈ [44_000, 49_500). - # The key check: it should NOT be forced to >= 44_000 due to exactly_once - # — we simply verify it's a valid positive number. - assert is_integer(remaining_ms) and remaining_ms > 0 + # Signal end-of-stream so the reader exits cleanly + GrpcDynamicAdapter.push_end_stream(ctrl) + + # StreamManager will close the reader and schedule reconnect, + # which triggers a new connect — we just verify the process survives + # and the test doesn't hang. + Process.sleep(100) end end end diff --git a/test/broadway_cloud_pub_sub/streaming/unary_rpc_client_test.exs b/test/broadway_cloud_pub_sub/streaming/unary_rpc_client_test.exs index 4bde16c..43d89bc 100644 --- a/test/broadway_cloud_pub_sub/streaming/unary_rpc_client_test.exs +++ b/test/broadway_cloud_pub_sub/streaming/unary_rpc_client_test.exs @@ -2,6 +2,7 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClientTest do use ExUnit.Case, async: true alias BroadwayCloudPubSub.Streaming.UnaryRpcClient + alias BroadwayCloudPubSub.Test.GrpcDynamicAdapter # ============================================================ # Chunking logic — pure caller-side, no GenServer needed @@ -46,8 +47,8 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClientTest do broadway_name: __MODULE__, subscription: "projects/test/subscriptions/test-sub", token_generator: {__MODULE__, :fail_token, []}, - adapter: :gun, - grpc_endpoint: "pubsub.googleapis.com:443", + adapter: GrpcDynamicAdapter, + grpc_endpoint: "localhost:12345", use_ssl: false, backoff_type: :exp, backoff_min: 100, diff --git a/test/support/grpc_dynamic_adapter.ex b/test/support/grpc_dynamic_adapter.ex new file mode 100644 index 0000000..9674669 --- /dev/null +++ b/test/support/grpc_dynamic_adapter.ex @@ -0,0 +1,210 @@ +defmodule BroadwayCloudPubSub.Test.GrpcDynamicAdapter do + @moduledoc """ + A controllable GRPC adapter for unit tests. + + Unlike `GrpcTestAdapter`, this adapter keeps the stream open and allows the + test process to push responses, errors, or an end-of-stream signal into the + live stream. + + ## Usage + + # Start a StreamManager with this adapter and a test_pid. + pid = start_manager(adapter: GrpcDynamicAdapter, test_pid: self()) + + # Wait for the adapter to signal that it connected. + assert_receive {:adapter_connected, ctrl} + + # Push a response into the stream. + GrpcDynamicAdapter.push_response(ctrl, %StreamingPullResponse{...}) + + # End the stream. + GrpcDynamicAdapter.push_end_stream(ctrl) + + ## Notifications sent to `test_pid` + + * `{:adapter_connected, ctrl}` — adapter connected, `ctrl` is the Controller pid + * `{:adapter_call, :connect}` — `connect/2` was called + * `{:adapter_call, :disconnect}` — `disconnect/1` was called + * `{:adapter_call, :send_headers}` — `send_headers/2` was called + * `{:adapter_call, :send_request, message}` — `send_request/3` was called + * `{:adapter_call, :send_data, message}` — `send_data/3` was called + * `{:adapter_call, :end_stream}` — `end_stream/1` was called + * `{:adapter_call, :cancel}` — `cancel/1` was called + """ + + @behaviour GRPC.Client.Adapter + + # --- Controller GenServer --- + + defmodule Controller do + @moduledoc false + use GenServer + + def start_link do + GenServer.start_link(__MODULE__, :ok) + end + + def stop(ctrl), do: GenServer.stop(ctrl) + + @doc "Push a decoded response into the stream." + def push_response(ctrl, response) do + GenServer.cast(ctrl, {:push, {:ok, response}}) + end + + @doc "Push an error into the stream." + def push_error(ctrl, error) do + GenServer.cast(ctrl, {:push, {:error, error}}) + end + + @doc "Signal end-of-stream — the receive_data enumerable will halt." + def push_end_stream(ctrl) do + GenServer.cast(ctrl, {:push, :end_stream}) + end + + @doc "Block until one item is available in the queue (called from StreamReader process)." + def pop(ctrl, timeout) do + GenServer.call(ctrl, :pop, timeout) + end + + # --- GenServer callbacks --- + + def init(:ok) do + {:ok, %{queue: :queue.new(), waiting: nil}} + end + + def handle_cast({:push, item}, %{waiting: nil} = state) do + {:noreply, %{state | queue: :queue.in(item, state.queue)}} + end + + def handle_cast({:push, item}, %{waiting: from} = state) do + GenServer.reply(from, item) + {:noreply, %{state | waiting: nil}} + end + + def handle_call(:pop, from, state) do + case :queue.out(state.queue) do + {{:value, item}, rest} -> + {:reply, item, %{state | queue: rest}} + + {:empty, _} -> + {:noreply, %{state | waiting: from}} + end + end + end + + # --- Adapter callbacks --- + + @impl GRPC.Client.Adapter + def connect(%GRPC.Channel{} = channel, opts) do + test_pid = Keyword.get(opts, :test_pid) + {:ok, ctrl} = Controller.start_link() + + if test_pid do + send(test_pid, {:adapter_connected, ctrl}) + send(test_pid, {:adapter_call, :connect}) + end + + payload = %{conn_pid: self(), ctrl: ctrl, test_pid: test_pid} + {:ok, %{channel | adapter_payload: payload}} + end + + @impl GRPC.Client.Adapter + def disconnect(%GRPC.Channel{} = channel) do + notify(channel, :disconnect) + + case channel.adapter_payload do + %{ctrl: ctrl} when is_pid(ctrl) -> + if Process.alive?(ctrl), do: Controller.stop(ctrl) + + _ -> + :ok + end + + {:ok, %{channel | adapter_payload: %{conn_pid: nil}}} + end + + @impl GRPC.Client.Adapter + def send_headers(stream, _opts) do + notify_stream(stream, :send_headers) + GRPC.Client.Stream.put_payload(stream, :stream_ref, make_ref()) + end + + @impl GRPC.Client.Adapter + def send_request(stream, message, _opts) do + notify_stream(stream, {:send_request, message}) + GRPC.Client.Stream.put_payload(stream, :stream_ref, make_ref()) + end + + @impl GRPC.Client.Adapter + def send_data(stream, message, _opts) do + notify_stream(stream, {:send_data, message}) + stream + end + + @impl GRPC.Client.Adapter + def end_stream(stream) do + notify_stream(stream, :end_stream) + stream + end + + @impl GRPC.Client.Adapter + def cancel(stream) do + notify_stream(stream, :cancel) + :ok + end + + # Returns a lazy Stream that blocks on Controller.pop/2 until items are pushed. + @impl GRPC.Client.Adapter + def receive_data(stream, _opts) do + ctrl = stream.channel.adapter_payload.ctrl + + lazy = + Stream.resource( + fn -> ctrl end, + fn ctrl -> + try do + case Controller.pop(ctrl, 5_000) do + :end_stream -> {:halt, ctrl} + {:ok, response} -> {[{:ok, response}], ctrl} + {:error, err} -> {[{:error, err}], ctrl} + end + rescue + _ -> {:halt, ctrl} + catch + :exit, _ -> {:halt, ctrl} + end + end, + fn _ctrl -> :ok end + ) + + {:ok, lazy} + end + + # --- Public API (delegates to Controller) --- + + defdelegate push_response(ctrl, response), to: Controller + defdelegate push_error(ctrl, error), to: Controller + defdelegate push_end_stream(ctrl), to: Controller + + # --- Private helpers --- + + defp notify(channel, event) do + case channel.adapter_payload do + %{test_pid: pid} when is_pid(pid) -> + send(pid, {:adapter_call, event}) + + _ -> + :ok + end + end + + defp notify_stream(stream, event) do + case stream.channel.adapter_payload do + %{test_pid: pid} when is_pid(pid) -> + send(pid, {:adapter_call, event}) + + _ -> + :ok + end + end +end diff --git a/test/support/grpc_test_adapter.ex b/test/support/grpc_test_adapter.ex new file mode 100644 index 0000000..50a7725 --- /dev/null +++ b/test/support/grpc_test_adapter.ex @@ -0,0 +1,51 @@ +defmodule BroadwayCloudPubSub.Test.GrpcTestAdapter do + @moduledoc """ + A static (no-op) GRPC adapter for unit tests. + + - `connect/2` populates `adapter_payload` with `conn_pid: self()` so that + StreamManager's `{:stream_opened, ...}` handler can read a live pid. + - `receive_data/2` returns `{:ok, []}` — an empty enumerable — which causes + StreamReader to immediately send `{:stream_closed}` and exit. StreamManager + will schedule a reconnect, which is acceptable for tests that inject state + directly via `:sys.replace_state`. + - All other callbacks are no-ops that keep the stream struct intact. + + Use `BroadwayCloudPubSub.Test.GrpcDynamicAdapter` when you need to push + responses into an open stream from the test process. + """ + + @behaviour GRPC.Client.Adapter + + @impl GRPC.Client.Adapter + def connect(%GRPC.Channel{} = channel, _opts) do + {:ok, %{channel | adapter_payload: %{conn_pid: self()}}} + end + + @impl GRPC.Client.Adapter + def disconnect(%GRPC.Channel{} = channel) do + {:ok, %{channel | adapter_payload: %{conn_pid: nil}}} + end + + @impl GRPC.Client.Adapter + def send_headers(stream, _opts) do + GRPC.Client.Stream.put_payload(stream, :stream_ref, make_ref()) + end + + @impl GRPC.Client.Adapter + def send_request(stream, _message, _opts) do + GRPC.Client.Stream.put_payload(stream, :stream_ref, make_ref()) + end + + @impl GRPC.Client.Adapter + def send_data(stream, _message, _opts), do: stream + + @impl GRPC.Client.Adapter + def end_stream(stream), do: stream + + @impl GRPC.Client.Adapter + def cancel(_stream), do: :ok + + # Returns an empty enumerable — StreamReader sends {:stream_closed} immediately. + @impl GRPC.Client.Adapter + def receive_data(_stream, _opts), do: {:ok, []} +end From 81ea05efc08db86e6759c9715c20311c7b7cbdef Mon Sep 17 00:00:00 2001 From: Rock Date: Thu, 2 Apr 2026 00:17:54 +0200 Subject: [PATCH 05/29] feat: add synchronous acknowledgment and error classification Introduce synchronous acknowledgment mode and improve error handling for failed and cancelled gRPC operations. Harden the producer for production use with better stream lifecycle management. Key changes: - Add synchronous ack mode for at-least-once delivery guarantees - Classify FAILED_PRECONDITION and CANCELLED errors appropriately - Simplify producer and stream manager internal state - Improve ack batcher with configurable batch sizes - Remove unused streaming pull response handling code --- .../streaming/ack_batcher.ex | 261 +++++++-- .../streaming/error_classifier.ex | 20 +- .../streaming/options.ex | 17 +- .../streaming/producer.ex | 13 +- .../streaming/stream_manager.ex | 541 ++++++++++-------- .../streaming/stream_reader.ex | 14 +- .../streaming/unary_rpc_client.ex | 51 +- mix.exs | 9 + .../streaming/ack_batcher_test.exs | 214 +++++++ .../streaming/error_classifier_test.exs | 10 +- .../streaming/stream_manager_test.exs | 507 +++++++++++++++- 11 files changed, 1311 insertions(+), 346 deletions(-) diff --git a/lib/broadway_cloud_pub_sub/streaming/ack_batcher.ex b/lib/broadway_cloud_pub_sub/streaming/ack_batcher.ex index 00f4a1c..919d662 100644 --- a/lib/broadway_cloud_pub_sub/streaming/ack_batcher.ex +++ b/lib/broadway_cloud_pub_sub/streaming/ack_batcher.ex @@ -2,42 +2,42 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcher do @moduledoc false # GenServer that accumulates ack and modifyAckDeadline requests and flushes - # them to UnaryRpcClient on a configurable timer or size threshold. - # - # ## Modack grouping - # - # ModifyAckDeadline requests carry a single deadline value for all ack IDs in - # the request. We group modack requests by deadline value so that one unary RPC - # is sent per unique deadline per flush cycle. - # - # ## Flush triggers - # - # 1. Timer fires (every ack_batch_interval_ms) - # 2. Accumulated ack count reaches ack_batch_max_size - # 3. Explicit `flush/1` call (used during graceful shutdown) - # - # ## Relationship to UnaryRpcClient - # - # AckBatcher and UnaryRpcClient are siblings under UnaryAckSupervisor. The - # batcher looks up the RPC client by its registered name derived from the - # Broadway pipeline name. + # them to UnaryRpcClient on a timer, size threshold, or explicit flush. + # Modacks are grouped by deadline value: one unary RPC per unique deadline per flush. use GenServer alias BroadwayCloudPubSub.Streaming.UnaryRpcClient + @max_modack_attempts 3 + defstruct [ :rpc_client, :batch_interval_ms, :batch_max_size, :timer_ref, - # Accumulated ack_ids waiting to be flushed. + # nil = no deadline. Set to 600_000ms when exactly-once delivery is enabled. + retry_deadline_ms: nil, ack_ids: [], ack_count: 0, - # Accumulated modacks: %{deadline_seconds => [ack_id]} - modack_ids: %{} + # %{deadline_seconds => [ack_id]} + modack_ids: %{}, + # Monotonic ms of when each ack_id was first queued; cleaned up on success or expiry. + ack_first_queued: %{}, + modack_first_queued: %{}, + # Per-ack-ID attempt count; cleaned up each flush via sweep over remaining_modacks. + modack_attempts: %{} ] + @doc """ + Updates the retry deadline at runtime. Called by StreamManager when it detects + a change in exactly-once delivery status from subscription_properties. + """ + @spec update_retry_deadline(GenServer.server(), pos_integer()) :: :ok + def update_retry_deadline(pid, retry_deadline_ms) do + GenServer.cast(pid, {:update_retry_deadline, retry_deadline_ms}) + end + @spec start_link(keyword()) :: GenServer.on_start() def start_link(opts) do {name, opts} = Keyword.pop(opts, :name) @@ -69,6 +69,20 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcher do def modack(_pid, [], _deadline), do: :ok + @doc """ + Sends a receipt modack for exactly-once delivery. Spawns a Task that calls + UnaryRpcClient.modify_ack_deadline/3 and sends the result to `reply_to` + as `{:receipt_modack_result, ref, result}`. + + Unlike modack/3, this is NOT batched — it runs immediately because + exactly-once delivery requires confirmation before dispatching messages. + """ + @spec receipt_modack(GenServer.server(), reference(), pid(), [String.t()], non_neg_integer()) :: + :ok + def receipt_modack(pid, ref, reply_to, ack_ids, deadline_seconds) do + GenServer.cast(pid, {:receipt_modack, ref, reply_to, ack_ids, deadline_seconds}) + end + @doc """ Flushes all pending batches synchronously. Used during graceful shutdown to ensure no acks are dropped before the process terminates. @@ -87,7 +101,8 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcher do state = %__MODULE__{ rpc_client: config.rpc_client, batch_interval_ms: config.ack_batch_interval_ms, - batch_max_size: config.ack_batch_max_size + batch_max_size: config.ack_batch_max_size, + retry_deadline_ms: config[:retry_deadline_ms] } {:ok, schedule_flush(state)} @@ -95,15 +110,15 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcher do @impl GenServer def handle_cast({:ack, ack_ids}, state) do + now = System.monotonic_time(:millisecond) new_ids = ack_ids ++ state.ack_ids new_count = state.ack_count + length(ack_ids) - state = %{state | ack_ids: new_ids, ack_count: new_count} + # put_new: don't reset timestamp if this ack_id is already being retried + new_ts = Enum.reduce(ack_ids, state.ack_first_queued, &Map.put_new(&2, &1, now)) + state = %{state | ack_ids: new_ids, ack_count: new_count, ack_first_queued: new_ts} state = if new_count >= state.batch_max_size do - # Size-triggered flush: reschedule the timer so periodic flushing - # continues. Without rescheduling, timer_ref is left nil after - # do_flush cancels it and no further periodic flushes ever occur. do_flush(state) else state @@ -113,11 +128,22 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcher do end def handle_cast({:modack, ack_ids, deadline_seconds}, state) do + now = System.monotonic_time(:millisecond) + new_modack_ids = Map.update(state.modack_ids, deadline_seconds, ack_ids, &(ack_ids ++ &1)) total_modack_count = new_modack_ids |> Map.values() |> Enum.map(&length/1) |> Enum.sum() - state = %{state | modack_ids: new_modack_ids} + # put_new: don't reset timestamp or attempt count for already-tracked ids + new_ts = Enum.reduce(ack_ids, state.modack_first_queued, &Map.put_new(&2, &1, now)) + new_attempts = Enum.reduce(ack_ids, state.modack_attempts, &Map.put_new(&2, &1, 0)) + + state = %{ + state + | modack_ids: new_modack_ids, + modack_first_queued: new_ts, + modack_attempts: new_attempts + } state = if state.ack_count + total_modack_count >= state.batch_max_size do @@ -129,6 +155,24 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcher do {:noreply, state} end + def handle_cast({:update_retry_deadline, retry_deadline_ms}, state) do + {:noreply, %{state | retry_deadline_ms: retry_deadline_ms}} + end + + # Receipt modack for exactly-once delivery. Spawns a Task that calls + # UnaryRpcClient directly (bypassing batching) and sends the result back + # to the caller. The Task is fire-and-forget from AckBatcher's perspective. + def handle_cast({:receipt_modack, ref, reply_to, ack_ids, deadline_seconds}, state) do + rpc_client = state.rpc_client + + Task.start(fn -> + result = UnaryRpcClient.modify_ack_deadline(rpc_client, ack_ids, deadline_seconds) + send(reply_to, {:receipt_modack_result, ref, result}) + end) + + {:noreply, state} + end + @impl GenServer def handle_call(:flush, _from, state) do state = do_flush(state) @@ -157,8 +201,7 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcher do schedule_flush(state) _pid -> - # Each step runs independently — a failure in flush_acks does not - # prevent flush_modacks from running. + # Each step runs independently; a flush_acks failure does not block flush_modacks. state |> flush_acks() |> flush_modacks() @@ -171,28 +214,34 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcher do defp flush_acks(state) do case UnaryRpcClient.acknowledge(state.rpc_client, state.ack_ids) do {:ok, []} -> - %{state | ack_ids: [], ack_count: 0} + %{state | ack_ids: [], ack_count: 0, ack_first_queued: %{}} {:ok, remaining_ids} -> - # Partial success — retain only the failed ack_ids for next flush - %{state | ack_ids: remaining_ids, ack_count: length(remaining_ids)} + state |> put_retained_acks(remaining_ids) |> expire_stale_acks() {:error, {_rpc_error, transient_ids}} when is_list(transient_ids) -> - # Per-ack-ID partial failure: permanent ids already dropped by - # UnaryRpcClient. Retain only the transient ids for retry. - %{state | ack_ids: transient_ids, ack_count: length(transient_ids)} + # Permanent ids already dropped by UnaryRpcClient; retain only transient. + state |> put_retained_acks(transient_ids) |> expire_stale_acks() {:error, _reason} -> - # Total failure — retain all ack_ids - state + expire_stale_acks(state) end end defp flush_modacks(%{modack_ids: modacks} = state) when map_size(modacks) == 0, do: state defp flush_modacks(state) do - # Each deadline group is attempted independently — failure in one group does - # not prevent the others from being flushed. + all_ids = state.modack_ids |> Map.values() |> List.flatten() + + # Increment attempt count for all ids about to be flushed. + attempts = + Enum.reduce(all_ids, state.modack_attempts, fn id, acc -> + Map.update(acc, id, 1, &(&1 + 1)) + end) + + state = %{state | modack_attempts: attempts} + + # Each deadline group is attempted independently. remaining_modacks = Enum.reduce(state.modack_ids, %{}, fn {deadline, ids}, remaining -> case UnaryRpcClient.modify_ack_deadline(state.rpc_client, ids, deadline) do @@ -200,24 +249,131 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcher do remaining {:ok, remaining_ids} -> - # Partial success — retain only the failed ids for this deadline - Map.put(remaining, deadline, remaining_ids) + keep = apply_modack_retry_limit(remaining_ids, state.modack_attempts) + if keep == [], do: remaining, else: Map.put(remaining, deadline, keep) {:error, {_rpc_error, transient_ids}} when is_list(transient_ids) -> - # Per-ack-ID partial failure: retain only transient ids. - if transient_ids == [] do - remaining - else - Map.put(remaining, deadline, transient_ids) - end + keep = apply_modack_retry_limit(transient_ids, state.modack_attempts) + if keep == [], do: remaining, else: Map.put(remaining, deadline, keep) {:error, _reason} -> - # Total failure for this deadline — retain all ids - Map.put(remaining, deadline, ids) + keep = apply_modack_retry_limit(ids, state.modack_attempts) + if keep == [], do: remaining, else: Map.put(remaining, deadline, keep) end end) - %{state | modack_ids: remaining_modacks} + # Cleanup sweep: bound tracking maps to currently-pending ids only. + still_pending = remaining_modacks |> Map.values() |> List.flatten() |> MapSet.new() + + clean_attempts = + Map.filter(state.modack_attempts, fn {id, _} -> MapSet.member?(still_pending, id) end) + + clean_ts = + Map.filter(state.modack_first_queued, fn {id, _} -> MapSet.member?(still_pending, id) end) + + state = %{ + state + | modack_ids: remaining_modacks, + modack_attempts: clean_attempts, + modack_first_queued: clean_ts + } + + expire_stale_modacks(state) + end + + # Drops modack ids that have reached the maximum attempt count and emits telemetry. + defp apply_modack_retry_limit(ids, attempts) do + {keep, drop} = + Enum.split_with(ids, fn id -> Map.get(attempts, id, 0) < @max_modack_attempts end) + + if drop != [] do + :telemetry.execute( + [:broadway_cloud_pub_sub, :stream, :modack_retry_exhausted], + %{count: length(drop)}, + %{} + ) + end + + keep + end + + # Replaces the pending ack_ids with the given retained set and cleans up + # ack_first_queued to contain only the retained ids. + defp put_retained_acks(state, retained_ids) do + retained_set = MapSet.new(retained_ids) + + clean_ts = + Map.filter(state.ack_first_queued, fn {id, _} -> MapSet.member?(retained_set, id) end) + + %{state | ack_ids: retained_ids, ack_count: length(retained_ids), ack_first_queued: clean_ts} + end + + defp expire_stale_acks(%{retry_deadline_ms: nil} = state), do: state + + defp expire_stale_acks(state) do + now = System.monotonic_time(:millisecond) + + {live, expired} = + Enum.split_with(state.ack_ids, fn id -> + case Map.get(state.ack_first_queued, id) do + nil -> true + ts -> now - ts < state.retry_deadline_ms + end + end) + + if expired != [] do + :telemetry.execute( + [:broadway_cloud_pub_sub, :stream, :ack_retry_expired], + %{count: length(expired)}, + %{} + ) + end + + clean_ts = Map.drop(state.ack_first_queued, expired) + %{state | ack_ids: live, ack_count: length(live), ack_first_queued: clean_ts} + end + + defp expire_stale_modacks(%{retry_deadline_ms: nil} = state), do: state + + defp expire_stale_modacks(state) do + now = System.monotonic_time(:millisecond) + + {remaining_modacks, expired_count} = + Enum.reduce(state.modack_ids, {%{}, 0}, fn {deadline, ids}, {acc, dropped} -> + {live, expired} = + Enum.split_with(ids, fn id -> + case Map.get(state.modack_first_queued, id) do + nil -> true + ts -> now - ts < state.retry_deadline_ms + end + end) + + acc = if live == [], do: acc, else: Map.put(acc, deadline, live) + {acc, dropped + length(expired)} + end) + + if expired_count > 0 do + :telemetry.execute( + [:broadway_cloud_pub_sub, :stream, :modack_retry_expired], + %{count: expired_count}, + %{} + ) + end + + still_pending = remaining_modacks |> Map.values() |> List.flatten() |> MapSet.new() + + clean_ts = + Map.filter(state.modack_first_queued, fn {id, _} -> MapSet.member?(still_pending, id) end) + + clean_attempts = + Map.filter(state.modack_attempts, fn {id, _} -> MapSet.member?(still_pending, id) end) + + %{ + state + | modack_ids: remaining_modacks, + modack_first_queued: clean_ts, + modack_attempts: clean_attempts + } end defp schedule_flush(state) do @@ -230,8 +386,7 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcher do defp cancel_timer(%{timer_ref: ref} = state) do Process.cancel_timer(ref) - # Drain any :flush_timer message that was already delivered to the mailbox - # before cancel_timer ran, to prevent an extra flush after the cancel. + # Drain any :flush_timer already in the mailbox to prevent a double flush. receive do :flush_timer -> :ok after diff --git a/lib/broadway_cloud_pub_sub/streaming/error_classifier.ex b/lib/broadway_cloud_pub_sub/streaming/error_classifier.ex index 2cd4c9c..26a9ec8 100644 --- a/lib/broadway_cloud_pub_sub/streaming/error_classifier.ex +++ b/lib/broadway_cloud_pub_sub/streaming/error_classifier.ex @@ -30,7 +30,21 @@ defmodule BroadwayCloudPubSub.Streaming.ErrorClassifier do # - NOT_FOUND (5) — subscription does not exist # - PERMISSION_DENIED (7) — service account lacks Subscriber role # - INVALID_ARGUMENT (3) — bad subscription name or flow-control params - # - CANCELLED (1) — deliberate cancellation (not self-initiated) + # - FAILED_PRECONDITION (9)— subscription in wrong state (e.g. detached or + # seeking). Reconnecting without a config change + # will not resolve the condition. + # + # CANCELLED (1) is classified as **retryable** because in practice it occurs + # in two benign scenarios: + # + # 1. Server-side stream teardown — the server cancels the stream during load + # balancing or backend rotation. Reconnecting immediately succeeds. + # 2. Client-side stream replacement — some libraries (Node.js) cancel a + # stream to open a fresh one. The cancellation is expected, not terminal. + # + # During graceful shutdown the StreamReader is killed before any CANCELLED + # error can be forwarded, so a retryable classification does not cause an + # unwanted reconnect during drain. @terminal_status_codes MapSet.new([ # NOT_FOUND — subscription does not exist @@ -39,8 +53,8 @@ defmodule BroadwayCloudPubSub.Streaming.ErrorClassifier do 7, # INVALID_ARGUMENT — bad config / subscription name 3, - # CANCELLED — external cancellation (self-cancellation is handled separately) - 1 + # FAILED_PRECONDITION — subscription in wrong state + 9 ]) @type classification :: :retryable | :terminal diff --git a/lib/broadway_cloud_pub_sub/streaming/options.ex b/lib/broadway_cloud_pub_sub/streaming/options.ex index 9f45f2b..4fdcfc1 100644 --- a/lib/broadway_cloud_pub_sub/streaming/options.ex +++ b/lib/broadway_cloud_pub_sub/streaming/options.ex @@ -160,15 +160,14 @@ defmodule BroadwayCloudPubSub.Streaming.Options do type: :pos_integer, default: 60_000, doc: """ - Maximum total time in milliseconds that the unary RPC client (UnaryRpcClient) - will spend retrying a single acknowledge or modifyAckDeadline request before - giving up and dropping the ack_ids. - - Each retry attempt uses jittered exponential backoff starting at 100ms and - capped at 60s. The default of 60,000ms (60 seconds) matches standard gax - retry behaviour. When exactly-once delivery is enabled (auto-detected from - subscription properties), this value should be increased to 600,000ms (600 - seconds) to match the Go client's extended retry deadline for exactly-once acks. + Maximum total time in milliseconds to keep retrying a failed acknowledge or + modifyAckDeadline request before giving up and dropping the ack_ids. + + The default of 60,000ms (60 seconds) applies to standard delivery subscriptions. + When exactly-once delivery is detected from subscription properties, the library + automatically switches to 600,000ms (600 seconds) to match the Go client's + extended retry deadline for exactly-once acks. The configured value is restored + if exactly-once delivery is later disabled on the subscription. """ ], keepalive_interval_ms: [ diff --git a/lib/broadway_cloud_pub_sub/streaming/producer.ex b/lib/broadway_cloud_pub_sub/streaming/producer.ex index c54cf81..eb1c70c 100644 --- a/lib/broadway_cloud_pub_sub/streaming/producer.ex +++ b/lib/broadway_cloud_pub_sub/streaming/producer.ex @@ -360,25 +360,21 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do def prepare_for_draining(state) do %{manager_pid: manager_pid, config: config} = state - # 1. Get buffered messages (not yet dispatched to processors) and nack them - # immediately per on_shutdown config. These messages will be redelivered - # by the server after the configured delay. + # Nack buffered messages (not yet dispatched to processors) per on_shutdown config. buffered = StreamManager.get_buffered(manager_pid) case {config[:on_shutdown], buffered} do {_, []} -> :ok - {:noop, _} -> + {:noop, _buffered_ids} -> :ok {{:nack, delay_seconds}, ack_ids} -> StreamManager.modify_deadline(manager_pid, ack_ids, delay_seconds) end - # 2. Stop receiving new messages and begin the drain phase. StreamManager - # will close the reader, start a drain timer, and close the stream once - # all outstanding (in-flight) messages have been acked/nacked. + # Stop receiving new messages and begin the drain phase. StreamManager.stop_receiving(manager_pid) {:noreply, [], %{state | draining: true}} @@ -389,12 +385,9 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do %{manager_pid: manager_pid} = state if Process.alive?(manager_pid) do - # The drain phase in prepare_for_draining already handled buffered and - # outstanding messages. Just ensure the stream is closed cleanly. StreamManager.close(manager_pid) end - # Clean up persistent_term :persistent_term.erase(state.ack_ref) :ok diff --git a/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex b/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex index d6a5137..8085114 100644 --- a/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex +++ b/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex @@ -2,49 +2,9 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do @moduledoc false # GenServer that owns the gRPC bidirectional StreamingPull connection. - # Responsibilities: - # - Connect and reconnect with exponential backoff - # - Receive messages from the stream and forward them to the producer - # - Route ack/modifyAckDeadline requests to AckBatcher, which sends them - # as unary RPCs via UnaryRpcClient (independent of this stream) - # - Track outstanding (delivered but not acked) message ack_ids for - # lease management and shutdown nacking - # - Extend message leases periodically via modifyAckDeadline (through AckBatcher) - # - Buffer incoming messages when the producer has no pending demand - # (demand-based backpressure via notify_demand/1) - # - Send keep-alive pings every 30s to prevent server idle timeout - # - # Backpressure design: - # The producer calls notify_demand/2 whenever Broadway signals new demand, - # passing the total accumulated demand count. StreamManager keeps a - # `pending_demand` integer. When `pending_demand` is 0, incoming messages - # are stored in `message_buffer` instead of being forwarded. On each - # notify_demand/2 or incoming gRPC batch, up to `pending_demand` messages - # are flushed to the producer with the rest remaining buffered. The buffer - # is naturally bounded by `max_outstanding_messages` (the Pub/Sub server - # will not push more unacked messages than that limit). - # - # gRPC streaming: - # A dedicated `StreamReader` process owns the gRPC stream for both the - # Gun and Mint adapters. The reader calls `GRPC.Stub.recv/2` and forwards - # decoded messages back as `{:stream_messages, msgs}`. See `StreamReader` - # for a detailed explanation of why a separate process is needed. - # - # Keep-alive pings: - # Google's servers close idle StreamingPull connections after ~60 seconds - # of inactivity. We send an empty StreamingPullRequest every 30 seconds to - # prevent this via the :send_keepalive timer. - # - # Reconnect deduplication: - # Multiple events can arrive close together on a disconnect — e.g. - # {:stream_error} followed by {:stream_closed} or an {:EXIT} signal. - # We track the pending reconnect timer ref in `reconnect_ref` and skip - # scheduling if one is already set. - # - # Error classification: - # gRPC errors are classified as :retryable (reconnect) or :terminal (stop). - # Terminal errors (NOT_FOUND, PERMISSION_DENIED, etc.) indicate a permanent - # misconfiguration. The GenServer stops and Broadway's supervision restarts it. + # Manages connection lifecycle, message dispatch with demand-based backpressure, + # lease extension, keep-alive pings, and graceful drain on shutdown. + # See decisions.md for design rationale. use GenServer require Logger @@ -60,18 +20,18 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do alias Google.Pubsub.V1.StreamingPullRequest - # Default keep-alive interval. The server's inactivity timeout is ~60s; - # pinging at half that prevents closure. + # The server's inactivity timeout is ~60s; pinging at half that prevents closure. @default_keepalive_ms 30_000 @default_drain_timeout_ms 30_000 - # Grace period (seconds) subtracted from the adaptive deadline to compute the - # lease extension interval. Ensures the modack reaches the server before the - # current deadline expires. + # Exactly-once delivery requires a longer retry window to handle server-side transient failures. + @exactly_once_retry_deadline_ms 600_000 + + # Subtracted from the adaptive deadline when computing the lease extension interval. @grace_period_seconds 5 - # Minimum ack deadline for exactly-once delivery mode. + # Minimum ack deadline enforced by the server for exactly-once subscriptions. @min_deadline_exactly_once_seconds 60 defstruct [ @@ -80,50 +40,31 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do :channel, :grpc_stream, :conn_pid, - # Pid of the linked StreamReader process that enumerates GRPC.Stub.recv/2 + # Pid of the linked StreamReader process. :reader_pid, :backoff, :lease_timer, - # Distribution for tracking message processing times, used to compute the - # adaptive p99 ack deadline. + # Tracks message processing times for the adaptive p99 ack deadline. :ack_time_dist, :receiving, - # Timer ref for the pending :connect message. Non-nil means a reconnect is - # already scheduled — prevents double-scheduling from multiple close signals. + # Non-nil when a reconnect is already scheduled — prevents double-scheduling. :reconnect_ref, - # Timer ref for the periodic :send_keepalive message. :keepalive_timer, - # Registered name of the AckBatcher (not PID) so we survive UnaryAckSupervisor - # restarts within a supervision cycle. + # Registered name (not PID) so we survive UnaryAckSupervisor restarts. :ack_batcher, - # Whether the producer has asked us to stop (prepare_for_draining called). - # When true, new incoming messages from the stream are ignored and we close - # the reader immediately. draining: false, - # Timer ref for the drain timeout. Non-nil means we are waiting for in-flight - # messages to be acked before closing the stream. drain_timer: nil, - # Whether the subscription has message ordering enabled, as reported by the - # server in StreamingPullResponse.subscription_properties. - # Updated dynamically on each response that includes subscription_properties. ordering_enabled: false, - # Whether the subscription has exactly-once delivery enabled, as reported by the - # server in StreamingPullResponse.subscription_properties. - # When true, the minimum ack deadline extension is raised from 10s to 60s. - # Updated dynamically on each response that includes subscription_properties. + # Updated from StreamingPullResponse.subscription_properties. exactly_once_enabled: false, - # Map of ack_id => %{received_at: monotonic_ms, max_expiry: monotonic_ms} - # for outstanding (delivered but not yet acked) messages. - # received_at is used to compute processing duration for the adaptive p99 deadline. - # max_expiry marks the absolute wall time beyond which we stop extending the lease. + # ack_id => %{received_at: monotonic_ms, max_expiry: monotonic_ms} outstanding: %{}, - # Messages buffered while the producer has no pending demand. - # Stored as an Erlang :queue for O(1) enqueue and O(1) dequeue. - # Naturally bounded by max_outstanding_messages (server-side flow control). + # Buffered messages waiting for producer demand. Bounded by max_outstanding_messages. message_buffer: :queue.new(), - # How many messages the producer can currently accept. - # Refreshed on each notify_demand/2; decremented when messages are flushed. - pending_demand: 0 + pending_demand: 0, + # In-flight receipt modack RPCs for exactly-once delivery. + # ref => %{broadway_messages, ack_ids, received_at}. See decisions.md. + pending_receipt_modacks: %{} ] # --- Public API --- @@ -286,38 +227,104 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do {:noreply, state} end - # Decoded messages forwarded from the StreamReader + # Decoded messages forwarded from the StreamReader. def handle_info({:stream_messages, messages}, state) do if state.receiving and messages != [] do broadway_messages = Enum.map(messages, &build_broadway_message(&1, state)) ack_ids = Enum.map(messages, & &1.ack_id) now = now_ms() - max_extension_ms = state.config.max_extension_ms - - new_outstanding = - Enum.reduce(ack_ids, state.outstanding, fn ack_id, acc -> - Map.put(acc, ack_id, %{received_at: now, max_expiry: now + max_extension_ms}) - end) - - # Receipt modack: immediately extend the ack deadline with the current adaptive - # p99 value. This synchronises the server-side timer with the client-side timer, - # compensating for network latency between when the server sent the message and - # when we received it. Matches Go's receiptTicker and Python's receipt modack. - # Sent as a unary RPC via AckBatcher — independent of the bidi stream. adaptive_deadline = AckTimeDistribution.percentile(state.ack_time_dist, 0.99) - AckBatcher.modack(state.ack_batcher, ack_ids, adaptive_deadline) - emit_telemetry(:receive_messages, %{count: length(broadway_messages)}, state.config) - {:noreply, deliver_messages(%{state | outstanding: new_outstanding}, broadway_messages)} + if state.exactly_once_enabled do + # Exactly-once receipt modack gate: hold messages until the receipt modack + # RPC confirms success. Messages whose modack fails are dropped (server redelivers). + effective_deadline = max(adaptive_deadline, @min_deadline_exactly_once_seconds) + ref = make_ref() + AckBatcher.receipt_modack(state.ack_batcher, ref, self(), ack_ids, effective_deadline) + + pending = + Map.put(state.pending_receipt_modacks, ref, %{ + broadway_messages: broadway_messages, + ack_ids: ack_ids, + received_at: now + }) + + {:noreply, %{state | pending_receipt_modacks: pending}} + else + # Standard delivery: fire-and-forget receipt modack, dispatch immediately. + new_outstanding = + add_to_outstanding(state.outstanding, ack_ids, now, state.config.max_extension_ms) + + AckBatcher.modack(state.ack_batcher, ack_ids, adaptive_deadline) + emit_telemetry(:receive_messages, %{count: length(broadway_messages)}, state.config) + {:noreply, deliver_messages(%{state | outstanding: new_outstanding}, broadway_messages)} + end else {:noreply, state} end end - # Subscription properties update forwarded from the StreamReader. - # The server sends these in StreamingPullResponse.subscription_properties on - # any response (including heartbeats) when the subscription's settings change. + # Result of an exactly-once receipt modack RPC sent via AckBatcher.receipt_modack/5. + # Messages are delivered only if the receipt modack succeeded; otherwise dropped + # (the server will redeliver them). + def handle_info({:receipt_modack_result, ref, result}, state) do + case Map.pop(state.pending_receipt_modacks, ref) do + {nil, _} -> + # Stale or unknown ref (e.g. cleared during drain) — ignore. + {:noreply, state} + + {pending, rest} -> + state = %{state | pending_receipt_modacks: rest} + + case result do + {:ok, []} -> + new_outstanding = + add_to_outstanding( + state.outstanding, + pending.ack_ids, + pending.received_at, + state.config.max_extension_ms + ) + + emit_telemetry( + :receive_messages, + %{count: length(pending.broadway_messages)}, + state.config + ) + + {:noreply, + deliver_messages(%{state | outstanding: new_outstanding}, pending.broadway_messages)} + + {:ok, failed_ids} -> + # Partial success — deliver only messages whose modack succeeded. + {ok_msgs, ok_ids} = + partition_succeeded(pending.broadway_messages, pending.ack_ids, failed_ids) + + new_outstanding = + add_to_outstanding( + state.outstanding, + ok_ids, + pending.received_at, + state.config.max_extension_ms + ) + + if ok_msgs != [] do + emit_telemetry(:receive_messages, %{count: length(ok_msgs)}, state.config) + {:noreply, deliver_messages(%{state | outstanding: new_outstanding}, ok_msgs)} + else + {:noreply, %{state | outstanding: new_outstanding}} + end + + {:error, _reason} -> + # Total failure — drop all messages (server will redeliver). + {:noreply, state} + end + end + end + + # Subscription properties forwarded from the StreamReader. + # Sent by the server on any response when subscription settings change. def handle_info( {:subscription_properties, %{ @@ -326,16 +333,28 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do } = _props}, state ) do + # Propagate retry deadline change to AckBatcher when exactly-once status changes. + if exactly_once_enabled != state.exactly_once_enabled do + new_deadline = + if exactly_once_enabled, + do: @exactly_once_retry_deadline_ms, + else: Map.get(state.config, :retry_deadline_ms, 60_000) + + AckBatcher.update_retry_deadline(state.ack_batcher, new_deadline) + end + {:noreply, %{state | ordering_enabled: ordering_enabled, exactly_once_enabled: exactly_once_enabled}} end # Stream-level gRPC error reported by the StreamReader. - # Classify: retryable errors trigger reconnect; terminal errors stop the GenServer. + # Retryable errors trigger reconnect; terminal errors stop the GenServer. def handle_info({:stream_error, error}, state) do case ErrorClassifier.classify(error) do :terminal -> - Logger.error("Terminal Cloud Pub/Sub gRPC error — stopping: #{inspect(error)}") + Logger.error( + "Terminal gRPC stream error on subscription #{state.config.subscription} - reason: #{inspect(error)}. Stopping StreamManager." + ) emit_telemetry(:terminal_error, %{reason: error}, state.config) {:stop, {:terminal_error, error}, close_stream(state)} @@ -346,35 +365,27 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do end end - # Server closed the stream normally (StreamReader enumeration exhausted) + # Server closed the stream normally (StreamReader enumeration exhausted). def handle_info({:stream_closed}, state) do emit_telemetry(:disconnect, %{reason: :stream_closed}, state.config) - # The stream ended naturally: the Mint ConnectionProcess already called - # StreamResponseProcess.done/1 and popped the request_ref from its state - # when it received the HTTP/2 END_STREAM frame. Calling GRPC.Stub.cancel - # now would make the ConnectionProcess try to send :done to the - # already-stopped StreamResponseProcess, crashing the ConnectionProcess. - # Nil out grpc_stream so close_stream/1 skips the cancel for this case. + # Stream ended naturally; nil out grpc_stream to skip cancel in close_stream/1. + # See decisions.md for why cancelling after a server-initiated close crashes the Mint ConnectionProcess. state = %{state | grpc_stream: nil} if state.draining do - # Mid-drain: do not open a new stream; just clean up reader/channel. {:noreply, reset_connection(state, :stream_closed)} else {:noreply, schedule_reconnect(reset_connection(state, :stream_closed))} end end - # StreamReader process exited normally — stream ended cleanly. - # {:stream_closed} is sent before the exit, so this is a duplicate signal. - # We only reconnect if grpc_stream is still set (meaning the stream_closed - # message wasn't processed first). + # StreamReader exited normally — {:stream_closed} should arrive first. + # Only reconnect if grpc_stream is still set (stream_closed not yet processed). def handle_info({:EXIT, pid, :normal}, %{reader_pid: pid} = state) do if state.grpc_stream do emit_telemetry(:disconnect, %{reason: :stream_closed}, state.config) - - # Same rationale as {:stream_closed}: stream ended naturally, skip cancel. + # Same rationale as {:stream_closed}: skip cancel on natural close. state = %{state | grpc_stream: nil} if state.draining do @@ -383,64 +394,27 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do {:noreply, schedule_reconnect(reset_connection(state, :stream_closed))} end else - # Already handled by {:stream_closed} — just clear the reader_pid + # Already handled by {:stream_closed} — just clear the reader_pid. {:noreply, %{state | reader_pid: nil}} end end - # StreamReader process crashed — reconnect + # StreamReader crashed — reconnect. def handle_info({:EXIT, pid, reason}, %{reader_pid: pid} = state) do emit_telemetry(:disconnect, %{reason: reason}, state.config) {:noreply, schedule_reconnect(reset_connection(state, reason))} end - # Catch-all for other EXIT signals (e.g. from the supervisor during shutdown) + # Catch-all for other EXIT signals (e.g. from the supervisor during shutdown). def handle_info({:EXIT, _pid, _reason}, state) do {:noreply, state} end def handle_info(:extend_leases, state) do - now = now_ms() - adaptive_deadline = AckTimeDistribution.percentile(state.ack_time_dist, 0.99) - - # When exactly-once delivery is enabled, enforce a higher minimum deadline of 60s. - effective_deadline = - if state.exactly_once_enabled do - max(adaptive_deadline, @min_deadline_exactly_once_seconds) - else - adaptive_deadline - end - - # Partition into still-valid (before max_expiry) and expired (past max_expiry). - # Expired messages are dropped from lease management — the server will redeliver them. - {valid, expired} = - Map.split_with(state.outstanding, fn {_id, info} -> info.max_expiry > now end) - - if map_size(expired) > 0 do - emit_telemetry(:lease_expired, %{count: map_size(expired)}, state.config) - end - - emit_telemetry( - :extend_leases, - %{count: map_size(valid), deadline: effective_deadline}, - state.config - ) - - if map_size(valid) > 0 do - AckBatcher.modack(state.ack_batcher, Map.keys(valid), effective_deadline) - end - - # Schedule next tick: (effective_deadline - grace_period) with jitter, minimum 1s. - # Jitter factor in [0.8, 0.9) prevents all StreamManagers from extending in lockstep. - base_interval_ms = max(1_000, (effective_deadline - @grace_period_seconds) * 1_000) - jitter_factor = 0.8 + :rand.uniform() * 0.1 - next_interval_ms = round(base_interval_ms * jitter_factor) - timer = Process.send_after(self(), :extend_leases, next_interval_ms) - {:noreply, %{state | outstanding: valid, lease_timer: timer}} + {:noreply, do_extend_leases(state)} end - # Periodic keep-alive ping: send an empty StreamingPullRequest to prevent the - # server from closing an idle stream. The server's inactivity timeout is ~60s. + # Periodic keep-alive ping to prevent the server from closing an idle stream. def handle_info(:send_keepalive, %{grpc_stream: nil} = state) do {:noreply, state} end @@ -460,13 +434,13 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do end end - # Mint adapter signals connection loss to its parent process. + # Mint adapter signals connection loss. def handle_info({:elixir_grpc, :connection_down, conn_pid}, %{conn_pid: conn_pid} = state) do emit_telemetry(:disconnect, %{reason: :connection_down}, state.config) {:noreply, schedule_reconnect(reset_connection(state, :connection_down))} end - # Gun adapter signals connection loss via :gun_down messages. + # Gun adapter signals connection loss. def handle_info( {:gun_down, conn_pid, _protocol, _reason, _killed_streams}, %{conn_pid: conn_pid} = state @@ -477,8 +451,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do def handle_info(:drain_timeout, state) do emit_telemetry(:drain_timeout, %{}, state.config) - state = close_stream(%{state | drain_timer: nil}) - {:noreply, state} + {:noreply, close_stream(%{state | drain_timer: nil})} end def handle_info(_msg, state) do @@ -559,6 +532,8 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do end def handle_call(:stop_receiving, _from, state) do + # Nack pending receipt modacks so the server redelivers them quickly. + state = nack_pending_receipt_modacks(state) # Close the reader so no new messages arrive; keep the channel open for AckBatcher. state = close_reader(state) state = start_drain_timer(state) @@ -579,10 +554,8 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do end def handle_call(:close, _from, state) do - # Best-effort flush before closing. Guard against AckBatcher already being - # dead during pipeline shutdown (Broadway stops children in reverse start order). + # Best-effort flush; AckBatcher may already be down during pipeline shutdown. flush_batcher_if_alive(state.ack_batcher) - state = close_stream(state) {:reply, :ok, state} end @@ -680,94 +653,87 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do end defp reset_connection(state, reason) do - # Drop buffered (not-yet-delivered) messages on disconnect — their ack_ids - # are in `outstanding`, so remove them to avoid pointless lease-extension - # attempts for messages that will be redelivered. + # Drop buffered messages on disconnect; their ack_ids are already in outstanding + # so removing them avoids pointless lease-extension for messages that will redeliver. buffered_ack_ids = state.message_buffer |> :queue.to_list() |> Enum.map(fn %Broadway.Message{acknowledger: {_, _, %{ack_id: id}}} -> id end) - new_outstanding = - Enum.reduce(buffered_ack_ids, state.outstanding, &Map.delete(&2, &1)) + new_outstanding = Enum.reduce(buffered_ack_ids, state.outstanding, &Map.delete(&2, &1)) - # Preserve `pending_demand` across reconnection. The producer's demand counter - # survives the disconnect and it won't re-signal demand it already sent. - # Clearing it would cause a demand deadlock: the producer has pending demand - # but thinks it already notified us, while we lost the count. + # Preserve pending_demand across reconnection to avoid a demand deadlock. + # See decisions.md. close_stream( - %{ - state - | message_buffer: :queue.new(), - outstanding: new_outstanding, - reconnect_ref: state.reconnect_ref - }, + %{state | message_buffer: :queue.new(), outstanding: new_outstanding}, reason ) end - # Overload that does not carry a reason (used by close_stream directly) defp close_stream(%{reader_pid: nil, grpc_stream: nil} = state), do: state - defp close_stream(%{reader_pid: reader_pid, grpc_stream: grpc_stream, channel: channel} = state) do + defp close_stream(state) do + state + |> stop_reader() + |> cancel_grpc_stream() + |> disconnect_channel() + |> cancel_keepalive_timer() + |> then(&%{&1 | reader_pid: nil, grpc_stream: nil, channel: nil, conn_pid: nil}) + end + + defp close_stream(state, _reason), do: close_stream(state) + + defp stop_reader(%{reader_pid: nil} = state), do: state + + defp stop_reader(%{reader_pid: reader_pid} = state) do # Unlink before killing to prevent the EXIT signal from triggering reconnect. - if is_pid(reader_pid) do - Process.unlink(reader_pid) - Process.exit(reader_pid, :kill) - end + Process.unlink(reader_pid) + Process.exit(reader_pid, :kill) + state + end - if grpc_stream do - # Guard against cancelling a stream whose StreamResponseProcess is already - # dead. The Mint ConnectionProcess calls StreamResponseProcess.done/1 - # (a synchronous GenServer.call) as part of handling {:cancel_request, ...}. - # If the StreamResponseProcess is already gone — because the reader was killed - # and the linked SRP died with it — the ConnectionProcess crashes with - # "no process" even though our try/catch protects the StreamManager. - # Checking liveness first lets us skip the cancel when there's nothing to - # cancel safely. For Gun-based streams, payload.stream_response_pid is nil - # so the guard is always true there. - srp_alive? = - case grpc_stream do - %{payload: %{stream_response_pid: pid}} when is_pid(pid) -> Process.alive?(pid) - _ -> true - end + defp cancel_grpc_stream(%{grpc_stream: nil} = state), do: state - if srp_alive? do - try do - GRPC.Stub.cancel(grpc_stream) - catch - _, _ -> :ok - end + defp cancel_grpc_stream(%{grpc_stream: grpc_stream} = state) do + # Skip cancel if the Mint StreamResponseProcess is already dead — calling + # GRPC.Stub.cancel would crash the ConnectionProcess. See decisions.md. + srp_alive? = + case grpc_stream do + %{payload: %{stream_response_pid: pid}} when is_pid(pid) -> Process.alive?(pid) + _ -> true end - end - - if channel do - # Only call disconnect if the underlying connection process is alive. - # When the server closes the channel (e.g. after DEADLINE_EXCEEDED), the - # adapter's connection process may already be gone. Calling disconnect on - # a dead channel causes a FunctionClauseError inside grpc's GenServer. - conn_alive? = - case state.conn_pid do - pid when is_pid(pid) -> Process.alive?(pid) - _ -> true - end - if conn_alive? do - try do - GRPC.Stub.disconnect(channel) - catch - _, _ -> :ok - end + if srp_alive? do + try do + GRPC.Stub.cancel(grpc_stream) + catch + _, _ -> :ok end end - # Cancel the keep-alive timer — it will be restarted when the new stream opens. - state = cancel_keepalive_timer(state) - %{state | reader_pid: nil, grpc_stream: nil, channel: nil, conn_pid: nil} + state end - defp close_stream(state, _reason) do - close_stream(state) + defp disconnect_channel(%{channel: nil} = state), do: state + + defp disconnect_channel(%{channel: channel} = state) do + # Only disconnect if the connection process is alive; a dead channel causes + # a FunctionClauseError inside the gRPC GenServer. See decisions.md. + conn_alive? = + case state.conn_pid do + pid when is_pid(pid) -> Process.alive?(pid) + _ -> true + end + + if conn_alive? do + try do + GRPC.Stub.disconnect(channel) + catch + _, _ -> :ok + end + end + + state end # --- Private: backoff --- @@ -776,7 +742,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do raise "StreamManager failed to connect and backoff is :stop — crashing" end - # Deduplication: if a :connect is already pending, skip to prevent the + # Deduplication: skip if a :connect is already pending to prevent the # double-reconnect race where {:stream_error} and {:stream_closed} (or {:EXIT}) # both arrive within a single disconnect. defp schedule_reconnect(%{reconnect_ref: ref} = state) when not is_nil(ref) do @@ -792,6 +758,47 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do defp now_ms, do: System.monotonic_time(:millisecond) + # --- Private: lease extension --- + + defp do_extend_leases(state) do + now = now_ms() + adaptive_deadline = AckTimeDistribution.percentile(state.ack_time_dist, 0.99) + + # Enforce minimum 60s for exactly-once subscriptions. + effective_deadline = + if state.exactly_once_enabled, + do: max(adaptive_deadline, @min_deadline_exactly_once_seconds), + else: adaptive_deadline + + # Partition into still-valid and expired (past max_expiry — server will redeliver). + {valid, expired} = + Map.split_with(state.outstanding, fn {_id, info} -> info.max_expiry > now end) + + if map_size(expired) > 0 do + emit_telemetry(:lease_expired, %{count: map_size(expired)}, state.config) + end + + emit_telemetry( + :extend_leases, + %{count: map_size(valid), deadline: effective_deadline}, + state.config + ) + + if map_size(valid) > 0 do + AckBatcher.modack(state.ack_batcher, Map.keys(valid), effective_deadline) + end + + # Schedule next tick with jitter in [0.8, 0.9) to spread out concurrent StreamManagers. + base_interval_ms = max(1_000, (effective_deadline - @grace_period_seconds) * 1_000) + jitter_factor = 0.8 + :rand.uniform() * 0.1 + timer = Process.send_after(self(), :extend_leases, round(base_interval_ms * jitter_factor)) + + state + |> Map.put(:outstanding, valid) + |> Map.put(:lease_timer, timer) + |> sweep_stale_pending_modacks() + end + # --- Private: lease management --- defp schedule_lease_timer(state) do @@ -834,8 +841,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do # --- Private: drain --- - # Kill the reader so no new messages arrive from the gRPC stream. - # The channel stays open for AckBatcher's unary ack/modack RPCs. + # Kill the reader so no new messages arrive; keep the channel open for AckBatcher. defp close_reader(%{reader_pid: nil} = state), do: state defp close_reader(%{reader_pid: reader_pid} = state) do @@ -859,13 +865,13 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do # After each ack/nack, check if we are draining and all outstanding messages # have been resolved. If so, cancel the drain timer and close the stream. - defp maybe_complete_drain(%{draining: true, outstanding: outstanding} = state) - when map_size(outstanding) == 0 do + defp maybe_complete_drain( + %{draining: true, outstanding: outstanding, pending_receipt_modacks: pending} = state + ) + when map_size(outstanding) == 0 and map_size(pending) == 0 do state = cancel_drain_timer(state) - - # Guard against AckBatcher already being dead during pipeline shutdown. + # AckBatcher may already be down during pipeline shutdown. flush_batcher_if_alive(state.ack_batcher) - emit_telemetry(:drain_complete, %{}, state.config) close_stream(state) end @@ -904,6 +910,69 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do end end + # Build outstanding entries for a list of confirmed ack_ids. + defp add_to_outstanding(outstanding, ack_ids, received_at, max_extension_ms) do + Enum.reduce(ack_ids, outstanding, fn ack_id, acc -> + Map.put(acc, ack_id, %{received_at: received_at, max_expiry: received_at + max_extension_ms}) + end) + end + + # Split broadway_messages into {succeeded_msgs, succeeded_ids} by removing + # messages whose ack_id is in failed_ids. + defp partition_succeeded(broadway_messages, all_ack_ids, failed_ids) do + failed_set = MapSet.new(failed_ids) + + {ok_msgs_reversed, ok_ids_reversed} = + Enum.zip(broadway_messages, all_ack_ids) + |> Enum.reduce({[], []}, fn {msg, id}, {msgs_acc, ids_acc} -> + if MapSet.member?(failed_set, id) do + {msgs_acc, ids_acc} + else + {[msg | msgs_acc], [id | ids_acc]} + end + end) + + {Enum.reverse(ok_msgs_reversed), Enum.reverse(ok_ids_reversed)} + end + + # Stale pending receipt modacks (older than 60s) are nacked for fast redelivery. + @receipt_modack_stale_ms 60_000 + + defp sweep_stale_pending_modacks(state) do + now = now_ms() + cutoff = now - @receipt_modack_stale_ms + + {stale, fresh} = + Map.split_with(state.pending_receipt_modacks, fn {_ref, %{received_at: t}} -> + t < cutoff + end) + + if map_size(stale) > 0 do + stale_ids = stale |> Map.values() |> Enum.flat_map(& &1.ack_ids) + AckBatcher.modack(state.ack_batcher, stale_ids, 0) + emit_telemetry(:receipt_modack_stale, %{count: length(stale_ids)}, state.config) + end + + %{state | pending_receipt_modacks: fresh} + end + + # Nack all messages held in pending_receipt_modacks so the server redelivers + # them quickly. Used during drain/shutdown. + defp nack_pending_receipt_modacks(%{pending_receipt_modacks: pending} = state) + when map_size(pending) == 0, + do: state + + defp nack_pending_receipt_modacks(state) do + pending_ids = + state.pending_receipt_modacks + |> Map.values() + |> Enum.flat_map(& &1.ack_ids) + + {_action, deadline} = state.config.on_shutdown + AckBatcher.modack(state.ack_batcher, pending_ids, deadline) + %{state | pending_receipt_modacks: %{}} + end + defp build_broadway_message( %{ack_id: ack_id, message: pubsub_msg, delivery_attempt: delivery_attempt}, state diff --git a/lib/broadway_cloud_pub_sub/streaming/stream_reader.ex b/lib/broadway_cloud_pub_sub/streaming/stream_reader.ex index 8484276..3b4611d 100644 --- a/lib/broadway_cloud_pub_sub/streaming/stream_reader.ex +++ b/lib/broadway_cloud_pub_sub/streaming/stream_reader.ex @@ -71,11 +71,8 @@ defmodule BroadwayCloudPubSub.Streaming.StreamReader do send(manager, {:stream_opened, self(), grpc_stream}) case GRPC.Stub.recv(grpc_stream, timeout: :infinity) do - {:ok, enum} -> - enumerate(enum, manager) - - {:error, error} -> - send(manager, {:stream_error, error}) + {:ok, enum} -> enumerate(enum, manager) + {:error, error} -> send(manager, {:stream_error, error}) end end @@ -100,10 +97,9 @@ defmodule BroadwayCloudPubSub.Streaming.StreamReader do end) |> Stream.run() - # Stream exhausted normally — notify manager before exit. - # StreamManager will also receive {:EXIT, reader_pid, :normal} and - # schedule reconnect, but sending {:stream_closed} allows distinguishing - # normal closes from crashes in logs/telemetry. + # Stream exhausted normally — notify manager before exiting. + # Sending {:stream_closed} lets StreamManager distinguish normal closes + # from crashes before the {:EXIT, reader_pid, :normal} signal arrives. send(manager, {:stream_closed}) end end diff --git a/lib/broadway_cloud_pub_sub/streaming/unary_rpc_client.ex b/lib/broadway_cloud_pub_sub/streaming/unary_rpc_client.ex index 071d6ae..d36b010 100644 --- a/lib/broadway_cloud_pub_sub/streaming/unary_rpc_client.ex +++ b/lib/broadway_cloud_pub_sub/streaming/unary_rpc_client.ex @@ -159,20 +159,26 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClient do ack_ids: ack_ids } - case Subscriber.Stub.acknowledge(channel, request, timeout: 30_000) do + result = + :telemetry.span( + [:broadway_cloud_pub_sub, :unary, :ack], + %{name: state.config.broadway_name, subscription: state.config.subscription}, + fn -> + {Subscriber.Stub.acknowledge(channel, request, timeout: 30_000), + %{count: length(ack_ids)}} + end + ) + + case result do {:ok, _} -> - emit_telemetry(:ack, %{count: length(ack_ids)}, state.config) {:reply, :ok, state} {:error, error} -> case ErrorClassifier.classify(error) do :retryable -> - # Parse per-ack-ID errors from the gRPC error details. - # For exactly-once subscriptions, the server can return a - # retryable RPC error that contains per-ack-ID permanent - # failures embedded in google.rpc.ErrorInfo details. - # Permanent failures are dropped; transient ones are returned - # to AckBatcher for retry on the next flush. + # For exactly-once subscriptions, retryable RPC errors may embed + # per-ack-ID permanent failures in error details. Permanent ids + # are dropped; transient ones are returned to AckBatcher for retry. per_ack_errors = AckResult.parse_error_details(Map.get(error, :details)) {transient_ids, permanent_ids} = split_by_ack_result(ack_ids, per_ack_errors) @@ -195,7 +201,7 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClient do :terminal -> Logger.error( - "[UnaryRpcClient] Terminal error on ack (#{length(ack_ids)} ids): #{inspect(error)}" + "Unable to acknowledge messages with Cloud Pub/Sub via gRPC - reason: #{inspect(error)}" ) # Reply first so caller can retain ack_ids, then stop so supervisor restarts fresh. @@ -219,9 +225,18 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClient do ack_deadline_seconds: deadline_seconds } - case Subscriber.Stub.modify_ack_deadline(channel, request, timeout: 30_000) do + result = + :telemetry.span( + [:broadway_cloud_pub_sub, :unary, :modack], + %{name: state.config.broadway_name, subscription: state.config.subscription}, + fn -> + {Subscriber.Stub.modify_ack_deadline(channel, request, timeout: 30_000), + %{count: length(ack_ids)}} + end + ) + + case result do {:ok, _} -> - emit_telemetry(:modack, %{count: length(ack_ids)}, state.config) {:reply, :ok, state} {:error, error} -> @@ -249,7 +264,7 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClient do :terminal -> Logger.error( - "[UnaryRpcClient] Terminal error on modack (#{length(ack_ids)} ids, deadline=#{deadline_seconds}s): #{inspect(error)}" + "Unable to modify ack deadline for messages with Cloud Pub/Sub via gRPC - reason: #{inspect(error)}" ) {:stop, {:terminal_error, error}, {:error, error}, state} @@ -279,13 +294,9 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClient do end end - # The Mint/Gun ConnectionProcess spawned by GRPC.Stub.connect is linked to - # this GenServer. With trap_exit enabled (set in init/1), its normal exit on - # disconnect/shutdown is delivered here rather than killing us. - # - # :normal — peer disconnected cleanly; nil out the channel so - # ensure_channel/1 will reopen it on the next request. - # other — unexpected crash; schedule a reconnect. + # The Mint/Gun ConnectionProcess is linked to this GenServer (trap_exit in init/1). + # :normal = clean disconnect; nil out channel so ensure_channel/1 reopens it. + # other = unexpected crash; schedule a reconnect. def handle_info({:EXIT, _pid, :normal}, state) do {:noreply, %{state | channel: nil}} end @@ -298,7 +309,7 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClient do def handle_info(_msg, state), do: {:noreply, state} @impl GenServer - def terminate(_reason, %{channel: channel}) when not is_nil(channel) do + def terminate(_reason, %{channel: channel} = _state) when not is_nil(channel) do try do GRPC.Stub.disconnect(channel) catch diff --git a/mix.exs b/mix.exs index 65a7134..590b126 100644 --- a/mix.exs +++ b/mix.exs @@ -57,6 +57,15 @@ defmodule BroadwayCloudPubSub.MixProject do extras: [ "README.md", "CHANGELOG.md" + ], + groups_for_modules: [ + Pull: [ + BroadwayCloudPubSub.Producer, + BroadwayCloudPubSub.Client + ], + Streaming: [ + BroadwayCloudPubSub.Streaming.Producer + ] ] ] end diff --git a/test/broadway_cloud_pub_sub/streaming/ack_batcher_test.exs b/test/broadway_cloud_pub_sub/streaming/ack_batcher_test.exs index 4e5421e..e539038 100644 --- a/test/broadway_cloud_pub_sub/streaming/ack_batcher_test.exs +++ b/test/broadway_cloud_pub_sub/streaming/ack_batcher_test.exs @@ -360,6 +360,220 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcherTest do end end + # ============================================================ + # receipt_modack/5 — exactly-once delivery + # ============================================================ + + describe "receipt_modack/5" do + test "spawns a task that calls modify_ack_deadline and sends result to reply_to" do + {batcher, _rpc} = start_batcher() + ref = make_ref() + + AckBatcher.receipt_modack(batcher, ref, self(), ["id-eo-1", "id-eo-2"], 60) + + # SpyRpcClient returns :ok (i.e., {:ok, []}) for modify_ack_deadline + assert_receive {:receipt_modack_result, ^ref, {:ok, []}}, 500 + # The RPC was also made to the spy + assert_receive {:rpc, {:modify_ack_deadline, ids, 60}}, 500 + assert Enum.sort(ids) == ["id-eo-1", "id-eo-2"] + end + + test "result is sent to the specified reply_to pid, not the batcher" do + {batcher, _rpc} = start_batcher() + ref = make_ref() + # reply_to is self(), so we expect the message here + AckBatcher.receipt_modack(batcher, ref, self(), ["id-reply"], 60) + + assert_receive {:receipt_modack_result, ^ref, _result}, 500 + end + + test "does NOT add ack_ids to the batcher's pending batch" do + {batcher, _rpc} = start_batcher(ack_batch_interval_ms: 10_000) + ref = make_ref() + + AckBatcher.receipt_modack(batcher, ref, self(), ["id-not-batched"], 60) + # Wait for the task to complete + assert_receive {:receipt_modack_result, ^ref, _}, 500 + + # State should have no pending ack_ids or modack_ids + state = :sys.get_state(batcher) + assert state.ack_ids == [] + assert state.modack_ids == %{} + end + + test "multiple concurrent receipt_modacks use independent refs" do + {batcher, _rpc} = start_batcher() + ref1 = make_ref() + ref2 = make_ref() + + AckBatcher.receipt_modack(batcher, ref1, self(), ["id-a"], 60) + AckBatcher.receipt_modack(batcher, ref2, self(), ["id-b"], 60) + + results = + for _ <- 1..2 do + receive do + {:receipt_modack_result, ref, result} -> {ref, result} + after + 500 -> flunk("Expected 2 receipt_modack_result messages") + end + end + + result_refs = Enum.map(results, &elem(&1, 0)) + + assert Enum.sort_by(result_refs, &:erlang.ref_to_list/1) == + Enum.sort_by([ref1, ref2], &:erlang.ref_to_list/1) + end + end + + # ============================================================ + # update_retry_deadline/2 — exactly-once auto-switch + # ============================================================ + + describe "update_retry_deadline/2" do + test "updates retry_deadline_ms in state" do + {batcher, _rpc} = start_batcher() + + # Default is nil (not configured in start_batcher) + state = :sys.get_state(batcher) + assert state.retry_deadline_ms == nil + + AckBatcher.update_retry_deadline(batcher, 600_000) + # Cast is async — sync via flush + AckBatcher.flush(batcher) + + state = :sys.get_state(batcher) + assert state.retry_deadline_ms == 600_000 + end + + test "restores configured deadline when exactly-once is disabled" do + {batcher, _rpc} = start_batcher() + + AckBatcher.update_retry_deadline(batcher, 600_000) + AckBatcher.flush(batcher) + assert :sys.get_state(batcher).retry_deadline_ms == 600_000 + + AckBatcher.update_retry_deadline(batcher, 60_000) + AckBatcher.flush(batcher) + assert :sys.get_state(batcher).retry_deadline_ms == 60_000 + end + end + + # ============================================================ + # Modack retry limit — @max_modack_attempts = 3 + # ============================================================ + + # RPC client that always fails modify_ack_deadline so we can observe the retry limit. + defmodule AlwaysFailModackRpc do + use GenServer + + def start_link(test_pid) do + GenServer.start_link(__MODULE__, {test_pid, 0}) + end + + def init(state), do: {:ok, state} + + def handle_call({:acknowledge, _ids}, _from, {test_pid, count}) do + {:reply, :ok, {test_pid, count + 1}} + end + + def handle_call({:modify_ack_deadline, ids, deadline}, _from, {test_pid, count}) do + send(test_pid, {:rpc, {:modack, ids, deadline}, count}) + # Always return a retryable error to force retries up to the limit + {:reply, {:error, :unavailable}, {test_pid, count + 1}} + end + end + + describe "modack retry limit" do + test "drops modack ack_ids after 3 failed attempts" do + test_pid = self() + {:ok, rpc} = AlwaysFailModackRpc.start_link(test_pid) + + {:ok, batcher} = + AckBatcher.start_link( + rpc_client: rpc, + ack_batch_interval_ms: 30, + ack_batch_max_size: 100 + ) + + AckBatcher.modack(batcher, ["id-exhaust"], 30) + + # Attempt 1 (count=0) + assert_receive {:rpc, {:modack, ["id-exhaust"], 30}, 0}, 500 + # Attempt 2 (count=1) + assert_receive {:rpc, {:modack, ["id-exhaust"], 30}, 1}, 500 + # Attempt 3 (count=2) + assert_receive {:rpc, {:modack, ["id-exhaust"], 30}, 2}, 500 + + # After 3 attempts the id is dropped — no further RPC calls for it + refute_receive {:rpc, {:modack, ["id-exhaust"], 30}, _}, 200 + + # State should be clear + state = :sys.get_state(batcher) + assert state.modack_ids == %{} + assert state.modack_attempts == %{} + end + + test "other ack_ids are not affected by one id reaching the retry limit" do + test_pid = self() + + # Only fail for "id-bad", succeed for everything else + {:ok, rpc} = + GenServer.start_link( + BroadwayCloudPubSub.Streaming.AckBatcherTest.AlwaysFailModackRpc, + {test_pid, 0} + ) + + # Use SelectiveFlakyRpc indirectly: we test via the state, not via RPC spy + {:ok, batcher_a} = + AckBatcher.start_link( + rpc_client: rpc, + ack_batch_interval_ms: 10_000, + ack_batch_max_size: 100 + ) + + # Add two ids with the same deadline; the rpc always fails + AckBatcher.modack(batcher_a, ["id-1", "id-2"], 30) + + # After 3 flushes, both should be dropped + AckBatcher.flush(batcher_a) + AckBatcher.flush(batcher_a) + AckBatcher.flush(batcher_a) + AckBatcher.flush(batcher_a) + + state = :sys.get_state(batcher_a) + assert state.modack_ids == %{} + end + + test "retry limit is per-ack-id — surviving ids stay in state after others are dropped" do + # AlwaysFailModackRpc fails every modify_ack_deadline call. + # Both "id-bad" and "id-good" will exhaust the 3-attempt limit, so after + # 3 flushes the modack state should be fully cleared. + test_pid = self() + {:ok, rpc} = AlwaysFailModackRpc.start_link(test_pid) + + {:ok, batcher_b} = + AckBatcher.start_link( + rpc_client: rpc, + ack_batch_interval_ms: 10_000, + ack_batch_max_size: 100 + ) + + AckBatcher.modack(batcher_b, ["id-bad", "id-good"], 30) + + # 3 explicit flushes exhaust the retry limit for both ids + AckBatcher.flush(batcher_b) + AckBatcher.flush(batcher_b) + AckBatcher.flush(batcher_b) + # One more flush to let the cleanup sweep run + AckBatcher.flush(batcher_b) + + state = :sys.get_state(batcher_b) + remaining_ids = state.modack_ids |> Map.values() |> List.flatten() + refute "id-bad" in remaining_ids + refute "id-good" in remaining_ids + end + end + # ============================================================ # Helpers # ============================================================ diff --git a/test/broadway_cloud_pub_sub/streaming/error_classifier_test.exs b/test/broadway_cloud_pub_sub/streaming/error_classifier_test.exs index 6948150..c9cdba9 100644 --- a/test/broadway_cloud_pub_sub/streaming/error_classifier_test.exs +++ b/test/broadway_cloud_pub_sub/streaming/error_classifier_test.exs @@ -62,8 +62,14 @@ defmodule BroadwayCloudPubSub.Streaming.ErrorClassifierTest do assert ErrorClassifier.classify(rpc_error(3)) == :terminal end - test "CANCELLED (1) is terminal" do - assert ErrorClassifier.classify(rpc_error(1)) == :terminal + test "FAILED_PRECONDITION (9) is terminal" do + assert ErrorClassifier.classify(rpc_error(9)) == :terminal + end + end + + describe "classify/1 — CANCELLED is retryable" do + test "CANCELLED (1) is retryable (server-side stream teardown or client replacement)" do + assert ErrorClassifier.classify(rpc_error(1)) == :retryable end end diff --git a/test/broadway_cloud_pub_sub/streaming/stream_manager_test.exs b/test/broadway_cloud_pub_sub/streaming/stream_manager_test.exs index c0ba2eb..5847f0e 100644 --- a/test/broadway_cloud_pub_sub/streaming/stream_manager_test.exs +++ b/test/broadway_cloud_pub_sub/streaming/stream_manager_test.exs @@ -375,7 +375,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do end) assert logs =~ - "Terminal Cloud Pub/Sub gRPC error — stopping: %GRPC.RPCError{status: 5, message: \"not found\", details: nil}" + "Terminal gRPC stream error on subscription projects/test/subscriptions/test-sub - reason: %GRPC.RPCError{status: 5, message: \"not found\", details: nil}. Stopping StreamManager." end test "PERMISSION_DENIED (7) stops the GenServer" do @@ -391,7 +391,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do end) assert logs =~ - "Terminal Cloud Pub/Sub gRPC error — stopping: %GRPC.RPCError{status: 7, message: \"permission denied\", details: nil}" + "Terminal gRPC stream error on subscription projects/test/subscriptions/test-sub - reason: %GRPC.RPCError{status: 7, message: \"permission denied\", details: nil}. Stopping StreamManager." end test "INVALID_ARGUMENT (3) stops the GenServer" do @@ -407,7 +407,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do end) assert logs =~ - "Terminal Cloud Pub/Sub gRPC error — stopping: %GRPC.RPCError{status: 3, message: \"bad argument\", details: nil}" + "Terminal gRPC stream error on subscription projects/test/subscriptions/test-sub - reason: %GRPC.RPCError{status: 3, message: \"bad argument\", details: nil}. Stopping StreamManager." end test "UNAUTHENTICATED (16) schedules reconnect without stopping" do @@ -468,7 +468,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do end) assert logs =~ - "Terminal Cloud Pub/Sub gRPC error — stopping: %GRPC.RPCError{status: 5, message: \"not found\", details: nil}" + "Terminal gRPC stream error on subscription projects/test/subscriptions/test-sub - reason: %GRPC.RPCError{status: 5, message: \"not found\", details: nil}. Stopping StreamManager." end end @@ -845,4 +845,503 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do Process.sleep(100) end end + + # ============================================================ + # Exactly-once delivery — receipt modack gate + # ============================================================ + + # A spy RPC client for use in exactly-once tests. It records calls and allows + # the test to control the response by sending {:set_modack_response, result}. + defmodule SpyRpcClientForEO do + use GenServer + + def start_link(test_pid) do + GenServer.start_link(__MODULE__, test_pid) + end + + def init(test_pid), do: {:ok, %{test_pid: test_pid, next_response: :ok}} + + def handle_call({:modify_ack_deadline, ids, deadline}, _from, state) do + send(state.test_pid, {:rpc_call, {:modack, ids, deadline}}) + result = if state.next_response == :ok, do: :ok, else: state.next_response + {:reply, result, %{state | next_response: :ok}} + end + + def handle_call({:acknowledge, _ids}, _from, state) do + {:reply, :ok, state} + end + + def handle_call(:ping, _from, state), do: {:reply, :ok, state} + + # Synchronous setter so callers can guarantee the response is set before + # any concurrent Task fires the next RPC call. + def handle_call({:set_response_sync, response}, _from, state) do + {:reply, :ok, %{state | next_response: response}} + end + + def handle_cast({:set_response, response}, state) do + {:noreply, %{state | next_response: response}} + end + end + + # Start a StreamManager backed by a SpyRpcClientForEO so we can control + # RPC responses for exactly-once tests. + defp start_manager_with_spy_rpc(extra_opts \\ []) do + broadway_name = Module.concat(__MODULE__, "EORun#{System.unique_integer([:positive])}") + + opts = + base_config() + |> Keyword.put(:broadway_name, broadway_name) + |> Keyword.merge(extra_opts) + + rpc_client_name = Module.concat(broadway_name, UnaryRpcClient) + batcher_name = Module.concat(broadway_name, AckBatcher) + + test_pid = self() + {:ok, rpc_pid} = SpyRpcClientForEO.start_link(test_pid) + # Register under the name AckBatcher will use + Process.register(rpc_pid, rpc_client_name) + + {:ok, _batcher} = + AckBatcher.start_link( + name: batcher_name, + rpc_client: rpc_client_name, + ack_batch_interval_ms: Keyword.get(opts, :ack_batch_interval_ms, 50), + ack_batch_max_size: Keyword.get(opts, :ack_batch_max_size, 2_500) + ) + + {:ok, pid} = StreamManager.start_link(opts) + StreamManager.set_producer(pid, self()) + + {pid, rpc_pid} + end + + # Enable exactly-once delivery on a running StreamManager + defp enable_exactly_once(pid) do + send( + pid, + {:subscription_properties, + %Google.Pubsub.V1.StreamingPullResponse.SubscriptionProperties{ + message_ordering_enabled: false, + exactly_once_delivery_enabled: true + }} + ) + + sync(pid) + end + + describe "exactly-once receipt modack gate — {:stream_messages, ...}" do + test "in exactly-once mode, messages are NOT immediately forwarded to producer" do + {pid, _rpc} = start_manager_with_spy_rpc() + enable_exactly_once(pid) + StreamManager.notify_demand(pid, 10) + + send(pid, {:stream_messages, [received_message("eo-ack-1", "data")]}) + sync(pid) + + # Receipt modack RPC is in-flight; message not yet delivered + refute_received {:stream_messages, _} + + # State has one pending entry + state = :sys.get_state(pid) + assert map_size(state.pending_receipt_modacks) == 1 + end + + test "in standard mode, messages are forwarded immediately (no gating)" do + pid = start_manager() + StreamManager.notify_demand(pid, 10) + + send(pid, {:stream_messages, [received_message("std-ack-1", "data")]}) + + assert_receive {:stream_messages, [msg]}, 500 + assert msg.data == "data" + + state = :sys.get_state(pid) + assert map_size(state.pending_receipt_modacks) == 0 + end + + test "messages are added to outstanding after receipt modack succeeds" do + {pid, _rpc} = start_manager_with_spy_rpc() + enable_exactly_once(pid) + StreamManager.notify_demand(pid, 10) + + send(pid, {:stream_messages, [received_message("eo-ack-2", "data")]}) + sync(pid) + + state_before = :sys.get_state(pid) + [ref] = Map.keys(state_before.pending_receipt_modacks) + + # Simulate receipt modack success + send(pid, {:receipt_modack_result, ref, {:ok, []}}) + sync(pid) + + state_after = :sys.get_state(pid) + assert map_size(state_after.pending_receipt_modacks) == 0 + assert Map.has_key?(state_after.outstanding, "eo-ack-2") + end + end + + # Injects a pending_receipt_modacks entry into StreamManager state directly, + # bypassing the AckBatcher/Task chain. Used for tests that need to control + # which receipt_modack_result variant the handler sees. + defp inject_pending_receipt_modack(pid, ref, ack_ids, data_by_id) do + broadway_msgs = + Enum.map(ack_ids, fn ack_id -> + %Broadway.Message{ + data: Map.get(data_by_id, ack_id, ack_id), + metadata: %{}, + acknowledger: BroadwayCloudPubSub.Streaming.Acknowledger.builder(__MODULE__).(ack_id) + } + end) + + :sys.replace_state(pid, fn s -> + entry = %{ + broadway_messages: broadway_msgs, + ack_ids: ack_ids, + received_at: System.monotonic_time(:millisecond) + } + + %{s | pending_receipt_modacks: Map.put(s.pending_receipt_modacks, ref, entry)} + end) + end + + describe "exactly-once — {:receipt_modack_result, ref, result}" do + test "total success: all messages delivered and added to outstanding" do + # SpyRpcClientForEO defaults to :ok, so the Task auto-fires {:ok, []} result. + {pid, _rpc} = start_manager_with_spy_rpc() + enable_exactly_once(pid) + StreamManager.notify_demand(pid, 10) + + send(pid, {:stream_messages, [received_message("r1", "d1"), received_message("r2", "d2")]}) + + # The Task fires the RPC and sends back {:receipt_modack_result, ref, {:ok, []}} + # automatically. Wait for the resulting message delivery. + assert_receive {:stream_messages, msgs}, 500 + assert length(msgs) == 2 + assert Enum.map(msgs, & &1.data) |> Enum.sort() == ["d1", "d2"] + + state = :sys.get_state(pid) + assert map_size(state.pending_receipt_modacks) == 0 + assert Map.has_key?(state.outstanding, "r1") + assert Map.has_key?(state.outstanding, "r2") + end + + test "total failure: no messages delivered, nothing added to outstanding" do + {pid, rpc} = start_manager_with_spy_rpc() + enable_exactly_once(pid) + StreamManager.notify_demand(pid, 10) + + # Configure spy synchronously to return {:error, :unavailable} for the next modack call + :ok = GenServer.call(rpc, {:set_response_sync, {:error, :unavailable}}) + + send(pid, {:stream_messages, [received_message("fail-1", "data")]}) + # Wait for the Task's RPC call to complete and result to be processed + sync(pid) + # Give extra time for the async Task result to arrive and be processed + Process.sleep(200) + sync(pid) + + refute_received {:stream_messages, _} + + state = :sys.get_state(pid) + assert map_size(state.pending_receipt_modacks) == 0 + assert map_size(state.outstanding) == 0 + end + + test "partial success: only succeeded messages delivered, failed dropped" do + # Inject the pending entry directly to avoid racing with the auto-Task. + pid = start_manager() + StreamManager.notify_demand(pid, 10) + + ref = make_ref() + + inject_pending_receipt_modack(pid, ref, ["ok-id", "bad-id"], %{ + "ok-id" => "good", + "bad-id" => "dropped" + }) + + send(pid, {:receipt_modack_result, ref, {:ok, ["bad-id"]}}) + + assert_receive {:stream_messages, msgs}, 500 + assert length(msgs) == 1 + assert hd(msgs).data == "good" + + state = :sys.get_state(pid) + assert Map.has_key?(state.outstanding, "ok-id") + refute Map.has_key?(state.outstanding, "bad-id") + end + + test "partial success with all failed: no messages delivered" do + pid = start_manager() + StreamManager.notify_demand(pid, 10) + + ref = make_ref() + inject_pending_receipt_modack(pid, ref, ["all-bad"], %{"all-bad" => "data"}) + + send(pid, {:receipt_modack_result, ref, {:ok, ["all-bad"]}}) + sync(pid) + + refute_received {:stream_messages, _} + assert map_size(:sys.get_state(pid).outstanding) == 0 + end + + test "stale/unknown ref is ignored gracefully" do + {pid, _rpc} = start_manager_with_spy_rpc() + enable_exactly_once(pid) + + stale_ref = make_ref() + send(pid, {:receipt_modack_result, stale_ref, {:ok, []}}) + sync(pid) + + assert Process.alive?(pid) + assert map_size(:sys.get_state(pid).pending_receipt_modacks) == 0 + end + end + + describe "exactly-once — retry deadline auto-switch" do + test "AckBatcher retry_deadline_ms switches to 600s when exactly-once is enabled" do + broadway_name = Module.concat(__MODULE__, "RD#{System.unique_integer([:positive])}") + + opts = + base_config() + |> Keyword.put(:broadway_name, broadway_name) + |> Keyword.put(:retry_deadline_ms, 60_000) + + rpc_client_name = Module.concat(broadway_name, UnaryRpcClient) + batcher_name = Module.concat(broadway_name, AckBatcher) + + {:ok, _stub} = StubRpcClient.start_link(rpc_client_name) + + {:ok, _batcher} = + AckBatcher.start_link( + name: batcher_name, + rpc_client: rpc_client_name, + ack_batch_interval_ms: 100, + ack_batch_max_size: 2_500, + retry_deadline_ms: 60_000 + ) + + {:ok, pid} = StreamManager.start_link(opts) + StreamManager.set_producer(pid, self()) + + batcher_pid = Process.whereis(batcher_name) + assert :sys.get_state(batcher_pid).retry_deadline_ms == 60_000 + + # Enable exactly-once + send( + pid, + {:subscription_properties, + %Google.Pubsub.V1.StreamingPullResponse.SubscriptionProperties{ + message_ordering_enabled: false, + exactly_once_delivery_enabled: true + }} + ) + + sync(pid) + # Cast is async — let AckBatcher process it + AckBatcher.flush(batcher_pid) + + assert :sys.get_state(batcher_pid).retry_deadline_ms == 600_000 + end + + test "AckBatcher retry_deadline_ms is restored to configured value when exactly-once is disabled" do + broadway_name = Module.concat(__MODULE__, "RD2#{System.unique_integer([:positive])}") + + opts = + base_config() + |> Keyword.put(:broadway_name, broadway_name) + |> Keyword.put(:retry_deadline_ms, 60_000) + + rpc_client_name = Module.concat(broadway_name, UnaryRpcClient) + batcher_name = Module.concat(broadway_name, AckBatcher) + + {:ok, _stub} = StubRpcClient.start_link(rpc_client_name) + + {:ok, _batcher} = + AckBatcher.start_link( + name: batcher_name, + rpc_client: rpc_client_name, + ack_batch_interval_ms: 100, + ack_batch_max_size: 2_500, + retry_deadline_ms: 60_000 + ) + + {:ok, pid} = StreamManager.start_link(opts) + StreamManager.set_producer(pid, self()) + + batcher_pid = Process.whereis(batcher_name) + + enable_exactly_once(pid) + AckBatcher.flush(batcher_pid) + assert :sys.get_state(batcher_pid).retry_deadline_ms == 600_000 + + # Disable exactly-once + send( + pid, + {:subscription_properties, + %Google.Pubsub.V1.StreamingPullResponse.SubscriptionProperties{ + message_ordering_enabled: false, + exactly_once_delivery_enabled: false + }} + ) + + sync(pid) + AckBatcher.flush(batcher_pid) + assert :sys.get_state(batcher_pid).retry_deadline_ms == 60_000 + end + + test "retry_deadline_ms is NOT updated when exactly_once status does not change" do + broadway_name = Module.concat(__MODULE__, "RD3#{System.unique_integer([:positive])}") + + opts = + base_config() + |> Keyword.put(:broadway_name, broadway_name) + |> Keyword.put(:retry_deadline_ms, 60_000) + + rpc_client_name = Module.concat(broadway_name, UnaryRpcClient) + batcher_name = Module.concat(broadway_name, AckBatcher) + + {:ok, _stub} = StubRpcClient.start_link(rpc_client_name) + + {:ok, _batcher} = + AckBatcher.start_link( + name: batcher_name, + rpc_client: rpc_client_name, + ack_batch_interval_ms: 100, + ack_batch_max_size: 2_500 + ) + + {:ok, pid} = StreamManager.start_link(opts) + StreamManager.set_producer(pid, self()) + + batcher_pid = Process.whereis(batcher_name) + initial_deadline = :sys.get_state(batcher_pid).retry_deadline_ms + + # Send the same exactly_once=false twice — no update should happen + send( + pid, + {:subscription_properties, + %Google.Pubsub.V1.StreamingPullResponse.SubscriptionProperties{ + message_ordering_enabled: false, + exactly_once_delivery_enabled: false + }} + ) + + sync(pid) + AckBatcher.flush(batcher_pid) + + assert :sys.get_state(batcher_pid).retry_deadline_ms == initial_deadline + end + end + + describe "exactly-once — stale pending_receipt_modacks sweep" do + test "entries older than 60s are nacked with deadline=0 during extend_leases" do + pid = start_manager() + + # Inject a stale entry (received_at far in the past) + stale_ref = make_ref() + + stale_entry = %{ + broadway_messages: [], + ack_ids: ["stale-ack-1"], + received_at: System.monotonic_time(:millisecond) - 120_000 + } + + :sys.replace_state(pid, fn s -> + %{s | pending_receipt_modacks: Map.put(s.pending_receipt_modacks, stale_ref, stale_entry)} + end) + + # Trigger extend_leases which runs the sweep + send(pid, :extend_leases) + sync(pid) + + # Stale entry should be removed + state = :sys.get_state(pid) + refute Map.has_key?(state.pending_receipt_modacks, stale_ref) + end + + test "fresh entries are NOT swept during extend_leases" do + pid = start_manager() + + ref = make_ref() + inject_pending_receipt_modack(pid, ref, ["fresh-ack"], %{"fresh-ack" => "data"}) + + send(pid, :extend_leases) + sync(pid) + + # Fresh entry should survive the sweep + assert Map.has_key?(:sys.get_state(pid).pending_receipt_modacks, ref) + end + end + + describe "exactly-once — drain nack pending receipt modacks" do + test "pending receipt modacks are nacked on stop_receiving" do + pid = start_manager() + + ref = make_ref() + inject_pending_receipt_modack(pid, ref, ["drain-eo"], %{"drain-eo" => "data"}) + assert map_size(:sys.get_state(pid).pending_receipt_modacks) == 1 + + StreamManager.stop_receiving(pid) + sync(pid) + + # After drain, pending_receipt_modacks should be cleared + state = :sys.get_state(pid) + assert map_size(state.pending_receipt_modacks) == 0 + end + + test "receipt_modack_result after drain (cleared pending) is ignored gracefully" do + pid = start_manager() + + ref = make_ref() + inject_pending_receipt_modack(pid, ref, ["drain-stale"], %{"drain-stale" => "data"}) + + # Drain clears the pending map + StreamManager.stop_receiving(pid) + sync(pid) + + # RPC result arrives after drain — should be ignored, not crash + send(pid, {:receipt_modack_result, ref, {:ok, []}}) + sync(pid) + + assert Process.alive?(pid) + end + end + + describe "exactly-once — pending_receipt_modacks NOT cleared on reconnect" do + test "pending_receipt_modacks survives a stream_error reset" do + # Use inject_pending_receipt_modack to avoid races with the auto-Task. + pid = start_manager() + StreamManager.notify_demand(pid, 10) + + ref = make_ref() + inject_pending_receipt_modack(pid, ref, ["reconnect-ack"], %{"reconnect-ack" => "data"}) + assert map_size(:sys.get_state(pid).pending_receipt_modacks) == 1 + + # Simulate a retryable stream error (triggers reconnect, not drain) + send(pid, {:stream_error, %GRPC.RPCError{status: 14, message: "unavailable"}}) + sync(pid) + + # pending_receipt_modacks must survive — ack_ids are valid across reconnects + assert map_size(:sys.get_state(pid).pending_receipt_modacks) == 1 + end + + test "receipt_modack_result arriving after reconnect is still processed correctly" do + pid = start_manager() + StreamManager.notify_demand(pid, 10) + + ref = make_ref() + inject_pending_receipt_modack(pid, ref, ["post-reconnect"], %{"post-reconnect" => "data"}) + + # Reconnect + send(pid, {:stream_error, %GRPC.RPCError{status: 14, message: "unavailable"}}) + sync(pid) + + # Result arrives post-reconnect — should still deliver + send(pid, {:receipt_modack_result, ref, {:ok, []}}) + + assert_receive {:stream_messages, [msg]}, 500 + assert msg.data == "data" + end + end end From dc35ffca6f459c83fa0979ce48adf488c99bb4a6 Mon Sep 17 00:00:00 2001 From: Rock Date: Wed, 8 Apr 2026 00:14:44 +0200 Subject: [PATCH 06/29] refactor: extract gRPC client abstraction and regenerate protobuf Introduce a client behaviour (Streaming.Client) and default gRPC implementation (Streaming.GrpcClient) to decouple the stream manager from the gRPC transport. Regenerate protobuf definitions using buf. Key changes: - Add Streaming.Client behaviour for pluggable transport - Add Streaming.GrpcClient as the default implementation - Add buf.gen.yaml and buf.yaml for protobuf generation - Regenerate Pub/Sub v1 protobuf modules - Update StreamManager and UnaryRpcClient to use client abstraction --- README.md | 2 +- buf.gen.yaml | 16 + buf.yaml | 1 + .../proto/google/pubsub/v1/pubsub.pb.ex | 464 +++++++++++++----- .../proto/google/pubsub/v1/schema.pb.ex | 80 ++- .../streaming/client.ex | 132 +++++ .../streaming/grpc_client.ex | 165 +++++++ .../streaming/options.ex | 11 + .../streaming/producer.ex | 9 +- .../streaming/stream_manager.ex | 89 +--- .../streaming/stream_reader.ex | 19 +- .../streaming/unary_rpc_client.ex | 83 +--- mix.exs | 5 +- mix.lock | 5 +- .../streaming/stream_manager_test.exs | 43 ++ .../streaming/unary_rpc_client_test.exs | 12 +- 16 files changed, 847 insertions(+), 289 deletions(-) create mode 100644 buf.gen.yaml create mode 100644 buf.yaml create mode 100644 lib/broadway_cloud_pub_sub/streaming/client.ex create mode 100644 lib/broadway_cloud_pub_sub/streaming/grpc_client.ex diff --git a/README.md b/README.md index 8f019ff..e8e31f8 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ def deps do [ {:broadway_cloud_pub_sub, "~> 0.10.0"}, {:goth, "~> 1.3"}, - {:grpc, "~> 0.9"}, + {:grpc, "~> 1.0"}, {:protobuf, "~> 0.12"} ] end diff --git a/buf.gen.yaml b/buf.gen.yaml new file mode 100644 index 0000000..8e15831 --- /dev/null +++ b/buf.gen.yaml @@ -0,0 +1,16 @@ +version: v2 +clean: true +managed: + enabled: true +inputs: + - zip_archive: https://github.com/googleapis/googleapis/archive/refs/heads/master.zip + strip_components: 1 + paths: + - google/pubsub/v1 +plugins: + - local: protoc-gen-elixir + out: lib/broadway_cloud_pub_sub/proto + opt: + - paths=source_relative + - include_docs=true + - plugins=grpc diff --git a/buf.yaml b/buf.yaml new file mode 100644 index 0000000..b4d4e47 --- /dev/null +++ b/buf.yaml @@ -0,0 +1 @@ +version: v2 diff --git a/lib/broadway_cloud_pub_sub/proto/google/pubsub/v1/pubsub.pb.ex b/lib/broadway_cloud_pub_sub/proto/google/pubsub/v1/pubsub.pb.ex index 28c2b11..cd83678 100644 --- a/lib/broadway_cloud_pub_sub/proto/google/pubsub/v1/pubsub.pb.ex +++ b/lib/broadway_cloud_pub_sub/proto/google/pubsub/v1/pubsub.pb.ex @@ -1,5 +1,7 @@ defmodule Google.Pubsub.V1.IngestionDataSourceSettings.AwsKinesis.State do - @moduledoc false + @moduledoc """ + Possible states for ingestion from Amazon Kinesis Data Streams. + """ use Protobuf, enum: true, @@ -16,7 +18,9 @@ defmodule Google.Pubsub.V1.IngestionDataSourceSettings.AwsKinesis.State do end defmodule Google.Pubsub.V1.IngestionDataSourceSettings.CloudStorage.State do - @moduledoc false + @moduledoc """ + Possible states for ingestion from Cloud Storage. + """ use Protobuf, enum: true, @@ -33,7 +37,9 @@ defmodule Google.Pubsub.V1.IngestionDataSourceSettings.CloudStorage.State do end defmodule Google.Pubsub.V1.IngestionDataSourceSettings.AzureEventHubs.State do - @moduledoc false + @moduledoc """ + Possible states for managed ingestion from Event Hubs. + """ use Protobuf, enum: true, @@ -52,7 +58,9 @@ defmodule Google.Pubsub.V1.IngestionDataSourceSettings.AzureEventHubs.State do end defmodule Google.Pubsub.V1.IngestionDataSourceSettings.AwsMsk.State do - @moduledoc false + @moduledoc """ + Possible states for managed ingestion from Amazon MSK. + """ use Protobuf, enum: true, @@ -69,7 +77,9 @@ defmodule Google.Pubsub.V1.IngestionDataSourceSettings.AwsMsk.State do end defmodule Google.Pubsub.V1.IngestionDataSourceSettings.ConfluentCloud.State do - @moduledoc false + @moduledoc """ + Possible states for managed ingestion from Confluent Cloud. + """ use Protobuf, enum: true, @@ -87,7 +97,9 @@ defmodule Google.Pubsub.V1.IngestionDataSourceSettings.ConfluentCloud.State do end defmodule Google.Pubsub.V1.PlatformLogsSettings.Severity do - @moduledoc false + @moduledoc """ + Severity levels of Platform Logs. + """ use Protobuf, enum: true, @@ -104,7 +116,9 @@ defmodule Google.Pubsub.V1.PlatformLogsSettings.Severity do end defmodule Google.Pubsub.V1.Topic.State do - @moduledoc false + @moduledoc """ + The state of the topic. + """ use Protobuf, enum: true, @@ -118,7 +132,9 @@ defmodule Google.Pubsub.V1.Topic.State do end defmodule Google.Pubsub.V1.Subscription.State do - @moduledoc false + @moduledoc """ + Possible states for a subscription. + """ use Protobuf, enum: true, @@ -132,7 +148,9 @@ defmodule Google.Pubsub.V1.Subscription.State do end defmodule Google.Pubsub.V1.BigQueryConfig.State do - @moduledoc false + @moduledoc """ + Possible states for a BigQuery subscription. + """ use Protobuf, enum: true, @@ -150,7 +168,10 @@ defmodule Google.Pubsub.V1.BigQueryConfig.State do end defmodule Google.Pubsub.V1.BigtableConfig.State do - @moduledoc false + @moduledoc """ + Possible states for a Bigtable subscription. + Note: more states could be added in the future. Please code accordingly. + """ use Protobuf, enum: true, @@ -169,7 +190,9 @@ defmodule Google.Pubsub.V1.BigtableConfig.State do end defmodule Google.Pubsub.V1.CloudStorageConfig.State do - @moduledoc false + @moduledoc """ + Possible states for a Cloud Storage subscription. + """ use Protobuf, enum: true, @@ -187,7 +210,9 @@ defmodule Google.Pubsub.V1.CloudStorageConfig.State do end defmodule Google.Pubsub.V1.MessageStoragePolicy do - @moduledoc false + @moduledoc """ + A policy constraining the storage of messages published to the topic. + """ use Protobuf, full_name: "google.pubsub.v1.MessageStoragePolicy", @@ -205,7 +230,9 @@ defmodule Google.Pubsub.V1.MessageStoragePolicy do end defmodule Google.Pubsub.V1.SchemaSettings do - @moduledoc false + @moduledoc """ + Settings for validating messages published against a schema. + """ use Protobuf, full_name: "google.pubsub.v1.SchemaSettings", @@ -219,7 +246,9 @@ defmodule Google.Pubsub.V1.SchemaSettings do end defmodule Google.Pubsub.V1.IngestionDataSourceSettings.AwsKinesis do - @moduledoc false + @moduledoc """ + Ingestion settings for Amazon Kinesis Data Streams. + """ use Protobuf, full_name: "google.pubsub.v1.IngestionDataSourceSettings.AwsKinesis", @@ -239,7 +268,11 @@ defmodule Google.Pubsub.V1.IngestionDataSourceSettings.AwsKinesis do end defmodule Google.Pubsub.V1.IngestionDataSourceSettings.CloudStorage.TextFormat do - @moduledoc false + @moduledoc """ + Configuration for reading Cloud Storage data in text format. Each line of + text as specified by the delimiter will be set to the `data` field of a + Pub/Sub message. + """ use Protobuf, full_name: "google.pubsub.v1.IngestionDataSourceSettings.CloudStorage.TextFormat", @@ -250,7 +283,11 @@ defmodule Google.Pubsub.V1.IngestionDataSourceSettings.CloudStorage.TextFormat d end defmodule Google.Pubsub.V1.IngestionDataSourceSettings.CloudStorage.AvroFormat do - @moduledoc false + @moduledoc """ + Configuration for reading Cloud Storage data in Avro binary format. The + bytes of each object will be set to the `data` field of a Pub/Sub + message. + """ use Protobuf, full_name: "google.pubsub.v1.IngestionDataSourceSettings.CloudStorage.AvroFormat", @@ -259,7 +296,12 @@ defmodule Google.Pubsub.V1.IngestionDataSourceSettings.CloudStorage.AvroFormat d end defmodule Google.Pubsub.V1.IngestionDataSourceSettings.CloudStorage.PubSubAvroFormat do - @moduledoc false + @moduledoc """ + Configuration for reading Cloud Storage data written via [Cloud Storage + subscriptions](https://cloud.google.com/pubsub/docs/cloudstorage). The + data and attributes fields of the originally exported Pub/Sub message + will be restored when publishing. + """ use Protobuf, full_name: "google.pubsub.v1.IngestionDataSourceSettings.CloudStorage.PubSubAvroFormat", @@ -268,7 +310,9 @@ defmodule Google.Pubsub.V1.IngestionDataSourceSettings.CloudStorage.PubSubAvroFo end defmodule Google.Pubsub.V1.IngestionDataSourceSettings.CloudStorage do - @moduledoc false + @moduledoc """ + Ingestion settings for Cloud Storage. + """ use Protobuf, full_name: "google.pubsub.v1.IngestionDataSourceSettings.CloudStorage", @@ -316,7 +360,9 @@ defmodule Google.Pubsub.V1.IngestionDataSourceSettings.CloudStorage do end defmodule Google.Pubsub.V1.IngestionDataSourceSettings.AzureEventHubs do - @moduledoc false + @moduledoc """ + Ingestion settings for Azure Event Hubs. + """ use Protobuf, full_name: "google.pubsub.v1.IngestionDataSourceSettings.AzureEventHubs", @@ -339,7 +385,9 @@ defmodule Google.Pubsub.V1.IngestionDataSourceSettings.AzureEventHubs do end defmodule Google.Pubsub.V1.IngestionDataSourceSettings.AwsMsk do - @moduledoc false + @moduledoc """ + Ingestion settings for Amazon MSK. + """ use Protobuf, full_name: "google.pubsub.v1.IngestionDataSourceSettings.AwsMsk", @@ -359,7 +407,9 @@ defmodule Google.Pubsub.V1.IngestionDataSourceSettings.AwsMsk do end defmodule Google.Pubsub.V1.IngestionDataSourceSettings.ConfluentCloud do - @moduledoc false + @moduledoc """ + Ingestion settings for Confluent Cloud. + """ use Protobuf, full_name: "google.pubsub.v1.IngestionDataSourceSettings.ConfluentCloud", @@ -380,7 +430,9 @@ defmodule Google.Pubsub.V1.IngestionDataSourceSettings.ConfluentCloud do end defmodule Google.Pubsub.V1.IngestionDataSourceSettings do - @moduledoc false + @moduledoc """ + Settings for an ingestion data source on a topic. + """ use Protobuf, full_name: "google.pubsub.v1.IngestionDataSourceSettings", @@ -432,7 +484,9 @@ defmodule Google.Pubsub.V1.IngestionDataSourceSettings do end defmodule Google.Pubsub.V1.PlatformLogsSettings do - @moduledoc false + @moduledoc """ + Settings for Platform Logs produced by Pub/Sub. + """ use Protobuf, full_name: "google.pubsub.v1.PlatformLogsSettings", @@ -447,7 +501,16 @@ defmodule Google.Pubsub.V1.PlatformLogsSettings do end defmodule Google.Pubsub.V1.IngestionFailureEvent.ApiViolationReason do - @moduledoc false + @moduledoc """ + Specifies the reason why some data may have been left out of + the desired Pub/Sub message due to the API message limits + (https://cloud.google.com/pubsub/quotas#resource_limits). For example, + when the number of attributes is larger than 100, the number of + attributes is truncated to 100 to respect the limit on the attribute count. + Other attribute limits are treated similarly. When the size of the desired + message would've been larger than 10MB, the message won't be published at + all, and ingestion of the subsequent messages will proceed as normal. + """ use Protobuf, full_name: "google.pubsub.v1.IngestionFailureEvent.ApiViolationReason", @@ -456,7 +519,10 @@ defmodule Google.Pubsub.V1.IngestionFailureEvent.ApiViolationReason do end defmodule Google.Pubsub.V1.IngestionFailureEvent.AvroFailureReason do - @moduledoc false + @moduledoc """ + Set when an Avro file is unsupported or its format is not valid. When this + occurs, one or more Avro objects won't be ingested. + """ use Protobuf, full_name: "google.pubsub.v1.IngestionFailureEvent.AvroFailureReason", @@ -465,7 +531,10 @@ defmodule Google.Pubsub.V1.IngestionFailureEvent.AvroFailureReason do end defmodule Google.Pubsub.V1.IngestionFailureEvent.SchemaViolationReason do - @moduledoc false + @moduledoc """ + Set when a Pub/Sub message fails to get published due to a schema + validation violation. + """ use Protobuf, full_name: "google.pubsub.v1.IngestionFailureEvent.SchemaViolationReason", @@ -474,7 +543,10 @@ defmodule Google.Pubsub.V1.IngestionFailureEvent.SchemaViolationReason do end defmodule Google.Pubsub.V1.IngestionFailureEvent.MessageTransformationFailureReason do - @moduledoc false + @moduledoc """ + Set when a Pub/Sub message fails to get published due to a message + transformation error. + """ use Protobuf, full_name: "google.pubsub.v1.IngestionFailureEvent.MessageTransformationFailureReason", @@ -483,7 +555,9 @@ defmodule Google.Pubsub.V1.IngestionFailureEvent.MessageTransformationFailureRea end defmodule Google.Pubsub.V1.IngestionFailureEvent.CloudStorageFailure do - @moduledoc false + @moduledoc """ + Failure when ingesting from a Cloud Storage source. + """ use Protobuf, full_name: "google.pubsub.v1.IngestionFailureEvent.CloudStorageFailure", @@ -526,7 +600,9 @@ defmodule Google.Pubsub.V1.IngestionFailureEvent.CloudStorageFailure do end defmodule Google.Pubsub.V1.IngestionFailureEvent.AwsMskFailureReason do - @moduledoc false + @moduledoc """ + Failure when ingesting from an Amazon MSK source. + """ use Protobuf, full_name: "google.pubsub.v1.IngestionFailureEvent.AwsMskFailureReason", @@ -563,7 +639,9 @@ defmodule Google.Pubsub.V1.IngestionFailureEvent.AwsMskFailureReason do end defmodule Google.Pubsub.V1.IngestionFailureEvent.AzureEventHubsFailureReason do - @moduledoc false + @moduledoc """ + Failure when ingesting from an Azure Event Hubs source. + """ use Protobuf, full_name: "google.pubsub.v1.IngestionFailureEvent.AzureEventHubsFailureReason", @@ -600,7 +678,9 @@ defmodule Google.Pubsub.V1.IngestionFailureEvent.AzureEventHubsFailureReason do end defmodule Google.Pubsub.V1.IngestionFailureEvent.ConfluentCloudFailureReason do - @moduledoc false + @moduledoc """ + Failure when ingesting from a Confluent Cloud source. + """ use Protobuf, full_name: "google.pubsub.v1.IngestionFailureEvent.ConfluentCloudFailureReason", @@ -637,7 +717,9 @@ defmodule Google.Pubsub.V1.IngestionFailureEvent.ConfluentCloudFailureReason do end defmodule Google.Pubsub.V1.IngestionFailureEvent.AwsKinesisFailureReason do - @moduledoc false + @moduledoc """ + Failure when ingesting from an AWS Kinesis source. + """ use Protobuf, full_name: "google.pubsub.v1.IngestionFailureEvent.AwsKinesisFailureReason", @@ -673,7 +755,10 @@ defmodule Google.Pubsub.V1.IngestionFailureEvent.AwsKinesisFailureReason do end defmodule Google.Pubsub.V1.IngestionFailureEvent do - @moduledoc false + @moduledoc """ + Payload of the Platform Log entry sent when a failure is encountered while + ingesting. + """ use Protobuf, full_name: "google.pubsub.v1.IngestionFailureEvent", @@ -722,7 +807,10 @@ defmodule Google.Pubsub.V1.IngestionFailureEvent do end defmodule Google.Pubsub.V1.JavaScriptUDF do - @moduledoc false + @moduledoc """ + User-defined JavaScript function that can transform or filter a Pub/Sub + message. + """ use Protobuf, full_name: "google.pubsub.v1.JavaScriptUDF", @@ -734,7 +822,9 @@ defmodule Google.Pubsub.V1.JavaScriptUDF do end defmodule Google.Pubsub.V1.AIInference.UnstructuredInference do - @moduledoc false + @moduledoc """ + Configuration for making inferences using arbitrary JSON payloads. + """ use Protobuf, full_name: "google.pubsub.v1.AIInference.UnstructuredInference", @@ -745,7 +835,9 @@ defmodule Google.Pubsub.V1.AIInference.UnstructuredInference do end defmodule Google.Pubsub.V1.AIInference do - @moduledoc false + @moduledoc """ + Configuration for making inference requests against Vertex AI models. + """ use Protobuf, full_name: "google.pubsub.v1.AIInference", @@ -771,7 +863,9 @@ defmodule Google.Pubsub.V1.AIInference do end defmodule Google.Pubsub.V1.MessageTransform do - @moduledoc false + @moduledoc """ + All supported message transforms types. + """ use Protobuf, full_name: "google.pubsub.v1.MessageTransform", @@ -799,8 +893,6 @@ defmodule Google.Pubsub.V1.MessageTransform do end defmodule Google.Pubsub.V1.Topic.LabelsEntry do - @moduledoc false - use Protobuf, full_name: "google.pubsub.v1.Topic.LabelsEntry", map: true, @@ -812,8 +904,6 @@ defmodule Google.Pubsub.V1.Topic.LabelsEntry do end defmodule Google.Pubsub.V1.Topic.TagsEntry do - @moduledoc false - use Protobuf, full_name: "google.pubsub.v1.Topic.TagsEntry", map: true, @@ -825,7 +915,9 @@ defmodule Google.Pubsub.V1.Topic.TagsEntry do end defmodule Google.Pubsub.V1.Topic do - @moduledoc false + @moduledoc """ + A topic resource. + """ use Protobuf, full_name: "google.pubsub.v1.Topic", @@ -887,8 +979,6 @@ defmodule Google.Pubsub.V1.Topic do end defmodule Google.Pubsub.V1.PubsubMessage.AttributesEntry do - @moduledoc false - use Protobuf, full_name: "google.pubsub.v1.PubsubMessage.AttributesEntry", map: true, @@ -900,7 +990,16 @@ defmodule Google.Pubsub.V1.PubsubMessage.AttributesEntry do end defmodule Google.Pubsub.V1.PubsubMessage do - @moduledoc false + @moduledoc """ + A message that is published by publishers and consumed by subscribers. The + message must contain either a non-empty data field or at least one attribute. + Note that client libraries represent this object differently + depending on the language. See the corresponding [client library + documentation](https://cloud.google.com/pubsub/docs/reference/libraries) for + more information. See [quotas and limits] + (https://cloud.google.com/pubsub/quotas) for more information about message + limits. + """ use Protobuf, full_name: "google.pubsub.v1.PubsubMessage", @@ -922,7 +1021,9 @@ defmodule Google.Pubsub.V1.PubsubMessage do end defmodule Google.Pubsub.V1.GetTopicRequest do - @moduledoc false + @moduledoc """ + Request for the GetTopic method. + """ use Protobuf, full_name: "google.pubsub.v1.GetTopicRequest", @@ -933,7 +1034,9 @@ defmodule Google.Pubsub.V1.GetTopicRequest do end defmodule Google.Pubsub.V1.UpdateTopicRequest do - @moduledoc false + @moduledoc """ + Request for the UpdateTopic method. + """ use Protobuf, full_name: "google.pubsub.v1.UpdateTopicRequest", @@ -950,7 +1053,9 @@ defmodule Google.Pubsub.V1.UpdateTopicRequest do end defmodule Google.Pubsub.V1.PublishRequest do - @moduledoc false + @moduledoc """ + Request for the Publish method. + """ use Protobuf, full_name: "google.pubsub.v1.PublishRequest", @@ -962,7 +1067,9 @@ defmodule Google.Pubsub.V1.PublishRequest do end defmodule Google.Pubsub.V1.PublishResponse do - @moduledoc false + @moduledoc """ + Response for the `Publish` method. + """ use Protobuf, full_name: "google.pubsub.v1.PublishResponse", @@ -978,7 +1085,9 @@ defmodule Google.Pubsub.V1.PublishResponse do end defmodule Google.Pubsub.V1.ListTopicsRequest do - @moduledoc false + @moduledoc """ + Request for the `ListTopics` method. + """ use Protobuf, full_name: "google.pubsub.v1.ListTopicsRequest", @@ -991,7 +1100,9 @@ defmodule Google.Pubsub.V1.ListTopicsRequest do end defmodule Google.Pubsub.V1.ListTopicsResponse do - @moduledoc false + @moduledoc """ + Response for the `ListTopics` method. + """ use Protobuf, full_name: "google.pubsub.v1.ListTopicsResponse", @@ -1003,7 +1114,9 @@ defmodule Google.Pubsub.V1.ListTopicsResponse do end defmodule Google.Pubsub.V1.ListTopicSubscriptionsRequest do - @moduledoc false + @moduledoc """ + Request for the `ListTopicSubscriptions` method. + """ use Protobuf, full_name: "google.pubsub.v1.ListTopicSubscriptionsRequest", @@ -1016,7 +1129,9 @@ defmodule Google.Pubsub.V1.ListTopicSubscriptionsRequest do end defmodule Google.Pubsub.V1.ListTopicSubscriptionsResponse do - @moduledoc false + @moduledoc """ + Response for the `ListTopicSubscriptions` method. + """ use Protobuf, full_name: "google.pubsub.v1.ListTopicSubscriptionsResponse", @@ -1028,7 +1143,9 @@ defmodule Google.Pubsub.V1.ListTopicSubscriptionsResponse do end defmodule Google.Pubsub.V1.ListTopicSnapshotsRequest do - @moduledoc false + @moduledoc """ + Request for the `ListTopicSnapshots` method. + """ use Protobuf, full_name: "google.pubsub.v1.ListTopicSnapshotsRequest", @@ -1041,7 +1158,9 @@ defmodule Google.Pubsub.V1.ListTopicSnapshotsRequest do end defmodule Google.Pubsub.V1.ListTopicSnapshotsResponse do - @moduledoc false + @moduledoc """ + Response for the `ListTopicSnapshots` method. + """ use Protobuf, full_name: "google.pubsub.v1.ListTopicSnapshotsResponse", @@ -1053,7 +1172,9 @@ defmodule Google.Pubsub.V1.ListTopicSnapshotsResponse do end defmodule Google.Pubsub.V1.DeleteTopicRequest do - @moduledoc false + @moduledoc """ + Request for the `DeleteTopic` method. + """ use Protobuf, full_name: "google.pubsub.v1.DeleteTopicRequest", @@ -1064,7 +1185,9 @@ defmodule Google.Pubsub.V1.DeleteTopicRequest do end defmodule Google.Pubsub.V1.DetachSubscriptionRequest do - @moduledoc false + @moduledoc """ + Request for the DetachSubscription method. + """ use Protobuf, full_name: "google.pubsub.v1.DetachSubscriptionRequest", @@ -1075,7 +1198,10 @@ defmodule Google.Pubsub.V1.DetachSubscriptionRequest do end defmodule Google.Pubsub.V1.DetachSubscriptionResponse do - @moduledoc false + @moduledoc """ + Response for the DetachSubscription method. + Reserved for future use. + """ use Protobuf, full_name: "google.pubsub.v1.DetachSubscriptionResponse", @@ -1084,7 +1210,10 @@ defmodule Google.Pubsub.V1.DetachSubscriptionResponse do end defmodule Google.Pubsub.V1.Subscription.AnalyticsHubSubscriptionInfo do - @moduledoc false + @moduledoc """ + Information about an associated [Analytics Hub + subscription](https://cloud.google.com/bigquery/docs/analytics-hub-manage-subscriptions). + """ use Protobuf, full_name: "google.pubsub.v1.Subscription.AnalyticsHubSubscriptionInfo", @@ -1096,8 +1225,6 @@ defmodule Google.Pubsub.V1.Subscription.AnalyticsHubSubscriptionInfo do end defmodule Google.Pubsub.V1.Subscription.LabelsEntry do - @moduledoc false - use Protobuf, full_name: "google.pubsub.v1.Subscription.LabelsEntry", map: true, @@ -1109,8 +1236,6 @@ defmodule Google.Pubsub.V1.Subscription.LabelsEntry do end defmodule Google.Pubsub.V1.Subscription.TagsEntry do - @moduledoc false - use Protobuf, full_name: "google.pubsub.v1.Subscription.TagsEntry", map: true, @@ -1122,7 +1247,11 @@ defmodule Google.Pubsub.V1.Subscription.TagsEntry do end defmodule Google.Pubsub.V1.Subscription do - @moduledoc false + @moduledoc """ + A subscription resource. If none of `push_config`, `bigquery_config`, or + `cloud_storage_config` is set, then the subscriber will pull and ack messages + using API methods. At most one of these fields may be set. + """ use Protobuf, full_name: "google.pubsub.v1.Subscription", @@ -1245,7 +1374,19 @@ defmodule Google.Pubsub.V1.Subscription do end defmodule Google.Pubsub.V1.RetryPolicy do - @moduledoc false + @moduledoc """ + A policy that specifies how Pub/Sub retries message delivery. + + Retry delay will be exponential based on provided minimum and maximum + backoffs. https://en.wikipedia.org/wiki/Exponential_backoff. + + RetryPolicy will be triggered on NACKs or acknowledgment deadline exceeded + events for a given message. + + Retry Policy is implemented on a best effort basis. At times, the delay + between consecutive deliveries may not match the configuration. That is, + delay can be more or less than configured backoff. + """ use Protobuf, full_name: "google.pubsub.v1.RetryPolicy", @@ -1266,7 +1407,13 @@ defmodule Google.Pubsub.V1.RetryPolicy do end defmodule Google.Pubsub.V1.DeadLetterPolicy do - @moduledoc false + @moduledoc """ + Dead lettering is done on a best effort basis. The same message might be + dead lettered multiple times. + + If validation on any of the fields fails at subscription creation/updation, + the create/update subscription request will fail. + """ use Protobuf, full_name: "google.pubsub.v1.DeadLetterPolicy", @@ -1283,7 +1430,10 @@ defmodule Google.Pubsub.V1.DeadLetterPolicy do end defmodule Google.Pubsub.V1.ExpirationPolicy do - @moduledoc false + @moduledoc """ + A policy that specifies the conditions for resource expiration (i.e., + automatic resource deletion). + """ use Protobuf, full_name: "google.pubsub.v1.ExpirationPolicy", @@ -1294,7 +1444,11 @@ defmodule Google.Pubsub.V1.ExpirationPolicy do end defmodule Google.Pubsub.V1.PushConfig.OidcToken do - @moduledoc false + @moduledoc """ + Contains information needed for generating an + [OpenID Connect + token](https://developers.google.com/identity/protocols/OpenIDConnect). + """ use Protobuf, full_name: "google.pubsub.v1.PushConfig.OidcToken", @@ -1311,7 +1465,11 @@ defmodule Google.Pubsub.V1.PushConfig.OidcToken do end defmodule Google.Pubsub.V1.PushConfig.PubsubWrapper do - @moduledoc false + @moduledoc """ + The payload to the push endpoint is in the form of the JSON representation + of a PubsubMessage + (https://cloud.google.com/pubsub/docs/reference/rpc/google.pubsub.v1#pubsubmessage). + """ use Protobuf, full_name: "google.pubsub.v1.PushConfig.PubsubWrapper", @@ -1320,7 +1478,9 @@ defmodule Google.Pubsub.V1.PushConfig.PubsubWrapper do end defmodule Google.Pubsub.V1.PushConfig.NoWrapper do - @moduledoc false + @moduledoc """ + Sets the `data` field as the HTTP body for delivery. + """ use Protobuf, full_name: "google.pubsub.v1.PushConfig.NoWrapper", @@ -1331,8 +1491,6 @@ defmodule Google.Pubsub.V1.PushConfig.NoWrapper do end defmodule Google.Pubsub.V1.PushConfig.AttributesEntry do - @moduledoc false - use Protobuf, full_name: "google.pubsub.v1.PushConfig.AttributesEntry", map: true, @@ -1344,7 +1502,9 @@ defmodule Google.Pubsub.V1.PushConfig.AttributesEntry do end defmodule Google.Pubsub.V1.PushConfig do - @moduledoc false + @moduledoc """ + Configuration for a push delivery endpoint. + """ use Protobuf, full_name: "google.pubsub.v1.PushConfig", @@ -1387,7 +1547,9 @@ defmodule Google.Pubsub.V1.PushConfig do end defmodule Google.Pubsub.V1.BigQueryConfig do - @moduledoc false + @moduledoc """ + Configuration for a BigQuery subscription. + """ use Protobuf, full_name: "google.pubsub.v1.BigQueryConfig", @@ -1409,7 +1571,14 @@ defmodule Google.Pubsub.V1.BigQueryConfig do end defmodule Google.Pubsub.V1.BigtableConfig do - @moduledoc false + @moduledoc """ + Configuration for a Bigtable subscription. The Pub/Sub message will be + written to a Bigtable row as follows: + - row key: subscription name and message ID delimited by #. + - columns: message bytes written to a single column family "data" with an + empty-string column qualifier. + - cell timestamp: the message publish timestamp. + """ use Protobuf, full_name: "google.pubsub.v1.BigtableConfig", @@ -1430,7 +1599,11 @@ defmodule Google.Pubsub.V1.BigtableConfig do end defmodule Google.Pubsub.V1.CloudStorageConfig.TextConfig do - @moduledoc false + @moduledoc """ + Configuration for writing message data in text format. + Message payloads will be written to files as raw text, separated by a + newline. + """ use Protobuf, full_name: "google.pubsub.v1.CloudStorageConfig.TextConfig", @@ -1439,7 +1612,10 @@ defmodule Google.Pubsub.V1.CloudStorageConfig.TextConfig do end defmodule Google.Pubsub.V1.CloudStorageConfig.AvroConfig do - @moduledoc false + @moduledoc """ + Configuration for writing message data in Avro format. + Message payloads and metadata will be written to files as an Avro binary. + """ use Protobuf, full_name: "google.pubsub.v1.CloudStorageConfig.AvroConfig", @@ -1451,7 +1627,9 @@ defmodule Google.Pubsub.V1.CloudStorageConfig.AvroConfig do end defmodule Google.Pubsub.V1.CloudStorageConfig do - @moduledoc false + @moduledoc """ + Configuration for a Cloud Storage subscription. + """ use Protobuf, full_name: "google.pubsub.v1.CloudStorageConfig", @@ -1502,7 +1680,9 @@ defmodule Google.Pubsub.V1.CloudStorageConfig do end defmodule Google.Pubsub.V1.ReceivedMessage do - @moduledoc false + @moduledoc """ + A message and its corresponding acknowledgment ID. + """ use Protobuf, full_name: "google.pubsub.v1.ReceivedMessage", @@ -1515,7 +1695,9 @@ defmodule Google.Pubsub.V1.ReceivedMessage do end defmodule Google.Pubsub.V1.GetSubscriptionRequest do - @moduledoc false + @moduledoc """ + Request for the GetSubscription method. + """ use Protobuf, full_name: "google.pubsub.v1.GetSubscriptionRequest", @@ -1526,7 +1708,9 @@ defmodule Google.Pubsub.V1.GetSubscriptionRequest do end defmodule Google.Pubsub.V1.UpdateSubscriptionRequest do - @moduledoc false + @moduledoc """ + Request for the UpdateSubscription method. + """ use Protobuf, full_name: "google.pubsub.v1.UpdateSubscriptionRequest", @@ -1543,7 +1727,9 @@ defmodule Google.Pubsub.V1.UpdateSubscriptionRequest do end defmodule Google.Pubsub.V1.ListSubscriptionsRequest do - @moduledoc false + @moduledoc """ + Request for the `ListSubscriptions` method. + """ use Protobuf, full_name: "google.pubsub.v1.ListSubscriptionsRequest", @@ -1556,7 +1742,9 @@ defmodule Google.Pubsub.V1.ListSubscriptionsRequest do end defmodule Google.Pubsub.V1.ListSubscriptionsResponse do - @moduledoc false + @moduledoc """ + Response for the `ListSubscriptions` method. + """ use Protobuf, full_name: "google.pubsub.v1.ListSubscriptionsResponse", @@ -1568,7 +1756,9 @@ defmodule Google.Pubsub.V1.ListSubscriptionsResponse do end defmodule Google.Pubsub.V1.DeleteSubscriptionRequest do - @moduledoc false + @moduledoc """ + Request for the DeleteSubscription method. + """ use Protobuf, full_name: "google.pubsub.v1.DeleteSubscriptionRequest", @@ -1579,7 +1769,9 @@ defmodule Google.Pubsub.V1.DeleteSubscriptionRequest do end defmodule Google.Pubsub.V1.ModifyPushConfigRequest do - @moduledoc false + @moduledoc """ + Request for the ModifyPushConfig method. + """ use Protobuf, full_name: "google.pubsub.v1.ModifyPushConfigRequest", @@ -1596,7 +1788,9 @@ defmodule Google.Pubsub.V1.ModifyPushConfigRequest do end defmodule Google.Pubsub.V1.PullRequest do - @moduledoc false + @moduledoc """ + Request for the `Pull` method. + """ use Protobuf, full_name: "google.pubsub.v1.PullRequest", @@ -1609,7 +1803,9 @@ defmodule Google.Pubsub.V1.PullRequest do end defmodule Google.Pubsub.V1.PullResponse do - @moduledoc false + @moduledoc """ + Response for the `Pull` method. + """ use Protobuf, full_name: "google.pubsub.v1.PullResponse", @@ -1625,7 +1821,9 @@ defmodule Google.Pubsub.V1.PullResponse do end defmodule Google.Pubsub.V1.ModifyAckDeadlineRequest do - @moduledoc false + @moduledoc """ + Request for the ModifyAckDeadline method. + """ use Protobuf, full_name: "google.pubsub.v1.ModifyAckDeadlineRequest", @@ -1643,7 +1841,9 @@ defmodule Google.Pubsub.V1.ModifyAckDeadlineRequest do end defmodule Google.Pubsub.V1.AcknowledgeRequest do - @moduledoc false + @moduledoc """ + Request for the Acknowledge method. + """ use Protobuf, full_name: "google.pubsub.v1.AcknowledgeRequest", @@ -1655,7 +1855,11 @@ defmodule Google.Pubsub.V1.AcknowledgeRequest do end defmodule Google.Pubsub.V1.StreamingPullRequest do - @moduledoc false + @moduledoc """ + Request for the `StreamingPull` streaming RPC method. This request is used to + establish the initial stream as well as to stream acknowledgments and ack + deadline modifications from the client to the server. + """ use Protobuf, full_name: "google.pubsub.v1.StreamingPullRequest", @@ -1703,7 +1907,10 @@ defmodule Google.Pubsub.V1.StreamingPullRequest do end defmodule Google.Pubsub.V1.StreamingPullResponse.AcknowledgeConfirmation do - @moduledoc false + @moduledoc """ + Acknowledgment IDs sent in one or more previous requests to acknowledge a + previously received message. + """ use Protobuf, full_name: "google.pubsub.v1.StreamingPullResponse.AcknowledgeConfirmation", @@ -1735,7 +1942,10 @@ defmodule Google.Pubsub.V1.StreamingPullResponse.AcknowledgeConfirmation do end defmodule Google.Pubsub.V1.StreamingPullResponse.ModifyAckDeadlineConfirmation do - @moduledoc false + @moduledoc """ + Acknowledgment IDs sent in one or more previous requests to modify the + deadline for a specific message. + """ use Protobuf, full_name: "google.pubsub.v1.StreamingPullResponse.ModifyAckDeadlineConfirmation", @@ -1760,7 +1970,9 @@ defmodule Google.Pubsub.V1.StreamingPullResponse.ModifyAckDeadlineConfirmation d end defmodule Google.Pubsub.V1.StreamingPullResponse.SubscriptionProperties do - @moduledoc false + @moduledoc """ + Subscription properties sent as part of the response. + """ use Protobuf, full_name: "google.pubsub.v1.StreamingPullResponse.SubscriptionProperties", @@ -1781,7 +1993,10 @@ defmodule Google.Pubsub.V1.StreamingPullResponse.SubscriptionProperties do end defmodule Google.Pubsub.V1.StreamingPullResponse do - @moduledoc false + @moduledoc """ + Response for the `StreamingPull` method. This response is used to stream + messages from the server to the client. + """ use Protobuf, full_name: "google.pubsub.v1.StreamingPullResponse", @@ -1815,8 +2030,6 @@ defmodule Google.Pubsub.V1.StreamingPullResponse do end defmodule Google.Pubsub.V1.CreateSnapshotRequest.LabelsEntry do - @moduledoc false - use Protobuf, full_name: "google.pubsub.v1.CreateSnapshotRequest.LabelsEntry", map: true, @@ -1828,8 +2041,6 @@ defmodule Google.Pubsub.V1.CreateSnapshotRequest.LabelsEntry do end defmodule Google.Pubsub.V1.CreateSnapshotRequest.TagsEntry do - @moduledoc false - use Protobuf, full_name: "google.pubsub.v1.CreateSnapshotRequest.TagsEntry", map: true, @@ -1841,7 +2052,9 @@ defmodule Google.Pubsub.V1.CreateSnapshotRequest.TagsEntry do end defmodule Google.Pubsub.V1.CreateSnapshotRequest do - @moduledoc false + @moduledoc """ + Request for the `CreateSnapshot` method. + """ use Protobuf, full_name: "google.pubsub.v1.CreateSnapshotRequest", @@ -1867,7 +2080,9 @@ defmodule Google.Pubsub.V1.CreateSnapshotRequest do end defmodule Google.Pubsub.V1.UpdateSnapshotRequest do - @moduledoc false + @moduledoc """ + Request for the UpdateSnapshot method. + """ use Protobuf, full_name: "google.pubsub.v1.UpdateSnapshotRequest", @@ -1884,8 +2099,6 @@ defmodule Google.Pubsub.V1.UpdateSnapshotRequest do end defmodule Google.Pubsub.V1.Snapshot.LabelsEntry do - @moduledoc false - use Protobuf, full_name: "google.pubsub.v1.Snapshot.LabelsEntry", map: true, @@ -1897,7 +2110,13 @@ defmodule Google.Pubsub.V1.Snapshot.LabelsEntry do end defmodule Google.Pubsub.V1.Snapshot do - @moduledoc false + @moduledoc """ + A snapshot resource. Snapshots are used in + [Seek](https://cloud.google.com/pubsub/docs/replay-overview) + operations, which allow you to manage message acknowledgments in bulk. That + is, you can set the acknowledgment state of messages in an existing + subscription to the state captured by a snapshot. + """ use Protobuf, full_name: "google.pubsub.v1.Snapshot", @@ -1922,7 +2141,9 @@ defmodule Google.Pubsub.V1.Snapshot do end defmodule Google.Pubsub.V1.GetSnapshotRequest do - @moduledoc false + @moduledoc """ + Request for the GetSnapshot method. + """ use Protobuf, full_name: "google.pubsub.v1.GetSnapshotRequest", @@ -1933,7 +2154,9 @@ defmodule Google.Pubsub.V1.GetSnapshotRequest do end defmodule Google.Pubsub.V1.ListSnapshotsRequest do - @moduledoc false + @moduledoc """ + Request for the `ListSnapshots` method. + """ use Protobuf, full_name: "google.pubsub.v1.ListSnapshotsRequest", @@ -1946,7 +2169,9 @@ defmodule Google.Pubsub.V1.ListSnapshotsRequest do end defmodule Google.Pubsub.V1.ListSnapshotsResponse do - @moduledoc false + @moduledoc """ + Response for the `ListSnapshots` method. + """ use Protobuf, full_name: "google.pubsub.v1.ListSnapshotsResponse", @@ -1958,7 +2183,9 @@ defmodule Google.Pubsub.V1.ListSnapshotsResponse do end defmodule Google.Pubsub.V1.DeleteSnapshotRequest do - @moduledoc false + @moduledoc """ + Request for the `DeleteSnapshot` method. + """ use Protobuf, full_name: "google.pubsub.v1.DeleteSnapshotRequest", @@ -1969,7 +2196,9 @@ defmodule Google.Pubsub.V1.DeleteSnapshotRequest do end defmodule Google.Pubsub.V1.SeekRequest do - @moduledoc false + @moduledoc """ + Request for the `Seek` method. + """ use Protobuf, full_name: "google.pubsub.v1.SeekRequest", @@ -1984,7 +2213,9 @@ defmodule Google.Pubsub.V1.SeekRequest do end defmodule Google.Pubsub.V1.SeekResponse do - @moduledoc false + @moduledoc """ + Response for the `Seek` method (this response is empty). + """ use Protobuf, full_name: "google.pubsub.v1.SeekResponse", @@ -1993,7 +2224,10 @@ defmodule Google.Pubsub.V1.SeekResponse do end defmodule Google.Pubsub.V1.Publisher.Service do - @moduledoc false + @moduledoc """ + The service that an application uses to manipulate topics, and to send + messages to a topic. + """ use GRPC.Service, name: "google.pubsub.v1.Publisher", protoc_gen_elixir_version: "0.16.0" @@ -2029,13 +2263,15 @@ defmodule Google.Pubsub.V1.Publisher.Service do end defmodule Google.Pubsub.V1.Publisher.Stub do - @moduledoc false - use GRPC.Stub, service: Google.Pubsub.V1.Publisher.Service end defmodule Google.Pubsub.V1.Subscriber.Service do - @moduledoc false + @moduledoc """ + The service that an application uses to manipulate subscriptions and to + consume messages from a subscription via the `Pull` method or by + establishing a bi-directional stream using the `StreamingPull` method. + """ use GRPC.Service, name: "google.pubsub.v1.Subscriber", protoc_gen_elixir_version: "0.16.0" @@ -2089,7 +2325,5 @@ defmodule Google.Pubsub.V1.Subscriber.Service do end defmodule Google.Pubsub.V1.Subscriber.Stub do - @moduledoc false - use GRPC.Stub, service: Google.Pubsub.V1.Subscriber.Service end diff --git a/lib/broadway_cloud_pub_sub/proto/google/pubsub/v1/schema.pb.ex b/lib/broadway_cloud_pub_sub/proto/google/pubsub/v1/schema.pb.ex index 455f222..0ff74ae 100644 --- a/lib/broadway_cloud_pub_sub/proto/google/pubsub/v1/schema.pb.ex +++ b/lib/broadway_cloud_pub_sub/proto/google/pubsub/v1/schema.pb.ex @@ -1,5 +1,7 @@ defmodule Google.Pubsub.V1.SchemaView do - @moduledoc false + @moduledoc """ + View of Schema object fields to be returned by GetSchema and ListSchemas. + """ use Protobuf, enum: true, @@ -13,7 +15,9 @@ defmodule Google.Pubsub.V1.SchemaView do end defmodule Google.Pubsub.V1.Encoding do - @moduledoc false + @moduledoc """ + Possible encoding types for messages. + """ use Protobuf, enum: true, @@ -27,7 +31,9 @@ defmodule Google.Pubsub.V1.Encoding do end defmodule Google.Pubsub.V1.Schema.Type do - @moduledoc false + @moduledoc """ + Possible schema definition types. + """ use Protobuf, enum: true, @@ -41,7 +47,9 @@ defmodule Google.Pubsub.V1.Schema.Type do end defmodule Google.Pubsub.V1.Schema do - @moduledoc false + @moduledoc """ + A schema resource. + """ use Protobuf, full_name: "google.pubsub.v1.Schema", @@ -61,7 +69,9 @@ defmodule Google.Pubsub.V1.Schema do end defmodule Google.Pubsub.V1.CreateSchemaRequest do - @moduledoc false + @moduledoc """ + Request for the CreateSchema method. + """ use Protobuf, full_name: "google.pubsub.v1.CreateSchemaRequest", @@ -74,7 +84,9 @@ defmodule Google.Pubsub.V1.CreateSchemaRequest do end defmodule Google.Pubsub.V1.GetSchemaRequest do - @moduledoc false + @moduledoc """ + Request for the GetSchema method. + """ use Protobuf, full_name: "google.pubsub.v1.GetSchemaRequest", @@ -86,7 +98,9 @@ defmodule Google.Pubsub.V1.GetSchemaRequest do end defmodule Google.Pubsub.V1.ListSchemasRequest do - @moduledoc false + @moduledoc """ + Request for the `ListSchemas` method. + """ use Protobuf, full_name: "google.pubsub.v1.ListSchemasRequest", @@ -100,7 +114,9 @@ defmodule Google.Pubsub.V1.ListSchemasRequest do end defmodule Google.Pubsub.V1.ListSchemasResponse do - @moduledoc false + @moduledoc """ + Response for the `ListSchemas` method. + """ use Protobuf, full_name: "google.pubsub.v1.ListSchemasResponse", @@ -112,7 +128,9 @@ defmodule Google.Pubsub.V1.ListSchemasResponse do end defmodule Google.Pubsub.V1.ListSchemaRevisionsRequest do - @moduledoc false + @moduledoc """ + Request for the `ListSchemaRevisions` method. + """ use Protobuf, full_name: "google.pubsub.v1.ListSchemaRevisionsRequest", @@ -126,7 +144,9 @@ defmodule Google.Pubsub.V1.ListSchemaRevisionsRequest do end defmodule Google.Pubsub.V1.ListSchemaRevisionsResponse do - @moduledoc false + @moduledoc """ + Response for the `ListSchemaRevisions` method. + """ use Protobuf, full_name: "google.pubsub.v1.ListSchemaRevisionsResponse", @@ -138,7 +158,9 @@ defmodule Google.Pubsub.V1.ListSchemaRevisionsResponse do end defmodule Google.Pubsub.V1.CommitSchemaRequest do - @moduledoc false + @moduledoc """ + Request for CommitSchema method. + """ use Protobuf, full_name: "google.pubsub.v1.CommitSchemaRequest", @@ -150,7 +172,9 @@ defmodule Google.Pubsub.V1.CommitSchemaRequest do end defmodule Google.Pubsub.V1.RollbackSchemaRequest do - @moduledoc false + @moduledoc """ + Request for the `RollbackSchema` method. + """ use Protobuf, full_name: "google.pubsub.v1.RollbackSchemaRequest", @@ -162,7 +186,9 @@ defmodule Google.Pubsub.V1.RollbackSchemaRequest do end defmodule Google.Pubsub.V1.DeleteSchemaRevisionRequest do - @moduledoc false + @moduledoc """ + Request for the `DeleteSchemaRevision` method. + """ use Protobuf, full_name: "google.pubsub.v1.DeleteSchemaRevisionRequest", @@ -174,7 +200,9 @@ defmodule Google.Pubsub.V1.DeleteSchemaRevisionRequest do end defmodule Google.Pubsub.V1.DeleteSchemaRequest do - @moduledoc false + @moduledoc """ + Request for the `DeleteSchema` method. + """ use Protobuf, full_name: "google.pubsub.v1.DeleteSchemaRequest", @@ -185,7 +213,9 @@ defmodule Google.Pubsub.V1.DeleteSchemaRequest do end defmodule Google.Pubsub.V1.ValidateSchemaRequest do - @moduledoc false + @moduledoc """ + Request for the `ValidateSchema` method. + """ use Protobuf, full_name: "google.pubsub.v1.ValidateSchemaRequest", @@ -197,7 +227,10 @@ defmodule Google.Pubsub.V1.ValidateSchemaRequest do end defmodule Google.Pubsub.V1.ValidateSchemaResponse do - @moduledoc false + @moduledoc """ + Response for the `ValidateSchema` method. + Empty for now. + """ use Protobuf, full_name: "google.pubsub.v1.ValidateSchemaResponse", @@ -206,7 +239,9 @@ defmodule Google.Pubsub.V1.ValidateSchemaResponse do end defmodule Google.Pubsub.V1.ValidateMessageRequest do - @moduledoc false + @moduledoc """ + Request for the `ValidateMessage` method. + """ use Protobuf, full_name: "google.pubsub.v1.ValidateMessageRequest", @@ -223,7 +258,10 @@ defmodule Google.Pubsub.V1.ValidateMessageRequest do end defmodule Google.Pubsub.V1.ValidateMessageResponse do - @moduledoc false + @moduledoc """ + Response for the `ValidateMessage` method. + Empty for now. + """ use Protobuf, full_name: "google.pubsub.v1.ValidateMessageResponse", @@ -232,7 +270,9 @@ defmodule Google.Pubsub.V1.ValidateMessageResponse do end defmodule Google.Pubsub.V1.SchemaService.Service do - @moduledoc false + @moduledoc """ + Service for doing schema-related operations. + """ use GRPC.Service, name: "google.pubsub.v1.SchemaService", protoc_gen_elixir_version: "0.16.0" @@ -274,7 +314,5 @@ defmodule Google.Pubsub.V1.SchemaService.Service do end defmodule Google.Pubsub.V1.SchemaService.Stub do - @moduledoc false - use GRPC.Stub, service: Google.Pubsub.V1.SchemaService.Service end diff --git a/lib/broadway_cloud_pub_sub/streaming/client.ex b/lib/broadway_cloud_pub_sub/streaming/client.ex new file mode 100644 index 0000000..ed271cc --- /dev/null +++ b/lib/broadway_cloud_pub_sub/streaming/client.ex @@ -0,0 +1,132 @@ +defmodule BroadwayCloudPubSub.Streaming.Client do + @moduledoc """ + Behaviour for gRPC interactions used by the streaming Pub/Sub producer. + + The default implementation is `BroadwayCloudPubSub.Streaming.GrpcClient`. + Provide a custom module via the `:grpc_client` option on + `BroadwayCloudPubSub.Streaming.Producer` for testing or alternate transports. + + ## Implementing a custom client + + A custom client module must implement all callbacks in this behaviour. The `init/1` + callback receives the full producer options and returns an opaque `config` term that + is stored in state and forwarded as the last argument to every subsequent call. + + Example: + + defmodule MyApp.FakeGrpcClient do + @behaviour BroadwayCloudPubSub.Streaming.Client + + @impl true + def init(opts), do: {:ok, Map.new(opts)} + + @impl true + def connect(config), do: {:ok, :fake_channel} + + # ... implement remaining callbacks + end + + Then configure the producer: + + {BroadwayCloudPubSub.Streaming.Producer, + grpc_client: MyApp.FakeGrpcClient, + subscription: "projects/my-project/subscriptions/my-sub", + ...} + """ + + @typedoc "An opaque term returned by `init/1` and passed to all subsequent calls." + @type config :: term() + + @typedoc "An opaque gRPC channel returned by `connect/1`." + @type channel :: term() + + @typedoc "An opaque gRPC bidirectional stream returned by `streaming_pull/2`." + @type stream :: term() + + # --- Lifecycle --- + + @doc """ + Invoked once during producer startup to normalize options into a `config` term. + + The `config` term is stored in state and forwarded as the last argument to all + subsequent callbacks, analogous to how `BroadwayCloudPubSub.Client.init/1` + works for the pull producer. + """ + @callback init(opts :: keyword()) :: {:ok, config()} | {:error, term()} + + @doc """ + Opens a gRPC channel to the Pub/Sub service. + + Called by `StreamManager` and `UnaryRpcClient` before each (re)connection. + Should handle token fetching, TLS setup, and adapter selection internally. + Returns `{:ok, channel}` on success or `{:error, reason}` on failure. + """ + @callback connect(config()) :: {:ok, channel()} | {:error, reason :: term()} + + @doc """ + Closes a gRPC channel. + + Called during reconnect, shutdown, and error recovery. Implementations should + handle the case where the channel is already closed or the connection process + is dead. + """ + @callback disconnect(channel(), config()) :: :ok + + # --- Streaming RPCs --- + + @doc """ + Opens a bidirectional `StreamingPull` gRPC stream on the given channel. + + Returns the stream struct. Called by `StreamReader` after a successful `connect/1`. + """ + @callback streaming_pull(channel(), config()) :: stream() + + @doc """ + Sends a request on an open bidirectional gRPC stream. + + Used to send the initial `StreamingPullRequest` and subsequent keep-alive or + deadline-extension requests on the stream. Returns `{:ok, stream}` (the stream + struct may be updated by the underlying library after a send) or `{:error, reason}`. + """ + @callback send_request(stream(), request :: term(), config()) :: + {:ok, stream()} | {:error, term()} + + @doc """ + Begins enumerating responses from an open bidirectional gRPC stream. + + Returns `{:ok, enumerable}` where `enumerable` yields `{:ok, response}` or + `{:error, error}` terms as the server sends data. The enumeration blocks until + the stream closes. Any timeout or blocking behaviour is an internal implementation + detail of the client. + """ + @callback recv(stream(), config()) :: {:ok, Enumerable.t()} | {:error, term()} + + @doc """ + Cancels an open gRPC stream. + + Called during graceful shutdown or error recovery to stop receiving new messages. + Implementations should handle the case where the stream is already closed. + """ + @callback cancel(stream(), config()) :: :ok + + # --- Unary RPCs --- + + @doc """ + Sends an `Acknowledge` unary RPC to the Pub/Sub service. + + `request` is a `Google.Pubsub.V1.AcknowledgeRequest` struct. Returns `{:ok, response}` + on success or `{:error, reason}` on failure. Implementations may emit telemetry spans. + """ + @callback acknowledge(channel(), request :: term(), config()) :: + {:ok, term()} | {:error, term()} + + @doc """ + Sends a `ModifyAckDeadline` unary RPC to the Pub/Sub service. + + `request` is a `Google.Pubsub.V1.ModifyAckDeadlineRequest` struct. Returns + `{:ok, response}` on success or `{:error, reason}` on failure. Implementations + may emit telemetry spans. + """ + @callback modify_ack_deadline(channel(), request :: term(), config()) :: + {:ok, term()} | {:error, term()} +end diff --git a/lib/broadway_cloud_pub_sub/streaming/grpc_client.ex b/lib/broadway_cloud_pub_sub/streaming/grpc_client.ex new file mode 100644 index 0000000..f367b6f --- /dev/null +++ b/lib/broadway_cloud_pub_sub/streaming/grpc_client.ex @@ -0,0 +1,165 @@ +defmodule BroadwayCloudPubSub.Streaming.GrpcClient do + @moduledoc """ + The default gRPC client for `BroadwayCloudPubSub.Streaming.Producer`. + + Implements `BroadwayCloudPubSub.Streaming.Client` using the `grpc_client` library + with the `Google.Pubsub.V1.Subscriber.Stub` generated stub. + + This module handles: + - Token fetching and channel connection (with TLS and adapter configuration) + - Bidirectional `StreamingPull` stream management + - Unary `Acknowledge` and `ModifyAckDeadline` RPCs with telemetry spans + + ## Telemetry + + This module emits the following telemetry events: + + * `[:broadway_cloud_pub_sub, :streaming, :ack, :start | :stop | :exception]` — emitted + as a span when sending an `Acknowledge` unary RPC. + + Measurements: as described in `:telemetry.span/3`. + Metadata: `%{name: broadway_name, subscription: subscription, count: ack_count}` + + * `[:broadway_cloud_pub_sub, :streaming, :modack, :start | :stop | :exception]` — emitted + as a span when sending a `ModifyAckDeadline` unary RPC. + + Measurements: as described in `:telemetry.span/3`. + Metadata: `%{name: broadway_name, subscription: subscription, count: ack_count}` + """ + + @behaviour BroadwayCloudPubSub.Streaming.Client + + alias Google.Pubsub.V1.Subscriber.Stub + alias Google.Pubsub.V1.{AcknowledgeRequest, ModifyAckDeadlineRequest} + + # Default RPC timeout for unary calls. + @unary_rpc_timeout_ms 30_000 + + @impl BroadwayCloudPubSub.Streaming.Client + def init(opts) do + {:ok, Map.new(opts)} + end + + @impl BroadwayCloudPubSub.Streaming.Client + def connect(config) do + with {:ok, token} <- fetch_token(config) do + open_channel(config, token) + end + rescue + e -> {:error, {:connect_failed, Exception.message(e)}} + end + + @impl BroadwayCloudPubSub.Streaming.Client + def disconnect(channel, _config) do + try do + GRPC.Stub.disconnect(channel) + catch + _, _ -> :ok + end + + :ok + end + + @impl BroadwayCloudPubSub.Streaming.Client + def streaming_pull(channel, _config) do + Stub.streaming_pull(channel, []) + end + + @impl BroadwayCloudPubSub.Streaming.Client + def send_request(stream, request, _config) do + case GRPC.Stub.send_request(stream, request) do + %GRPC.Client.Stream{} = updated_stream -> {:ok, updated_stream} + {:error, reason} -> {:error, reason} + end + catch + kind, reason -> {:error, {kind, reason}} + end + + @impl BroadwayCloudPubSub.Streaming.Client + def recv(stream, _config) do + GRPC.Stub.recv(stream, timeout: :infinity) + end + + @impl BroadwayCloudPubSub.Streaming.Client + def cancel(stream, _config) do + try do + GRPC.Stub.cancel(stream) + catch + _, _ -> :ok + end + + :ok + end + + @impl BroadwayCloudPubSub.Streaming.Client + def acknowledge(channel, %AcknowledgeRequest{ack_ids: ack_ids} = request, config) do + :telemetry.span( + [:broadway_cloud_pub_sub, :streaming, :ack], + %{name: config.broadway_name, subscription: config.subscription}, + fn -> + result = Stub.acknowledge(channel, request, timeout: @unary_rpc_timeout_ms) + + {result, + %{name: config.broadway_name, subscription: config.subscription, count: length(ack_ids)}} + end + ) + end + + @impl BroadwayCloudPubSub.Streaming.Client + def modify_ack_deadline( + channel, + %ModifyAckDeadlineRequest{ack_ids: ack_ids} = request, + config + ) do + :telemetry.span( + [:broadway_cloud_pub_sub, :streaming, :modack], + %{name: config.broadway_name, subscription: config.subscription}, + fn -> + result = Stub.modify_ack_deadline(channel, request, timeout: @unary_rpc_timeout_ms) + + {result, + %{name: config.broadway_name, subscription: config.subscription, count: length(ack_ids)}} + end + ) + end + + # --- Private --- + + defp fetch_token(%{token_generator: {mod, fun, args}}) do + apply(mod, fun, args) + end + + defp open_channel( + %{grpc_endpoint: endpoint, use_ssl: use_ssl, adapter: adapter} = config, + token + ) do + keepalive_interval_ms = Map.get(config, :keepalive_interval_ms, 30_000) + + adapter_opts = [http2_opts: %{keepalive: keepalive_interval_ms, settings_timeout: :infinity}] + + adapter_opts = + case Map.get(config, :test_pid) do + nil -> adapter_opts + pid -> Keyword.put(adapter_opts, :test_pid, pid) + end + + base_opts = [ + adapter: adapter, + headers: [{"authorization", "Bearer #{token}"}], + adapter_opts: adapter_opts + ] + + opts = + if use_ssl do + cred = GRPC.Credential.new(ssl: [cacerts: :public_key.cacerts_get()]) + Keyword.put(base_opts, :cred, cred) + else + base_opts + end + + case GRPC.Stub.connect(endpoint, opts) do + {:ok, channel} -> {:ok, channel} + {:error, reason} -> {:error, {:connect_failed, reason}} + end + end +end diff --git a/lib/broadway_cloud_pub_sub/streaming/options.ex b/lib/broadway_cloud_pub_sub/streaming/options.ex index 4fdcfc1..fb06936 100644 --- a/lib/broadway_cloud_pub_sub/streaming/options.ex +++ b/lib/broadway_cloud_pub_sub/streaming/options.ex @@ -277,6 +277,17 @@ defmodule BroadwayCloudPubSub.Streaming.Options do processing topology. """ ], + grpc_client: [ + type: :atom, + default: BroadwayCloudPubSub.Streaming.GrpcClient, + doc: """ + The module implementing the `BroadwayCloudPubSub.Streaming.Client` behaviour. + Defaults to `BroadwayCloudPubSub.Streaming.GrpcClient`, which uses the + `grpc_client` library to communicate with Google Cloud Pub/Sub. + + Swap this for testing or custom gRPC transports. + """ + ], # Testing options test_pid: [type: :pid, doc: false] diff --git a/lib/broadway_cloud_pub_sub/streaming/producer.ex b/lib/broadway_cloud_pub_sub/streaming/producer.ex index eb1c70c..c8cc1ad 100644 --- a/lib/broadway_cloud_pub_sub/streaming/producer.ex +++ b/lib/broadway_cloud_pub_sub/streaming/producer.ex @@ -257,6 +257,11 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do broadway_name = broadway_opts[:name] + grpc_client = opts[:grpc_client] + {:ok, client_config} = grpc_client.init(opts) + + opts = Keyword.put(opts, :grpc_client_config, client_config) + # Config forwarded to UnaryRpcClient and AckBatcher via the supervisor. # These keys are a subset of the full opts — only what the unary path needs. unary_config = @@ -272,8 +277,10 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do :backoff_max, :ack_batch_interval_ms, :ack_batch_max_size, - :retry_deadline_ms + :retry_deadline_ms, + :grpc_client ]) + |> Keyword.put(:grpc_client_config, client_config) |> Keyword.put(:broadway_name, broadway_name) sup_name = Module.concat(broadway_name, UnaryAckSupervisor) diff --git a/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex b/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex index 8085114..d53d68a 100644 --- a/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex +++ b/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex @@ -37,6 +37,8 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do defstruct [ :producer_pid, :config, + :grpc_client, + :grpc_client_config, :channel, :grpc_stream, :conn_pid, @@ -178,6 +180,8 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do state = %__MODULE__{ producer_pid: nil, config: config, + grpc_client: config.grpc_client, + grpc_client_config: config.grpc_client_config, backoff: backoff, ack_time_dist: AckTimeDistribution.new(config.stream_ack_deadline_seconds), ack_batcher: ack_batcher, @@ -423,7 +427,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do adaptive_deadline = AckTimeDistribution.percentile(state.ack_time_dist, 0.99) keepalive_request = %StreamingPullRequest{stream_ack_deadline_seconds: adaptive_deadline} - case send_on_stream(state.grpc_stream, keepalive_request) do + case send_on_stream(state.grpc_stream, keepalive_request, state) do {:ok, stream} -> emit_telemetry(:keepalive, %{deadline: adaptive_deadline}, state.config) timer = schedule_keepalive_after(state.config) @@ -573,12 +577,13 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do # --- Private: connection --- - defp connect(%{config: config} = state) do - with {:ok, token} <- fetch_token(config), - {:ok, channel} <- open_channel(config, token) do - connect_stream(channel, state) - else - {:error, reason} -> {:error, reason, state} + defp connect(state) do + case state.grpc_client.connect(state.grpc_client_config) do + {:ok, channel} -> + connect_stream(channel, state) + + {:error, reason} -> + {:error, reason, state} end rescue e -> @@ -600,56 +605,12 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do end rescue e -> - try do - GRPC.Stub.disconnect(channel) - catch - _, _ -> :ok - end - + state.grpc_client.disconnect(channel, state.grpc_client_config) {:error, {:connect_failed, Exception.message(e)}, state} end - defp open_channel( - %{grpc_endpoint: endpoint, use_ssl: use_ssl, adapter: adapter} = config, - token - ) do - keepalive_interval_ms = Map.get(config, :keepalive_interval_ms, 30_000) - - adapter_opts = [http2_opts: %{keepalive: keepalive_interval_ms, settings_timeout: :infinity}] - - adapter_opts = - case Map.get(config, :test_pid) do - nil -> adapter_opts - pid -> Keyword.put(adapter_opts, :test_pid, pid) - end - - base_opts = [ - adapter: adapter, - headers: [{"authorization", "Bearer #{token}"}], - adapter_opts: adapter_opts - ] - - opts = - if use_ssl do - cred = GRPC.Credential.new(ssl: [cacerts: :public_key.cacerts_get()]) - Keyword.put(base_opts, :cred, cred) - else - base_opts - end - - case GRPC.Stub.connect(endpoint, opts) do - {:ok, channel} -> {:ok, channel} - {:error, reason} -> {:error, {:connect_failed, reason}} - end - end - - defp send_on_stream(grpc_stream, request) do - case GRPC.Stub.send_request(grpc_stream, request) do - %GRPC.Client.Stream{} = stream -> {:ok, stream} - {:error, reason} -> {:error, reason} - end - catch - kind, reason -> {:error, {kind, reason}} + defp send_on_stream(grpc_stream, request, state) do + state.grpc_client.send_request(grpc_stream, request, state.grpc_client_config) end defp reset_connection(state, reason) do @@ -696,7 +657,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do defp cancel_grpc_stream(%{grpc_stream: grpc_stream} = state) do # Skip cancel if the Mint StreamResponseProcess is already dead — calling - # GRPC.Stub.cancel would crash the ConnectionProcess. See decisions.md. + # cancel would crash the ConnectionProcess. See decisions.md. srp_alive? = case grpc_stream do %{payload: %{stream_response_pid: pid}} when is_pid(pid) -> Process.alive?(pid) @@ -704,11 +665,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do end if srp_alive? do - try do - GRPC.Stub.cancel(grpc_stream) - catch - _, _ -> :ok - end + state.grpc_client.cancel(grpc_stream, state.grpc_client_config) end state @@ -726,11 +683,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do end if conn_alive? do - try do - GRPC.Stub.disconnect(channel) - catch - _, _ -> :ok - end + state.grpc_client.disconnect(channel, state.grpc_client_config) end state @@ -1009,12 +962,6 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do _ -> nil end - # --- Private: auth --- - - defp fetch_token(%{token_generator: {mod, fun, args}}) do - apply(mod, fun, args) - end - # Flush AckBatcher if its process is currently alive. Guards against the # batcher being down during pipeline shutdown (Broadway stops children in # reverse start order). diff --git a/lib/broadway_cloud_pub_sub/streaming/stream_reader.ex b/lib/broadway_cloud_pub_sub/streaming/stream_reader.ex index 3b4611d..e23f071 100644 --- a/lib/broadway_cloud_pub_sub/streaming/stream_reader.ex +++ b/lib/broadway_cloud_pub_sub/streaming/stream_reader.ex @@ -29,13 +29,12 @@ defmodule BroadwayCloudPubSub.Streaming.StreamReader do # ## Sending on the stream (acks, deadline modifications) # # After receiving `{:stream_opened, _pid, grpc_stream}`, the StreamManager - # calls `GRPC.Stub.send_request(grpc_stream, request)` directly from the - # GenServer process. Both the Gun and Mint adapters implement this as a + # calls `grpc_client.send_request(grpc_stream, request, client_config)` directly + # from the GenServer process. Both the Gun and Mint adapters implement this as a # fire-and-forget cast, safe to call from any process concurrently with the # reader process enumerating the receive stream. alias Google.Pubsub.V1.{StreamingPullRequest, StreamingPullResponse} - alias Google.Pubsub.V1.Subscriber.Stub @doc """ Spawns a linked reader process. The reader opens the gRPC stream and sends @@ -44,7 +43,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamReader do Returns the reader pid. """ - @spec start_link(pid(), GRPC.Channel.t(), map()) :: pid() + @spec start_link(pid(), channel :: term(), map()) :: {:ok, pid()} def start_link(manager, channel, config) do Task.start_link(fn -> run(manager, channel, config) end) end @@ -52,6 +51,8 @@ defmodule BroadwayCloudPubSub.Streaming.StreamReader do # --- Private --- defp run(manager, channel, config) do + grpc_client = config.grpc_client + grpc_client_config = config.grpc_client_config client_id = Map.fetch!(config, :client_id) initial_request = %StreamingPullRequest{ @@ -62,15 +63,17 @@ defmodule BroadwayCloudPubSub.Streaming.StreamReader do client_id: client_id } - grpc_stream = Stub.streaming_pull(channel, []) - grpc_stream = GRPC.Stub.send_request(grpc_stream, initial_request) + grpc_stream = grpc_client.streaming_pull(channel, grpc_client_config) + + {:ok, grpc_stream} = + grpc_client.send_request(grpc_stream, initial_request, grpc_client_config) # Notify the manager that the stream is open. The manager needs the - # grpc_stream struct to call GRPC.Stub.send_request for acks and deadline + # grpc_stream struct to call send_request for acks and deadline # modifications on the bidirectional stream. send(manager, {:stream_opened, self(), grpc_stream}) - case GRPC.Stub.recv(grpc_stream, timeout: :infinity) do + case grpc_client.recv(grpc_stream, grpc_client_config) do {:ok, enum} -> enumerate(enum, manager) {:error, error} -> send(manager, {:stream_error, error}) end diff --git a/lib/broadway_cloud_pub_sub/streaming/unary_rpc_client.ex b/lib/broadway_cloud_pub_sub/streaming/unary_rpc_client.ex index d36b010..c0e09f7 100644 --- a/lib/broadway_cloud_pub_sub/streaming/unary_rpc_client.ex +++ b/lib/broadway_cloud_pub_sub/streaming/unary_rpc_client.ex @@ -33,7 +33,7 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClient do alias BroadwayCloudPubSub.{Backoff} alias BroadwayCloudPubSub.Streaming.{AckResult, ErrorClassifier} - alias Google.Pubsub.V1.{AcknowledgeRequest, ModifyAckDeadlineRequest, Subscriber} + alias Google.Pubsub.V1.{AcknowledgeRequest, ModifyAckDeadlineRequest} require Logger @@ -41,6 +41,8 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClient do defstruct [ :config, + :grpc_client, + :grpc_client_config, :channel, :backoff, # True when a :reconnect message is already queued in the mailbox. @@ -132,10 +134,15 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClient do max: config.backoff_max ) - state = %__MODULE__{config: config, backoff: backoff} + state = %__MODULE__{ + config: config, + grpc_client: config.grpc_client, + grpc_client_config: config.grpc_client_config, + backoff: backoff + } # Open initial channel immediately. - case open_channel(state) do + case state.grpc_client.connect(state.grpc_client_config) do {:ok, channel} -> {:ok, %{state | channel: channel}} @@ -159,15 +166,7 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClient do ack_ids: ack_ids } - result = - :telemetry.span( - [:broadway_cloud_pub_sub, :unary, :ack], - %{name: state.config.broadway_name, subscription: state.config.subscription}, - fn -> - {Subscriber.Stub.acknowledge(channel, request, timeout: 30_000), - %{count: length(ack_ids)}} - end - ) + result = state.grpc_client.acknowledge(channel, request, state.grpc_client_config) case result do {:ok, _} -> @@ -225,15 +224,7 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClient do ack_deadline_seconds: deadline_seconds } - result = - :telemetry.span( - [:broadway_cloud_pub_sub, :unary, :modack], - %{name: state.config.broadway_name, subscription: state.config.subscription}, - fn -> - {Subscriber.Stub.modify_ack_deadline(channel, request, timeout: 30_000), - %{count: length(ack_ids)}} - end - ) + result = state.grpc_client.modify_ack_deadline(channel, request, state.grpc_client_config) case result do {:ok, _} -> @@ -280,7 +271,7 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClient do state = %{state | reconnect_pending: false} state = disconnect_channel(state) - case open_channel(state) do + case state.grpc_client.connect(state.grpc_client_config) do {:ok, channel} -> emit_telemetry(:connect, %{}, state.config) backoff = Backoff.reset(state.backoff) @@ -309,13 +300,8 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClient do def handle_info(_msg, state), do: {:noreply, state} @impl GenServer - def terminate(_reason, %{channel: channel} = _state) when not is_nil(channel) do - try do - GRPC.Stub.disconnect(channel) - catch - _, _ -> :ok - end - + def terminate(_reason, %{channel: channel} = state) when not is_nil(channel) do + state.grpc_client.disconnect(channel, state.grpc_client_config) :ok end @@ -324,7 +310,7 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClient do # --- Private --- defp ensure_channel(%{channel: nil} = state) do - case open_channel(state) do + case state.grpc_client.connect(state.grpc_client_config) do {:ok, channel} -> %{state | channel: channel} {:error, _} -> state end @@ -345,45 +331,10 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClient do defp disconnect_channel(%{channel: nil} = state), do: state defp disconnect_channel(%{channel: channel} = state) do - try do - GRPC.Stub.disconnect(channel) - catch - _, _ -> :ok - end - + state.grpc_client.disconnect(channel, state.grpc_client_config) %{state | channel: nil} end - defp open_channel(%{config: config}) do - token_result = - case config.token_generator do - {mod, fun, args} -> apply(mod, fun, args) - end - - with {:ok, token} <- token_result do - adapter_opts = [http2_opts: %{settings_timeout: :infinity}] - - base_opts = [ - adapter: config.adapter, - headers: [{"authorization", "Bearer #{token}"}], - adapter_opts: adapter_opts - ] - - opts = - if config.use_ssl do - cred = GRPC.Credential.new(ssl: [cacerts: :public_key.cacerts_get()]) - Keyword.put(base_opts, :cred, cred) - else - base_opts - end - - case GRPC.Stub.connect(config.grpc_endpoint, opts) do - {:ok, channel} -> {:ok, channel} - {:error, reason} -> {:error, {:connect_failed, reason}} - end - end - end - defp emit_telemetry(event, measurements, config) do metadata = %{ name: config.broadway_name, diff --git a/mix.exs b/mix.exs index 590b126..27ad1c1 100644 --- a/mix.exs +++ b/mix.exs @@ -40,8 +40,9 @@ defmodule BroadwayCloudPubSub.MixProject do {:goth, "~> 1.3", optional: true}, # TODO: Replace with Hex versions when grpc 1.0 is released {:grpc_core, - github: "elixir-grpc/grpc", sparse: "grpc_core", optional: true, override: true}, - {:grpc_client, github: "elixir-grpc/grpc", sparse: "grpc_client", optional: true}, + github: "elixir-grpc/grpc", branch: "feat/release-1.0.0-rc.1", sparse: "grpc_core", optional: true, override: true}, + {:grpc, + github: "elixir-grpc/grpc", branch: "feat/release-1.0.0-rc.1", sparse: "grpc", optional: true, override: true}, {:protobuf, "~> 0.12 or ~> 0.13 or ~> 0.14 or ~> 0.15 or ~> 0.16", optional: true}, {:ex_doc, "~> 0.23", only: :docs}, {:bypass, "~> 2.1", only: :test} diff --git a/mix.lock b/mix.lock index 47fc13f..96b55fd 100644 --- a/mix.lock +++ b/mix.lock @@ -12,9 +12,8 @@ "gen_stage": {:hex, :gen_stage, "1.3.2", "7c77e5d1e97de2c6c2f78f306f463bca64bf2f4c3cdd606affc0100b89743b7b", [:mix], [], "hexpm", "0ffae547fa777b3ed889a6b9e1e64566217413d018cabd825f786e843ffe63e7"}, "googleapis": {:hex, :googleapis, "0.1.0", "13770f3f75f5b863fb9acf41633c7bc71bad788f3f553b66481a096d083ee20e", [:mix], [{:protobuf, "~> 0.12", [hex: :protobuf, repo: "hexpm", optional: false]}], "hexpm", "1989a7244fd17d3eb5f3de311a022b656c3736b39740db46506157c4604bd212"}, "goth": {:hex, :goth, "1.4.5", "ee37f96e3519bdecd603f20e7f10c758287088b6d77c0147cd5ee68cf224aade", [:mix], [{:finch, "~> 0.17", [hex: :finch, repo: "hexpm", optional: false]}, {:jason, "~> 1.1", [hex: :jason, repo: "hexpm", optional: false]}, {:jose, "~> 1.11", [hex: :jose, repo: "hexpm", optional: false]}], "hexpm", "0fc2dce5bd710651ed179053d0300ce3a5d36afbdde11e500d57f05f398d5ed5"}, - "grpc": {:hex, :grpc, "0.11.5", "5dbde9420718b58712779ad98fff1ef50349ca0fa7cc0858ae0f826015068654", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:cowboy, "~> 2.10", [hex: :cowboy, repo: "hexpm", optional: false]}, {:cowlib, "~> 2.12", [hex: :cowlib, repo: "hexpm", optional: false]}, {:flow, "~> 1.2", [hex: :flow, repo: "hexpm", optional: false]}, {:googleapis, "~> 0.1.0", [hex: :googleapis, repo: "hexpm", optional: false]}, {:gun, "~> 2.0", [hex: :gun, repo: "hexpm", optional: false]}, {:jason, ">= 0.0.0", [hex: :jason, repo: "hexpm", optional: false]}, {:mint, "~> 1.5", [hex: :mint, repo: "hexpm", optional: false]}, {:protobuf, "~> 0.14", [hex: :protobuf, repo: "hexpm", optional: false]}, {:telemetry, "~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "0a5d8673ef16649bef0903bca01c161acfc148e4d269133b6834b2af1f07f45e"}, - "grpc_client": {:git, "https://github.com/elixir-grpc/grpc.git", "abc5e1e7d4fdb9db4eedd830a8cc5c5414a7dabd", [sparse: "grpc_client"]}, - "grpc_core": {:git, "https://github.com/elixir-grpc/grpc.git", "abc5e1e7d4fdb9db4eedd830a8cc5c5414a7dabd", [sparse: "grpc_core"]}, + "grpc": {:git, "https://github.com/elixir-grpc/grpc.git", "96c39a316baf1322499a1e75c9e187fe5d734a08", [branch: "feat/release-1.0.0-rc.1", sparse: "grpc"]}, + "grpc_core": {:git, "https://github.com/elixir-grpc/grpc.git", "96c39a316baf1322499a1e75c9e187fe5d734a08", [branch: "feat/release-1.0.0-rc.1", sparse: "grpc_core"]}, "gun": {:hex, :gun, "2.2.0", "b8f6b7d417e277d4c2b0dc3c07dfdf892447b087f1cc1caff9c0f556b884e33d", [:make, :rebar3], [{:cowlib, ">= 2.15.0 and < 3.0.0", [hex: :cowlib, repo: "hexpm", optional: false]}], "hexpm", "76022700c64287feb4df93a1795cff6741b83fb37415c40c34c38d2a4645261a"}, "hpax": {:hex, :hpax, "1.0.3", "ed67ef51ad4df91e75cc6a1494f851850c0bd98ebc0be6e81b026e765ee535aa", [:mix], [], "hexpm", "8eab6e1cfa8d5918c2ce4ba43588e894af35dbd8e91e6e55c817bca5847df34a"}, "jason": {:hex, :jason, "1.4.4", "b9226785a9aa77b6857ca22832cffa5d5011a667207eb2a0ad56adb5db443b8a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "c5eb0cab91f094599f94d55bc63409236a8ec69a21a67814529e8d5f6cc90b3b"}, diff --git a/test/broadway_cloud_pub_sub/streaming/stream_manager_test.exs b/test/broadway_cloud_pub_sub/streaming/stream_manager_test.exs index 5847f0e..da0ef62 100644 --- a/test/broadway_cloud_pub_sub/streaming/stream_manager_test.exs +++ b/test/broadway_cloud_pub_sub/streaming/stream_manager_test.exs @@ -59,6 +59,16 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do opts = base_config() |> Keyword.put(:broadway_name, broadway_name) |> Keyword.merge(extra_opts) + # Mirror what Producer.prepare_for_start/2 does: call grpc_client.init/1 and + # store the resulting config so StreamManager can read it from its config map. + grpc_client = Keyword.get(opts, :grpc_client, BroadwayCloudPubSub.Streaming.GrpcClient) + {:ok, grpc_client_config} = grpc_client.init(opts) + + opts = + opts + |> Keyword.put(:grpc_client, grpc_client) + |> Keyword.put(:grpc_client_config, grpc_client_config) + rpc_client_name = Module.concat(broadway_name, UnaryRpcClient) batcher_name = Module.concat(broadway_name, AckBatcher) @@ -894,6 +904,15 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do |> Keyword.put(:broadway_name, broadway_name) |> Keyword.merge(extra_opts) + # Mirror what Producer.prepare_for_start/2 does + grpc_client = Keyword.get(opts, :grpc_client, BroadwayCloudPubSub.Streaming.GrpcClient) + {:ok, grpc_client_config} = grpc_client.init(opts) + + opts = + opts + |> Keyword.put(:grpc_client, grpc_client) + |> Keyword.put(:grpc_client_config, grpc_client_config) + rpc_client_name = Module.concat(broadway_name, UnaryRpcClient) batcher_name = Module.concat(broadway_name, AckBatcher) @@ -1107,6 +1126,14 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do |> Keyword.put(:broadway_name, broadway_name) |> Keyword.put(:retry_deadline_ms, 60_000) + grpc_client = Keyword.get(opts, :grpc_client, BroadwayCloudPubSub.Streaming.GrpcClient) + {:ok, grpc_client_config} = grpc_client.init(opts) + + opts = + opts + |> Keyword.put(:grpc_client, grpc_client) + |> Keyword.put(:grpc_client_config, grpc_client_config) + rpc_client_name = Module.concat(broadway_name, UnaryRpcClient) batcher_name = Module.concat(broadway_name, AckBatcher) @@ -1152,6 +1179,14 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do |> Keyword.put(:broadway_name, broadway_name) |> Keyword.put(:retry_deadline_ms, 60_000) + grpc_client = Keyword.get(opts, :grpc_client, BroadwayCloudPubSub.Streaming.GrpcClient) + {:ok, grpc_client_config} = grpc_client.init(opts) + + opts = + opts + |> Keyword.put(:grpc_client, grpc_client) + |> Keyword.put(:grpc_client_config, grpc_client_config) + rpc_client_name = Module.concat(broadway_name, UnaryRpcClient) batcher_name = Module.concat(broadway_name, AckBatcher) @@ -1198,6 +1233,14 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do |> Keyword.put(:broadway_name, broadway_name) |> Keyword.put(:retry_deadline_ms, 60_000) + grpc_client = Keyword.get(opts, :grpc_client, BroadwayCloudPubSub.Streaming.GrpcClient) + {:ok, grpc_client_config} = grpc_client.init(opts) + + opts = + opts + |> Keyword.put(:grpc_client, grpc_client) + |> Keyword.put(:grpc_client_config, grpc_client_config) + rpc_client_name = Module.concat(broadway_name, UnaryRpcClient) batcher_name = Module.concat(broadway_name, AckBatcher) diff --git a/test/broadway_cloud_pub_sub/streaming/unary_rpc_client_test.exs b/test/broadway_cloud_pub_sub/streaming/unary_rpc_client_test.exs index 43d89bc..957ab75 100644 --- a/test/broadway_cloud_pub_sub/streaming/unary_rpc_client_test.exs +++ b/test/broadway_cloud_pub_sub/streaming/unary_rpc_client_test.exs @@ -63,6 +63,15 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClientTest do defp start_client_no_channel(extra_opts \\ []) do opts = Keyword.merge(base_config_bad_token(), extra_opts) + # Mirror what Producer.prepare_for_start/2 does: derive grpc_client_config. + grpc_client = Keyword.get(opts, :grpc_client, BroadwayCloudPubSub.Streaming.GrpcClient) + {:ok, grpc_client_config} = grpc_client.init(opts) + + opts = + opts + |> Keyword.put(:grpc_client, grpc_client) + |> Keyword.put(:grpc_client_config, grpc_client_config) + {:ok, pid} = UnaryRpcClient.start_link(opts) pid end @@ -75,8 +84,9 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClientTest do test "registers under :name when provided" do name = Module.concat(__MODULE__, Named) - {:ok, _pid} = UnaryRpcClient.start_link(Keyword.put(base_config_bad_token(), :name, name)) + pid = start_client_no_channel(name: name) assert Process.whereis(name) != nil + assert Process.alive?(pid) end test "channel is nil when initial token fetch fails" do From f3ae9e824aa304bed833237824519153fb71fddf Mon Sep 17 00:00:00 2001 From: Rock Date: Wed, 8 Apr 2026 13:35:27 +0200 Subject: [PATCH 07/29] feat: add telemetry instrumentation with metadata Add comprehensive telemetry events for the streaming producer with rich metadata including subscription, project, and topic information. Emit events for stream connections, message receipt, acknowledgments, and errors to enable observability and monitoring. Key changes: - Add Streaming.Telemetry module with event definitions - Attach metadata (subscription, project_id) to all telemetry events - Instrument GrpcClient, AckBatcher, and UnaryRpcClient - Add TelemetryHelper test support module - Bump grpc dependency to ~> 1.0.0-rc.1 --- .../streaming/ack_batcher.ex | 49 ++++---- .../streaming/client.ex | 2 +- .../streaming/grpc_client.ex | 64 +++++----- .../streaming/options.ex | 40 ++++++ .../streaming/producer.ex | 117 ++++++++++++++---- .../streaming/stream_manager.ex | 39 +++--- .../streaming/telemetry.ex | 66 ++++++++++ .../streaming/unary_rpc_client.ex | 38 ++---- mix.exs | 5 +- mix.lock | 4 +- .../pull_client_test.exs | 35 +++--- .../streaming/ack_batcher_test.exs | 103 +++++++++++++++ .../streaming/options_test.exs | 61 +++++++++ .../streaming/stream_manager_test.exs | 110 ++++++++++++++-- .../streaming/unary_rpc_client_test.exs | 64 +++++++++- test/support/telemetry_helper.ex | 33 +++++ 16 files changed, 674 insertions(+), 156 deletions(-) create mode 100644 lib/broadway_cloud_pub_sub/streaming/telemetry.ex create mode 100644 test/support/telemetry_helper.ex diff --git a/lib/broadway_cloud_pub_sub/streaming/ack_batcher.ex b/lib/broadway_cloud_pub_sub/streaming/ack_batcher.ex index 919d662..75afab4 100644 --- a/lib/broadway_cloud_pub_sub/streaming/ack_batcher.ex +++ b/lib/broadway_cloud_pub_sub/streaming/ack_batcher.ex @@ -7,12 +7,15 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcher do use GenServer - alias BroadwayCloudPubSub.Streaming.UnaryRpcClient + alias BroadwayCloudPubSub.Streaming.{Telemetry, UnaryRpcClient} @max_modack_attempts 3 defstruct [ :rpc_client, + :broadway_name, + :subscription, + :telemetry_metadata, :batch_interval_ms, :batch_max_size, :timer_ref, @@ -100,6 +103,9 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcher do state = %__MODULE__{ rpc_client: config.rpc_client, + broadway_name: config[:broadway_name], + subscription: config[:subscription], + telemetry_metadata: config[:telemetry_metadata], batch_interval_ms: config.ack_batch_interval_ms, batch_max_size: config.ack_batch_max_size, retry_deadline_ms: config[:retry_deadline_ms] @@ -192,10 +198,10 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcher do # next timer tick to avoid a noproc crash while UnaryRpcClient is restarting. case GenServer.whereis(state.rpc_client) do nil -> - :telemetry.execute( - [:broadway_cloud_pub_sub, :stream, :flush_deferred], + emit_telemetry( + :flush_deferred, %{ack_count: state.ack_count, modack_groups: map_size(state.modack_ids)}, - %{} + state ) schedule_flush(state) @@ -249,15 +255,15 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcher do remaining {:ok, remaining_ids} -> - keep = apply_modack_retry_limit(remaining_ids, state.modack_attempts) + keep = apply_modack_retry_limit(remaining_ids, state.modack_attempts, state) if keep == [], do: remaining, else: Map.put(remaining, deadline, keep) {:error, {_rpc_error, transient_ids}} when is_list(transient_ids) -> - keep = apply_modack_retry_limit(transient_ids, state.modack_attempts) + keep = apply_modack_retry_limit(transient_ids, state.modack_attempts, state) if keep == [], do: remaining, else: Map.put(remaining, deadline, keep) {:error, _reason} -> - keep = apply_modack_retry_limit(ids, state.modack_attempts) + keep = apply_modack_retry_limit(ids, state.modack_attempts, state) if keep == [], do: remaining, else: Map.put(remaining, deadline, keep) end end) @@ -282,16 +288,12 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcher do end # Drops modack ids that have reached the maximum attempt count and emits telemetry. - defp apply_modack_retry_limit(ids, attempts) do + defp apply_modack_retry_limit(ids, attempts, state) do {keep, drop} = Enum.split_with(ids, fn id -> Map.get(attempts, id, 0) < @max_modack_attempts end) if drop != [] do - :telemetry.execute( - [:broadway_cloud_pub_sub, :stream, :modack_retry_exhausted], - %{count: length(drop)}, - %{} - ) + emit_telemetry(:modack_retry_exhausted, %{count: length(drop)}, state) end keep @@ -322,11 +324,7 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcher do end) if expired != [] do - :telemetry.execute( - [:broadway_cloud_pub_sub, :stream, :ack_retry_expired], - %{count: length(expired)}, - %{} - ) + emit_telemetry(:ack_retry_expired, %{count: length(expired)}, state) end clean_ts = Map.drop(state.ack_first_queued, expired) @@ -353,11 +351,7 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcher do end) if expired_count > 0 do - :telemetry.execute( - [:broadway_cloud_pub_sub, :stream, :modack_retry_expired], - %{count: expired_count}, - %{} - ) + emit_telemetry(:modack_retry_expired, %{count: expired_count}, state) end still_pending = remaining_modacks |> Map.values() |> List.flatten() |> MapSet.new() @@ -395,4 +389,13 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcher do %{state | timer_ref: nil} end + + defp emit_telemetry(event, measurements, state) do + metadata = %{ + name: state.broadway_name, + subscription: state.subscription + } + + Telemetry.execute(:ack_batcher, event, measurements, metadata, state.telemetry_metadata) + end end diff --git a/lib/broadway_cloud_pub_sub/streaming/client.ex b/lib/broadway_cloud_pub_sub/streaming/client.ex index ed271cc..aea0d5d 100644 --- a/lib/broadway_cloud_pub_sub/streaming/client.ex +++ b/lib/broadway_cloud_pub_sub/streaming/client.ex @@ -49,7 +49,7 @@ defmodule BroadwayCloudPubSub.Streaming.Client do Invoked once during producer startup to normalize options into a `config` term. The `config` term is stored in state and forwarded as the last argument to all - subsequent callbacks, analogous to how `BroadwayCloudPubSub.Client.init/1` + subsequent callbacks, analogous to how `c:BroadwayCloudPubSub.Client.init/1` works for the pull producer. """ @callback init(opts :: keyword()) :: {:ok, config()} | {:error, term()} diff --git a/lib/broadway_cloud_pub_sub/streaming/grpc_client.ex b/lib/broadway_cloud_pub_sub/streaming/grpc_client.ex index f367b6f..f33dc0a 100644 --- a/lib/broadway_cloud_pub_sub/streaming/grpc_client.ex +++ b/lib/broadway_cloud_pub_sub/streaming/grpc_client.ex @@ -14,21 +14,25 @@ defmodule BroadwayCloudPubSub.Streaming.GrpcClient do This module emits the following telemetry events: - * `[:broadway_cloud_pub_sub, :streaming, :ack, :start | :stop | :exception]` — emitted - as a span when sending an `Acknowledge` unary RPC. + * `[:broadway_cloud_pub_sub, :streaming, :grpc_client, :ack, :start | :stop | :exception]` — + emitted as a span when sending an `Acknowledge` unary RPC. Measurements: as described in `:telemetry.span/3`. Metadata: `%{name: broadway_name, subscription: subscription, count: ack_count}` - * `[:broadway_cloud_pub_sub, :streaming, :modack, :start | :stop | :exception]` — emitted - as a span when sending a `ModifyAckDeadline` unary RPC. + * `[:broadway_cloud_pub_sub, :streaming, :grpc_client, :modack, :start | :stop | :exception]` — + emitted as a span when sending a `ModifyAckDeadline` unary RPC. Measurements: as described in `:telemetry.span/3`. Metadata: `%{name: broadway_name, subscription: subscription, count: ack_count}` + + Custom `BroadwayCloudPubSub.Streaming.Client` implementations that wish to emit + the same events should use the same event name prefix and metadata shape. """ @behaviour BroadwayCloudPubSub.Streaming.Client + alias BroadwayCloudPubSub.Streaming.Telemetry alias Google.Pubsub.V1.Subscriber.Stub alias Google.Pubsub.V1.{AcknowledgeRequest, ModifyAckDeadlineRequest} @@ -51,13 +55,10 @@ defmodule BroadwayCloudPubSub.Streaming.GrpcClient do @impl BroadwayCloudPubSub.Streaming.Client def disconnect(channel, _config) do - try do - GRPC.Stub.disconnect(channel) - catch - _, _ -> :ok - end - + GRPC.Stub.disconnect(channel) :ok + catch + _, _ -> :ok end @impl BroadwayCloudPubSub.Streaming.Client @@ -82,26 +83,27 @@ defmodule BroadwayCloudPubSub.Streaming.GrpcClient do @impl BroadwayCloudPubSub.Streaming.Client def cancel(stream, _config) do - try do - GRPC.Stub.cancel(stream) - catch - _, _ -> :ok - end - + GRPC.Stub.cancel(stream) :ok + catch + _, _ -> :ok end @impl BroadwayCloudPubSub.Streaming.Client def acknowledge(channel, %AcknowledgeRequest{ack_ids: ack_ids} = request, config) do - :telemetry.span( - [:broadway_cloud_pub_sub, :streaming, :ack], - %{name: config.broadway_name, subscription: config.subscription}, + count = length(ack_ids) + metadata = %{name: config.broadway_name, subscription: config.subscription, count: count} + + Telemetry.span( + :grpc_client, + :ack, + metadata, fn -> result = Stub.acknowledge(channel, request, timeout: @unary_rpc_timeout_ms) - {result, - %{name: config.broadway_name, subscription: config.subscription, count: length(ack_ids)}} - end + {result, %{}} + end, + Map.get(config, :telemetry_metadata) ) end @@ -111,20 +113,22 @@ defmodule BroadwayCloudPubSub.Streaming.GrpcClient do %ModifyAckDeadlineRequest{ack_ids: ack_ids} = request, config ) do - :telemetry.span( - [:broadway_cloud_pub_sub, :streaming, :modack], - %{name: config.broadway_name, subscription: config.subscription}, + count = length(ack_ids) + metadata = %{name: config.broadway_name, subscription: config.subscription, count: count} + + Telemetry.span( + :grpc_client, + :modack, + metadata, fn -> result = Stub.modify_ack_deadline(channel, request, timeout: @unary_rpc_timeout_ms) - {result, - %{name: config.broadway_name, subscription: config.subscription, count: length(ack_ids)}} - end + {result, %{}} + end, + Map.get(config, :telemetry_metadata) ) end - # --- Private --- - defp fetch_token(%{token_generator: {mod, fun, args}}) do apply(mod, fun, args) end diff --git a/lib/broadway_cloud_pub_sub/streaming/options.ex b/lib/broadway_cloud_pub_sub/streaming/options.ex index fb06936..7828307 100644 --- a/lib/broadway_cloud_pub_sub/streaming/options.ex +++ b/lib/broadway_cloud_pub_sub/streaming/options.ex @@ -289,6 +289,25 @@ defmodule BroadwayCloudPubSub.Streaming.Options do """ ], + telemetry_metadata: [ + type: {:custom, __MODULE__, :type_telemetry_metadata, [[]]}, + doc: """ + Extra data to attach to every telemetry event emitted by the streaming + producer. The value is included in the event metadata under the `:extra` + key. + + Accepts either: + + * A static term (e.g. a map or keyword list) — stored once and + included verbatim in every event. + * An `{module, function, args}` tuple — called on every event + emission; its return value is used as the `:extra` value. Useful + for attaching dynamic data such as node names or runtime counters. + + When not set, no `:extra` key is added to event metadata. + """ + ], + # Testing options test_pid: [type: :pid, doc: false] ] @@ -391,4 +410,25 @@ defmodule BroadwayCloudPubSub.Streaming.Options do "expected :#{name} to be :gun, :mint, or a module implementing GRPC.Client.Adapter, " <> "got: #{inspect(value)}"} end + + @doc false + def type_telemetry_metadata({m, f, a}, _opts) when is_atom(m) and is_atom(f) and is_list(a) do + case Code.ensure_loaded(m) do + {:module, ^m} -> + if function_exported?(m, f, length(a)) do + {:ok, {m, f, a}} + else + {:error, + "expected :telemetry_metadata MFA to be an exported function, " <> + "but #{inspect(m)}.#{f}/#{length(a)} is not exported"} + end + + {:error, _} -> + {:error, + "expected :telemetry_metadata MFA to reference a loaded module, " <> + "but #{inspect(m)} could not be loaded"} + end + end + + def type_telemetry_metadata(term, _opts), do: {:ok, term} end diff --git a/lib/broadway_cloud_pub_sub/streaming/producer.ex b/lib/broadway_cloud_pub_sub/streaming/producer.ex index c8cc1ad..f1231f5 100644 --- a/lib/broadway_cloud_pub_sub/streaming/producer.ex +++ b/lib/broadway_cloud_pub_sub/streaming/producer.ex @@ -133,9 +133,17 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do ## Telemetry This producer emits the following [Telemetry](https://github.com/beam-telemetry/telemetry) - events. All events include metadata `%{name: broadway_name, subscription: subscription}`. + events. All events share the top-level prefix `[:broadway_cloud_pub_sub, :streaming]`, + followed by a layer sub-prefix. - ### Stream events (prefix: `[:broadway_cloud_pub_sub, :stream, ...]`) + All event metadata maps include an `:extra` key when the `:telemetry_metadata` option + is configured. Its value is the static term provided, or the return value of the MFA + called at emission time. + + ### Stream events — `[:broadway_cloud_pub_sub, :streaming, :stream, ...]` + + Emitted by `StreamManager`. Metadata: `%{name: broadway_name, subscription: subscription}` + (plus `:extra` when `:telemetry_metadata` is set). * `:connect` — gRPC StreamingPull stream successfully established. @@ -143,7 +151,10 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do * `:disconnect` — gRPC stream closed or errored. - Measurements: `%{reason: term()}` + Measurements: `%{}` + + Metadata includes: `reason: term()` — the error or close reason + (e.g. a `GRPC.RPCError`, `:stream_closed`, `:connection_down`). * `:receive_messages` — messages received from the stream and forwarded to the producer. @@ -157,35 +168,30 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do * `:terminal_error` — non-retryable gRPC error received. StreamManager stops after this event. - Measurements: `%{reason: term()}` - - * `:connection_failure` — connection attempt failed before the stream was - established (unary RPC client). - - Measurements: `%{reason: term()}` - - * `:ack_failure` — an acknowledge RPC failed after retries (unary RPC client). - - Measurements: `%{count: pos_integer()}` + Measurements: `%{}` - * `:modack_failure` — a modifyAckDeadline RPC failed after retries (unary RPC client). + Metadata includes: `reason: term()` — the terminal gRPC error. - Measurements: `%{count: pos_integer()}` + * `:connection_failure` — connection attempt failed before the stream was + established. - * `:permanent_failure` — one or more ack_ids were permanently rejected by - the server (e.g. ack_id expired). These are dropped and not retried. + Measurements: `%{}` - Measurements: `%{count: pos_integer()}` + Metadata includes: `reason: term()` — the connection error. - * `:keepalive` — HTTP/2 PING frame sent on the gRPC connection to keep it - alive. Only emitted when using the `:gun` adapter. + * `:keepalive` — keep-alive ping sent on the gRPC connection. - Measurements: `%{}` + Measurements: `%{deadline: pos_integer()}` * `:extend_leases` — lease extension cycle ran; modack requests dispatched for outstanding messages. - Measurements: `%{count: non_neg_integer()}` + Measurements: `%{count: non_neg_integer(), deadline: pos_integer()}` + + * `:lease_expired` — outstanding messages dropped because they exceeded + `:max_extension_ms`. + + Measurements: `%{count: pos_integer()}` * `:drain_timeout` — graceful shutdown drain timed out before all in-flight messages were processed. @@ -201,11 +207,71 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do Measurements: `%{delay: non_neg_integer()}` - * `:flush_deferred` — AckBatcher flush deferred because UnaryRpcClient was - not yet available (e.g. restarting). + * `:receipt_modack_stale` — pending exactly-once receipt modacks swept out + as stale and nacked for fast redelivery. + + Measurements: `%{count: pos_integer()}` + + ### AckBatcher events — `[:broadway_cloud_pub_sub, :streaming, :ack_batcher, ...]` + + Emitted by `AckBatcher`. Metadata: `%{name: broadway_name, subscription: subscription}` + (plus `:extra` when `:telemetry_metadata` is set). + + * `:flush_deferred` — flush deferred because UnaryRpcClient was not yet + available (e.g. restarting after a crash). Measurements: `%{ack_count: non_neg_integer(), modack_groups: non_neg_integer()}` + * `:modack_retry_exhausted` — modack ack_ids dropped after reaching the + maximum retry attempt count. + + Measurements: `%{count: pos_integer()}` + + * `:ack_retry_expired` — ack ack_ids dropped because they exceeded the + exactly-once retry deadline. + + Measurements: `%{count: pos_integer()}` + + * `:modack_retry_expired` — modack ack_ids dropped because they exceeded the + exactly-once retry deadline. + + Measurements: `%{count: pos_integer()}` + + ### Unary RPC client events — `[:broadway_cloud_pub_sub, :streaming, :unary, ...]` + + Emitted by `UnaryRpcClient`. Metadata: `%{name: broadway_name, subscription: subscription}` + (plus `:extra` when `:telemetry_metadata` is set). + + * `:connect` — unary RPC channel reconnected after a failure. + + Measurements: `%{}` + + * `:connection_failure` — unary RPC channel connect attempt failed. + + Measurements: `%{}` + + Metadata includes: `reason: term()` — the connection error. + + * `:permanent_failure` — one or more ack_ids were permanently rejected by + the server (e.g. ack_id expired). These are dropped and not retried. + + Measurements: `%{count: pos_integer()}` + + ### gRPC client spans — `[:broadway_cloud_pub_sub, :streaming, :grpc_client, ...]` + + Emitted by `GrpcClient` (the default `BroadwayCloudPubSub.Streaming.Client` + implementation) as `:telemetry.span/3` spans. + Metadata: `%{name: broadway_name, subscription: subscription, count: ack_count}` + (plus `:extra` when `:telemetry_metadata` is set). + + * `:ack` — wraps each `Acknowledge` unary RPC call. + + Events: `[:broadway_cloud_pub_sub, :streaming, :grpc_client, :ack, :start | :stop | :exception]` + + * `:modack` — wraps each `ModifyAckDeadline` unary RPC call. + + Events: `[:broadway_cloud_pub_sub, :streaming, :grpc_client, :modack, :start | :stop | :exception]` + ## Pub/Sub Emulator To use with the local Pub/Sub emulator: @@ -278,7 +344,8 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do :ack_batch_interval_ms, :ack_batch_max_size, :retry_deadline_ms, - :grpc_client + :grpc_client, + :telemetry_metadata ]) |> Keyword.put(:grpc_client_config, client_config) |> Keyword.put(:broadway_name, broadway_name) diff --git a/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex b/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex index d53d68a..366b59d 100644 --- a/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex +++ b/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex @@ -15,7 +15,8 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do AckBatcher, AckTimeDistribution, ErrorClassifier, - StreamReader + StreamReader, + Telemetry } alias Google.Pubsub.V1.StreamingPullRequest @@ -202,7 +203,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do {:noreply, new_state} {:error, reason, new_state} -> - emit_telemetry(:connection_failure, %{reason: reason}, state.config) + emit_telemetry(:connection_failure, %{}, state.config, %{reason: reason}) {:noreply, schedule_reconnect(new_state)} end end @@ -360,18 +361,18 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do "Terminal gRPC stream error on subscription #{state.config.subscription} - reason: #{inspect(error)}. Stopping StreamManager." ) - emit_telemetry(:terminal_error, %{reason: error}, state.config) + emit_telemetry(:terminal_error, %{}, state.config, %{reason: error}) {:stop, {:terminal_error, error}, close_stream(state)} :retryable -> - emit_telemetry(:disconnect, %{reason: error}, state.config) + emit_telemetry(:disconnect, %{}, state.config, %{reason: error}) {:noreply, schedule_reconnect(reset_connection(state, error))} end end # Server closed the stream normally (StreamReader enumeration exhausted). def handle_info({:stream_closed}, state) do - emit_telemetry(:disconnect, %{reason: :stream_closed}, state.config) + emit_telemetry(:disconnect, %{}, state.config, %{reason: :stream_closed}) # Stream ended naturally; nil out grpc_stream to skip cancel in close_stream/1. # See decisions.md for why cancelling after a server-initiated close crashes the Mint ConnectionProcess. @@ -388,7 +389,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do # Only reconnect if grpc_stream is still set (stream_closed not yet processed). def handle_info({:EXIT, pid, :normal}, %{reader_pid: pid} = state) do if state.grpc_stream do - emit_telemetry(:disconnect, %{reason: :stream_closed}, state.config) + emit_telemetry(:disconnect, %{}, state.config, %{reason: :stream_closed}) # Same rationale as {:stream_closed}: skip cancel on natural close. state = %{state | grpc_stream: nil} @@ -405,7 +406,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do # StreamReader crashed — reconnect. def handle_info({:EXIT, pid, reason}, %{reader_pid: pid} = state) do - emit_telemetry(:disconnect, %{reason: reason}, state.config) + emit_telemetry(:disconnect, %{}, state.config, %{reason: reason}) {:noreply, schedule_reconnect(reset_connection(state, reason))} end @@ -440,7 +441,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do # Mint adapter signals connection loss. def handle_info({:elixir_grpc, :connection_down, conn_pid}, %{conn_pid: conn_pid} = state) do - emit_telemetry(:disconnect, %{reason: :connection_down}, state.config) + emit_telemetry(:disconnect, %{}, state.config, %{reason: :connection_down}) {:noreply, schedule_reconnect(reset_connection(state, :connection_down))} end @@ -449,7 +450,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do {:gun_down, conn_pid, _protocol, _reason, _killed_streams}, %{conn_pid: conn_pid} = state ) do - emit_telemetry(:disconnect, %{reason: :connection_down}, state.config) + emit_telemetry(:disconnect, %{}, state.config, %{reason: :connection_down}) {:noreply, schedule_reconnect(reset_connection(state, :connection_down))} end @@ -976,16 +977,16 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do # --- Private: telemetry --- - defp emit_telemetry(event, measurements, config) do - metadata = %{ - name: config.broadway[:name], - subscription: config.subscription - } + defp emit_telemetry(event, measurements, config, extra_metadata \\ %{}) do + metadata = + Map.merge( + %{ + name: config.broadway[:name], + subscription: config.subscription + }, + extra_metadata + ) - :telemetry.execute( - [:broadway_cloud_pub_sub, :stream, event], - measurements, - metadata - ) + Telemetry.execute(:stream, event, measurements, metadata, Map.get(config, :telemetry_metadata)) end end diff --git a/lib/broadway_cloud_pub_sub/streaming/telemetry.ex b/lib/broadway_cloud_pub_sub/streaming/telemetry.ex new file mode 100644 index 0000000..62bd3c0 --- /dev/null +++ b/lib/broadway_cloud_pub_sub/streaming/telemetry.ex @@ -0,0 +1,66 @@ +defmodule BroadwayCloudPubSub.Streaming.Telemetry do + @moduledoc false + + # Centralised telemetry helpers for the streaming Pub/Sub producer. + # + # All streaming telemetry events share the top-level prefix + # `[:broadway_cloud_pub_sub, :streaming]`, then a sub-prefix identifying + # the emitting layer: + # + # :stream — StreamManager (stream lifecycle, message flow, acks) + # :ack_batcher — AckBatcher (batch flush behaviour, retry exhaustion) + # :unary — UnaryRpcClient (channel lifecycle, permanent failures) + # :grpc_client — GrpcClient (unary RPC spans for ack and modack) + # + # Usage: + # + # Telemetry.execute(:stream, :connect, %{}, %{name: name, subscription: sub}) + # Telemetry.span(:grpc_client, :ack, %{name: name, subscription: sub}, fn -> ... end) + + @base [:broadway_cloud_pub_sub, :streaming] + + @doc """ + Executes a telemetry event under `[:broadway_cloud_pub_sub, :streaming, layer, event]`. + + `telemetry_metadata` is the raw `:telemetry_metadata` option value from the producer + config (a static term, an `{m, f, a}` tuple, or `nil`). When non-nil, its resolved + value is merged into `metadata` under the `:extra` key. + """ + @spec execute(atom(), atom(), map(), map(), term()) :: :ok + def execute(layer, event, measurements, metadata, telemetry_metadata) do + :telemetry.execute( + @base ++ [layer, event], + measurements, + maybe_put_extra(metadata, resolve_extra(telemetry_metadata)) + ) + end + + @doc """ + Wraps `fun` in a telemetry span under `[:broadway_cloud_pub_sub, :streaming, layer, event]`. + + Emits `:start`, `:stop`, and `:exception` events as per `:telemetry.span/3` semantics. + `fun` must return `{result, stop_metadata}`. + + `telemetry_metadata` is resolved once and merged into both the start metadata and the + stop metadata returned by `fun`, under the `:extra` key. + """ + @spec span(atom(), atom(), map(), (-> {term(), map()}), term()) :: term() + def span(layer, event, start_metadata, fun, telemetry_metadata) do + extra = resolve_extra(telemetry_metadata) + enriched_start = maybe_put_extra(start_metadata, extra) + + :telemetry.span(@base ++ [layer, event], enriched_start, fn -> + {result, stop_metadata} = fun.() + {result, maybe_put_extra(stop_metadata, extra)} + end) + end + + # --- Private --- + + defp resolve_extra(nil), do: nil + defp resolve_extra({m, f, a}), do: apply(m, f, a) + defp resolve_extra(term), do: term + + defp maybe_put_extra(metadata, nil), do: metadata + defp maybe_put_extra(metadata, extra), do: Map.put(metadata, :extra, extra) +end diff --git a/lib/broadway_cloud_pub_sub/streaming/unary_rpc_client.ex b/lib/broadway_cloud_pub_sub/streaming/unary_rpc_client.ex index c0e09f7..44d3537 100644 --- a/lib/broadway_cloud_pub_sub/streaming/unary_rpc_client.ex +++ b/lib/broadway_cloud_pub_sub/streaming/unary_rpc_client.ex @@ -32,7 +32,7 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClient do use GenServer alias BroadwayCloudPubSub.{Backoff} - alias BroadwayCloudPubSub.Streaming.{AckResult, ErrorClassifier} + alias BroadwayCloudPubSub.Streaming.{AckResult, ErrorClassifier, Telemetry} alias Google.Pubsub.V1.{AcknowledgeRequest, ModifyAckDeadlineRequest} require Logger @@ -147,7 +147,7 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClient do {:ok, %{state | channel: channel}} {:error, reason} -> - emit_telemetry(:connection_failure, %{reason: reason}, config) + emit_telemetry(:connection_failure, %{}, config, %{reason: reason}) {:ok, state} end end @@ -189,12 +189,6 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClient do ) end - emit_telemetry( - :ack_failure, - %{count: length(transient_ids), reason: error}, - state.config - ) - state = schedule_reconnect(state) {:reply, {:error, {error, transient_ids}}, state} @@ -244,12 +238,6 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClient do ) end - emit_telemetry( - :modack_failure, - %{count: length(transient_ids), deadline: deadline_seconds, reason: error}, - state.config - ) - state = schedule_reconnect(state) {:reply, {:error, {error, transient_ids}}, state} @@ -278,7 +266,7 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClient do {:noreply, %{state | channel: channel, backoff: backoff}} {:error, reason} -> - emit_telemetry(:connection_failure, %{reason: reason}, state.config) + emit_telemetry(:connection_failure, %{}, state.config, %{reason: reason}) {delay, new_backoff} = Backoff.backoff(state.backoff) Process.send_after(self(), :reconnect, delay || state.config.backoff_min) {:noreply, %{state | channel: nil, backoff: new_backoff, reconnect_pending: true}} @@ -335,17 +323,17 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClient do %{state | channel: nil} end - defp emit_telemetry(event, measurements, config) do - metadata = %{ - name: config.broadway_name, - subscription: config.subscription - } + defp emit_telemetry(event, measurements, config, extra_metadata \\ %{}) do + metadata = + Map.merge( + %{ + name: config.broadway_name, + subscription: config.subscription + }, + extra_metadata + ) - :telemetry.execute( - [:broadway_cloud_pub_sub, :unary, event], - measurements, - metadata - ) + Telemetry.execute(:unary, event, measurements, metadata, Map.get(config, :telemetry_metadata)) end # Splits ack_ids into {transient, permanent} based on per-ack-ID error details diff --git a/mix.exs b/mix.exs index 27ad1c1..967cdf1 100644 --- a/mix.exs +++ b/mix.exs @@ -39,10 +39,7 @@ defmodule BroadwayCloudPubSub.MixProject do {:telemetry, "~> 0.4.3 or ~> 1.0"}, {:goth, "~> 1.3", optional: true}, # TODO: Replace with Hex versions when grpc 1.0 is released - {:grpc_core, - github: "elixir-grpc/grpc", branch: "feat/release-1.0.0-rc.1", sparse: "grpc_core", optional: true, override: true}, - {:grpc, - github: "elixir-grpc/grpc", branch: "feat/release-1.0.0-rc.1", sparse: "grpc", optional: true, override: true}, + {:grpc, "~> 1.0.0-rc.1", optional: true}, {:protobuf, "~> 0.12 or ~> 0.13 or ~> 0.14 or ~> 0.15 or ~> 0.16", optional: true}, {:ex_doc, "~> 0.23", only: :docs}, {:bypass, "~> 2.1", only: :test} diff --git a/mix.lock b/mix.lock index 96b55fd..4c428f7 100644 --- a/mix.lock +++ b/mix.lock @@ -12,8 +12,8 @@ "gen_stage": {:hex, :gen_stage, "1.3.2", "7c77e5d1e97de2c6c2f78f306f463bca64bf2f4c3cdd606affc0100b89743b7b", [:mix], [], "hexpm", "0ffae547fa777b3ed889a6b9e1e64566217413d018cabd825f786e843ffe63e7"}, "googleapis": {:hex, :googleapis, "0.1.0", "13770f3f75f5b863fb9acf41633c7bc71bad788f3f553b66481a096d083ee20e", [:mix], [{:protobuf, "~> 0.12", [hex: :protobuf, repo: "hexpm", optional: false]}], "hexpm", "1989a7244fd17d3eb5f3de311a022b656c3736b39740db46506157c4604bd212"}, "goth": {:hex, :goth, "1.4.5", "ee37f96e3519bdecd603f20e7f10c758287088b6d77c0147cd5ee68cf224aade", [:mix], [{:finch, "~> 0.17", [hex: :finch, repo: "hexpm", optional: false]}, {:jason, "~> 1.1", [hex: :jason, repo: "hexpm", optional: false]}, {:jose, "~> 1.11", [hex: :jose, repo: "hexpm", optional: false]}], "hexpm", "0fc2dce5bd710651ed179053d0300ce3a5d36afbdde11e500d57f05f398d5ed5"}, - "grpc": {:git, "https://github.com/elixir-grpc/grpc.git", "96c39a316baf1322499a1e75c9e187fe5d734a08", [branch: "feat/release-1.0.0-rc.1", sparse: "grpc"]}, - "grpc_core": {:git, "https://github.com/elixir-grpc/grpc.git", "96c39a316baf1322499a1e75c9e187fe5d734a08", [branch: "feat/release-1.0.0-rc.1", sparse: "grpc_core"]}, + "grpc": {:hex, :grpc, "1.0.0-rc.1", "790336fc827f0a22521d443c1c89e941502ce1e3ef09160c7e4798b2e148b53d", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:grpc_core, "~> 1.0.0-rc.1", [hex: :grpc_core, repo: "hexpm", optional: false]}, {:gun, "~> 2.0", [hex: :gun, repo: "hexpm", optional: false]}, {:mint, "~> 1.5", [hex: :mint, repo: "hexpm", optional: false]}], "hexpm", "c60dcda2fb143769ba496bd86d33420a9e44d633555bc781deaba3668138372b"}, + "grpc_core": {:hex, :grpc_core, "1.0.0-rc.1", "d82957bca32937bb52df06596cca7550783acc139a06b70202a982ef8b59490e", [:mix], [{:googleapis, "~> 0.1.0", [hex: :googleapis, repo: "hexpm", optional: false]}, {:jason, ">= 0.0.0", [hex: :jason, repo: "hexpm", optional: false]}, {:protobuf, "~> 0.14", [hex: :protobuf, repo: "hexpm", optional: false]}, {:telemetry, "~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "c76233ea374421da562b5b022c22614e81f9cf862da93543cff93c37c085f136"}, "gun": {:hex, :gun, "2.2.0", "b8f6b7d417e277d4c2b0dc3c07dfdf892447b087f1cc1caff9c0f556b884e33d", [:make, :rebar3], [{:cowlib, ">= 2.15.0 and < 3.0.0", [hex: :cowlib, repo: "hexpm", optional: false]}], "hexpm", "76022700c64287feb4df93a1795cff6741b83fb37415c40c34c38d2a4645261a"}, "hpax": {:hex, :hpax, "1.0.3", "ed67ef51ad4df91e75cc6a1494f851850c0bd98ebc0be6e81b026e765ee535aa", [:mix], [], "hexpm", "8eab6e1cfa8d5918c2ce4ba43588e894af35dbd8e91e6e55c817bca5847df34a"}, "jason": {:hex, :jason, "1.4.4", "b9226785a9aa77b6857ca22832cffa5d5011a667207eb2a0ad56adb5db443b8a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "c5eb0cab91f094599f94d55bc63409236a8ec69a21a67814529e8d5f6cc90b3b"}, diff --git a/test/broadway_cloud_pub_sub/pull_client_test.exs b/test/broadway_cloud_pub_sub/pull_client_test.exs index 56d0aa2..ddc5ffe 100644 --- a/test/broadway_cloud_pub_sub/pull_client_test.exs +++ b/test/broadway_cloud_pub_sub/pull_client_test.exs @@ -5,6 +5,7 @@ defmodule BroadwayCloudPubSub.PullClientTest do alias BroadwayCloudPubSub.Acknowledger alias BroadwayCloudPubSub.PullClient + alias BroadwayCloudPubSub.Test.TelemetryHelper alias Broadway.Message @pull_response """ @@ -295,29 +296,27 @@ defmodule BroadwayCloudPubSub.PullClientTest do end test "exposes telemetry for pull requests", %{opts: base_opts} do + test_pid = self() + :telemetry.attach( :start_handler, [:broadway_cloud_pub_sub, :pull_client, :receive_messages, :start], - fn _name, _measurements, metadata, _config -> - send(self(), {:start, metadata}) - end, - %{} + &TelemetryHelper.handle_event_forward_test/4, + %{pid: test_pid, msg: :start} ) :telemetry.attach( :stop_handler, [:broadway_cloud_pub_sub, :pull_client, :receive_messages, :stop], - fn _name, measurements, _metadata, _config -> - send(self(), {:stop, measurements}) - end, - %{} + &TelemetryHelper.handle_event_forward_test/4, + %{pid: test_pid, msg: :stop} ) {:ok, opts} = base_opts |> Keyword.put(:max_number_of_messages, 5) |> PullClient.init() PullClient.receive_messages(10, & &1, opts) - assert_received {:start, metadata} - assert_received {:stop, measurements} + assert_received {:start, _measurements, metadata} + assert_received {:stop, measurements, _metadata} assert metadata.demand == 10 assert metadata.max_messages == 5 assert is_integer(measurements.duration) @@ -380,29 +379,27 @@ defmodule BroadwayCloudPubSub.PullClientTest do end test "emits telemetry events", %{opts: base_opts} do + test_pid = self() + :telemetry.attach( :start_handler, [:broadway_cloud_pub_sub, :pull_client, :ack, :start], - fn _name, _measurements, metadata, _config -> - send(self(), {:start, metadata}) - end, - %{} + &TelemetryHelper.handle_event_forward_test/4, + %{pid: test_pid, msg: :start} ) :telemetry.attach( :stop_handler, [:broadway_cloud_pub_sub, :pull_client, :ack, :stop], - fn _name, measurements, metadata, _config -> - send(self(), {:stop, measurements, metadata}) - end, - %{} + &TelemetryHelper.handle_event_forward_test/4, + %{pid: test_pid, msg: :stop} ) {:ok, opts} = PullClient.init(base_opts) PullClient.acknowledge(["1", "2", "3"], opts) - assert_received {:start, metadata} + assert_received {:start, _measurements, metadata} assert metadata.name == Broadway3 assert_received {:stop, measurements, metadata} assert is_integer(measurements.duration) diff --git a/test/broadway_cloud_pub_sub/streaming/ack_batcher_test.exs b/test/broadway_cloud_pub_sub/streaming/ack_batcher_test.exs index e539038..8433513 100644 --- a/test/broadway_cloud_pub_sub/streaming/ack_batcher_test.exs +++ b/test/broadway_cloud_pub_sub/streaming/ack_batcher_test.exs @@ -2,6 +2,7 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcherTest do use ExUnit.Case, async: true alias BroadwayCloudPubSub.Streaming.AckBatcher + alias BroadwayCloudPubSub.Test.TelemetryHelper # A spy GenServer that records every call it receives and forwards them to # the test process so we can assert on them. Returns :ok to all calls so @@ -91,6 +92,8 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcherTest do Keyword.merge( [ rpc_client: rpc_pid, + broadway_name: :TestPipeline, + subscription: "projects/test/subscriptions/test-sub", ack_batch_interval_ms: 50, ack_batch_max_size: 10 ], @@ -574,6 +577,106 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcherTest do end end + # ============================================================ + # Telemetry metadata + # ============================================================ + + describe "telemetry_metadata" do + # Helper: start a batcher whose rpc_client is an atom that is never registered, + # so GenServer.whereis/1 returns nil and every flush is deferred — which reliably + # triggers the :flush_deferred telemetry event without needing to kill a process. + defp start_batcher_no_rpc(extra_opts \\ []) do + # Use a unique atom so concurrent tests don't share the same unregistered name. + rpc_name = Module.concat(__MODULE__, "NeverStarted#{System.unique_integer([:positive])}") + + opts = + Keyword.merge( + [ + rpc_client: rpc_name, + broadway_name: :TestPipeline, + subscription: "projects/test/subscriptions/test-sub", + ack_batch_interval_ms: 100_000, + ack_batch_max_size: 10_000 + ], + extra_opts + ) + + {:ok, batcher} = AckBatcher.start_link(opts) + batcher + end + + test "telemetry events include name and subscription in metadata" do + test_pid = self() + batcher = start_batcher_no_rpc() + telemetry_name = "batcher-meta-#{inspect(batcher)}" + + :telemetry.attach( + telemetry_name, + [:broadway_cloud_pub_sub, :streaming, :ack_batcher, :flush_deferred], + &TelemetryHelper.handle_event_forward_test/4, + %{pid: test_pid, msg: :telemetry_meta} + ) + + AckBatcher.ack(batcher, ["id-1"]) + AckBatcher.flush(batcher) + + assert_receive {:telemetry_meta, _measurements, metadata}, 1_000 + assert metadata.name == :TestPipeline + assert metadata.subscription == "projects/test/subscriptions/test-sub" + refute Map.has_key?(metadata, :extra) + + :telemetry.detach(telemetry_name) + end + + test "static telemetry_metadata is included under :extra" do + extra = %{tenant_id: "acme"} + test_pid = self() + batcher = start_batcher_no_rpc(telemetry_metadata: extra) + telemetry_name = "batcher-extra-static-#{inspect(batcher)}" + + :telemetry.attach( + telemetry_name, + [:broadway_cloud_pub_sub, :streaming, :ack_batcher, :flush_deferred], + &TelemetryHelper.handle_event_forward_test/4, + %{pid: test_pid, msg: :telemetry_meta} + ) + + AckBatcher.ack(batcher, ["id-1"]) + AckBatcher.flush(batcher) + + assert_receive {:telemetry_meta, _measurements, metadata}, 1_000 + assert metadata.name == :TestPipeline + assert metadata.subscription == "projects/test/subscriptions/test-sub" + assert metadata.extra == extra + + :telemetry.detach(telemetry_name) + end + + test "MFA telemetry_metadata is called and result is included under :extra" do + test_pid = self() + batcher = start_batcher_no_rpc(telemetry_metadata: {__MODULE__, :dynamic_meta, []}) + telemetry_name = "batcher-extra-mfa-#{inspect(batcher)}" + + :telemetry.attach( + telemetry_name, + [:broadway_cloud_pub_sub, :streaming, :ack_batcher, :flush_deferred], + &TelemetryHelper.handle_event_forward_test/4, + %{pid: test_pid, msg: :telemetry_meta} + ) + + AckBatcher.ack(batcher, ["id-1"]) + AckBatcher.flush(batcher) + + assert_receive {:telemetry_meta, _measurements, metadata}, 1_000 + assert metadata.extra == %{dynamic: true} + + :telemetry.detach(telemetry_name) + end + end + + # MFA for telemetry_metadata test. + def dynamic_meta, do: %{dynamic: true} + # ============================================================ # Helpers # ============================================================ diff --git a/test/broadway_cloud_pub_sub/streaming/options_test.exs b/test/broadway_cloud_pub_sub/streaming/options_test.exs index 09cc9f3..1a0cb5d 100644 --- a/test/broadway_cloud_pub_sub/streaming/options_test.exs +++ b/test/broadway_cloud_pub_sub/streaming/options_test.exs @@ -319,4 +319,65 @@ defmodule BroadwayCloudPubSub.Streaming.OptionsTest do validate(subscription: "projects/p/subscriptions/s", enable_message_ordering: 1) end end + + describe "telemetry_metadata" do + test "is optional — omitting it leaves the key absent" do + {:ok, opts} = validate(subscription: "projects/p/subscriptions/s") + refute Keyword.has_key?(opts, :telemetry_metadata) + end + + test "accepts a static map" do + {:ok, opts} = + validate( + subscription: "projects/p/subscriptions/s", + telemetry_metadata: %{tenant_id: "acme", env: :prod} + ) + + assert opts[:telemetry_metadata] == %{tenant_id: "acme", env: :prod} + end + + test "accepts any static term (keyword list, atom, string)" do + {:ok, opts} = validate(subscription: "projects/p/subscriptions/s", telemetry_metadata: [a: 1]) + assert opts[:telemetry_metadata] == [a: 1] + + {:ok, opts} = validate(subscription: "projects/p/subscriptions/s", telemetry_metadata: :my_tag) + assert opts[:telemetry_metadata] == :my_tag + + {:ok, opts} = validate(subscription: "projects/p/subscriptions/s", telemetry_metadata: "label") + assert opts[:telemetry_metadata] == "label" + end + + test "accepts a valid MFA tuple" do + {:ok, opts} = + validate( + subscription: "projects/p/subscriptions/s", + telemetry_metadata: {__MODULE__, :sample_meta, []} + ) + + assert opts[:telemetry_metadata] == {__MODULE__, :sample_meta, []} + end + + test "rejects an MFA whose module is not loaded" do + assert {:error, err} = + validate( + subscription: "projects/p/subscriptions/s", + telemetry_metadata: {NotLoadedModuleXYZ, :some_fun, []} + ) + + assert Exception.message(err) =~ "could not be loaded" + end + + test "rejects an MFA whose function is not exported" do + assert {:error, err} = + validate( + subscription: "projects/p/subscriptions/s", + telemetry_metadata: {__MODULE__, :nonexistent_fun, []} + ) + + assert Exception.message(err) =~ "not exported" + end + end + + # Used in telemetry_metadata MFA tests above. + def sample_meta, do: %{node: node()} end diff --git a/test/broadway_cloud_pub_sub/streaming/stream_manager_test.exs b/test/broadway_cloud_pub_sub/streaming/stream_manager_test.exs index da0ef62..0809fab 100644 --- a/test/broadway_cloud_pub_sub/streaming/stream_manager_test.exs +++ b/test/broadway_cloud_pub_sub/streaming/stream_manager_test.exs @@ -4,7 +4,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do import ExUnit.CaptureLog alias BroadwayCloudPubSub.Streaming.{AckBatcher, StreamManager} - alias BroadwayCloudPubSub.Test.GrpcDynamicAdapter + alias BroadwayCloudPubSub.Test.{GrpcDynamicAdapter, TelemetryHelper} # Minimal config with enough keys to satisfy StreamManager.init/1 # (mirrors what Options produces after validation + defaults). @@ -95,6 +95,16 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do # before returning. Safe to use instead of :sys.get_state/1 for sync purposes. defp sync(pid), do: StreamManager.get_buffered(pid) + # Drain all messages currently in the test process mailbox. + # Used to discard stray telemetry events emitted before we start asserting. + defp flush_mailbox do + receive do + _ -> flush_mailbox() + after + 0 -> :ok + end + end + # Build a minimal ReceivedMessage for sending into {:stream_messages, ...}. defp received_message(ack_id, data) do %Google.Pubsub.V1.ReceivedMessage{ @@ -463,16 +473,14 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do :telemetry.attach( telemetry_name, - [:broadway_cloud_pub_sub, :stream, :terminal_error], - fn _event, measurements, _metadata, _config -> - send(test_pid, {:telemetry, :terminal_error, measurements}) - end, - nil + [:broadway_cloud_pub_sub, :streaming, :stream, :terminal_error], + &TelemetryHelper.handle_event_forward_test/4, + %{pid: test_pid, msg: :telemetry_terminal_error} ) send(pid, {:stream_error, %GRPC.RPCError{status: 5, message: "not found"}}) - assert_receive {:telemetry, :terminal_error, %{reason: _}}, 1_000 + assert_receive {:telemetry_terminal_error, %{}, %{reason: _}}, 1_000 :telemetry.detach(telemetry_name) end) @@ -482,6 +490,94 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do end end + describe "telemetry_metadata — :extra in stream event metadata" do + # We trigger a retryable stream error and observe the :disconnect event, + # which is emitted synchronously inside handle_info({:stream_error, ...}) + # before any reconnect timer is scheduled. This avoids races with the + # :reconnect event that fires from the initial failed connection attempt. + + test "static map is included under :extra in stream event metadata" do + extra = %{tenant_id: "acme", env: :prod} + test_pid = self() + telemetry_name = "test-extra-static-#{System.unique_integer([:positive])}" + + :telemetry.attach( + telemetry_name, + [:broadway_cloud_pub_sub, :streaming, :stream, :disconnect], + &TelemetryHelper.handle_event_forward_test/4, + %{pid: test_pid, msg: :telemetry_meta} + ) + + pid = start_manager(telemetry_metadata: extra, backoff_min: 60_000, backoff_max: 60_000) + # Drain the first :connection_failure + reconnect from init before attaching. + sync(pid) + flush_mailbox() + + # Trigger a :disconnect from a retryable stream error. + send(pid, {:stream_error, %GRPC.RPCError{status: 14, message: "unavailable"}}) + + assert_receive {:telemetry_meta, _measurements, metadata}, 1_000 + assert metadata.extra == extra + + :telemetry.detach(telemetry_name) + end + + test "MFA is called and its return value is included under :extra" do + test_pid = self() + telemetry_name = "test-extra-mfa-#{System.unique_integer([:positive])}" + + :telemetry.attach( + telemetry_name, + [:broadway_cloud_pub_sub, :streaming, :stream, :disconnect], + &TelemetryHelper.handle_event_forward_test/4, + %{pid: test_pid, msg: :telemetry_meta} + ) + + pid = + start_manager( + telemetry_metadata: {__MODULE__, :dynamic_meta, []}, + backoff_min: 60_000, + backoff_max: 60_000 + ) + + sync(pid) + flush_mailbox() + + send(pid, {:stream_error, %GRPC.RPCError{status: 14, message: "unavailable"}}) + + assert_receive {:telemetry_meta, _measurements, metadata}, 1_000 + assert metadata.extra == %{dynamic: true} + + :telemetry.detach(telemetry_name) + end + + test "no :extra key when telemetry_metadata is not set" do + test_pid = self() + telemetry_name = "test-no-extra-#{System.unique_integer([:positive])}" + + :telemetry.attach( + telemetry_name, + [:broadway_cloud_pub_sub, :streaming, :stream, :disconnect], + &TelemetryHelper.handle_event_forward_test/4, + %{pid: test_pid, msg: :telemetry_meta} + ) + + pid = start_manager(backoff_min: 60_000, backoff_max: 60_000) + sync(pid) + flush_mailbox() + + send(pid, {:stream_error, %GRPC.RPCError{status: 14, message: "unavailable"}}) + + assert_receive {:telemetry_meta, _measurements, metadata}, 1_000 + refute Map.has_key?(metadata, :extra) + + :telemetry.detach(telemetry_name) + end + end + + # MFA for telemetry_metadata test. + def dynamic_meta, do: %{dynamic: true} + describe "retryable gRPC errors trigger reconnect" do test "DEADLINE_EXCEEDED (4) schedules reconnect without stopping" do pid = start_manager(backoff_min: 10_000, backoff_max: 30_000) diff --git a/test/broadway_cloud_pub_sub/streaming/unary_rpc_client_test.exs b/test/broadway_cloud_pub_sub/streaming/unary_rpc_client_test.exs index 957ab75..78e4a5f 100644 --- a/test/broadway_cloud_pub_sub/streaming/unary_rpc_client_test.exs +++ b/test/broadway_cloud_pub_sub/streaming/unary_rpc_client_test.exs @@ -2,7 +2,7 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClientTest do use ExUnit.Case, async: true alias BroadwayCloudPubSub.Streaming.UnaryRpcClient - alias BroadwayCloudPubSub.Test.GrpcDynamicAdapter + alias BroadwayCloudPubSub.Test.{GrpcDynamicAdapter, TelemetryHelper} # ============================================================ # Chunking logic — pure caller-side, no GenServer needed @@ -234,8 +234,70 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClientTest do assert is_atom(state.config.broadway_name) refute Map.has_key?(state.config, :broadway) end + + test "static telemetry_metadata is emitted under :extra on connection_failure" do + test_pid = self() + extra = %{tenant_id: "acme"} + telemetry_name = "unary-extra-static-#{System.unique_integer([:positive])}" + + :telemetry.attach( + telemetry_name, + [:broadway_cloud_pub_sub, :streaming, :unary, :connection_failure], + &TelemetryHelper.handle_event_forward_test/4, + %{pid: test_pid, msg: :telemetry_meta} + ) + + # start_client_no_channel triggers a :connection_failure on init + _pid = start_client_no_channel(telemetry_metadata: extra) + + assert_receive {:telemetry_meta, _measurements, metadata}, 1_000 + assert metadata.extra == extra + + :telemetry.detach(telemetry_name) + end + + test "MFA telemetry_metadata is called and result is emitted under :extra" do + test_pid = self() + telemetry_name = "unary-extra-mfa-#{System.unique_integer([:positive])}" + + :telemetry.attach( + telemetry_name, + [:broadway_cloud_pub_sub, :streaming, :unary, :connection_failure], + &TelemetryHelper.handle_event_forward_test/4, + %{pid: test_pid, msg: :telemetry_meta} + ) + + _pid = start_client_no_channel(telemetry_metadata: {__MODULE__, :dynamic_meta, []}) + + assert_receive {:telemetry_meta, _measurements, metadata}, 1_000 + assert metadata.extra == %{dynamic: true} + + :telemetry.detach(telemetry_name) + end + + test "no :extra key when telemetry_metadata is not set" do + test_pid = self() + telemetry_name = "unary-no-extra-#{System.unique_integer([:positive])}" + + :telemetry.attach( + telemetry_name, + [:broadway_cloud_pub_sub, :streaming, :unary, :connection_failure], + &TelemetryHelper.handle_event_forward_test/4, + %{pid: test_pid, msg: :telemetry_meta} + ) + + _pid = start_client_no_channel() + + assert_receive {:telemetry_meta, _measurements, metadata}, 1_000 + refute Map.has_key?(metadata, :extra) + + :telemetry.detach(telemetry_name) + end end + # MFA for telemetry_metadata test. + def dynamic_meta, do: %{dynamic: true} + # ============================================================ # handle_info(:reconnect) — async reconnect path # ============================================================ diff --git a/test/support/telemetry_helper.ex b/test/support/telemetry_helper.ex new file mode 100644 index 0000000..888db82 --- /dev/null +++ b/test/support/telemetry_helper.ex @@ -0,0 +1,33 @@ +defmodule BroadwayCloudPubSub.Test.TelemetryHelper do + @moduledoc """ + Shared telemetry handler for tests. + + Provides a named public function to use with `:telemetry.attach/4` instead of + anonymous functions, avoiding the "local function" performance-penalty warning + that telemetry emits when a handler cannot be resolved to an explicit module. + + ## Usage + + :telemetry.attach( + handler_id, + [:some, :event], + &TelemetryHelper.handle_event_forward_test/4, + %{pid: test_pid, msg: :my_tag} + ) + + assert_receive {:my_tag, _measurements, metadata} + """ + + @doc """ + Telemetry handler that forwards the event to a test process. + + Expects the handler config to be a map with: + - `:pid` — the test process pid to send the message to + - `:msg` — the atom tag to use as the first element of the sent tuple + + The test process receives `{msg, measurements, metadata}`. + """ + def handle_event_forward_test(_event, measurements, metadata, %{pid: pid, msg: msg}) do + send(pid, {msg, measurements, metadata}) + end +end From a502960b376959793fea8ab04c869b55e13d49f5 Mon Sep 17 00:00:00 2001 From: Rock Date: Thu, 9 Apr 2026 16:35:24 +0200 Subject: [PATCH 08/29] feat: add gRPC interceptors and improve options management Add support for gRPC channel interceptors and improve the options handling across the streaming producer modules. Allow users to configure custom interceptors for logging, metrics, or auth middleware. Key changes: - Add interceptors option for gRPC channel configuration - Centralize options management through Streaming.Options - Support modifyAckDeadline with configurable deadlines - Pass options struct consistently through all streaming modules --- .../streaming/ack_batcher.ex | 33 +++++ .../streaming/grpc_client.ex | 4 +- .../streaming/options.ex | 58 +++++++- .../streaming/producer.ex | 76 ++++------ .../streaming/stream_manager.ex | 46 +++--- .../streaming/unary_ack_supervisor.ex | 11 +- .../streaming/unary_rpc_client.ex | 36 +++++ .../streaming/ack_batcher_test.exs | 93 ++++++++++++ .../streaming/options_test.exs | 140 ++++++++++++++++++ .../streaming/stream_manager_test.exs | 53 +++++++ .../streaming/unary_rpc_client_test.exs | 80 ++++++++++ 11 files changed, 556 insertions(+), 74 deletions(-) diff --git a/lib/broadway_cloud_pub_sub/streaming/ack_batcher.ex b/lib/broadway_cloud_pub_sub/streaming/ack_batcher.ex index 75afab4..4a9154e 100644 --- a/lib/broadway_cloud_pub_sub/streaming/ack_batcher.ex +++ b/lib/broadway_cloud_pub_sub/streaming/ack_batcher.ex @@ -32,6 +32,39 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcher do modack_attempts: %{} ] + @all_keys [ + :subscription, + :ack_batch_interval_ms, + :ack_batch_max_size, + :retry_deadline_ms, + :broadway_name, + :telemetry_metadata, + :rpc_client + ] + + @required_keys [ + :subscription, + :ack_batch_interval_ms, + :ack_batch_max_size, + :broadway_name, + :rpc_client + ] + + @doc false + @spec child_opts(keyword()) :: keyword() + def child_opts(opts) do + picked = Keyword.take(opts, @all_keys) + + Enum.each(@required_keys, fn key -> + unless Keyword.has_key?(picked, key) do + raise ArgumentError, + "missing required option #{inspect(key)} for #{inspect(__MODULE__)}" + end + end) + + picked + end + @doc """ Updates the retry deadline at runtime. Called by StreamManager when it detects a change in exactly-once delivery status from subscription_properties. diff --git a/lib/broadway_cloud_pub_sub/streaming/grpc_client.ex b/lib/broadway_cloud_pub_sub/streaming/grpc_client.ex index f33dc0a..090f62c 100644 --- a/lib/broadway_cloud_pub_sub/streaming/grpc_client.ex +++ b/lib/broadway_cloud_pub_sub/streaming/grpc_client.ex @@ -138,6 +138,7 @@ defmodule BroadwayCloudPubSub.Streaming.GrpcClient do token ) do keepalive_interval_ms = Map.get(config, :keepalive_interval_ms, 30_000) + interceptors = Map.get(config, :interceptors, []) adapter_opts = [http2_opts: %{keepalive: keepalive_interval_ms, settings_timeout: :infinity}] @@ -150,7 +151,8 @@ defmodule BroadwayCloudPubSub.Streaming.GrpcClient do base_opts = [ adapter: adapter, headers: [{"authorization", "Bearer #{token}"}], - adapter_opts: adapter_opts + adapter_opts: adapter_opts, + interceptors: interceptors ] opts = diff --git a/lib/broadway_cloud_pub_sub/streaming/options.ex b/lib/broadway_cloud_pub_sub/streaming/options.ex index 7828307..213a3c0 100644 --- a/lib/broadway_cloud_pub_sub/streaming/options.ex +++ b/lib/broadway_cloud_pub_sub/streaming/options.ex @@ -19,6 +19,7 @@ defmodule BroadwayCloudPubSub.Streaming.Options do definition = [ # Handled by Broadway. broadway: [type: :any, doc: false], + broadway_name: [type: :atom, doc: false], subscription: [ type: {:custom, __MODULE__, :type_non_empty_string, [[{:name, :subscription}]]}, required: true, @@ -277,6 +278,26 @@ defmodule BroadwayCloudPubSub.Streaming.Options do processing topology. """ ], + interceptors: [ + type: {:custom, __MODULE__, :type_interceptors, [[]]}, + default: [], + doc: """ + A list of client-side gRPC interceptors attached to every channel opened + by the producer (both the StreamingPull channel and the unary ack/modack channel). + + Each entry is either a bare module or a `{module, opts}` tuple: + + * `MyInterceptor` — calls `MyInterceptor.init([])` to initialise. + * `{MyInterceptor, level: :debug}` — calls `MyInterceptor.init(level: :debug)`. + + Modules must implement the `GRPC.Client.Interceptor` behaviour (`init/1` and `call/4`). + + ## Example + + interceptors: [GRPC.Client.Interceptors.Logger] + interceptors: [{GRPC.Client.Interceptors.Logger, level: :warning}] + """ + ], grpc_client: [ type: :atom, default: BroadwayCloudPubSub.Streaming.GrpcClient, @@ -288,7 +309,6 @@ defmodule BroadwayCloudPubSub.Streaming.Options do Swap this for testing or custom gRPC transports. """ ], - telemetry_metadata: [ type: {:custom, __MODULE__, :type_telemetry_metadata, [[]]}, doc: """ @@ -411,6 +431,42 @@ defmodule BroadwayCloudPubSub.Streaming.Options do "got: #{inspect(value)}"} end + def type_interceptors(list, _opts) when is_list(list) do + case Enum.find(Enum.map(list, &validate_interceptor/1), &match?({:error, _}, &1)) do + nil -> {:ok, list} + {:error, _} = err -> err + end + end + + def type_interceptors(value, _opts) do + {:error, "expected :interceptors to be a list, got: #{inspect(value)}"} + end + + defp validate_interceptor({mod, _opts}) when is_atom(mod), do: validate_interceptor_module(mod) + defp validate_interceptor(mod) when is_atom(mod), do: validate_interceptor_module(mod) + + defp validate_interceptor(value) do + {:error, + "expected each interceptor to be a module or {module, opts} tuple, got: #{inspect(value)}"} + end + + defp validate_interceptor_module(mod) do + case Code.ensure_loaded(mod) do + {:module, ^mod} -> + if function_exported?(mod, :init, 1) and function_exported?(mod, :call, 4) do + {:ok, mod} + else + {:error, + "expected interceptor #{inspect(mod)} to implement GRPC.Client.Interceptor " <> + "(init/1 and call/4), but one or both callbacks are missing"} + end + + {:error, _} -> + {:error, + "expected interceptor to be a loaded module, but #{inspect(mod)} could not be loaded"} + end + end + @doc false def type_telemetry_metadata({m, f, a}, _opts) when is_atom(m) and is_atom(f) and is_list(a) do case Code.ensure_loaded(m) do diff --git a/lib/broadway_cloud_pub_sub/streaming/producer.ex b/lib/broadway_cloud_pub_sub/streaming/producer.ex index f1231f5..7dca262 100644 --- a/lib/broadway_cloud_pub_sub/streaming/producer.ex +++ b/lib/broadway_cloud_pub_sub/streaming/producer.ex @@ -317,57 +317,33 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do opts = opts |> Keyword.put(:broadway, broadway_opts) + |> Keyword.put(:broadway_name, broadway_opts[:name]) |> validate_options!() |> assign_client_id() |> assign_token_generator() - broadway_name = broadway_opts[:name] + broadway_name = opts[:broadway_name] + # Add grpc_client_config to be used by stream manager and unary grpc_client = opts[:grpc_client] {:ok, client_config} = grpc_client.init(opts) opts = Keyword.put(opts, :grpc_client_config, client_config) - # Config forwarded to UnaryRpcClient and AckBatcher via the supervisor. - # These keys are a subset of the full opts — only what the unary path needs. - unary_config = - opts - |> Keyword.take([ - :subscription, - :token_generator, - :grpc_endpoint, - :use_ssl, - :adapter, - :backoff_type, - :backoff_min, - :backoff_max, - :ack_batch_interval_ms, - :ack_batch_max_size, - :retry_deadline_ms, - :grpc_client, - :telemetry_metadata - ]) - |> Keyword.put(:grpc_client_config, client_config) - |> Keyword.put(:broadway_name, broadway_name) - - sup_name = Module.concat(broadway_name, UnaryAckSupervisor) + # UnaryAckSupervisor options + unary_name = Module.concat(broadway_name, UnaryAckSupervisor) + unary_opts = Keyword.put(opts, :name, unary_name) unary_sup_spec = %{ - id: sup_name, - start: - {UnaryAckSupervisor, :start_link, - [[name: sup_name, broadway_name: broadway_name, config: unary_config]]}, + id: unary_name, + start: {UnaryAckSupervisor, :start_link, [unary_opts]}, restart: :permanent, type: :supervisor } - # Pass broadway_name so StreamManager can derive the AckBatcher registered name. + # StreamManager options stream_manager_name = Module.concat(broadway_name, StreamManager) - - manager_opts = - opts - |> Keyword.put(:name, stream_manager_name) - |> Keyword.put(:broadway_name, broadway_name) + manager_opts = Keyword.put(opts, :name, stream_manager_name) manager_spec = %{ id: stream_manager_name, @@ -375,6 +351,7 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do restart: :permanent } + # Broadway options options = broadway_opts |> put_in([:producer, :module], {producer_module, opts}) @@ -436,17 +413,7 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do # Nack buffered messages (not yet dispatched to processors) per on_shutdown config. buffered = StreamManager.get_buffered(manager_pid) - - case {config[:on_shutdown], buffered} do - {_, []} -> - :ok - - {:noop, _buffered_ids} -> - :ok - - {{:nack, delay_seconds}, ack_ids} -> - StreamManager.modify_deadline(manager_pid, ack_ids, delay_seconds) - end + nack_ack_ids(manager_pid, config, buffered) # Stop receiving new messages and begin the drain phase. StreamManager.stop_receiving(manager_pid) @@ -456,9 +423,16 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do @impl GenStage def terminate(_reason, state) do - %{manager_pid: manager_pid} = state + %{manager_pid: manager_pid, config: config} = state if Process.alive?(manager_pid) do + # Nack any messages still in outstanding so they are redelivered promptly + # instead of waiting for their ack deadline to expire naturally. This + # covers edge cases like on_failure: :noop (acknowledger does nothing) or + # the drain timeout firing before all processors complete. + outstanding = StreamManager.get_outstanding(manager_pid) + nack_ack_ids(manager_pid, config, outstanding) + StreamManager.close(manager_pid) end @@ -469,6 +443,16 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do # --- Private --- + # Nack a list of ack_ids per the on_shutdown config. Used by both + # prepare_for_draining (for buffered messages) and terminate (for + # remaining outstanding messages). + defp nack_ack_ids(_manager_pid, _config, []), do: :ok + defp nack_ack_ids(_manager_pid, %{on_shutdown: :noop}, _ack_ids), do: :ok + + defp nack_ack_ids(manager_pid, %{on_shutdown: {:nack, delay_seconds}}, ack_ids) do + StreamManager.modify_deadline(manager_pid, ack_ids, delay_seconds) + end + defp validate_options!(opts) do case NimbleOptions.validate(opts, Options.definition()) do {:ok, validated} -> diff --git a/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex b/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex index 366b59d..e41f6d3 100644 --- a/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex +++ b/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex @@ -492,29 +492,25 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do def handle_cast({:modify_deadline, ack_ids, deadline_seconds}, state) do now = now_ms() - # On nack (deadline == 0), record processing times and remove from outstanding - # so they are not lease-extended further. On non-zero deadline changes, - # keep the ack_ids in outstanding unchanged. - {new_outstanding, ack_time_dist} = - if deadline_seconds == 0 do - dist = - Enum.reduce(ack_ids, state.ack_time_dist, fn ack_id, acc -> - case Map.get(state.outstanding, ack_id) do - %{received_at: received_at} -> - duration_s = max(1, div(now - received_at, 1_000)) - AckTimeDistribution.record(acc, duration_s) - - nil -> - acc - end - end) + # Record processing times and remove from outstanding for all deadline + # modifications (both nack with deadline=0 and nack with deadline>0). + # Once a message has been nacked, it must not be lease-extended further — + # otherwise the periodic extend_leases cycle would override the requested + # deadline, and the drain phase could never complete because outstanding + # would never become empty. + ack_time_dist = + Enum.reduce(ack_ids, state.ack_time_dist, fn ack_id, acc -> + case Map.get(state.outstanding, ack_id) do + %{received_at: received_at} -> + duration_s = max(1, div(now - received_at, 1_000)) + AckTimeDistribution.record(acc, duration_s) - outstanding = Enum.reduce(ack_ids, state.outstanding, &Map.delete(&2, &1)) - {outstanding, dist} - else - {state.outstanding, state.ack_time_dist} - end + nil -> + acc + end + end) + new_outstanding = Enum.reduce(ack_ids, state.outstanding, &Map.delete(&2, &1)) state = %{state | outstanding: new_outstanding, ack_time_dist: ack_time_dist} AckBatcher.modack(state.ack_batcher, ack_ids, deadline_seconds) @@ -987,6 +983,12 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do extra_metadata ) - Telemetry.execute(:stream, event, measurements, metadata, Map.get(config, :telemetry_metadata)) + Telemetry.execute( + :stream, + event, + measurements, + metadata, + Map.get(config, :telemetry_metadata) + ) end end diff --git a/lib/broadway_cloud_pub_sub/streaming/unary_ack_supervisor.ex b/lib/broadway_cloud_pub_sub/streaming/unary_ack_supervisor.ex index 1290635..459aaac 100644 --- a/lib/broadway_cloud_pub_sub/streaming/unary_ack_supervisor.ex +++ b/lib/broadway_cloud_pub_sub/streaming/unary_ack_supervisor.ex @@ -34,21 +34,24 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryAckSupervisor do end @impl Supervisor - def init(opts) do - broadway_name = Keyword.fetch!(opts, :broadway_name) - config = Keyword.fetch!(opts, :config) + def init(config) do + broadway_name = Keyword.fetch!(config, :broadway_name) rpc_client_name = Module.concat(broadway_name, UnaryRpcClient) batcher_name = Module.concat(broadway_name, AckBatcher) + # Each child's child_opts/1 selects only the keys it needs from the + # full config and validates that all required keys are present. rpc_client_opts = config + |> UnaryRpcClient.child_opts() |> Keyword.put(:name, rpc_client_name) batcher_opts = config - |> Keyword.put(:name, batcher_name) |> Keyword.put(:rpc_client, rpc_client_name) + |> AckBatcher.child_opts() + |> Keyword.put(:name, batcher_name) children = [ %{ diff --git a/lib/broadway_cloud_pub_sub/streaming/unary_rpc_client.ex b/lib/broadway_cloud_pub_sub/streaming/unary_rpc_client.ex index 44d3537..d0c5204 100644 --- a/lib/broadway_cloud_pub_sub/streaming/unary_rpc_client.ex +++ b/lib/broadway_cloud_pub_sub/streaming/unary_rpc_client.ex @@ -52,6 +52,42 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClient do # --- Public API --- + @all_keys [ + :subscription, + :grpc_client, + :grpc_client_config, + :backoff_type, + :backoff_min, + :backoff_max, + :broadway_name, + :telemetry_metadata + ] + + @required_keys [ + :subscription, + :grpc_client, + :grpc_client_config, + :backoff_type, + :backoff_min, + :backoff_max, + :broadway_name + ] + + @doc false + @spec child_opts(keyword()) :: keyword() + def child_opts(opts) do + picked = Keyword.take(opts, @all_keys) + + Enum.each(@required_keys, fn key -> + unless Keyword.has_key?(picked, key) do + raise ArgumentError, + "missing required option #{inspect(key)} for #{inspect(__MODULE__)}" + end + end) + + picked + end + @spec start_link(keyword()) :: GenServer.on_start() def start_link(opts) do {name, opts} = Keyword.pop(opts, :name) diff --git a/test/broadway_cloud_pub_sub/streaming/ack_batcher_test.exs b/test/broadway_cloud_pub_sub/streaming/ack_batcher_test.exs index 8433513..c6a9a14 100644 --- a/test/broadway_cloud_pub_sub/streaming/ack_batcher_test.exs +++ b/test/broadway_cloud_pub_sub/streaming/ack_batcher_test.exs @@ -702,4 +702,97 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcherTest do assert result, "Expected :modify_ack_deadline with deadline #{deadline}" result end + + # ============================================================ + # child_opts/1 + # ============================================================ + + describe "child_opts/1" do + @full_opts [ + subscription: "projects/p/subscriptions/s", + ack_batch_interval_ms: 100, + ack_batch_max_size: 2_500, + retry_deadline_ms: 60_000, + broadway_name: MyPipeline, + telemetry_metadata: %{env: :test}, + rpc_client: :some_rpc_client, + # Extra keys that AckBatcher should NOT include + grpc_client: BroadwayCloudPubSub.Streaming.GrpcClient, + grpc_client_config: %{}, + backoff_type: :rand_exp, + backoff_min: 100, + backoff_max: 60_000, + max_outstanding_messages: 1_000 + ] + + test "returns only the keys AckBatcher needs" do + result = AckBatcher.child_opts(@full_opts) + + assert Keyword.keys(result) |> Enum.sort() == + Enum.sort([ + :subscription, + :ack_batch_interval_ms, + :ack_batch_max_size, + :retry_deadline_ms, + :broadway_name, + :telemetry_metadata, + :rpc_client + ]) + end + + test "excludes UnaryRpcClient-specific keys" do + result = AckBatcher.child_opts(@full_opts) + + refute Keyword.has_key?(result, :grpc_client) + refute Keyword.has_key?(result, :grpc_client_config) + refute Keyword.has_key?(result, :backoff_type) + refute Keyword.has_key?(result, :backoff_min) + refute Keyword.has_key?(result, :backoff_max) + refute Keyword.has_key?(result, :max_outstanding_messages) + end + + test "omits optional keys when not provided" do + opts = + @full_opts + |> Keyword.delete(:telemetry_metadata) + |> Keyword.delete(:retry_deadline_ms) + + result = AckBatcher.child_opts(opts) + + refute Keyword.has_key?(result, :telemetry_metadata) + refute Keyword.has_key?(result, :retry_deadline_ms) + end + + test "raises on missing required key :subscription" do + opts = Keyword.delete(@full_opts, :subscription) + + assert_raise ArgumentError, ~r/missing required option :subscription/, fn -> + AckBatcher.child_opts(opts) + end + end + + test "raises on missing required key :rpc_client" do + opts = Keyword.delete(@full_opts, :rpc_client) + + assert_raise ArgumentError, ~r/missing required option :rpc_client/, fn -> + AckBatcher.child_opts(opts) + end + end + + test "raises on missing required key :broadway_name" do + opts = Keyword.delete(@full_opts, :broadway_name) + + assert_raise ArgumentError, ~r/missing required option :broadway_name/, fn -> + AckBatcher.child_opts(opts) + end + end + + test "raises on missing required key :ack_batch_interval_ms" do + opts = Keyword.delete(@full_opts, :ack_batch_interval_ms) + + assert_raise ArgumentError, ~r/missing required option :ack_batch_interval_ms/, fn -> + AckBatcher.child_opts(opts) + end + end + end end diff --git a/test/broadway_cloud_pub_sub/streaming/options_test.exs b/test/broadway_cloud_pub_sub/streaming/options_test.exs index 1a0cb5d..d57fcbc 100644 --- a/test/broadway_cloud_pub_sub/streaming/options_test.exs +++ b/test/broadway_cloud_pub_sub/streaming/options_test.exs @@ -378,6 +378,146 @@ defmodule BroadwayCloudPubSub.Streaming.OptionsTest do end end + describe "interceptors" do + test "defaults to []" do + {:ok, opts} = validate(subscription: "projects/p/subscriptions/s") + assert opts[:interceptors] == [] + end + + test "accepts a bare module list" do + {:ok, opts} = + validate( + subscription: "projects/p/subscriptions/s", + interceptors: [GRPC.Client.Interceptors.Logger] + ) + + assert opts[:interceptors] == [GRPC.Client.Interceptors.Logger] + end + + test "accepts a {module, opts} tuple list" do + {:ok, opts} = + validate( + subscription: "projects/p/subscriptions/s", + interceptors: [{GRPC.Client.Interceptors.Logger, level: :warning}] + ) + + assert opts[:interceptors] == [{GRPC.Client.Interceptors.Logger, level: :warning}] + end + + test "accepts a mixed list of bare modules and {module, opts} tuples" do + {:ok, opts} = + validate( + subscription: "projects/p/subscriptions/s", + interceptors: [ + GRPC.Client.Interceptors.Logger, + {GRPC.Client.Interceptors.Logger, level: :debug} + ] + ) + + assert length(opts[:interceptors]) == 2 + end + + test "rejects a non-list value" do + assert {:error, err} = + validate( + subscription: "projects/p/subscriptions/s", + interceptors: GRPC.Client.Interceptors.Logger + ) + + assert Exception.message(err) =~ "interceptors" + assert Exception.message(err) =~ "list" + end + + test "rejects an entry whose module is not loaded" do + assert {:error, err} = + validate( + subscription: "projects/p/subscriptions/s", + interceptors: [VeryUnlikelyToExist.InterceptorXYZ] + ) + + assert Exception.message(err) =~ "could not be loaded" + end + + test "rejects an entry that is not a module or {module, opts} tuple" do + assert {:error, err} = + validate( + subscription: "projects/p/subscriptions/s", + interceptors: [:not_valid] + ) + + assert Exception.message(err) =~ "interceptor" + end + + test "rejects a module that does not implement GRPC.Client.Interceptor" do + # String module is loaded but doesn't export init/1 or call/4 + assert {:error, err} = + validate( + subscription: "projects/p/subscriptions/s", + interceptors: [String] + ) + + assert Exception.message(err) =~ "GRPC.Client.Interceptor" + end + end + # Used in telemetry_metadata MFA tests above. def sample_meta, do: %{node: node()} end + +defmodule BroadwayCloudPubSub.Streaming.ProducerPrepareForStartTest do + use ExUnit.Case, async: true + + alias BroadwayCloudPubSub.Streaming.Producer + + # Minimal broadway_opts that satisfies prepare_for_start/2. + defp broadway_opts(producer_opts \\ []) do + base_producer_opts = [ + subscription: "projects/test-project/subscriptions/test-sub", + token_generator: {__MODULE__, :noop_token, []}, + grpc_endpoint: "localhost:8085", + use_ssl: false + ] + + [ + name: TestPipeline, + producer: [ + module: + {Producer, Keyword.merge(base_producer_opts, producer_opts)}, + concurrency: 1 + ], + processors: [default: []] + ] + end + + def noop_token, do: {:ok, "test-token"} + + describe "prepare_for_start/2" do + test "grpc_client_config contains :broadway_name so GrpcClient telemetry does not crash" do + # GrpcClient.acknowledge/3 and modify_ack_deadline/3 read config.broadway_name + # from grpc_client_config for telemetry. This verifies that broadway_name is + # injected into opts *before* grpc_client.init/1 is called, so it ends up in + # the returned config map. + {_specs, updated_opts} = Producer.prepare_for_start(Producer, broadway_opts()) + + {_module, producer_opts} = updated_opts[:producer][:module] + grpc_client_config = producer_opts[:grpc_client_config] + + assert is_map(grpc_client_config), + "expected grpc_client_config to be a map, got: #{inspect(grpc_client_config)}" + + assert Map.has_key?(grpc_client_config, :broadway_name), + ":broadway_name missing from grpc_client_config — GrpcClient telemetry would crash" + + assert grpc_client_config.broadway_name == TestPipeline + end + + test "returns two child specs: UnaryAckSupervisor first, StreamManager second" do + {specs, _opts} = Producer.prepare_for_start(Producer, broadway_opts()) + + assert length(specs) == 2 + [sup_spec, manager_spec] = specs + assert sup_spec.type == :supervisor + assert manager_spec[:type] == nil or manager_spec[:restart] == :permanent + end + end +end diff --git a/test/broadway_cloud_pub_sub/streaming/stream_manager_test.exs b/test/broadway_cloud_pub_sub/streaming/stream_manager_test.exs index 0809fab..b015e31 100644 --- a/test/broadway_cloud_pub_sub/streaming/stream_manager_test.exs +++ b/test/broadway_cloud_pub_sub/streaming/stream_manager_test.exs @@ -1483,4 +1483,57 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do assert msg.data == "data" end end + + # ============================================================ + # modify_deadline — non-zero deadline removes from outstanding + # ============================================================ + + describe "modify_deadline with non-zero deadline" do + test "removes ack_ids from outstanding (same as deadline=0)" do + pid = start_manager() + StreamManager.notify_demand(pid, 10) + + # Push a message so it lands in outstanding + send(pid, {:stream_messages, [received_message("nack-5-ack", "data")]}) + assert_receive {:stream_messages, [_]}, 500 + + state = :sys.get_state(pid) + assert Map.has_key?(state.outstanding, "nack-5-ack") + + # Nack with non-zero deadline (e.g. on_shutdown default {:nack, 5}) + StreamManager.modify_deadline(pid, ["nack-5-ack"], 5) + sync(pid) + + state = :sys.get_state(pid) + refute Map.has_key?(state.outstanding, "nack-5-ack") + end + + test "non-zero nack during drain allows drain to complete" do + pid = start_manager() + StreamManager.notify_demand(pid, 10) + + # Push a message so it lands in outstanding + send(pid, {:stream_messages, [received_message("drain-nack5", "data")]}) + assert_receive {:stream_messages, [_]}, 500 + + # Enter drain mode + StreamManager.stop_receiving(pid) + sync(pid) + + state = :sys.get_state(pid) + assert state.draining + assert Map.has_key?(state.outstanding, "drain-nack5") + # Drain timer should be set (drain not yet complete) + assert state.drain_timer != nil + + # Nack with non-zero deadline — should remove from outstanding and complete drain + StreamManager.modify_deadline(pid, ["drain-nack5"], 5) + sync(pid) + + state = :sys.get_state(pid) + assert map_size(state.outstanding) == 0 + # Drain should have completed: timer cancelled + assert state.drain_timer == nil + end + end end diff --git a/test/broadway_cloud_pub_sub/streaming/unary_rpc_client_test.exs b/test/broadway_cloud_pub_sub/streaming/unary_rpc_client_test.exs index 78e4a5f..8ad8a18 100644 --- a/test/broadway_cloud_pub_sub/streaming/unary_rpc_client_test.exs +++ b/test/broadway_cloud_pub_sub/streaming/unary_rpc_client_test.exs @@ -323,4 +323,84 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClientTest do assert state.channel == nil end end + + # ============================================================ + # child_opts/1 + # ============================================================ + + describe "child_opts/1" do + @full_opts [ + subscription: "projects/p/subscriptions/s", + grpc_client: BroadwayCloudPubSub.Streaming.GrpcClient, + grpc_client_config: %{}, + backoff_type: :rand_exp, + backoff_min: 100, + backoff_max: 60_000, + broadway_name: MyPipeline, + telemetry_metadata: %{env: :test}, + # Extra keys that UnaryRpcClient should NOT include + ack_batch_interval_ms: 100, + ack_batch_max_size: 2_500, + retry_deadline_ms: 60_000, + rpc_client: :some_rpc_client, + max_outstanding_messages: 1_000 + ] + + test "returns only the keys UnaryRpcClient needs" do + result = UnaryRpcClient.child_opts(@full_opts) + + assert Keyword.keys(result) |> Enum.sort() == + Enum.sort([ + :subscription, + :grpc_client, + :grpc_client_config, + :backoff_type, + :backoff_min, + :backoff_max, + :broadway_name, + :telemetry_metadata + ]) + end + + test "excludes AckBatcher-specific keys" do + result = UnaryRpcClient.child_opts(@full_opts) + + refute Keyword.has_key?(result, :ack_batch_interval_ms) + refute Keyword.has_key?(result, :ack_batch_max_size) + refute Keyword.has_key?(result, :retry_deadline_ms) + refute Keyword.has_key?(result, :rpc_client) + refute Keyword.has_key?(result, :max_outstanding_messages) + end + + test "omits optional :telemetry_metadata when not provided" do + opts = Keyword.delete(@full_opts, :telemetry_metadata) + result = UnaryRpcClient.child_opts(opts) + + refute Keyword.has_key?(result, :telemetry_metadata) + end + + test "raises on missing required key :subscription" do + opts = Keyword.delete(@full_opts, :subscription) + + assert_raise ArgumentError, ~r/missing required option :subscription/, fn -> + UnaryRpcClient.child_opts(opts) + end + end + + test "raises on missing required key :grpc_client" do + opts = Keyword.delete(@full_opts, :grpc_client) + + assert_raise ArgumentError, ~r/missing required option :grpc_client/, fn -> + UnaryRpcClient.child_opts(opts) + end + end + + test "raises on missing required key :broadway_name" do + opts = Keyword.delete(@full_opts, :broadway_name) + + assert_raise ArgumentError, ~r/missing required option :broadway_name/, fn -> + UnaryRpcClient.child_opts(opts) + end + end + end end From fab62fa9245150f73367d3fdb42e644a355ee37f Mon Sep 17 00:00:00 2001 From: Rock Date: Fri, 10 Apr 2026 10:00:54 +0200 Subject: [PATCH 09/29] feat: improve draining with N:N producer-manager topology Redesign the producer-manager relationship to support N producers each with their own dedicated stream manager (1:1 mapping). Improve the draining lifecycle with nack-on-timeout behavior and extract focused modules for lease management and message dispatch. Key changes: - Implement 1:1 producer-to-stream-manager topology - Move prepare_for_draining logic into StreamManager - Add nack on drain_timeout for unprocessed messages - Extract LeaseManager for ack deadline extension scheduling - Extract MessageDispatch for message delivery logic - Centralize demand management in StreamManager - Simplify producer to delegate stream concerns to manager --- .../streaming/ack_batcher.ex | 13 +- .../streaming/lease_manager.ex | 154 +++ .../streaming/message_dispatch.ex | 153 +++ .../streaming/options.ex | 22 + .../streaming/producer.ex | 161 ++- .../streaming/stream_manager.ex | 688 +++++----- .../streaming/telemetry.ex | 85 ++ .../streaming/unary_rpc_client.ex | 220 ++-- .../streaming/options_test.exs | 7 +- .../streaming/producer_integration_test.exs | 48 + .../streaming/stream_manager_test.exs | 1135 ++++++++++++++++- .../streaming/stress_test.exs | 6 +- 12 files changed, 2071 insertions(+), 621 deletions(-) create mode 100644 lib/broadway_cloud_pub_sub/streaming/lease_manager.ex create mode 100644 lib/broadway_cloud_pub_sub/streaming/message_dispatch.ex diff --git a/lib/broadway_cloud_pub_sub/streaming/ack_batcher.ex b/lib/broadway_cloud_pub_sub/streaming/ack_batcher.ex index 4a9154e..61d0855 100644 --- a/lib/broadway_cloud_pub_sub/streaming/ack_batcher.ex +++ b/lib/broadway_cloud_pub_sub/streaming/ack_batcher.ex @@ -7,7 +7,7 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcher do use GenServer - alias BroadwayCloudPubSub.Streaming.{Telemetry, UnaryRpcClient} + alias BroadwayCloudPubSub.Streaming.{Options, Telemetry, UnaryRpcClient} @max_modack_attempts 3 @@ -53,16 +53,7 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcher do @doc false @spec child_opts(keyword()) :: keyword() def child_opts(opts) do - picked = Keyword.take(opts, @all_keys) - - Enum.each(@required_keys, fn key -> - unless Keyword.has_key?(picked, key) do - raise ArgumentError, - "missing required option #{inspect(key)} for #{inspect(__MODULE__)}" - end - end) - - picked + Options.validate_child_opts(opts, @all_keys, @required_keys) end @doc """ diff --git a/lib/broadway_cloud_pub_sub/streaming/lease_manager.ex b/lib/broadway_cloud_pub_sub/streaming/lease_manager.ex new file mode 100644 index 0000000..ced173a --- /dev/null +++ b/lib/broadway_cloud_pub_sub/streaming/lease_manager.ex @@ -0,0 +1,154 @@ +defmodule BroadwayCloudPubSub.Streaming.LeaseManager do + @moduledoc false + + # Pure-function module for lease extension, adaptive deadline computation, + # timer scheduling, and stale pending-modack sweeping. + # + # Functions accept and return the StreamManager state struct. StreamManager + # delegates to this module for lease management without mixing timer and + # deadline logic into GenServer callback bodies. + + alias BroadwayCloudPubSub.Streaming.{AckBatcher, AckTimeDistribution, Telemetry} + + # Subtracted from the adaptive deadline when computing the lease extension interval. + @grace_period_seconds 5 + + # Minimum ack deadline enforced by the server for exactly-once subscriptions. + @min_deadline_exactly_once_seconds 60 + + # Stale pending receipt modacks (older than 60s) are nacked for fast redelivery. + @receipt_modack_stale_ms 60_000 + + # --- Deadline computation --- + + @doc """ + Returns the effective ack deadline in seconds, based on the adaptive p99 + percentile from recorded processing times. For exactly-once subscriptions, + enforces the server's minimum of 60 seconds. + """ + def effective_deadline(state) do + adaptive = AckTimeDistribution.percentile(state.ack_time_dist, 0.99) + + if state.exactly_once_enabled, + do: max(adaptive, @min_deadline_exactly_once_seconds), + else: adaptive + end + + # --- Lease extension --- + + @doc """ + Runs a lease extension cycle: partitions outstanding messages into valid and + expired, emits telemetry, sends modack requests for valid messages, and + schedules the next extension tick. + """ + def do_extend_leases(state) do + now = System.monotonic_time(:millisecond) + deadline = effective_deadline(state) + + # Partition into still-valid and expired (past max_expiry — server will redeliver). + {valid, expired} = + Map.split_with(state.outstanding, fn {_id, info} -> info.max_expiry > now end) + + if map_size(expired) > 0 do + emit_telemetry(:lease_expired, %{count: map_size(expired)}, state.config) + end + + emit_telemetry( + :extend_leases, + %{count: map_size(valid), deadline: deadline}, + state.config + ) + + emit_telemetry( + :pressure_snapshot, + %{ + outstanding_count: map_size(valid), + buffered_count: :queue.len(state.message_buffer), + pending_demand: state.pending_demand + }, + state.config + ) + + if map_size(valid) > 0 do + AckBatcher.modack(state.ack_batcher, Map.keys(valid), deadline) + end + + # Schedule next tick with jitter in [0.8, 0.9) to spread out concurrent StreamManagers. + base_interval_ms = max(1_000, (deadline - @grace_period_seconds) * 1_000) + jitter_factor = 0.8 + :rand.uniform() * 0.1 + timer = Process.send_after(self(), :extend_leases, round(base_interval_ms * jitter_factor)) + + state + |> Map.put(:outstanding, valid) + |> Map.put(:lease_timer, timer) + |> sweep_stale_pending_modacks() + end + + # --- Timer management --- + + @doc """ + Schedules the initial lease extension timer based on the configured deadline. + Cancels any existing timer first. + """ + def schedule_lease_timer(state) do + state = cancel_lease_timer(state) + # Initial interval: (configured deadline - grace period) with jitter, minimum 1s. + deadline_s = state.config.stream_ack_deadline_seconds + base_ms = max(1_000, (deadline_s - @grace_period_seconds) * 1_000) + jitter_factor = 0.8 + :rand.uniform() * 0.1 + interval_ms = round(base_ms * jitter_factor) + timer = Process.send_after(self(), :extend_leases, interval_ms) + %{state | lease_timer: timer} + end + + @doc """ + Cancels the lease extension timer if one is active. + """ + def cancel_lease_timer(%{lease_timer: nil} = state), do: state + + def cancel_lease_timer(%{lease_timer: timer} = state) do + Process.cancel_timer(timer) + %{state | lease_timer: nil} + end + + # --- Stale pending modack sweep --- + + @doc """ + Sweeps stale pending receipt modacks (older than 60s) and nacks them for + fast redelivery. Used during the lease extension cycle. + """ + def sweep_stale_pending_modacks(state) do + now = System.monotonic_time(:millisecond) + cutoff = now - @receipt_modack_stale_ms + + {stale, fresh} = + Map.split_with(state.pending_receipt_modacks, fn {_ref, %{received_at: t}} -> + t < cutoff + end) + + if map_size(stale) > 0 do + stale_ids = stale |> Map.values() |> Enum.flat_map(& &1.ack_ids) + AckBatcher.modack(state.ack_batcher, stale_ids, 0) + emit_telemetry(:receipt_modack_stale, %{count: length(stale_ids)}, state.config) + end + + %{state | pending_receipt_modacks: fresh} + end + + # --- Private: telemetry --- + + defp emit_telemetry(event, measurements, config) do + metadata = %{ + name: config.broadway[:name], + subscription: config.subscription + } + + Telemetry.execute( + :stream, + event, + measurements, + metadata, + Map.get(config, :telemetry_metadata) + ) + end +end diff --git a/lib/broadway_cloud_pub_sub/streaming/message_dispatch.ex b/lib/broadway_cloud_pub_sub/streaming/message_dispatch.ex new file mode 100644 index 0000000..f1b2188 --- /dev/null +++ b/lib/broadway_cloud_pub_sub/streaming/message_dispatch.ex @@ -0,0 +1,153 @@ +defmodule BroadwayCloudPubSub.Streaming.MessageDispatch do + @moduledoc false + + # Pure-function module for message buffer management, demand-based flushing, + # Broadway message construction, and outstanding ack_id tracking. + # + # Functions accept and return the StreamManager state struct (or relevant + # fields). StreamManager delegates to this module for message dispatch + # without mixing buffer logic into GenServer callback bodies. + + alias BroadwayCloudPubSub.MessageBuilder + alias BroadwayCloudPubSub.Streaming.AckTimeDistribution + + # --- Buffer and demand --- + + @doc """ + Enqueues `messages` into the buffer, then flushes up to `pending_demand` + messages to the producer process. + """ + def deliver_messages(state, messages) do + new_buffer = Enum.reduce(messages, state.message_buffer, &:queue.in(&1, &2)) + flush_demand(%{state | message_buffer: new_buffer}) + end + + @doc """ + Flushes up to `pending_demand` messages from the buffer to the producer. + No-op when draining, demand is zero, or the buffer is empty. + """ + def flush_demand(%{draining: true} = state), do: state + def flush_demand(%{pending_demand: 0} = state), do: state + + def flush_demand(state) do + if :queue.is_empty(state.message_buffer) do + state + else + {remaining, demand_left, batch_reversed} = + flush_demand_loop(state.message_buffer, state.pending_demand, []) + + send(state.producer_pid, {:stream_messages, Enum.reverse(batch_reversed)}) + %{state | message_buffer: remaining, pending_demand: demand_left} + end + end + + defp flush_demand_loop(queue, 0, acc), do: {queue, 0, acc} + + defp flush_demand_loop(queue, n, acc) do + case :queue.out(queue) do + {{:value, msg}, rest} -> flush_demand_loop(rest, n - 1, [msg | acc]) + {:empty, _} -> {queue, n, acc} + end + end + + # --- Outstanding tracking --- + + @doc """ + Adds ack_ids to the outstanding map with their received_at and max_expiry. + """ + def add_to_outstanding(outstanding, ack_ids, received_at, max_extension_ms) do + Enum.reduce(ack_ids, outstanding, fn ack_id, acc -> + Map.put(acc, ack_id, %{received_at: received_at, max_expiry: received_at + max_extension_ms}) + end) + end + + @doc """ + Records processing times for ack_ids in the adaptive p99 distribution, then + removes them from outstanding. Shared by both ack and modack (nack) paths. + """ + def record_and_remove_from_outstanding(state, ack_ids) do + now = System.monotonic_time(:millisecond) + + ack_time_dist = + Enum.reduce(ack_ids, state.ack_time_dist, fn ack_id, dist -> + case Map.get(state.outstanding, ack_id) do + %{received_at: received_at} -> + duration_s = max(1, div(now - received_at, 1_000)) + AckTimeDistribution.record(dist, duration_s) + + nil -> + dist + end + end) + + new_outstanding = Enum.reduce(ack_ids, state.outstanding, &Map.delete(&2, &1)) + %{state | outstanding: new_outstanding, ack_time_dist: ack_time_dist} + end + + @doc """ + Extracts ack_ids from buffered Broadway messages. + """ + def extract_buffered_ack_ids(message_buffer) do + message_buffer + |> :queue.to_list() + |> Enum.map(fn %Broadway.Message{acknowledger: {_, _, %{ack_id: id}}} -> id end) + end + + @doc """ + Splits broadway_messages into {succeeded_msgs, succeeded_ids} by removing + messages whose ack_id is in failed_ids. + """ + def partition_succeeded(broadway_messages, all_ack_ids, failed_ids) do + failed_set = MapSet.new(failed_ids) + + {ok_msgs_reversed, ok_ids_reversed} = + Enum.zip(broadway_messages, all_ack_ids) + |> Enum.reduce({[], []}, fn {msg, id}, {msgs_acc, ids_acc} -> + if MapSet.member?(failed_set, id) do + {msgs_acc, ids_acc} + else + {[msg | msgs_acc], [id | ids_acc]} + end + end) + + {Enum.reverse(ok_msgs_reversed), Enum.reverse(ok_ids_reversed)} + end + + # --- Message construction --- + + @doc """ + Builds a `Broadway.Message` from a decoded Pub/Sub ReceivedMessage and the + current StreamManager state. + """ + def build_broadway_message( + %{ack_id: ack_id, message: pubsub_msg, delivery_attempt: delivery_attempt}, + state + ) do + ack_ref = state.config.ack_ref + acknowledger = BroadwayCloudPubSub.Streaming.Acknowledger.builder(ack_ref).(ack_id) + + %Broadway.Message{ + data: pubsub_msg.data, + metadata: build_metadata(pubsub_msg, delivery_attempt), + acknowledger: acknowledger + } + end + + defp build_metadata(msg, delivery_attempt) do + MessageBuilder.build_metadata(%{ + message_id: msg.message_id, + ordering_key: msg.ordering_key, + publish_time: to_datetime(msg.publish_time), + delivery_attempt: delivery_attempt, + attributes: Map.new(msg.attributes || []) + }) + end + + defp to_datetime(nil), do: nil + + defp to_datetime(%{seconds: seconds, nanos: nanos}) do + DateTime.from_unix!(seconds * 1_000_000_000 + nanos, :nanosecond) + rescue + _ -> nil + end +end diff --git a/lib/broadway_cloud_pub_sub/streaming/options.ex b/lib/broadway_cloud_pub_sub/streaming/options.ex index 213a3c0..7e7c6a0 100644 --- a/lib/broadway_cloud_pub_sub/streaming/options.ex +++ b/lib/broadway_cloud_pub_sub/streaming/options.ex @@ -487,4 +487,26 @@ defmodule BroadwayCloudPubSub.Streaming.Options do end def type_telemetry_metadata(term, _opts), do: {:ok, term} + + @doc """ + Validates and selects child options from a full config keyword list. + + Takes the given `all_keys`, validates that all `required_keys` are present, + and returns only the selected keys. Used by `AckBatcher.child_opts/1` and + `UnaryRpcClient.child_opts/1` to extract their required options from the + shared pipeline config. + """ + @spec validate_child_opts(keyword(), [atom()], [atom()]) :: keyword() + def validate_child_opts(opts, all_keys, required_keys) do + picked = Keyword.take(opts, all_keys) + + Enum.each(required_keys, fn key -> + unless Keyword.has_key?(picked, key) do + raise ArgumentError, + "missing required option #{inspect(key)} for child opts" + end + end) + + picked + end end diff --git a/lib/broadway_cloud_pub_sub/streaming/producer.ex b/lib/broadway_cloud_pub_sub/streaming/producer.ex index 7dca262..89c4e16 100644 --- a/lib/broadway_cloud_pub_sub/streaming/producer.ex +++ b/lib/broadway_cloud_pub_sub/streaming/producer.ex @@ -10,17 +10,23 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do efficient than the HTTP pull approach (`BroadwayCloudPubSub.Producer`) for workloads that require low latency or high throughput. - The architecture has three layers: - - 1. **StreamManager** (GenServer) — owns the gRPC stream and connection - lifecycle, manages lease extensions, and dispatches messages to the - Producer when downstream demand is available. - 2. **Producer** (GenStage) — receives messages from StreamManager and - forwards them to Broadway processors. Tracks demand from downstream - stages. - 3. **UnaryAckSupervisor** — supervises AckBatcher and UnaryRpcClient, which - batch and send acknowledgement and deadline-modification requests via - separate unary RPCs (not on the streaming connection). + Each producer process (N = `producer: [concurrency: N]`) starts and links + its own **StreamManager** (GenServer), giving N independent gRPC streams + that mirror the Go client’s N `messageIterator`s sharing a single `clientID`. + + Key components: + + * **StreamManager** — GenServer that owns the gRPC bidirectional stream, + manages connection lifecycle (connect/reconnect/backoff), extends message + leases, and dispatches messages to the linked Producer when demand is + available. Started via `start_link` from `Producer.init/1`. + + * **Producer** — GenStage process that bridges StreamManager to Broadway. + Tracks downstream demand and forwards messages to processors. + + * **UnaryAckSupervisor** — shared across all producers. Supervises + AckBatcher and UnaryRpcClient, which batch and send ack/nack/modifyAckDeadline + requests via separate unary RPCs (not on the streaming connection). ## Usage @@ -193,24 +199,48 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do Measurements: `%{count: pos_integer()}` - * `:drain_timeout` — graceful shutdown drain timed out before all in-flight - messages were processed. + * `:drain` — async span tracking the full graceful drain lifecycle, from + `prepare_for_draining/1` through completion, timeout, or unexpected + termination. Uses the same measurements convention as `:telemetry.span/3`. - Measurements: `%{}` + Events: - * `:drain_complete` — all in-flight messages were processed before the drain - timeout; stream closed cleanly. + * `[:broadway_cloud_pub_sub, :streaming, :stream, :drain, :start]` — drain + initiated. Emitted before the reader is closed or any messages are nacked. - Measurements: `%{}` + Measurements: `%{system_time: integer(), monotonic_time: integer(), + buffered_count: non_neg_integer(), outstanding_count: non_neg_integer(), + pending_receipt_modack_count: non_neg_integer()}` - * `:reconnect` — reconnect scheduled after a retryable stream error. + * `[:broadway_cloud_pub_sub, :streaming, :stream, :drain, :stop]` — all + in-flight messages were processed and stream closed cleanly. - Measurements: `%{delay: non_neg_integer()}` + Measurements: `%{duration: non_neg_integer(), monotonic_time: integer()}` - * `:receipt_modack_stale` — pending exactly-once receipt modacks swept out - as stale and nacked for fast redelivery. + * `[:broadway_cloud_pub_sub, :streaming, :stream, :drain, :exception]` — + drain ended abnormally. - Measurements: `%{count: pos_integer()}` + Measurements: `%{duration: non_neg_integer(), monotonic_time: integer()}` + (plus `remaining_count: non_neg_integer()` for `:timeout` and `:terminate` kinds) + + Metadata includes `kind` and `reason` identifying the cause: + + * `kind: :timeout, reason: :drain_timeout` — `drain_timeout_ms` elapsed + before all messages were acked. Remaining messages are nacked immediately. + * `kind: :terminate, reason: term()` — the GenServer was terminated while + a drain was in progress. + * `kind: :error, reason: binary()` — an exception was raised inside + `prepare_for_draining/1` itself. + + * `:pressure_snapshot` — a point-in-time snapshot of pipeline backpressure, + emitted on every lease extension cycle. Useful for diagnosing memory + or throughput bottlenecks without enabling tracing. + + Measurements: `%{outstanding_count: non_neg_integer(), buffered_count: non_neg_integer(), pending_demand: non_neg_integer()}` + + * `outstanding_count` — messages received but not yet acked or nacked. + * `buffered_count` — messages waiting in the internal buffer for producer demand. + * `pending_demand` — units of GenStage demand currently unfulfilled. ### AckBatcher events — `[:broadway_cloud_pub_sub, :streaming, :ack_batcher, ...]` @@ -341,25 +371,16 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do type: :supervisor } - # StreamManager options - stream_manager_name = Module.concat(broadway_name, StreamManager) - manager_opts = Keyword.put(opts, :name, stream_manager_name) - - manager_spec = %{ - id: stream_manager_name, - start: {StreamManager, :start_link, [manager_opts]}, - restart: :permanent - } - # Broadway options options = broadway_opts |> put_in([:producer, :module], {producer_module, opts}) |> maybe_inject_partition_by(opts) - # UnaryAckSupervisor is listed first so it starts before StreamManager, - # ensuring AckBatcher is alive when the first acks are dispatched. - {[unary_sup_spec, manager_spec], options} + # Only the UnaryAckSupervisor is a shared child spec. Each producer starts + # its own StreamManager directly via start_link in init/1 — the natural + # link means crashes propagate without needing a supervisor. + {[unary_sup_spec], options} end @impl GenStage @@ -367,12 +388,21 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do Process.flag(:trap_exit, true) config = Map.new(opts) - ack_ref = config.broadway[:name] - manager_name = Module.concat(ack_ref, StreamManager) - manager_pid = Process.whereis(manager_name) + broadway_name = config.broadway[:name] + index = config.broadway[:index] + + # Each producer gets a unique ack_ref so persistent_term entries don't collide. + ack_ref = {broadway_name, index} + manager_name = Module.concat(broadway_name, "StreamManager_#{index}") + + # Start our own StreamManager directly. start_link creates a natural + # bidirectional link — if the manager crashes (terminal gRPC error), the + # producer receives an EXIT signal; if the producer dies, the manager does too. + manager_opts = + opts + |> Keyword.merge(name: manager_name, producer_pid: self(), ack_ref: ack_ref) - # Tell the StreamManager our pid so it can forward messages to us - :ok = StreamManager.set_producer(manager_pid, self()) + {:ok, manager_pid} = StreamManager.start_link(manager_opts) # Store the manager's *registered name* (not its PID) in persistent_term so # the Acknowledger can route acks even after a StreamManager restart. PIDs @@ -386,22 +416,32 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do manager_name: manager_name, ack_ref: ack_ref, config: config, - draining: false, - demand: 0 + draining: false }} end @impl GenStage - def handle_demand(incoming_demand, %{demand: demand} = state) do - new_demand = demand + incoming_demand - StreamManager.notify_demand(state.manager_pid, new_demand) - {:noreply, [], %{state | demand: new_demand}} + def handle_demand(_incoming_demand, %{draining: true} = state) do + {:noreply, [], state} + end + + def handle_demand(incoming_demand, state) do + StreamManager.notify_demand(state.manager_pid, incoming_demand) + {:noreply, [], state} end @impl GenStage - def handle_info({:stream_messages, messages}, %{demand: demand} = state) do - new_demand = max(demand - length(messages), 0) - {:noreply, messages, %{state | demand: new_demand}} + def handle_info({:stream_messages, messages}, state) do + {:noreply, messages, state} + end + + # StreamManager crashed (terminal gRPC error). Propagate the crash to the + # producer so Broadway's supervision restarts the pipeline. + def handle_info( + {:EXIT, manager_pid, reason}, + %{manager_pid: manager_pid} = state + ) do + {:stop, reason, state} end @impl GenStage @@ -409,14 +449,11 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do @impl Broadway.Producer def prepare_for_draining(state) do - %{manager_pid: manager_pid, config: config} = state - - # Nack buffered messages (not yet dispatched to processors) per on_shutdown config. - buffered = StreamManager.get_buffered(manager_pid) - nack_ack_ids(manager_pid, config, buffered) + %{manager_pid: manager_pid} = state - # Stop receiving new messages and begin the drain phase. - StreamManager.stop_receiving(manager_pid) + # Single atomic call: stops the reader, nacks + clears buffered messages, + # removes them from outstanding, and sets draining mode on the StreamManager. + {:ok, _nacked_count} = StreamManager.prepare_for_draining(manager_pid) {:noreply, [], %{state | draining: true}} end @@ -443,9 +480,8 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do # --- Private --- - # Nack a list of ack_ids per the on_shutdown config. Used by both - # prepare_for_draining (for buffered messages) and terminate (for - # remaining outstanding messages). + # Nack a list of ack_ids per the on_shutdown config. Used by terminate/2 + # to nack remaining outstanding messages on shutdown. defp nack_ack_ids(_manager_pid, _config, []), do: :ok defp nack_ack_ids(_manager_pid, %{on_shutdown: :noop}, _ack_ids), do: :ok @@ -496,7 +532,8 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do # Broadway's :partition_by option accepts a function that takes a Broadway.Message # and returns a partition key. Broadway hashes the key and routes all messages # with the same hash to the same processor stage. Messages with an empty or nil - # ordering_key are all routed to partition 0 (unordered messages interleave freely). + # ordering_key are spread across processors via unique_integer (unordered messages + # should not be funneled to a single partition). defp maybe_inject_partition_by(broadway_opts, opts) do if opts[:enable_message_ordering] do processors = @@ -512,6 +549,10 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do end end + def partition_by(%Broadway.Message{metadata: %{orderingKey: ""}}) do + :erlang.unique_integer([:positive]) + end + def partition_by(%Broadway.Message{metadata: %{orderingKey: key}}) when is_binary(key) do :erlang.phash2(key) end diff --git a/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex b/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex index e41f6d3..54bd146 100644 --- a/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex +++ b/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex @@ -9,12 +9,14 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do use GenServer require Logger - alias BroadwayCloudPubSub.{Backoff, MessageBuilder} + alias BroadwayCloudPubSub.Backoff alias BroadwayCloudPubSub.Streaming.{ AckBatcher, AckTimeDistribution, ErrorClassifier, + LeaseManager, + MessageDispatch, StreamReader, Telemetry } @@ -29,12 +31,6 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do # Exactly-once delivery requires a longer retry window to handle server-side transient failures. @exactly_once_retry_deadline_ms 600_000 - # Subtracted from the adaptive deadline when computing the lease extension interval. - @grace_period_seconds 5 - - # Minimum ack deadline enforced by the server for exactly-once subscriptions. - @min_deadline_exactly_once_seconds 60 - defstruct [ :producer_pid, :config, @@ -49,7 +45,6 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do :lease_timer, # Tracks message processing times for the adaptive p99 ack deadline. :ack_time_dist, - :receiving, # Non-nil when a reconnect is already scheduled — prevents double-scheduling. :reconnect_ref, :keepalive_timer, @@ -57,6 +52,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do :ack_batcher, draining: false, drain_timer: nil, + drain_started_at: nil, ordering_enabled: false, # Updated from StreamingPullResponse.subscription_properties. exactly_once_enabled: false, @@ -83,16 +79,6 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do end end - @doc """ - Sets the producer pid. Called by `StreamingProducer.init/1` after the producer - process starts (after Broadway has started both the StreamManager child and the - producer process). - """ - @spec set_producer(GenServer.server(), pid()) :: :ok - def set_producer(server, producer_pid) do - GenServer.call(server, {:set_producer, producer_pid}) - end - @doc """ Acknowledge (ack) a list of ack_ids. Called by StreamingAcknowledger. """ @@ -110,13 +96,23 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do end @doc """ - Tells the StreamManager to stop forwarding new messages to the producer. - Called during `prepare_for_draining`. The gRPC stream stays open so - in-flight acks can still be delivered. + Prepares the StreamManager for graceful shutdown. Called from the + producer's `prepare_for_draining/1`. Atomically: + + 1. Closes the reader to stop new messages from arriving. + 2. Nacks pending receipt modacks (exactly-once) so the server redelivers. + 3. Nacks and clears all buffered messages per the `on_shutdown` config. + 4. Removes those buffered ack_ids from `outstanding` so the drain + phase only waits for messages already dispatched to processors. + 5. Sets `draining: true` and starts the drain timer. + 6. Checks if drain is already complete (outstanding may now be empty). + + Returns `{:ok, nacked_count}` where `nacked_count` is the number of + buffered messages that were nacked and removed. """ - @spec stop_receiving(pid()) :: :ok - def stop_receiving(pid) do - GenServer.call(pid, :stop_receiving) + @spec prepare_for_draining(pid()) :: {:ok, non_neg_integer()} + def prepare_for_draining(pid) do + GenServer.call(pid, :prepare_for_draining) end @doc """ @@ -138,22 +134,10 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do end @doc """ - Returns the ack_ids of messages that are buffered in StreamManager but have - not yet been dispatched to Broadway processors. These are messages received - from the gRPC stream that are waiting for demand. - - Called from the producer's `prepare_for_draining/1` to nack buffered messages - during graceful shutdown before they are delivered to the pipeline. - """ - @spec get_buffered(pid()) :: [String.t()] - def get_buffered(pid) do - GenServer.call(pid, :get_buffered) - end - - @doc """ - Signals the current demand from the producer. The `amount` is the producer's - total accumulated demand (not a delta). The StreamManager uses it as an upper - bound for how many buffered messages to flush immediately. + Signals additional demand from the producer. The `amount` is a delta (the + `incoming_demand` from the latest `GenStage.handle_demand/2` callback). + StreamManager accumulates it into `pending_demand` and flushes up to the + new total from the message buffer. Called by `Streaming.Producer.handle_demand/2`. """ @@ -179,33 +163,31 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do ack_batcher = Module.concat(config.broadway_name, AckBatcher) state = %__MODULE__{ - producer_pid: nil, + producer_pid: Map.fetch!(config, :producer_pid), config: config, grpc_client: config.grpc_client, grpc_client_config: config.grpc_client_config, backoff: backoff, ack_time_dist: AckTimeDistribution.new(config.stream_ack_deadline_seconds), ack_batcher: ack_batcher, - receiving: true, pending_demand: 0 } - # Delay connecting until producer tells us its pid via set_producer/2 - {:ok, state} + {:ok, state, {:continue, :connect}} end @impl GenServer - def handle_info(:connect, state) do - state = %{state | reconnect_ref: nil} + def handle_continue(:connect, state), do: do_connect(state) - case connect(state) do - {:ok, new_state} -> - {:noreply, new_state} + @impl GenServer + # During draining, ignore reconnect attempts — we don't want new messages. + def handle_info(:connect, %{draining: true} = state) do + {:noreply, %{state | reconnect_ref: nil}} + end - {:error, reason, new_state} -> - emit_telemetry(:connection_failure, %{}, state.config, %{reason: reason}) - {:noreply, schedule_reconnect(new_state)} - end + def handle_info(:connect, state) do + state = %{state | reconnect_ref: nil} + do_connect(state) end # The StreamReader successfully opened the gRPC stream and sends us the @@ -221,7 +203,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do backoff: backoff } - state = schedule_lease_timer(state) + state = LeaseManager.schedule_lease_timer(state) state = schedule_keepalive_timer(state) emit_telemetry(:connect, %{}, state.config) {:noreply, state} @@ -233,46 +215,55 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do end # Decoded messages forwarded from the StreamReader. + def handle_info({:stream_messages, []}, state) do + {:noreply, state} + end + + def handle_info({:stream_messages, messages}, %{draining: true} = state) do + nack_per_on_shutdown(state, Enum.map(messages, & &1.ack_id)) + + {:noreply, state} + end + def handle_info({:stream_messages, messages}, state) do - if state.receiving and messages != [] do - broadway_messages = Enum.map(messages, &build_broadway_message(&1, state)) - ack_ids = Enum.map(messages, & &1.ack_id) - - now = now_ms() - adaptive_deadline = AckTimeDistribution.percentile(state.ack_time_dist, 0.99) - - if state.exactly_once_enabled do - # Exactly-once receipt modack gate: hold messages until the receipt modack - # RPC confirms success. Messages whose modack fails are dropped (server redelivers). - effective_deadline = max(adaptive_deadline, @min_deadline_exactly_once_seconds) - ref = make_ref() - AckBatcher.receipt_modack(state.ack_batcher, ref, self(), ack_ids, effective_deadline) - - pending = - Map.put(state.pending_receipt_modacks, ref, %{ - broadway_messages: broadway_messages, - ack_ids: ack_ids, - received_at: now - }) - - {:noreply, %{state | pending_receipt_modacks: pending}} - else - # Standard delivery: fire-and-forget receipt modack, dispatch immediately. - new_outstanding = - add_to_outstanding(state.outstanding, ack_ids, now, state.config.max_extension_ms) + broadway_messages = Enum.map(messages, &MessageDispatch.build_broadway_message(&1, state)) + ack_ids = Enum.map(messages, & &1.ack_id) - AckBatcher.modack(state.ack_batcher, ack_ids, adaptive_deadline) - emit_telemetry(:receive_messages, %{count: length(broadway_messages)}, state.config) - {:noreply, deliver_messages(%{state | outstanding: new_outstanding}, broadway_messages)} - end + now = now_ms() + adaptive_deadline = LeaseManager.effective_deadline(state) + + if state.exactly_once_enabled do + # Exactly-once receipt modack gate: hold messages until the receipt modack + # RPC confirms success. Messages whose modack fails are dropped (server redelivers). + ref = make_ref() + AckBatcher.receipt_modack(state.ack_batcher, ref, self(), ack_ids, adaptive_deadline) + + pending = + Map.put(state.pending_receipt_modacks, ref, %{ + broadway_messages: broadway_messages, + ack_ids: ack_ids, + received_at: now + }) + + {:noreply, %{state | pending_receipt_modacks: pending}} else - {:noreply, state} + # Standard delivery: fire-and-forget receipt modack, dispatch immediately. + new_outstanding = + MessageDispatch.add_to_outstanding(state.outstanding, ack_ids, now, state.config.max_extension_ms) + + AckBatcher.modack(state.ack_batcher, ack_ids, adaptive_deadline) + emit_telemetry(:receive_messages, %{count: length(broadway_messages)}, state.config) + {:noreply, MessageDispatch.deliver_messages(%{state | outstanding: new_outstanding}, broadway_messages)} end end # Result of an exactly-once receipt modack RPC sent via AckBatcher.receipt_modack/5. # Messages are delivered only if the receipt modack succeeded; otherwise dropped # (the server will redeliver them). + # + # During draining, any surviving pending entries are nacked rather than delivered. + # Normally prepare_for_draining clears pending_receipt_modacks, but a result + # could arrive in the mailbox between the receipt_modack call and the drain. def handle_info({:receipt_modack_result, ref, result}, state) do case Map.pop(state.pending_receipt_modacks, ref) do {nil, _} -> @@ -282,48 +273,12 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do {pending, rest} -> state = %{state | pending_receipt_modacks: rest} - case result do - {:ok, []} -> - new_outstanding = - add_to_outstanding( - state.outstanding, - pending.ack_ids, - pending.received_at, - state.config.max_extension_ms - ) - - emit_telemetry( - :receive_messages, - %{count: length(pending.broadway_messages)}, - state.config - ) - - {:noreply, - deliver_messages(%{state | outstanding: new_outstanding}, pending.broadway_messages)} - - {:ok, failed_ids} -> - # Partial success — deliver only messages whose modack succeeded. - {ok_msgs, ok_ids} = - partition_succeeded(pending.broadway_messages, pending.ack_ids, failed_ids) - - new_outstanding = - add_to_outstanding( - state.outstanding, - ok_ids, - pending.received_at, - state.config.max_extension_ms - ) - - if ok_msgs != [] do - emit_telemetry(:receive_messages, %{count: length(ok_msgs)}, state.config) - {:noreply, deliver_messages(%{state | outstanding: new_outstanding}, ok_msgs)} - else - {:noreply, %{state | outstanding: new_outstanding}} - end - - {:error, _reason} -> - # Total failure — drop all messages (server will redeliver). - {:noreply, state} + if state.draining do + # Nack rather than deliver — server will redeliver. + nack_per_on_shutdown(state, pending.ack_ids) + {:noreply, state} + else + handle_receipt_modack_success(state, pending, result) end end end @@ -354,6 +309,23 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do # Stream-level gRPC error reported by the StreamReader. # Retryable errors trigger reconnect; terminal errors stop the GenServer. + # During draining, don't reconnect on retryable errors — just clean up. + def handle_info({:stream_error, error}, %{draining: true} = state) do + case ErrorClassifier.classify(error) do + :terminal -> + Logger.error( + "Terminal gRPC stream error during drain on subscription #{state.config.subscription} - reason: #{inspect(error)}." + ) + + emit_telemetry(:terminal_error, %{}, state.config, %{reason: error}) + {:noreply, reset_connection(state, error)} + + :retryable -> + emit_telemetry(:disconnect, %{}, state.config, %{reason: error}) + {:noreply, reset_connection(state, error)} + end + end + def handle_info({:stream_error, error}, state) do case ErrorClassifier.classify(error) do :terminal -> @@ -372,42 +344,26 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do # Server closed the stream normally (StreamReader enumeration exhausted). def handle_info({:stream_closed}, state) do - emit_telemetry(:disconnect, %{}, state.config, %{reason: :stream_closed}) - # Stream ended naturally; nil out grpc_stream to skip cancel in close_stream/1. # See decisions.md for why cancelling after a server-initiated close crashes the Mint ConnectionProcess. - state = %{state | grpc_stream: nil} - - if state.draining do - {:noreply, reset_connection(state, :stream_closed)} - else - {:noreply, schedule_reconnect(reset_connection(state, :stream_closed))} - end + handle_disconnect(%{state | grpc_stream: nil}, :stream_closed) end # StreamReader exited normally — {:stream_closed} should arrive first. # Only reconnect if grpc_stream is still set (stream_closed not yet processed). def handle_info({:EXIT, pid, :normal}, %{reader_pid: pid} = state) do if state.grpc_stream do - emit_telemetry(:disconnect, %{}, state.config, %{reason: :stream_closed}) # Same rationale as {:stream_closed}: skip cancel on natural close. - state = %{state | grpc_stream: nil} - - if state.draining do - {:noreply, reset_connection(state, :stream_closed)} - else - {:noreply, schedule_reconnect(reset_connection(state, :stream_closed))} - end + handle_disconnect(%{state | grpc_stream: nil}, :stream_closed) else # Already handled by {:stream_closed} — just clear the reader_pid. {:noreply, %{state | reader_pid: nil}} end end - # StreamReader crashed — reconnect. + # StreamReader crashed — reconnect (unless draining). def handle_info({:EXIT, pid, reason}, %{reader_pid: pid} = state) do - emit_telemetry(:disconnect, %{}, state.config, %{reason: reason}) - {:noreply, schedule_reconnect(reset_connection(state, reason))} + handle_disconnect(state, reason) end # Catch-all for other EXIT signals (e.g. from the supervisor during shutdown). @@ -416,7 +372,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do end def handle_info(:extend_leases, state) do - {:noreply, do_extend_leases(state)} + {:noreply, LeaseManager.do_extend_leases(state)} end # Periodic keep-alive ping to prevent the server from closing an idle stream. @@ -425,7 +381,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do end def handle_info(:send_keepalive, state) do - adaptive_deadline = AckTimeDistribution.percentile(state.ack_time_dist, 0.99) + adaptive_deadline = LeaseManager.effective_deadline(state) keepalive_request = %StreamingPullRequest{stream_ack_deadline_seconds: adaptive_deadline} case send_on_stream(state.grpc_stream, keepalive_request, state) do @@ -440,9 +396,9 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do end # Mint adapter signals connection loss. + # During draining, don't reconnect — the stream is intentionally closing. def handle_info({:elixir_grpc, :connection_down, conn_pid}, %{conn_pid: conn_pid} = state) do - emit_telemetry(:disconnect, %{}, state.config, %{reason: :connection_down}) - {:noreply, schedule_reconnect(reset_connection(state, :connection_down))} + handle_disconnect(state, :connection_down) end # Gun adapter signals connection loss. @@ -450,13 +406,27 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do {:gun_down, conn_pid, _protocol, _reason, _killed_streams}, %{conn_pid: conn_pid} = state ) do - emit_telemetry(:disconnect, %{}, state.config, %{reason: :connection_down}) - {:noreply, schedule_reconnect(reset_connection(state, :connection_down))} + handle_disconnect(state, :connection_down) end def handle_info(:drain_timeout, state) do - emit_telemetry(:drain_timeout, %{}, state.config) - {:noreply, close_stream(%{state | drain_timer: nil})} + outstanding_ids = Map.keys(state.outstanding) + + Telemetry.emit_span_exception( + :stream, + :drain, + state.drain_started_at, + Map.merge(span_metadata(state.config), %{kind: :timeout, reason: :drain_timeout}), + %{remaining_count: length(outstanding_ids)}, + Map.get(state.config, :telemetry_metadata) + ) + + # Nack all remaining outstanding messages so they're redelivered promptly + # instead of waiting for their ack deadlines to expire naturally. This also + # empties the outstanding map so the producer's terminate/2 becomes a no-op. + nack_per_on_shutdown(state, outstanding_ids) + + {:noreply, close_stream(%{state | drain_timer: nil, drain_started_at: nil, outstanding: %{}})} end def handle_info(_msg, state) do @@ -465,23 +435,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do @impl GenServer def handle_cast({:acknowledge, ack_ids}, state) do - now = now_ms() - - # Record processing times for the adaptive p99 deadline calculation. - ack_time_dist = - Enum.reduce(ack_ids, state.ack_time_dist, fn ack_id, dist -> - case Map.get(state.outstanding, ack_id) do - %{received_at: received_at} -> - duration_s = max(1, div(now - received_at, 1_000)) - AckTimeDistribution.record(dist, duration_s) - - nil -> - dist - end - end) - - new_outstanding = Enum.reduce(ack_ids, state.outstanding, &Map.delete(&2, &1)) - state = %{state | outstanding: new_outstanding, ack_time_dist: ack_time_dist} + state = MessageDispatch.record_and_remove_from_outstanding(state, ack_ids) AckBatcher.ack(state.ack_batcher, ack_ids) emit_telemetry(:ack, %{count: length(ack_ids)}, state.config) @@ -490,70 +444,94 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do end def handle_cast({:modify_deadline, ack_ids, deadline_seconds}, state) do - now = now_ms() - # Record processing times and remove from outstanding for all deadline # modifications (both nack with deadline=0 and nack with deadline>0). # Once a message has been nacked, it must not be lease-extended further — # otherwise the periodic extend_leases cycle would override the requested # deadline, and the drain phase could never complete because outstanding # would never become empty. - ack_time_dist = - Enum.reduce(ack_ids, state.ack_time_dist, fn ack_id, acc -> - case Map.get(state.outstanding, ack_id) do - %{received_at: received_at} -> - duration_s = max(1, div(now - received_at, 1_000)) - AckTimeDistribution.record(acc, duration_s) - - nil -> - acc - end - end) - - new_outstanding = Enum.reduce(ack_ids, state.outstanding, &Map.delete(&2, &1)) - state = %{state | outstanding: new_outstanding, ack_time_dist: ack_time_dist} + state = MessageDispatch.record_and_remove_from_outstanding(state, ack_ids) AckBatcher.modack(state.ack_batcher, ack_ids, deadline_seconds) {:noreply, maybe_complete_drain(state)} end - # The producer signals its current total demand. Update pending_demand and - # flush up to that many buffered messages to the producer. + # The producer signals a demand delta. Accumulate it and flush up to the new + # total from the message buffer. def handle_cast({:demand_available, amount}, state) do - state = %{state | pending_demand: amount} - {:noreply, flush_demand(state)} + state = %{state | pending_demand: state.pending_demand + amount} + {:noreply, MessageDispatch.flush_demand(state)} end @impl GenServer - def handle_call({:set_producer, producer_pid}, _from, state) do - state = %{state | producer_pid: producer_pid} - send(self(), :connect) - {:reply, :ok, state} - end + def handle_call(:prepare_for_draining, _from, state) do + drain_meta = span_metadata(state.config) + + drain_started_at = + Telemetry.emit_span_start( + :stream, + :drain, + drain_meta, + %{ + buffered_count: :queue.len(state.message_buffer), + outstanding_count: map_size(state.outstanding), + pending_receipt_modack_count: map_size(state.pending_receipt_modacks) + }, + Map.get(state.config, :telemetry_metadata) + ) - def handle_call(:stop_receiving, _from, state) do - # Nack pending receipt modacks so the server redelivers them quickly. - state = nack_pending_receipt_modacks(state) - # Close the reader so no new messages arrive; keep the channel open for AckBatcher. - state = close_reader(state) - state = start_drain_timer(state) - {:reply, :ok, %{state | receiving: false, draining: true}} + try do + # 1. Close the reader FIRST to stop new messages from arriving. + state = close_reader(state) + + # 2. Nack pending receipt modacks so the server redelivers them quickly. + state = nack_pending_receipt_modacks(state) + + # 3. Extract ack_ids from buffered messages and nack them. + buffered_ack_ids = MessageDispatch.extract_buffered_ack_ids(state.message_buffer) + + nacked_count = length(buffered_ack_ids) + nack_per_on_shutdown(state, buffered_ack_ids) + + # 4. Remove buffered ack_ids from outstanding and clear the buffer. + new_outstanding = + Enum.reduce(buffered_ack_ids, state.outstanding, &Map.delete(&2, &1)) + + state = %{ + state + | message_buffer: :queue.new(), + outstanding: new_outstanding, + draining: true, + drain_started_at: drain_started_at + } + + # 5. Start the drain timer. + state = start_drain_timer(state) + + # 6. Check if drain is already complete (outstanding may now be empty). + state = maybe_complete_drain(state) + + {:reply, {:ok, nacked_count}, state} + rescue + e -> + Telemetry.emit_span_exception( + :stream, + :drain, + drain_started_at, + Map.merge(drain_meta, %{kind: :error, reason: Exception.message(e)}), + %{}, + Map.get(state.config, :telemetry_metadata) + ) + + reraise e, __STACKTRACE__ + end end def handle_call(:get_outstanding, _from, state) do {:reply, Map.keys(state.outstanding), state} end - def handle_call(:get_buffered, _from, state) do - ack_ids = - state.message_buffer - |> :queue.to_list() - |> Enum.map(fn %Broadway.Message{acknowledger: {_, _, %{ack_id: id}}} -> id end) - - {:reply, ack_ids, state} - end - def handle_call(:close, _from, state) do # Best-effort flush; AckBatcher may already be down during pipeline shutdown. flush_batcher_if_alive(state.ack_batcher) @@ -562,9 +540,23 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do end @impl GenServer - def terminate(_reason, state) do + def terminate(reason, state) do + if state.draining and state.drain_started_at != nil do + remaining_count = + map_size(state.outstanding) + map_size(state.pending_receipt_modacks) + + Telemetry.emit_span_exception( + :stream, + :drain, + state.drain_started_at, + Map.merge(span_metadata(state.config), %{kind: :terminate, reason: reason}), + %{remaining_count: remaining_count}, + Map.get(state.config, :telemetry_metadata) + ) + end + state - |> cancel_lease_timer() + |> LeaseManager.cancel_lease_timer() |> cancel_keepalive_timer() |> cancel_drain_timer() |> close_stream() @@ -574,6 +566,30 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do # --- Private: connection --- + # Shared handler for disconnect events (stream_closed, connection_down, + # reader crash). Emits telemetry, resets the connection, and schedules + # a reconnect unless draining. + defp handle_disconnect(state, reason) do + emit_telemetry(:disconnect, %{}, state.config, %{reason: reason}) + + if state.draining do + {:noreply, reset_connection(state, reason)} + else + {:noreply, schedule_reconnect(reset_connection(state, reason))} + end + end + + defp do_connect(state) do + case connect(state) do + {:ok, new_state} -> + {:noreply, new_state} + + {:error, reason, new_state} -> + emit_telemetry(:connection_failure, %{}, state.config, %{reason: reason}) + {:noreply, schedule_reconnect(new_state)} + end + end + defp connect(state) do case state.grpc_client.connect(state.grpc_client_config) do {:ok, channel} -> @@ -613,10 +629,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do defp reset_connection(state, reason) do # Drop buffered messages on disconnect; their ack_ids are already in outstanding # so removing them avoids pointless lease-extension for messages that will redeliver. - buffered_ack_ids = - state.message_buffer - |> :queue.to_list() - |> Enum.map(fn %Broadway.Message{acknowledger: {_, _, %{ack_id: id}}} -> id end) + buffered_ack_ids = MessageDispatch.extract_buffered_ack_ids(state.message_buffer) new_outstanding = Enum.reduce(buffered_ack_ids, state.outstanding, &Map.delete(&2, &1)) @@ -708,66 +721,9 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do defp now_ms, do: System.monotonic_time(:millisecond) - # --- Private: lease extension --- - - defp do_extend_leases(state) do - now = now_ms() - adaptive_deadline = AckTimeDistribution.percentile(state.ack_time_dist, 0.99) - - # Enforce minimum 60s for exactly-once subscriptions. - effective_deadline = - if state.exactly_once_enabled, - do: max(adaptive_deadline, @min_deadline_exactly_once_seconds), - else: adaptive_deadline - - # Partition into still-valid and expired (past max_expiry — server will redeliver). - {valid, expired} = - Map.split_with(state.outstanding, fn {_id, info} -> info.max_expiry > now end) - - if map_size(expired) > 0 do - emit_telemetry(:lease_expired, %{count: map_size(expired)}, state.config) - end - - emit_telemetry( - :extend_leases, - %{count: map_size(valid), deadline: effective_deadline}, - state.config - ) - - if map_size(valid) > 0 do - AckBatcher.modack(state.ack_batcher, Map.keys(valid), effective_deadline) - end + # --- Private: lease extension (delegated to LeaseManager) --- - # Schedule next tick with jitter in [0.8, 0.9) to spread out concurrent StreamManagers. - base_interval_ms = max(1_000, (effective_deadline - @grace_period_seconds) * 1_000) - jitter_factor = 0.8 + :rand.uniform() * 0.1 - timer = Process.send_after(self(), :extend_leases, round(base_interval_ms * jitter_factor)) - - state - |> Map.put(:outstanding, valid) - |> Map.put(:lease_timer, timer) - |> sweep_stale_pending_modacks() - end - - # --- Private: lease management --- - - defp schedule_lease_timer(state) do - cancel_lease_timer(state) - # Initial interval: (configured deadline - grace period) with jitter, minimum 1s. - deadline_s = state.config.stream_ack_deadline_seconds - base_ms = max(1_000, (deadline_s - @grace_period_seconds) * 1_000) - jitter_factor = 0.8 + :rand.uniform() * 0.1 - interval_ms = round(base_ms * jitter_factor) - timer = Process.send_after(self(), :extend_leases, interval_ms) - %{state | lease_timer: timer} - end - - defp cancel_lease_timer(%{lease_timer: nil} = state), do: state - - defp cancel_lease_timer(%{lease_timer: timer} = state) do - Process.cancel_timer(timer) - %{state | lease_timer: nil} - end + # --- Private: lease management (delegated to LeaseManager) --- # --- Private: keep-alive --- @@ -822,88 +778,81 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do state = cancel_drain_timer(state) # AckBatcher may already be down during pipeline shutdown. flush_batcher_if_alive(state.ack_batcher) - emit_telemetry(:drain_complete, %{}, state.config) - close_stream(state) - end - - defp maybe_complete_drain(state), do: state - # --- Private: message building --- + Telemetry.emit_span_stop( + :stream, + :drain, + state.drain_started_at, + span_metadata(state.config), + Map.get(state.config, :telemetry_metadata) + ) - # Buffer incoming messages, then flush up to pending_demand to the producer. - defp deliver_messages(state, messages) do - new_buffer = Enum.reduce(messages, state.message_buffer, &:queue.in(&1, &2)) - flush_demand(%{state | message_buffer: new_buffer}) + close_stream(%{state | drain_started_at: nil}) end - # Flush up to `pending_demand` messages from the buffer to the producer. - defp flush_demand(%{pending_demand: 0} = state), do: state - - defp flush_demand(state) do - if :queue.is_empty(state.message_buffer) do - state - else - {remaining, demand_left, batch_reversed} = - flush_demand_loop(state.message_buffer, state.pending_demand, []) + defp maybe_complete_drain(state), do: state - send(state.producer_pid, {:stream_messages, Enum.reverse(batch_reversed)}) - %{state | message_buffer: remaining, pending_demand: demand_left} - end - end + # --- Private: message dispatch (delegated to MessageDispatch) --- - defp flush_demand_loop(queue, 0, acc), do: {queue, 0, acc} + # Stale pending receipt modacks sweeping is delegated to LeaseManager. - defp flush_demand_loop(queue, n, acc) do - case :queue.out(queue) do - {{:value, msg}, rest} -> flush_demand_loop(rest, n - 1, [msg | acc]) - {:empty, _} -> {queue, n, acc} - end - end + # Handle the result of an exactly-once receipt modack RPC (non-draining path). + defp handle_receipt_modack_success(state, pending, result) do + case result do + {:ok, []} -> + new_outstanding = + MessageDispatch.add_to_outstanding( + state.outstanding, + pending.ack_ids, + pending.received_at, + state.config.max_extension_ms + ) + + emit_telemetry( + :receive_messages, + %{count: length(pending.broadway_messages)}, + state.config + ) - # Build outstanding entries for a list of confirmed ack_ids. - defp add_to_outstanding(outstanding, ack_ids, received_at, max_extension_ms) do - Enum.reduce(ack_ids, outstanding, fn ack_id, acc -> - Map.put(acc, ack_id, %{received_at: received_at, max_expiry: received_at + max_extension_ms}) - end) - end + {:noreply, + MessageDispatch.deliver_messages(%{state | outstanding: new_outstanding}, pending.broadway_messages)} - # Split broadway_messages into {succeeded_msgs, succeeded_ids} by removing - # messages whose ack_id is in failed_ids. - defp partition_succeeded(broadway_messages, all_ack_ids, failed_ids) do - failed_set = MapSet.new(failed_ids) + {:ok, failed_ids} -> + # Partial success — deliver only messages whose modack succeeded. + {ok_msgs, ok_ids} = + MessageDispatch.partition_succeeded(pending.broadway_messages, pending.ack_ids, failed_ids) - {ok_msgs_reversed, ok_ids_reversed} = - Enum.zip(broadway_messages, all_ack_ids) - |> Enum.reduce({[], []}, fn {msg, id}, {msgs_acc, ids_acc} -> - if MapSet.member?(failed_set, id) do - {msgs_acc, ids_acc} + new_outstanding = + MessageDispatch.add_to_outstanding( + state.outstanding, + ok_ids, + pending.received_at, + state.config.max_extension_ms + ) + + if ok_msgs != [] do + emit_telemetry(:receive_messages, %{count: length(ok_msgs)}, state.config) + {:noreply, MessageDispatch.deliver_messages(%{state | outstanding: new_outstanding}, ok_msgs)} else - {[msg | msgs_acc], [id | ids_acc]} + {:noreply, %{state | outstanding: new_outstanding}} end - end) - {Enum.reverse(ok_msgs_reversed), Enum.reverse(ok_ids_reversed)} + {:error, _reason} -> + # Total failure — drop all messages (server will redeliver). + {:noreply, state} + end end - # Stale pending receipt modacks (older than 60s) are nacked for fast redelivery. - @receipt_modack_stale_ms 60_000 - - defp sweep_stale_pending_modacks(state) do - now = now_ms() - cutoff = now - @receipt_modack_stale_ms - - {stale, fresh} = - Map.split_with(state.pending_receipt_modacks, fn {_ref, %{received_at: t}} -> - t < cutoff - end) - - if map_size(stale) > 0 do - stale_ids = stale |> Map.values() |> Enum.flat_map(& &1.ack_ids) - AckBatcher.modack(state.ack_batcher, stale_ids, 0) - emit_telemetry(:receipt_modack_stale, %{count: length(stale_ids)}, state.config) - end + # Nack a list of ack_ids per the on_shutdown config. With :noop, messages are + # simply dropped (server redelivers after ack deadline expires naturally). + defp nack_per_on_shutdown(_state, []), do: :ok + defp nack_per_on_shutdown(%{config: %{on_shutdown: :noop}}, _ack_ids), do: :ok - %{state | pending_receipt_modacks: fresh} + defp nack_per_on_shutdown( + %{ack_batcher: ack_batcher, config: %{on_shutdown: {:nack, deadline}}}, + ack_ids + ) do + AckBatcher.modack(ack_batcher, ack_ids, deadline) end # Nack all messages held in pending_receipt_modacks so the server redelivers @@ -918,46 +867,11 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do |> Map.values() |> Enum.flat_map(& &1.ack_ids) - {_action, deadline} = state.config.on_shutdown - AckBatcher.modack(state.ack_batcher, pending_ids, deadline) + nack_per_on_shutdown(state, pending_ids) %{state | pending_receipt_modacks: %{}} end - defp build_broadway_message( - %{ack_id: ack_id, message: pubsub_msg, delivery_attempt: delivery_attempt}, - state - ) do - # ack_ref is the Broadway pipeline name, used as the persistent_term key. - ack_ref = state.config.broadway[:name] - acknowledger = BroadwayCloudPubSub.Streaming.Acknowledger.builder(ack_ref).(ack_id) - - data = pubsub_msg.data - metadata = build_metadata(pubsub_msg, delivery_attempt) - - %Broadway.Message{ - data: data, - metadata: metadata, - acknowledger: acknowledger - } - end - - defp build_metadata(msg, delivery_attempt) do - MessageBuilder.build_metadata(%{ - message_id: msg.message_id, - ordering_key: msg.ordering_key, - publish_time: to_datetime(msg.publish_time), - delivery_attempt: delivery_attempt, - attributes: Map.new(msg.attributes || []) - }) - end - - defp to_datetime(nil), do: nil - - defp to_datetime(%{seconds: seconds, nanos: nanos}) do - DateTime.from_unix!(seconds * 1_000_000_000 + nanos, :nanosecond) - rescue - _ -> nil - end + # Message construction is delegated to MessageDispatch. # Flush AckBatcher if its process is currently alive. Guards against the # batcher being down during pipeline shutdown (Broadway stops children in @@ -973,13 +887,17 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do # --- Private: telemetry --- + defp span_metadata(config) do + %{ + name: config.broadway[:name], + subscription: config.subscription + } + end + defp emit_telemetry(event, measurements, config, extra_metadata \\ %{}) do metadata = Map.merge( - %{ - name: config.broadway[:name], - subscription: config.subscription - }, + span_metadata(config), extra_metadata ) diff --git a/lib/broadway_cloud_pub_sub/streaming/telemetry.ex b/lib/broadway_cloud_pub_sub/streaming/telemetry.ex index 62bd3c0..8370ef3 100644 --- a/lib/broadway_cloud_pub_sub/streaming/telemetry.ex +++ b/lib/broadway_cloud_pub_sub/streaming/telemetry.ex @@ -16,6 +16,11 @@ defmodule BroadwayCloudPubSub.Streaming.Telemetry do # # Telemetry.execute(:stream, :connect, %{}, %{name: name, subscription: sub}) # Telemetry.span(:grpc_client, :ack, %{name: name, subscription: sub}, fn -> ... end) + # + # # For async spans whose start and stop/exception are emitted separately: + # mono = Telemetry.emit_span_start(:stream, :drain, %{...}, config) + # Telemetry.emit_span_stop(:stream, :drain, mono, %{...}, config) + # Telemetry.emit_span_exception(:stream, :drain, mono, %{kind: ..., ...}, config) @base [:broadway_cloud_pub_sub, :streaming] @@ -55,6 +60,86 @@ defmodule BroadwayCloudPubSub.Streaming.Telemetry do end) end + @doc """ + Emits the `:start` event for an async span under + `[:broadway_cloud_pub_sub, :streaming, layer, event, :start]`. + + Measurements follow `:telemetry.span/3` conventions: + `%{system_time: System.system_time(), monotonic_time: monotonic_now}`. + Any `extra_measurements` are merged into the measurements map. + + Returns the monotonic start time (nanoseconds) so the caller can compute + `duration` when emitting the matching `:stop` or `:exception`. + """ + @spec emit_span_start(atom(), atom(), map(), map(), term()) :: integer() + def emit_span_start(layer, event, metadata, extra_measurements \\ %{}, telemetry_metadata) do + now_mono = System.monotonic_time() + + measurements = + Map.merge( + %{system_time: System.system_time(), monotonic_time: now_mono}, + extra_measurements + ) + + :telemetry.execute( + @base ++ [layer, event, :start], + measurements, + maybe_put_extra(metadata, resolve_extra(telemetry_metadata)) + ) + + now_mono + end + + @doc """ + Emits the `:stop` event for an async span under + `[:broadway_cloud_pub_sub, :streaming, layer, event, :stop]`. + + `start_mono` must be the value returned by the matching `emit_span_start/5` call. + `duration` is computed as `now - start_mono` in native time units. + """ + @spec emit_span_stop(atom(), atom(), integer(), map(), term()) :: :ok + def emit_span_stop(layer, event, start_mono, metadata, telemetry_metadata) do + now_mono = System.monotonic_time() + + :telemetry.execute( + @base ++ [layer, event, :stop], + %{duration: now_mono - start_mono, monotonic_time: now_mono}, + maybe_put_extra(metadata, resolve_extra(telemetry_metadata)) + ) + end + + @doc """ + Emits the `:exception` event for an async span under + `[:broadway_cloud_pub_sub, :streaming, layer, event, :exception]`. + + `start_mono` must be the value returned by the matching `emit_span_start/5` call. + `duration` is computed as `now - start_mono` in native time units. + Any `extra_measurements` are merged into the measurements map. + """ + @spec emit_span_exception(atom(), atom(), integer(), map(), map(), term()) :: :ok + def emit_span_exception( + layer, + event, + start_mono, + metadata, + extra_measurements \\ %{}, + telemetry_metadata + ) do + now_mono = System.monotonic_time() + + measurements = + Map.merge( + %{duration: now_mono - start_mono, monotonic_time: now_mono}, + extra_measurements + ) + + :telemetry.execute( + @base ++ [layer, event, :exception], + measurements, + maybe_put_extra(metadata, resolve_extra(telemetry_metadata)) + ) + end + # --- Private --- defp resolve_extra(nil), do: nil diff --git a/lib/broadway_cloud_pub_sub/streaming/unary_rpc_client.ex b/lib/broadway_cloud_pub_sub/streaming/unary_rpc_client.ex index d0c5204..9234379 100644 --- a/lib/broadway_cloud_pub_sub/streaming/unary_rpc_client.ex +++ b/lib/broadway_cloud_pub_sub/streaming/unary_rpc_client.ex @@ -31,8 +31,8 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClient do use GenServer - alias BroadwayCloudPubSub.{Backoff} - alias BroadwayCloudPubSub.Streaming.{AckResult, ErrorClassifier, Telemetry} + alias BroadwayCloudPubSub.Backoff + alias BroadwayCloudPubSub.Streaming.{AckResult, ErrorClassifier, Options, Telemetry} alias Google.Pubsub.V1.{AcknowledgeRequest, ModifyAckDeadlineRequest} require Logger @@ -76,16 +76,7 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClient do @doc false @spec child_opts(keyword()) :: keyword() def child_opts(opts) do - picked = Keyword.take(opts, @all_keys) - - Enum.each(@required_keys, fn key -> - unless Keyword.has_key?(picked, key) do - raise ArgumentError, - "missing required option #{inspect(key)} for #{inspect(__MODULE__)}" - end - end) - - picked + Options.validate_child_opts(opts, @all_keys, @required_keys) end @spec start_link(keyword()) :: GenServer.on_start() @@ -109,22 +100,9 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClient do """ @spec acknowledge(GenServer.server(), [String.t()]) :: {:ok, [String.t()]} | {:error, term()} def acknowledge(pid, ack_ids) when is_list(ack_ids) do - ack_ids - |> Enum.chunk_every(@max_ack_ids_per_request) - |> Enum.reduce({:ok, []}, fn - chunk, {:ok, failed_so_far} -> - case GenServer.call(pid, {:acknowledge, chunk}, 30_000) do - :ok -> {:ok, failed_so_far} - {:error, _reason} -> {:ok, failed_so_far ++ chunk} - end - - _chunk, {:error, _} = err -> - # Hard process error — don't attempt remaining chunks - err + chunked_rpc(pid, ack_ids, fn chunk -> + GenServer.call(pid, {:acknowledge, chunk}, 30_000) end) - catch - :exit, reason -> - {:error, {:call_failed, reason}} end @doc """ @@ -135,21 +113,9 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClient do @spec modify_ack_deadline(GenServer.server(), [String.t()], non_neg_integer()) :: {:ok, [String.t()]} | {:error, term()} def modify_ack_deadline(pid, ack_ids, deadline_seconds) when is_list(ack_ids) do - ack_ids - |> Enum.chunk_every(@max_ack_ids_per_request) - |> Enum.reduce({:ok, []}, fn - chunk, {:ok, failed_so_far} -> - case GenServer.call(pid, {:modify_ack_deadline, chunk, deadline_seconds}, 30_000) do - :ok -> {:ok, failed_so_far} - {:error, _reason} -> {:ok, failed_so_far ++ chunk} - end - - _chunk, {:error, _} = err -> - err + chunked_rpc(pid, ack_ids, fn chunk -> + GenServer.call(pid, {:modify_ack_deadline, chunk, deadline_seconds}, 30_000) end) - catch - :exit, reason -> - {:error, {:call_failed, reason}} end # --- GenServer callbacks --- @@ -190,102 +156,24 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClient do @impl GenServer def handle_call({:acknowledge, ack_ids}, _from, state) do - state = ensure_channel(state) - - case state.channel do - nil -> - {:reply, {:error, :no_channel}, state} - - channel -> - request = %AcknowledgeRequest{ - subscription: state.config.subscription, - ack_ids: ack_ids - } - - result = state.grpc_client.acknowledge(channel, request, state.grpc_client_config) - - case result do - {:ok, _} -> - {:reply, :ok, state} + request = %AcknowledgeRequest{ + subscription: state.config.subscription, + ack_ids: ack_ids + } - {:error, error} -> - case ErrorClassifier.classify(error) do - :retryable -> - # For exactly-once subscriptions, retryable RPC errors may embed - # per-ack-ID permanent failures in error details. Permanent ids - # are dropped; transient ones are returned to AckBatcher for retry. - per_ack_errors = AckResult.parse_error_details(Map.get(error, :details)) - {transient_ids, permanent_ids} = split_by_ack_result(ack_ids, per_ack_errors) - - if permanent_ids != [] do - emit_telemetry( - :permanent_failure, - %{count: length(permanent_ids)}, - state.config - ) - end - - state = schedule_reconnect(state) - {:reply, {:error, {error, transient_ids}}, state} - - :terminal -> - Logger.error( - "Unable to acknowledge messages with Cloud Pub/Sub via gRPC - reason: #{inspect(error)}" - ) - - # Reply first so caller can retain ack_ids, then stop so supervisor restarts fresh. - {:stop, {:terminal_error, error}, {:error, error}, state} - end - end - end + grpc_client = state.grpc_client + do_rpc(state, ack_ids, request, &grpc_client.acknowledge/3, "acknowledge messages") end def handle_call({:modify_ack_deadline, ack_ids, deadline_seconds}, _from, state) do - state = ensure_channel(state) - - case state.channel do - nil -> - {:reply, {:error, :no_channel}, state} - - channel -> - request = %ModifyAckDeadlineRequest{ - subscription: state.config.subscription, - ack_ids: ack_ids, - ack_deadline_seconds: deadline_seconds - } - - result = state.grpc_client.modify_ack_deadline(channel, request, state.grpc_client_config) - - case result do - {:ok, _} -> - {:reply, :ok, state} + request = %ModifyAckDeadlineRequest{ + subscription: state.config.subscription, + ack_ids: ack_ids, + ack_deadline_seconds: deadline_seconds + } - {:error, error} -> - case ErrorClassifier.classify(error) do - :retryable -> - per_ack_errors = AckResult.parse_error_details(Map.get(error, :details)) - {transient_ids, permanent_ids} = split_by_ack_result(ack_ids, per_ack_errors) - - if permanent_ids != [] do - emit_telemetry( - :permanent_failure, - %{count: length(permanent_ids)}, - state.config - ) - end - - state = schedule_reconnect(state) - {:reply, {:error, {error, transient_ids}}, state} - - :terminal -> - Logger.error( - "Unable to modify ack deadline for messages with Cloud Pub/Sub via gRPC - reason: #{inspect(error)}" - ) - - {:stop, {:terminal_error, error}, {:error, error}, state} - end - end - end + grpc_client = state.grpc_client + do_rpc(state, ack_ids, request, &grpc_client.modify_ack_deadline/3, "modify ack deadline for messages") end @impl GenServer @@ -333,6 +221,74 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClient do # --- Private --- + # Chunks ack_ids into batches of @max_ack_ids_per_request and calls `call_fn` + # for each chunk. Accumulates failed chunks. Stops on hard process errors. + defp chunked_rpc(_pid, ack_ids, call_fn) do + ack_ids + |> Enum.chunk_every(@max_ack_ids_per_request) + |> Enum.reduce({:ok, []}, fn + chunk, {:ok, failed_so_far} -> + case call_fn.(chunk) do + :ok -> {:ok, failed_so_far} + {:error, _reason} -> {:ok, failed_so_far ++ chunk} + end + + _chunk, {:error, _} = err -> + # Hard process error — don't attempt remaining chunks + err + end) + catch + :exit, reason -> + {:error, {:call_failed, reason}} + end + + # Shared RPC execution: ensure channel, call the given rpc_fn, classify errors. + # `rpc_fn` is a 3-arity function (channel, request, grpc_client_config). + # `operation` is a human-readable string for Logger.error messages. + defp do_rpc(state, ack_ids, request, rpc_fn, operation) do + state = ensure_channel(state) + + case state.channel do + nil -> + {:reply, {:error, :no_channel}, state} + + channel -> + case rpc_fn.(channel, request, state.grpc_client_config) do + {:ok, _} -> + {:reply, :ok, state} + + {:error, error} -> + handle_rpc_error(state, ack_ids, error, operation) + end + end + end + + defp handle_rpc_error(state, ack_ids, error, operation) do + case ErrorClassifier.classify(error) do + :retryable -> + # For exactly-once subscriptions, retryable RPC errors may embed + # per-ack-ID permanent failures in error details. Permanent ids + # are dropped; transient ones are returned to AckBatcher for retry. + per_ack_errors = AckResult.parse_error_details(Map.get(error, :details)) + {transient_ids, permanent_ids} = split_by_ack_result(ack_ids, per_ack_errors) + + if permanent_ids != [] do + emit_telemetry(:permanent_failure, %{count: length(permanent_ids)}, state.config) + end + + state = schedule_reconnect(state) + {:reply, {:error, {error, transient_ids}}, state} + + :terminal -> + Logger.error( + "Unable to #{operation} with Cloud Pub/Sub via gRPC - reason: #{inspect(error)}" + ) + + # Reply first so caller can retain ack_ids, then stop so supervisor restarts fresh. + {:stop, {:terminal_error, error}, {:error, error}, state} + end + end + defp ensure_channel(%{channel: nil} = state) do case state.grpc_client.connect(state.grpc_client_config) do {:ok, channel} -> %{state | channel: channel} diff --git a/test/broadway_cloud_pub_sub/streaming/options_test.exs b/test/broadway_cloud_pub_sub/streaming/options_test.exs index d57fcbc..f5bf648 100644 --- a/test/broadway_cloud_pub_sub/streaming/options_test.exs +++ b/test/broadway_cloud_pub_sub/streaming/options_test.exs @@ -511,13 +511,12 @@ defmodule BroadwayCloudPubSub.Streaming.ProducerPrepareForStartTest do assert grpc_client_config.broadway_name == TestPipeline end - test "returns two child specs: UnaryAckSupervisor first, StreamManager second" do + test "returns one child spec: UnaryAckSupervisor (StreamManagers are started per-producer in init)" do {specs, _opts} = Producer.prepare_for_start(Producer, broadway_opts()) - assert length(specs) == 2 - [sup_spec, manager_spec] = specs + assert length(specs) == 1 + [sup_spec] = specs assert sup_spec.type == :supervisor - assert manager_spec[:type] == nil or manager_spec[:restart] == :permanent end end end diff --git a/test/broadway_cloud_pub_sub/streaming/producer_integration_test.exs b/test/broadway_cloud_pub_sub/streaming/producer_integration_test.exs index 53d75c9..51b23a8 100644 --- a/test/broadway_cloud_pub_sub/streaming/producer_integration_test.exs +++ b/test/broadway_cloud_pub_sub/streaming/producer_integration_test.exs @@ -191,4 +191,52 @@ defmodule BroadwayCloudPubSub.Streaming.ProducerIntegrationTest do assert messages == [] end end + + describe "graceful shutdown" do + test "Broadway.stop completes promptly when messages are buffered", %{pipeline: pid} do + # Broadway.stop triggers the drain sequence. The pipeline should shut down + # promptly (within the shutdown timeout) rather than processing all buffered + # messages. We verify that stop returns within a reasonable time. + ref = Process.monitor(pid) + + try do + Broadway.stop(pid) + catch + :exit, _ -> :ok + end + + # Pipeline should shut down within 10 seconds (well under the 30s shutdown timeout). + # Before the fix, this would take minutes as all buffered messages were processed. + receive do + {:DOWN, ^ref, :process, ^pid, _reason} -> :ok + after + 10_000 -> flunk("Broadway.stop did not complete within 10 seconds") + end + end + + test "Broadway.stop completes promptly with buffered messages", %{topic: topic, pipeline: pid} do + # Publish enough messages to fill the buffer, then stop immediately. + # The pipeline should shut down quickly, nacking the buffered messages. + payloads = Enum.map(1..30, &"shutdown-msg-#{&1}") + {:ok, _msg_ids} = PubSubEmulator.publish(topic, payloads) + + # Give the pipeline a moment to receive some messages + Process.sleep(500) + + ref = Process.monitor(pid) + + try do + Broadway.stop(pid) + catch + :exit, _ -> :ok + end + + # Should complete well under the 30s shutdown timeout. + receive do + {:DOWN, ^ref, :process, ^pid, _reason} -> :ok + after + 10_000 -> flunk("Broadway.stop with buffered messages did not complete within 10 seconds") + end + end + end end diff --git a/test/broadway_cloud_pub_sub/streaming/stream_manager_test.exs b/test/broadway_cloud_pub_sub/streaming/stream_manager_test.exs index b015e31..49f8d16 100644 --- a/test/broadway_cloud_pub_sub/streaming/stream_manager_test.exs +++ b/test/broadway_cloud_pub_sub/streaming/stream_manager_test.exs @@ -84,16 +84,25 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do ack_batch_max_size: Keyword.get(opts, :ack_batch_max_size, 2_500) ) - {:ok, pid} = StreamManager.start_link(opts) + # Pass producer_pid and ack_ref directly (matching what Producer.init does) + opts = + opts + |> Keyword.put(:producer_pid, self()) + |> Keyword.put(:ack_ref, {broadway_name, 0}) - StreamManager.set_producer(pid, self()) + {:ok, pid} = StreamManager.start_link(opts) pid end # Synchronous barrier: drains all prior mailbox messages in StreamManager - # before returning. Safe to use instead of :sys.get_state/1 for sync purposes. - defp sync(pid), do: StreamManager.get_buffered(pid) + # before returning. Uses get_outstanding (a GenServer.call) to guarantee ordering. + defp sync(pid), do: StreamManager.get_outstanding(pid) + + # Returns the number of messages currently in the StreamManager's message_buffer. + defp buffer_length(pid) do + :queue.len(:sys.get_state(pid).message_buffer) + end # Drain all messages currently in the test process mailbox. # Used to discard stray telemetry events emitted before we start asserting. @@ -155,7 +164,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do assert_receive {:stream_messages, [_]}, 500 # Buffer is empty: all demand was consumed by the forwarded message - assert StreamManager.get_buffered(pid) == [] + assert buffer_length(pid) == 0 end end @@ -167,8 +176,8 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do StreamManager.notify_demand(pid, 0) send(pid, {:stream_messages, [received_message("buf-1", "msg1")]}) send(pid, {:stream_messages, [received_message("buf-2", "msg2")]}) - # Wait for both to be buffered (sync via get_buffered) - assert length(StreamManager.get_buffered(pid)) == 2 + # Wait for both to be buffered (sync via buffer_length) + assert buffer_length(pid) == 2 # Now demand arrives — should flush both at once StreamManager.notify_demand(pid, 10) @@ -177,7 +186,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do assert Enum.map(received, & &1.data) == ["msg1", "msg2"] # Buffer should be empty; remaining demand consumed 2 of 10 - assert StreamManager.get_buffered(pid) == [] + assert buffer_length(pid) == 0 end test "flushes only up to pending_demand, keeps remainder buffered" do @@ -190,7 +199,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do send(pid, {:stream_messages, [received_message("buf-#{i}", "msg#{i}")]}) end - assert length(StreamManager.get_buffered(pid)) == 5 + assert buffer_length(pid) == 5 # Demand for 2 — should flush exactly 2 StreamManager.notify_demand(pid, 2) @@ -200,7 +209,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do assert Enum.map(received, & &1.data) == ["msg1", "msg2"] # 3 remain buffered - assert length(StreamManager.get_buffered(pid)) == 3 + assert buffer_length(pid) == 3 # Demand for 10 — should flush the remaining 3 StreamManager.notify_demand(pid, 10) @@ -209,7 +218,63 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do assert length(received2) == 3 assert Enum.map(received2, & &1.data) == ["msg3", "msg4", "msg5"] - assert StreamManager.get_buffered(pid) == [] + assert buffer_length(pid) == 0 + end + end + + describe "notify_demand/2 — delta accumulation (regression for over-delivery bug)" do + test "demand deltas accumulate correctly when interleaved with message flushes" do + pid = start_manager() + + # 1. Signal demand delta of 10 → pending_demand should be 10 + StreamManager.notify_demand(pid, 10) + sync(pid) + assert :sys.get_state(pid).pending_demand == 10 + + # 2. StreamManager receives and flushes 5 messages → pending_demand should be 5 + msgs = for i <- 1..5, do: received_message("race-#{i}", "msg#{i}") + send(pid, {:stream_messages, msgs}) + assert_receive {:stream_messages, flushed}, 500 + assert length(flushed) == 5 + sync(pid) + assert :sys.get_state(pid).pending_demand == 5 + + # 3. Signal another demand delta of 5 → pending_demand should be 5 + 5 = 10 + # BUG: with absolute overwrite semantics, pending_demand becomes 5 instead of 10, + # effectively "forgetting" the 5 units of remaining demand from step 1. + StreamManager.notify_demand(pid, 5) + sync(pid) + assert :sys.get_state(pid).pending_demand == 10 + end + + test "over-delivery is prevented when demand arrives after flush" do + pid = start_manager() + + # Signal demand delta of 3 + StreamManager.notify_demand(pid, 3) + sync(pid) + + # StreamManager receives and flushes 3 messages → pending_demand = 0 + msgs = for i <- 1..3, do: received_message("od-#{i}", "msg#{i}") + send(pid, {:stream_messages, msgs}) + assert_receive {:stream_messages, flushed}, 500 + assert length(flushed) == 3 + sync(pid) + assert :sys.get_state(pid).pending_demand == 0 + + # Signal new demand delta of 2 → should accumulate to 0 + 2 = 2 + StreamManager.notify_demand(pid, 2) + sync(pid) + assert :sys.get_state(pid).pending_demand == 2 + + # Now 5 messages arrive — only 2 should be flushed (the rest buffered) + msgs2 = for i <- 6..10, do: received_message("od-#{i}", "msg#{i}") + send(pid, {:stream_messages, msgs2}) + assert_receive {:stream_messages, flushed2}, 500 + assert length(flushed2) == 2 + + # 3 should remain buffered + assert buffer_length(pid) == 3 end end @@ -225,7 +290,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do assert hd(messages).data == "hello" # Buffer should be empty (demand consumed the message immediately) - assert StreamManager.get_buffered(pid) == [] + assert buffer_length(pid) == 0 end test "messages are buffered when pending_demand is 0" do @@ -236,7 +301,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do refute_receive {:stream_messages, _}, 100 - assert length(StreamManager.get_buffered(pid)) == 1 + assert buffer_length(pid) == 1 end test "buffer is flushed in FIFO order on notify_demand" do @@ -248,7 +313,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do end # Sync: ensure all 3 are buffered before we signal demand - assert length(StreamManager.get_buffered(pid)) == 3 + assert buffer_length(pid) == 3 StreamManager.notify_demand(pid, 10) @@ -258,20 +323,348 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do end # ============================================================ - # receiving flag — draining + # draining # ============================================================ - describe "stop_receiving/1" do - test "messages are not forwarded after stop_receiving even when pending_demand > 0" do + describe "prepare_for_draining/1" do + test "messages are not forwarded after prepare_for_draining even when pending_demand > 0" do pid = start_manager() StreamManager.notify_demand(pid, 10) - StreamManager.stop_receiving(pid) + StreamManager.prepare_for_draining(pid) send(pid, {:stream_messages, [received_message("drain-ack", "should not arrive")]}) refute_receive {:stream_messages, _}, 200 end + + test "clears message_buffer and removes buffered ack_ids from outstanding" do + pid = start_manager() + + # Buffer 3 messages with demand=0 + StreamManager.notify_demand(pid, 0) + + for i <- 1..3 do + send(pid, {:stream_messages, [received_message("buf-drain-#{i}", "data-#{i}")]}) + end + + assert buffer_length(pid) == 3 + state = :sys.get_state(pid) + assert map_size(state.outstanding) == 3 + + # prepare_for_draining should nack + clear buffer + remove from outstanding + {:ok, nacked_count} = StreamManager.prepare_for_draining(pid) + assert nacked_count == 3 + + state = :sys.get_state(pid) + assert buffer_length(pid) == 0 + assert map_size(state.outstanding) == 0 + assert state.draining == true + end + + test "with on_shutdown :noop, clears buffer without nacking" do + pid = start_manager(on_shutdown: :noop) + + StreamManager.notify_demand(pid, 0) + send(pid, {:stream_messages, [received_message("noop-buf", "data")]}) + assert buffer_length(pid) == 1 + + {:ok, nacked_count} = StreamManager.prepare_for_draining(pid) + assert nacked_count == 1 + + state = :sys.get_state(pid) + assert buffer_length(pid) == 0 + assert map_size(state.outstanding) == 0 + end + + test "flush_demand is blocked after draining starts" do + pid = start_manager() + + # Start draining with empty buffer + StreamManager.prepare_for_draining(pid) + + # Manually inject a message into the buffer via replace_state + # (simulating a race condition where a message arrives after draining) + :sys.replace_state(pid, fn s -> + fake_msg = %Broadway.Message{ + data: "sneaky", + metadata: %{}, + acknowledger: + {BroadwayCloudPubSub.Streaming.Acknowledger, :unused, %{ack_id: "after-drain"}} + } + + %{s | message_buffer: :queue.in(fake_msg, s.message_buffer)} + end) + + assert buffer_length(pid) == 1 + + # Demand arrives — flush_demand should be a no-op because draining=true + StreamManager.notify_demand(pid, 10) + refute_receive {:stream_messages, _}, 200 + + # Buffer should still have the message (not flushed) + assert buffer_length(pid) == 1 + end + + test "preserves in-flight messages in outstanding (only clears buffered)" do + pid = start_manager() + + # Send 3 messages with demand so they go straight to processor (in-flight) + StreamManager.notify_demand(pid, 10) + + for i <- 1..3 do + send(pid, {:stream_messages, [received_message("inflight-#{i}", "data-#{i}")]}) + end + + assert_receive {:stream_messages, _}, 500 + + # Zero out pending_demand so subsequent messages are buffered + :sys.replace_state(pid, fn s -> %{s | pending_demand: 0} end) + send(pid, {:stream_messages, [received_message("buf-a", "buf-data-a")]}) + send(pid, {:stream_messages, [received_message("buf-b", "buf-data-b")]}) + assert buffer_length(pid) == 2 + + state = :sys.get_state(pid) + # 5 total outstanding: 3 in-flight + 2 buffered + assert map_size(state.outstanding) == 5 + + {:ok, nacked_count} = StreamManager.prepare_for_draining(pid) + assert nacked_count == 2 + + state = :sys.get_state(pid) + assert buffer_length(pid) == 0 + # Only the 3 in-flight messages remain in outstanding + assert map_size(state.outstanding) == 3 + assert Map.has_key?(state.outstanding, "inflight-1") + assert Map.has_key?(state.outstanding, "inflight-2") + assert Map.has_key?(state.outstanding, "inflight-3") + end + + test "drain completes immediately when no in-flight messages remain" do + pid = start_manager() + + # Buffer only (no demand → nothing dispatched to processors) + StreamManager.notify_demand(pid, 0) + send(pid, {:stream_messages, [received_message("only-buf", "data")]}) + assert buffer_length(pid) == 1 + + {:ok, 1} = StreamManager.prepare_for_draining(pid) + + state = :sys.get_state(pid) + # outstanding is empty → drain should have completed + assert map_size(state.outstanding) == 0 + # drain_timer should be cancelled since drain completed + assert state.drain_timer == nil + end + end + + # ============================================================ + # Draining behavior — handler guards + # ============================================================ + + describe "draining — stream_messages handler" do + test "messages arriving during drain are not delivered to the producer" do + pid = start_manager() + StreamManager.notify_demand(pid, 10) + StreamManager.prepare_for_draining(pid) + + send(pid, {:stream_messages, [received_message("late-msg", "data")]}) + sync(pid) + + refute_receive {:stream_messages, _}, 100 + end + + test "messages arriving during drain are not added to outstanding" do + pid = start_manager() + StreamManager.notify_demand(pid, 10) + StreamManager.prepare_for_draining(pid) + + outstanding_before = map_size(:sys.get_state(pid).outstanding) + + send(pid, {:stream_messages, [received_message("late-msg", "data")]}) + sync(pid) + + # Outstanding should not grow + assert map_size(:sys.get_state(pid).outstanding) == outstanding_before + end + end + + describe "draining — reconnect suppression" do + test ":connect during drain is ignored (no reconnection)" do + pid = start_manager() + StreamManager.prepare_for_draining(pid) + + # Manually set a reconnect_ref to simulate a pending reconnect + :sys.replace_state(pid, fn s -> %{s | reconnect_ref: make_ref()} end) + + send(pid, :connect) + sync(pid) + + state = :sys.get_state(pid) + # reconnect_ref should be cleared but no new connection started + assert state.reconnect_ref == nil + assert state.reader_pid == nil + end + + test "retryable stream_error during drain does not schedule reconnect" do + pid = start_manager() + StreamManager.notify_demand(pid, 10) + StreamManager.prepare_for_draining(pid) + + # Cancel any existing reconnect timer from initial connect failure + :sys.replace_state(pid, fn s -> + if s.reconnect_ref, do: Process.cancel_timer(s.reconnect_ref) + %{s | reconnect_ref: nil} + end) + + send(pid, {:stream_error, %GRPC.RPCError{status: 14, message: "unavailable"}}) + sync(pid) + + state = :sys.get_state(pid) + # No new reconnect scheduled during drain + assert state.reconnect_ref == nil + assert state.reader_pid == nil + assert Process.alive?(pid) + end + + test "terminal stream_error during drain does not crash (no stop)" do + pid = start_manager() + StreamManager.notify_demand(pid, 10) + StreamManager.prepare_for_draining(pid) + + send(pid, {:stream_error, %GRPC.RPCError{status: 5, message: "not found"}}) + sync(pid) + + # During drain, terminal errors don't stop the GenServer + assert Process.alive?(pid) + end + end + + describe "draining — receipt_modack_result" do + test "receipt_modack_result during drain nacks rather than delivers" do + pid = start_manager() + StreamManager.notify_demand(pid, 10) + + ref = make_ref() + inject_pending_receipt_modack(pid, ref, ["eo-drain"], %{"eo-drain" => "data"}) + assert map_size(:sys.get_state(pid).pending_receipt_modacks) == 1 + + # Enter drain mode (this clears pending_receipt_modacks normally, + # but let's inject after drain to simulate the race) + StreamManager.prepare_for_draining(pid) + + # Re-inject to simulate a receipt_modack_result arriving after drain started + # but for a ref that was in-flight before drain + ref2 = make_ref() + inject_pending_receipt_modack(pid, ref2, ["eo-late"], %{"eo-late" => "late-data"}) + + # Result arrives during drain + send(pid, {:receipt_modack_result, ref2, {:ok, []}}) + sync(pid) + + # Message should NOT be delivered + refute_receive {:stream_messages, _}, 100 + + # pending_receipt_modacks should be cleared for this ref + state = :sys.get_state(pid) + refute Map.has_key?(state.pending_receipt_modacks, ref2) + + # Message should NOT be added to outstanding + refute Map.has_key?(state.outstanding, "eo-late") + end + end + + # ============================================================ + # Drain timeout + # ============================================================ + + describe "drain_timeout nacks outstanding messages" do + test "drain_timeout nacks all outstanding (in-flight) messages" do + # Use a very short drain timeout so the test doesn't have to wait long. + pid = start_manager(drain_timeout_ms: 50) + + # Dispatch 3 messages with demand so they become in-flight (outstanding). + StreamManager.notify_demand(pid, 10) + + for i <- 1..3 do + send(pid, {:stream_messages, [received_message("inflight-#{i}", "data-#{i}")]}) + end + + assert_receive {:stream_messages, _}, 500 + assert_receive {:stream_messages, _}, 500 + assert_receive {:stream_messages, _}, 500 + + state = :sys.get_state(pid) + assert map_size(state.outstanding) == 3 + + # Drain — buffered is empty, so only in-flight messages remain in outstanding. + {:ok, 0} = StreamManager.prepare_for_draining(pid) + + state = :sys.get_state(pid) + assert state.draining == true + # The 3 in-flight messages should still be in outstanding. + assert map_size(state.outstanding) == 3 + # Drain timer should be set. + assert state.drain_timer != nil + + # Wait for drain_timeout to fire (50ms + some margin). + Process.sleep(150) + + state = :sys.get_state(pid) + # After drain_timeout, outstanding should be empty (messages were nacked). + assert map_size(state.outstanding) == 0 + # Drain timer should be cleared. + assert state.drain_timer == nil + end + + test "drain_timeout with on_shutdown :noop clears outstanding without nacking" do + pid = start_manager(drain_timeout_ms: 50, on_shutdown: :noop) + + # Dispatch messages to make them in-flight. + StreamManager.notify_demand(pid, 10) + send(pid, {:stream_messages, [received_message("noop-inflight", "data")]}) + assert_receive {:stream_messages, _}, 500 + + assert map_size(:sys.get_state(pid).outstanding) == 1 + + {:ok, 0} = StreamManager.prepare_for_draining(pid) + + # Wait for drain_timeout. + Process.sleep(150) + + state = :sys.get_state(pid) + assert map_size(state.outstanding) == 0 + assert state.drain_timer == nil + end + + test "drain completes before timeout when all messages are acked" do + pid = start_manager(drain_timeout_ms: 5_000) + + # Dispatch 2 messages. + StreamManager.notify_demand(pid, 10) + send(pid, {:stream_messages, [received_message("ack-1", "data-1")]}) + send(pid, {:stream_messages, [received_message("ack-2", "data-2")]}) + assert_receive {:stream_messages, _}, 500 + assert_receive {:stream_messages, _}, 500 + + assert map_size(:sys.get_state(pid).outstanding) == 2 + + {:ok, 0} = StreamManager.prepare_for_draining(pid) + + state = :sys.get_state(pid) + assert state.draining == true + assert state.drain_timer != nil + + # Simulate processors finishing and acking both messages. + StreamManager.acknowledge(pid, ["ack-1", "ack-2"]) + # Sync to ensure the cast is processed. + sync(pid) + + state = :sys.get_state(pid) + # Drain completed early — outstanding empty, timer cancelled. + assert map_size(state.outstanding) == 0 + assert state.drain_timer == nil + end end # ============================================================ @@ -1025,8 +1418,12 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do ack_batch_max_size: Keyword.get(opts, :ack_batch_max_size, 2_500) ) + opts = + opts + |> Keyword.put(:producer_pid, self()) + |> Keyword.put(:ack_ref, {broadway_name, 0}) + {:ok, pid} = StreamManager.start_link(opts) - StreamManager.set_producer(pid, self()) {pid, rpc_pid} end @@ -1244,8 +1641,12 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do retry_deadline_ms: 60_000 ) + opts = + opts + |> Keyword.put(:producer_pid, self()) + |> Keyword.put(:ack_ref, {broadway_name, 0}) + {:ok, pid} = StreamManager.start_link(opts) - StreamManager.set_producer(pid, self()) batcher_pid = Process.whereis(batcher_name) assert :sys.get_state(batcher_pid).retry_deadline_ms == 60_000 @@ -1297,8 +1698,12 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do retry_deadline_ms: 60_000 ) + opts = + opts + |> Keyword.put(:producer_pid, self()) + |> Keyword.put(:ack_ref, {broadway_name, 0}) + {:ok, pid} = StreamManager.start_link(opts) - StreamManager.set_producer(pid, self()) batcher_pid = Process.whereis(batcher_name) @@ -1350,8 +1755,12 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do ack_batch_max_size: 2_500 ) + opts = + opts + |> Keyword.put(:producer_pid, self()) + |> Keyword.put(:ack_ref, {broadway_name, 0}) + {:ok, pid} = StreamManager.start_link(opts) - StreamManager.set_producer(pid, self()) batcher_pid = Process.whereis(batcher_name) initial_deadline = :sys.get_state(batcher_pid).retry_deadline_ms @@ -1414,14 +1823,14 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do end describe "exactly-once — drain nack pending receipt modacks" do - test "pending receipt modacks are nacked on stop_receiving" do + test "pending receipt modacks are nacked on prepare_for_draining" do pid = start_manager() ref = make_ref() inject_pending_receipt_modack(pid, ref, ["drain-eo"], %{"drain-eo" => "data"}) assert map_size(:sys.get_state(pid).pending_receipt_modacks) == 1 - StreamManager.stop_receiving(pid) + StreamManager.prepare_for_draining(pid) sync(pid) # After drain, pending_receipt_modacks should be cleared @@ -1436,7 +1845,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do inject_pending_receipt_modack(pid, ref, ["drain-stale"], %{"drain-stale" => "data"}) # Drain clears the pending map - StreamManager.stop_receiving(pid) + StreamManager.prepare_for_draining(pid) sync(pid) # RPC result arrives after drain — should be ignored, not crash @@ -1517,7 +1926,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do assert_receive {:stream_messages, [_]}, 500 # Enter drain mode - StreamManager.stop_receiving(pid) + StreamManager.prepare_for_draining(pid) sync(pid) state = :sys.get_state(pid) @@ -1536,4 +1945,678 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do assert state.drain_timer == nil end end + + # ============================================================ + # P0-04: pressure_snapshot telemetry + # ============================================================ + + describe "pressure_snapshot telemetry" do + test "emits :pressure_snapshot during :extend_leases with correct counts" do + pid = start_manager() + test_pid = self() + telemetry_name = "test-pressure-#{System.unique_integer([:positive])}" + + :telemetry.attach( + telemetry_name, + [:broadway_cloud_pub_sub, :streaming, :stream, :pressure_snapshot], + &TelemetryHelper.handle_event_forward_test/4, + %{pid: test_pid, msg: :pressure_snapshot} + ) + + # Dispatch 2 messages (in-flight, outstanding) + StreamManager.notify_demand(pid, 10) + + send( + pid, + {:stream_messages, [received_message("ps-1", "d1"), received_message("ps-2", "d2")]} + ) + + assert_receive {:stream_messages, _}, 500 + + # Buffer one message by zeroing pending_demand so it stays buffered. + # Note: the buffered message IS also in outstanding — all received messages + # are added to outstanding at receipt time regardless of buffer state. + :sys.replace_state(pid, fn s -> %{s | pending_demand: 0} end) + send(pid, {:stream_messages, [received_message("ps-buf", "buf")]}) + # Wait for it to land in the buffer + assert buffer_length(pid) == 1 + + # Set pending_demand to a known value for assertion + :sys.replace_state(pid, fn s -> %{s | pending_demand: 5} end) + + send(pid, :extend_leases) + sync(pid) + + assert_receive {:pressure_snapshot, measurements, metadata}, 500 + + # 3 outstanding: 2 dispatched + 1 buffered (all in outstanding map) + assert measurements.outstanding_count == 3 + assert measurements.buffered_count == 1 + assert measurements.pending_demand == 5 + + assert metadata.subscription == "projects/test/subscriptions/test-sub" + + :telemetry.detach(telemetry_name) + end + + test ":pressure_snapshot measurements shape is correct" do + pid = start_manager() + test_pid = self() + telemetry_name = "test-pressure-shape-#{System.unique_integer([:positive])}" + + :telemetry.attach( + telemetry_name, + [:broadway_cloud_pub_sub, :streaming, :stream, :pressure_snapshot], + &TelemetryHelper.handle_event_forward_test/4, + %{pid: test_pid, msg: :pressure_snapshot} + ) + + send(pid, :extend_leases) + sync(pid) + + assert_receive {:pressure_snapshot, measurements, _metadata}, 500 + + assert is_integer(measurements.outstanding_count) and measurements.outstanding_count >= 0 + assert is_integer(measurements.buffered_count) and measurements.buffered_count >= 0 + assert is_integer(measurements.pending_demand) and measurements.pending_demand >= 0 + + :telemetry.detach(telemetry_name) + end + end + + # ============================================================ + # P0-05: drain telemetry (async span: :start / :stop / :exception) + # ============================================================ + + describe "drain :start telemetry" do + test "emits drain :start with system_time and monotonic_time, correct initial counts" do + pid = start_manager() + test_pid = self() + telemetry_name = "test-drain-start-#{System.unique_integer([:positive])}" + + :telemetry.attach( + telemetry_name, + [:broadway_cloud_pub_sub, :streaming, :stream, :drain, :start], + &TelemetryHelper.handle_event_forward_test/4, + %{pid: test_pid, msg: :drain_start} + ) + + # Dispatch 2 messages to outstanding (in-flight) + StreamManager.notify_demand(pid, 10) + + send( + pid, + {:stream_messages, [received_message("ds-1", "d1"), received_message("ds-2", "d2")]} + ) + + assert_receive {:stream_messages, _}, 500 + + # Buffer 1 message by zeroing pending_demand + :sys.replace_state(pid, fn s -> %{s | pending_demand: 0} end) + send(pid, {:stream_messages, [received_message("ds-buf", "buf")]}) + assert buffer_length(pid) == 1 + + # Inject 1 pending receipt modack + ref = make_ref() + inject_pending_receipt_modack(pid, ref, ["ds-eo"], %{"ds-eo" => "eo"}) + + StreamManager.prepare_for_draining(pid) + + assert_receive {:drain_start, measurements, metadata}, 500 + + # Span start measurements: system_time and monotonic_time + assert is_integer(measurements.system_time) + assert is_integer(measurements.monotonic_time) + + # Counts captured at the moment of drain initiation (before any cleanup) + # outstanding_count = 2 in-flight + 1 buffered = 3 + assert measurements.outstanding_count == 3 + assert measurements.buffered_count == 1 + assert measurements.pending_receipt_modack_count == 1 + + assert metadata.subscription == "projects/test/subscriptions/test-sub" + + :telemetry.detach(telemetry_name) + end + + test "emits drain :start with zeros when nothing is outstanding" do + pid = start_manager() + test_pid = self() + telemetry_name = "test-drain-start-empty-#{System.unique_integer([:positive])}" + + :telemetry.attach( + telemetry_name, + [:broadway_cloud_pub_sub, :streaming, :stream, :drain, :start], + &TelemetryHelper.handle_event_forward_test/4, + %{pid: test_pid, msg: :drain_start} + ) + + StreamManager.prepare_for_draining(pid) + + assert_receive {:drain_start, measurements, _metadata}, 500 + + assert measurements.outstanding_count == 0 + assert measurements.buffered_count == 0 + assert measurements.pending_receipt_modack_count == 0 + + :telemetry.detach(telemetry_name) + end + end + + describe "drain :stop telemetry" do + test "emits drain :stop with positive duration when drain completes cleanly" do + pid = start_manager(drain_timeout_ms: 5_000) + test_pid = self() + telemetry_name = "test-drain-stop-#{System.unique_integer([:positive])}" + + :telemetry.attach( + telemetry_name, + [:broadway_cloud_pub_sub, :streaming, :stream, :drain, :stop], + &TelemetryHelper.handle_event_forward_test/4, + %{pid: test_pid, msg: :drain_stop} + ) + + # One in-flight message + StreamManager.notify_demand(pid, 10) + send(pid, {:stream_messages, [received_message("stop-ack-1", "data")]}) + assert_receive {:stream_messages, _}, 500 + + {:ok, 0} = StreamManager.prepare_for_draining(pid) + + # Ack the message — drain completes + StreamManager.acknowledge(pid, ["stop-ack-1"]) + + assert_receive {:drain_stop, measurements, metadata}, 500 + + assert is_integer(measurements.duration) + assert measurements.duration >= 0 + assert is_integer(measurements.monotonic_time) + assert metadata.subscription == "projects/test/subscriptions/test-sub" + + :telemetry.detach(telemetry_name) + end + + test "drain :stop is emitted when outstanding is already empty at prepare_for_draining" do + pid = start_manager() + test_pid = self() + telemetry_name = "test-drain-stop-immediate-#{System.unique_integer([:positive])}" + + :telemetry.attach( + telemetry_name, + [:broadway_cloud_pub_sub, :streaming, :stream, :drain, :stop], + &TelemetryHelper.handle_event_forward_test/4, + %{pid: test_pid, msg: :drain_stop} + ) + + {:ok, 0} = StreamManager.prepare_for_draining(pid) + + assert_receive {:drain_stop, measurements, _metadata}, 500 + + assert is_integer(measurements.duration) + assert measurements.duration >= 0 + + :telemetry.detach(telemetry_name) + end + + test "clean drain does NOT emit :exception" do + pid = start_manager(drain_timeout_ms: 5_000) + test_pid = self() + telemetry_name = "test-drain-no-exception-#{System.unique_integer([:positive])}" + + :telemetry.attach( + telemetry_name, + [:broadway_cloud_pub_sub, :streaming, :stream, :drain, :exception], + &TelemetryHelper.handle_event_forward_test/4, + %{pid: test_pid, msg: :drain_exception} + ) + + StreamManager.notify_demand(pid, 10) + send(pid, {:stream_messages, [received_message("clean-drain", "data")]}) + assert_receive {:stream_messages, _}, 500 + + {:ok, 0} = StreamManager.prepare_for_draining(pid) + StreamManager.acknowledge(pid, ["clean-drain"]) + sync(pid) + + refute_received {:drain_exception, _, _} + + :telemetry.detach(telemetry_name) + end + end + + describe "drain :exception telemetry" do + test "emits drain :exception with kind: :timeout when drain_timeout fires" do + pid = start_manager(drain_timeout_ms: 50) + test_pid = self() + telemetry_name = "test-drain-exception-timeout-#{System.unique_integer([:positive])}" + + :telemetry.attach( + telemetry_name, + [:broadway_cloud_pub_sub, :streaming, :stream, :drain, :exception], + &TelemetryHelper.handle_event_forward_test/4, + %{pid: test_pid, msg: :drain_exception} + ) + + # One in-flight message (never acked so timeout fires) + StreamManager.notify_demand(pid, 10) + send(pid, {:stream_messages, [received_message("timeout-ack-1", "data")]}) + assert_receive {:stream_messages, _}, 500 + + {:ok, 0} = StreamManager.prepare_for_draining(pid) + + # Wait for the 50ms drain_timeout + assert_receive {:drain_exception, measurements, metadata}, 500 + + assert metadata.kind == :timeout + assert metadata.reason == :drain_timeout + assert measurements.remaining_count == 1 + assert is_integer(measurements.duration) + assert measurements.duration >= 0 + assert is_integer(measurements.monotonic_time) + assert metadata.subscription == "projects/test/subscriptions/test-sub" + + :telemetry.detach(telemetry_name) + end + + test "drain_timeout does NOT emit :stop" do + pid = start_manager(drain_timeout_ms: 50) + test_pid = self() + telemetry_name = "test-drain-timeout-no-stop-#{System.unique_integer([:positive])}" + + :telemetry.attach( + telemetry_name, + [:broadway_cloud_pub_sub, :streaming, :stream, :drain, :stop], + &TelemetryHelper.handle_event_forward_test/4, + %{pid: test_pid, msg: :drain_stop} + ) + + StreamManager.notify_demand(pid, 10) + send(pid, {:stream_messages, [received_message("timeout-no-stop", "data")]}) + assert_receive {:stream_messages, _}, 500 + + {:ok, 0} = StreamManager.prepare_for_draining(pid) + + # Wait long enough for timeout to fire + Process.sleep(150) + sync(pid) + + refute_received {:drain_stop, _, _} + + :telemetry.detach(telemetry_name) + end + + test "emits drain :exception with kind: :terminate when process is terminated mid-drain" do + pid = start_manager(drain_timeout_ms: 5_000) + test_pid = self() + telemetry_name = "test-drain-exception-terminate-#{System.unique_integer([:positive])}" + + :telemetry.attach( + telemetry_name, + [:broadway_cloud_pub_sub, :streaming, :stream, :drain, :exception], + &TelemetryHelper.handle_event_forward_test/4, + %{pid: test_pid, msg: :drain_exception} + ) + + # One in-flight message (never acked) + StreamManager.notify_demand(pid, 10) + send(pid, {:stream_messages, [received_message("term-ack-1", "data")]}) + assert_receive {:stream_messages, _}, 500 + + {:ok, 0} = StreamManager.prepare_for_draining(pid) + + # Monitor then stop the GenServer normally + ref = Process.monitor(pid) + GenServer.stop(pid, :normal) + assert_receive {:DOWN, ^ref, :process, ^pid, _}, 500 + + assert_receive {:drain_exception, measurements, metadata}, 500 + + assert metadata.kind == :terminate + assert metadata.reason == :normal + assert measurements.remaining_count == 1 + assert is_integer(measurements.duration) + assert metadata.subscription == "projects/test/subscriptions/test-sub" + + :telemetry.detach(telemetry_name) + end + end + + # ============================================================ + # P0-05: drain failure scenarios + # ============================================================ + + describe "drain failure scenarios" do + test "stream_error (retryable) during drain does not prevent drain completion via ack" do + pid = start_manager() + StreamManager.notify_demand(pid, 10) + + # Dispatch one message to make it in-flight + send(pid, {:stream_messages, [received_message("drain-err-1", "data")]}) + assert_receive {:stream_messages, [_]}, 500 + + assert map_size(:sys.get_state(pid).outstanding) == 1 + + # Start drain + {:ok, 0} = StreamManager.prepare_for_draining(pid) + + # Cancel any pre-existing reconnect (from initial failed connection attempt) + # so we can cleanly assert there's no reconnect scheduled after the stream_error. + :sys.replace_state(pid, fn s -> + if s.reconnect_ref, do: Process.cancel_timer(s.reconnect_ref) + %{s | reconnect_ref: nil} + end) + + # Stream disconnects while we're draining (common race) + send(pid, {:stream_error, %GRPC.RPCError{status: 14, message: "unavailable"}}) + sync(pid) + + # Process is still alive and draining + assert Process.alive?(pid) + state = :sys.get_state(pid) + assert state.draining == true + # No reconnect scheduled during drain + assert state.reconnect_ref == nil + + # Ack completes — drain should finish + StreamManager.acknowledge(pid, ["drain-err-1"]) + sync(pid) + + state = :sys.get_state(pid) + assert map_size(state.outstanding) == 0 + assert state.drain_timer == nil + end + + test "stream_closed during drain does not prevent drain completion via ack" do + pid = start_manager() + StreamManager.notify_demand(pid, 10) + + send(pid, {:stream_messages, [received_message("drain-close-1", "data")]}) + assert_receive {:stream_messages, [_]}, 500 + + {:ok, 0} = StreamManager.prepare_for_draining(pid) + + # Cancel any pre-existing reconnect from initial failed connection attempt + :sys.replace_state(pid, fn s -> + if s.reconnect_ref, do: Process.cancel_timer(s.reconnect_ref) + %{s | reconnect_ref: nil} + end) + + # Server closes stream (normal — no reconnect during drain) + send(pid, {:stream_closed}) + sync(pid) + + assert Process.alive?(pid) + state = :sys.get_state(pid) + assert state.draining == true + assert state.reconnect_ref == nil + + # Ack the in-flight message — drain completes + StreamManager.acknowledge(pid, ["drain-close-1"]) + sync(pid) + + state = :sys.get_state(pid) + assert map_size(state.outstanding) == 0 + assert state.drain_timer == nil + end + + test "drain with mixed buffered + in-flight: only in-flight blocks drain completion" do + pid = start_manager() + + # Dispatch 2 messages (in-flight) + StreamManager.notify_demand(pid, 10) + + send( + pid, + {:stream_messages, + [received_message("mix-if-1", "d1"), received_message("mix-if-2", "d2")]} + ) + + assert_receive {:stream_messages, _}, 500 + + # Buffer 2 messages by zeroing pending_demand + :sys.replace_state(pid, fn s -> %{s | pending_demand: 0} end) + + send( + pid, + {:stream_messages, + [received_message("mix-buf-1", "b1"), received_message("mix-buf-2", "b2")]} + ) + + assert buffer_length(pid) == 2 + + state = :sys.get_state(pid) + # 4 outstanding total: 2 in-flight + 2 buffered + assert map_size(state.outstanding) == 4 + + # Drain: buffered 2 are nacked and removed; 2 in-flight remain + {:ok, 2} = StreamManager.prepare_for_draining(pid) + + state = :sys.get_state(pid) + assert map_size(state.outstanding) == 2 + assert Map.has_key?(state.outstanding, "mix-if-1") + assert Map.has_key?(state.outstanding, "mix-if-2") + assert state.drain_timer != nil + + # Ack one in-flight — drain not complete yet + StreamManager.acknowledge(pid, ["mix-if-1"]) + sync(pid) + assert state.drain_timer != nil + + state = :sys.get_state(pid) + assert map_size(state.outstanding) == 1 + + # Ack the last in-flight — drain completes + StreamManager.acknowledge(pid, ["mix-if-2"]) + sync(pid) + + state = :sys.get_state(pid) + assert map_size(state.outstanding) == 0 + assert state.drain_timer == nil + end + + test "drain with pending receipt modacks: clears them on prepare_for_draining" do + pid = start_manager() + + # Inject 2 pending receipt modacks + ref1 = make_ref() + ref2 = make_ref() + inject_pending_receipt_modack(pid, ref1, ["eo-drain-1"], %{"eo-drain-1" => "d1"}) + inject_pending_receipt_modack(pid, ref2, ["eo-drain-2"], %{"eo-drain-2" => "d2"}) + + assert map_size(:sys.get_state(pid).pending_receipt_modacks) == 2 + + # Drain clears pending receipt modacks immediately + StreamManager.prepare_for_draining(pid) + sync(pid) + + state = :sys.get_state(pid) + assert map_size(state.pending_receipt_modacks) == 0 + # Drain should complete since outstanding is also empty + assert map_size(state.outstanding) == 0 + assert state.drain_timer == nil + end + end + + # ============================================================ + # P0-06: EO and non-EO behavior invariants + # ============================================================ + + describe "non-EO behavior invariants" do + test "messages are immediately added to outstanding on receipt" do + pid = start_manager() + StreamManager.notify_demand(pid, 10) + + send( + pid, + {:stream_messages, [received_message("neo-1", "d1"), received_message("neo-2", "d2")]} + ) + + assert_receive {:stream_messages, msgs}, 500 + + assert length(msgs) == 2 + + state = :sys.get_state(pid) + # Both ack_ids must be in outstanding immediately + assert Map.has_key?(state.outstanding, "neo-1") + assert Map.has_key?(state.outstanding, "neo-2") + # No pending receipt modacks in non-EO mode + assert map_size(state.pending_receipt_modacks) == 0 + end + + test "ack removes from outstanding" do + pid = start_manager() + StreamManager.notify_demand(pid, 10) + + send(pid, {:stream_messages, [received_message("neo-ack", "data")]}) + assert_receive {:stream_messages, _}, 500 + + assert Map.has_key?(:sys.get_state(pid).outstanding, "neo-ack") + + StreamManager.acknowledge(pid, ["neo-ack"]) + sync(pid) + + refute Map.has_key?(:sys.get_state(pid).outstanding, "neo-ack") + end + + test "nack (deadline=0) removes from outstanding" do + pid = start_manager() + StreamManager.notify_demand(pid, 10) + + send(pid, {:stream_messages, [received_message("neo-nack", "data")]}) + assert_receive {:stream_messages, _}, 500 + + assert Map.has_key?(:sys.get_state(pid).outstanding, "neo-nack") + + StreamManager.modify_deadline(pid, ["neo-nack"], 0) + sync(pid) + + refute Map.has_key?(:sys.get_state(pid).outstanding, "neo-nack") + end + + test "reconnect does not clear buffered messages' outstanding entries" do + # Buffered messages ARE in outstanding; after reconnect the buffer is + # cleared but the ack_ids are also removed from outstanding. + pid = start_manager(backoff_min: 60_000) + StreamManager.notify_demand(pid, 0) + + send(pid, {:stream_messages, [received_message("buf-reconnect", "data")]}) + assert buffer_length(pid) == 1 + + state = :sys.get_state(pid) + assert Map.has_key?(state.outstanding, "buf-reconnect") + + # Simulate retryable reconnect — buffer is dropped and outstanding cleared for buffered + send(pid, {:stream_error, %GRPC.RPCError{status: 14, message: "unavailable"}}) + sync(pid) + + # After reset_connection, the buffered ack_id is removed from outstanding + state = :sys.get_state(pid) + refute Map.has_key?(state.outstanding, "buf-reconnect") + assert buffer_length(pid) == 0 + end + end + + describe "EO behavior invariants" do + test "in EO mode, messages are NOT in outstanding before receipt modack succeeds" do + {pid, _rpc} = start_manager_with_spy_rpc() + enable_exactly_once(pid) + StreamManager.notify_demand(pid, 10) + + send(pid, {:stream_messages, [received_message("eo-inv-1", "data")]}) + sync(pid) + + # Pending — not yet in outstanding + state = :sys.get_state(pid) + assert map_size(state.pending_receipt_modacks) == 1 + refute Map.has_key?(state.outstanding, "eo-inv-1") + end + + test "in EO mode, messages are added to outstanding only after receipt modack {:ok, []}" do + {pid, _rpc} = start_manager_with_spy_rpc() + enable_exactly_once(pid) + StreamManager.notify_demand(pid, 10) + + send(pid, {:stream_messages, [received_message("eo-inv-2", "data")]}) + sync(pid) + + # Retrieve the pending ref + state = :sys.get_state(pid) + [ref] = Map.keys(state.pending_receipt_modacks) + + # Confirm receipt modack success + send(pid, {:receipt_modack_result, ref, {:ok, []}}) + sync(pid) + + state = :sys.get_state(pid) + assert map_size(state.pending_receipt_modacks) == 0 + assert Map.has_key?(state.outstanding, "eo-inv-2") + end + + test "in EO mode, total receipt modack failure drops messages (no dispatch, no outstanding)" do + {pid, rpc} = start_manager_with_spy_rpc() + enable_exactly_once(pid) + StreamManager.notify_demand(pid, 10) + + :ok = GenServer.call(rpc, {:set_response_sync, {:error, :unavailable}}) + + send(pid, {:stream_messages, [received_message("eo-inv-fail", "data")]}) + Process.sleep(200) + sync(pid) + + refute_received {:stream_messages, _} + + state = :sys.get_state(pid) + assert map_size(state.pending_receipt_modacks) == 0 + refute Map.has_key?(state.outstanding, "eo-inv-fail") + end + + test "in EO mode, partial receipt modack failure: succeeded messages in outstanding, failed are not" do + pid = start_manager() + enable_exactly_once(pid) + StreamManager.notify_demand(pid, 10) + + ref = make_ref() + + inject_pending_receipt_modack(pid, ref, ["eo-ok", "eo-fail"], %{ + "eo-ok" => "good", + "eo-fail" => "bad" + }) + + send(pid, {:receipt_modack_result, ref, {:ok, ["eo-fail"]}}) + + assert_receive {:stream_messages, msgs}, 500 + assert length(msgs) == 1 + assert hd(msgs).data == "good" + + state = :sys.get_state(pid) + assert Map.has_key?(state.outstanding, "eo-ok") + refute Map.has_key?(state.outstanding, "eo-fail") + end + + test "switching from non-EO to EO: new messages go through gate, not immediate dispatch" do + # Start with a spy RPC so we can control modack responses. + # We need to use start_manager_with_spy_rpc throughout so the spy + # is in place before we enable EO. + {pid, _rpc} = start_manager_with_spy_rpc() + StreamManager.notify_demand(pid, 10) + + # Non-EO (default): message dispatched immediately + send(pid, {:stream_messages, [received_message("before-eo", "data")]}) + + # The spy RPC responds :ok, so the modack Task fires {:ok, []} and + # the message is dispatched straight through (standard path) + assert_receive {:stream_messages, _}, 500 + assert map_size(:sys.get_state(pid).pending_receipt_modacks) == 0 + + # Enable EO mode + enable_exactly_once(pid) + + # EO: new message must be held in pending_receipt_modacks, not dispatched + send(pid, {:stream_messages, [received_message("after-eo", "data")]}) + sync(pid) + + state = :sys.get_state(pid) + assert map_size(state.pending_receipt_modacks) == 1 + end + end end diff --git a/test/broadway_cloud_pub_sub/streaming/stress_test.exs b/test/broadway_cloud_pub_sub/streaming/stress_test.exs index f0be870..f6e8e65 100644 --- a/test/broadway_cloud_pub_sub/streaming/stress_test.exs +++ b/test/broadway_cloud_pub_sub/streaming/stress_test.exs @@ -1154,7 +1154,7 @@ defmodule BroadwayCloudPubSub.Streaming.StressTest do # Kill the Gun connection process and simulate gun_down so StreamManager # detects the disconnect and reconnects. - stream_manager = Module.concat(name, BroadwayCloudPubSub.Streaming.StreamManager) + stream_manager = Module.concat(name, "StreamManager_0") sm_state = :sys.get_state(stream_manager) case sm_state do @@ -1239,7 +1239,7 @@ defmodule BroadwayCloudPubSub.Streaming.StressTest do # Mirror the Mint test exactly: get conn_pid, kill the process, then send the # adapter-level disconnect signal so StreamManager detects it via the new handler. - stream_manager = Module.concat(name, BroadwayCloudPubSub.Streaming.StreamManager) + stream_manager = Module.concat(name, "StreamManager_0") sm_state = :sys.get_state(stream_manager) case sm_state do @@ -1320,7 +1320,7 @@ defmodule BroadwayCloudPubSub.Streaming.StressTest do end # For Mint, kill the conn_pid and simulate the connection_down message - stream_manager = Module.concat(name, BroadwayCloudPubSub.Streaming.StreamManager) + stream_manager = Module.concat(name, "StreamManager_0") sm_state = :sys.get_state(stream_manager) case sm_state do From a625315b2ed612dd338e6d9deac2d90f26ae6176 Mon Sep 17 00:00:00 2001 From: Rock Date: Thu, 16 Apr 2026 16:20:59 +0200 Subject: [PATCH 10/29] docs: add documentation for streaming producer modules Add module documentation, typespecs, and inline comments across all streaming producer modules. Update README with StreamingPull producer usage instructions and configuration examples. Key changes: - Add @moduledoc and function docs to streaming modules - Add typespecs to public APIs - Document configuration options in Streaming.Options - Update README with Streaming.Producer setup guide - Update mix.exs with grpc dependency documentation --- README.md | 2 +- lib/broadway_cloud_pub_sub/backoff.ex | 5 ++ .../streaming/ack_batcher.ex | 39 +++++++++++---- .../streaming/ack_time_distribution.ex | 2 +- .../streaming/grpc_client.ex | 2 +- .../streaming/options.ex | 4 +- .../streaming/producer.ex | 35 +++++++++++--- .../streaming/stream_manager.ex | 47 ++++++++++++++++--- .../streaming/stream_reader.ex | 33 ++++++++----- .../streaming/unary_ack_supervisor.ex | 6 +++ .../streaming/unary_rpc_client.ex | 39 ++++++++++----- mix.exs | 8 +++- .../streaming/ack_batcher_test.exs | 43 +++++++++++++---- .../streaming/options_test.exs | 15 ++++-- .../streaming/stream_manager_test.exs | 47 +++++++++++++++++++ 15 files changed, 257 insertions(+), 70 deletions(-) diff --git a/README.md b/README.md index e8e31f8..2f034d8 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ Documentation can be found at [https://hexdocs.pm/broadway_cloud_pub_sub](https: This project provides: -* `BroadwayCloudPubSub.Producer` - A GenStage producer that continuously receives messages from a Pub/Sub subscription acknowledges them after being successfully processed. +* `BroadwayCloudPubSub.Producer` - A GenStage producer that continuously receives messages from a Pub/Sub subscription and acknowledges them after being successfully processed. * `BroadwayCloudPubSub.Streaming.Producer` - A GenStage producer that uses the gRPC StreamingPull API for low-latency, push-based message delivery. * `BroadwayCloudPubSub.Client` - A generic behaviour to implement Pub/Sub clients. * `BroadwayCloudPubSub.PullClient` - Default REST client used by `BroadwayCloudPubSub.Producer`. diff --git a/lib/broadway_cloud_pub_sub/backoff.ex b/lib/broadway_cloud_pub_sub/backoff.ex index 38fc742..52b217d 100644 --- a/lib/broadway_cloud_pub_sub/backoff.ex +++ b/lib/broadway_cloud_pub_sub/backoff.ex @@ -43,6 +43,11 @@ defmodule BroadwayCloudPubSub.Backoff do nil :rand_exp -> + # `lower` prevents the randomized minimum from dropping below max/3, + # ensuring backoff stays meaningful even after many retries. With + # defaults (min=100, max=60_000), the minimum eventually floors at + # 20s rather than continuing to start from 100ms. This matches the + # gax library's backoff behavior. lower = max(min, div(max, 3)) %__MODULE__{type: :rand_exp, min: min, max: max, state: {min, lower, seed()}} diff --git a/lib/broadway_cloud_pub_sub/streaming/ack_batcher.ex b/lib/broadway_cloud_pub_sub/streaming/ack_batcher.ex index 61d0855..259c879 100644 --- a/lib/broadway_cloud_pub_sub/streaming/ack_batcher.ex +++ b/lib/broadway_cloud_pub_sub/streaming/ack_batcher.ex @@ -19,6 +19,8 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcher do :batch_interval_ms, :batch_max_size, :timer_ref, + # Registered name of the Task.Supervisor for receipt modack tasks. + :task_supervisor, # nil = no deadline. Set to 600_000ms when exactly-once delivery is enabled. retry_deadline_ms: nil, ack_ids: [], @@ -39,7 +41,8 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcher do :retry_deadline_ms, :broadway_name, :telemetry_metadata, - :rpc_client + :rpc_client, + :task_supervisor ] @required_keys [ @@ -47,7 +50,8 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcher do :ack_batch_interval_ms, :ack_batch_max_size, :broadway_name, - :rpc_client + :rpc_client, + :task_supervisor ] @doc false @@ -132,7 +136,8 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcher do telemetry_metadata: config[:telemetry_metadata], batch_interval_ms: config.ack_batch_interval_ms, batch_max_size: config.ack_batch_max_size, - retry_deadline_ms: config[:retry_deadline_ms] + retry_deadline_ms: config[:retry_deadline_ms], + task_supervisor: config[:task_supervisor] } {:ok, schedule_flush(state)} @@ -189,15 +194,15 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcher do {:noreply, %{state | retry_deadline_ms: retry_deadline_ms}} end - # Receipt modack for exactly-once delivery. Spawns a Task that calls - # UnaryRpcClient directly (bypassing batching) and sends the result back - # to the caller. The Task is fire-and-forget from AckBatcher's perspective. + # Receipt modack for exactly-once delivery. Spawns a supervised Task that + # calls UnaryRpcClient directly (bypassing batching) and sends the result + # back to the caller. The Task is fire-and-forget from AckBatcher's + # perspective, but is supervised so it's cleaned up on pipeline shutdown. def handle_cast({:receipt_modack, ref, reply_to, ack_ids, deadline_seconds}, state) do rpc_client = state.rpc_client - Task.start(fn -> - result = UnaryRpcClient.modify_ack_deadline(rpc_client, ack_ids, deadline_seconds) - send(reply_to, {:receipt_modack_result, ref, result}) + Task.Supervisor.start_child(state.task_supervisor, fn -> + do_receipt_modack(rpc_client, ref, reply_to, ack_ids, deadline_seconds) end) {:noreply, state} @@ -414,6 +419,22 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcher do %{state | timer_ref: nil} end + # Executes a receipt modack RPC and always sends the result to `reply_to`, + # even if the RPC raises or the process it calls is dead. This prevents + # pending_receipt_modacks entries from being orphaned in StreamManager. + defp do_receipt_modack(rpc_client, ref, reply_to, ack_ids, deadline_seconds) do + result = + try do + UnaryRpcClient.modify_ack_deadline(rpc_client, ack_ids, deadline_seconds) + rescue + e -> {:error, {:receipt_modack_crashed, Exception.message(e)}} + catch + kind, reason -> {:error, {:receipt_modack_crashed, {kind, reason}}} + end + + send(reply_to, {:receipt_modack_result, ref, result}) + end + defp emit_telemetry(event, measurements, state) do metadata = %{ name: state.broadway_name, diff --git a/lib/broadway_cloud_pub_sub/streaming/ack_time_distribution.ex b/lib/broadway_cloud_pub_sub/streaming/ack_time_distribution.ex index ff11e99..df3813d 100644 --- a/lib/broadway_cloud_pub_sub/streaming/ack_time_distribution.ex +++ b/lib/broadway_cloud_pub_sub/streaming/ack_time_distribution.ex @@ -93,7 +93,7 @@ defmodule BroadwayCloudPubSub.Streaming.AckTimeDistribution do def percentile(%__MODULE__{buckets: buckets, total: total}, p) when is_number(p) and p >= 0.0 and p <= 1.0 do target = max(1, ceil(p * total)) - find_percentile_bucket(buckets, target, 0, 0) + find_percentile_bucket(buckets, target, @min_deadline_seconds, 0) end @doc """ diff --git a/lib/broadway_cloud_pub_sub/streaming/grpc_client.ex b/lib/broadway_cloud_pub_sub/streaming/grpc_client.ex index 090f62c..6b989bc 100644 --- a/lib/broadway_cloud_pub_sub/streaming/grpc_client.ex +++ b/lib/broadway_cloud_pub_sub/streaming/grpc_client.ex @@ -2,7 +2,7 @@ defmodule BroadwayCloudPubSub.Streaming.GrpcClient do @moduledoc """ The default gRPC client for `BroadwayCloudPubSub.Streaming.Producer`. - Implements `BroadwayCloudPubSub.Streaming.Client` using the `grpc_client` library + Implements `BroadwayCloudPubSub.Streaming.Client` using the `grpc` library with the `Google.Pubsub.V1.Subscriber.Stub` generated stub. This module handles: diff --git a/lib/broadway_cloud_pub_sub/streaming/options.ex b/lib/broadway_cloud_pub_sub/streaming/options.ex index 7e7c6a0..67c24f3 100644 --- a/lib/broadway_cloud_pub_sub/streaming/options.ex +++ b/lib/broadway_cloud_pub_sub/streaming/options.ex @@ -220,7 +220,7 @@ defmodule BroadwayCloudPubSub.Streaming.Options do * Any module — A custom module implementing the `GRPC.Client.Adapter` behaviour. Useful for test adapters and alternative implementations. - Both built-in adapters are provided by the `grpc_client` dependency. The + Both built-in adapters are provided by the `grpc` dependency. The adapter choice does not affect the public API or message semantics. """ ], @@ -304,7 +304,7 @@ defmodule BroadwayCloudPubSub.Streaming.Options do doc: """ The module implementing the `BroadwayCloudPubSub.Streaming.Client` behaviour. Defaults to `BroadwayCloudPubSub.Streaming.GrpcClient`, which uses the - `grpc_client` library to communicate with Google Cloud Pub/Sub. + `grpc` library to communicate with Google Cloud Pub/Sub. Swap this for testing or custom gRPC transports. """ diff --git a/lib/broadway_cloud_pub_sub/streaming/producer.ex b/lib/broadway_cloud_pub_sub/streaming/producer.ex index 89c4e16..d6a1b1a 100644 --- a/lib/broadway_cloud_pub_sub/streaming/producer.ex +++ b/lib/broadway_cloud_pub_sub/streaming/producer.ex @@ -66,7 +66,7 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do Pub/Sub via unary RPCs at a configurable interval (`:ack_batch_interval_ms`, default 100ms) or when the batch reaches `:ack_batch_max_size` (default 2500). Batching is done on a separate unary connection, independently of the - streaming connection. + streaming connection. See [Telemetry](#module-telemetry) for ack-related events. ## Flow control @@ -77,6 +77,9 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do StreamManager also tracks GenStage demand from the Producer and buffers messages internally when demand is zero, preventing unbounded mailbox growth. + See also [Lease management](#module-lease-management) for how message deadlines + are extended while flow control holds messages in the buffer. + ## Lease management The producer automatically extends message acknowledgement deadlines before @@ -87,7 +90,8 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do Messages are tracked until they are acknowledged, nacked, or until `:max_extension_ms` elapses (default 60 minutes), after which the server redelivers them. This prevents a stuck consumer from holding messages - indefinitely. + indefinitely. The `extend_leases` and `lease_expired` telemetry events + (see [Telemetry](#module-telemetry)) provide visibility into lease activity. ## Exactly-once delivery @@ -100,7 +104,8 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do For exactly-once subscriptions, increase `:retry_deadline_ms` to 600,000ms (10 minutes) to allow the unary RPC client enough time to retry transient ack failures — the server requires successful ack receipt before guaranteeing - exactly-once semantics. + exactly-once semantics. The library automatically adjusts `:retry_deadline_ms` + when the subscription's exactly-once status changes at runtime. ## Message ordering @@ -122,6 +127,9 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do (dispatched to processors but not yet acked/nacked) to be processed. 4. Force-closes the stream after the drain timeout. + The drain lifecycle is tracked via the `drain` telemetry span + (see [Telemetry](#module-telemetry)). + ## Error handling gRPC stream errors are classified as retryable or terminal: @@ -185,6 +193,12 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do Metadata includes: `reason: term()` — the connection error. + * `:reconnect` — reconnect scheduled after a disconnect or connection + failure. The backoff delay indicates how long the StreamManager will + wait before the next connection attempt. + + Measurements: `%{delay: pos_integer()}` + * `:keepalive` — keep-alive ping sent on the gRPC connection. Measurements: `%{deadline: pos_integer()}` @@ -199,6 +213,12 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do Measurements: `%{count: pos_integer()}` + * `:receipt_modack_stale` — pending receipt modack entries (exactly-once + delivery) that exceeded the 60-second staleness threshold were nacked + for fast redelivery. Emitted during the lease extension cycle. + + Measurements: `%{count: pos_integer()}` + * `:drain` — async span tracking the full graceful drain lifecycle, from `prepare_for_draining/1` through completion, timeout, or unexpected termination. Uses the same measurements convention as `:telemetry.span/3`. @@ -467,6 +487,11 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do # instead of waiting for their ack deadline to expire naturally. This # covers edge cases like on_failure: :noop (acknowledger does nothing) or # the drain timeout firing before all processors complete. + # + # nack_ack_ids sends a cast to StreamManager (which routes to AckBatcher), + # then close/1 calls flush_batcher_if_alive. Since the cast is enqueued + # in AckBatcher before the flush, the nacked ack_ids are included in the + # final flush to the server. outstanding = StreamManager.get_outstanding(manager_pid) nack_ack_ids(manager_pid, config, outstanding) @@ -557,10 +582,6 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do :erlang.phash2(key) end - def partition_by(%Broadway.Message{metadata: %{orderingKey: key}}) when is_integer(key) do - key - end - def partition_by(_) do :erlang.unique_integer([:positive]) end diff --git a/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex b/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex index 54bd146..0e3cdce 100644 --- a/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex +++ b/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex @@ -249,11 +249,21 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do else # Standard delivery: fire-and-forget receipt modack, dispatch immediately. new_outstanding = - MessageDispatch.add_to_outstanding(state.outstanding, ack_ids, now, state.config.max_extension_ms) + MessageDispatch.add_to_outstanding( + state.outstanding, + ack_ids, + now, + state.config.max_extension_ms + ) AckBatcher.modack(state.ack_batcher, ack_ids, adaptive_deadline) emit_telemetry(:receive_messages, %{count: length(broadway_messages)}, state.config) - {:noreply, MessageDispatch.deliver_messages(%{state | outstanding: new_outstanding}, broadway_messages)} + + {:noreply, + MessageDispatch.deliver_messages( + %{state | outstanding: new_outstanding}, + broadway_messages + )} end end @@ -426,6 +436,10 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do # empties the outstanding map so the producer's terminate/2 becomes a no-op. nack_per_on_shutdown(state, outstanding_ids) + # Flush the batcher to ensure the nacks above are sent to the server + # before the connection is torn down by close_stream. + flush_batcher_if_alive(state.ack_batcher) + {:noreply, close_stream(%{state | drain_timer: nil, drain_started_at: nil, outstanding: %{}})} end @@ -627,12 +641,22 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do end defp reset_connection(state, reason) do - # Drop buffered messages on disconnect; their ack_ids are already in outstanding - # so removing them avoids pointless lease-extension for messages that will redeliver. + # Drop buffered messages on disconnect and nack them so they become + # immediately available for redelivery to any consumer in the subscription. + # Without the nack, redelivery depends on either this client reconnecting + # (same client_id) or the ack deadline expiring naturally (up to 600s). buffered_ack_ids = MessageDispatch.extract_buffered_ack_ids(state.message_buffer) + AckBatcher.modack(state.ack_batcher, buffered_ack_ids, 0) + new_outstanding = Enum.reduce(buffered_ack_ids, state.outstanding, &Map.delete(&2, &1)) + # Dispatched ack_ids (sent to the producer but not yet acked) are intentionally + # kept in `outstanding`. Since the same `client_id` is used on reconnection, + # Pub/Sub associates the new stream with the same logical subscriber and won't + # redeliver those messages. They remain in outstanding until acked/nacked by + # the processor, or until `max_extension_ms` expiry in the lease extension cycle. + # # Preserve pending_demand across reconnection to avoid a demand deadlock. # See decisions.md. close_stream( @@ -815,12 +839,19 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do ) {:noreply, - MessageDispatch.deliver_messages(%{state | outstanding: new_outstanding}, pending.broadway_messages)} + MessageDispatch.deliver_messages( + %{state | outstanding: new_outstanding}, + pending.broadway_messages + )} {:ok, failed_ids} -> # Partial success — deliver only messages whose modack succeeded. {ok_msgs, ok_ids} = - MessageDispatch.partition_succeeded(pending.broadway_messages, pending.ack_ids, failed_ids) + MessageDispatch.partition_succeeded( + pending.broadway_messages, + pending.ack_ids, + failed_ids + ) new_outstanding = MessageDispatch.add_to_outstanding( @@ -832,7 +863,9 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do if ok_msgs != [] do emit_telemetry(:receive_messages, %{count: length(ok_msgs)}, state.config) - {:noreply, MessageDispatch.deliver_messages(%{state | outstanding: new_outstanding}, ok_msgs)} + + {:noreply, + MessageDispatch.deliver_messages(%{state | outstanding: new_outstanding}, ok_msgs)} else {:noreply, %{state | outstanding: new_outstanding}} end diff --git a/lib/broadway_cloud_pub_sub/streaming/stream_reader.ex b/lib/broadway_cloud_pub_sub/streaming/stream_reader.ex index e23f071..9dbf91e 100644 --- a/lib/broadway_cloud_pub_sub/streaming/stream_reader.ex +++ b/lib/broadway_cloud_pub_sub/streaming/stream_reader.ex @@ -53,20 +53,8 @@ defmodule BroadwayCloudPubSub.Streaming.StreamReader do defp run(manager, channel, config) do grpc_client = config.grpc_client grpc_client_config = config.grpc_client_config - client_id = Map.fetch!(config, :client_id) - - initial_request = %StreamingPullRequest{ - subscription: config.subscription, - stream_ack_deadline_seconds: config.stream_ack_deadline_seconds, - max_outstanding_messages: config.max_outstanding_messages, - max_outstanding_bytes: config.max_outstanding_bytes, - client_id: client_id - } - grpc_stream = grpc_client.streaming_pull(channel, grpc_client_config) - - {:ok, grpc_stream} = - grpc_client.send_request(grpc_stream, initial_request, grpc_client_config) + grpc_stream = open_stream(channel, config) # Notify the manager that the stream is open. The manager needs the # grpc_stream struct to call send_request for acks and deadline @@ -79,6 +67,25 @@ defmodule BroadwayCloudPubSub.Streaming.StreamReader do end end + defp open_stream(channel, config) do + grpc_client = config.grpc_client + grpc_client_config = config.grpc_client_config + client_id = Map.fetch!(config, :client_id) + + initial_request = %StreamingPullRequest{ + subscription: config.subscription, + stream_ack_deadline_seconds: config.stream_ack_deadline_seconds, + max_outstanding_messages: config.max_outstanding_messages, + max_outstanding_bytes: config.max_outstanding_bytes, + client_id: client_id + } + + stream = grpc_client.streaming_pull(channel, grpc_client_config) + # Intentional match to crash if the stream fails to open + {:ok, stream} = grpc_client.send_request(stream, initial_request, grpc_client_config) + stream + end + defp enumerate(enum, manager) do enum |> Stream.each(fn diff --git a/lib/broadway_cloud_pub_sub/streaming/unary_ack_supervisor.ex b/lib/broadway_cloud_pub_sub/streaming/unary_ack_supervisor.ex index 459aaac..b3f5a73 100644 --- a/lib/broadway_cloud_pub_sub/streaming/unary_ack_supervisor.ex +++ b/lib/broadway_cloud_pub_sub/streaming/unary_ack_supervisor.ex @@ -39,6 +39,7 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryAckSupervisor do rpc_client_name = Module.concat(broadway_name, UnaryRpcClient) batcher_name = Module.concat(broadway_name, AckBatcher) + task_sup_name = Module.concat(broadway_name, ReceiptModackTaskSupervisor) # Each child's child_opts/1 selects only the keys it needs from the # full config and validates that all required keys are present. @@ -50,6 +51,7 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryAckSupervisor do batcher_opts = config |> Keyword.put(:rpc_client, rpc_client_name) + |> Keyword.put(:task_supervisor, task_sup_name) |> AckBatcher.child_opts() |> Keyword.put(:name, batcher_name) @@ -59,6 +61,10 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryAckSupervisor do start: {UnaryRpcClient, :start_link, [rpc_client_opts]}, restart: :permanent }, + # Task.Supervisor for receipt modack RPCs (exactly-once delivery). + # Started before AckBatcher so it's available when AckBatcher spawns tasks. + # Tasks are :temporary — they run once and are not restarted on failure. + {Task.Supervisor, name: task_sup_name}, %{ id: AckBatcher, start: {AckBatcher, :start_link, [batcher_opts]}, diff --git a/lib/broadway_cloud_pub_sub/streaming/unary_rpc_client.ex b/lib/broadway_cloud_pub_sub/streaming/unary_rpc_client.ex index 9234379..4a43b8f 100644 --- a/lib/broadway_cloud_pub_sub/streaming/unary_rpc_client.ex +++ b/lib/broadway_cloud_pub_sub/streaming/unary_rpc_client.ex @@ -173,7 +173,14 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClient do } grpc_client = state.grpc_client - do_rpc(state, ack_ids, request, &grpc_client.modify_ack_deadline/3, "modify ack deadline for messages") + + do_rpc( + state, + ack_ids, + request, + &grpc_client.modify_ack_deadline/3, + "modify ack deadline for messages" + ) end @impl GenServer @@ -224,19 +231,25 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClient do # Chunks ack_ids into batches of @max_ack_ids_per_request and calls `call_fn` # for each chunk. Accumulates failed chunks. Stops on hard process errors. defp chunked_rpc(_pid, ack_ids, call_fn) do - ack_ids - |> Enum.chunk_every(@max_ack_ids_per_request) - |> Enum.reduce({:ok, []}, fn - chunk, {:ok, failed_so_far} -> - case call_fn.(chunk) do - :ok -> {:ok, failed_so_far} - {:error, _reason} -> {:ok, failed_so_far ++ chunk} - end + result = + ack_ids + |> Enum.chunk_every(@max_ack_ids_per_request) + |> Enum.reduce({:ok, []}, fn + chunk, {:ok, failed_so_far} -> + case call_fn.(chunk) do + :ok -> {:ok, failed_so_far} + {:error, _reason} -> {:ok, [chunk | failed_so_far]} + end + + _chunk, {:error, _} = err -> + # Hard process error — don't attempt remaining chunks + err + end) - _chunk, {:error, _} = err -> - # Hard process error — don't attempt remaining chunks - err - end) + case result do + {:ok, failed} -> {:ok, List.flatten(failed)} + error -> error + end catch :exit, reason -> {:error, {:call_failed, reason}} diff --git a/mix.exs b/mix.exs index 967cdf1..7238d4e 100644 --- a/mix.exs +++ b/mix.exs @@ -59,10 +59,14 @@ defmodule BroadwayCloudPubSub.MixProject do groups_for_modules: [ Pull: [ BroadwayCloudPubSub.Producer, - BroadwayCloudPubSub.Client + BroadwayCloudPubSub.Client, + BroadwayCloudPubSub.PullClient ], Streaming: [ - BroadwayCloudPubSub.Streaming.Producer + BroadwayCloudPubSub.Streaming.Producer, + BroadwayCloudPubSub.Streaming.Client, + BroadwayCloudPubSub.Streaming.GrpcClient, + BroadwayCloudPubSub.Streaming.Options ] ] ] diff --git a/test/broadway_cloud_pub_sub/streaming/ack_batcher_test.exs b/test/broadway_cloud_pub_sub/streaming/ack_batcher_test.exs index c6a9a14..2e9008d 100644 --- a/test/broadway_cloud_pub_sub/streaming/ack_batcher_test.exs +++ b/test/broadway_cloud_pub_sub/streaming/ack_batcher_test.exs @@ -81,6 +81,13 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcherTest do end end + # Start a Task.Supervisor for receipt modack tasks. + # Returns the supervisor pid. + defp start_task_supervisor do + {:ok, sup} = Task.Supervisor.start_link() + sup + end + # Start a spy RPC client + AckBatcher pair. # Returns {batcher_pid, rpc_client_pid}. defp start_batcher(extra_opts \\ []) do @@ -95,7 +102,8 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcherTest do broadway_name: :TestPipeline, subscription: "projects/test/subscriptions/test-sub", ack_batch_interval_ms: 50, - ack_batch_max_size: 10 + ack_batch_max_size: 10, + task_supervisor: start_task_supervisor() ], extra_opts ) @@ -290,7 +298,8 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcherTest do AckBatcher.start_link( rpc_client: flaky, ack_batch_interval_ms: 40, - ack_batch_max_size: 100 + ack_batch_max_size: 100, + task_supervisor: start_task_supervisor() ) AckBatcher.ack(batcher, ["id-1", "id-2"]) @@ -316,7 +325,8 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcherTest do AckBatcher.start_link( rpc_client: selective, ack_batch_interval_ms: 40, - ack_batch_max_size: 100 + ack_batch_max_size: 100, + task_supervisor: start_task_supervisor() ) AckBatcher.modack(batcher, ["id-30"], 30) @@ -346,7 +356,8 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcherTest do AckBatcher.start_link( rpc_client: fake_name, ack_batch_interval_ms: 50, - ack_batch_max_size: 100 + ack_batch_max_size: 100, + task_supervisor: start_task_supervisor() ) AckBatcher.ack(batcher, ["id-orphan"]) @@ -495,7 +506,8 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcherTest do AckBatcher.start_link( rpc_client: rpc, ack_batch_interval_ms: 30, - ack_batch_max_size: 100 + ack_batch_max_size: 100, + task_supervisor: start_task_supervisor() ) AckBatcher.modack(batcher, ["id-exhaust"], 30) @@ -531,7 +543,8 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcherTest do AckBatcher.start_link( rpc_client: rpc, ack_batch_interval_ms: 10_000, - ack_batch_max_size: 100 + ack_batch_max_size: 100, + task_supervisor: start_task_supervisor() ) # Add two ids with the same deadline; the rpc always fails @@ -558,7 +571,8 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcherTest do AckBatcher.start_link( rpc_client: rpc, ack_batch_interval_ms: 10_000, - ack_batch_max_size: 100 + ack_batch_max_size: 100, + task_supervisor: start_task_supervisor() ) AckBatcher.modack(batcher_b, ["id-bad", "id-good"], 30) @@ -596,7 +610,8 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcherTest do broadway_name: :TestPipeline, subscription: "projects/test/subscriptions/test-sub", ack_batch_interval_ms: 100_000, - ack_batch_max_size: 10_000 + ack_batch_max_size: 10_000, + task_supervisor: start_task_supervisor() ], extra_opts ) @@ -716,6 +731,7 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcherTest do broadway_name: MyPipeline, telemetry_metadata: %{env: :test}, rpc_client: :some_rpc_client, + task_supervisor: :some_task_supervisor, # Extra keys that AckBatcher should NOT include grpc_client: BroadwayCloudPubSub.Streaming.GrpcClient, grpc_client_config: %{}, @@ -736,7 +752,8 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcherTest do :retry_deadline_ms, :broadway_name, :telemetry_metadata, - :rpc_client + :rpc_client, + :task_supervisor ]) end @@ -794,5 +811,13 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcherTest do AckBatcher.child_opts(opts) end end + + test "raises on missing required key :task_supervisor" do + opts = Keyword.delete(@full_opts, :task_supervisor) + + assert_raise ArgumentError, ~r/missing required option :task_supervisor/, fn -> + AckBatcher.child_opts(opts) + end + end end end diff --git a/test/broadway_cloud_pub_sub/streaming/options_test.exs b/test/broadway_cloud_pub_sub/streaming/options_test.exs index f5bf648..8499477 100644 --- a/test/broadway_cloud_pub_sub/streaming/options_test.exs +++ b/test/broadway_cloud_pub_sub/streaming/options_test.exs @@ -337,13 +337,19 @@ defmodule BroadwayCloudPubSub.Streaming.OptionsTest do end test "accepts any static term (keyword list, atom, string)" do - {:ok, opts} = validate(subscription: "projects/p/subscriptions/s", telemetry_metadata: [a: 1]) + {:ok, opts} = + validate(subscription: "projects/p/subscriptions/s", telemetry_metadata: [a: 1]) + assert opts[:telemetry_metadata] == [a: 1] - {:ok, opts} = validate(subscription: "projects/p/subscriptions/s", telemetry_metadata: :my_tag) + {:ok, opts} = + validate(subscription: "projects/p/subscriptions/s", telemetry_metadata: :my_tag) + assert opts[:telemetry_metadata] == :my_tag - {:ok, opts} = validate(subscription: "projects/p/subscriptions/s", telemetry_metadata: "label") + {:ok, opts} = + validate(subscription: "projects/p/subscriptions/s", telemetry_metadata: "label") + assert opts[:telemetry_metadata] == "label" end @@ -481,8 +487,7 @@ defmodule BroadwayCloudPubSub.Streaming.ProducerPrepareForStartTest do [ name: TestPipeline, producer: [ - module: - {Producer, Keyword.merge(base_producer_opts, producer_opts)}, + module: {Producer, Keyword.merge(base_producer_opts, producer_opts)}, concurrency: 1 ], processors: [default: []] diff --git a/test/broadway_cloud_pub_sub/streaming/stream_manager_test.exs b/test/broadway_cloud_pub_sub/streaming/stream_manager_test.exs index 49f8d16..2568b9b 100644 --- a/test/broadway_cloud_pub_sub/streaming/stream_manager_test.exs +++ b/test/broadway_cloud_pub_sub/streaming/stream_manager_test.exs @@ -75,11 +75,16 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do # Start stub RPC client so AckBatcher can call it {:ok, _stub} = StubRpcClient.start_link(rpc_client_name) + # Start a Task.Supervisor for receipt modack tasks + task_sup_name = Module.concat(broadway_name, ReceiptModackTaskSupervisor) + {:ok, _task_sup} = Task.Supervisor.start_link(name: task_sup_name) + # Start a real AckBatcher registered under the name StreamManager will use {:ok, _batcher} = AckBatcher.start_link( name: batcher_name, rpc_client: rpc_client_name, + task_supervisor: task_sup_name, ack_batch_interval_ms: Keyword.get(opts, :ack_batch_interval_ms, 100), ack_batch_max_size: Keyword.get(opts, :ack_batch_max_size, 2_500) ) @@ -1404,16 +1409,20 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do rpc_client_name = Module.concat(broadway_name, UnaryRpcClient) batcher_name = Module.concat(broadway_name, AckBatcher) + task_sup_name = Module.concat(broadway_name, ReceiptModackTaskSupervisor) test_pid = self() {:ok, rpc_pid} = SpyRpcClientForEO.start_link(test_pid) # Register under the name AckBatcher will use Process.register(rpc_pid, rpc_client_name) + {:ok, _task_sup} = Task.Supervisor.start_link(name: task_sup_name) + {:ok, _batcher} = AckBatcher.start_link( name: batcher_name, rpc_client: rpc_client_name, + task_supervisor: task_sup_name, ack_batch_interval_ms: Keyword.get(opts, :ack_batch_interval_ms, 50), ack_batch_max_size: Keyword.get(opts, :ack_batch_max_size, 2_500) ) @@ -1629,13 +1638,16 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do rpc_client_name = Module.concat(broadway_name, UnaryRpcClient) batcher_name = Module.concat(broadway_name, AckBatcher) + task_sup_name = Module.concat(broadway_name, ReceiptModackTaskSupervisor) {:ok, _stub} = StubRpcClient.start_link(rpc_client_name) + {:ok, _task_sup} = Task.Supervisor.start_link(name: task_sup_name) {:ok, _batcher} = AckBatcher.start_link( name: batcher_name, rpc_client: rpc_client_name, + task_supervisor: task_sup_name, ack_batch_interval_ms: 100, ack_batch_max_size: 2_500, retry_deadline_ms: 60_000 @@ -1686,13 +1698,16 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do rpc_client_name = Module.concat(broadway_name, UnaryRpcClient) batcher_name = Module.concat(broadway_name, AckBatcher) + task_sup_name = Module.concat(broadway_name, ReceiptModackTaskSupervisor) {:ok, _stub} = StubRpcClient.start_link(rpc_client_name) + {:ok, _task_sup} = Task.Supervisor.start_link(name: task_sup_name) {:ok, _batcher} = AckBatcher.start_link( name: batcher_name, rpc_client: rpc_client_name, + task_supervisor: task_sup_name, ack_batch_interval_ms: 100, ack_batch_max_size: 2_500, retry_deadline_ms: 60_000 @@ -1744,13 +1759,16 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do rpc_client_name = Module.concat(broadway_name, UnaryRpcClient) batcher_name = Module.concat(broadway_name, AckBatcher) + task_sup_name = Module.concat(broadway_name, ReceiptModackTaskSupervisor) {:ok, _stub} = StubRpcClient.start_link(rpc_client_name) + {:ok, _task_sup} = Task.Supervisor.start_link(name: task_sup_name) {:ok, _batcher} = AckBatcher.start_link( name: batcher_name, rpc_client: rpc_client_name, + task_supervisor: task_sup_name, ack_batch_interval_ms: 100, ack_batch_max_size: 2_500 ) @@ -2514,6 +2532,35 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do refute Map.has_key?(state.outstanding, "buf-reconnect") assert buffer_length(pid) == 0 end + + test "buffered messages are nacked with deadline 0 on disconnect" do + # When a stream disconnects, buffered messages should be nacked so they + # become immediately available for redelivery to any consumer, rather than + # waiting for the ack deadline to expire naturally. + {pid, _rpc} = start_manager_with_spy_rpc(backoff_min: 60_000) + StreamManager.notify_demand(pid, 0) + + send(pid, {:stream_messages, [received_message("nack-buf-1", "data1")]}) + send(pid, {:stream_messages, [received_message("nack-buf-2", "data2")]}) + sync(pid) + assert buffer_length(pid) == 2 + + # Flush any pending modack calls from the initial receipt modack + flush_mailbox() + + # Simulate retryable stream error to trigger reset_connection + send(pid, {:stream_error, %GRPC.RPCError{status: 14, message: "unavailable"}}) + sync(pid) + + # AckBatcher flushes on its timer; force a flush to capture the nack RPC + batcher = :sys.get_state(pid).ack_batcher + AckBatcher.flush(batcher) + + # The spy RPC client should have received a modack with deadline 0 for the + # buffered ack_ids. + assert_receive {:rpc_call, {:modack, ids, 0}}, 1_000 + assert Enum.sort(ids) == ["nack-buf-1", "nack-buf-2"] + end end describe "EO behavior invariants" do From 49565516844998f13e5393916850a338e12f3d82 Mon Sep 17 00:00:00 2001 From: Rock Date: Wed, 22 Apr 2026 12:23:41 +0200 Subject: [PATCH 11/29] Add changelog notes --- CHANGELOG.md | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2886308..8ac6af4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,40 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Added + +- `BroadwayCloudPubSub.Streaming.Producer` — a new Broadway producer that uses the + gRPC StreamingPull API for low-latency, push-based message delivery instead of + HTTP pull requests + + - Server-side flow control via `:max_outstanding_messages` and `:max_outstanding_bytes` + + - Automatic lease extension with adaptive p99 ack deadlines to prevent premature + message redelivery + + - Batched ack/nack via a separate unary gRPC connection, independent of the streaming + connection + + - Exactly-once delivery support, auto-detected from subscription properties at runtime + + - Message ordering support via the `:enable_message_ordering` option + + - Graceful shutdown with configurable drain timeout (`:drain_timeout_ms`) + + - Telemetry events for stream lifecycle, ack batching, and gRPC spans + + - Support for both Gun and Mint HTTP/2 adapters via the `:adapter` option + + - Pub/Sub emulator support via `:grpc_endpoint` and `:use_ssl` options + + - `BroadwayCloudPubSub.Streaming.Client` — behaviour for custom gRPC client + implementations + + - `BroadwayCloudPubSub.Streaming.GrpcClient` — default gRPC client using the + `grpc` library + ## [0.9.1] - 2024-06-21 ### Changed From e58e174252b86f11a49bdf9eda846057ab9fc992 Mon Sep 17 00:00:00 2001 From: Rock Date: Fri, 1 May 2026 11:19:46 +0200 Subject: [PATCH 12/29] refactor: improve streaming architecture with retry tracking and test coverage Refactor the streaming producer internals for better reliability and maintainability. Add RetryTracker module for per-ack-ID retry state management with deadline and attempt-limit enforcement. Key changes: - Add RetryTracker for tracking retry state per ack ID - Refactor StreamManager connection handling and reconnect logic - Improve AckBatcher with retry-aware classification functions - Simplify reset_connection interface (remove unused reason param) - Refactor LeaseManager and MessageDispatch module interfaces - Add comprehensive tests for LeaseManager, MessageDispatch, RetryTracker, and Acknowledger --- .../streaming/ack_batcher.ex | 265 ++++++------- .../streaming/acknowledger.ex | 12 +- .../streaming/lease_manager.ex | 210 +++++----- .../streaming/message_dispatch.ex | 99 +++-- .../streaming/retry_tracker.ex | 147 +++++++ .../streaming/stream_manager.ex | 273 +++++++++---- .../streaming/ack_batcher_test.exs | 153 ++++++- .../streaming/acknowledger_test.exs | 18 + .../streaming/lease_manager_test.exs | 266 +++++++++++++ .../streaming/message_dispatch_test.exs | 309 +++++++++++++++ .../streaming/retry_tracker_test.exs | 375 ++++++++++++++++++ .../streaming/stream_manager_test.exs | 12 +- 12 files changed, 1762 insertions(+), 377 deletions(-) create mode 100644 lib/broadway_cloud_pub_sub/streaming/retry_tracker.ex create mode 100644 test/broadway_cloud_pub_sub/streaming/lease_manager_test.exs create mode 100644 test/broadway_cloud_pub_sub/streaming/message_dispatch_test.exs create mode 100644 test/broadway_cloud_pub_sub/streaming/retry_tracker_test.exs diff --git a/lib/broadway_cloud_pub_sub/streaming/ack_batcher.ex b/lib/broadway_cloud_pub_sub/streaming/ack_batcher.ex index 259c879..5b66131 100644 --- a/lib/broadway_cloud_pub_sub/streaming/ack_batcher.ex +++ b/lib/broadway_cloud_pub_sub/streaming/ack_batcher.ex @@ -7,7 +7,7 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcher do use GenServer - alias BroadwayCloudPubSub.Streaming.{Options, Telemetry, UnaryRpcClient} + alias BroadwayCloudPubSub.Streaming.{Options, RetryTracker, Telemetry, UnaryRpcClient} @max_modack_attempts 3 @@ -21,17 +21,14 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcher do :timer_ref, # Registered name of the Task.Supervisor for receipt modack tasks. :task_supervisor, - # nil = no deadline. Set to 600_000ms when exactly-once delivery is enabled. - retry_deadline_ms: nil, ack_ids: [], ack_count: 0, # %{deadline_seconds => [ack_id]} modack_ids: %{}, - # Monotonic ms of when each ack_id was first queued; cleaned up on success or expiry. - ack_first_queued: %{}, - modack_first_queued: %{}, - # Per-ack-ID attempt count; cleaned up each flush via sweep over remaining_modacks. - modack_attempts: %{} + # RetryTracker for ack retry state (deadline-only, no attempt limit). + ack_tracker: nil, + # RetryTracker for modack retry state (deadline + 3-attempt limit). + modack_tracker: nil ] @all_keys [ @@ -136,8 +133,13 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcher do telemetry_metadata: config[:telemetry_metadata], batch_interval_ms: config.ack_batch_interval_ms, batch_max_size: config.ack_batch_max_size, - retry_deadline_ms: config[:retry_deadline_ms], - task_supervisor: config[:task_supervisor] + task_supervisor: config[:task_supervisor], + ack_tracker: RetryTracker.new(retry_deadline_ms: config[:retry_deadline_ms]), + modack_tracker: + RetryTracker.new( + retry_deadline_ms: config[:retry_deadline_ms], + max_attempts: @max_modack_attempts + ) } {:ok, schedule_flush(state)} @@ -145,12 +147,11 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcher do @impl GenServer def handle_cast({:ack, ack_ids}, state) do - now = System.monotonic_time(:millisecond) + now = now_ms() new_ids = ack_ids ++ state.ack_ids new_count = state.ack_count + length(ack_ids) - # put_new: don't reset timestamp if this ack_id is already being retried - new_ts = Enum.reduce(ack_ids, state.ack_first_queued, &Map.put_new(&2, &1, now)) - state = %{state | ack_ids: new_ids, ack_count: new_count, ack_first_queued: new_ts} + ack_tracker = RetryTracker.track(state.ack_tracker, ack_ids, now) + state = %{state | ack_ids: new_ids, ack_count: new_count, ack_tracker: ack_tracker} state = if new_count >= state.batch_max_size do @@ -163,22 +164,15 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcher do end def handle_cast({:modack, ack_ids, deadline_seconds}, state) do - now = System.monotonic_time(:millisecond) + now = now_ms() new_modack_ids = Map.update(state.modack_ids, deadline_seconds, ack_ids, &(ack_ids ++ &1)) total_modack_count = new_modack_ids |> Map.values() |> Enum.map(&length/1) |> Enum.sum() - # put_new: don't reset timestamp or attempt count for already-tracked ids - new_ts = Enum.reduce(ack_ids, state.modack_first_queued, &Map.put_new(&2, &1, now)) - new_attempts = Enum.reduce(ack_ids, state.modack_attempts, &Map.put_new(&2, &1, 0)) - - state = %{ - state - | modack_ids: new_modack_ids, - modack_first_queued: new_ts, - modack_attempts: new_attempts - } + modack_tracker = RetryTracker.track(state.modack_tracker, ack_ids, now) + + state = %{state | modack_ids: new_modack_ids, modack_tracker: modack_tracker} state = if state.ack_count + total_modack_count >= state.batch_max_size do @@ -191,7 +185,13 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcher do end def handle_cast({:update_retry_deadline, retry_deadline_ms}, state) do - {:noreply, %{state | retry_deadline_ms: retry_deadline_ms}} + {:noreply, + %{ + state + | ack_tracker: RetryTracker.update_retry_deadline(state.ack_tracker, retry_deadline_ms), + modack_tracker: + RetryTracker.update_retry_deadline(state.modack_tracker, retry_deadline_ms) + }} end # Receipt modack for exactly-once delivery. Spawns a supervised Task that @@ -247,158 +247,133 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcher do defp flush_acks(%{ack_count: 0} = state), do: state defp flush_acks(state) do - case UnaryRpcClient.acknowledge(state.rpc_client, state.ack_ids) do - {:ok, []} -> - %{state | ack_ids: [], ack_count: 0, ack_first_queued: %{}} + rpc_result = UnaryRpcClient.acknowledge(state.rpc_client, state.ack_ids) + result = classify_ack_result(state.ack_ids, rpc_result, state.ack_tracker, now_ms()) - {:ok, remaining_ids} -> - state |> put_retained_acks(remaining_ids) |> expire_stale_acks() - - {:error, {_rpc_error, transient_ids}} when is_list(transient_ids) -> - # Permanent ids already dropped by UnaryRpcClient; retain only transient. - state |> put_retained_acks(transient_ids) |> expire_stale_acks() - - {:error, _reason} -> - expire_stale_acks(state) + if result.expired_count > 0 do + emit_telemetry(:ack_retry_expired, %{count: result.expired_count}, state) end + + %{state | ack_ids: result.live, ack_count: length(result.live), ack_tracker: result.tracker} end defp flush_modacks(%{modack_ids: modacks} = state) when map_size(modacks) == 0, do: state defp flush_modacks(state) do all_ids = state.modack_ids |> Map.values() |> List.flatten() + tracker = RetryTracker.record_attempt(state.modack_tracker, all_ids) - # Increment attempt count for all ids about to be flushed. - attempts = - Enum.reduce(all_ids, state.modack_attempts, fn id, acc -> - Map.update(acc, id, 1, &(&1 + 1)) + # Call each deadline group's RPC and collect results. + rpc_results = + Enum.map(state.modack_ids, fn {deadline, ids} -> + rpc_result = UnaryRpcClient.modify_ack_deadline(state.rpc_client, ids, deadline) + {deadline, ids, rpc_result} end) - state = %{state | modack_attempts: attempts} + result = classify_modack_results(rpc_results, tracker, now_ms()) - # Each deadline group is attempted independently. - remaining_modacks = - Enum.reduce(state.modack_ids, %{}, fn {deadline, ids}, remaining -> - case UnaryRpcClient.modify_ack_deadline(state.rpc_client, ids, deadline) do - {:ok, []} -> - remaining - - {:ok, remaining_ids} -> - keep = apply_modack_retry_limit(remaining_ids, state.modack_attempts, state) - if keep == [], do: remaining, else: Map.put(remaining, deadline, keep) - - {:error, {_rpc_error, transient_ids}} when is_list(transient_ids) -> - keep = apply_modack_retry_limit(transient_ids, state.modack_attempts, state) - if keep == [], do: remaining, else: Map.put(remaining, deadline, keep) - - {:error, _reason} -> - keep = apply_modack_retry_limit(ids, state.modack_attempts, state) - if keep == [], do: remaining, else: Map.put(remaining, deadline, keep) - end - end) - - # Cleanup sweep: bound tracking maps to currently-pending ids only. - still_pending = remaining_modacks |> Map.values() |> List.flatten() |> MapSet.new() - - clean_attempts = - Map.filter(state.modack_attempts, fn {id, _} -> MapSet.member?(still_pending, id) end) - - clean_ts = - Map.filter(state.modack_first_queued, fn {id, _} -> MapSet.member?(still_pending, id) end) + if result.exhausted_count > 0 do + emit_telemetry(:modack_retry_exhausted, %{count: result.exhausted_count}, state) + end - state = %{ - state - | modack_ids: remaining_modacks, - modack_attempts: clean_attempts, - modack_first_queued: clean_ts - } + if result.expired_count > 0 do + emit_telemetry(:modack_retry_expired, %{count: result.expired_count}, state) + end - expire_stale_modacks(state) + %{state | modack_ids: result.remaining, modack_tracker: result.tracker} end - # Drops modack ids that have reached the maximum attempt count and emits telemetry. - defp apply_modack_retry_limit(ids, attempts, state) do - {keep, drop} = - Enum.split_with(ids, fn id -> Map.get(attempts, id, 0) < @max_modack_attempts end) + # --- Pure classification functions --- - if drop != [] do - emit_telemetry(:modack_retry_exhausted, %{count: length(drop)}, state) - end + @typedoc "RPC result from UnaryRpcClient.acknowledge/2 or modify_ack_deadline/3." + @type rpc_result :: {:ok, [String.t()]} | {:error, term()} - keep - end + @doc false + @spec classify_ack_result([String.t()], rpc_result(), RetryTracker.t(), integer()) :: + %{live: [String.t()], expired_count: non_neg_integer(), tracker: RetryTracker.t()} + def classify_ack_result(sent_ack_ids, rpc_result, ack_tracker, now_ms) do + retained_ids = extract_retained_ids(sent_ack_ids, rpc_result) - # Replaces the pending ack_ids with the given retained set and cleans up - # ack_first_queued to contain only the retained ids. - defp put_retained_acks(state, retained_ids) do retained_set = MapSet.new(retained_ids) + tracker = RetryTracker.retain_only(ack_tracker, retained_set) + {live, expired, tracker} = RetryTracker.expire_stale(tracker, retained_ids, now_ms) - clean_ts = - Map.filter(state.ack_first_queued, fn {id, _} -> MapSet.member?(retained_set, id) end) - - %{state | ack_ids: retained_ids, ack_count: length(retained_ids), ack_first_queued: clean_ts} + %{live: live, expired_count: length(expired), tracker: tracker} end - defp expire_stale_acks(%{retry_deadline_ms: nil} = state), do: state - - defp expire_stale_acks(state) do - now = System.monotonic_time(:millisecond) - - {live, expired} = - Enum.split_with(state.ack_ids, fn id -> - case Map.get(state.ack_first_queued, id) do - nil -> true - ts -> now - ts < state.retry_deadline_ms - end + @doc false + @spec classify_modack_results( + [{non_neg_integer(), [String.t()], rpc_result()}], + RetryTracker.t(), + integer() + ) :: + %{ + remaining: %{non_neg_integer() => [String.t()]}, + exhausted_count: non_neg_integer(), + expired_count: non_neg_integer(), + tracker: RetryTracker.t() + } + def classify_modack_results(rpc_results, modack_tracker, now_ms) do + # Each deadline group is processed independently. Thread the tracker + # through each group so attempt-limit checks use the latest counts. + {remaining_modacks, tracker, total_exhausted} = + Enum.reduce(rpc_results, {%{}, modack_tracker, 0}, fn + {deadline, sent_ids, rpc_result}, {remaining, trk, exhausted_acc} -> + result_ids = extract_retained_ids(sent_ids, rpc_result) + + if result_ids == [] do + {remaining, trk, exhausted_acc} + else + {keep, drop, trk} = RetryTracker.check_attempts(trk, result_ids) + + remaining = + if keep == [], do: remaining, else: Map.put(remaining, deadline, keep) + + {remaining, trk, exhausted_acc + length(drop)} + end end) - if expired != [] do - emit_telemetry(:ack_retry_expired, %{count: length(expired)}, state) - end - - clean_ts = Map.drop(state.ack_first_queued, expired) - %{state | ack_ids: live, ack_count: length(live), ack_first_queued: clean_ts} - end - - defp expire_stale_modacks(%{retry_deadline_ms: nil} = state), do: state - - defp expire_stale_modacks(state) do - now = System.monotonic_time(:millisecond) - - {remaining_modacks, expired_count} = - Enum.reduce(state.modack_ids, {%{}, 0}, fn {deadline, ids}, {acc, dropped} -> - {live, expired} = - Enum.split_with(ids, fn id -> - case Map.get(state.modack_first_queued, id) do - nil -> true - ts -> now - ts < state.retry_deadline_ms - end - end) - - acc = if live == [], do: acc, else: Map.put(acc, deadline, live) - {acc, dropped + length(expired)} - end) + # Cleanup sweep: bound tracking maps to currently-pending ids only. + still_pending = remaining_modacks |> Map.values() |> List.flatten() |> MapSet.new() + tracker = RetryTracker.retain_only(tracker, still_pending) - if expired_count > 0 do - emit_telemetry(:modack_retry_expired, %{count: expired_count}, state) - end + # Expire stale modack ids that have exceeded the retry deadline. + still_pending_list = MapSet.to_list(still_pending) - still_pending = remaining_modacks |> Map.values() |> List.flatten() |> MapSet.new() + {_live_ids, expired_ids, tracker} = + RetryTracker.expire_stale(tracker, still_pending_list, now_ms) - clean_ts = - Map.filter(state.modack_first_queued, fn {id, _} -> MapSet.member?(still_pending, id) end) + # Remove expired ids from remaining_modacks. + remaining_modacks = + if expired_ids == [] do + remaining_modacks + else + expired_set = MapSet.new(expired_ids) - clean_attempts = - Map.filter(state.modack_attempts, fn {id, _} -> MapSet.member?(still_pending, id) end) + remaining_modacks + |> Enum.map(fn {d, ids} -> {d, Enum.reject(ids, &MapSet.member?(expired_set, &1))} end) + |> Enum.reject(fn {_, ids} -> ids == [] end) + |> Map.new() + end %{ - state - | modack_ids: remaining_modacks, - modack_first_queued: clean_ts, - modack_attempts: clean_attempts + remaining: remaining_modacks, + exhausted_count: total_exhausted, + expired_count: length(expired_ids), + tracker: tracker } end + # Extracts the list of ack_ids that should be retained from an RPC result. + defp extract_retained_ids(_sent_ids, {:ok, []}), do: [] + defp extract_retained_ids(_sent_ids, {:ok, remaining_ids}), do: remaining_ids + + defp extract_retained_ids(_sent_ids, {:error, {_rpc_error, transient_ids}}) + when is_list(transient_ids), + do: transient_ids + + defp extract_retained_ids(sent_ids, {:error, _reason}), do: sent_ids + defp schedule_flush(state) do state = cancel_timer(state) ref = Process.send_after(self(), :flush_timer, state.batch_interval_ms) @@ -435,6 +410,8 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcher do send(reply_to, {:receipt_modack_result, ref, result}) end + defp now_ms, do: System.monotonic_time(:millisecond) + defp emit_telemetry(event, measurements, state) do metadata = %{ name: state.broadway_name, diff --git a/lib/broadway_cloud_pub_sub/streaming/acknowledger.ex b/lib/broadway_cloud_pub_sub/streaming/acknowledger.ex index 49ff317..8be9a81 100644 --- a/lib/broadway_cloud_pub_sub/streaming/acknowledger.ex +++ b/lib/broadway_cloud_pub_sub/streaming/acknowledger.ex @@ -36,6 +36,16 @@ defmodule BroadwayCloudPubSub.Streaming.Acknowledger do &{__MODULE__, ack_ref, %{ack_id: &1}} end + @doc """ + Extracts the ack_id from a `Broadway.Message` produced by this acknowledger. + + This is the canonical extraction point for ack_ids — callers should use this + instead of pattern-matching on the acknowledger tuple's internal structure. + """ + @spec ack_id_from(Broadway.Message.t()) :: String.t() + def ack_id_from(%Broadway.Message{acknowledger: {__MODULE__, _ref, %{ack_id: ack_id}}}), + do: ack_id + @impl Acknowledger def ack(ack_ref, successful, failed) do # persistent_term stores the manager's registered *name*, not its PID, @@ -78,7 +88,7 @@ defmodule BroadwayCloudPubSub.Streaming.Acknowledger do defp default_action(:on_success, %{on_success: action}), do: action defp default_action(:on_failure, %{on_failure: action}), do: action - defp extract_ack_id(%{acknowledger: {_, _, %{ack_id: ack_id}}}), do: ack_id + defp extract_ack_id(message), do: ack_id_from(message) defp dispatch_acks(actions_and_ids, manager_server) do Enum.each(actions_and_ids, fn {action, ack_ids} -> diff --git a/lib/broadway_cloud_pub_sub/streaming/lease_manager.ex b/lib/broadway_cloud_pub_sub/streaming/lease_manager.ex index ced173a..e17ec66 100644 --- a/lib/broadway_cloud_pub_sub/streaming/lease_manager.ex +++ b/lib/broadway_cloud_pub_sub/streaming/lease_manager.ex @@ -1,14 +1,13 @@ defmodule BroadwayCloudPubSub.Streaming.LeaseManager do @moduledoc false - # Pure-function module for lease extension, adaptive deadline computation, - # timer scheduling, and stale pending-modack sweeping. + # Pure-function module for lease extension and adaptive deadline computation. # - # Functions accept and return the StreamManager state struct. StreamManager - # delegates to this module for lease management without mixing timer and - # deadline logic into GenServer callback bodies. + # All functions accept explicit inputs and return plain data — no state structs, + # no side effects (no AckBatcher calls, no Process.send_after, no telemetry). + # StreamManager handles all side effects based on the returned results. - alias BroadwayCloudPubSub.Streaming.{AckBatcher, AckTimeDistribution, Telemetry} + alias BroadwayCloudPubSub.Streaming.AckTimeDistribution # Subtracted from the adaptive deadline when computing the lease extension interval. @grace_period_seconds 5 @@ -16,8 +15,8 @@ defmodule BroadwayCloudPubSub.Streaming.LeaseManager do # Minimum ack deadline enforced by the server for exactly-once subscriptions. @min_deadline_exactly_once_seconds 60 - # Stale pending receipt modacks (older than 60s) are nacked for fast redelivery. - @receipt_modack_stale_ms 60_000 + # Default staleness threshold for pending receipt modacks (60 seconds). + @default_receipt_modack_stale_ms 60_000 # --- Deadline computation --- @@ -26,129 +25,138 @@ defmodule BroadwayCloudPubSub.Streaming.LeaseManager do percentile from recorded processing times. For exactly-once subscriptions, enforces the server's minimum of 60 seconds. """ - def effective_deadline(state) do - adaptive = AckTimeDistribution.percentile(state.ack_time_dist, 0.99) + @spec effective_deadline(AckTimeDistribution.t(), boolean()) :: pos_integer() + def effective_deadline(ack_time_dist, exactly_once_enabled) do + adaptive = AckTimeDistribution.percentile(ack_time_dist, 0.99) - if state.exactly_once_enabled, + if exactly_once_enabled, do: max(adaptive, @min_deadline_exactly_once_seconds), else: adaptive end # --- Lease extension --- + @typedoc """ + Result of a lease extension cycle. + + * `valid` — outstanding messages whose `max_expiry` has not passed. + * `expired_count` — number of messages whose `max_expiry` has passed. + * `modack_ids` — ack_ids of valid messages that need a lease extension. + * `modack_deadline` — the adaptive deadline to use for the modack. + * `next_timer_ms` — milliseconds until the next extension tick (with jitter). + """ + @type extend_result :: %{ + valid: %{String.t() => map()}, + expired_count: non_neg_integer(), + modack_ids: [String.t()], + modack_deadline: pos_integer(), + next_timer_ms: pos_integer() + } + @doc """ - Runs a lease extension cycle: partitions outstanding messages into valid and - expired, emits telemetry, sends modack requests for valid messages, and - schedules the next extension tick. + Runs a lease extension computation: partitions outstanding messages into valid + and expired, computes the modack deadline and next timer interval. + + Returns an `extend_result` map. The caller is responsible for: + - Sending modack requests for `modack_ids` at `modack_deadline` + - Scheduling the next `:extend_leases` timer at `next_timer_ms` + - Emitting telemetry + - Updating state with `valid` as the new outstanding map """ - def do_extend_leases(state) do - now = System.monotonic_time(:millisecond) - deadline = effective_deadline(state) + @spec extend_leases( + outstanding :: %{String.t() => map()}, + ack_time_dist :: AckTimeDistribution.t(), + exactly_once_enabled :: boolean(), + now_ms :: integer() + ) :: extend_result() + def extend_leases(outstanding, ack_time_dist, exactly_once_enabled, now_ms) do + deadline = effective_deadline(ack_time_dist, exactly_once_enabled) - # Partition into still-valid and expired (past max_expiry — server will redeliver). {valid, expired} = - Map.split_with(state.outstanding, fn {_id, info} -> info.max_expiry > now end) - - if map_size(expired) > 0 do - emit_telemetry(:lease_expired, %{count: map_size(expired)}, state.config) - end - - emit_telemetry( - :extend_leases, - %{count: map_size(valid), deadline: deadline}, - state.config - ) - - emit_telemetry( - :pressure_snapshot, - %{ - outstanding_count: map_size(valid), - buffered_count: :queue.len(state.message_buffer), - pending_demand: state.pending_demand - }, - state.config - ) - - if map_size(valid) > 0 do - AckBatcher.modack(state.ack_batcher, Map.keys(valid), deadline) - end - - # Schedule next tick with jitter in [0.8, 0.9) to spread out concurrent StreamManagers. - base_interval_ms = max(1_000, (deadline - @grace_period_seconds) * 1_000) - jitter_factor = 0.8 + :rand.uniform() * 0.1 - timer = Process.send_after(self(), :extend_leases, round(base_interval_ms * jitter_factor)) + Map.split_with(outstanding, fn {_id, info} -> info.max_expiry > now_ms end) + + modack_ids = if map_size(valid) > 0, do: Map.keys(valid), else: [] - state - |> Map.put(:outstanding, valid) - |> Map.put(:lease_timer, timer) - |> sweep_stale_pending_modacks() + %{ + valid: valid, + expired_count: map_size(expired), + modack_ids: modack_ids, + modack_deadline: deadline, + next_timer_ms: compute_next_timer_ms(deadline) + } end - # --- Timer management --- + # --- Stale pending modack sweep --- - @doc """ - Schedules the initial lease extension timer based on the configured deadline. - Cancels any existing timer first. - """ - def schedule_lease_timer(state) do - state = cancel_lease_timer(state) - # Initial interval: (configured deadline - grace period) with jitter, minimum 1s. - deadline_s = state.config.stream_ack_deadline_seconds - base_ms = max(1_000, (deadline_s - @grace_period_seconds) * 1_000) - jitter_factor = 0.8 + :rand.uniform() * 0.1 - interval_ms = round(base_ms * jitter_factor) - timer = Process.send_after(self(), :extend_leases, interval_ms) - %{state | lease_timer: timer} - end + @typedoc """ + Result of sweeping stale pending receipt modacks. - @doc """ - Cancels the lease extension timer if one is active. + * `fresh` — entries that are still within the staleness threshold. + * `stale_ack_ids` — ack_ids from entries that exceeded the threshold. """ - def cancel_lease_timer(%{lease_timer: nil} = state), do: state + @type sweep_result :: %{ + fresh: %{reference() => map()}, + stale_ack_ids: [String.t()] + } - def cancel_lease_timer(%{lease_timer: timer} = state) do - Process.cancel_timer(timer) - %{state | lease_timer: nil} - end + @doc """ + Partitions pending receipt modacks into fresh and stale. - # --- Stale pending modack sweep --- + Entries whose `received_at` is older than `stale_threshold_ms` from `now_ms` + are considered stale. Returns stale ack_ids for nacking and the remaining + fresh entries. - @doc """ - Sweeps stale pending receipt modacks (older than 60s) and nacks them for - fast redelivery. Used during the lease extension cycle. + The default `stale_threshold_ms` is #{@default_receipt_modack_stale_ms}ms (60 seconds). """ - def sweep_stale_pending_modacks(state) do - now = System.monotonic_time(:millisecond) - cutoff = now - @receipt_modack_stale_ms + @spec sweep_stale_pending_modacks( + pending_receipt_modacks :: %{reference() => map()}, + now_ms :: integer(), + stale_threshold_ms :: pos_integer() + ) :: sweep_result() + def sweep_stale_pending_modacks( + pending_receipt_modacks, + now_ms, + stale_threshold_ms \\ @default_receipt_modack_stale_ms + ) do + cutoff = now_ms - stale_threshold_ms {stale, fresh} = - Map.split_with(state.pending_receipt_modacks, fn {_ref, %{received_at: t}} -> + Map.split_with(pending_receipt_modacks, fn {_ref, %{received_at: t}} -> t < cutoff end) - if map_size(stale) > 0 do - stale_ids = stale |> Map.values() |> Enum.flat_map(& &1.ack_ids) - AckBatcher.modack(state.ack_batcher, stale_ids, 0) - emit_telemetry(:receipt_modack_stale, %{count: length(stale_ids)}, state.config) - end + stale_ack_ids = + if map_size(stale) > 0 do + stale |> Map.values() |> Enum.flat_map(& &1.ack_ids) + else + [] + end - %{state | pending_receipt_modacks: fresh} + %{fresh: fresh, stale_ack_ids: stale_ack_ids} end - # --- Private: telemetry --- + # --- Timer computation --- - defp emit_telemetry(event, measurements, config) do - metadata = %{ - name: config.broadway[:name], - subscription: config.subscription - } + @doc """ + Computes the initial lease extension timer interval in milliseconds + from the configured stream ack deadline. - Telemetry.execute( - :stream, - event, - measurements, - metadata, - Map.get(config, :telemetry_metadata) - ) + The interval is `(deadline - grace_period) * 1000` with jitter in [0.8, 0.9), + minimum 1000ms. + """ + @spec initial_timer_ms(pos_integer()) :: pos_integer() + def initial_timer_ms(stream_ack_deadline_seconds) do + compute_next_timer_ms(stream_ack_deadline_seconds) + end + + # --- Private --- + + # Computes the next timer interval with jitter. + # base_interval = max(1s, (deadline - grace_period) * 1000) + # jitter in [0.8, 0.9) to spread out concurrent StreamManagers. + defp compute_next_timer_ms(deadline_seconds) do + base_interval_ms = max(1_000, (deadline_seconds - @grace_period_seconds) * 1_000) + jitter_factor = 0.8 + :rand.uniform() * 0.1 + round(base_interval_ms * jitter_factor) end end diff --git a/lib/broadway_cloud_pub_sub/streaming/message_dispatch.ex b/lib/broadway_cloud_pub_sub/streaming/message_dispatch.ex index f1b2188..c649769 100644 --- a/lib/broadway_cloud_pub_sub/streaming/message_dispatch.ex +++ b/lib/broadway_cloud_pub_sub/streaming/message_dispatch.ex @@ -4,40 +4,52 @@ defmodule BroadwayCloudPubSub.Streaming.MessageDispatch do # Pure-function module for message buffer management, demand-based flushing, # Broadway message construction, and outstanding ack_id tracking. # - # Functions accept and return the StreamManager state struct (or relevant - # fields). StreamManager delegates to this module for message dispatch - # without mixing buffer logic into GenServer callback bodies. + # All functions accept explicit inputs and return plain data — no state structs, + # no side effects (no send/2, no telemetry). StreamManager handles all side + # effects based on the returned results. alias BroadwayCloudPubSub.MessageBuilder - alias BroadwayCloudPubSub.Streaming.AckTimeDistribution + alias BroadwayCloudPubSub.Streaming.{Acknowledger, AckTimeDistribution} # --- Buffer and demand --- - @doc """ - Enqueues `messages` into the buffer, then flushes up to `pending_demand` - messages to the producer process. + @typedoc """ + Result of flushing demand from the message buffer. + + * `to_send` — messages to send to the producer (in order). + * `remaining_buffer` — the buffer after dequeuing. + * `remaining_demand` — unfulfilled demand after flushing. """ - def deliver_messages(state, messages) do - new_buffer = Enum.reduce(messages, state.message_buffer, &:queue.in(&1, &2)) - flush_demand(%{state | message_buffer: new_buffer}) - end + @type flush_result :: %{ + to_send: [Broadway.Message.t()], + remaining_buffer: :queue.queue(), + remaining_demand: non_neg_integer() + } @doc """ - Flushes up to `pending_demand` messages from the buffer to the producer. - No-op when draining, demand is zero, or the buffer is empty. + Dequeues up to `pending_demand` messages from the buffer. + + Returns a `flush_result` map. The caller is responsible for sending + `to_send` to the producer and updating state with the remaining + buffer and demand. """ - def flush_demand(%{draining: true} = state), do: state - def flush_demand(%{pending_demand: 0} = state), do: state + @spec flush_demand(:queue.queue(), non_neg_integer()) :: flush_result() + def flush_demand(buffer, 0) do + %{to_send: [], remaining_buffer: buffer, remaining_demand: 0} + end - def flush_demand(state) do - if :queue.is_empty(state.message_buffer) do - state + def flush_demand(buffer, pending_demand) when pending_demand > 0 do + if :queue.is_empty(buffer) do + %{to_send: [], remaining_buffer: buffer, remaining_demand: pending_demand} else {remaining, demand_left, batch_reversed} = - flush_demand_loop(state.message_buffer, state.pending_demand, []) + flush_demand_loop(buffer, pending_demand, []) - send(state.producer_pid, {:stream_messages, Enum.reverse(batch_reversed)}) - %{state | message_buffer: remaining, pending_demand: demand_left} + %{ + to_send: Enum.reverse(batch_reversed), + remaining_buffer: remaining, + remaining_demand: demand_left + } end end @@ -55,6 +67,12 @@ defmodule BroadwayCloudPubSub.Streaming.MessageDispatch do @doc """ Adds ack_ids to the outstanding map with their received_at and max_expiry. """ + @spec add_to_outstanding( + outstanding :: map(), + ack_ids :: [String.t()], + received_at :: integer(), + max_extension_ms :: pos_integer() + ) :: map() def add_to_outstanding(outstanding, ack_ids, received_at, max_extension_ms) do Enum.reduce(ack_ids, outstanding, fn ack_id, acc -> Map.put(acc, ack_id, %{received_at: received_at, max_expiry: received_at + max_extension_ms}) @@ -63,16 +81,22 @@ defmodule BroadwayCloudPubSub.Streaming.MessageDispatch do @doc """ Records processing times for ack_ids in the adaptive p99 distribution, then - removes them from outstanding. Shared by both ack and modack (nack) paths. - """ - def record_and_remove_from_outstanding(state, ack_ids) do - now = System.monotonic_time(:millisecond) + removes them from outstanding. - ack_time_dist = - Enum.reduce(ack_ids, state.ack_time_dist, fn ack_id, dist -> - case Map.get(state.outstanding, ack_id) do + Returns `{updated_outstanding, updated_ack_time_dist}`. + """ + @spec record_and_remove( + outstanding :: map(), + ack_time_dist :: AckTimeDistribution.t(), + ack_ids :: [String.t()], + now_ms :: integer() + ) :: {map(), AckTimeDistribution.t()} + def record_and_remove(outstanding, ack_time_dist, ack_ids, now_ms) do + updated_dist = + Enum.reduce(ack_ids, ack_time_dist, fn ack_id, dist -> + case Map.get(outstanding, ack_id) do %{received_at: received_at} -> - duration_s = max(1, div(now - received_at, 1_000)) + duration_s = max(1, div(now_ms - received_at, 1_000)) AckTimeDistribution.record(dist, duration_s) nil -> @@ -80,23 +104,26 @@ defmodule BroadwayCloudPubSub.Streaming.MessageDispatch do end end) - new_outstanding = Enum.reduce(ack_ids, state.outstanding, &Map.delete(&2, &1)) - %{state | outstanding: new_outstanding, ack_time_dist: ack_time_dist} + updated_outstanding = Enum.reduce(ack_ids, outstanding, &Map.delete(&2, &1)) + {updated_outstanding, updated_dist} end @doc """ Extracts ack_ids from buffered Broadway messages. """ + @spec extract_buffered_ack_ids(:queue.queue()) :: [String.t()] def extract_buffered_ack_ids(message_buffer) do message_buffer |> :queue.to_list() - |> Enum.map(fn %Broadway.Message{acknowledger: {_, _, %{ack_id: id}}} -> id end) + |> Enum.map(&Acknowledger.ack_id_from/1) end @doc """ Splits broadway_messages into {succeeded_msgs, succeeded_ids} by removing messages whose ack_id is in failed_ids. """ + @spec partition_succeeded([Broadway.Message.t()], [String.t()], [String.t()]) :: + {[Broadway.Message.t()], [String.t()]} def partition_succeeded(broadway_messages, all_ack_ids, failed_ids) do failed_set = MapSet.new(failed_ids) @@ -116,14 +143,14 @@ defmodule BroadwayCloudPubSub.Streaming.MessageDispatch do # --- Message construction --- @doc """ - Builds a `Broadway.Message` from a decoded Pub/Sub ReceivedMessage and the - current StreamManager state. + Builds a `Broadway.Message` from a decoded Pub/Sub ReceivedMessage and + an ack_ref. """ + @spec build_broadway_message(map(), term()) :: Broadway.Message.t() def build_broadway_message( %{ack_id: ack_id, message: pubsub_msg, delivery_attempt: delivery_attempt}, - state + ack_ref ) do - ack_ref = state.config.ack_ref acknowledger = BroadwayCloudPubSub.Streaming.Acknowledger.builder(ack_ref).(ack_id) %Broadway.Message{ diff --git a/lib/broadway_cloud_pub_sub/streaming/retry_tracker.ex b/lib/broadway_cloud_pub_sub/streaming/retry_tracker.ex new file mode 100644 index 0000000..00ec4f7 --- /dev/null +++ b/lib/broadway_cloud_pub_sub/streaming/retry_tracker.ex @@ -0,0 +1,147 @@ +defmodule BroadwayCloudPubSub.Streaming.RetryTracker do + @moduledoc false + + # Pure-data module for tracking per-ack-ID retry state. + # + # Tracks first-queued timestamps and attempt counts for ack/modack IDs + # that need retry management (primarily for exactly-once delivery). + # + # Used by AckBatcher to enforce: + # - Retry deadlines: drop IDs older than `retry_deadline_ms` + # - Attempt limits: drop IDs that have been attempted `max_attempts` times + # + # All functions are pure — they accept and return data without side effects. + # AckBatcher handles telemetry emission and state updates. + + @type t :: %__MODULE__{ + first_queued: %{String.t() => integer()}, + attempts: %{String.t() => non_neg_integer()}, + retry_deadline_ms: pos_integer() | nil, + max_attempts: pos_integer() | nil + } + + defstruct first_queued: %{}, attempts: %{}, retry_deadline_ms: nil, max_attempts: nil + + @doc """ + Creates a new RetryTracker. + + ## Options + + * `:retry_deadline_ms` — maximum time in milliseconds to keep retrying + an ack_id before dropping it. `nil` disables deadline-based expiry. + * `:max_attempts` — maximum number of flush attempts before dropping + an ack_id. `nil` disables attempt-based limits. + """ + @spec new(keyword()) :: t() + def new(opts \\ []) do + %__MODULE__{ + retry_deadline_ms: Keyword.get(opts, :retry_deadline_ms), + max_attempts: Keyword.get(opts, :max_attempts) + } + end + + @doc """ + Registers ack_ids with a first-queued timestamp. + + Uses `Map.put_new` semantics — re-tracking an already-tracked ID does + not overwrite its original timestamp or reset its attempt count. + """ + @spec track(t(), [String.t()], integer()) :: t() + def track(%__MODULE__{} = tracker, ack_ids, now_ms) do + new_fq = Enum.reduce(ack_ids, tracker.first_queued, &Map.put_new(&2, &1, now_ms)) + new_att = Enum.reduce(ack_ids, tracker.attempts, &Map.put_new(&2, &1, 0)) + %{tracker | first_queued: new_fq, attempts: new_att} + end + + @doc """ + Increments the attempt count for each ack_id. + + IDs not yet tracked get an initial count of 1. + """ + @spec record_attempt(t(), [String.t()]) :: t() + def record_attempt(%__MODULE__{} = tracker, ack_ids) do + new_att = + Enum.reduce(ack_ids, tracker.attempts, fn id, acc -> + Map.update(acc, id, 1, &(&1 + 1)) + end) + + %{tracker | attempts: new_att} + end + + @doc """ + Partitions ack_ids into live and expired based on `retry_deadline_ms`. + + Returns `{live_ids, expired_ids, updated_tracker}`. Expired IDs are + removed from internal tracking state. + + When `retry_deadline_ms` is `nil`, returns all `ack_ids` as live. + """ + @spec expire_stale(t(), [String.t()], integer()) :: + {live :: [String.t()], expired :: [String.t()], t()} + def expire_stale(%__MODULE__{retry_deadline_ms: nil} = tracker, ack_ids, _now_ms) do + {ack_ids, [], tracker} + end + + def expire_stale(%__MODULE__{} = tracker, ack_ids, now_ms) do + {live, expired} = + Enum.split_with(ack_ids, fn id -> + case Map.get(tracker.first_queued, id) do + nil -> true + ts -> now_ms - ts < tracker.retry_deadline_ms + end + end) + + new_fq = Map.drop(tracker.first_queued, expired) + new_att = Map.drop(tracker.attempts, expired) + {live, expired, %{tracker | first_queued: new_fq, attempts: new_att}} + end + + @doc """ + Partitions ack_ids into keep and drop based on `max_attempts`. + + Returns `{keep_ids, drop_ids, updated_tracker}`. Dropped IDs are + removed from internal tracking state. + + When `max_attempts` is `nil`, returns all `ids` as keep. + """ + @spec check_attempts(t(), [String.t()]) :: + {keep :: [String.t()], drop :: [String.t()], t()} + def check_attempts(%__MODULE__{max_attempts: nil} = tracker, ids) do + {ids, [], tracker} + end + + def check_attempts(%__MODULE__{} = tracker, ids) do + {keep, drop} = + Enum.split_with(ids, fn id -> + Map.get(tracker.attempts, id, 0) < tracker.max_attempts + end) + + new_fq = Map.drop(tracker.first_queued, drop) + new_att = Map.drop(tracker.attempts, drop) + {keep, drop, %{tracker | first_queued: new_fq, attempts: new_att}} + end + + @doc """ + Cleanup sweep: removes all tracking state for IDs NOT in the given set. + + Called after a flush cycle to bound memory by removing IDs that have been + successfully delivered and are no longer pending. + """ + @spec retain_only(t(), MapSet.t()) :: t() + def retain_only(%__MODULE__{} = tracker, id_set) do + new_fq = Map.filter(tracker.first_queued, fn {id, _} -> MapSet.member?(id_set, id) end) + new_att = Map.filter(tracker.attempts, fn {id, _} -> MapSet.member?(id_set, id) end) + %{tracker | first_queued: new_fq, attempts: new_att} + end + + @doc """ + Updates the retry deadline at runtime. + + Called by AckBatcher when StreamManager detects a change in exactly-once + delivery status. + """ + @spec update_retry_deadline(t(), pos_integer() | nil) :: t() + def update_retry_deadline(%__MODULE__{} = tracker, deadline_ms) do + %{tracker | retry_deadline_ms: deadline_ms} + end +end diff --git a/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex b/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex index 0e3cdce..965cab3 100644 --- a/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex +++ b/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex @@ -186,25 +186,23 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do end def handle_info(:connect, state) do - state = %{state | reconnect_ref: nil} - do_connect(state) + %{state | reconnect_ref: nil} + |> do_connect() end # The StreamReader successfully opened the gRPC stream and sends us the # stream struct so we can call send_request for acks and lease extensions. def handle_info({:stream_opened, reader_pid, grpc_stream}, %{reader_pid: reader_pid} = state) do - conn_pid = grpc_stream.channel.adapter_payload.conn_pid - backoff = Backoff.reset(state.backoff) - - state = %{ - state - | grpc_stream: grpc_stream, - conn_pid: conn_pid, - backoff: backoff - } + state = + %{ + state + | grpc_stream: grpc_stream, + conn_pid: grpc_stream.channel.adapter_payload.conn_pid, + backoff: Backoff.reset(state.backoff) + } + |> schedule_lease_timer() + |> schedule_keepalive_timer() - state = LeaseManager.schedule_lease_timer(state) - state = schedule_keepalive_timer(state) emit_telemetry(:connect, %{}, state.config) {:noreply, state} end @@ -221,16 +219,18 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do def handle_info({:stream_messages, messages}, %{draining: true} = state) do nack_per_on_shutdown(state, Enum.map(messages, & &1.ack_id)) - {:noreply, state} end def handle_info({:stream_messages, messages}, state) do - broadway_messages = Enum.map(messages, &MessageDispatch.build_broadway_message(&1, state)) - ack_ids = Enum.map(messages, & &1.ack_id) + broadway_messages = + Enum.map(messages, &MessageDispatch.build_broadway_message(&1, state.config.ack_ref)) + ack_ids = Enum.map(messages, & &1.ack_id) now = now_ms() - adaptive_deadline = LeaseManager.effective_deadline(state) + + adaptive_deadline = + LeaseManager.effective_deadline(state.ack_time_dist, state.exactly_once_enabled) if state.exactly_once_enabled do # Exactly-once receipt modack gate: hold messages until the receipt modack @@ -259,11 +259,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do AckBatcher.modack(state.ack_batcher, ack_ids, adaptive_deadline) emit_telemetry(:receive_messages, %{count: length(broadway_messages)}, state.config) - {:noreply, - MessageDispatch.deliver_messages( - %{state | outstanding: new_outstanding}, - broadway_messages - )} + {:noreply, enqueue_and_flush(%{state | outstanding: new_outstanding}, broadway_messages)} end end @@ -328,11 +324,11 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do ) emit_telemetry(:terminal_error, %{}, state.config, %{reason: error}) - {:noreply, reset_connection(state, error)} + {:noreply, reset_connection(state)} :retryable -> emit_telemetry(:disconnect, %{}, state.config, %{reason: error}) - {:noreply, reset_connection(state, error)} + {:noreply, reset_connection(state)} end end @@ -348,7 +344,13 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do :retryable -> emit_telemetry(:disconnect, %{}, state.config, %{reason: error}) - {:noreply, schedule_reconnect(reset_connection(state, error))} + + state = + state + |> reset_connection() + |> schedule_reconnect() + + {:noreply, state} end end @@ -382,7 +384,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do end def handle_info(:extend_leases, state) do - {:noreply, LeaseManager.do_extend_leases(state)} + {:noreply, do_extend_leases(state)} end # Periodic keep-alive ping to prevent the server from closing an idle stream. @@ -391,7 +393,9 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do end def handle_info(:send_keepalive, state) do - adaptive_deadline = LeaseManager.effective_deadline(state) + adaptive_deadline = + LeaseManager.effective_deadline(state.ack_time_dist, state.exactly_once_enabled) + keepalive_request = %StreamingPullRequest{stream_ack_deadline_seconds: adaptive_deadline} case send_on_stream(state.grpc_stream, keepalive_request, state) do @@ -400,8 +404,13 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do timer = schedule_keepalive_after(state.config) {:noreply, %{state | grpc_stream: stream, keepalive_timer: timer}} - {:error, reason} -> - {:noreply, schedule_reconnect(reset_connection(state, {:send_failed, reason}))} + {:error, _reason} -> + state = + state + |> reset_connection() + |> schedule_reconnect() + + {:noreply, state} end end @@ -440,7 +449,11 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do # before the connection is torn down by close_stream. flush_batcher_if_alive(state.ack_batcher) - {:noreply, close_stream(%{state | drain_timer: nil, drain_started_at: nil, outstanding: %{}})} + state = + %{state | drain_timer: nil, drain_started_at: nil, outstanding: %{}} + |> close_stream() + + {:noreply, state} end def handle_info(_msg, state) do @@ -449,7 +462,10 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do @impl GenServer def handle_cast({:acknowledge, ack_ids}, state) do - state = MessageDispatch.record_and_remove_from_outstanding(state, ack_ids) + {new_outstanding, new_dist} = + MessageDispatch.record_and_remove(state.outstanding, state.ack_time_dist, ack_ids, now_ms()) + + state = %{state | outstanding: new_outstanding, ack_time_dist: new_dist} AckBatcher.ack(state.ack_batcher, ack_ids) emit_telemetry(:ack, %{count: length(ack_ids)}, state.config) @@ -464,7 +480,10 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do # otherwise the periodic extend_leases cycle would override the requested # deadline, and the drain phase could never complete because outstanding # would never become empty. - state = MessageDispatch.record_and_remove_from_outstanding(state, ack_ids) + {new_outstanding, new_dist} = + MessageDispatch.record_and_remove(state.outstanding, state.ack_time_dist, ack_ids, now_ms()) + + state = %{state | outstanding: new_outstanding, ack_time_dist: new_dist} AckBatcher.modack(state.ack_batcher, ack_ids, deadline_seconds) @@ -474,8 +493,11 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do # The producer signals a demand delta. Accumulate it and flush up to the new # total from the message buffer. def handle_cast({:demand_available, amount}, state) do - state = %{state | pending_demand: state.pending_demand + amount} - {:noreply, MessageDispatch.flush_demand(state)} + state = + %{state | pending_demand: state.pending_demand + amount} + |> do_flush_demand() + + {:noreply, state} end @impl GenServer @@ -497,14 +519,14 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do try do # 1. Close the reader FIRST to stop new messages from arriving. - state = close_reader(state) - # 2. Nack pending receipt modacks so the server redelivers them quickly. - state = nack_pending_receipt_modacks(state) + state = + state + |> close_reader() + |> nack_pending_receipt_modacks() # 3. Extract ack_ids from buffered messages and nack them. buffered_ack_ids = MessageDispatch.extract_buffered_ack_ids(state.message_buffer) - nacked_count = length(buffered_ack_ids) nack_per_on_shutdown(state, buffered_ack_ids) @@ -512,19 +534,17 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do new_outstanding = Enum.reduce(buffered_ack_ids, state.outstanding, &Map.delete(&2, &1)) - state = %{ - state - | message_buffer: :queue.new(), - outstanding: new_outstanding, - draining: true, - drain_started_at: drain_started_at - } - - # 5. Start the drain timer. - state = start_drain_timer(state) - - # 6. Check if drain is already complete (outstanding may now be empty). - state = maybe_complete_drain(state) + # 5–6. Set draining state, start drain timer, check if already complete. + state = + %{ + state + | message_buffer: :queue.new(), + outstanding: new_outstanding, + draining: true, + drain_started_at: drain_started_at + } + |> start_drain_timer() + |> maybe_complete_drain() {:reply, {:ok, nacked_count}, state} rescue @@ -549,8 +569,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do def handle_call(:close, _from, state) do # Best-effort flush; AckBatcher may already be down during pipeline shutdown. flush_batcher_if_alive(state.ack_batcher) - state = close_stream(state) - {:reply, :ok, state} + {:reply, :ok, close_stream(state)} end @impl GenServer @@ -570,7 +589,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do end state - |> LeaseManager.cancel_lease_timer() + |> cancel_lease_timer() |> cancel_keepalive_timer() |> cancel_drain_timer() |> close_stream() @@ -587,9 +606,14 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do emit_telemetry(:disconnect, %{}, state.config, %{reason: reason}) if state.draining do - {:noreply, reset_connection(state, reason)} + {:noreply, reset_connection(state)} else - {:noreply, schedule_reconnect(reset_connection(state, reason))} + state = + state + |> reset_connection() + |> schedule_reconnect() + + {:noreply, state} end end @@ -640,13 +664,12 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do state.grpc_client.send_request(grpc_stream, request, state.grpc_client_config) end - defp reset_connection(state, reason) do + defp reset_connection(state) do # Drop buffered messages on disconnect and nack them so they become # immediately available for redelivery to any consumer in the subscription. # Without the nack, redelivery depends on either this client reconnecting # (same client_id) or the ack deadline expiring naturally (up to 600s). buffered_ack_ids = MessageDispatch.extract_buffered_ack_ids(state.message_buffer) - AckBatcher.modack(state.ack_batcher, buffered_ack_ids, 0) new_outstanding = Enum.reduce(buffered_ack_ids, state.outstanding, &Map.delete(&2, &1)) @@ -659,24 +682,22 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do # # Preserve pending_demand across reconnection to avoid a demand deadlock. # See decisions.md. - close_stream( - %{state | message_buffer: :queue.new(), outstanding: new_outstanding}, - reason - ) + %{state | message_buffer: :queue.new(), outstanding: new_outstanding} + |> close_stream() end defp close_stream(%{reader_pid: nil, grpc_stream: nil} = state), do: state defp close_stream(state) do - state - |> stop_reader() - |> cancel_grpc_stream() - |> disconnect_channel() - |> cancel_keepalive_timer() - |> then(&%{&1 | reader_pid: nil, grpc_stream: nil, channel: nil, conn_pid: nil}) - end + state = + state + |> stop_reader() + |> cancel_grpc_stream() + |> disconnect_channel() + |> cancel_keepalive_timer() - defp close_stream(state, _reason), do: close_stream(state) + %{state | reader_pid: nil, grpc_stream: nil, channel: nil, conn_pid: nil} + end defp stop_reader(%{reader_pid: nil} = state), do: state @@ -745,9 +766,78 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do defp now_ms, do: System.monotonic_time(:millisecond) - # --- Private: lease extension (delegated to LeaseManager) --- + # --- Private: lease extension --- + + defp do_extend_leases(state) do + now = now_ms() + + %{ + valid: valid, + expired_count: expired_count, + modack_ids: modack_ids, + modack_deadline: modack_deadline, + next_timer_ms: next_timer_ms + } = + LeaseManager.extend_leases( + state.outstanding, + state.ack_time_dist, + state.exactly_once_enabled, + now + ) + + if expired_count > 0 do + emit_telemetry(:lease_expired, %{count: expired_count}, state.config) + end + + emit_telemetry( + :extend_leases, + %{count: map_size(valid), deadline: modack_deadline}, + state.config + ) + + emit_telemetry( + :pressure_snapshot, + %{ + outstanding_count: map_size(valid), + buffered_count: :queue.len(state.message_buffer), + pending_demand: state.pending_demand + }, + state.config + ) + + if modack_ids != [] do + AckBatcher.modack(state.ack_batcher, modack_ids, modack_deadline) + end - # --- Private: lease management (delegated to LeaseManager) --- + timer = Process.send_after(self(), :extend_leases, next_timer_ms) + + # Sweep stale pending receipt modacks. + %{fresh: fresh, stale_ack_ids: stale_ack_ids} = + LeaseManager.sweep_stale_pending_modacks(state.pending_receipt_modacks, now) + + if stale_ack_ids != [] do + AckBatcher.modack(state.ack_batcher, stale_ack_ids, 0) + emit_telemetry(:receipt_modack_stale, %{count: length(stale_ack_ids)}, state.config) + end + + %{state | outstanding: valid, lease_timer: timer, pending_receipt_modacks: fresh} + end + + # --- Private: lease timer management --- + + defp schedule_lease_timer(state) do + state = cancel_lease_timer(state) + interval = LeaseManager.initial_timer_ms(state.config.stream_ack_deadline_seconds) + timer = Process.send_after(self(), :extend_leases, interval) + %{state | lease_timer: timer} + end + + defp cancel_lease_timer(%{lease_timer: nil} = state), do: state + + defp cancel_lease_timer(%{lease_timer: timer} = state) do + Process.cancel_timer(timer) + %{state | lease_timer: nil} + end # --- Private: keep-alive --- @@ -799,7 +889,6 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do %{draining: true, outstanding: outstanding, pending_receipt_modacks: pending} = state ) when map_size(outstanding) == 0 and map_size(pending) == 0 do - state = cancel_drain_timer(state) # AckBatcher may already be down during pipeline shutdown. flush_batcher_if_alive(state.ack_batcher) @@ -811,14 +900,37 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do Map.get(state.config, :telemetry_metadata) ) + state = cancel_drain_timer(state) close_stream(%{state | drain_started_at: nil}) end defp maybe_complete_drain(state), do: state - # --- Private: message dispatch (delegated to MessageDispatch) --- + # --- Private: message dispatch --- - # Stale pending receipt modacks sweeping is delegated to LeaseManager. + # Enqueues messages into the buffer and flushes up to pending_demand. + defp enqueue_and_flush(state, messages) do + new_buffer = Enum.reduce(messages, state.message_buffer, &:queue.in(&1, &2)) + + %{state | message_buffer: new_buffer} + |> do_flush_demand() + end + + # Flushes up to pending_demand messages from the buffer to the producer. + # No-op when draining, demand is zero, or the buffer is empty. + defp do_flush_demand(%{draining: true} = state), do: state + defp do_flush_demand(%{pending_demand: 0} = state), do: state + + defp do_flush_demand(state) do + %{to_send: to_send, remaining_buffer: remaining_buffer, remaining_demand: remaining_demand} = + MessageDispatch.flush_demand(state.message_buffer, state.pending_demand) + + if to_send != [] do + send(state.producer_pid, {:stream_messages, to_send}) + end + + %{state | message_buffer: remaining_buffer, pending_demand: remaining_demand} + end # Handle the result of an exactly-once receipt modack RPC (non-draining path). defp handle_receipt_modack_success(state, pending, result) do @@ -839,10 +951,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do ) {:noreply, - MessageDispatch.deliver_messages( - %{state | outstanding: new_outstanding}, - pending.broadway_messages - )} + enqueue_and_flush(%{state | outstanding: new_outstanding}, pending.broadway_messages)} {:ok, failed_ids} -> # Partial success — deliver only messages whose modack succeeded. @@ -863,9 +972,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do if ok_msgs != [] do emit_telemetry(:receive_messages, %{count: length(ok_msgs)}, state.config) - - {:noreply, - MessageDispatch.deliver_messages(%{state | outstanding: new_outstanding}, ok_msgs)} + {:noreply, enqueue_and_flush(%{state | outstanding: new_outstanding}, ok_msgs)} else {:noreply, %{state | outstanding: new_outstanding}} end @@ -904,8 +1011,6 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do %{state | pending_receipt_modacks: %{}} end - # Message construction is delegated to MessageDispatch. - # Flush AckBatcher if its process is currently alive. Guards against the # batcher being down during pipeline shutdown (Broadway stops children in # reverse start order). diff --git a/test/broadway_cloud_pub_sub/streaming/ack_batcher_test.exs b/test/broadway_cloud_pub_sub/streaming/ack_batcher_test.exs index 2e9008d..3006c38 100644 --- a/test/broadway_cloud_pub_sub/streaming/ack_batcher_test.exs +++ b/test/broadway_cloud_pub_sub/streaming/ack_batcher_test.exs @@ -449,14 +449,16 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcherTest do # Default is nil (not configured in start_batcher) state = :sys.get_state(batcher) - assert state.retry_deadline_ms == nil + assert state.ack_tracker.retry_deadline_ms == nil + assert state.modack_tracker.retry_deadline_ms == nil AckBatcher.update_retry_deadline(batcher, 600_000) # Cast is async — sync via flush AckBatcher.flush(batcher) state = :sys.get_state(batcher) - assert state.retry_deadline_ms == 600_000 + assert state.ack_tracker.retry_deadline_ms == 600_000 + assert state.modack_tracker.retry_deadline_ms == 600_000 end test "restores configured deadline when exactly-once is disabled" do @@ -464,11 +466,13 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcherTest do AckBatcher.update_retry_deadline(batcher, 600_000) AckBatcher.flush(batcher) - assert :sys.get_state(batcher).retry_deadline_ms == 600_000 + assert :sys.get_state(batcher).ack_tracker.retry_deadline_ms == 600_000 + assert :sys.get_state(batcher).modack_tracker.retry_deadline_ms == 600_000 AckBatcher.update_retry_deadline(batcher, 60_000) AckBatcher.flush(batcher) - assert :sys.get_state(batcher).retry_deadline_ms == 60_000 + assert :sys.get_state(batcher).ack_tracker.retry_deadline_ms == 60_000 + assert :sys.get_state(batcher).modack_tracker.retry_deadline_ms == 60_000 end end @@ -525,7 +529,7 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcherTest do # State should be clear state = :sys.get_state(batcher) assert state.modack_ids == %{} - assert state.modack_attempts == %{} + assert state.modack_tracker.attempts == %{} end test "other ack_ids are not affected by one id reaching the retry limit" do @@ -820,4 +824,143 @@ defmodule BroadwayCloudPubSub.Streaming.AckBatcherTest do end end end + + # --- Pure classification function tests --- + + describe "classify_ack_result/4" do + alias BroadwayCloudPubSub.Streaming.RetryTracker + + defp new_ack_tracker(opts) do + RetryTracker.new(opts) + end + + defp tracked_ack_tracker(ids, opts \\ []) do + tracker = new_ack_tracker(opts) + now = System.monotonic_time(:millisecond) + RetryTracker.track(tracker, ids, now) + end + + test "full success returns empty live list" do + tracker = tracked_ack_tracker(["a1", "a2"]) + result = AckBatcher.classify_ack_result(["a1", "a2"], {:ok, []}, tracker, now_ms()) + + assert result.live == [] + assert result.expired_count == 0 + end + + test "partial success retains remaining ids" do + tracker = tracked_ack_tracker(["a1", "a2", "a3"]) + + result = + AckBatcher.classify_ack_result(["a1", "a2", "a3"], {:ok, ["a2"]}, tracker, now_ms()) + + assert result.live == ["a2"] + assert result.expired_count == 0 + end + + test "retryable error with transient ids retains those ids" do + tracker = tracked_ack_tracker(["a1", "a2"]) + rpc_result = {:error, {%GRPC.RPCError{status: 14}, ["a1"]}} + result = AckBatcher.classify_ack_result(["a1", "a2"], rpc_result, tracker, now_ms()) + + assert result.live == ["a1"] + assert result.expired_count == 0 + end + + test "total failure retains all ids" do + tracker = tracked_ack_tracker(["a1", "a2"]) + + result = + AckBatcher.classify_ack_result(["a1", "a2"], {:error, :unavailable}, tracker, now_ms()) + + assert result.live == ["a1", "a2"] + assert result.expired_count == 0 + end + + test "expired ids are dropped and counted" do + tracker = new_ack_tracker(retry_deadline_ms: 100) + old_time = now_ms() - 200 + tracker = RetryTracker.track(tracker, ["a1", "a2"], old_time) + + result = + AckBatcher.classify_ack_result(["a1", "a2"], {:error, :unavailable}, tracker, now_ms()) + + assert result.live == [] + assert result.expired_count == 2 + end + end + + describe "classify_modack_results/3" do + alias BroadwayCloudPubSub.Streaming.RetryTracker + + test "full success for all groups returns empty remaining" do + tracker = RetryTracker.new(max_attempts: 3) + tracker = RetryTracker.track(tracker, ["m1", "m2"], now_ms()) + tracker = RetryTracker.record_attempt(tracker, ["m1", "m2"]) + + rpc_results = [{60, ["m1"], {:ok, []}}, {0, ["m2"], {:ok, []}}] + result = AckBatcher.classify_modack_results(rpc_results, tracker, now_ms()) + + assert result.remaining == %{} + assert result.exhausted_count == 0 + assert result.expired_count == 0 + end + + test "partial failure retains failed ids in the correct deadline group" do + tracker = RetryTracker.new(max_attempts: 3) + tracker = RetryTracker.track(tracker, ["m1", "m2"], now_ms()) + tracker = RetryTracker.record_attempt(tracker, ["m1", "m2"]) + + rpc_results = [{60, ["m1", "m2"], {:ok, ["m2"]}}] + result = AckBatcher.classify_modack_results(rpc_results, tracker, now_ms()) + + assert result.remaining == %{60 => ["m2"]} + assert result.exhausted_count == 0 + end + + test "ids exceeding max_attempts are dropped" do + tracker = RetryTracker.new(max_attempts: 2) + tracker = RetryTracker.track(tracker, ["m1"], now_ms()) + # Simulate 2 prior attempts + tracker = RetryTracker.record_attempt(tracker, ["m1"]) + tracker = RetryTracker.record_attempt(tracker, ["m1"]) + + rpc_results = [{60, ["m1"], {:error, :unavailable}}] + result = AckBatcher.classify_modack_results(rpc_results, tracker, now_ms()) + + assert result.remaining == %{} + assert result.exhausted_count == 1 + end + + test "expired modack ids are dropped and counted" do + tracker = RetryTracker.new(max_attempts: 10, retry_deadline_ms: 100) + old_time = now_ms() - 200 + tracker = RetryTracker.track(tracker, ["m1"], old_time) + tracker = RetryTracker.record_attempt(tracker, ["m1"]) + + rpc_results = [{60, ["m1"], {:error, :unavailable}}] + result = AckBatcher.classify_modack_results(rpc_results, tracker, now_ms()) + + assert result.remaining == %{} + assert result.expired_count == 1 + end + + test "multiple deadline groups are tracked independently" do + tracker = RetryTracker.new(max_attempts: 3) + tracker = RetryTracker.track(tracker, ["a", "b", "c"], now_ms()) + tracker = RetryTracker.record_attempt(tracker, ["a", "b", "c"]) + + rpc_results = [ + {60, ["a"], {:ok, ["a"]}}, + {0, ["b", "c"], {:ok, ["c"]}} + ] + + result = AckBatcher.classify_modack_results(rpc_results, tracker, now_ms()) + + assert result.remaining == %{60 => ["a"], 0 => ["c"]} + assert result.exhausted_count == 0 + end + end + + defp now_ms, do: System.monotonic_time(:millisecond) end diff --git a/test/broadway_cloud_pub_sub/streaming/acknowledger_test.exs b/test/broadway_cloud_pub_sub/streaming/acknowledger_test.exs index 23f6ae9..bb9ae67 100644 --- a/test/broadway_cloud_pub_sub/streaming/acknowledger_test.exs +++ b/test/broadway_cloud_pub_sub/streaming/acknowledger_test.exs @@ -197,4 +197,22 @@ defmodule BroadwayCloudPubSub.Streaming.AcknowledgerTest do assert length(batch2) == 500 end end + + describe "ack_id_from/1" do + test "extracts ack_id from a Broadway.Message" do + ack_ref = make_ref() + msg = build_message("test-ack-id", ack_ref) + + assert Acknowledger.ack_id_from(msg) == "test-ack-id" + end + + test "works with different ack_ids" do + ack_ref = make_ref() + + for id <- ["a", "b", "long-ack-id-with-many-chars"] do + msg = build_message(id, ack_ref) + assert Acknowledger.ack_id_from(msg) == id + end + end + end end diff --git a/test/broadway_cloud_pub_sub/streaming/lease_manager_test.exs b/test/broadway_cloud_pub_sub/streaming/lease_manager_test.exs new file mode 100644 index 0000000..1e410c9 --- /dev/null +++ b/test/broadway_cloud_pub_sub/streaming/lease_manager_test.exs @@ -0,0 +1,266 @@ +defmodule BroadwayCloudPubSub.Streaming.LeaseManagerTest do + use ExUnit.Case, async: true + + alias BroadwayCloudPubSub.Streaming.{AckTimeDistribution, LeaseManager} + + # ============================================================ + # effective_deadline/2 + # ============================================================ + + describe "effective_deadline/2" do + test "returns default deadline when distribution has fewer than 10 samples" do + dist = AckTimeDistribution.new(60) + + assert LeaseManager.effective_deadline(dist, false) == 60 + end + + test "returns p99 from distribution with enough samples" do + dist = AckTimeDistribution.new(60) + + # Record 20 samples at 15 seconds + dist = Enum.reduce(1..20, dist, fn _, d -> AckTimeDistribution.record(d, 15) end) + + assert LeaseManager.effective_deadline(dist, false) == 15 + end + + test "returns max(p99, 60) when exactly_once is true" do + dist = AckTimeDistribution.new(30) + + # Record 20 samples at 15 seconds — p99 would be 15 + dist = Enum.reduce(1..20, dist, fn _, d -> AckTimeDistribution.record(d, 15) end) + + # Without exactly-once: 15 + assert LeaseManager.effective_deadline(dist, false) == 15 + # With exactly-once: max(15, 60) = 60 + assert LeaseManager.effective_deadline(dist, true) == 60 + end + + test "exactly-once with high p99 uses the p99" do + dist = AckTimeDistribution.new(60) + + # Record 20 samples at 120 seconds — p99 is 120 + dist = Enum.reduce(1..20, dist, fn _, d -> AckTimeDistribution.record(d, 120) end) + + # max(120, 60) = 120 + assert LeaseManager.effective_deadline(dist, true) == 120 + end + end + + # ============================================================ + # extend_leases/4 + # ============================================================ + + describe "extend_leases/4" do + test "all messages valid — no expired, all keys in modack_ids" do + dist = AckTimeDistribution.new(60) + now = 1_000_000 + + outstanding = %{ + "id-1" => %{received_at: now - 1_000, max_expiry: now + 100_000}, + "id-2" => %{received_at: now - 2_000, max_expiry: now + 200_000} + } + + result = LeaseManager.extend_leases(outstanding, dist, false, now) + + assert map_size(result.valid) == 2 + assert result.expired_count == 0 + assert Enum.sort(result.modack_ids) == ["id-1", "id-2"] + assert result.modack_deadline == 60 + assert result.next_timer_ms > 0 + end + + test "mix of valid and expired — correct partition" do + dist = AckTimeDistribution.new(60) + now = 1_000_000 + + outstanding = %{ + "valid-1" => %{received_at: now - 1_000, max_expiry: now + 100_000}, + "expired-1" => %{received_at: now - 500_000, max_expiry: now - 1} + } + + result = LeaseManager.extend_leases(outstanding, dist, false, now) + + assert map_size(result.valid) == 1 + assert Map.has_key?(result.valid, "valid-1") + assert result.expired_count == 1 + assert result.modack_ids == ["valid-1"] + end + + test "all expired — valid is empty, modack_ids is empty" do + dist = AckTimeDistribution.new(60) + now = 1_000_000 + + outstanding = %{ + "expired-1" => %{received_at: 0, max_expiry: now - 1}, + "expired-2" => %{received_at: 0, max_expiry: now - 100} + } + + result = LeaseManager.extend_leases(outstanding, dist, false, now) + + assert result.valid == %{} + assert result.expired_count == 2 + assert result.modack_ids == [] + end + + test "empty outstanding — no-op" do + dist = AckTimeDistribution.new(60) + now = 1_000_000 + + result = LeaseManager.extend_leases(%{}, dist, false, now) + + assert result.valid == %{} + assert result.expired_count == 0 + assert result.modack_ids == [] + end + + test "next_timer_ms is within expected jitter range" do + dist = AckTimeDistribution.new(60) + now = 1_000_000 + + # With deadline=60, base = max(1000, (60-5)*1000) = 55_000 + # jitter factor in (0.8, 0.9), after round: [44_000, 49_500] + results = + for _ <- 1..50 do + LeaseManager.extend_leases(%{}, dist, false, now).next_timer_ms + end + + assert Enum.all?(results, fn ms -> ms >= 44_000 and ms <= 49_500 end) + end + + test "modack_deadline equals the effective deadline" do + dist = AckTimeDistribution.new(45) + + result = LeaseManager.extend_leases(%{}, dist, false, 0) + + assert result.modack_deadline == 45 + end + + test "exactly-once enabled — deadline is at least 60" do + dist = AckTimeDistribution.new(30) + + result = LeaseManager.extend_leases(%{}, dist, true, 0) + + assert result.modack_deadline >= 60 + end + + test "boundary: max_expiry exactly at now — treated as expired" do + dist = AckTimeDistribution.new(60) + now = 1_000_000 + + outstanding = %{ + "boundary" => %{received_at: 0, max_expiry: now} + } + + # max_expiry > now is false when max_expiry == now + result = LeaseManager.extend_leases(outstanding, dist, false, now) + + assert result.expired_count == 1 + assert result.valid == %{} + end + end + + # ============================================================ + # sweep_stale_pending_modacks/3 + # ============================================================ + + describe "sweep_stale_pending_modacks/3" do + test "all fresh — no stale ids" do + now = 100_000 + ref = make_ref() + + pending = %{ + ref => %{ack_ids: ["id-1", "id-2"], received_at: now - 1_000} + } + + result = LeaseManager.sweep_stale_pending_modacks(pending, now) + + assert result.stale_ack_ids == [] + assert map_size(result.fresh) == 1 + end + + test "all stale — all ack_ids returned" do + now = 100_000 + ref = make_ref() + + pending = %{ + ref => %{ack_ids: ["id-1", "id-2"], received_at: now - 61_000} + } + + result = LeaseManager.sweep_stale_pending_modacks(pending, now) + + assert Enum.sort(result.stale_ack_ids) == ["id-1", "id-2"] + assert result.fresh == %{} + end + + test "mix of fresh and stale" do + now = 100_000 + fresh_ref = make_ref() + stale_ref = make_ref() + + pending = %{ + fresh_ref => %{ack_ids: ["fresh-1"], received_at: now - 1_000}, + stale_ref => %{ack_ids: ["stale-1"], received_at: now - 61_000} + } + + result = LeaseManager.sweep_stale_pending_modacks(pending, now) + + assert result.stale_ack_ids == ["stale-1"] + assert map_size(result.fresh) == 1 + assert Map.has_key?(result.fresh, fresh_ref) + end + + test "empty map — no-op" do + result = LeaseManager.sweep_stale_pending_modacks(%{}, 100_000) + + assert result.stale_ack_ids == [] + assert result.fresh == %{} + end + + test "custom threshold" do + now = 100_000 + ref = make_ref() + + pending = %{ + ref => %{ack_ids: ["id-1"], received_at: now - 5_000} + } + + # Default threshold (60s) — still fresh + result = LeaseManager.sweep_stale_pending_modacks(pending, now) + assert result.stale_ack_ids == [] + + # Custom threshold (3s) — now stale + result = LeaseManager.sweep_stale_pending_modacks(pending, now, 3_000) + assert result.stale_ack_ids == ["id-1"] + end + end + + # ============================================================ + # initial_timer_ms/1 + # ============================================================ + + describe "initial_timer_ms/1" do + test "returns value in expected jitter range for deadline 60" do + # base = max(1000, (60-5)*1000) = 55_000 + # jitter factor in (0.8, 0.9), after round: [44_000, 49_500] + results = for _ <- 1..50, do: LeaseManager.initial_timer_ms(60) + + assert Enum.all?(results, fn ms -> ms >= 44_000 and ms <= 49_500 end) + end + + test "minimum 1000ms regardless of deadline" do + # With deadline=6, base = max(1000, (6-5)*1000) = max(1000, 1000) = 1000 + # jitter factor in (0.8, 0.9), after round: [800, 900] + results = for _ <- 1..50, do: LeaseManager.initial_timer_ms(6) + + assert Enum.all?(results, fn ms -> ms >= 800 and ms <= 900 end) + end + + test "very short deadline still respects minimum" do + # With deadline=5, base = max(1000, (5-5)*1000) = max(1000, 0) = 1000 + # jitter factor in (0.8, 0.9), after round: [800, 900] + results = for _ <- 1..50, do: LeaseManager.initial_timer_ms(5) + + assert Enum.all?(results, fn ms -> ms >= 800 and ms <= 900 end) + end + end +end diff --git a/test/broadway_cloud_pub_sub/streaming/message_dispatch_test.exs b/test/broadway_cloud_pub_sub/streaming/message_dispatch_test.exs new file mode 100644 index 0000000..c49ebf1 --- /dev/null +++ b/test/broadway_cloud_pub_sub/streaming/message_dispatch_test.exs @@ -0,0 +1,309 @@ +defmodule BroadwayCloudPubSub.Streaming.MessageDispatchTest do + use ExUnit.Case, async: true + + alias BroadwayCloudPubSub.Streaming.{AckTimeDistribution, MessageDispatch} + + # ============================================================ + # flush_demand/2 + # ============================================================ + + describe "flush_demand/2" do + test "empty buffer, demand > 0 — returns empty to_send, demand unchanged" do + buffer = :queue.new() + result = MessageDispatch.flush_demand(buffer, 5) + + assert result.to_send == [] + assert :queue.is_empty(result.remaining_buffer) + assert result.remaining_demand == 5 + end + + test "buffer with 3 messages, demand 2 — dequeues 2, leaves 1" do + msgs = for i <- 1..3, do: make_broadway_message("ack-#{i}", "data-#{i}") + buffer = Enum.reduce(msgs, :queue.new(), &:queue.in(&1, &2)) + + result = MessageDispatch.flush_demand(buffer, 2) + + assert length(result.to_send) == 2 + assert :queue.len(result.remaining_buffer) == 1 + assert result.remaining_demand == 0 + + # Verify order preservation + [first, second] = result.to_send + assert first.data == "data-1" + assert second.data == "data-2" + end + + test "buffer with 2 messages, demand 5 — dequeues 2, remaining demand 3" do + msgs = for i <- 1..2, do: make_broadway_message("ack-#{i}", "data-#{i}") + buffer = Enum.reduce(msgs, :queue.new(), &:queue.in(&1, &2)) + + result = MessageDispatch.flush_demand(buffer, 5) + + assert length(result.to_send) == 2 + assert :queue.is_empty(result.remaining_buffer) + assert result.remaining_demand == 3 + end + + test "demand 0 — returns empty to_send, buffer unchanged" do + msgs = [make_broadway_message("ack-1", "data")] + buffer = Enum.reduce(msgs, :queue.new(), &:queue.in(&1, &2)) + + result = MessageDispatch.flush_demand(buffer, 0) + + assert result.to_send == [] + assert :queue.len(result.remaining_buffer) == 1 + assert result.remaining_demand == 0 + end + + test "both empty — returns empty" do + result = MessageDispatch.flush_demand(:queue.new(), 0) + + assert result.to_send == [] + assert :queue.is_empty(result.remaining_buffer) + assert result.remaining_demand == 0 + end + end + + # ============================================================ + # record_and_remove/4 + # ============================================================ + + describe "record_and_remove/4" do + test "records processing times and removes ids from outstanding" do + dist = AckTimeDistribution.new(60) + now = 10_000 + + outstanding = %{ + "id-1" => %{received_at: 5_000, max_expiry: 1_000_000}, + "id-2" => %{received_at: 3_000, max_expiry: 1_000_000}, + "id-3" => %{received_at: 1_000, max_expiry: 1_000_000} + } + + {updated_outstanding, updated_dist} = + MessageDispatch.record_and_remove(outstanding, dist, ["id-1", "id-2"], now) + + # id-1 and id-2 removed; id-3 remains + assert map_size(updated_outstanding) == 1 + assert Map.has_key?(updated_outstanding, "id-3") + + # Distribution should have 2 samples recorded + assert AckTimeDistribution.sample_count(updated_dist) == 2 + end + + test "ids not in outstanding are skipped — no dist update" do + dist = AckTimeDistribution.new(60) + now = 10_000 + + outstanding = %{ + "id-1" => %{received_at: 5_000, max_expiry: 1_000_000} + } + + {updated_outstanding, updated_dist} = + MessageDispatch.record_and_remove(outstanding, dist, ["id-1", "nonexistent"], now) + + assert map_size(updated_outstanding) == 0 + # Only 1 sample — "nonexistent" was skipped + assert AckTimeDistribution.sample_count(updated_dist) == 1 + end + + test "duration calculation: max(1, (now - received_at) / 1000) seconds" do + dist = AckTimeDistribution.new(60) + + # received_at=0, now=500 → 500ms → 0s → clamped to 1s + outstanding = %{"id-1" => %{received_at: 0, max_expiry: 1_000_000}} + + {_outstanding, updated_dist} = + MessageDispatch.record_and_remove(outstanding, dist, ["id-1"], 500) + + # The record should be at duration 1 (clamped), which gets clamped to 10 by ATD + assert AckTimeDistribution.sample_count(updated_dist) == 1 + end + + test "empty ack_ids — no-op" do + dist = AckTimeDistribution.new(60) + outstanding = %{"id-1" => %{received_at: 0, max_expiry: 1_000_000}} + + {updated_outstanding, updated_dist} = + MessageDispatch.record_and_remove(outstanding, dist, [], 10_000) + + assert updated_outstanding == outstanding + assert AckTimeDistribution.sample_count(updated_dist) == 0 + end + end + + # ============================================================ + # add_to_outstanding/4 + # ============================================================ + + describe "add_to_outstanding/4" do + test "adds entries with received_at and computed max_expiry" do + outstanding = MessageDispatch.add_to_outstanding(%{}, ["id-1", "id-2"], 1_000, 60_000) + + assert outstanding["id-1"] == %{received_at: 1_000, max_expiry: 61_000} + assert outstanding["id-2"] == %{received_at: 1_000, max_expiry: 61_000} + end + + test "overwrites existing key (Map.put semantics)" do + outstanding = %{"id-1" => %{received_at: 500, max_expiry: 60_500}} + + updated = MessageDispatch.add_to_outstanding(outstanding, ["id-1"], 1_000, 60_000) + + assert updated["id-1"] == %{received_at: 1_000, max_expiry: 61_000} + end + + test "preserves existing entries not in ack_ids" do + outstanding = %{"existing" => %{received_at: 0, max_expiry: 100}} + + updated = MessageDispatch.add_to_outstanding(outstanding, ["new"], 1_000, 60_000) + + assert Map.has_key?(updated, "existing") + assert Map.has_key?(updated, "new") + end + + test "empty ack_ids — returns original map" do + outstanding = %{"id-1" => %{received_at: 0, max_expiry: 100}} + + updated = MessageDispatch.add_to_outstanding(outstanding, [], 1_000, 60_000) + + assert updated == outstanding + end + end + + # ============================================================ + # extract_buffered_ack_ids/1 + # ============================================================ + + describe "extract_buffered_ack_ids/1" do + test "extracts ack_ids from Broadway messages in queue" do + msgs = for i <- 1..3, do: make_broadway_message("ack-#{i}", "data-#{i}") + buffer = Enum.reduce(msgs, :queue.new(), &:queue.in(&1, &2)) + + ids = MessageDispatch.extract_buffered_ack_ids(buffer) + + assert ids == ["ack-1", "ack-2", "ack-3"] + end + + test "empty queue — empty list" do + ids = MessageDispatch.extract_buffered_ack_ids(:queue.new()) + + assert ids == [] + end + end + + # ============================================================ + # partition_succeeded/3 + # ============================================================ + + describe "partition_succeeded/3" do + test "filters out failed ids; preserves order" do + msgs = for i <- 1..4, do: make_broadway_message("ack-#{i}", "data-#{i}") + all_ids = ["ack-1", "ack-2", "ack-3", "ack-4"] + failed_ids = ["ack-2", "ack-4"] + + {ok_msgs, ok_ids} = MessageDispatch.partition_succeeded(msgs, all_ids, failed_ids) + + assert ok_ids == ["ack-1", "ack-3"] + assert Enum.map(ok_msgs, & &1.data) == ["data-1", "data-3"] + end + + test "all failed — empty result" do + msgs = [make_broadway_message("ack-1", "data")] + {ok_msgs, ok_ids} = MessageDispatch.partition_succeeded(msgs, ["ack-1"], ["ack-1"]) + + assert ok_msgs == [] + assert ok_ids == [] + end + + test "none failed — all returned" do + msgs = [make_broadway_message("ack-1", "data")] + {ok_msgs, ok_ids} = MessageDispatch.partition_succeeded(msgs, ["ack-1"], []) + + assert length(ok_msgs) == 1 + assert ok_ids == ["ack-1"] + end + end + + # ============================================================ + # build_broadway_message/2 + # ============================================================ + + describe "build_broadway_message/2" do + test "constructs correct Broadway.Message with acknowledger tuple" do + received_msg = %{ + ack_id: "test-ack-id", + message: %Google.Pubsub.V1.PubsubMessage{ + message_id: "msg-123", + data: "hello", + attributes: %{"key" => "value"}, + ordering_key: "order-1", + publish_time: %Google.Protobuf.Timestamp{seconds: 1_700_000_000, nanos: 500_000_000} + }, + delivery_attempt: 2 + } + + ack_ref = {:my_pipeline, 0} + msg = MessageDispatch.build_broadway_message(received_msg, ack_ref) + + assert %Broadway.Message{} = msg + assert msg.data == "hello" + assert msg.metadata.messageId == "msg-123" + assert msg.metadata.orderingKey == "order-1" + assert msg.metadata.deliveryAttempt == 2 + assert msg.metadata.attributes == %{"key" => "value"} + + {mod, ref, data} = msg.acknowledger + assert mod == BroadwayCloudPubSub.Streaming.Acknowledger + assert ref == ack_ref + assert data.ack_id == "test-ack-id" + end + + test "handles nil publish_time" do + received_msg = %{ + ack_id: "ack-1", + message: %Google.Pubsub.V1.PubsubMessage{ + message_id: "msg-1", + data: "test", + attributes: %{}, + ordering_key: "", + publish_time: nil + }, + delivery_attempt: 1 + } + + msg = MessageDispatch.build_broadway_message(received_msg, {:test, 0}) + + assert msg.metadata.publishTime == nil + end + + test "maps empty attributes correctly" do + received_msg = %{ + ack_id: "ack-1", + message: %Google.Pubsub.V1.PubsubMessage{ + message_id: "msg-1", + data: "test", + attributes: [], + ordering_key: "", + publish_time: nil + }, + delivery_attempt: 1 + } + + msg = MessageDispatch.build_broadway_message(received_msg, {:test, 0}) + + assert msg.metadata.attributes == %{} + end + end + + # ============================================================ + # Helpers + # ============================================================ + + # Build a minimal Broadway.Message for testing buffer operations. + defp make_broadway_message(ack_id, data) do + %Broadway.Message{ + data: data, + metadata: %{}, + acknowledger: {BroadwayCloudPubSub.Streaming.Acknowledger, {:test, 0}, %{ack_id: ack_id}} + } + end +end diff --git a/test/broadway_cloud_pub_sub/streaming/retry_tracker_test.exs b/test/broadway_cloud_pub_sub/streaming/retry_tracker_test.exs new file mode 100644 index 0000000..f9c00ab --- /dev/null +++ b/test/broadway_cloud_pub_sub/streaming/retry_tracker_test.exs @@ -0,0 +1,375 @@ +defmodule BroadwayCloudPubSub.Streaming.RetryTrackerTest do + use ExUnit.Case, async: true + + alias BroadwayCloudPubSub.Streaming.RetryTracker + + # ============================================================ + # new/1 + # ============================================================ + + describe "new/1" do + test "creates empty tracker with default nil config" do + tracker = RetryTracker.new() + + assert tracker.first_queued == %{} + assert tracker.attempts == %{} + assert tracker.retry_deadline_ms == nil + assert tracker.max_attempts == nil + end + + test "creates tracker with configured retry_deadline_ms" do + tracker = RetryTracker.new(retry_deadline_ms: 600_000) + + assert tracker.retry_deadline_ms == 600_000 + end + + test "creates tracker with configured max_attempts" do + tracker = RetryTracker.new(max_attempts: 3) + + assert tracker.max_attempts == 3 + end + + test "creates tracker with both options" do + tracker = RetryTracker.new(retry_deadline_ms: 60_000, max_attempts: 3) + + assert tracker.retry_deadline_ms == 60_000 + assert tracker.max_attempts == 3 + end + end + + # ============================================================ + # track/3 + # ============================================================ + + describe "track/3" do + test "registers ack_ids with first-queued timestamp" do + tracker = RetryTracker.new() + + tracker = RetryTracker.track(tracker, ["id-1", "id-2"], 1000) + + assert tracker.first_queued == %{"id-1" => 1000, "id-2" => 1000} + assert tracker.attempts == %{"id-1" => 0, "id-2" => 0} + end + + test "put_new semantics — does not overwrite existing timestamp" do + tracker = RetryTracker.new() + + tracker = RetryTracker.track(tracker, ["id-1"], 1000) + tracker = RetryTracker.track(tracker, ["id-1", "id-2"], 2000) + + # id-1 keeps its original timestamp of 1000 + assert tracker.first_queued["id-1"] == 1000 + # id-2 gets the new timestamp of 2000 + assert tracker.first_queued["id-2"] == 2000 + end + + test "put_new semantics — does not reset attempt count" do + tracker = RetryTracker.new() + + tracker = RetryTracker.track(tracker, ["id-1"], 1000) + tracker = RetryTracker.record_attempt(tracker, ["id-1"]) + tracker = RetryTracker.track(tracker, ["id-1"], 2000) + + # Attempt count preserved at 1, not reset to 0 + assert tracker.attempts["id-1"] == 1 + end + + test "handles empty list" do + tracker = RetryTracker.new() + + tracker = RetryTracker.track(tracker, [], 1000) + + assert tracker.first_queued == %{} + assert tracker.attempts == %{} + end + end + + # ============================================================ + # record_attempt/2 + # ============================================================ + + describe "record_attempt/2" do + test "increments attempt count for tracked ids" do + tracker = RetryTracker.new() + tracker = RetryTracker.track(tracker, ["id-1"], 1000) + + tracker = RetryTracker.record_attempt(tracker, ["id-1"]) + + assert tracker.attempts["id-1"] == 1 + end + + test "increments multiple times" do + tracker = RetryTracker.new() + tracker = RetryTracker.track(tracker, ["id-1"], 1000) + + tracker = RetryTracker.record_attempt(tracker, ["id-1"]) + tracker = RetryTracker.record_attempt(tracker, ["id-1"]) + tracker = RetryTracker.record_attempt(tracker, ["id-1"]) + + assert tracker.attempts["id-1"] == 3 + end + + test "initializes count to 1 for untracked ids" do + tracker = RetryTracker.new() + + tracker = RetryTracker.record_attempt(tracker, ["new-id"]) + + assert tracker.attempts["new-id"] == 1 + end + + test "handles multiple ids at once" do + tracker = RetryTracker.new() + tracker = RetryTracker.track(tracker, ["id-1", "id-2"], 1000) + + tracker = RetryTracker.record_attempt(tracker, ["id-1", "id-2"]) + + assert tracker.attempts["id-1"] == 1 + assert tracker.attempts["id-2"] == 1 + end + + test "handles empty list" do + tracker = RetryTracker.new() + tracker = RetryTracker.track(tracker, ["id-1"], 1000) + + tracker = RetryTracker.record_attempt(tracker, []) + + assert tracker.attempts["id-1"] == 0 + end + end + + # ============================================================ + # expire_stale/3 + # ============================================================ + + describe "expire_stale/3" do + test "no-op when retry_deadline_ms is nil — all ids returned as live" do + tracker = RetryTracker.new(retry_deadline_ms: nil) + tracker = RetryTracker.track(tracker, ["id-1", "id-2"], 1000) + + {live, expired, updated} = RetryTracker.expire_stale(tracker, ["id-1", "id-2"], 999_999) + + assert live == ["id-1", "id-2"] + assert expired == [] + # Internal state unchanged + assert updated.first_queued == tracker.first_queued + assert updated.attempts == tracker.attempts + end + + test "partitions ids into live and expired" do + tracker = RetryTracker.new(retry_deadline_ms: 5_000) + + tracker = RetryTracker.track(tracker, ["fresh"], 9_000) + tracker = RetryTracker.track(tracker, ["stale"], 1_000) + + now = 10_000 + {live, expired, updated} = RetryTracker.expire_stale(tracker, ["fresh", "stale"], now) + + assert live == ["fresh"] + assert expired == ["stale"] + # Stale id removed from internal state + refute Map.has_key?(updated.first_queued, "stale") + refute Map.has_key?(updated.attempts, "stale") + # Fresh id retained + assert Map.has_key?(updated.first_queued, "fresh") + end + + test "id with no first_queued entry is treated as live" do + tracker = RetryTracker.new(retry_deadline_ms: 5_000) + # Don't track "unknown" — it has no first_queued entry + + {live, expired, _tracker} = RetryTracker.expire_stale(tracker, ["unknown"], 10_000) + + assert live == ["unknown"] + assert expired == [] + end + + test "all expired" do + tracker = RetryTracker.new(retry_deadline_ms: 1_000) + tracker = RetryTracker.track(tracker, ["id-1", "id-2"], 1_000) + + {live, expired, updated} = RetryTracker.expire_stale(tracker, ["id-1", "id-2"], 100_000) + + assert live == [] + assert Enum.sort(expired) == ["id-1", "id-2"] + assert updated.first_queued == %{} + assert updated.attempts == %{} + end + + test "boundary: exactly at deadline is still live" do + tracker = RetryTracker.new(retry_deadline_ms: 5_000) + tracker = RetryTracker.track(tracker, ["id-1"], 1_000) + + # now - ts == 4999 < 5000 → live + {live, expired, _} = RetryTracker.expire_stale(tracker, ["id-1"], 5_999) + assert live == ["id-1"] + assert expired == [] + + # now - ts == 5000 → NOT < 5000 → expired + {live, expired, _} = RetryTracker.expire_stale(tracker, ["id-1"], 6_000) + assert live == [] + assert expired == ["id-1"] + end + + test "handles empty id list" do + tracker = RetryTracker.new(retry_deadline_ms: 5_000) + tracker = RetryTracker.track(tracker, ["id-1"], 1_000) + + {live, expired, updated} = RetryTracker.expire_stale(tracker, [], 10_000) + + assert live == [] + assert expired == [] + # Internal state unchanged — "id-1" still tracked even though not in input list + assert Map.has_key?(updated.first_queued, "id-1") + end + end + + # ============================================================ + # check_attempts/2 + # ============================================================ + + describe "check_attempts/2" do + test "no-op when max_attempts is nil — all ids returned as keep" do + tracker = RetryTracker.new(max_attempts: nil) + tracker = RetryTracker.track(tracker, ["id-1"], 1000) + tracker = RetryTracker.record_attempt(tracker, ["id-1"]) + tracker = RetryTracker.record_attempt(tracker, ["id-1"]) + tracker = RetryTracker.record_attempt(tracker, ["id-1"]) + + {keep, drop, _updated} = RetryTracker.check_attempts(tracker, ["id-1"]) + + assert keep == ["id-1"] + assert drop == [] + end + + test "keeps ids under the limit, drops ids at or over" do + tracker = RetryTracker.new(max_attempts: 3) + tracker = RetryTracker.track(tracker, ["ok", "exhausted"], 1000) + + # "ok" has 1 attempt, "exhausted" has 3 + tracker = RetryTracker.record_attempt(tracker, ["ok"]) + tracker = RetryTracker.record_attempt(tracker, ["exhausted"]) + tracker = RetryTracker.record_attempt(tracker, ["exhausted"]) + tracker = RetryTracker.record_attempt(tracker, ["exhausted"]) + + {keep, drop, updated} = RetryTracker.check_attempts(tracker, ["ok", "exhausted"]) + + assert keep == ["ok"] + assert drop == ["exhausted"] + # Dropped id removed from internal state + refute Map.has_key?(updated.first_queued, "exhausted") + refute Map.has_key?(updated.attempts, "exhausted") + # Kept id retained + assert Map.has_key?(updated.first_queued, "ok") + end + + test "id with no attempts entry (0 by default) is kept" do + tracker = RetryTracker.new(max_attempts: 3) + + {keep, drop, _} = RetryTracker.check_attempts(tracker, ["new-id"]) + + assert keep == ["new-id"] + assert drop == [] + end + + test "handles empty list" do + tracker = RetryTracker.new(max_attempts: 3) + + {keep, drop, _} = RetryTracker.check_attempts(tracker, []) + + assert keep == [] + assert drop == [] + end + end + + # ============================================================ + # retain_only/2 + # ============================================================ + + describe "retain_only/2" do + test "removes ids not in the given set" do + tracker = RetryTracker.new() + tracker = RetryTracker.track(tracker, ["keep-1", "keep-2", "remove"], 1000) + + tracker = RetryTracker.retain_only(tracker, MapSet.new(["keep-1", "keep-2"])) + + assert Map.keys(tracker.first_queued) |> Enum.sort() == ["keep-1", "keep-2"] + assert Map.keys(tracker.attempts) |> Enum.sort() == ["keep-1", "keep-2"] + end + + test "empty set removes all" do + tracker = RetryTracker.new() + tracker = RetryTracker.track(tracker, ["id-1", "id-2"], 1000) + + tracker = RetryTracker.retain_only(tracker, MapSet.new()) + + assert tracker.first_queued == %{} + assert tracker.attempts == %{} + end + + test "retaining all is a no-op" do + tracker = RetryTracker.new() + tracker = RetryTracker.track(tracker, ["id-1", "id-2"], 1000) + + updated = RetryTracker.retain_only(tracker, MapSet.new(["id-1", "id-2"])) + + assert updated.first_queued == tracker.first_queued + assert updated.attempts == tracker.attempts + end + end + + # ============================================================ + # update_retry_deadline/2 + # ============================================================ + + describe "update_retry_deadline/2" do + test "updates the retry_deadline_ms field" do + tracker = RetryTracker.new(retry_deadline_ms: 60_000) + + tracker = RetryTracker.update_retry_deadline(tracker, 600_000) + + assert tracker.retry_deadline_ms == 600_000 + end + + test "can set to nil" do + tracker = RetryTracker.new(retry_deadline_ms: 60_000) + + tracker = RetryTracker.update_retry_deadline(tracker, nil) + + assert tracker.retry_deadline_ms == nil + end + end + + # ============================================================ + # Integration: full pipeline + # ============================================================ + + describe "integration: track → record_attempt → check_attempts → retain_only" do + test "full retry lifecycle for modack-style tracking" do + tracker = RetryTracker.new(retry_deadline_ms: 60_000, max_attempts: 3) + + # Track 3 ids + tracker = RetryTracker.track(tracker, ["a", "b", "c"], 1000) + + # First flush attempt + tracker = RetryTracker.record_attempt(tracker, ["a", "b", "c"]) + # "a" succeeds (removed from pending), "b" and "c" fail + tracker = RetryTracker.retain_only(tracker, MapSet.new(["b", "c"])) + + assert Map.keys(tracker.first_queued) |> Enum.sort() == ["b", "c"] + + # Second flush attempt + tracker = RetryTracker.record_attempt(tracker, ["b", "c"]) + # "b" succeeds, "c" fails + tracker = RetryTracker.retain_only(tracker, MapSet.new(["c"])) + + # Third flush attempt — "c" hits max_attempts (3) + tracker = RetryTracker.record_attempt(tracker, ["c"]) + {keep, drop, tracker} = RetryTracker.check_attempts(tracker, ["c"]) + + assert keep == [] + assert drop == ["c"] + assert tracker.first_queued == %{} + assert tracker.attempts == %{} + end + end +end diff --git a/test/broadway_cloud_pub_sub/streaming/stream_manager_test.exs b/test/broadway_cloud_pub_sub/streaming/stream_manager_test.exs index 2568b9b..cddbb9d 100644 --- a/test/broadway_cloud_pub_sub/streaming/stream_manager_test.exs +++ b/test/broadway_cloud_pub_sub/streaming/stream_manager_test.exs @@ -1661,7 +1661,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do {:ok, pid} = StreamManager.start_link(opts) batcher_pid = Process.whereis(batcher_name) - assert :sys.get_state(batcher_pid).retry_deadline_ms == 60_000 + assert :sys.get_state(batcher_pid).ack_tracker.retry_deadline_ms == 60_000 # Enable exactly-once send( @@ -1677,7 +1677,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do # Cast is async — let AckBatcher process it AckBatcher.flush(batcher_pid) - assert :sys.get_state(batcher_pid).retry_deadline_ms == 600_000 + assert :sys.get_state(batcher_pid).ack_tracker.retry_deadline_ms == 600_000 end test "AckBatcher retry_deadline_ms is restored to configured value when exactly-once is disabled" do @@ -1724,7 +1724,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do enable_exactly_once(pid) AckBatcher.flush(batcher_pid) - assert :sys.get_state(batcher_pid).retry_deadline_ms == 600_000 + assert :sys.get_state(batcher_pid).ack_tracker.retry_deadline_ms == 600_000 # Disable exactly-once send( @@ -1738,7 +1738,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do sync(pid) AckBatcher.flush(batcher_pid) - assert :sys.get_state(batcher_pid).retry_deadline_ms == 60_000 + assert :sys.get_state(batcher_pid).ack_tracker.retry_deadline_ms == 60_000 end test "retry_deadline_ms is NOT updated when exactly_once status does not change" do @@ -1781,7 +1781,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do {:ok, pid} = StreamManager.start_link(opts) batcher_pid = Process.whereis(batcher_name) - initial_deadline = :sys.get_state(batcher_pid).retry_deadline_ms + initial_deadline = :sys.get_state(batcher_pid).ack_tracker.retry_deadline_ms # Send the same exactly_once=false twice — no update should happen send( @@ -1796,7 +1796,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManagerTest do sync(pid) AckBatcher.flush(batcher_pid) - assert :sys.get_state(batcher_pid).retry_deadline_ms == initial_deadline + assert :sys.get_state(batcher_pid).ack_tracker.retry_deadline_ms == initial_deadline end end From 87b978725559a29c369006a0f8694ad0fdd974b5 Mon Sep 17 00:00:00 2001 From: Rock Date: Fri, 1 May 2026 11:19:52 +0200 Subject: [PATCH 13/29] build: optimize protobuf generation to subscriber service only Configure buf to generate only the Subscriber service protobuf code, removing unused schema definitions and reducing generated code size. Key changes: - Add buf.gen.yaml with opt targeting Subscriber service - Remove unused schema.pb.ex (Pub/Sub schema API) - Reduce pubsub.pb.ex from full API surface to subscriber-only --- buf.gen.yaml | 6 + .../proto/google/pubsub/v1/pubsub.pb.ex | 2098 +---------------- .../proto/google/pubsub/v1/schema.pb.ex | 318 --- 3 files changed, 35 insertions(+), 2387 deletions(-) delete mode 100644 lib/broadway_cloud_pub_sub/proto/google/pubsub/v1/schema.pb.ex diff --git a/buf.gen.yaml b/buf.gen.yaml index 8e15831..56c0eba 100644 --- a/buf.gen.yaml +++ b/buf.gen.yaml @@ -14,3 +14,9 @@ plugins: - paths=source_relative - include_docs=true - plugins=grpc + # To add a new RPC in the future, append its fully-qualified method path + # here and re-run: buf generate + types: + - google.pubsub.v1.Subscriber.StreamingPull + - google.pubsub.v1.Subscriber.Acknowledge + - google.pubsub.v1.Subscriber.ModifyAckDeadline diff --git a/lib/broadway_cloud_pub_sub/proto/google/pubsub/v1/pubsub.pb.ex b/lib/broadway_cloud_pub_sub/proto/google/pubsub/v1/pubsub.pb.ex index cd83678..484cec4 100644 --- a/lib/broadway_cloud_pub_sub/proto/google/pubsub/v1/pubsub.pb.ex +++ b/lib/broadway_cloud_pub_sub/proto/google/pubsub/v1/pubsub.pb.ex @@ -1,1823 +1,58 @@ -defmodule Google.Pubsub.V1.IngestionDataSourceSettings.AwsKinesis.State do - @moduledoc """ - Possible states for ingestion from Amazon Kinesis Data Streams. - """ - - use Protobuf, - enum: true, - full_name: "google.pubsub.v1.IngestionDataSourceSettings.AwsKinesis.State", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:STATE_UNSPECIFIED, 0) - field(:ACTIVE, 1) - field(:KINESIS_PERMISSION_DENIED, 2) - field(:PUBLISH_PERMISSION_DENIED, 3) - field(:STREAM_NOT_FOUND, 4) - field(:CONSUMER_NOT_FOUND, 5) -end - -defmodule Google.Pubsub.V1.IngestionDataSourceSettings.CloudStorage.State do - @moduledoc """ - Possible states for ingestion from Cloud Storage. - """ - - use Protobuf, - enum: true, - full_name: "google.pubsub.v1.IngestionDataSourceSettings.CloudStorage.State", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:STATE_UNSPECIFIED, 0) - field(:ACTIVE, 1) - field(:CLOUD_STORAGE_PERMISSION_DENIED, 2) - field(:PUBLISH_PERMISSION_DENIED, 3) - field(:BUCKET_NOT_FOUND, 4) - field(:TOO_MANY_OBJECTS, 5) -end - -defmodule Google.Pubsub.V1.IngestionDataSourceSettings.AzureEventHubs.State do - @moduledoc """ - Possible states for managed ingestion from Event Hubs. - """ - - use Protobuf, - enum: true, - full_name: "google.pubsub.v1.IngestionDataSourceSettings.AzureEventHubs.State", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:STATE_UNSPECIFIED, 0) - field(:ACTIVE, 1) - field(:EVENT_HUBS_PERMISSION_DENIED, 2) - field(:PUBLISH_PERMISSION_DENIED, 3) - field(:NAMESPACE_NOT_FOUND, 4) - field(:EVENT_HUB_NOT_FOUND, 5) - field(:SUBSCRIPTION_NOT_FOUND, 6) - field(:RESOURCE_GROUP_NOT_FOUND, 7) -end - -defmodule Google.Pubsub.V1.IngestionDataSourceSettings.AwsMsk.State do - @moduledoc """ - Possible states for managed ingestion from Amazon MSK. - """ - - use Protobuf, - enum: true, - full_name: "google.pubsub.v1.IngestionDataSourceSettings.AwsMsk.State", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:STATE_UNSPECIFIED, 0) - field(:ACTIVE, 1) - field(:MSK_PERMISSION_DENIED, 2) - field(:PUBLISH_PERMISSION_DENIED, 3) - field(:CLUSTER_NOT_FOUND, 4) - field(:TOPIC_NOT_FOUND, 5) -end - -defmodule Google.Pubsub.V1.IngestionDataSourceSettings.ConfluentCloud.State do - @moduledoc """ - Possible states for managed ingestion from Confluent Cloud. - """ - - use Protobuf, - enum: true, - full_name: "google.pubsub.v1.IngestionDataSourceSettings.ConfluentCloud.State", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:STATE_UNSPECIFIED, 0) - field(:ACTIVE, 1) - field(:CONFLUENT_CLOUD_PERMISSION_DENIED, 2) - field(:PUBLISH_PERMISSION_DENIED, 3) - field(:UNREACHABLE_BOOTSTRAP_SERVER, 4) - field(:CLUSTER_NOT_FOUND, 5) - field(:TOPIC_NOT_FOUND, 6) -end - -defmodule Google.Pubsub.V1.PlatformLogsSettings.Severity do - @moduledoc """ - Severity levels of Platform Logs. - """ - - use Protobuf, - enum: true, - full_name: "google.pubsub.v1.PlatformLogsSettings.Severity", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:SEVERITY_UNSPECIFIED, 0) - field(:DISABLED, 1) - field(:DEBUG, 2) - field(:INFO, 3) - field(:WARNING, 4) - field(:ERROR, 5) -end - -defmodule Google.Pubsub.V1.Topic.State do - @moduledoc """ - The state of the topic. - """ - - use Protobuf, - enum: true, - full_name: "google.pubsub.v1.Topic.State", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:STATE_UNSPECIFIED, 0) - field(:ACTIVE, 1) - field(:INGESTION_RESOURCE_ERROR, 2) -end - -defmodule Google.Pubsub.V1.Subscription.State do - @moduledoc """ - Possible states for a subscription. - """ - - use Protobuf, - enum: true, - full_name: "google.pubsub.v1.Subscription.State", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:STATE_UNSPECIFIED, 0) - field(:ACTIVE, 1) - field(:RESOURCE_ERROR, 2) -end - -defmodule Google.Pubsub.V1.BigQueryConfig.State do - @moduledoc """ - Possible states for a BigQuery subscription. - """ - - use Protobuf, - enum: true, - full_name: "google.pubsub.v1.BigQueryConfig.State", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:STATE_UNSPECIFIED, 0) - field(:ACTIVE, 1) - field(:PERMISSION_DENIED, 2) - field(:NOT_FOUND, 3) - field(:SCHEMA_MISMATCH, 4) - field(:IN_TRANSIT_LOCATION_RESTRICTION, 5) - field(:VERTEX_AI_LOCATION_RESTRICTION, 6) -end - -defmodule Google.Pubsub.V1.BigtableConfig.State do - @moduledoc """ - Possible states for a Bigtable subscription. - Note: more states could be added in the future. Please code accordingly. - """ - - use Protobuf, - enum: true, - full_name: "google.pubsub.v1.BigtableConfig.State", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:STATE_UNSPECIFIED, 0) - field(:ACTIVE, 1) - field(:NOT_FOUND, 2) - field(:APP_PROFILE_MISCONFIGURED, 3) - field(:PERMISSION_DENIED, 4) - field(:SCHEMA_MISMATCH, 5) - field(:IN_TRANSIT_LOCATION_RESTRICTION, 6) - field(:VERTEX_AI_LOCATION_RESTRICTION, 7) -end - -defmodule Google.Pubsub.V1.CloudStorageConfig.State do - @moduledoc """ - Possible states for a Cloud Storage subscription. - """ - - use Protobuf, - enum: true, - full_name: "google.pubsub.v1.CloudStorageConfig.State", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:STATE_UNSPECIFIED, 0) - field(:ACTIVE, 1) - field(:PERMISSION_DENIED, 2) - field(:NOT_FOUND, 3) - field(:IN_TRANSIT_LOCATION_RESTRICTION, 4) - field(:SCHEMA_MISMATCH, 5) - field(:VERTEX_AI_LOCATION_RESTRICTION, 6) -end - -defmodule Google.Pubsub.V1.MessageStoragePolicy do - @moduledoc """ - A policy constraining the storage of messages published to the topic. - """ - - use Protobuf, - full_name: "google.pubsub.v1.MessageStoragePolicy", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:allowed_persistence_regions, 1, - repeated: true, - type: :string, - json_name: "allowedPersistenceRegions", - deprecated: false - ) - - field(:enforce_in_transit, 2, type: :bool, json_name: "enforceInTransit", deprecated: false) -end - -defmodule Google.Pubsub.V1.SchemaSettings do - @moduledoc """ - Settings for validating messages published against a schema. - """ - - use Protobuf, - full_name: "google.pubsub.v1.SchemaSettings", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:schema, 1, type: :string, deprecated: false) - field(:encoding, 2, type: Google.Pubsub.V1.Encoding, enum: true, deprecated: false) - field(:first_revision_id, 3, type: :string, json_name: "firstRevisionId", deprecated: false) - field(:last_revision_id, 4, type: :string, json_name: "lastRevisionId", deprecated: false) -end - -defmodule Google.Pubsub.V1.IngestionDataSourceSettings.AwsKinesis do - @moduledoc """ - Ingestion settings for Amazon Kinesis Data Streams. - """ - - use Protobuf, - full_name: "google.pubsub.v1.IngestionDataSourceSettings.AwsKinesis", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:state, 1, - type: Google.Pubsub.V1.IngestionDataSourceSettings.AwsKinesis.State, - enum: true, - deprecated: false - ) - - field(:stream_arn, 2, type: :string, json_name: "streamArn", deprecated: false) - field(:consumer_arn, 3, type: :string, json_name: "consumerArn", deprecated: false) - field(:aws_role_arn, 4, type: :string, json_name: "awsRoleArn", deprecated: false) - field(:gcp_service_account, 5, type: :string, json_name: "gcpServiceAccount", deprecated: false) -end - -defmodule Google.Pubsub.V1.IngestionDataSourceSettings.CloudStorage.TextFormat do - @moduledoc """ - Configuration for reading Cloud Storage data in text format. Each line of - text as specified by the delimiter will be set to the `data` field of a - Pub/Sub message. - """ - - use Protobuf, - full_name: "google.pubsub.v1.IngestionDataSourceSettings.CloudStorage.TextFormat", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:delimiter, 1, proto3_optional: true, type: :string, deprecated: false) -end - -defmodule Google.Pubsub.V1.IngestionDataSourceSettings.CloudStorage.AvroFormat do - @moduledoc """ - Configuration for reading Cloud Storage data in Avro binary format. The - bytes of each object will be set to the `data` field of a Pub/Sub - message. - """ - - use Protobuf, - full_name: "google.pubsub.v1.IngestionDataSourceSettings.CloudStorage.AvroFormat", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 -end - -defmodule Google.Pubsub.V1.IngestionDataSourceSettings.CloudStorage.PubSubAvroFormat do - @moduledoc """ - Configuration for reading Cloud Storage data written via [Cloud Storage - subscriptions](https://cloud.google.com/pubsub/docs/cloudstorage). The - data and attributes fields of the originally exported Pub/Sub message - will be restored when publishing. - """ - - use Protobuf, - full_name: "google.pubsub.v1.IngestionDataSourceSettings.CloudStorage.PubSubAvroFormat", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 -end - -defmodule Google.Pubsub.V1.IngestionDataSourceSettings.CloudStorage do - @moduledoc """ - Ingestion settings for Cloud Storage. - """ - - use Protobuf, - full_name: "google.pubsub.v1.IngestionDataSourceSettings.CloudStorage", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - oneof(:input_format, 0) - - field(:state, 1, - type: Google.Pubsub.V1.IngestionDataSourceSettings.CloudStorage.State, - enum: true, - deprecated: false - ) - - field(:bucket, 2, type: :string, deprecated: false) - - field(:text_format, 3, - type: Google.Pubsub.V1.IngestionDataSourceSettings.CloudStorage.TextFormat, - json_name: "textFormat", - oneof: 0, - deprecated: false - ) - - field(:avro_format, 4, - type: Google.Pubsub.V1.IngestionDataSourceSettings.CloudStorage.AvroFormat, - json_name: "avroFormat", - oneof: 0, - deprecated: false - ) - - field(:pubsub_avro_format, 5, - type: Google.Pubsub.V1.IngestionDataSourceSettings.CloudStorage.PubSubAvroFormat, - json_name: "pubsubAvroFormat", - oneof: 0, - deprecated: false - ) - - field(:minimum_object_create_time, 6, - type: Google.Protobuf.Timestamp, - json_name: "minimumObjectCreateTime", - deprecated: false - ) - - field(:match_glob, 9, type: :string, json_name: "matchGlob", deprecated: false) -end - -defmodule Google.Pubsub.V1.IngestionDataSourceSettings.AzureEventHubs do - @moduledoc """ - Ingestion settings for Azure Event Hubs. - """ - - use Protobuf, - full_name: "google.pubsub.v1.IngestionDataSourceSettings.AzureEventHubs", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:state, 1, - type: Google.Pubsub.V1.IngestionDataSourceSettings.AzureEventHubs.State, - enum: true, - deprecated: false - ) - - field(:resource_group, 2, type: :string, json_name: "resourceGroup", deprecated: false) - field(:namespace, 3, type: :string, deprecated: false) - field(:event_hub, 4, type: :string, json_name: "eventHub", deprecated: false) - field(:client_id, 5, type: :string, json_name: "clientId", deprecated: false) - field(:tenant_id, 6, type: :string, json_name: "tenantId", deprecated: false) - field(:subscription_id, 7, type: :string, json_name: "subscriptionId", deprecated: false) - field(:gcp_service_account, 8, type: :string, json_name: "gcpServiceAccount", deprecated: false) -end - -defmodule Google.Pubsub.V1.IngestionDataSourceSettings.AwsMsk do - @moduledoc """ - Ingestion settings for Amazon MSK. - """ - - use Protobuf, - full_name: "google.pubsub.v1.IngestionDataSourceSettings.AwsMsk", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:state, 1, - type: Google.Pubsub.V1.IngestionDataSourceSettings.AwsMsk.State, - enum: true, - deprecated: false - ) - - field(:cluster_arn, 2, type: :string, json_name: "clusterArn", deprecated: false) - field(:topic, 3, type: :string, deprecated: false) - field(:aws_role_arn, 4, type: :string, json_name: "awsRoleArn", deprecated: false) - field(:gcp_service_account, 5, type: :string, json_name: "gcpServiceAccount", deprecated: false) -end - -defmodule Google.Pubsub.V1.IngestionDataSourceSettings.ConfluentCloud do - @moduledoc """ - Ingestion settings for Confluent Cloud. - """ - - use Protobuf, - full_name: "google.pubsub.v1.IngestionDataSourceSettings.ConfluentCloud", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:state, 1, - type: Google.Pubsub.V1.IngestionDataSourceSettings.ConfluentCloud.State, - enum: true, - deprecated: false - ) - - field(:bootstrap_server, 2, type: :string, json_name: "bootstrapServer", deprecated: false) - field(:cluster_id, 3, type: :string, json_name: "clusterId", deprecated: false) - field(:topic, 4, type: :string, deprecated: false) - field(:identity_pool_id, 5, type: :string, json_name: "identityPoolId", deprecated: false) - field(:gcp_service_account, 6, type: :string, json_name: "gcpServiceAccount", deprecated: false) -end - -defmodule Google.Pubsub.V1.IngestionDataSourceSettings do - @moduledoc """ - Settings for an ingestion data source on a topic. - """ - - use Protobuf, - full_name: "google.pubsub.v1.IngestionDataSourceSettings", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - oneof(:source, 0) - - field(:aws_kinesis, 1, - type: Google.Pubsub.V1.IngestionDataSourceSettings.AwsKinesis, - json_name: "awsKinesis", - oneof: 0, - deprecated: false - ) - - field(:cloud_storage, 2, - type: Google.Pubsub.V1.IngestionDataSourceSettings.CloudStorage, - json_name: "cloudStorage", - oneof: 0, - deprecated: false - ) - - field(:azure_event_hubs, 3, - type: Google.Pubsub.V1.IngestionDataSourceSettings.AzureEventHubs, - json_name: "azureEventHubs", - oneof: 0, - deprecated: false - ) - - field(:aws_msk, 5, - type: Google.Pubsub.V1.IngestionDataSourceSettings.AwsMsk, - json_name: "awsMsk", - oneof: 0, - deprecated: false - ) - - field(:confluent_cloud, 6, - type: Google.Pubsub.V1.IngestionDataSourceSettings.ConfluentCloud, - json_name: "confluentCloud", - oneof: 0, - deprecated: false - ) - - field(:platform_logs_settings, 4, - type: Google.Pubsub.V1.PlatformLogsSettings, - json_name: "platformLogsSettings", - deprecated: false - ) -end - -defmodule Google.Pubsub.V1.PlatformLogsSettings do - @moduledoc """ - Settings for Platform Logs produced by Pub/Sub. - """ - - use Protobuf, - full_name: "google.pubsub.v1.PlatformLogsSettings", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:severity, 1, - type: Google.Pubsub.V1.PlatformLogsSettings.Severity, - enum: true, - deprecated: false - ) -end - -defmodule Google.Pubsub.V1.IngestionFailureEvent.ApiViolationReason do - @moduledoc """ - Specifies the reason why some data may have been left out of - the desired Pub/Sub message due to the API message limits - (https://cloud.google.com/pubsub/quotas#resource_limits). For example, - when the number of attributes is larger than 100, the number of - attributes is truncated to 100 to respect the limit on the attribute count. - Other attribute limits are treated similarly. When the size of the desired - message would've been larger than 10MB, the message won't be published at - all, and ingestion of the subsequent messages will proceed as normal. - """ - - use Protobuf, - full_name: "google.pubsub.v1.IngestionFailureEvent.ApiViolationReason", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 -end - -defmodule Google.Pubsub.V1.IngestionFailureEvent.AvroFailureReason do - @moduledoc """ - Set when an Avro file is unsupported or its format is not valid. When this - occurs, one or more Avro objects won't be ingested. - """ - - use Protobuf, - full_name: "google.pubsub.v1.IngestionFailureEvent.AvroFailureReason", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 -end - -defmodule Google.Pubsub.V1.IngestionFailureEvent.SchemaViolationReason do - @moduledoc """ - Set when a Pub/Sub message fails to get published due to a schema - validation violation. - """ - - use Protobuf, - full_name: "google.pubsub.v1.IngestionFailureEvent.SchemaViolationReason", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 -end - -defmodule Google.Pubsub.V1.IngestionFailureEvent.MessageTransformationFailureReason do - @moduledoc """ - Set when a Pub/Sub message fails to get published due to a message - transformation error. - """ - - use Protobuf, - full_name: "google.pubsub.v1.IngestionFailureEvent.MessageTransformationFailureReason", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 -end - -defmodule Google.Pubsub.V1.IngestionFailureEvent.CloudStorageFailure do - @moduledoc """ - Failure when ingesting from a Cloud Storage source. - """ - - use Protobuf, - full_name: "google.pubsub.v1.IngestionFailureEvent.CloudStorageFailure", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - oneof(:reason, 0) - - field(:bucket, 1, type: :string, deprecated: false) - field(:object_name, 2, type: :string, json_name: "objectName", deprecated: false) - field(:object_generation, 3, type: :int64, json_name: "objectGeneration", deprecated: false) - - field(:avro_failure_reason, 5, - type: Google.Pubsub.V1.IngestionFailureEvent.AvroFailureReason, - json_name: "avroFailureReason", - oneof: 0, - deprecated: false - ) - - field(:api_violation_reason, 6, - type: Google.Pubsub.V1.IngestionFailureEvent.ApiViolationReason, - json_name: "apiViolationReason", - oneof: 0, - deprecated: false - ) - - field(:schema_violation_reason, 7, - type: Google.Pubsub.V1.IngestionFailureEvent.SchemaViolationReason, - json_name: "schemaViolationReason", - oneof: 0, - deprecated: false - ) - - field(:message_transformation_failure_reason, 8, - type: Google.Pubsub.V1.IngestionFailureEvent.MessageTransformationFailureReason, - json_name: "messageTransformationFailureReason", - oneof: 0, - deprecated: false - ) -end - -defmodule Google.Pubsub.V1.IngestionFailureEvent.AwsMskFailureReason do - @moduledoc """ - Failure when ingesting from an Amazon MSK source. - """ - - use Protobuf, - full_name: "google.pubsub.v1.IngestionFailureEvent.AwsMskFailureReason", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - oneof(:reason, 0) - - field(:cluster_arn, 1, type: :string, json_name: "clusterArn", deprecated: false) - field(:kafka_topic, 2, type: :string, json_name: "kafkaTopic", deprecated: false) - field(:partition_id, 3, type: :int64, json_name: "partitionId", deprecated: false) - field(:offset, 4, type: :int64, deprecated: false) - - field(:api_violation_reason, 5, - type: Google.Pubsub.V1.IngestionFailureEvent.ApiViolationReason, - json_name: "apiViolationReason", - oneof: 0, - deprecated: false - ) - - field(:schema_violation_reason, 6, - type: Google.Pubsub.V1.IngestionFailureEvent.SchemaViolationReason, - json_name: "schemaViolationReason", - oneof: 0, - deprecated: false - ) - - field(:message_transformation_failure_reason, 7, - type: Google.Pubsub.V1.IngestionFailureEvent.MessageTransformationFailureReason, - json_name: "messageTransformationFailureReason", - oneof: 0, - deprecated: false - ) -end - -defmodule Google.Pubsub.V1.IngestionFailureEvent.AzureEventHubsFailureReason do - @moduledoc """ - Failure when ingesting from an Azure Event Hubs source. - """ - - use Protobuf, - full_name: "google.pubsub.v1.IngestionFailureEvent.AzureEventHubsFailureReason", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - oneof(:reason, 0) - - field(:namespace, 1, type: :string, deprecated: false) - field(:event_hub, 2, type: :string, json_name: "eventHub", deprecated: false) - field(:partition_id, 3, type: :int64, json_name: "partitionId", deprecated: false) - field(:offset, 4, type: :int64, deprecated: false) - - field(:api_violation_reason, 5, - type: Google.Pubsub.V1.IngestionFailureEvent.ApiViolationReason, - json_name: "apiViolationReason", - oneof: 0, - deprecated: false - ) - - field(:schema_violation_reason, 6, - type: Google.Pubsub.V1.IngestionFailureEvent.SchemaViolationReason, - json_name: "schemaViolationReason", - oneof: 0, - deprecated: false - ) - - field(:message_transformation_failure_reason, 7, - type: Google.Pubsub.V1.IngestionFailureEvent.MessageTransformationFailureReason, - json_name: "messageTransformationFailureReason", - oneof: 0, - deprecated: false - ) -end - -defmodule Google.Pubsub.V1.IngestionFailureEvent.ConfluentCloudFailureReason do - @moduledoc """ - Failure when ingesting from a Confluent Cloud source. - """ - - use Protobuf, - full_name: "google.pubsub.v1.IngestionFailureEvent.ConfluentCloudFailureReason", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - oneof(:reason, 0) - - field(:cluster_id, 1, type: :string, json_name: "clusterId", deprecated: false) - field(:kafka_topic, 2, type: :string, json_name: "kafkaTopic", deprecated: false) - field(:partition_id, 3, type: :int64, json_name: "partitionId", deprecated: false) - field(:offset, 4, type: :int64, deprecated: false) - - field(:api_violation_reason, 5, - type: Google.Pubsub.V1.IngestionFailureEvent.ApiViolationReason, - json_name: "apiViolationReason", - oneof: 0, - deprecated: false - ) - - field(:schema_violation_reason, 6, - type: Google.Pubsub.V1.IngestionFailureEvent.SchemaViolationReason, - json_name: "schemaViolationReason", - oneof: 0, - deprecated: false - ) - - field(:message_transformation_failure_reason, 7, - type: Google.Pubsub.V1.IngestionFailureEvent.MessageTransformationFailureReason, - json_name: "messageTransformationFailureReason", - oneof: 0, - deprecated: false - ) -end - -defmodule Google.Pubsub.V1.IngestionFailureEvent.AwsKinesisFailureReason do - @moduledoc """ - Failure when ingesting from an AWS Kinesis source. - """ - - use Protobuf, - full_name: "google.pubsub.v1.IngestionFailureEvent.AwsKinesisFailureReason", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - oneof(:reason, 0) - - field(:stream_arn, 1, type: :string, json_name: "streamArn", deprecated: false) - field(:partition_key, 2, type: :string, json_name: "partitionKey", deprecated: false) - field(:sequence_number, 3, type: :string, json_name: "sequenceNumber", deprecated: false) - - field(:schema_violation_reason, 4, - type: Google.Pubsub.V1.IngestionFailureEvent.SchemaViolationReason, - json_name: "schemaViolationReason", - oneof: 0, - deprecated: false - ) - - field(:message_transformation_failure_reason, 5, - type: Google.Pubsub.V1.IngestionFailureEvent.MessageTransformationFailureReason, - json_name: "messageTransformationFailureReason", - oneof: 0, - deprecated: false - ) - - field(:api_violation_reason, 6, - type: Google.Pubsub.V1.IngestionFailureEvent.ApiViolationReason, - json_name: "apiViolationReason", - oneof: 0, - deprecated: false - ) -end - -defmodule Google.Pubsub.V1.IngestionFailureEvent do - @moduledoc """ - Payload of the Platform Log entry sent when a failure is encountered while - ingesting. - """ - - use Protobuf, - full_name: "google.pubsub.v1.IngestionFailureEvent", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - oneof(:failure, 0) - - field(:topic, 1, type: :string, deprecated: false) - field(:error_message, 2, type: :string, json_name: "errorMessage", deprecated: false) - - field(:cloud_storage_failure, 3, - type: Google.Pubsub.V1.IngestionFailureEvent.CloudStorageFailure, - json_name: "cloudStorageFailure", - oneof: 0, - deprecated: false - ) - - field(:aws_msk_failure, 4, - type: Google.Pubsub.V1.IngestionFailureEvent.AwsMskFailureReason, - json_name: "awsMskFailure", - oneof: 0, - deprecated: false - ) - - field(:azure_event_hubs_failure, 5, - type: Google.Pubsub.V1.IngestionFailureEvent.AzureEventHubsFailureReason, - json_name: "azureEventHubsFailure", - oneof: 0, - deprecated: false - ) - - field(:confluent_cloud_failure, 6, - type: Google.Pubsub.V1.IngestionFailureEvent.ConfluentCloudFailureReason, - json_name: "confluentCloudFailure", - oneof: 0, - deprecated: false - ) - - field(:aws_kinesis_failure, 7, - type: Google.Pubsub.V1.IngestionFailureEvent.AwsKinesisFailureReason, - json_name: "awsKinesisFailure", - oneof: 0, - deprecated: false - ) -end - -defmodule Google.Pubsub.V1.JavaScriptUDF do - @moduledoc """ - User-defined JavaScript function that can transform or filter a Pub/Sub - message. - """ - - use Protobuf, - full_name: "google.pubsub.v1.JavaScriptUDF", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:function_name, 1, type: :string, json_name: "functionName", deprecated: false) - field(:code, 2, type: :string, deprecated: false) -end - -defmodule Google.Pubsub.V1.AIInference.UnstructuredInference do - @moduledoc """ - Configuration for making inferences using arbitrary JSON payloads. - """ - - use Protobuf, - full_name: "google.pubsub.v1.AIInference.UnstructuredInference", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:parameters, 1, type: Google.Protobuf.Struct, deprecated: false) -end - -defmodule Google.Pubsub.V1.AIInference do - @moduledoc """ - Configuration for making inference requests against Vertex AI models. - """ - - use Protobuf, - full_name: "google.pubsub.v1.AIInference", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - oneof(:inference_mode, 0) - - field(:endpoint, 1, type: :string, deprecated: false) - - field(:unstructured_inference, 2, - type: Google.Pubsub.V1.AIInference.UnstructuredInference, - json_name: "unstructuredInference", - oneof: 0, - deprecated: false - ) - - field(:service_account_email, 3, - type: :string, - json_name: "serviceAccountEmail", - deprecated: false - ) -end - -defmodule Google.Pubsub.V1.MessageTransform do - @moduledoc """ - All supported message transforms types. - """ - - use Protobuf, - full_name: "google.pubsub.v1.MessageTransform", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - oneof(:transform, 0) - - field(:javascript_udf, 2, - type: Google.Pubsub.V1.JavaScriptUDF, - json_name: "javascriptUdf", - oneof: 0, - deprecated: false - ) - - field(:ai_inference, 6, - type: Google.Pubsub.V1.AIInference, - json_name: "aiInference", - oneof: 0, - deprecated: false - ) - - field(:enabled, 3, type: :bool, deprecated: true) - field(:disabled, 4, type: :bool, deprecated: false) -end - -defmodule Google.Pubsub.V1.Topic.LabelsEntry do - use Protobuf, - full_name: "google.pubsub.v1.Topic.LabelsEntry", - map: true, - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:key, 1, type: :string) - field(:value, 2, type: :string) -end - -defmodule Google.Pubsub.V1.Topic.TagsEntry do - use Protobuf, - full_name: "google.pubsub.v1.Topic.TagsEntry", - map: true, - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:key, 1, type: :string) - field(:value, 2, type: :string) -end - -defmodule Google.Pubsub.V1.Topic do - @moduledoc """ - A topic resource. - """ - - use Protobuf, - full_name: "google.pubsub.v1.Topic", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:name, 1, type: :string, deprecated: false) - - field(:labels, 2, - repeated: true, - type: Google.Pubsub.V1.Topic.LabelsEntry, - map: true, - deprecated: false - ) - - field(:message_storage_policy, 3, - type: Google.Pubsub.V1.MessageStoragePolicy, - json_name: "messageStoragePolicy", - deprecated: false - ) - - field(:kms_key_name, 5, type: :string, json_name: "kmsKeyName", deprecated: false) - - field(:schema_settings, 6, - type: Google.Pubsub.V1.SchemaSettings, - json_name: "schemaSettings", - deprecated: false - ) - - field(:satisfies_pzs, 7, type: :bool, json_name: "satisfiesPzs", deprecated: false) - - field(:message_retention_duration, 8, - type: Google.Protobuf.Duration, - json_name: "messageRetentionDuration", - deprecated: false - ) - - field(:state, 9, type: Google.Pubsub.V1.Topic.State, enum: true, deprecated: false) - - field(:ingestion_data_source_settings, 10, - type: Google.Pubsub.V1.IngestionDataSourceSettings, - json_name: "ingestionDataSourceSettings", - deprecated: false - ) - - field(:message_transforms, 13, - repeated: true, - type: Google.Pubsub.V1.MessageTransform, - json_name: "messageTransforms", - deprecated: false - ) - - field(:tags, 14, - repeated: true, - type: Google.Pubsub.V1.Topic.TagsEntry, - map: true, - deprecated: false - ) -end - -defmodule Google.Pubsub.V1.PubsubMessage.AttributesEntry do - use Protobuf, - full_name: "google.pubsub.v1.PubsubMessage.AttributesEntry", - map: true, - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:key, 1, type: :string) - field(:value, 2, type: :string) -end - -defmodule Google.Pubsub.V1.PubsubMessage do - @moduledoc """ - A message that is published by publishers and consumed by subscribers. The - message must contain either a non-empty data field or at least one attribute. - Note that client libraries represent this object differently - depending on the language. See the corresponding [client library - documentation](https://cloud.google.com/pubsub/docs/reference/libraries) for - more information. See [quotas and limits] - (https://cloud.google.com/pubsub/quotas) for more information about message - limits. - """ - - use Protobuf, - full_name: "google.pubsub.v1.PubsubMessage", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:data, 1, type: :bytes, deprecated: false) - - field(:attributes, 2, - repeated: true, - type: Google.Pubsub.V1.PubsubMessage.AttributesEntry, - map: true, - deprecated: false - ) - - field(:message_id, 3, type: :string, json_name: "messageId") - field(:publish_time, 4, type: Google.Protobuf.Timestamp, json_name: "publishTime") - field(:ordering_key, 5, type: :string, json_name: "orderingKey", deprecated: false) -end - -defmodule Google.Pubsub.V1.GetTopicRequest do - @moduledoc """ - Request for the GetTopic method. - """ - - use Protobuf, - full_name: "google.pubsub.v1.GetTopicRequest", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:topic, 1, type: :string, deprecated: false) -end - -defmodule Google.Pubsub.V1.UpdateTopicRequest do - @moduledoc """ - Request for the UpdateTopic method. - """ - - use Protobuf, - full_name: "google.pubsub.v1.UpdateTopicRequest", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:topic, 1, type: Google.Pubsub.V1.Topic, deprecated: false) - - field(:update_mask, 2, - type: Google.Protobuf.FieldMask, - json_name: "updateMask", - deprecated: false - ) -end - -defmodule Google.Pubsub.V1.PublishRequest do - @moduledoc """ - Request for the Publish method. - """ - - use Protobuf, - full_name: "google.pubsub.v1.PublishRequest", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:topic, 1, type: :string, deprecated: false) - field(:messages, 2, repeated: true, type: Google.Pubsub.V1.PubsubMessage, deprecated: false) -end - -defmodule Google.Pubsub.V1.PublishResponse do - @moduledoc """ - Response for the `Publish` method. - """ - - use Protobuf, - full_name: "google.pubsub.v1.PublishResponse", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:message_ids, 1, - repeated: true, - type: :string, - json_name: "messageIds", - deprecated: false - ) -end - -defmodule Google.Pubsub.V1.ListTopicsRequest do - @moduledoc """ - Request for the `ListTopics` method. - """ - - use Protobuf, - full_name: "google.pubsub.v1.ListTopicsRequest", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:project, 1, type: :string, deprecated: false) - field(:page_size, 2, type: :int32, json_name: "pageSize", deprecated: false) - field(:page_token, 3, type: :string, json_name: "pageToken", deprecated: false) -end - -defmodule Google.Pubsub.V1.ListTopicsResponse do - @moduledoc """ - Response for the `ListTopics` method. - """ - - use Protobuf, - full_name: "google.pubsub.v1.ListTopicsResponse", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:topics, 1, repeated: true, type: Google.Pubsub.V1.Topic, deprecated: false) - field(:next_page_token, 2, type: :string, json_name: "nextPageToken", deprecated: false) -end - -defmodule Google.Pubsub.V1.ListTopicSubscriptionsRequest do - @moduledoc """ - Request for the `ListTopicSubscriptions` method. - """ - - use Protobuf, - full_name: "google.pubsub.v1.ListTopicSubscriptionsRequest", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:topic, 1, type: :string, deprecated: false) - field(:page_size, 2, type: :int32, json_name: "pageSize", deprecated: false) - field(:page_token, 3, type: :string, json_name: "pageToken", deprecated: false) -end - -defmodule Google.Pubsub.V1.ListTopicSubscriptionsResponse do - @moduledoc """ - Response for the `ListTopicSubscriptions` method. - """ - - use Protobuf, - full_name: "google.pubsub.v1.ListTopicSubscriptionsResponse", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:subscriptions, 1, repeated: true, type: :string, deprecated: false) - field(:next_page_token, 2, type: :string, json_name: "nextPageToken", deprecated: false) -end - -defmodule Google.Pubsub.V1.ListTopicSnapshotsRequest do - @moduledoc """ - Request for the `ListTopicSnapshots` method. - """ - - use Protobuf, - full_name: "google.pubsub.v1.ListTopicSnapshotsRequest", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:topic, 1, type: :string, deprecated: false) - field(:page_size, 2, type: :int32, json_name: "pageSize", deprecated: false) - field(:page_token, 3, type: :string, json_name: "pageToken", deprecated: false) -end - -defmodule Google.Pubsub.V1.ListTopicSnapshotsResponse do - @moduledoc """ - Response for the `ListTopicSnapshots` method. - """ - - use Protobuf, - full_name: "google.pubsub.v1.ListTopicSnapshotsResponse", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:snapshots, 1, repeated: true, type: :string, deprecated: false) - field(:next_page_token, 2, type: :string, json_name: "nextPageToken", deprecated: false) -end - -defmodule Google.Pubsub.V1.DeleteTopicRequest do - @moduledoc """ - Request for the `DeleteTopic` method. - """ - - use Protobuf, - full_name: "google.pubsub.v1.DeleteTopicRequest", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:topic, 1, type: :string, deprecated: false) -end - -defmodule Google.Pubsub.V1.DetachSubscriptionRequest do - @moduledoc """ - Request for the DetachSubscription method. - """ - - use Protobuf, - full_name: "google.pubsub.v1.DetachSubscriptionRequest", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:subscription, 1, type: :string, deprecated: false) -end - -defmodule Google.Pubsub.V1.DetachSubscriptionResponse do - @moduledoc """ - Response for the DetachSubscription method. - Reserved for future use. - """ - - use Protobuf, - full_name: "google.pubsub.v1.DetachSubscriptionResponse", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 -end - -defmodule Google.Pubsub.V1.Subscription.AnalyticsHubSubscriptionInfo do - @moduledoc """ - Information about an associated [Analytics Hub - subscription](https://cloud.google.com/bigquery/docs/analytics-hub-manage-subscriptions). - """ - - use Protobuf, - full_name: "google.pubsub.v1.Subscription.AnalyticsHubSubscriptionInfo", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:listing, 1, type: :string, deprecated: false) - field(:subscription, 2, type: :string, deprecated: false) -end - -defmodule Google.Pubsub.V1.Subscription.LabelsEntry do - use Protobuf, - full_name: "google.pubsub.v1.Subscription.LabelsEntry", - map: true, - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:key, 1, type: :string) - field(:value, 2, type: :string) -end - -defmodule Google.Pubsub.V1.Subscription.TagsEntry do - use Protobuf, - full_name: "google.pubsub.v1.Subscription.TagsEntry", - map: true, - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:key, 1, type: :string) - field(:value, 2, type: :string) -end - -defmodule Google.Pubsub.V1.Subscription do - @moduledoc """ - A subscription resource. If none of `push_config`, `bigquery_config`, or - `cloud_storage_config` is set, then the subscriber will pull and ack messages - using API methods. At most one of these fields may be set. - """ - - use Protobuf, - full_name: "google.pubsub.v1.Subscription", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:name, 1, type: :string, deprecated: false) - field(:topic, 2, type: :string, deprecated: false) - - field(:push_config, 4, - type: Google.Pubsub.V1.PushConfig, - json_name: "pushConfig", - deprecated: false - ) - - field(:bigquery_config, 18, - type: Google.Pubsub.V1.BigQueryConfig, - json_name: "bigqueryConfig", - deprecated: false - ) - - field(:cloud_storage_config, 22, - type: Google.Pubsub.V1.CloudStorageConfig, - json_name: "cloudStorageConfig", - deprecated: false - ) - - field(:bigtable_config, 27, - type: Google.Pubsub.V1.BigtableConfig, - json_name: "bigtableConfig", - deprecated: false - ) - - field(:ack_deadline_seconds, 5, - type: :int32, - json_name: "ackDeadlineSeconds", - deprecated: false - ) - - field(:retain_acked_messages, 7, - type: :bool, - json_name: "retainAckedMessages", - deprecated: false - ) - - field(:message_retention_duration, 8, - type: Google.Protobuf.Duration, - json_name: "messageRetentionDuration", - deprecated: false - ) - - field(:labels, 9, - repeated: true, - type: Google.Pubsub.V1.Subscription.LabelsEntry, - map: true, - deprecated: false - ) - - field(:enable_message_ordering, 10, - type: :bool, - json_name: "enableMessageOrdering", - deprecated: false - ) - - field(:expiration_policy, 11, - type: Google.Pubsub.V1.ExpirationPolicy, - json_name: "expirationPolicy", - deprecated: false - ) - - field(:filter, 12, type: :string, deprecated: false) - - field(:dead_letter_policy, 13, - type: Google.Pubsub.V1.DeadLetterPolicy, - json_name: "deadLetterPolicy", - deprecated: false - ) - - field(:retry_policy, 14, - type: Google.Pubsub.V1.RetryPolicy, - json_name: "retryPolicy", - deprecated: false - ) - - field(:detached, 15, type: :bool, deprecated: false) - - field(:enable_exactly_once_delivery, 16, - type: :bool, - json_name: "enableExactlyOnceDelivery", - deprecated: false - ) - - field(:topic_message_retention_duration, 17, - type: Google.Protobuf.Duration, - json_name: "topicMessageRetentionDuration", - deprecated: false - ) - - field(:state, 19, type: Google.Pubsub.V1.Subscription.State, enum: true, deprecated: false) - - field(:analytics_hub_subscription_info, 23, - type: Google.Pubsub.V1.Subscription.AnalyticsHubSubscriptionInfo, - json_name: "analyticsHubSubscriptionInfo", - deprecated: false - ) - - field(:message_transforms, 25, - repeated: true, - type: Google.Pubsub.V1.MessageTransform, - json_name: "messageTransforms", - deprecated: false - ) - - field(:tags, 26, - repeated: true, - type: Google.Pubsub.V1.Subscription.TagsEntry, - map: true, - deprecated: false - ) -end - -defmodule Google.Pubsub.V1.RetryPolicy do - @moduledoc """ - A policy that specifies how Pub/Sub retries message delivery. - - Retry delay will be exponential based on provided minimum and maximum - backoffs. https://en.wikipedia.org/wiki/Exponential_backoff. - - RetryPolicy will be triggered on NACKs or acknowledgment deadline exceeded - events for a given message. - - Retry Policy is implemented on a best effort basis. At times, the delay - between consecutive deliveries may not match the configuration. That is, - delay can be more or less than configured backoff. - """ - - use Protobuf, - full_name: "google.pubsub.v1.RetryPolicy", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:minimum_backoff, 1, - type: Google.Protobuf.Duration, - json_name: "minimumBackoff", - deprecated: false - ) - - field(:maximum_backoff, 2, - type: Google.Protobuf.Duration, - json_name: "maximumBackoff", - deprecated: false - ) -end - -defmodule Google.Pubsub.V1.DeadLetterPolicy do - @moduledoc """ - Dead lettering is done on a best effort basis. The same message might be - dead lettered multiple times. - - If validation on any of the fields fails at subscription creation/updation, - the create/update subscription request will fail. - """ - - use Protobuf, - full_name: "google.pubsub.v1.DeadLetterPolicy", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:dead_letter_topic, 1, type: :string, json_name: "deadLetterTopic", deprecated: false) - - field(:max_delivery_attempts, 2, - type: :int32, - json_name: "maxDeliveryAttempts", - deprecated: false - ) -end - -defmodule Google.Pubsub.V1.ExpirationPolicy do - @moduledoc """ - A policy that specifies the conditions for resource expiration (i.e., - automatic resource deletion). - """ - - use Protobuf, - full_name: "google.pubsub.v1.ExpirationPolicy", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:ttl, 1, type: Google.Protobuf.Duration, deprecated: false) -end - -defmodule Google.Pubsub.V1.PushConfig.OidcToken do - @moduledoc """ - Contains information needed for generating an - [OpenID Connect - token](https://developers.google.com/identity/protocols/OpenIDConnect). - """ - - use Protobuf, - full_name: "google.pubsub.v1.PushConfig.OidcToken", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:service_account_email, 1, - type: :string, - json_name: "serviceAccountEmail", - deprecated: false - ) - - field(:audience, 2, type: :string, deprecated: false) -end - -defmodule Google.Pubsub.V1.PushConfig.PubsubWrapper do - @moduledoc """ - The payload to the push endpoint is in the form of the JSON representation - of a PubsubMessage - (https://cloud.google.com/pubsub/docs/reference/rpc/google.pubsub.v1#pubsubmessage). - """ - - use Protobuf, - full_name: "google.pubsub.v1.PushConfig.PubsubWrapper", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 -end - -defmodule Google.Pubsub.V1.PushConfig.NoWrapper do - @moduledoc """ - Sets the `data` field as the HTTP body for delivery. - """ - - use Protobuf, - full_name: "google.pubsub.v1.PushConfig.NoWrapper", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:write_metadata, 1, type: :bool, json_name: "writeMetadata", deprecated: false) -end - -defmodule Google.Pubsub.V1.PushConfig.AttributesEntry do - use Protobuf, - full_name: "google.pubsub.v1.PushConfig.AttributesEntry", - map: true, - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:key, 1, type: :string) - field(:value, 2, type: :string) -end - -defmodule Google.Pubsub.V1.PushConfig do - @moduledoc """ - Configuration for a push delivery endpoint. - """ - - use Protobuf, - full_name: "google.pubsub.v1.PushConfig", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - oneof(:authentication_method, 0) - - oneof(:wrapper, 1) - - field(:push_endpoint, 1, type: :string, json_name: "pushEndpoint", deprecated: false) - - field(:attributes, 2, - repeated: true, - type: Google.Pubsub.V1.PushConfig.AttributesEntry, - map: true, - deprecated: false - ) - - field(:oidc_token, 3, - type: Google.Pubsub.V1.PushConfig.OidcToken, - json_name: "oidcToken", - oneof: 0, - deprecated: false - ) - - field(:pubsub_wrapper, 4, - type: Google.Pubsub.V1.PushConfig.PubsubWrapper, - json_name: "pubsubWrapper", - oneof: 1, - deprecated: false - ) - - field(:no_wrapper, 5, - type: Google.Pubsub.V1.PushConfig.NoWrapper, - json_name: "noWrapper", - oneof: 1, - deprecated: false - ) -end - -defmodule Google.Pubsub.V1.BigQueryConfig do - @moduledoc """ - Configuration for a BigQuery subscription. - """ - - use Protobuf, - full_name: "google.pubsub.v1.BigQueryConfig", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:table, 1, type: :string, deprecated: false) - field(:use_topic_schema, 2, type: :bool, json_name: "useTopicSchema", deprecated: false) - field(:write_metadata, 3, type: :bool, json_name: "writeMetadata", deprecated: false) - field(:drop_unknown_fields, 4, type: :bool, json_name: "dropUnknownFields", deprecated: false) - field(:state, 5, type: Google.Pubsub.V1.BigQueryConfig.State, enum: true, deprecated: false) - field(:use_table_schema, 6, type: :bool, json_name: "useTableSchema", deprecated: false) - - field(:service_account_email, 7, - type: :string, - json_name: "serviceAccountEmail", - deprecated: false - ) -end - -defmodule Google.Pubsub.V1.BigtableConfig do - @moduledoc """ - Configuration for a Bigtable subscription. The Pub/Sub message will be - written to a Bigtable row as follows: - - row key: subscription name and message ID delimited by #. - - columns: message bytes written to a single column family "data" with an - empty-string column qualifier. - - cell timestamp: the message publish timestamp. - """ - - use Protobuf, - full_name: "google.pubsub.v1.BigtableConfig", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:table, 1, type: :string, deprecated: false) - field(:app_profile_id, 2, type: :string, json_name: "appProfileId", deprecated: false) - - field(:service_account_email, 3, - type: :string, - json_name: "serviceAccountEmail", - deprecated: false - ) - - field(:write_metadata, 5, type: :bool, json_name: "writeMetadata", deprecated: false) - field(:state, 4, type: Google.Pubsub.V1.BigtableConfig.State, enum: true, deprecated: false) -end - -defmodule Google.Pubsub.V1.CloudStorageConfig.TextConfig do - @moduledoc """ - Configuration for writing message data in text format. - Message payloads will be written to files as raw text, separated by a - newline. - """ - - use Protobuf, - full_name: "google.pubsub.v1.CloudStorageConfig.TextConfig", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 -end - -defmodule Google.Pubsub.V1.CloudStorageConfig.AvroConfig do - @moduledoc """ - Configuration for writing message data in Avro format. - Message payloads and metadata will be written to files as an Avro binary. - """ - - use Protobuf, - full_name: "google.pubsub.v1.CloudStorageConfig.AvroConfig", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:write_metadata, 1, type: :bool, json_name: "writeMetadata", deprecated: false) - field(:use_topic_schema, 2, type: :bool, json_name: "useTopicSchema", deprecated: false) -end - -defmodule Google.Pubsub.V1.CloudStorageConfig do - @moduledoc """ - Configuration for a Cloud Storage subscription. - """ - - use Protobuf, - full_name: "google.pubsub.v1.CloudStorageConfig", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - oneof(:output_format, 0) - - field(:bucket, 1, type: :string, deprecated: false) - field(:filename_prefix, 2, type: :string, json_name: "filenamePrefix", deprecated: false) - field(:filename_suffix, 3, type: :string, json_name: "filenameSuffix", deprecated: false) - - field(:filename_datetime_format, 10, - type: :string, - json_name: "filenameDatetimeFormat", - deprecated: false - ) - - field(:text_config, 4, - type: Google.Pubsub.V1.CloudStorageConfig.TextConfig, - json_name: "textConfig", - oneof: 0, - deprecated: false - ) - - field(:avro_config, 5, - type: Google.Pubsub.V1.CloudStorageConfig.AvroConfig, - json_name: "avroConfig", - oneof: 0, - deprecated: false - ) - - field(:max_duration, 6, - type: Google.Protobuf.Duration, - json_name: "maxDuration", - deprecated: false - ) - - field(:max_bytes, 7, type: :int64, json_name: "maxBytes", deprecated: false) - field(:max_messages, 8, type: :int64, json_name: "maxMessages", deprecated: false) - field(:state, 9, type: Google.Pubsub.V1.CloudStorageConfig.State, enum: true, deprecated: false) - - field(:service_account_email, 11, - type: :string, - json_name: "serviceAccountEmail", - deprecated: false - ) -end - -defmodule Google.Pubsub.V1.ReceivedMessage do - @moduledoc """ - A message and its corresponding acknowledgment ID. - """ - - use Protobuf, - full_name: "google.pubsub.v1.ReceivedMessage", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:ack_id, 1, type: :string, json_name: "ackId", deprecated: false) - field(:message, 2, type: Google.Pubsub.V1.PubsubMessage, deprecated: false) - field(:delivery_attempt, 3, type: :int32, json_name: "deliveryAttempt", deprecated: false) -end - -defmodule Google.Pubsub.V1.GetSubscriptionRequest do - @moduledoc """ - Request for the GetSubscription method. - """ - - use Protobuf, - full_name: "google.pubsub.v1.GetSubscriptionRequest", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:subscription, 1, type: :string, deprecated: false) -end - -defmodule Google.Pubsub.V1.UpdateSubscriptionRequest do - @moduledoc """ - Request for the UpdateSubscription method. - """ - - use Protobuf, - full_name: "google.pubsub.v1.UpdateSubscriptionRequest", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:subscription, 1, type: Google.Pubsub.V1.Subscription, deprecated: false) - - field(:update_mask, 2, - type: Google.Protobuf.FieldMask, - json_name: "updateMask", - deprecated: false - ) -end - -defmodule Google.Pubsub.V1.ListSubscriptionsRequest do - @moduledoc """ - Request for the `ListSubscriptions` method. - """ - - use Protobuf, - full_name: "google.pubsub.v1.ListSubscriptionsRequest", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:project, 1, type: :string, deprecated: false) - field(:page_size, 2, type: :int32, json_name: "pageSize", deprecated: false) - field(:page_token, 3, type: :string, json_name: "pageToken", deprecated: false) -end - -defmodule Google.Pubsub.V1.ListSubscriptionsResponse do - @moduledoc """ - Response for the `ListSubscriptions` method. - """ - - use Protobuf, - full_name: "google.pubsub.v1.ListSubscriptionsResponse", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:subscriptions, 1, repeated: true, type: Google.Pubsub.V1.Subscription, deprecated: false) - field(:next_page_token, 2, type: :string, json_name: "nextPageToken", deprecated: false) -end - -defmodule Google.Pubsub.V1.DeleteSubscriptionRequest do - @moduledoc """ - Request for the DeleteSubscription method. - """ - +defmodule Google.Pubsub.V1.PubsubMessage.AttributesEntry do use Protobuf, - full_name: "google.pubsub.v1.DeleteSubscriptionRequest", + full_name: "google.pubsub.v1.PubsubMessage.AttributesEntry", + map: true, protoc_gen_elixir_version: "0.16.0", syntax: :proto3 - field(:subscription, 1, type: :string, deprecated: false) + field(:key, 1, type: :string) + field(:value, 2, type: :string) end -defmodule Google.Pubsub.V1.ModifyPushConfigRequest do +defmodule Google.Pubsub.V1.PubsubMessage do @moduledoc """ - Request for the ModifyPushConfig method. + A message that is published by publishers and consumed by subscribers. The + message must contain either a non-empty data field or at least one attribute. + Note that client libraries represent this object differently + depending on the language. See the corresponding [client library + documentation](https://cloud.google.com/pubsub/docs/reference/libraries) for + more information. See [quotas and limits] + (https://cloud.google.com/pubsub/quotas) for more information about message + limits. """ use Protobuf, - full_name: "google.pubsub.v1.ModifyPushConfigRequest", + full_name: "google.pubsub.v1.PubsubMessage", protoc_gen_elixir_version: "0.16.0", syntax: :proto3 - field(:subscription, 1, type: :string, deprecated: false) + field(:data, 1, type: :bytes, deprecated: false) - field(:push_config, 2, - type: Google.Pubsub.V1.PushConfig, - json_name: "pushConfig", + field(:attributes, 2, + repeated: true, + type: Google.Pubsub.V1.PubsubMessage.AttributesEntry, + map: true, deprecated: false ) -end - -defmodule Google.Pubsub.V1.PullRequest do - @moduledoc """ - Request for the `Pull` method. - """ - - use Protobuf, - full_name: "google.pubsub.v1.PullRequest", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - field(:subscription, 1, type: :string, deprecated: false) - field(:return_immediately, 2, type: :bool, json_name: "returnImmediately", deprecated: true) - field(:max_messages, 3, type: :int32, json_name: "maxMessages", deprecated: false) + field(:message_id, 3, type: :string, json_name: "messageId") + field(:publish_time, 4, type: Google.Protobuf.Timestamp, json_name: "publishTime") + field(:ordering_key, 5, type: :string, json_name: "orderingKey", deprecated: false) end -defmodule Google.Pubsub.V1.PullResponse do +defmodule Google.Pubsub.V1.ReceivedMessage do @moduledoc """ - Response for the `Pull` method. + A message and its corresponding acknowledgment ID. """ use Protobuf, - full_name: "google.pubsub.v1.PullResponse", + full_name: "google.pubsub.v1.ReceivedMessage", protoc_gen_elixir_version: "0.16.0", syntax: :proto3 - field(:received_messages, 1, - repeated: true, - type: Google.Pubsub.V1.ReceivedMessage, - json_name: "receivedMessages", - deprecated: false - ) + field(:ack_id, 1, type: :string, json_name: "ackId", deprecated: false) + field(:message, 2, type: Google.Pubsub.V1.PubsubMessage, deprecated: false) + field(:delivery_attempt, 3, type: :int32, json_name: "deliveryAttempt", deprecated: false) end defmodule Google.Pubsub.V1.ModifyAckDeadlineRequest do @@ -2029,243 +264,6 @@ defmodule Google.Pubsub.V1.StreamingPullResponse do ) end -defmodule Google.Pubsub.V1.CreateSnapshotRequest.LabelsEntry do - use Protobuf, - full_name: "google.pubsub.v1.CreateSnapshotRequest.LabelsEntry", - map: true, - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:key, 1, type: :string) - field(:value, 2, type: :string) -end - -defmodule Google.Pubsub.V1.CreateSnapshotRequest.TagsEntry do - use Protobuf, - full_name: "google.pubsub.v1.CreateSnapshotRequest.TagsEntry", - map: true, - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:key, 1, type: :string) - field(:value, 2, type: :string) -end - -defmodule Google.Pubsub.V1.CreateSnapshotRequest do - @moduledoc """ - Request for the `CreateSnapshot` method. - """ - - use Protobuf, - full_name: "google.pubsub.v1.CreateSnapshotRequest", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:name, 1, type: :string, deprecated: false) - field(:subscription, 2, type: :string, deprecated: false) - - field(:labels, 3, - repeated: true, - type: Google.Pubsub.V1.CreateSnapshotRequest.LabelsEntry, - map: true, - deprecated: false - ) - - field(:tags, 4, - repeated: true, - type: Google.Pubsub.V1.CreateSnapshotRequest.TagsEntry, - map: true, - deprecated: false - ) -end - -defmodule Google.Pubsub.V1.UpdateSnapshotRequest do - @moduledoc """ - Request for the UpdateSnapshot method. - """ - - use Protobuf, - full_name: "google.pubsub.v1.UpdateSnapshotRequest", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:snapshot, 1, type: Google.Pubsub.V1.Snapshot, deprecated: false) - - field(:update_mask, 2, - type: Google.Protobuf.FieldMask, - json_name: "updateMask", - deprecated: false - ) -end - -defmodule Google.Pubsub.V1.Snapshot.LabelsEntry do - use Protobuf, - full_name: "google.pubsub.v1.Snapshot.LabelsEntry", - map: true, - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:key, 1, type: :string) - field(:value, 2, type: :string) -end - -defmodule Google.Pubsub.V1.Snapshot do - @moduledoc """ - A snapshot resource. Snapshots are used in - [Seek](https://cloud.google.com/pubsub/docs/replay-overview) - operations, which allow you to manage message acknowledgments in bulk. That - is, you can set the acknowledgment state of messages in an existing - subscription to the state captured by a snapshot. - """ - - use Protobuf, - full_name: "google.pubsub.v1.Snapshot", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:name, 1, type: :string, deprecated: false) - field(:topic, 2, type: :string, deprecated: false) - - field(:expire_time, 3, - type: Google.Protobuf.Timestamp, - json_name: "expireTime", - deprecated: false - ) - - field(:labels, 4, - repeated: true, - type: Google.Pubsub.V1.Snapshot.LabelsEntry, - map: true, - deprecated: false - ) -end - -defmodule Google.Pubsub.V1.GetSnapshotRequest do - @moduledoc """ - Request for the GetSnapshot method. - """ - - use Protobuf, - full_name: "google.pubsub.v1.GetSnapshotRequest", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:snapshot, 1, type: :string, deprecated: false) -end - -defmodule Google.Pubsub.V1.ListSnapshotsRequest do - @moduledoc """ - Request for the `ListSnapshots` method. - """ - - use Protobuf, - full_name: "google.pubsub.v1.ListSnapshotsRequest", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:project, 1, type: :string, deprecated: false) - field(:page_size, 2, type: :int32, json_name: "pageSize", deprecated: false) - field(:page_token, 3, type: :string, json_name: "pageToken", deprecated: false) -end - -defmodule Google.Pubsub.V1.ListSnapshotsResponse do - @moduledoc """ - Response for the `ListSnapshots` method. - """ - - use Protobuf, - full_name: "google.pubsub.v1.ListSnapshotsResponse", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:snapshots, 1, repeated: true, type: Google.Pubsub.V1.Snapshot, deprecated: false) - field(:next_page_token, 2, type: :string, json_name: "nextPageToken", deprecated: false) -end - -defmodule Google.Pubsub.V1.DeleteSnapshotRequest do - @moduledoc """ - Request for the `DeleteSnapshot` method. - """ - - use Protobuf, - full_name: "google.pubsub.v1.DeleteSnapshotRequest", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:snapshot, 1, type: :string, deprecated: false) -end - -defmodule Google.Pubsub.V1.SeekRequest do - @moduledoc """ - Request for the `Seek` method. - """ - - use Protobuf, - full_name: "google.pubsub.v1.SeekRequest", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - oneof(:target, 0) - - field(:subscription, 1, type: :string, deprecated: false) - field(:time, 2, type: Google.Protobuf.Timestamp, oneof: 0, deprecated: false) - field(:snapshot, 3, type: :string, oneof: 0, deprecated: false) -end - -defmodule Google.Pubsub.V1.SeekResponse do - @moduledoc """ - Response for the `Seek` method (this response is empty). - """ - - use Protobuf, - full_name: "google.pubsub.v1.SeekResponse", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 -end - -defmodule Google.Pubsub.V1.Publisher.Service do - @moduledoc """ - The service that an application uses to manipulate topics, and to send - messages to a topic. - """ - - use GRPC.Service, name: "google.pubsub.v1.Publisher", protoc_gen_elixir_version: "0.16.0" - - rpc(:CreateTopic, Google.Pubsub.V1.Topic, Google.Pubsub.V1.Topic) - - rpc(:UpdateTopic, Google.Pubsub.V1.UpdateTopicRequest, Google.Pubsub.V1.Topic) - - rpc(:Publish, Google.Pubsub.V1.PublishRequest, Google.Pubsub.V1.PublishResponse) - - rpc(:GetTopic, Google.Pubsub.V1.GetTopicRequest, Google.Pubsub.V1.Topic) - - rpc(:ListTopics, Google.Pubsub.V1.ListTopicsRequest, Google.Pubsub.V1.ListTopicsResponse) - - rpc( - :ListTopicSubscriptions, - Google.Pubsub.V1.ListTopicSubscriptionsRequest, - Google.Pubsub.V1.ListTopicSubscriptionsResponse - ) - - rpc( - :ListTopicSnapshots, - Google.Pubsub.V1.ListTopicSnapshotsRequest, - Google.Pubsub.V1.ListTopicSnapshotsResponse - ) - - rpc(:DeleteTopic, Google.Pubsub.V1.DeleteTopicRequest, Google.Protobuf.Empty) - - rpc( - :DetachSubscription, - Google.Pubsub.V1.DetachSubscriptionRequest, - Google.Pubsub.V1.DetachSubscriptionResponse - ) -end - -defmodule Google.Pubsub.V1.Publisher.Stub do - use GRPC.Stub, service: Google.Pubsub.V1.Publisher.Service -end - defmodule Google.Pubsub.V1.Subscriber.Service do @moduledoc """ The service that an application uses to manipulate subscriptions and to @@ -2275,53 +273,15 @@ defmodule Google.Pubsub.V1.Subscriber.Service do use GRPC.Service, name: "google.pubsub.v1.Subscriber", protoc_gen_elixir_version: "0.16.0" - rpc(:CreateSubscription, Google.Pubsub.V1.Subscription, Google.Pubsub.V1.Subscription) - - rpc(:GetSubscription, Google.Pubsub.V1.GetSubscriptionRequest, Google.Pubsub.V1.Subscription) - - rpc( - :UpdateSubscription, - Google.Pubsub.V1.UpdateSubscriptionRequest, - Google.Pubsub.V1.Subscription - ) - - rpc( - :ListSubscriptions, - Google.Pubsub.V1.ListSubscriptionsRequest, - Google.Pubsub.V1.ListSubscriptionsResponse - ) - - rpc(:DeleteSubscription, Google.Pubsub.V1.DeleteSubscriptionRequest, Google.Protobuf.Empty) - rpc(:ModifyAckDeadline, Google.Pubsub.V1.ModifyAckDeadlineRequest, Google.Protobuf.Empty) rpc(:Acknowledge, Google.Pubsub.V1.AcknowledgeRequest, Google.Protobuf.Empty) - rpc(:Pull, Google.Pubsub.V1.PullRequest, Google.Pubsub.V1.PullResponse) - rpc( :StreamingPull, stream(Google.Pubsub.V1.StreamingPullRequest), stream(Google.Pubsub.V1.StreamingPullResponse) ) - - rpc(:ModifyPushConfig, Google.Pubsub.V1.ModifyPushConfigRequest, Google.Protobuf.Empty) - - rpc(:GetSnapshot, Google.Pubsub.V1.GetSnapshotRequest, Google.Pubsub.V1.Snapshot) - - rpc( - :ListSnapshots, - Google.Pubsub.V1.ListSnapshotsRequest, - Google.Pubsub.V1.ListSnapshotsResponse - ) - - rpc(:CreateSnapshot, Google.Pubsub.V1.CreateSnapshotRequest, Google.Pubsub.V1.Snapshot) - - rpc(:UpdateSnapshot, Google.Pubsub.V1.UpdateSnapshotRequest, Google.Pubsub.V1.Snapshot) - - rpc(:DeleteSnapshot, Google.Pubsub.V1.DeleteSnapshotRequest, Google.Protobuf.Empty) - - rpc(:Seek, Google.Pubsub.V1.SeekRequest, Google.Pubsub.V1.SeekResponse) end defmodule Google.Pubsub.V1.Subscriber.Stub do diff --git a/lib/broadway_cloud_pub_sub/proto/google/pubsub/v1/schema.pb.ex b/lib/broadway_cloud_pub_sub/proto/google/pubsub/v1/schema.pb.ex deleted file mode 100644 index 0ff74ae..0000000 --- a/lib/broadway_cloud_pub_sub/proto/google/pubsub/v1/schema.pb.ex +++ /dev/null @@ -1,318 +0,0 @@ -defmodule Google.Pubsub.V1.SchemaView do - @moduledoc """ - View of Schema object fields to be returned by GetSchema and ListSchemas. - """ - - use Protobuf, - enum: true, - full_name: "google.pubsub.v1.SchemaView", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:SCHEMA_VIEW_UNSPECIFIED, 0) - field(:BASIC, 1) - field(:FULL, 2) -end - -defmodule Google.Pubsub.V1.Encoding do - @moduledoc """ - Possible encoding types for messages. - """ - - use Protobuf, - enum: true, - full_name: "google.pubsub.v1.Encoding", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:ENCODING_UNSPECIFIED, 0) - field(:JSON, 1) - field(:BINARY, 2) -end - -defmodule Google.Pubsub.V1.Schema.Type do - @moduledoc """ - Possible schema definition types. - """ - - use Protobuf, - enum: true, - full_name: "google.pubsub.v1.Schema.Type", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:TYPE_UNSPECIFIED, 0) - field(:PROTOCOL_BUFFER, 1) - field(:AVRO, 2) -end - -defmodule Google.Pubsub.V1.Schema do - @moduledoc """ - A schema resource. - """ - - use Protobuf, - full_name: "google.pubsub.v1.Schema", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:name, 1, type: :string, deprecated: false) - field(:type, 2, type: Google.Pubsub.V1.Schema.Type, enum: true) - field(:definition, 3, type: :string) - field(:revision_id, 4, type: :string, json_name: "revisionId", deprecated: false) - - field(:revision_create_time, 6, - type: Google.Protobuf.Timestamp, - json_name: "revisionCreateTime", - deprecated: false - ) -end - -defmodule Google.Pubsub.V1.CreateSchemaRequest do - @moduledoc """ - Request for the CreateSchema method. - """ - - use Protobuf, - full_name: "google.pubsub.v1.CreateSchemaRequest", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:parent, 1, type: :string, deprecated: false) - field(:schema, 2, type: Google.Pubsub.V1.Schema, deprecated: false) - field(:schema_id, 3, type: :string, json_name: "schemaId") -end - -defmodule Google.Pubsub.V1.GetSchemaRequest do - @moduledoc """ - Request for the GetSchema method. - """ - - use Protobuf, - full_name: "google.pubsub.v1.GetSchemaRequest", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:name, 1, type: :string, deprecated: false) - field(:view, 2, type: Google.Pubsub.V1.SchemaView, enum: true) -end - -defmodule Google.Pubsub.V1.ListSchemasRequest do - @moduledoc """ - Request for the `ListSchemas` method. - """ - - use Protobuf, - full_name: "google.pubsub.v1.ListSchemasRequest", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:parent, 1, type: :string, deprecated: false) - field(:view, 2, type: Google.Pubsub.V1.SchemaView, enum: true) - field(:page_size, 3, type: :int32, json_name: "pageSize") - field(:page_token, 4, type: :string, json_name: "pageToken") -end - -defmodule Google.Pubsub.V1.ListSchemasResponse do - @moduledoc """ - Response for the `ListSchemas` method. - """ - - use Protobuf, - full_name: "google.pubsub.v1.ListSchemasResponse", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:schemas, 1, repeated: true, type: Google.Pubsub.V1.Schema) - field(:next_page_token, 2, type: :string, json_name: "nextPageToken") -end - -defmodule Google.Pubsub.V1.ListSchemaRevisionsRequest do - @moduledoc """ - Request for the `ListSchemaRevisions` method. - """ - - use Protobuf, - full_name: "google.pubsub.v1.ListSchemaRevisionsRequest", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:name, 1, type: :string, deprecated: false) - field(:view, 2, type: Google.Pubsub.V1.SchemaView, enum: true) - field(:page_size, 3, type: :int32, json_name: "pageSize") - field(:page_token, 4, type: :string, json_name: "pageToken") -end - -defmodule Google.Pubsub.V1.ListSchemaRevisionsResponse do - @moduledoc """ - Response for the `ListSchemaRevisions` method. - """ - - use Protobuf, - full_name: "google.pubsub.v1.ListSchemaRevisionsResponse", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:schemas, 1, repeated: true, type: Google.Pubsub.V1.Schema) - field(:next_page_token, 2, type: :string, json_name: "nextPageToken") -end - -defmodule Google.Pubsub.V1.CommitSchemaRequest do - @moduledoc """ - Request for CommitSchema method. - """ - - use Protobuf, - full_name: "google.pubsub.v1.CommitSchemaRequest", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:name, 1, type: :string, deprecated: false) - field(:schema, 2, type: Google.Pubsub.V1.Schema, deprecated: false) -end - -defmodule Google.Pubsub.V1.RollbackSchemaRequest do - @moduledoc """ - Request for the `RollbackSchema` method. - """ - - use Protobuf, - full_name: "google.pubsub.v1.RollbackSchemaRequest", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:name, 1, type: :string, deprecated: false) - field(:revision_id, 2, type: :string, json_name: "revisionId", deprecated: false) -end - -defmodule Google.Pubsub.V1.DeleteSchemaRevisionRequest do - @moduledoc """ - Request for the `DeleteSchemaRevision` method. - """ - - use Protobuf, - full_name: "google.pubsub.v1.DeleteSchemaRevisionRequest", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:name, 1, type: :string, deprecated: false) - field(:revision_id, 2, type: :string, json_name: "revisionId", deprecated: true) -end - -defmodule Google.Pubsub.V1.DeleteSchemaRequest do - @moduledoc """ - Request for the `DeleteSchema` method. - """ - - use Protobuf, - full_name: "google.pubsub.v1.DeleteSchemaRequest", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:name, 1, type: :string, deprecated: false) -end - -defmodule Google.Pubsub.V1.ValidateSchemaRequest do - @moduledoc """ - Request for the `ValidateSchema` method. - """ - - use Protobuf, - full_name: "google.pubsub.v1.ValidateSchemaRequest", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - field(:parent, 1, type: :string, deprecated: false) - field(:schema, 2, type: Google.Pubsub.V1.Schema, deprecated: false) -end - -defmodule Google.Pubsub.V1.ValidateSchemaResponse do - @moduledoc """ - Response for the `ValidateSchema` method. - Empty for now. - """ - - use Protobuf, - full_name: "google.pubsub.v1.ValidateSchemaResponse", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 -end - -defmodule Google.Pubsub.V1.ValidateMessageRequest do - @moduledoc """ - Request for the `ValidateMessage` method. - """ - - use Protobuf, - full_name: "google.pubsub.v1.ValidateMessageRequest", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 - - oneof(:schema_spec, 0) - - field(:parent, 1, type: :string, deprecated: false) - field(:name, 2, type: :string, oneof: 0, deprecated: false) - field(:schema, 3, type: Google.Pubsub.V1.Schema, oneof: 0) - field(:message, 4, type: :bytes) - field(:encoding, 5, type: Google.Pubsub.V1.Encoding, enum: true) -end - -defmodule Google.Pubsub.V1.ValidateMessageResponse do - @moduledoc """ - Response for the `ValidateMessage` method. - Empty for now. - """ - - use Protobuf, - full_name: "google.pubsub.v1.ValidateMessageResponse", - protoc_gen_elixir_version: "0.16.0", - syntax: :proto3 -end - -defmodule Google.Pubsub.V1.SchemaService.Service do - @moduledoc """ - Service for doing schema-related operations. - """ - - use GRPC.Service, name: "google.pubsub.v1.SchemaService", protoc_gen_elixir_version: "0.16.0" - - rpc(:CreateSchema, Google.Pubsub.V1.CreateSchemaRequest, Google.Pubsub.V1.Schema) - - rpc(:GetSchema, Google.Pubsub.V1.GetSchemaRequest, Google.Pubsub.V1.Schema) - - rpc(:ListSchemas, Google.Pubsub.V1.ListSchemasRequest, Google.Pubsub.V1.ListSchemasResponse) - - rpc( - :ListSchemaRevisions, - Google.Pubsub.V1.ListSchemaRevisionsRequest, - Google.Pubsub.V1.ListSchemaRevisionsResponse - ) - - rpc(:CommitSchema, Google.Pubsub.V1.CommitSchemaRequest, Google.Pubsub.V1.Schema) - - rpc(:RollbackSchema, Google.Pubsub.V1.RollbackSchemaRequest, Google.Pubsub.V1.Schema) - - rpc( - :DeleteSchemaRevision, - Google.Pubsub.V1.DeleteSchemaRevisionRequest, - Google.Pubsub.V1.Schema - ) - - rpc(:DeleteSchema, Google.Pubsub.V1.DeleteSchemaRequest, Google.Protobuf.Empty) - - rpc( - :ValidateSchema, - Google.Pubsub.V1.ValidateSchemaRequest, - Google.Pubsub.V1.ValidateSchemaResponse - ) - - rpc( - :ValidateMessage, - Google.Pubsub.V1.ValidateMessageRequest, - Google.Pubsub.V1.ValidateMessageResponse - ) -end - -defmodule Google.Pubsub.V1.SchemaService.Stub do - use GRPC.Stub, service: Google.Pubsub.V1.SchemaService.Service -end From 9725fcabc3ccc793ffc29a5d2cc1d0b49eb73fa6 Mon Sep 17 00:00:00 2001 From: Rock Date: Fri, 22 May 2026 11:27:15 +0200 Subject: [PATCH 14/29] refine: Stop tracking integration and stress tests --- .../streaming/producer_integration_test.exs | 242 --- .../streaming/stress_test.exs | 1839 ----------------- test/support/pubsub_emulator.ex | 178 -- test/test_helper.exs | 2 +- 4 files changed, 1 insertion(+), 2260 deletions(-) delete mode 100644 test/broadway_cloud_pub_sub/streaming/producer_integration_test.exs delete mode 100644 test/broadway_cloud_pub_sub/streaming/stress_test.exs delete mode 100644 test/support/pubsub_emulator.ex diff --git a/test/broadway_cloud_pub_sub/streaming/producer_integration_test.exs b/test/broadway_cloud_pub_sub/streaming/producer_integration_test.exs deleted file mode 100644 index 51b23a8..0000000 --- a/test/broadway_cloud_pub_sub/streaming/producer_integration_test.exs +++ /dev/null @@ -1,242 +0,0 @@ -defmodule BroadwayCloudPubSub.Streaming.ProducerIntegrationTest do - @moduledoc """ - Integration tests for `StreamingProducer` against the Cloud Pub/Sub emulator. - - These tests require the emulator to be running on `PUBSUB_EMULATOR_HOST` - (default `localhost:8085`). Run with: - - mix test --only integration - - Or with env: - - PUBSUB_EMULATOR_HOST=localhost:8085 mix test --only integration - """ - - use ExUnit.Case, async: false - - @moduletag :integration - @moduletag timeout: 30_000 - - alias BroadwayCloudPubSub.PubSubEmulator - - # No-op token generator for the emulator (no auth required) - def noop_token, do: {:ok, "emulator-no-auth"} - - # Minimal Broadway pipeline that sends received messages to the test process - defmodule TestPipeline do - use Broadway - - def start_link(opts) do - test_pid = Keyword.fetch!(opts, :test_pid) - subscription = Keyword.fetch!(opts, :subscription) - emulator_host = Keyword.fetch!(opts, :emulator_host) - name = Keyword.fetch!(opts, :name) - - Broadway.start_link(__MODULE__, - name: name, - producer: [ - module: - {BroadwayCloudPubSub.Streaming.Producer, - subscription: subscription, - token_generator: - {BroadwayCloudPubSub.Streaming.ProducerIntegrationTest, :noop_token, []}, - grpc_endpoint: emulator_host, - use_ssl: false, - max_outstanding_messages: 100, - on_failure: {:nack, 0}}, - concurrency: 1 - ], - processors: [ - default: [concurrency: 2] - ], - context: %{test_pid: test_pid} - ) - end - - @impl Broadway - def handle_message(:default, message, %{test_pid: test_pid}) do - require Logger - - Logger.debug( - "[TestPipeline] handle_message data=#{inspect(message.data)} ack_ref=#{inspect(elem(message.acknowledger, 1))}" - ) - - send(test_pid, {:broadway_message, message.data, message.metadata}) - message - end - - @impl Broadway - def handle_failed(messages, %{test_pid: test_pid}) do - Enum.each(messages, fn msg -> - send(test_pid, {:broadway_failed, msg.data}) - end) - - messages - end - end - - setup_all do - DynamicSupervisor.start_link(strategy: :one_for_one, name: GRPC.Client.Supervisor) - PubSubEmulator.start() - :ok - end - - setup do - topic_name = "broadway-integration-#{:erlang.unique_integer([:positive])}" - sub_name = "broadway-integration-sub-#{:erlang.unique_integer([:positive])}" - - {_topic, subscription} = - PubSubEmulator.setup_topic_and_subscription(topic_name, sub_name, ack_deadline_seconds: 60) - - pipeline_name = :"TestPipeline#{:erlang.unique_integer([:positive])}" - - {:ok, pid} = - TestPipeline.start_link( - name: pipeline_name, - test_pid: self(), - subscription: subscription, - emulator_host: PubSubEmulator.host() - ) - - on_exit(fn -> - ref = Process.monitor(pid) - - try do - Broadway.stop(pid) - catch - :exit, _ -> :ok - end - - receive do - {:DOWN, ^ref, :process, ^pid, _} -> :ok - after - 5_000 -> :ok - end - end) - - # Give the pipeline a moment to connect to the emulator - Process.sleep(500) - - {:ok, - topic: topic_name, - sub: sub_name, - subscription: subscription, - pipeline: pid, - pipeline_name: pipeline_name} - end - - describe "message delivery" do - test "receives a single published message", %{topic: topic} do - {:ok, [_msg_id]} = PubSubEmulator.publish(topic, ["hello world"]) - - assert_receive {:broadway_message, "hello world", _metadata}, 5_000 - end - - test "receives multiple published messages", %{topic: topic} do - payloads = Enum.map(1..5, &"message-#{&1}") - {:ok, _msg_ids} = PubSubEmulator.publish(topic, payloads) - - received = - Enum.map(1..5, fn _ -> - receive do - {:broadway_message, data, _meta} -> data - after - 5_000 -> flunk("Timed out waiting for message") - end - end) - - assert Enum.sort(received) == Enum.sort(payloads) - end - - test "message metadata contains messageId and publishTime", %{topic: topic} do - {:ok, [_msg_id]} = PubSubEmulator.publish(topic, ["meta-test"]) - - assert_receive {:broadway_message, "meta-test", metadata}, 5_000 - assert is_binary(metadata.messageId) - assert metadata.messageId != "" - # publishTime may be nil on some emulator versions — check the key exists - assert Map.has_key?(metadata, :publishTime) - assert Map.has_key?(metadata, :attributes) - end - - test "handles large batches without dropping messages", %{topic: topic} do - count = 50 - payloads = Enum.map(1..count, &"bulk-msg-#{&1}") - {:ok, _msg_ids} = PubSubEmulator.publish(topic, payloads) - - received = - Enum.map(1..count, fn _ -> - receive do - {:broadway_message, data, _meta} -> data - after - 10_000 -> flunk("Timed out waiting for bulk message") - end - end) - - assert length(received) == count - assert Enum.sort(received) == Enum.sort(payloads) - end - end - - describe "acknowledgement" do - test "acked messages are not redelivered", %{topic: topic, sub: sub} do - {:ok, [_id]} = PubSubEmulator.publish(topic, ["ack-me"]) - - assert_receive {:broadway_message, "ack-me", _}, 5_000 - - # Wait for ack to be processed, then confirm no pending messages remain - Process.sleep(500) - - {:ok, messages} = PubSubEmulator.pull(sub, max_messages: 5) - assert messages == [] - end - end - - describe "graceful shutdown" do - test "Broadway.stop completes promptly when messages are buffered", %{pipeline: pid} do - # Broadway.stop triggers the drain sequence. The pipeline should shut down - # promptly (within the shutdown timeout) rather than processing all buffered - # messages. We verify that stop returns within a reasonable time. - ref = Process.monitor(pid) - - try do - Broadway.stop(pid) - catch - :exit, _ -> :ok - end - - # Pipeline should shut down within 10 seconds (well under the 30s shutdown timeout). - # Before the fix, this would take minutes as all buffered messages were processed. - receive do - {:DOWN, ^ref, :process, ^pid, _reason} -> :ok - after - 10_000 -> flunk("Broadway.stop did not complete within 10 seconds") - end - end - - test "Broadway.stop completes promptly with buffered messages", %{topic: topic, pipeline: pid} do - # Publish enough messages to fill the buffer, then stop immediately. - # The pipeline should shut down quickly, nacking the buffered messages. - payloads = Enum.map(1..30, &"shutdown-msg-#{&1}") - {:ok, _msg_ids} = PubSubEmulator.publish(topic, payloads) - - # Give the pipeline a moment to receive some messages - Process.sleep(500) - - ref = Process.monitor(pid) - - try do - Broadway.stop(pid) - catch - :exit, _ -> :ok - end - - # Should complete well under the 30s shutdown timeout. - receive do - {:DOWN, ^ref, :process, ^pid, _reason} -> :ok - after - 10_000 -> flunk("Broadway.stop with buffered messages did not complete within 10 seconds") - end - end - end -end diff --git a/test/broadway_cloud_pub_sub/streaming/stress_test.exs b/test/broadway_cloud_pub_sub/streaming/stress_test.exs deleted file mode 100644 index f6e8e65..0000000 --- a/test/broadway_cloud_pub_sub/streaming/stress_test.exs +++ /dev/null @@ -1,1839 +0,0 @@ -defmodule BroadwayCloudPubSub.Streaming.StressTest do - @moduledoc """ - Stress tests for the Streaming Producer against the Pub/Sub emulator. - - Covers: - 1. High-volume burst (1000+ messages, fast processing) - 2. Rapid sequential batches - 3. Message completeness verification under load - 4. Streaming vs Pull producer comparison - 5. Both :gun and :mint adapters - 6. Pipeline stop/restart during message flow - 7. Concurrent publishers while pipeline processes - 8. High concurrency processors with near-zero processing time - - Run with: - - mix test test/broadway_cloud_pub_sub/streaming/stress_test.exs --only stress - - Requires the Pub/Sub emulator running on localhost:8085. - """ - - use ExUnit.Case, async: false - - @moduletag :stress - @moduletag timeout: 120_000 - - require Logger - - alias BroadwayCloudPubSub.PubSubEmulator - - # --------------------------------------------------------------------------- - # Token generators - # --------------------------------------------------------------------------- - - def noop_token, do: {:ok, "emulator-no-auth"} - - # --------------------------------------------------------------------------- - # Test Pipelines - # --------------------------------------------------------------------------- - - defmodule StreamingPipeline do - @moduledoc "Streaming pipeline that sends received data to test process." - use Broadway - - def start_link(opts) do - test_pid = Keyword.fetch!(opts, :test_pid) - subscription = Keyword.fetch!(opts, :subscription) - emulator_host = Keyword.fetch!(opts, :emulator_host) - name = Keyword.fetch!(opts, :name) - adapter = Keyword.get(opts, :adapter, :gun) - max_outstanding = Keyword.get(opts, :max_outstanding, 1000) - processor_concurrency = Keyword.get(opts, :processor_concurrency, 4) - - Broadway.start_link(__MODULE__, - name: name, - producer: [ - module: - {BroadwayCloudPubSub.Streaming.Producer, - subscription: subscription, - token_generator: {BroadwayCloudPubSub.Streaming.StressTest, :noop_token, []}, - grpc_endpoint: emulator_host, - use_ssl: false, - adapter: adapter, - max_outstanding_messages: max_outstanding, - on_failure: {:nack, 0}}, - concurrency: 1 - ], - processors: [ - default: [concurrency: processor_concurrency] - ], - context: %{test_pid: test_pid} - ) - end - - @impl Broadway - def handle_message(:default, message, %{test_pid: test_pid}) do - send(test_pid, {:msg, message.data}) - message - end - - @impl Broadway - def handle_failed(messages, %{test_pid: test_pid}) do - Enum.each(messages, fn msg -> - send(test_pid, {:failed, msg.data}) - end) - - messages - end - end - - defmodule PullPipeline do - @moduledoc "Pull-based pipeline for comparison." - use Broadway - - def start_link(opts) do - test_pid = Keyword.fetch!(opts, :test_pid) - subscription = Keyword.fetch!(opts, :subscription) - emulator_host = Keyword.fetch!(opts, :emulator_host) - name = Keyword.fetch!(opts, :name) - processor_concurrency = Keyword.get(opts, :processor_concurrency, 4) - - Broadway.start_link(__MODULE__, - name: name, - producer: [ - module: - {BroadwayCloudPubSub.Producer, - subscription: subscription, - token_generator: {BroadwayCloudPubSub.Streaming.StressTest, :noop_token, []}, - base_url: "http://#{emulator_host}", - receive_interval: 50, - max_number_of_messages: 100, - on_failure: {:nack, 0}}, - concurrency: 1 - ], - processors: [ - default: [concurrency: processor_concurrency] - ], - context: %{test_pid: test_pid} - ) - end - - @impl Broadway - def handle_message(:default, message, %{test_pid: test_pid}) do - send(test_pid, {:msg, message.data}) - message - end - - @impl Broadway - def handle_failed(messages, %{test_pid: test_pid}) do - Enum.each(messages, fn msg -> - send(test_pid, {:failed, msg.data}) - end) - - messages - end - end - - defmodule SlowPipeline do - @moduledoc "Pipeline with configurable processing delay." - use Broadway - - def start_link(opts) do - test_pid = Keyword.fetch!(opts, :test_pid) - subscription = Keyword.fetch!(opts, :subscription) - emulator_host = Keyword.fetch!(opts, :emulator_host) - name = Keyword.fetch!(opts, :name) - delay_ms = Keyword.get(opts, :delay_ms, 0) - adapter = Keyword.get(opts, :adapter, :gun) - max_outstanding = Keyword.get(opts, :max_outstanding, 1000) - processor_concurrency = Keyword.get(opts, :processor_concurrency, 4) - - Broadway.start_link(__MODULE__, - name: name, - producer: [ - module: - {BroadwayCloudPubSub.Streaming.Producer, - subscription: subscription, - token_generator: {BroadwayCloudPubSub.Streaming.StressTest, :noop_token, []}, - grpc_endpoint: emulator_host, - use_ssl: false, - adapter: adapter, - max_outstanding_messages: max_outstanding, - on_failure: {:nack, 0}}, - concurrency: 1 - ], - processors: [ - default: [concurrency: processor_concurrency] - ], - context: %{test_pid: test_pid, delay_ms: delay_ms} - ) - end - - @impl Broadway - def handle_message(:default, message, %{test_pid: test_pid, delay_ms: delay_ms}) do - if delay_ms > 0, do: Process.sleep(delay_ms) - send(test_pid, {:msg, message.data}) - message - end - - @impl Broadway - def handle_failed(messages, %{test_pid: test_pid}) do - Enum.each(messages, fn msg -> - send(test_pid, {:failed, msg.data}) - end) - - messages - end - end - - # --------------------------------------------------------------------------- - # Helpers - # --------------------------------------------------------------------------- - - setup_all do - # GRPC.Client.Supervisor is started automatically by the grpc_client OTP application - PubSubEmulator.start() - :ok - end - - defp unique_name, do: :"StressPipeline#{:erlang.unique_integer([:positive])}" - - defp setup_infra(prefix) do - topic = "#{prefix}-#{:erlang.unique_integer([:positive])}" - sub = "#{prefix}-sub-#{:erlang.unique_integer([:positive])}" - {_full_topic, full_sub} = PubSubEmulator.setup_topic_and_subscription(topic, sub) - {topic, sub, full_sub} - end - - defp stop_pipeline(pid) do - ref = Process.monitor(pid) - - try do - Broadway.stop(pid) - catch - :exit, _ -> :ok - end - - receive do - {:DOWN, ^ref, :process, ^pid, _} -> :ok - after - 10_000 -> :ok - end - end - - defp collect_messages(expected_count, timeout_ms) do - deadline = System.monotonic_time(:millisecond) + timeout_ms - collect_messages_loop(expected_count, deadline, []) - end - - defp collect_messages_loop(0, _deadline, acc), do: {:ok, Enum.reverse(acc)} - - defp collect_messages_loop(remaining, deadline, acc) do - now = System.monotonic_time(:millisecond) - wait = max(deadline - now, 0) - - receive do - {:msg, data} -> - collect_messages_loop(remaining - 1, deadline, [data | acc]) - after - wait -> - {:partial, Enum.reverse(acc), remaining} - end - end - - defp publish_in_batches(topic, total, batch_size) do - payloads = Enum.map(1..total, &"msg-#{&1}") - - payloads - |> Enum.chunk_every(batch_size) - |> Enum.each(fn batch -> - {:ok, _ids} = PubSubEmulator.publish(topic, batch) - end) - - payloads - end - - # --------------------------------------------------------------------------- - # Scenario 1: High-volume burst — 1000 messages, fast processing, Gun adapter - # --------------------------------------------------------------------------- - - describe "Scenario 1: High-volume burst (Gun)" do - test "receives all 1000 messages without loss" do - {topic, _sub, full_sub} = setup_infra("burst-gun") - name = unique_name() - - {:ok, pid} = - StreamingPipeline.start_link( - name: name, - test_pid: self(), - subscription: full_sub, - emulator_host: PubSubEmulator.host(), - adapter: :gun, - max_outstanding: 1000, - processor_concurrency: 8 - ) - - Process.sleep(500) - - expected = publish_in_batches(topic, 1000, 200) - - case collect_messages(1000, 60_000) do - {:ok, received} -> - assert length(received) == 1000 - assert Enum.sort(received) == Enum.sort(expected) - Logger.info("[Stress 1/Gun] All 1000 messages received.") - - {:partial, received, remaining} -> - Logger.warning( - "[Stress 1/Gun] Only #{length(received)}/1000 received, #{remaining} missing" - ) - - flunk("Missing #{remaining} messages out of 1000") - end - - stop_pipeline(pid) - end - end - - # --------------------------------------------------------------------------- - # Scenario 2: High-volume burst — 1000 messages, Mint adapter - # --------------------------------------------------------------------------- - - describe "Scenario 2: High-volume burst (Mint)" do - test "receives all 1000 messages without loss" do - {topic, _sub, full_sub} = setup_infra("burst-mint") - name = unique_name() - - {:ok, pid} = - StreamingPipeline.start_link( - name: name, - test_pid: self(), - subscription: full_sub, - emulator_host: PubSubEmulator.host(), - adapter: :mint, - max_outstanding: 1000, - processor_concurrency: 8 - ) - - Process.sleep(500) - - expected = publish_in_batches(topic, 1000, 200) - - case collect_messages(1000, 60_000) do - {:ok, received} -> - assert length(received) == 1000 - assert Enum.sort(received) == Enum.sort(expected) - Logger.info("[Stress 2/Mint] All 1000 messages received.") - - {:partial, received, remaining} -> - Logger.warning( - "[Stress 2/Mint] Only #{length(received)}/1000 received, #{remaining} missing" - ) - - flunk("Missing #{remaining} messages out of 1000") - end - - stop_pipeline(pid) - end - end - - # --------------------------------------------------------------------------- - # Scenario 3: Rapid sequential batches — 5 bursts of 200, no pause - # --------------------------------------------------------------------------- - - describe "Scenario 3: Rapid sequential batches" do - test "handles 5 rapid bursts of 200 messages (Gun)" do - {topic, _sub, full_sub} = setup_infra("rapid-gun") - name = unique_name() - - {:ok, pid} = - StreamingPipeline.start_link( - name: name, - test_pid: self(), - subscription: full_sub, - emulator_host: PubSubEmulator.host(), - adapter: :gun, - max_outstanding: 1000, - processor_concurrency: 8 - ) - - Process.sleep(500) - - # Publish 5 bursts of 200 messages back-to-back - all_expected = - Enum.flat_map(1..5, fn batch_num -> - payloads = Enum.map(1..200, &"batch#{batch_num}-msg-#{&1}") - {:ok, _ids} = PubSubEmulator.publish(topic, payloads) - payloads - end) - - case collect_messages(1000, 60_000) do - {:ok, received} -> - assert length(received) == 1000 - assert Enum.sort(received) == Enum.sort(all_expected) - Logger.info("[Stress 3] All 1000 messages from 5 bursts received.") - - {:partial, received, remaining} -> - Logger.warning("[Stress 3] #{length(received)}/1000 received, #{remaining} missing") - - flunk("Missing #{remaining} messages after rapid sequential batches") - end - - stop_pipeline(pid) - end - end - - # --------------------------------------------------------------------------- - # Scenario 4: Demand pressure — low max_outstanding, high message volume - # --------------------------------------------------------------------------- - - describe "Scenario 4: Demand pressure with low max_outstanding" do - test "handles 500 messages with max_outstanding=10 (Gun)" do - {topic, _sub, full_sub} = setup_infra("demand-gun") - name = unique_name() - - {:ok, pid} = - StreamingPipeline.start_link( - name: name, - test_pid: self(), - subscription: full_sub, - emulator_host: PubSubEmulator.host(), - adapter: :gun, - # Very low outstanding — forces heavy demand cycling - max_outstanding: 10, - processor_concurrency: 2 - ) - - Process.sleep(500) - - expected = publish_in_batches(topic, 500, 50) - - case collect_messages(500, 60_000) do - {:ok, received} -> - assert length(received) == 500 - assert Enum.sort(received) == Enum.sort(expected) - Logger.info("[Stress 4] All 500 messages received with max_outstanding=10.") - - {:partial, received, remaining} -> - Logger.warning("[Stress 4] #{length(received)}/500 received, #{remaining} missing") - - flunk("Missing #{remaining} messages with constrained outstanding") - end - - stop_pipeline(pid) - end - - test "handles 500 messages with max_outstanding=10 (Mint)" do - {topic, _sub, full_sub} = setup_infra("demand-mint") - name = unique_name() - - {:ok, pid} = - StreamingPipeline.start_link( - name: name, - test_pid: self(), - subscription: full_sub, - emulator_host: PubSubEmulator.host(), - adapter: :mint, - max_outstanding: 10, - processor_concurrency: 2 - ) - - Process.sleep(500) - - expected = publish_in_batches(topic, 500, 50) - - case collect_messages(500, 60_000) do - {:ok, received} -> - assert length(received) == 500 - assert Enum.sort(received) == Enum.sort(expected) - Logger.info("[Stress 4/Mint] All 500 messages received with max_outstanding=10.") - - {:partial, received, remaining} -> - Logger.warning("[Stress 4/Mint] #{length(received)}/500 received, #{remaining} missing") - - flunk("Missing #{remaining} messages with constrained outstanding (Mint)") - end - - stop_pipeline(pid) - end - end - - # --------------------------------------------------------------------------- - # Scenario 5: Streaming vs Pull producer comparison - # --------------------------------------------------------------------------- - - describe "Scenario 5: Streaming vs Pull comparison" do - test "both producers receive all 200 messages" do - # -- Streaming (Gun) -- - {s_topic, _s_sub, s_full_sub} = setup_infra("cmp-stream") - s_name = unique_name() - - {:ok, s_pid} = - StreamingPipeline.start_link( - name: s_name, - test_pid: self(), - subscription: s_full_sub, - emulator_host: PubSubEmulator.host(), - adapter: :gun, - max_outstanding: 500, - processor_concurrency: 4 - ) - - Process.sleep(500) - - s_expected = publish_in_batches(s_topic, 200, 100) - s_start = System.monotonic_time(:millisecond) - - s_result = collect_messages(200, 30_000) - s_elapsed = System.monotonic_time(:millisecond) - s_start - - stop_pipeline(s_pid) - - # -- Pull -- - {p_topic, _p_sub, p_full_sub} = setup_infra("cmp-pull") - p_name = unique_name() - - {:ok, p_pid} = - PullPipeline.start_link( - name: p_name, - test_pid: self(), - subscription: p_full_sub, - emulator_host: PubSubEmulator.host(), - processor_concurrency: 4 - ) - - Process.sleep(500) - - p_expected = publish_in_batches(p_topic, 200, 100) - p_start = System.monotonic_time(:millisecond) - - p_result = collect_messages(200, 30_000) - p_elapsed = System.monotonic_time(:millisecond) - p_start - - stop_pipeline(p_pid) - - # Assert both received everything - case s_result do - {:ok, s_received} -> - assert length(s_received) == 200 - assert Enum.sort(s_received) == Enum.sort(s_expected) - - {:partial, s_received, s_remaining} -> - flunk("Streaming: only #{length(s_received)}/200, missing #{s_remaining}") - end - - case p_result do - {:ok, p_received} -> - assert length(p_received) == 200 - assert Enum.sort(p_received) == Enum.sort(p_expected) - - {:partial, p_received, p_remaining} -> - flunk("Pull: only #{length(p_received)}/200, missing #{p_remaining}") - end - - Logger.info("[Stress 5] Streaming: #{s_elapsed}ms, Pull: #{p_elapsed}ms for 200 messages") - end - end - - # --------------------------------------------------------------------------- - # Scenario 6: Pipeline stop/restart during message flow - # --------------------------------------------------------------------------- - - describe "Scenario 6: Stop and restart during message flow" do - test "no messages lost after restart" do - {topic, _sub, full_sub} = setup_infra("restart") - name = unique_name() - - # Start pipeline - {:ok, pid1} = - StreamingPipeline.start_link( - name: name, - test_pid: self(), - subscription: full_sub, - emulator_host: PubSubEmulator.host(), - adapter: :gun, - max_outstanding: 100, - processor_concurrency: 4 - ) - - Process.sleep(500) - - # Publish first batch - batch1 = Enum.map(1..100, &"phase1-#{&1}") - {:ok, _} = PubSubEmulator.publish(topic, batch1) - - # Collect some messages, then stop - Process.sleep(2000) - stop_pipeline(pid1) - - # Drain whatever arrived from first pipeline - phase1_received = drain_mailbox() - - # Publish second batch while pipeline is down - batch2 = Enum.map(1..100, &"phase2-#{&1}") - {:ok, _} = PubSubEmulator.publish(topic, batch2) - - # Restart with a DIFFERENT name (since the old name is taken by the stopped process) - name2 = unique_name() - - {:ok, pid2} = - StreamingPipeline.start_link( - name: name2, - test_pid: self(), - subscription: full_sub, - emulator_host: PubSubEmulator.host(), - adapter: :gun, - max_outstanding: 500, - processor_concurrency: 4 - ) - - Process.sleep(500) - - # All messages not yet acked + batch2 should arrive - all_expected = MapSet.new(batch1 ++ batch2) - all_received_from_restart = collect_remaining(all_expected, phase1_received, 30_000) - - stop_pipeline(pid2) - - missing = MapSet.difference(all_expected, all_received_from_restart) - - if MapSet.size(missing) > 0 do - Logger.warning( - "[Stress 6] Missing #{MapSet.size(missing)} messages: #{inspect(Enum.take(MapSet.to_list(missing), 10))}" - ) - - flunk("Lost #{MapSet.size(missing)} messages across stop/restart") - end - - Logger.info("[Stress 6] All 200 messages recovered after pipeline restart.") - end - end - - # --------------------------------------------------------------------------- - # Scenario 7: Concurrent publishers - # --------------------------------------------------------------------------- - - describe "Scenario 7: Concurrent publishers" do - test "handles messages from 5 concurrent publishers (Gun)" do - {topic, _sub, full_sub} = setup_infra("conc-pub") - name = unique_name() - - {:ok, pid} = - StreamingPipeline.start_link( - name: name, - test_pid: self(), - subscription: full_sub, - emulator_host: PubSubEmulator.host(), - adapter: :gun, - max_outstanding: 1000, - processor_concurrency: 8 - ) - - Process.sleep(500) - - # 5 tasks each publishing 200 messages concurrently - tasks = - Enum.map(1..5, fn pub_id -> - Task.async(fn -> - payloads = Enum.map(1..200, &"pub#{pub_id}-msg-#{&1}") - {:ok, _} = PubSubEmulator.publish(topic, payloads) - payloads - end) - end) - - all_expected = - tasks - |> Task.await_many(30_000) - |> List.flatten() - - case collect_messages(1000, 60_000) do - {:ok, received} -> - assert length(received) == 1000 - assert Enum.sort(received) == Enum.sort(all_expected) - Logger.info("[Stress 7] All 1000 messages from 5 concurrent publishers received.") - - {:partial, received, remaining} -> - Logger.warning("[Stress 7] #{length(received)}/1000 received, #{remaining} missing") - - flunk("Missing #{remaining} messages from concurrent publishers") - end - - stop_pipeline(pid) - end - end - - # --------------------------------------------------------------------------- - # Scenario 8: High concurrency processors, near-zero processing time - # --------------------------------------------------------------------------- - - describe "Scenario 8: High processor concurrency, zero delay" do - test "16 processors handle 2000 messages with zero processing time (Gun)" do - {topic, _sub, full_sub} = setup_infra("fast-gun") - name = unique_name() - - {:ok, pid} = - StreamingPipeline.start_link( - name: name, - test_pid: self(), - subscription: full_sub, - emulator_host: PubSubEmulator.host(), - adapter: :gun, - max_outstanding: 2000, - processor_concurrency: 16 - ) - - Process.sleep(500) - - expected = publish_in_batches(topic, 2000, 200) - - start_time = System.monotonic_time(:millisecond) - - case collect_messages(2000, 90_000) do - {:ok, received} -> - elapsed = System.monotonic_time(:millisecond) - start_time - assert length(received) == 2000 - assert Enum.sort(received) == Enum.sort(expected) - - Logger.info( - "[Stress 8/Gun] 2000 messages with 16 processors in #{elapsed}ms (#{Float.round(2000 / (elapsed / 1000), 1)} msgs/sec)" - ) - - {:partial, received, remaining} -> - elapsed = System.monotonic_time(:millisecond) - start_time - - Logger.warning( - "[Stress 8/Gun] #{length(received)}/2000 in #{elapsed}ms, #{remaining} missing" - ) - - flunk("Missing #{remaining} messages with high concurrency") - end - - stop_pipeline(pid) - end - - test "16 processors handle 2000 messages with zero processing time (Mint)" do - {topic, _sub, full_sub} = setup_infra("fast-mint") - name = unique_name() - - {:ok, pid} = - StreamingPipeline.start_link( - name: name, - test_pid: self(), - subscription: full_sub, - emulator_host: PubSubEmulator.host(), - adapter: :mint, - max_outstanding: 2000, - processor_concurrency: 16 - ) - - Process.sleep(500) - - expected = publish_in_batches(topic, 2000, 200) - - start_time = System.monotonic_time(:millisecond) - - case collect_messages(2000, 90_000) do - {:ok, received} -> - elapsed = System.monotonic_time(:millisecond) - start_time - assert length(received) == 2000 - assert Enum.sort(received) == Enum.sort(expected) - - Logger.info( - "[Stress 8/Mint] 2000 messages with 16 processors in #{elapsed}ms (#{Float.round(2000 / (elapsed / 1000), 1)} msgs/sec)" - ) - - {:partial, received, remaining} -> - elapsed = System.monotonic_time(:millisecond) - start_time - - Logger.warning( - "[Stress 8/Mint] #{length(received)}/2000 in #{elapsed}ms, #{remaining} missing" - ) - - flunk("Missing #{remaining} messages with high concurrency (Mint)") - end - - stop_pipeline(pid) - end - end - - # --------------------------------------------------------------------------- - # Scenario 9: No duplicate messages - # --------------------------------------------------------------------------- - - describe "Scenario 9: No duplicate delivery" do - test "500 messages arrive exactly once (Gun)" do - {topic, _sub, full_sub} = setup_infra("nodup-gun") - name = unique_name() - - {:ok, pid} = - StreamingPipeline.start_link( - name: name, - test_pid: self(), - subscription: full_sub, - emulator_host: PubSubEmulator.host(), - adapter: :gun, - max_outstanding: 500, - processor_concurrency: 4 - ) - - Process.sleep(500) - - expected = publish_in_batches(topic, 500, 100) - - case collect_messages(500, 30_000) do - {:ok, received} -> - assert length(received) == 500 - - unique = Enum.uniq(received) - - if length(unique) != length(received) do - dupes = received -- unique - Logger.warning("[Stress 9] Duplicates found: #{inspect(Enum.take(dupes, 10))}") - flunk("Found #{length(received) - length(unique)} duplicate messages") - end - - assert Enum.sort(received) == Enum.sort(expected) - Logger.info("[Stress 9] 500 messages, zero duplicates.") - - {:partial, received, remaining} -> - flunk("Only #{length(received)}/500 received, #{remaining} missing") - end - - # Wait a bit for any late duplicates - Process.sleep(2000) - late_dupes = drain_mailbox() - - if length(late_dupes) > 0 do - Logger.warning( - "[Stress 9] #{length(late_dupes)} late duplicate(s) arrived after collect!" - ) - - flunk("#{length(late_dupes)} late duplicates detected") - end - - stop_pipeline(pid) - end - end - - # --------------------------------------------------------------------------- - # Scenario 10: Messages published BEFORE pipeline starts - # --------------------------------------------------------------------------- - - describe "Scenario 10: Pre-published messages" do - test "receives messages that were published before pipeline started" do - {topic, _sub, full_sub} = setup_infra("prepub") - - # Publish BEFORE pipeline exists - expected = publish_in_batches(topic, 300, 100) - - # Wait to make sure messages are committed in emulator - Process.sleep(500) - - name = unique_name() - - {:ok, pid} = - StreamingPipeline.start_link( - name: name, - test_pid: self(), - subscription: full_sub, - emulator_host: PubSubEmulator.host(), - adapter: :gun, - max_outstanding: 500, - processor_concurrency: 4 - ) - - case collect_messages(300, 30_000) do - {:ok, received} -> - assert length(received) == 300 - assert Enum.sort(received) == Enum.sort(expected) - Logger.info("[Stress 10] All 300 pre-published messages received.") - - {:partial, received, remaining} -> - flunk("Only #{length(received)}/300 pre-published messages, #{remaining} missing") - end - - stop_pipeline(pid) - end - end - - # --------------------------------------------------------------------------- - # Scenario 11: Slow processing with message backlog - # --------------------------------------------------------------------------- - - describe "Scenario 11: Slow processing (simulated delay)" do - test "handles 100 messages with 50ms processing delay" do - {topic, _sub, full_sub} = setup_infra("slow-proc") - name = unique_name() - - {:ok, pid} = - SlowPipeline.start_link( - name: name, - test_pid: self(), - subscription: full_sub, - emulator_host: PubSubEmulator.host(), - adapter: :gun, - max_outstanding: 100, - processor_concurrency: 4, - delay_ms: 50 - ) - - Process.sleep(500) - - expected = publish_in_batches(topic, 100, 50) - - start_time = System.monotonic_time(:millisecond) - - case collect_messages(100, 60_000) do - {:ok, received} -> - elapsed = System.monotonic_time(:millisecond) - start_time - assert length(received) == 100 - assert Enum.sort(received) == Enum.sort(expected) - - Logger.info("[Stress 11] 100 messages with 50ms delay in #{elapsed}ms") - - {:partial, received, remaining} -> - flunk("Only #{length(received)}/100, #{remaining} missing with slow processing") - end - - stop_pipeline(pid) - end - end - - # --------------------------------------------------------------------------- - # Private helpers for Scenario 6 - # --------------------------------------------------------------------------- - - defp drain_mailbox do - drain_mailbox_loop([]) - end - - defp drain_mailbox_loop(acc) do - receive do - {:msg, data} -> drain_mailbox_loop([data | acc]) - {:failed, data} -> drain_mailbox_loop([data | acc]) - after - 100 -> Enum.reverse(acc) - end - end - - defp collect_remaining(all_expected, already_received, timeout_ms) do - received_set = MapSet.new(already_received) - remaining = MapSet.difference(all_expected, received_set) - remaining_count = MapSet.size(remaining) - - if remaining_count == 0 do - received_set - else - deadline = System.monotonic_time(:millisecond) + timeout_ms - collect_remaining_loop(received_set, all_expected, deadline) - end - end - - defp collect_remaining_loop(received_set, all_expected, deadline) do - if MapSet.equal?(received_set, all_expected) do - received_set - else - now = System.monotonic_time(:millisecond) - wait = max(deadline - now, 0) - - receive do - {:msg, data} -> - collect_remaining_loop(MapSet.put(received_set, data), all_expected, deadline) - after - wait -> received_set - end - end - end - - # =========================================================================== - # AGGRESSIVE SCENARIOS — Trying to break the producer - # =========================================================================== - - # --------------------------------------------------------------------------- - # Scenario 12: Extreme backpressure — max_outstanding = 1 - # Forces the producer to deliver ONE message at a time. - # --------------------------------------------------------------------------- - - describe "Scenario 12: Extreme backpressure (max_outstanding=1)" do - test "100 messages with max_outstanding=1 (Gun)" do - {topic, _sub, full_sub} = setup_infra("extreme-bp-gun") - name = unique_name() - - {:ok, pid} = - StreamingPipeline.start_link( - name: name, - test_pid: self(), - subscription: full_sub, - emulator_host: PubSubEmulator.host(), - adapter: :gun, - max_outstanding: 1, - processor_concurrency: 1 - ) - - Process.sleep(500) - - expected = publish_in_batches(topic, 100, 20) - - case collect_messages(100, 60_000) do - {:ok, received} -> - assert length(received) == 100 - assert Enum.sort(received) == Enum.sort(expected) - Logger.info("[Stress 12/Gun] 100 messages with max_outstanding=1 — all received.") - - {:partial, received, remaining} -> - flunk("max_outstanding=1 (Gun): only #{length(received)}/100, missing #{remaining}") - end - - stop_pipeline(pid) - end - - test "100 messages with max_outstanding=1 (Mint)" do - {topic, _sub, full_sub} = setup_infra("extreme-bp-mint") - name = unique_name() - - {:ok, pid} = - StreamingPipeline.start_link( - name: name, - test_pid: self(), - subscription: full_sub, - emulator_host: PubSubEmulator.host(), - adapter: :mint, - max_outstanding: 1, - processor_concurrency: 1 - ) - - Process.sleep(500) - - expected = publish_in_batches(topic, 100, 20) - - case collect_messages(100, 60_000) do - {:ok, received} -> - assert length(received) == 100 - assert Enum.sort(received) == Enum.sort(expected) - Logger.info("[Stress 12/Mint] 100 messages with max_outstanding=1 — all received.") - - {:partial, received, remaining} -> - flunk("max_outstanding=1 (Mint): only #{length(received)}/100, missing #{remaining}") - end - - stop_pipeline(pid) - end - end - - # --------------------------------------------------------------------------- - # Scenario 13: Massive burst — 5000 messages - # Tests the upper end of throughput. - # --------------------------------------------------------------------------- - - describe "Scenario 13: Massive burst (5000 messages)" do - test "receives all 5000 messages (Gun)" do - {topic, _sub, full_sub} = setup_infra("massive-gun") - name = unique_name() - - {:ok, pid} = - StreamingPipeline.start_link( - name: name, - test_pid: self(), - subscription: full_sub, - emulator_host: PubSubEmulator.host(), - adapter: :gun, - max_outstanding: 5000, - processor_concurrency: 16 - ) - - Process.sleep(500) - - expected = publish_in_batches(topic, 5000, 500) - - start_time = System.monotonic_time(:millisecond) - - case collect_messages(5000, 90_000) do - {:ok, received} -> - elapsed = System.monotonic_time(:millisecond) - start_time - assert length(received) == 5000 - assert Enum.sort(received) == Enum.sort(expected) - - Logger.info( - "[Stress 13/Gun] 5000 messages in #{elapsed}ms " <> - "(#{Float.round(5000 / (elapsed / 1000), 1)} msgs/sec)" - ) - - {:partial, received, remaining} -> - elapsed = System.monotonic_time(:millisecond) - start_time - - flunk( - "Massive burst (Gun): #{length(received)}/5000 in #{elapsed}ms, #{remaining} missing" - ) - end - - stop_pipeline(pid) - end - - test "receives all 5000 messages (Mint)" do - {topic, _sub, full_sub} = setup_infra("massive-mint") - name = unique_name() - - {:ok, pid} = - StreamingPipeline.start_link( - name: name, - test_pid: self(), - subscription: full_sub, - emulator_host: PubSubEmulator.host(), - adapter: :mint, - max_outstanding: 5000, - processor_concurrency: 16 - ) - - Process.sleep(500) - - expected = publish_in_batches(topic, 5000, 500) - - start_time = System.monotonic_time(:millisecond) - - case collect_messages(5000, 90_000) do - {:ok, received} -> - elapsed = System.monotonic_time(:millisecond) - start_time - assert length(received) == 5000 - assert Enum.sort(received) == Enum.sort(expected) - - Logger.info( - "[Stress 13/Mint] 5000 messages in #{elapsed}ms " <> - "(#{Float.round(5000 / (elapsed / 1000), 1)} msgs/sec)" - ) - - {:partial, received, remaining} -> - elapsed = System.monotonic_time(:millisecond) - start_time - - flunk( - "Massive burst (Mint): #{length(received)}/5000 in #{elapsed}ms, #{remaining} missing" - ) - end - - stop_pipeline(pid) - end - end - - # --------------------------------------------------------------------------- - # Scenario 14: Kill the gRPC connection mid-stream - # Simulates network failure while messages are actively flowing. - # --------------------------------------------------------------------------- - - describe "Scenario 14: Kill connection during active message flow" do - test "recovers after connection kill (Gun)" do - {topic, _sub, full_sub} = setup_infra("connkill-gun") - name = unique_name() - - {:ok, pid} = - StreamingPipeline.start_link( - name: name, - test_pid: self(), - subscription: full_sub, - emulator_host: PubSubEmulator.host(), - adapter: :gun, - max_outstanding: 500, - processor_concurrency: 4 - ) - - Process.sleep(500) - - # Publish initial batch - batch1 = Enum.map(1..200, &"before-kill-#{&1}") - {:ok, _} = PubSubEmulator.publish(topic, batch1) - - # Wait for batch1 to be fully consumed - case collect_messages(200, 15_000) do - {:ok, _} -> - Logger.info("[Stress 14/Gun] batch1 fully consumed before kill") - - {:partial, received, remaining} -> - Logger.info( - "[Stress 14/Gun] batch1 partially consumed: #{length(received)}/200, #{remaining} remaining" - ) - end - - # Kill the Gun connection process and simulate gun_down so StreamManager - # detects the disconnect and reconnects. - stream_manager = Module.concat(name, "StreamManager_0") - sm_state = :sys.get_state(stream_manager) - - case sm_state do - %{conn_pid: conn_pid} when is_pid(conn_pid) -> - Logger.info("[Stress 14/Gun] Killing conn_pid: #{inspect(conn_pid)}") - Process.exit(conn_pid, :kill) - - # Simulate the gun_down message that Gun would normally send - # send(stream_manager, {:gun_down, conn_pid, :http2, :killed, []}) - - _ -> - Logger.warning("[Stress 14/Gun] No conn_pid found in state") - end - - # Wait for reconnect (backoff_min is 1000ms by default, plus connection setup) - Process.sleep(5000) - - # Inspect StreamManager state to verify reconnection - sm_state_after = :sys.get_state(stream_manager) - - Logger.info( - "[Stress 14/Gun] After reconnect: grpc_stream=#{sm_state_after.grpc_stream != nil}, " <> - "pending_demand=#{sm_state_after.pending_demand}, " <> - "buffer_size=#{:queue.len(sm_state_after.message_buffer)}, " <> - "outstanding=#{map_size(sm_state_after.outstanding)}" - ) - - # Publish more AFTER reconnect - batch2 = Enum.map(1..200, &"after-kill-#{&1}") - {:ok, _} = PubSubEmulator.publish(topic, batch2) - - case collect_messages(200, 30_000) do - {:ok, received} -> - assert length(received) == 200 - Logger.info("[Stress 14/Gun] All 200 post-kill messages received.") - - {:partial, received, remaining} -> - # Log final state for diagnostics - sm_final = :sys.get_state(stream_manager) - - Logger.warning( - "[Stress 14/Gun] Post-kill: #{length(received)}/200, #{remaining} missing. " <> - "stream=#{sm_final.grpc_stream != nil}, demand=#{sm_final.pending_demand}, " <> - "buffer=#{:queue.len(sm_final.message_buffer)}" - ) - - flunk("Lost #{remaining} messages after connection kill") - end - - stop_pipeline(pid) - end - - test "recovers after connection kill — gun_down only (Gun)" do - {topic, _sub, full_sub} = setup_infra("connkill-gun2") - name = unique_name() - - {:ok, pid} = - StreamingPipeline.start_link( - name: name, - test_pid: self(), - subscription: full_sub, - emulator_host: PubSubEmulator.host(), - adapter: :gun, - max_outstanding: 500, - processor_concurrency: 4 - ) - - Process.sleep(500) - - batch1 = Enum.map(1..200, &"before-kill-#{&1}") - {:ok, _} = PubSubEmulator.publish(topic, batch1) - - case collect_messages(200, 15_000) do - {:ok, _} -> - Logger.info("[Stress 14/Gun2] batch1 fully consumed before kill") - - {:partial, received, remaining} -> - Logger.info( - "[Stress 14/Gun2] batch1 partially consumed: #{length(received)}/200, #{remaining} remaining" - ) - end - - # Mirror the Mint test exactly: get conn_pid, kill the process, then send the - # adapter-level disconnect signal so StreamManager detects it via the new handler. - stream_manager = Module.concat(name, "StreamManager_0") - sm_state = :sys.get_state(stream_manager) - - case sm_state do - %{conn_pid: conn_pid} when is_pid(conn_pid) -> - Logger.info("[Stress 14/Gun2] Killing conn_pid: #{inspect(conn_pid)}") - Process.exit(conn_pid, :kill) - # Send the gun_down signal directly to StreamManager — mirrors the Mint - # test which sends {:elixir_grpc, :connection_down, conn_pid}. - send(stream_manager, {:gun_down, conn_pid, :http2, :killed, []}) - - _ -> - Logger.warning("[Stress 14/Gun2] No conn_pid found in state") - end - - Process.sleep(5000) - - sm_state_after = :sys.get_state(stream_manager) - - Logger.info( - "[Stress 14/Gun2] After reconnect: grpc_stream=#{sm_state_after.grpc_stream != nil}, " <> - "pending_demand=#{sm_state_after.pending_demand}, " <> - "buffer_size=#{:queue.len(sm_state_after.message_buffer)}, " <> - "outstanding=#{map_size(sm_state_after.outstanding)}" - ) - - batch2 = Enum.map(1..200, &"after-kill-#{&1}") - {:ok, _} = PubSubEmulator.publish(topic, batch2) - - case collect_messages(200, 30_000) do - {:ok, received} -> - assert length(received) == 200 - Logger.info("[Stress 14/Gun2] All 200 post-kill messages received.") - - {:partial, received, remaining} -> - sm_final = :sys.get_state(stream_manager) - - Logger.warning( - "[Stress 14/Gun2] Post-kill: #{length(received)}/200, #{remaining} missing. " <> - "stream=#{sm_final.grpc_stream != nil}, demand=#{sm_final.pending_demand}, " <> - "buffer=#{:queue.len(sm_final.message_buffer)}" - ) - - flunk("Lost #{remaining} messages after connection kill (Gun2)") - end - - stop_pipeline(pid) - end - - test "recovers after connection kill (Mint)" do - {topic, _sub, full_sub} = setup_infra("connkill-mint") - name = unique_name() - - {:ok, pid} = - StreamingPipeline.start_link( - name: name, - test_pid: self(), - subscription: full_sub, - emulator_host: PubSubEmulator.host(), - adapter: :mint, - max_outstanding: 500, - processor_concurrency: 4 - ) - - Process.sleep(500) - - batch1 = Enum.map(1..200, &"before-kill-#{&1}") - {:ok, _} = PubSubEmulator.publish(topic, batch1) - - # Wait for batch1 to be fully consumed - case collect_messages(200, 15_000) do - {:ok, _} -> - Logger.info("[Stress 14/Mint] batch1 fully consumed before kill") - - {:partial, received, remaining} -> - Logger.info( - "[Stress 14/Mint] batch1 partially consumed: #{length(received)}/200, #{remaining} remaining" - ) - end - - # For Mint, kill the conn_pid and simulate the connection_down message - stream_manager = Module.concat(name, "StreamManager_0") - sm_state = :sys.get_state(stream_manager) - - case sm_state do - %{conn_pid: conn_pid} when is_pid(conn_pid) -> - Logger.info("[Stress 14/Mint] Killing conn_pid: #{inspect(conn_pid)}") - Process.exit(conn_pid, :kill) - # Simulate the Mint connection down event - send(stream_manager, {:elixir_grpc, :connection_down, conn_pid}) - - _ -> - Logger.warning("[Stress 14/Mint] No conn_pid found in state") - end - - # Wait for reconnect - Process.sleep(5000) - - # Inspect StreamManager state to verify reconnection - sm_state_after = :sys.get_state(stream_manager) - - Logger.info( - "[Stress 14/Mint] After reconnect: grpc_stream=#{sm_state_after.grpc_stream != nil}, " <> - "pending_demand=#{sm_state_after.pending_demand}, " <> - "buffer_size=#{:queue.len(sm_state_after.message_buffer)}, " <> - "outstanding=#{map_size(sm_state_after.outstanding)}" - ) - - batch2 = Enum.map(1..200, &"after-kill-#{&1}") - {:ok, _} = PubSubEmulator.publish(topic, batch2) - - case collect_messages(200, 30_000) do - {:ok, received} -> - assert length(received) == 200 - Logger.info("[Stress 14/Mint] All 200 post-kill messages received.") - - {:partial, received, remaining} -> - sm_final = :sys.get_state(stream_manager) - - Logger.warning( - "[Stress 14/Mint] Post-kill: #{length(received)}/200, #{remaining} missing. " <> - "stream=#{sm_final.grpc_stream != nil}, demand=#{sm_final.pending_demand}, " <> - "buffer=#{:queue.len(sm_final.message_buffer)}" - ) - - flunk("Lost #{remaining} messages after connection kill (Mint)") - end - - stop_pipeline(pid) - end - end - - # --------------------------------------------------------------------------- - # Scenario 15: Publish continuously while processing - # Simulates a steady stream of publishes during active consumption. - # --------------------------------------------------------------------------- - - describe "Scenario 15: Continuous publish during processing" do - test "handles steady publish stream (Gun)" do - {topic, _sub, full_sub} = setup_infra("continuous-gun") - name = unique_name() - - {:ok, pid} = - StreamingPipeline.start_link( - name: name, - test_pid: self(), - subscription: full_sub, - emulator_host: PubSubEmulator.host(), - adapter: :gun, - max_outstanding: 500, - processor_concurrency: 8 - ) - - Process.sleep(500) - - # Publish 20 batches of 50 with small delays between batches - # to simulate a steady incoming stream - total_msgs = 1000 - - publisher_task = - Task.async(fn -> - Enum.flat_map(1..20, fn batch_num -> - payloads = Enum.map(1..50, &"stream-b#{batch_num}-#{&1}") - {:ok, _} = PubSubEmulator.publish(topic, payloads) - # Small delay to simulate realistic publishing rate - Process.sleep(50) - payloads - end) - end) - - all_expected = Task.await(publisher_task, 30_000) - - case collect_messages(total_msgs, 60_000) do - {:ok, received} -> - assert length(received) == total_msgs - assert Enum.sort(received) == Enum.sort(all_expected) - - Logger.info( - "[Stress 15/Gun] All #{total_msgs} continuously-published messages received." - ) - - {:partial, received, remaining} -> - flunk( - "Continuous publish (Gun): #{length(received)}/#{total_msgs}, #{remaining} missing" - ) - end - - stop_pipeline(pid) - end - end - - # --------------------------------------------------------------------------- - # Scenario 16: Extreme processor concurrency — 32 processors, zero delay - # Tests for race conditions in demand handling. - # --------------------------------------------------------------------------- - - describe "Scenario 16: Extreme processor concurrency (32)" do - test "32 processors handle 3000 messages (Gun)" do - {topic, _sub, full_sub} = setup_infra("extreme-proc-gun") - name = unique_name() - - {:ok, pid} = - StreamingPipeline.start_link( - name: name, - test_pid: self(), - subscription: full_sub, - emulator_host: PubSubEmulator.host(), - adapter: :gun, - max_outstanding: 3000, - processor_concurrency: 32 - ) - - Process.sleep(500) - - expected = publish_in_batches(topic, 3000, 300) - - start_time = System.monotonic_time(:millisecond) - - case collect_messages(3000, 90_000) do - {:ok, received} -> - elapsed = System.monotonic_time(:millisecond) - start_time - assert length(received) == 3000 - assert Enum.sort(received) == Enum.sort(expected) - - Logger.info( - "[Stress 16/Gun] 3000 msgs, 32 processors in #{elapsed}ms " <> - "(#{Float.round(3000 / (elapsed / 1000), 1)} msgs/sec)" - ) - - {:partial, received, remaining} -> - flunk("32 processors (Gun): #{length(received)}/3000, #{remaining} missing") - end - - stop_pipeline(pid) - end - - test "32 processors handle 3000 messages (Mint)" do - {topic, _sub, full_sub} = setup_infra("extreme-proc-mint") - name = unique_name() - - {:ok, pid} = - StreamingPipeline.start_link( - name: name, - test_pid: self(), - subscription: full_sub, - emulator_host: PubSubEmulator.host(), - adapter: :mint, - max_outstanding: 3000, - processor_concurrency: 32 - ) - - Process.sleep(500) - - expected = publish_in_batches(topic, 3000, 300) - - start_time = System.monotonic_time(:millisecond) - - case collect_messages(3000, 90_000) do - {:ok, received} -> - elapsed = System.monotonic_time(:millisecond) - start_time - assert length(received) == 3000 - assert Enum.sort(received) == Enum.sort(expected) - - Logger.info( - "[Stress 16/Mint] 3000 msgs, 32 processors in #{elapsed}ms " <> - "(#{Float.round(3000 / (elapsed / 1000), 1)} msgs/sec)" - ) - - {:partial, received, remaining} -> - flunk("32 processors (Mint): #{length(received)}/3000, #{remaining} missing") - end - - stop_pipeline(pid) - end - end - - # --------------------------------------------------------------------------- - # Scenario 17: Rapid stop/start cycles - # Tests that the producer cleans up properly when repeatedly started/stopped. - # --------------------------------------------------------------------------- - - describe "Scenario 17: Rapid stop/start cycles" do - test "survives 5 rapid start/stop cycles, then processes all messages" do - {topic, _sub, full_sub} = setup_infra("rapid-restart") - - # Start and stop 5 times rapidly - Enum.each(1..5, fn cycle -> - name = unique_name() - - {:ok, cycle_pid} = - StreamingPipeline.start_link( - name: name, - test_pid: self(), - subscription: full_sub, - emulator_host: PubSubEmulator.host(), - adapter: :gun, - max_outstanding: 100, - processor_concurrency: 2 - ) - - Process.sleep(200) - stop_pipeline(cycle_pid) - Logger.info("[Stress 17] Cycle #{cycle}/5 completed.") - end) - - # Drain any stale messages from cycles - drain_mailbox() - - # Now publish messages and start a final pipeline - expected = publish_in_batches(topic, 200, 100) - Process.sleep(200) - - final_name = unique_name() - - {:ok, final_pid} = - StreamingPipeline.start_link( - name: final_name, - test_pid: self(), - subscription: full_sub, - emulator_host: PubSubEmulator.host(), - adapter: :gun, - max_outstanding: 500, - processor_concurrency: 4 - ) - - case collect_messages(200, 30_000) do - {:ok, received} -> - assert length(received) == 200 - assert Enum.sort(received) == Enum.sort(expected) - Logger.info("[Stress 17] All 200 messages received after 5 rapid start/stop cycles.") - - {:partial, received, remaining} -> - flunk("After rapid restarts: #{length(received)}/200, #{remaining} missing") - end - - stop_pipeline(final_pid) - end - end - - # --------------------------------------------------------------------------- - # Scenario 18: Large message payloads - # Tests with big payloads (10KB each) to stress the gRPC frame parser. - # --------------------------------------------------------------------------- - - describe "Scenario 18: Large message payloads" do - test "handles 100 messages of ~10KB each (Gun)" do - {topic, _sub, full_sub} = setup_infra("large-payload-gun") - name = unique_name() - - {:ok, pid} = - StreamingPipeline.start_link( - name: name, - test_pid: self(), - subscription: full_sub, - emulator_host: PubSubEmulator.host(), - adapter: :gun, - max_outstanding: 100, - processor_concurrency: 4 - ) - - Process.sleep(500) - - # Each message is ~10KB - large_payloads = - Enum.map(1..100, fn i -> - padding = String.duplicate("X", 10_000) - "large-#{i}-#{padding}" - end) - - # Publish in smaller batches to avoid HTTP payload limits - large_payloads - |> Enum.chunk_every(10) - |> Enum.each(fn batch -> - {:ok, _} = PubSubEmulator.publish(topic, batch) - end) - - case collect_messages(100, 60_000) do - {:ok, received} -> - assert length(received) == 100 - # Verify content integrity — each should start with "large-N-" - Enum.each(received, fn msg -> - assert String.starts_with?(msg, "large-") - end) - - Logger.info("[Stress 18/Gun] All 100 large (~10KB) messages received intact.") - - {:partial, received, remaining} -> - flunk("Large payloads (Gun): #{length(received)}/100, #{remaining} missing") - end - - stop_pipeline(pid) - end - end - - # --------------------------------------------------------------------------- - # Scenario 19: Interleaved message failures - # Odd-numbered messages are failed; verify they get nacked properly - # and even-numbered ones succeed. - # --------------------------------------------------------------------------- - - defmodule FailingPipeline do - @moduledoc "Pipeline that fails odd-numbered messages." - use Broadway - - def start_link(opts) do - test_pid = Keyword.fetch!(opts, :test_pid) - subscription = Keyword.fetch!(opts, :subscription) - emulator_host = Keyword.fetch!(opts, :emulator_host) - name = Keyword.fetch!(opts, :name) - adapter = Keyword.get(opts, :adapter, :gun) - max_outstanding = Keyword.get(opts, :max_outstanding, 500) - processor_concurrency = Keyword.get(opts, :processor_concurrency, 4) - - Broadway.start_link(__MODULE__, - name: name, - producer: [ - module: - {BroadwayCloudPubSub.Streaming.Producer, - subscription: subscription, - token_generator: {BroadwayCloudPubSub.Streaming.StressTest, :noop_token, []}, - grpc_endpoint: emulator_host, - use_ssl: false, - adapter: adapter, - max_outstanding_messages: max_outstanding, - on_failure: {:nack, 0}}, - concurrency: 1 - ], - processors: [ - default: [concurrency: processor_concurrency] - ], - context: %{test_pid: test_pid} - ) - end - - @impl Broadway - def handle_message(:default, message, %{test_pid: test_pid}) do - data = message.data - - # Fail odd messages - if String.contains?(data, "-odd-") do - send(test_pid, {:will_fail, data}) - Broadway.Message.failed(message, :intentional_failure) - else - send(test_pid, {:msg, data}) - message - end - end - - @impl Broadway - def handle_failed(messages, %{test_pid: test_pid}) do - Enum.each(messages, fn msg -> - send(test_pid, {:failed, msg.data}) - end) - - messages - end - end - - describe "Scenario 19: Interleaved message failures" do - test "even messages succeed, odd messages are nacked and redelivered" do - {topic, _sub, full_sub} = setup_infra("fail-interleave") - name = unique_name() - - {:ok, pid} = - FailingPipeline.start_link( - name: name, - test_pid: self(), - subscription: full_sub, - emulator_host: PubSubEmulator.host(), - adapter: :gun, - max_outstanding: 200, - processor_concurrency: 4 - ) - - Process.sleep(500) - - # Publish 50 even and 50 odd messages - even_msgs = Enum.map(1..50, &"even-#{&1}") - odd_msgs = Enum.map(1..50, &"msg-odd-#{&1}") - all_msgs = Enum.shuffle(even_msgs ++ odd_msgs) - - {:ok, _} = PubSubEmulator.publish(topic, all_msgs) - - # Collect the 50 even messages that should succeed - case collect_messages(50, 30_000) do - {:ok, received} -> - assert length(received) == 50 - - Enum.each(received, fn msg -> - assert String.starts_with?(msg, "even-"), - "Expected only even messages, got: #{msg}" - end) - - Logger.info("[Stress 19] 50 even messages received, 50 odd messages properly failed.") - - {:partial, received, remaining} -> - Logger.warning( - "[Stress 19] Only #{length(received)}/50 even messages, #{remaining} missing" - ) - - # This is still acceptable — the test is about failure handling - flunk("Missing #{remaining} even messages") - end - - # Stop the pipeline BEFORE draining failure messages to prevent - # infinite nack/redeliver cycles on the odd messages - stop_pipeline(pid) - - # Check that we got failure notifications for odd messages - failed_msgs = drain_tagged_mailbox(:failed) - will_fail_msgs = drain_tagged_mailbox(:will_fail) - - Logger.info( - "[Stress 19] Failed callbacks: #{length(failed_msgs)}, will_fail signals: #{length(will_fail_msgs)}" - ) - - # At least some odd messages should have triggered :failed - assert length(failed_msgs) > 0 or length(will_fail_msgs) > 0, - "Expected at least some failure notifications for odd messages" - end - end - - # --------------------------------------------------------------------------- - # Scenario 20: Multiple concurrent pipelines on same subscription - # Tests competing consumers behavior. - # --------------------------------------------------------------------------- - - describe "Scenario 20: Multiple competing consumers" do - test "two pipelines on same subscription collectively receive all messages" do - {topic, _sub, full_sub} = setup_infra("competing") - - collector_pid = self() - - # Start two competing pipelines on the same subscription - name1 = unique_name() - name2 = unique_name() - - {:ok, pid1} = - StreamingPipeline.start_link( - name: name1, - test_pid: collector_pid, - subscription: full_sub, - emulator_host: PubSubEmulator.host(), - adapter: :gun, - max_outstanding: 250, - processor_concurrency: 4 - ) - - {:ok, pid2} = - StreamingPipeline.start_link( - name: name2, - test_pid: collector_pid, - subscription: full_sub, - emulator_host: PubSubEmulator.host(), - adapter: :gun, - max_outstanding: 250, - processor_concurrency: 4 - ) - - Process.sleep(1000) - - expected = publish_in_batches(topic, 500, 100) - - case collect_messages(500, 60_000) do - {:ok, received} -> - assert length(received) == 500 - assert Enum.sort(received) == Enum.sort(expected) - - Logger.info( - "[Stress 20] Two competing consumers collectively received all 500 messages." - ) - - {:partial, received, remaining} -> - Logger.warning("[Stress 20] #{length(received)}/500, #{remaining} missing") - flunk("Competing consumers: #{remaining} messages missing") - end - - stop_pipeline(pid1) - stop_pipeline(pid2) - end - end - - # --------------------------------------------------------------------------- - # Helper for Scenario 19 - # --------------------------------------------------------------------------- - - defp drain_tagged_mailbox(tag) do - drain_tagged_loop(tag, []) - end - - defp drain_tagged_loop(tag, acc) do - receive do - {^tag, data} -> drain_tagged_loop(tag, [data | acc]) - after - 200 -> Enum.reverse(acc) - end - end -end diff --git a/test/support/pubsub_emulator.ex b/test/support/pubsub_emulator.ex deleted file mode 100644 index ba3395a..0000000 --- a/test/support/pubsub_emulator.ex +++ /dev/null @@ -1,178 +0,0 @@ -defmodule BroadwayCloudPubSub.PubSubEmulator do - @moduledoc """ - Helpers for integration tests against the Cloud Pub/Sub emulator. - - The emulator must be running on `PUBSUB_EMULATOR_HOST` (default `localhost:8085`). - It exposes both HTTP/REST and gRPC on the same port, without TLS. - - ## Usage - - @moduletag :integration - - setup do - BroadwayCloudPubSub.PubSubEmulator.setup_topic_and_subscription( - "my-test-topic", - "my-test-sub" - ) - end - """ - - @default_host "localhost:8085" - @project "test-project" - @finch_name BroadwayCloudPubSub.PubSubEmulator.Finch - - @doc "Returns the emulator host:port (from env or default)." - def host do - System.get_env("PUBSUB_EMULATOR_HOST", @default_host) - end - - @doc "Returns the test GCP project ID." - def project, do: @project - - @doc "Returns the full subscription name." - def subscription(sub_name) do - "projects/#{@project}/subscriptions/#{sub_name}" - end - - @doc "Returns the full topic name." - def topic(topic_name) do - "projects/#{@project}/topics/#{topic_name}" - end - - @doc """ - Starts the internal Finch pool used for emulator REST calls. - Call this once in your `setup` or `setup_all`. - """ - def start do - {:ok, _} = - Finch.start_link( - name: @finch_name, - pools: %{ - :default => [size: 5] - } - ) - - :ok - end - - @doc """ - Creates a topic, then a subscription bound to it. - Deletes them first if they already exist (idempotent). - Returns `{full_topic, full_sub}` as full resource paths. - """ - def setup_topic_and_subscription(topic_name, sub_name, opts \\ []) do - ack_deadline = Keyword.get(opts, :ack_deadline_seconds, 60) - full_topic = topic(topic_name) - full_sub = subscription(sub_name) - - # Idempotent: delete if they exist (ignore errors) - delete_subscription(full_sub) - delete_topic(full_topic) - - :ok = create_topic(full_topic) - :ok = create_subscription(full_sub, full_topic, ack_deadline) - - {full_topic, full_sub} - end - - @doc "Publish messages via the emulator REST API. `messages` is a list of string payloads." - def publish(topic_name, messages) when is_list(messages) do - full_topic = topic(topic_name) - - body = - Jason.encode!(%{ - messages: - Enum.map(messages, fn msg -> - %{data: Base.encode64(msg)} - end) - }) - - url = "http://#{host()}/v1/#{full_topic}:publish" - - case request(:post, url, body) do - {:ok, 200, response_body} -> - decoded = Jason.decode!(response_body) - {:ok, decoded["messageIds"]} - - {:ok, status, body} -> - {:error, {:http_error, status, body}} - - {:error, reason} -> - {:error, reason} - end - end - - @doc "Pulls messages synchronously via the REST API (for post-ack verification)." - def pull(sub_name, opts \\ []) do - max = Keyword.get(opts, :max_messages, 10) - full_sub = subscription(sub_name) - - body = Jason.encode!(%{maxMessages: max, returnImmediately: true}) - url = "http://#{host()}/v1/#{full_sub}:pull" - - case request(:post, url, body) do - {:ok, 200, response_body} -> - decoded = Jason.decode!(response_body) - {:ok, Map.get(decoded, "receivedMessages", [])} - - {:ok, status, body} -> - {:error, {:http_error, status, body}} - - {:error, reason} -> - {:error, reason} - end - end - - # --- Private REST helpers --- - - defp create_topic(full_topic) do - url = "http://#{host()}/v1/#{full_topic}" - - case request(:put, url, "{}") do - {:ok, status, _} when status in [200, 409] -> :ok - {:ok, status, body} -> {:error, {:http_error, status, body}} - {:error, reason} -> {:error, reason} - end - end - - defp create_subscription(full_sub, full_topic, ack_deadline) do - url = "http://#{host()}/v1/#{full_sub}" - - body = - Jason.encode!(%{ - topic: full_topic, - ackDeadlineSeconds: ack_deadline - }) - - case request(:put, url, body) do - {:ok, status, _} when status in [200, 409] -> :ok - {:ok, status, body} -> {:error, {:http_error, status, body}} - {:error, reason} -> {:error, reason} - end - end - - defp delete_topic(full_topic) do - url = "http://#{host()}/v1/#{full_topic}" - request(:delete, url, "") - :ok - end - - defp delete_subscription(full_sub) do - url = "http://#{host()}/v1/#{full_sub}" - request(:delete, url, "") - :ok - end - - defp request(method, url, body) do - headers = [{"content-type", "application/json"}] - req = Finch.build(method, url, headers, body) - - case Finch.request(req, @finch_name) do - {:ok, %Finch.Response{status: status, body: resp_body}} -> - {:ok, status, resp_body} - - {:error, reason} -> - {:error, reason} - end - end -end diff --git a/test/test_helper.exs b/test/test_helper.exs index 8d3891f..869559e 100644 --- a/test/test_helper.exs +++ b/test/test_helper.exs @@ -1 +1 @@ -ExUnit.start(exclude: [:integration, :stress]) +ExUnit.start() From 2d24206a0f93276e74468a9dc1a8ffaa2be9289b Mon Sep 17 00:00:00 2001 From: Rock Date: Wed, 27 May 2026 17:39:02 +0200 Subject: [PATCH 15/29] Fix tests with nimble_options format --- test/broadway_cloud_pub_sub/producer_test.exs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/broadway_cloud_pub_sub/producer_test.exs b/test/broadway_cloud_pub_sub/producer_test.exs index 224222f..02b87f1 100644 --- a/test/broadway_cloud_pub_sub/producer_test.exs +++ b/test/broadway_cloud_pub_sub/producer_test.exs @@ -148,7 +148,7 @@ defmodule BroadwayCloudPubSub.ProducerTest do test ":subscription should be a string" do assert_raise( ValidationError, - "required :subscription option not found, received options: []", + "required option :subscription not found, received options: [:client]", fn -> prepare_for_start_module_opts([]) end @@ -266,7 +266,7 @@ defmodule BroadwayCloudPubSub.ProducerTest do assert_raise( ValidationError, - ~r/expected positive integer, got: 0/, + ~r/expected :max_number_of_messages to be a positive integer, got: 0/, fn -> prepare_for_start_module_opts( goth: FakeAuth, @@ -278,7 +278,7 @@ defmodule BroadwayCloudPubSub.ProducerTest do assert_raise( ValidationError, - ~r/expected positive integer, got: -1/, + ~r/expected :max_number_of_messages to be a positive integer, got: -1/, fn -> prepare_for_start_module_opts( goth: FakeAuth, @@ -326,7 +326,7 @@ defmodule BroadwayCloudPubSub.ProducerTest do assert producer_opts[:token_generator] == token_generator assert_raise ValidationError, - ~r/expected tuple {mod, fun, args}, got: {1, 1, 1}/, + ~r/expected :token_generator to be a tuple {Mod, Fun, Args}, got: {1, 1, 1}/, fn -> prepare_for_start_module_opts( subscription: "projects/foo/subscriptions/bar", @@ -335,7 +335,7 @@ defmodule BroadwayCloudPubSub.ProducerTest do end assert_raise ValidationError, - ~r/expected tuple {mod, fun, args}, got: SomeModule/, + ~r/expected :token_generator to be a tuple {Mod, Fun, Args}, got: SomeModule/, fn -> prepare_for_start_module_opts( subscription: "projects/foo/subscriptions/bar", From c63dec837632e0caad3a7c9e0142e52d01f4cf4b Mon Sep 17 00:00:00 2001 From: Rock Date: Wed, 27 May 2026 22:48:23 +0200 Subject: [PATCH 16/29] Pull producer to pull directory and modules --- .../{ => pull}/acknowledger.ex | 4 +- .../{ => pull}/client.ex | 6 +- .../{pull_client.ex => pull/finch_client.ex} | 22 +++-- .../{ => pull}/message_builder.ex | 2 +- .../{ => pull}/options.ex | 12 +-- .../{ => pull}/producer.ex | 16 ++-- .../streaming/client.ex | 2 +- .../streaming/message_dispatch.ex | 2 +- .../streaming/options.ex | 2 +- mix.exs | 11 +-- .../{ => pull}/acknowledger_test.exs | 10 +- .../finch_client_test.exs} | 96 +++++++++---------- .../{ => pull}/producer_test.exs | 48 +++++----- 13 files changed, 118 insertions(+), 115 deletions(-) rename lib/broadway_cloud_pub_sub/{ => pull}/acknowledger.ex (96%) rename lib/broadway_cloud_pub_sub/{ => pull}/client.ex (93%) rename lib/broadway_cloud_pub_sub/{pull_client.ex => pull/finch_client.ex} (91%) rename lib/broadway_cloud_pub_sub/{ => pull}/message_builder.ex (97%) rename lib/broadway_cloud_pub_sub/{ => pull}/options.ex (95%) rename lib/broadway_cloud_pub_sub/{ => pull}/producer.ex (94%) rename test/broadway_cloud_pub_sub/{ => pull}/acknowledger_test.exs (96%) rename test/broadway_cloud_pub_sub/{pull_client_test.exs => pull/finch_client_test.exs} (86%) rename test/broadway_cloud_pub_sub/{ => pull}/producer_test.exs (92%) diff --git a/lib/broadway_cloud_pub_sub/acknowledger.ex b/lib/broadway_cloud_pub_sub/pull/acknowledger.ex similarity index 96% rename from lib/broadway_cloud_pub_sub/acknowledger.ex rename to lib/broadway_cloud_pub_sub/pull/acknowledger.ex index bb14d67..7f874a6 100644 --- a/lib/broadway_cloud_pub_sub/acknowledger.ex +++ b/lib/broadway_cloud_pub_sub/pull/acknowledger.ex @@ -1,7 +1,7 @@ -defmodule BroadwayCloudPubSub.Acknowledger do +defmodule BroadwayCloudPubSub.Pull.Acknowledger do @moduledoc false alias Broadway.Acknowledger - alias BroadwayCloudPubSub.{Client, Options} + alias BroadwayCloudPubSub.Pull.{Client, Options} @behaviour Acknowledger diff --git a/lib/broadway_cloud_pub_sub/client.ex b/lib/broadway_cloud_pub_sub/pull/client.ex similarity index 93% rename from lib/broadway_cloud_pub_sub/client.ex rename to lib/broadway_cloud_pub_sub/pull/client.ex index a4b9e86..a5c2804 100644 --- a/lib/broadway_cloud_pub_sub/client.ex +++ b/lib/broadway_cloud_pub_sub/pull/client.ex @@ -1,10 +1,10 @@ -defmodule BroadwayCloudPubSub.Client do +defmodule BroadwayCloudPubSub.Pull.Client do @moduledoc """ - A generic behaviour to implement Pub/Sub Clients for `BroadwayCloudPubSub.Producer`. + A generic behaviour to implement Pub/Sub Clients for `BroadwayCloudPubSub.Pull.Producer`. This module defines callbacks to normalize options and receive messages from a Cloud Pub/Sub topic. Modules that implement this behaviour should be passed - as the `:client` option from `BroadwayCloudPubSub.Producer`. + as the `:client` option from `BroadwayCloudPubSub.Pull.Producer`. """ alias Broadway.Message diff --git a/lib/broadway_cloud_pub_sub/pull_client.ex b/lib/broadway_cloud_pub_sub/pull/finch_client.ex similarity index 91% rename from lib/broadway_cloud_pub_sub/pull_client.ex rename to lib/broadway_cloud_pub_sub/pull/finch_client.ex index ceb4c11..f4aa96b 100644 --- a/lib/broadway_cloud_pub_sub/pull_client.ex +++ b/lib/broadway_cloud_pub_sub/pull/finch_client.ex @@ -1,20 +1,24 @@ -defmodule BroadwayCloudPubSub.PullClient do +defmodule BroadwayCloudPubSub.Pull.FinchClient do @moduledoc """ - A subscriptions [pull client](https://cloud.google.com/pubsub/docs/reference/rest/v1/projects.subscriptions/pull) built on `Finch`. + The default Pub/Sub pull client, built on `Finch`. + + Implements the `BroadwayCloudPubSub.Pull.Client` behaviour and handles + [pull](https://cloud.google.com/pubsub/docs/reference/rest/v1/projects.subscriptions/pull) + requests to the Cloud Pub/Sub REST API. """ alias Broadway.Message - alias BroadwayCloudPubSub.{Client, MessageBuilder} + alias BroadwayCloudPubSub.Pull.MessageBuilder alias Finch.Response require Logger - @behaviour Client + @behaviour BroadwayCloudPubSub.Pull.Client @default_retry_codes [408, 500, 502, 503, 504, 522, 524] @default_retry_delay_ms 500 @default_max_retries 10 - @impl Client + @impl BroadwayCloudPubSub.Pull.Client def prepare_to_connect(name, producer_opts) do case Keyword.fetch(producer_opts, :finch) do {:ok, nil} -> @@ -40,12 +44,12 @@ defmodule BroadwayCloudPubSub.PullClient do {specs, producer_opts} end - @impl Client + @impl BroadwayCloudPubSub.Pull.Client def init(opts) do {:ok, Map.new(opts)} end - @impl Client + @impl BroadwayCloudPubSub.Pull.Client def receive_messages(demand, ack_builder, config) do max_messages = min(demand, config.max_number_of_messages) @@ -68,7 +72,7 @@ defmodule BroadwayCloudPubSub.PullClient do ) end - @impl Client + @impl BroadwayCloudPubSub.Pull.Client def put_deadline(ack_ids, ack_deadline_seconds, config) do payload = %{ "ackIds" => ack_ids, @@ -80,7 +84,7 @@ defmodule BroadwayCloudPubSub.PullClient do |> handle_response(:put_deadline) end - @impl Client + @impl BroadwayCloudPubSub.Pull.Client def acknowledge(ack_ids, config) do :telemetry.span( [:broadway_cloud_pub_sub, :pull_client, :ack], diff --git a/lib/broadway_cloud_pub_sub/message_builder.ex b/lib/broadway_cloud_pub_sub/pull/message_builder.ex similarity index 97% rename from lib/broadway_cloud_pub_sub/message_builder.ex rename to lib/broadway_cloud_pub_sub/pull/message_builder.ex index 63eb6ba..97c401d 100644 --- a/lib/broadway_cloud_pub_sub/message_builder.ex +++ b/lib/broadway_cloud_pub_sub/pull/message_builder.ex @@ -1,4 +1,4 @@ -defmodule BroadwayCloudPubSub.MessageBuilder do +defmodule BroadwayCloudPubSub.Pull.MessageBuilder do @moduledoc false # Shared message-building logic used by both the pull client and the diff --git a/lib/broadway_cloud_pub_sub/options.ex b/lib/broadway_cloud_pub_sub/pull/options.ex similarity index 95% rename from lib/broadway_cloud_pub_sub/options.ex rename to lib/broadway_cloud_pub_sub/pull/options.ex index 0289879..bfb5e2c 100644 --- a/lib/broadway_cloud_pub_sub/options.ex +++ b/lib/broadway_cloud_pub_sub/pull/options.ex @@ -1,4 +1,4 @@ -defmodule BroadwayCloudPubSub.Options do +defmodule BroadwayCloudPubSub.Pull.Options do @moduledoc false @default_base_url "https://pubsub.googleapis.com" @@ -14,19 +14,19 @@ defmodule BroadwayCloudPubSub.Options do broadway: [type: :any, doc: false], client: [ type: {:or, [:atom, :mod_arg]}, - default: BroadwayCloudPubSub.PullClient, + default: BroadwayCloudPubSub.Pull.FinchClient, doc: """ - A module that implements the BroadwayCloudPubSub.Client behaviour. + A module that implements the `BroadwayCloudPubSub.Pull.Client` behaviour. This module is responsible for fetching and acknowledging the messages. Pay attention that all options passed to the producer will be forwarded to the client. It's up to the client to normalize the options it needs. - The BroadwayCloudPubSub.PullClient is the default client and will + `BroadwayCloudPubSub.Pull.FinchClient` is the default client and will automatically retry the following errors [408, 500, 502, 503, 504, 522, 524] up to 10 times with a 500ms pause between retries. This can be configured by passing the module with options to the client: - {BroadwayCloudPubSub.PullClient, + {BroadwayCloudPubSub.Pull.FinchClient, retry_codes: [502, 503], retry_delay_ms: 300, max_retries: 5} @@ -198,7 +198,7 @@ defmodule BroadwayCloudPubSub.Options do Broadway.start_link( producers: [ default: [ - module: {BroadwayCloudPubSub.Producer, + module: {BroadwayCloudPubSub.Pull.Producer, token_generator: {MyGenerator, :generate, ["foo"]} } ] diff --git a/lib/broadway_cloud_pub_sub/producer.ex b/lib/broadway_cloud_pub_sub/pull/producer.ex similarity index 94% rename from lib/broadway_cloud_pub_sub/producer.ex rename to lib/broadway_cloud_pub_sub/pull/producer.ex index 7050c90..25303a0 100644 --- a/lib/broadway_cloud_pub_sub/producer.ex +++ b/lib/broadway_cloud_pub_sub/pull/producer.ex @@ -1,10 +1,10 @@ -defmodule BroadwayCloudPubSub.Producer do +defmodule BroadwayCloudPubSub.Pull.Producer do @moduledoc """ A GenStage producer that continuously receives messages from a Google Cloud Pub/Sub topic and acknowledges them after being successfully processed. - By default this producer uses `BroadwayCloudPubSub.PullClient` to talk to Cloud - Pub/Sub, but you can provide your client by implementing the `BroadwayCloudPubSub.Client` + By default this producer uses `BroadwayCloudPubSub.Pull.FinchClient` to talk to Cloud + Pub/Sub, but you can provide your client by implementing the `BroadwayCloudPubSub.Pull.Client` behaviour. For a quick getting started on using Broadway with Cloud Pub/Sub, please see @@ -16,7 +16,7 @@ defmodule BroadwayCloudPubSub.Producer do producers (regardless of the client implementation), all other options are specific to `BroadwayCloudPubSub.PullClient`, which is the default client. - #{NimbleOptions.docs(BroadwayCloudPubSub.Options.definition())} + #{NimbleOptions.docs(BroadwayCloudPubSub.Pull.Options.definition())} ### Custom token generator @@ -43,8 +43,8 @@ defmodule BroadwayCloudPubSub.Producer do You can use the `:on_success` and `:on_failure` options to control how messages are acknowledged with the Pub/Sub system. - By default successful messages are acknowledged and failed messages are ignored. - You can set `:on_success` and `:on_failure` when starting this producer, + By default successful messages are acknowledged and failed messages are ignored + (`:noop`). You can set `:on_success` and `:on_failure` when starting this producer, or change them for each message through `Broadway.Message.configure_ack/2`. The following values are supported by both `:on_success` and `:on_failure`: @@ -82,7 +82,7 @@ defmodule BroadwayCloudPubSub.Producer do Broadway.start_link(MyBroadway, name: MyBroadway, producer: [ - module: {BroadwayCloudPubSub.Producer, + module: {BroadwayCloudPubSub.Pull.Producer, goth: MyApp.Goth, subscription: "projects/my-project/subscriptions/my_subscription" } @@ -130,7 +130,7 @@ defmodule BroadwayCloudPubSub.Producer do use GenStage alias Broadway.Producer - alias BroadwayCloudPubSub.{Acknowledger, Options} + alias BroadwayCloudPubSub.Pull.{Acknowledger, Options} @behaviour Producer diff --git a/lib/broadway_cloud_pub_sub/streaming/client.ex b/lib/broadway_cloud_pub_sub/streaming/client.ex index aea0d5d..ede986e 100644 --- a/lib/broadway_cloud_pub_sub/streaming/client.ex +++ b/lib/broadway_cloud_pub_sub/streaming/client.ex @@ -49,7 +49,7 @@ defmodule BroadwayCloudPubSub.Streaming.Client do Invoked once during producer startup to normalize options into a `config` term. The `config` term is stored in state and forwarded as the last argument to all - subsequent callbacks, analogous to how `c:BroadwayCloudPubSub.Client.init/1` + subsequent callbacks, analogous to how `c:BroadwayCloudPubSub.Pull.Client.init/1` works for the pull producer. """ @callback init(opts :: keyword()) :: {:ok, config()} | {:error, term()} diff --git a/lib/broadway_cloud_pub_sub/streaming/message_dispatch.ex b/lib/broadway_cloud_pub_sub/streaming/message_dispatch.ex index c649769..d0a9972 100644 --- a/lib/broadway_cloud_pub_sub/streaming/message_dispatch.ex +++ b/lib/broadway_cloud_pub_sub/streaming/message_dispatch.ex @@ -8,7 +8,7 @@ defmodule BroadwayCloudPubSub.Streaming.MessageDispatch do # no side effects (no send/2, no telemetry). StreamManager handles all side # effects based on the returned results. - alias BroadwayCloudPubSub.MessageBuilder + alias BroadwayCloudPubSub.Pull.MessageBuilder alias BroadwayCloudPubSub.Streaming.{Acknowledger, AckTimeDistribution} # --- Buffer and demand --- diff --git a/lib/broadway_cloud_pub_sub/streaming/options.ex b/lib/broadway_cloud_pub_sub/streaming/options.ex index 67c24f3..cb09f8e 100644 --- a/lib/broadway_cloud_pub_sub/streaming/options.ex +++ b/lib/broadway_cloud_pub_sub/streaming/options.ex @@ -349,7 +349,7 @@ defmodule BroadwayCloudPubSub.Streaming.Options do @doc """ Builds an MFArgs tuple for a token generator using Goth. """ - defdelegate make_token_generator(opts), to: BroadwayCloudPubSub.Options + defdelegate make_token_generator(opts), to: BroadwayCloudPubSub.Pull.Options # --- Custom type validators --- diff --git a/mix.exs b/mix.exs index 1225242..ac07519 100644 --- a/mix.exs +++ b/mix.exs @@ -58,15 +58,14 @@ defmodule BroadwayCloudPubSub.MixProject do ], groups_for_modules: [ Pull: [ - BroadwayCloudPubSub.Producer, - BroadwayCloudPubSub.Client, - BroadwayCloudPubSub.PullClient + BroadwayCloudPubSub.Pull.Producer, + BroadwayCloudPubSub.Pull.Client, + BroadwayCloudPubSub.Pull.FinchClient ], Streaming: [ - BroadwayCloudPubSub.Streaming.Producer, + BroadwayCloudPubSub.Producer, BroadwayCloudPubSub.Streaming.Client, - BroadwayCloudPubSub.Streaming.GrpcClient, - BroadwayCloudPubSub.Streaming.Options + BroadwayCloudPubSub.Streaming.GrpcClient ] ] ] diff --git a/test/broadway_cloud_pub_sub/acknowledger_test.exs b/test/broadway_cloud_pub_sub/pull/acknowledger_test.exs similarity index 96% rename from test/broadway_cloud_pub_sub/acknowledger_test.exs rename to test/broadway_cloud_pub_sub/pull/acknowledger_test.exs index 6162ff7..eb52500 100644 --- a/test/broadway_cloud_pub_sub/acknowledger_test.exs +++ b/test/broadway_cloud_pub_sub/pull/acknowledger_test.exs @@ -1,13 +1,13 @@ -defmodule BroadwayCloudPubSub.AcknowledgerTest do +defmodule BroadwayCloudPubSub.Pull.AcknowledgerTest do use ExUnit.Case alias Broadway.Message - alias BroadwayCloudPubSub.Client - alias BroadwayCloudPubSub.Acknowledger + alias BroadwayCloudPubSub.Pull.Client + alias BroadwayCloudPubSub.Pull.Acknowledger defmodule CallerClient do - alias BroadwayCloudPubSub.Acknowledger + alias BroadwayCloudPubSub.Pull.Acknowledger - @behaviour Client + @behaviour BroadwayCloudPubSub.Pull.Client @impl Client def init(opts) do diff --git a/test/broadway_cloud_pub_sub/pull_client_test.exs b/test/broadway_cloud_pub_sub/pull/finch_client_test.exs similarity index 86% rename from test/broadway_cloud_pub_sub/pull_client_test.exs rename to test/broadway_cloud_pub_sub/pull/finch_client_test.exs index ddc5ffe..d71d90a 100644 --- a/test/broadway_cloud_pub_sub/pull_client_test.exs +++ b/test/broadway_cloud_pub_sub/pull/finch_client_test.exs @@ -1,10 +1,10 @@ -defmodule BroadwayCloudPubSub.PullClientTest do +defmodule BroadwayCloudPubSub.Pull.FinchClientTest do use ExUnit.Case, async: true import ExUnit.CaptureLog - alias BroadwayCloudPubSub.Acknowledger - alias BroadwayCloudPubSub.PullClient + alias BroadwayCloudPubSub.Pull.Acknowledger + alias BroadwayCloudPubSub.Pull.FinchClient alias BroadwayCloudPubSub.Test.TelemetryHelper alias Broadway.Message @@ -145,7 +145,7 @@ defmodule BroadwayCloudPubSub.PullClientTest do ack_ref = opts[:broadway][:name] fill_persistent_term(ack_ref, opts) - {:ok, config} = PullClient.init(opts) + {:ok, config} = FinchClient.init(opts) {ack_ref, Acknowledger.builder(ack_ref), config} end @@ -181,9 +181,9 @@ defmodule BroadwayCloudPubSub.PullClientTest do {:ok, @ordered_response} end) - {:ok, opts} = PullClient.init(base_opts) + {:ok, opts} = FinchClient.init(base_opts) - assert [message] = PullClient.receive_messages(10, & &1, opts) + assert [message] = FinchClient.receive_messages(10, & &1, opts) assert message.metadata.messageId == "19917247038" assert message.metadata.orderingKey == "key1" @@ -200,9 +200,9 @@ defmodule BroadwayCloudPubSub.PullClientTest do |> Keyword.put(:max_retries, 3) |> Keyword.put(:retry_delay_ms, 0) |> Keyword.put(:retry_codes, [502]) - |> PullClient.init() + |> FinchClient.init() - assert [_message] = PullClient.receive_messages(10, & &1, opts) + assert [_message] = FinchClient.receive_messages(10, & &1, opts) end test "returns a list of Broadway.Message when payloadFormat is NONE", %{ @@ -213,18 +213,18 @@ defmodule BroadwayCloudPubSub.PullClientTest do {:ok, @no_payload_response} end) - {:ok, opts} = PullClient.init(base_opts) + {:ok, opts} = FinchClient.init(base_opts) - assert [message] = PullClient.receive_messages(10, & &1, opts) + assert [message] = FinchClient.receive_messages(10, & &1, opts) assert message.metadata.messageId == "20240501001" end test "returns a list of Broadway.Message with :data and :metadata set", %{ opts: base_opts } do - {:ok, opts} = PullClient.init(base_opts) + {:ok, opts} = FinchClient.init(base_opts) - [message1, message2, message3, message4] = PullClient.receive_messages(10, & &1, opts) + [message1, message2, message3, message4] = FinchClient.receive_messages(10, & &1, opts) assert %Message{data: "Message1", metadata: %{publishTime: %DateTime{}}} = message1 @@ -260,9 +260,9 @@ defmodule BroadwayCloudPubSub.PullClientTest do {:ok, @empty_response} end) - {:ok, opts} = PullClient.init(base_opts) + {:ok, opts} = FinchClient.init(base_opts) - assert [] == PullClient.receive_messages(10, & &1, opts) + assert [] == FinchClient.receive_messages(10, & &1, opts) end test "if the request fails, returns an empty list and log the error", %{ @@ -271,16 +271,16 @@ defmodule BroadwayCloudPubSub.PullClientTest do } do on_pubsub_request(server, fn _, _ -> {:error, 403, @empty_response} end) - {:ok, opts} = PullClient.init(base_opts) + {:ok, opts} = FinchClient.init(base_opts) assert capture_log(fn -> - assert PullClient.receive_messages(10, & &1, opts) == [] + assert FinchClient.receive_messages(10, & &1, opts) == [] end) =~ "[error] Unable to fetch events from Cloud Pub/Sub - reason: " end test "send a projects.subscriptions.pull request with default options", %{opts: base_opts} do - {:ok, opts} = PullClient.init(base_opts) - PullClient.receive_messages(10, & &1, opts) + {:ok, opts} = FinchClient.init(base_opts) + FinchClient.receive_messages(10, & &1, opts) assert_received {:http_request_called, %{body: body, url: url}} assert body == %{"maxMessages" => 10} @@ -288,8 +288,8 @@ defmodule BroadwayCloudPubSub.PullClientTest do end test "request with custom :max_number_of_messages", %{opts: base_opts} do - {:ok, opts} = base_opts |> Keyword.put(:max_number_of_messages, 5) |> PullClient.init() - PullClient.receive_messages(10, & &1, opts) + {:ok, opts} = base_opts |> Keyword.put(:max_number_of_messages, 5) |> FinchClient.init() + FinchClient.receive_messages(10, & &1, opts) assert_received {:http_request_called, %{body: body, url: _url}} assert body["maxMessages"] == 5 @@ -312,8 +312,8 @@ defmodule BroadwayCloudPubSub.PullClientTest do %{pid: test_pid, msg: :stop} ) - {:ok, opts} = base_opts |> Keyword.put(:max_number_of_messages, 5) |> PullClient.init() - PullClient.receive_messages(10, & &1, opts) + {:ok, opts} = base_opts |> Keyword.put(:max_number_of_messages, 5) |> FinchClient.init() + FinchClient.receive_messages(10, & &1, opts) assert_received {:start, _measurements, metadata} assert_received {:stop, measurements, _metadata} @@ -352,9 +352,9 @@ defmodule BroadwayCloudPubSub.PullClientTest do end test "makes a projects.subscriptions.acknowledge request", %{opts: base_opts} do - {:ok, opts} = PullClient.init(base_opts) + {:ok, opts} = FinchClient.init(base_opts) - PullClient.acknowledge(["1", "2", "3"], opts) + FinchClient.acknowledge(["1", "2", "3"], opts) assert_received {:http_request_called, %{body: body, url: url}} @@ -371,10 +371,10 @@ defmodule BroadwayCloudPubSub.PullClientTest do {:error, 503, @empty_response} end) - {:ok, opts} = PullClient.init(base_opts) + {:ok, opts} = FinchClient.init(base_opts) assert capture_log(fn -> - assert PullClient.acknowledge(["1", "2"], opts) == :ok + assert FinchClient.acknowledge(["1", "2"], opts) == :ok end) =~ "[error] Unable to acknowledge messages with Cloud Pub/Sub - reason: " end @@ -395,9 +395,9 @@ defmodule BroadwayCloudPubSub.PullClientTest do %{pid: test_pid, msg: :stop} ) - {:ok, opts} = PullClient.init(base_opts) + {:ok, opts} = FinchClient.init(base_opts) - PullClient.acknowledge(["1", "2", "3"], opts) + FinchClient.acknowledge(["1", "2", "3"], opts) assert_received {:start, _measurements, metadata} assert metadata.name == Broadway3 @@ -438,10 +438,10 @@ defmodule BroadwayCloudPubSub.PullClientTest do test "makes a projects.subscriptions.modifyAckDeadline request", %{ opts: base_opts } do - {:ok, opts} = PullClient.init(base_opts) + {:ok, opts} = FinchClient.init(base_opts) ack_ids = ["1", "2"] - PullClient.put_deadline(ack_ids, 30, opts) + FinchClient.put_deadline(ack_ids, 30, opts) assert_received {:http_request_called, %{body: body, url: url}} assert body == %{"ackIds" => ack_ids, "ackDeadlineSeconds" => 30} @@ -455,30 +455,30 @@ defmodule BroadwayCloudPubSub.PullClientTest do {:error, 503, @empty_response} end) - {:ok, opts} = PullClient.init(base_opts) + {:ok, opts} = FinchClient.init(base_opts) assert capture_log(fn -> - assert PullClient.put_deadline(["1", "2"], 60, opts) == :ok + assert FinchClient.put_deadline(["1", "2"], 60, opts) == :ok end) =~ "[error] Unable to put new ack deadline with Cloud Pub/Sub - reason: " end end describe "prepare_to_connect/2" do test "returns a child_spec for starting a Finch http pool " do - {[pool_spec], opts} = PullClient.prepare_to_connect(SomePipeline, []) - assert pool_spec == {Finch, name: SomePipeline.BroadwayCloudPubSub.PullClient} - assert opts == [finch: SomePipeline.BroadwayCloudPubSub.PullClient] + {[pool_spec], opts} = FinchClient.prepare_to_connect(SomePipeline, []) + assert pool_spec == {Finch, name: SomePipeline.BroadwayCloudPubSub.Pull.FinchClient} + assert opts == [finch: SomePipeline.BroadwayCloudPubSub.Pull.FinchClient] end test "allows custom finch" do - {specs, opts} = PullClient.prepare_to_connect(SomePipeline, finch: Foo) + {specs, opts} = FinchClient.prepare_to_connect(SomePipeline, finch: Foo) assert specs == [] assert opts == [finch: Foo] end end - describe "integration with BroadwayCloudPubSub.Acknowledger" do + describe "integration with BroadwayCloudPubSub.Pull.Acknowledger" do setup %{server: server, base_url: base_url, finch: finch} do test_pid = self() @@ -518,7 +518,7 @@ defmodule BroadwayCloudPubSub.PullClientTest do # will be injected by Broadway at runtime broadway: [name: :Broadway3], base_url: base_url, - client: PullClient, + client: FinchClient, finch: finch, max_number_of_messages: 10, subscription: "projects/foo/subscriptions/bar", @@ -532,10 +532,10 @@ defmodule BroadwayCloudPubSub.PullClientTest do test "returns a list of Broadway.Message structs with ack builder", %{ opts: base_opts } do - {:ok, opts} = PullClient.init(base_opts) + {:ok, opts} = FinchClient.init(base_opts) [message1, message2, message3, message4] = - PullClient.receive_messages(10, &{:ack, &1}, opts) + FinchClient.receive_messages(10, &{:ack, &1}, opts) assert {:ack, _} = message1.acknowledger assert {:ack, _} = message2.acknowledger @@ -548,7 +548,7 @@ defmodule BroadwayCloudPubSub.PullClientTest do } do {ack_ref, builder, opts} = init_with_ack_builder(base_opts) - messages = PullClient.receive_messages(10, builder, opts) + messages = FinchClient.receive_messages(10, builder, opts) {successful, failed} = Enum.split(messages, 1) @@ -565,7 +565,7 @@ defmodule BroadwayCloudPubSub.PullClientTest do |> Keyword.put(:on_success, :noop) |> init_with_ack_builder() - [_, _, _, _] = messages = PullClient.receive_messages(10, builder, opts) + [_, _, _, _] = messages = FinchClient.receive_messages(10, builder, opts) Acknowledger.ack(ack_ref, messages, []) @@ -580,7 +580,7 @@ defmodule BroadwayCloudPubSub.PullClientTest do |> Keyword.put(:on_success, :nack) |> init_with_ack_builder() - [_, _, _, _] = messages = PullClient.receive_messages(10, builder, opts) + [_, _, _, _] = messages = FinchClient.receive_messages(10, builder, opts) Acknowledger.ack(ack_ref, messages, []) @@ -596,7 +596,7 @@ defmodule BroadwayCloudPubSub.PullClientTest do |> Keyword.put(:on_success, {:nack, 300}) |> init_with_ack_builder() - [_, _, _, _] = messages = PullClient.receive_messages(10, builder, opts) + [_, _, _, _] = messages = FinchClient.receive_messages(10, builder, opts) Acknowledger.ack(ack_ref, messages, []) @@ -607,7 +607,7 @@ defmodule BroadwayCloudPubSub.PullClientTest do test "with default :on_failure, failed messages are ignored", %{opts: base_opts} do {ack_ref, builder, opts} = init_with_ack_builder(base_opts) - [_, _, _, _] = messages = PullClient.receive_messages(10, builder, opts) + [_, _, _, _] = messages = FinchClient.receive_messages(10, builder, opts) Acknowledger.ack(ack_ref, [], messages) @@ -622,7 +622,7 @@ defmodule BroadwayCloudPubSub.PullClientTest do |> Keyword.put(:on_failure, :nack) |> init_with_ack_builder() - [_, _, _, _] = messages = PullClient.receive_messages(10, builder, opts) + [_, _, _, _] = messages = FinchClient.receive_messages(10, builder, opts) Acknowledger.ack(ack_ref, [], messages) @@ -637,7 +637,7 @@ defmodule BroadwayCloudPubSub.PullClientTest do |> Keyword.put(:on_failure, {:nack, 60}) |> init_with_ack_builder() - [_, _, _, _] = messages = PullClient.receive_messages(10, builder, opts) + [_, _, _, _] = messages = FinchClient.receive_messages(10, builder, opts) Acknowledger.ack(ack_ref, [], messages) @@ -650,7 +650,7 @@ defmodule BroadwayCloudPubSub.PullClientTest do defp fill_persistent_term(ack_ref, base_opts) do :persistent_term.put(ack_ref, %{ base_url: Keyword.fetch!(base_opts, :base_url), - client: PullClient, + client: FinchClient, finch: Keyword.fetch!(base_opts, :finch), on_failure: base_opts[:on_failure] || :noop, on_success: base_opts[:on_success] || :ack, diff --git a/test/broadway_cloud_pub_sub/producer_test.exs b/test/broadway_cloud_pub_sub/pull/producer_test.exs similarity index 92% rename from test/broadway_cloud_pub_sub/producer_test.exs rename to test/broadway_cloud_pub_sub/pull/producer_test.exs index 02b87f1..becbe2b 100644 --- a/test/broadway_cloud_pub_sub/producer_test.exs +++ b/test/broadway_cloud_pub_sub/pull/producer_test.exs @@ -1,4 +1,4 @@ -defmodule BroadwayCloudPubSub.ProducerTest do +defmodule BroadwayCloudPubSub.Pull.ProducerTest do use ExUnit.Case alias Broadway.Message @@ -49,7 +49,7 @@ defmodule BroadwayCloudPubSub.ProducerTest do end defmodule FakeClient do - alias BroadwayCloudPubSub.Client + alias BroadwayCloudPubSub.Pull.Client alias Broadway.Acknowledger @behaviour Client @@ -81,7 +81,7 @@ defmodule BroadwayCloudPubSub.ProducerTest do end defmodule FakePrepareToConnectClient do - alias BroadwayCloudPubSub.Client + alias BroadwayCloudPubSub.Pull.Client @behaviour Client @@ -132,9 +132,9 @@ defmodule BroadwayCloudPubSub.ProducerTest do {:ok, pid} = start_broadway(message_server) try do - BroadwayCloudPubSub.Producer.prepare_for_start(Forwarder, + BroadwayCloudPubSub.Pull.Producer.prepare_for_start(Forwarder, producer: [ - module: {BroadwayCloudPubSub.Producer, module_opts}, + module: {BroadwayCloudPubSub.Pull.Producer, module_opts}, concurrency: 1 ], name: __MODULE__ @@ -182,7 +182,7 @@ defmodule BroadwayCloudPubSub.ProducerTest do _, [ producer: [ - module: {BroadwayCloudPubSub.Producer, producer_opts}, + module: {BroadwayCloudPubSub.Pull.Producer, producer_opts}, concurrency: 1 ], name: __MODULE__ @@ -201,7 +201,7 @@ defmodule BroadwayCloudPubSub.ProducerTest do assert {_, [ producer: [ - module: {BroadwayCloudPubSub.Producer, producer_opts}, + module: {BroadwayCloudPubSub.Pull.Producer, producer_opts}, concurrency: 1 ], name: __MODULE__ @@ -218,7 +218,7 @@ defmodule BroadwayCloudPubSub.ProducerTest do assert {_, [ producer: [ - module: {BroadwayCloudPubSub.Producer, producer_opts}, + module: {BroadwayCloudPubSub.Pull.Producer, producer_opts}, concurrency: 1 ], name: __MODULE__ @@ -235,7 +235,7 @@ defmodule BroadwayCloudPubSub.ProducerTest do assert {_, [ producer: [ - module: {BroadwayCloudPubSub.Producer, result_module_opts}, + module: {BroadwayCloudPubSub.Pull.Producer, result_module_opts}, concurrency: 1 ], name: __MODULE__ @@ -251,7 +251,7 @@ defmodule BroadwayCloudPubSub.ProducerTest do assert {_, [ producer: [ - module: {BroadwayCloudPubSub.Producer, result_module_opts}, + module: {BroadwayCloudPubSub.Pull.Producer, result_module_opts}, concurrency: 1 ], name: __MODULE__ @@ -293,7 +293,7 @@ defmodule BroadwayCloudPubSub.ProducerTest do assert {_, [ producer: [ - module: {BroadwayCloudPubSub.Producer, producer_opts}, + module: {BroadwayCloudPubSub.Pull.Producer, producer_opts}, concurrency: 1 ], name: __MODULE__ @@ -304,7 +304,7 @@ defmodule BroadwayCloudPubSub.ProducerTest do ) assert producer_opts[:token_generator] == - {BroadwayCloudPubSub.Options, :generate_goth_token, [FakeAuth]} + {BroadwayCloudPubSub.Pull.Options, :generate_goth_token, [FakeAuth]} end test ":token_generator should be a tuple {Mod, Fun, Args}" do @@ -313,7 +313,7 @@ defmodule BroadwayCloudPubSub.ProducerTest do assert {_, [ producer: [ - module: {BroadwayCloudPubSub.Producer, producer_opts}, + module: {BroadwayCloudPubSub.Pull.Producer, producer_opts}, concurrency: 1 ], name: __MODULE__ @@ -348,7 +348,7 @@ defmodule BroadwayCloudPubSub.ProducerTest do assert {_, [ producer: [ - module: {BroadwayCloudPubSub.Producer, producer_opts}, + module: {BroadwayCloudPubSub.Pull.Producer, producer_opts}, concurrency: 1 ], name: __MODULE__ @@ -377,7 +377,7 @@ defmodule BroadwayCloudPubSub.ProducerTest do assert {_, [ producer: [ - module: {BroadwayCloudPubSub.Producer, producer_opts}, + module: {BroadwayCloudPubSub.Pull.Producer, producer_opts}, concurrency: 1 ], name: __MODULE__ @@ -395,7 +395,7 @@ defmodule BroadwayCloudPubSub.ProducerTest do assert {_, [ producer: [ - module: {BroadwayCloudPubSub.Producer, producer_opts}, + module: {BroadwayCloudPubSub.Pull.Producer, producer_opts}, concurrency: 1 ], name: __MODULE__ @@ -412,7 +412,7 @@ defmodule BroadwayCloudPubSub.ProducerTest do assert {_, [ producer: [ - module: {BroadwayCloudPubSub.Producer, producer_opts}, + module: {BroadwayCloudPubSub.Pull.Producer, producer_opts}, concurrency: 1 ], name: __MODULE__ @@ -430,7 +430,7 @@ defmodule BroadwayCloudPubSub.ProducerTest do assert {_, [ producer: [ - module: {BroadwayCloudPubSub.Producer, producer_opts}, + module: {BroadwayCloudPubSub.Pull.Producer, producer_opts}, concurrency: 1 ], name: __MODULE__ @@ -490,7 +490,7 @@ defmodule BroadwayCloudPubSub.ProducerTest do assert {_, [ producer: [ - module: {BroadwayCloudPubSub.Producer, producer_opts}, + module: {BroadwayCloudPubSub.Pull.Producer, producer_opts}, concurrency: 1 ], name: __MODULE__ @@ -549,7 +549,7 @@ defmodule BroadwayCloudPubSub.ProducerTest do assert {_, [ producer: [ - module: {BroadwayCloudPubSub.Producer, producer_opts}, + module: {BroadwayCloudPubSub.Pull.Producer, producer_opts}, concurrency: 1 ], name: __MODULE__ @@ -568,11 +568,11 @@ defmodule BroadwayCloudPubSub.ProducerTest do test "with :client PullClient returns a child_spec for starting a Finch pool" do assert { [ - {Finch, name: BroadwayCloudPubSub.ProducerTest.BroadwayCloudPubSub.PullClient} + {Finch, name: BroadwayCloudPubSub.Pull.ProducerTest.BroadwayCloudPubSub.Pull.FinchClient} ], [ producer: [ - module: {BroadwayCloudPubSub.Producer, _producer_opts}, + module: {BroadwayCloudPubSub.Pull.Producer, _producer_opts}, concurrency: 1 ], name: __MODULE__ @@ -589,7 +589,7 @@ defmodule BroadwayCloudPubSub.ProducerTest do [], [ producer: [ - module: {BroadwayCloudPubSub.Producer, producer_opts}, + module: {BroadwayCloudPubSub.Pull.Producer, producer_opts}, concurrency: 1 ], name: __MODULE__ @@ -761,7 +761,7 @@ defmodule BroadwayCloudPubSub.ProducerTest do name: broadway_name, context: %{test_pid: self()}, producer: [ - module: {BroadwayCloudPubSub.Producer, Keyword.merge(producer_opts, opts)}, + module: {BroadwayCloudPubSub.Pull.Producer, Keyword.merge(producer_opts, opts)}, concurrency: 1 ], processors: [ From 4c8fcec7b6ef1eb9a0fd86a24a27651eb25ed812 Mon Sep 17 00:00:00 2001 From: Rock Date: Wed, 27 May 2026 22:52:27 +0200 Subject: [PATCH 17/29] Streaming no default producer --- .../{streaming => }/producer.ex | 14 +++++++------- lib/broadway_cloud_pub_sub/streaming/client.ex | 4 ++-- .../streaming/grpc_client.ex | 2 +- lib/broadway_cloud_pub_sub/streaming/options.ex | 4 +--- .../streaming/stream_manager.ex | 2 +- .../streaming/unary_ack_supervisor.ex | 2 +- .../streaming/options_test.exs | 2 +- 7 files changed, 14 insertions(+), 16 deletions(-) rename lib/broadway_cloud_pub_sub/{streaming => }/producer.ex (97%) diff --git a/lib/broadway_cloud_pub_sub/streaming/producer.ex b/lib/broadway_cloud_pub_sub/producer.ex similarity index 97% rename from lib/broadway_cloud_pub_sub/streaming/producer.ex rename to lib/broadway_cloud_pub_sub/producer.ex index d6a1b1a..2ca189f 100644 --- a/lib/broadway_cloud_pub_sub/streaming/producer.ex +++ b/lib/broadway_cloud_pub_sub/producer.ex @@ -1,4 +1,4 @@ -defmodule BroadwayCloudPubSub.Streaming.Producer do +defmodule BroadwayCloudPubSub.Producer do @moduledoc """ A Broadway producer that uses the gRPC StreamingPull API to receive messages from a Google Cloud Pub/Sub subscription. @@ -7,7 +7,7 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do This producer opens a persistent bidirectional gRPC stream to the Pub/Sub service and receives messages as the server pushes them. This is more - efficient than the HTTP pull approach (`BroadwayCloudPubSub.Producer`) for + efficient than the HTTP pull approach (`BroadwayCloudPubSub.Pull.Producer`) for workloads that require low latency or high throughput. Each producer process (N = `producer: [concurrency: N]`) starts and links @@ -34,7 +34,7 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do name: MyPipeline, producer: [ module: - {BroadwayCloudPubSub.Streaming.Producer, + {BroadwayCloudPubSub.Producer, goth: MyApp.Goth, subscription: "projects/my-project/subscriptions/my-subscription", max_outstanding_messages: 1000} @@ -326,13 +326,13 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do To use with the local Pub/Sub emulator: - {BroadwayCloudPubSub.Streaming.Producer, + {BroadwayCloudPubSub.Producer, subscription: "projects/my-project/subscriptions/my-subscription", grpc_endpoint: "localhost:8085", use_ssl: false, token_generator: {MyApp, :emulator_token, []}} - ## Differences from `BroadwayCloudPubSub.Producer` + ## Differences from `BroadwayCloudPubSub.Pull.Producer` * **Push-based**: Messages arrive via a persistent gRPC stream rather than being fetched on demand via HTTP pull requests. @@ -525,13 +525,13 @@ defmodule BroadwayCloudPubSub.Streaming.Producer do if min > max do raise ArgumentError, - "invalid Streaming.Producer options: :backoff_min (#{min}) must be <= :backoff_max (#{max})" + "invalid BroadwayCloudPubSub.Producer options: :backoff_min (#{min}) must be <= :backoff_max (#{max})" end validated {:error, err} -> - raise ArgumentError, "invalid Streaming.Producer options: #{Exception.message(err)}" + raise ArgumentError, "invalid BroadwayCloudPubSub.Producer options: #{Exception.message(err)}" end end diff --git a/lib/broadway_cloud_pub_sub/streaming/client.ex b/lib/broadway_cloud_pub_sub/streaming/client.ex index ede986e..85d6a62 100644 --- a/lib/broadway_cloud_pub_sub/streaming/client.ex +++ b/lib/broadway_cloud_pub_sub/streaming/client.ex @@ -4,7 +4,7 @@ defmodule BroadwayCloudPubSub.Streaming.Client do The default implementation is `BroadwayCloudPubSub.Streaming.GrpcClient`. Provide a custom module via the `:grpc_client` option on - `BroadwayCloudPubSub.Streaming.Producer` for testing or alternate transports. + `BroadwayCloudPubSub.Producer` for testing or alternate transports. ## Implementing a custom client @@ -28,7 +28,7 @@ defmodule BroadwayCloudPubSub.Streaming.Client do Then configure the producer: - {BroadwayCloudPubSub.Streaming.Producer, + {BroadwayCloudPubSub.Producer, grpc_client: MyApp.FakeGrpcClient, subscription: "projects/my-project/subscriptions/my-sub", ...} diff --git a/lib/broadway_cloud_pub_sub/streaming/grpc_client.ex b/lib/broadway_cloud_pub_sub/streaming/grpc_client.ex index 6b989bc..24076b5 100644 --- a/lib/broadway_cloud_pub_sub/streaming/grpc_client.ex +++ b/lib/broadway_cloud_pub_sub/streaming/grpc_client.ex @@ -1,6 +1,6 @@ defmodule BroadwayCloudPubSub.Streaming.GrpcClient do @moduledoc """ - The default gRPC client for `BroadwayCloudPubSub.Streaming.Producer`. + The default gRPC client for `BroadwayCloudPubSub.Producer`. Implements `BroadwayCloudPubSub.Streaming.Client` using the `grpc` library with the `Google.Pubsub.V1.Subscriber.Stub` generated stub. diff --git a/lib/broadway_cloud_pub_sub/streaming/options.ex b/lib/broadway_cloud_pub_sub/streaming/options.ex index cb09f8e..976aaee 100644 --- a/lib/broadway_cloud_pub_sub/streaming/options.ex +++ b/lib/broadway_cloud_pub_sub/streaming/options.ex @@ -1,7 +1,5 @@ defmodule BroadwayCloudPubSub.Streaming.Options do - @moduledoc """ - Options for `BroadwayCloudPubSub.Streaming.Producer`. - """ + @moduledoc false @default_grpc_endpoint "pubsub.googleapis.com:443" @default_max_outstanding_messages 1_000 diff --git a/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex b/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex index 965cab3..3478426 100644 --- a/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex +++ b/lib/broadway_cloud_pub_sub/streaming/stream_manager.ex @@ -139,7 +139,7 @@ defmodule BroadwayCloudPubSub.Streaming.StreamManager do StreamManager accumulates it into `pending_demand` and flushes up to the new total from the message buffer. - Called by `Streaming.Producer.handle_demand/2`. + Called by `BroadwayCloudPubSub.Producer.handle_demand/2`. """ @spec notify_demand(pid(), non_neg_integer()) :: :ok def notify_demand(pid, amount) when is_integer(amount) and amount >= 0 do diff --git a/lib/broadway_cloud_pub_sub/streaming/unary_ack_supervisor.ex b/lib/broadway_cloud_pub_sub/streaming/unary_ack_supervisor.ex index b3f5a73..4dcb215 100644 --- a/lib/broadway_cloud_pub_sub/streaming/unary_ack_supervisor.ex +++ b/lib/broadway_cloud_pub_sub/streaming/unary_ack_supervisor.ex @@ -2,7 +2,7 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryAckSupervisor do @moduledoc false # Supervisor that owns the AckBatcher and UnaryRpcClient for a single Broadway - # Streaming.Producer pipeline. + # BroadwayCloudPubSub.Producer pipeline. # # Uses :one_for_one so each child restarts independently. AckBatcher accumulates # pending ack_ids in its state; restarting it when UnaryRpcClient crashes would diff --git a/test/broadway_cloud_pub_sub/streaming/options_test.exs b/test/broadway_cloud_pub_sub/streaming/options_test.exs index 8499477..754eebd 100644 --- a/test/broadway_cloud_pub_sub/streaming/options_test.exs +++ b/test/broadway_cloud_pub_sub/streaming/options_test.exs @@ -473,7 +473,7 @@ end defmodule BroadwayCloudPubSub.Streaming.ProducerPrepareForStartTest do use ExUnit.Case, async: true - alias BroadwayCloudPubSub.Streaming.Producer + alias BroadwayCloudPubSub.Producer # Minimal broadway_opts that satisfies prepare_for_start/2. defp broadway_opts(producer_opts \\ []) do From 4a44ccd7d798d193fc064de5a7beca808ea35446 Mon Sep 17 00:00:00 2001 From: Rock Date: Wed, 27 May 2026 22:56:25 +0200 Subject: [PATCH 18/29] grpc_client accepts {Module, opts} --- lib/broadway_cloud_pub_sub/producer.ex | 10 ++++- .../streaming/options.ex | 19 +++++++++- .../streaming/options_test.exs | 38 +++++++++++++++++++ 3 files changed, 65 insertions(+), 2 deletions(-) diff --git a/lib/broadway_cloud_pub_sub/producer.ex b/lib/broadway_cloud_pub_sub/producer.ex index 2ca189f..08c2088 100644 --- a/lib/broadway_cloud_pub_sub/producer.ex +++ b/lib/broadway_cloud_pub_sub/producer.ex @@ -374,8 +374,16 @@ defmodule BroadwayCloudPubSub.Producer do broadway_name = opts[:broadway_name] + # Normalise :grpc_client — accept Module or {Module, inner_opts}. + # When a tuple is given, merge the inner opts into the producer opts so + # that grpc_client.init/1 and all downstream components see them. + {grpc_client, opts} = + case opts[:grpc_client] do + {mod, inner_opts} -> {mod, Keyword.merge(opts, inner_opts) |> Keyword.put(:grpc_client, mod)} + mod -> {mod, opts} + end + # Add grpc_client_config to be used by stream manager and unary - grpc_client = opts[:grpc_client] {:ok, client_config} = grpc_client.init(opts) opts = Keyword.put(opts, :grpc_client_config, client_config) diff --git a/lib/broadway_cloud_pub_sub/streaming/options.ex b/lib/broadway_cloud_pub_sub/streaming/options.ex index 976aaee..d8c3f93 100644 --- a/lib/broadway_cloud_pub_sub/streaming/options.ex +++ b/lib/broadway_cloud_pub_sub/streaming/options.ex @@ -297,13 +297,19 @@ defmodule BroadwayCloudPubSub.Streaming.Options do """ ], grpc_client: [ - type: :atom, + type: {:custom, __MODULE__, :type_grpc_client, [[]]}, default: BroadwayCloudPubSub.Streaming.GrpcClient, doc: """ The module implementing the `BroadwayCloudPubSub.Streaming.Client` behaviour. Defaults to `BroadwayCloudPubSub.Streaming.GrpcClient`, which uses the `grpc` library to communicate with Google Cloud Pub/Sub. + Accepts either a bare module or a `{module, opts}` tuple. When a tuple is + given, `opts` are merged into the producer options and passed to + `c:BroadwayCloudPubSub.Streaming.Client.init/1`: + + grpc_client: {MyGrpcClient, channel_opts: [transport_opts: []]} + Swap this for testing or custom gRPC transports. """ ], @@ -402,6 +408,17 @@ defmodule BroadwayCloudPubSub.Streaming.Options do "integer is between 0 and 600, got: #{inspect(value)}"} end + def type_grpc_client(mod, _opts) when is_atom(mod) and not is_nil(mod), do: {:ok, mod} + + def type_grpc_client({mod, inner_opts}, _opts) + when is_atom(mod) and not is_nil(mod) and is_list(inner_opts), + do: {:ok, {mod, inner_opts}} + + def type_grpc_client(value, _opts) do + {:error, + "expected :grpc_client to be a module or {module, opts} tuple, got: #{inspect(value)}"} + end + def type_adapter(:gun, _), do: {:ok, GRPC.Client.Adapters.Gun} def type_adapter(:mint, _), do: {:ok, GRPC.Client.Adapters.Mint} diff --git a/test/broadway_cloud_pub_sub/streaming/options_test.exs b/test/broadway_cloud_pub_sub/streaming/options_test.exs index 754eebd..98f1093 100644 --- a/test/broadway_cloud_pub_sub/streaming/options_test.exs +++ b/test/broadway_cloud_pub_sub/streaming/options_test.exs @@ -523,5 +523,43 @@ defmodule BroadwayCloudPubSub.Streaming.ProducerPrepareForStartTest do [sup_spec] = specs assert sup_spec.type == :supervisor end + + test ":grpc_client accepts a bare module" do + {_specs, updated_opts} = Producer.prepare_for_start(Producer, broadway_opts()) + {_module, producer_opts} = updated_opts[:producer][:module] + + # Default GrpcClient stored as bare module + assert producer_opts[:grpc_client] == BroadwayCloudPubSub.Streaming.GrpcClient + end + + test ":grpc_client accepts {Module, inner_opts} and merges inner_opts into producer opts" do + defmodule FakeGrpcClient do + @behaviour BroadwayCloudPubSub.Streaming.Client + + @impl true + def init(opts), do: {:ok, Map.new(opts)} + + @impl true + def connect(_opts), do: {:ok, :fake_channel} + + @impl true + def recv_messages(_channel, _opts), do: [] + + @impl true + def acknowledge(_channel, _ack_ids, _opts), do: :ok + + @impl true + def modify_ack_deadline(_channel, _ack_ids, _deadline, _opts), do: :ok + end + + opts = broadway_opts(grpc_client: {FakeGrpcClient, custom_opt: :hello}) + {_specs, updated_opts} = Producer.prepare_for_start(Producer, opts) + {_module, producer_opts} = updated_opts[:producer][:module] + + # Module extracted from tuple + assert producer_opts[:grpc_client] == FakeGrpcClient + # Inner opts merged into producer opts + assert producer_opts[:custom_opt] == :hello + end end end From 00a426100ca7460232e810b62c417b3f2b78572b Mon Sep 17 00:00:00 2001 From: Rock Date: Wed, 27 May 2026 23:23:29 +0200 Subject: [PATCH 19/29] on_failure defaults to {:nack, 0} --- lib/broadway_cloud_pub_sub/pull/options.ex | 6 +++++- .../streaming/options.ex | 6 ++++-- .../pull/acknowledger_test.exs | 6 +++--- .../pull/finch_client_test.exs | 2 +- .../pull/producer_test.exs | 4 ++-- .../streaming/acknowledger_test.exs | 19 +++++++++++++++---- .../streaming/options_test.exs | 4 ++-- 7 files changed, 32 insertions(+), 15 deletions(-) diff --git a/lib/broadway_cloud_pub_sub/pull/options.ex b/lib/broadway_cloud_pub_sub/pull/options.ex index bfb5e2c..c485f99 100644 --- a/lib/broadway_cloud_pub_sub/pull/options.ex +++ b/lib/broadway_cloud_pub_sub/pull/options.ex @@ -59,8 +59,12 @@ defmodule BroadwayCloudPubSub.Pull.Options do See the "Acknowledgements" section below for all the possible values. This option can also be changed for each message through `Broadway.Message.configure_ack/2`. + + Defaults to `{:nack, 0}`, which makes failed messages immediately + available for redelivery. This matches the behaviour of the official + Google Cloud Pub/Sub client libraries. """, - default: :noop + default: {:nack, 0} ], on_success: [ type: diff --git a/lib/broadway_cloud_pub_sub/streaming/options.ex b/lib/broadway_cloud_pub_sub/streaming/options.ex index d8c3f93..e13ac2c 100644 --- a/lib/broadway_cloud_pub_sub/streaming/options.ex +++ b/lib/broadway_cloud_pub_sub/streaming/options.ex @@ -89,10 +89,12 @@ defmodule BroadwayCloudPubSub.Streaming.Options do ], on_failure: [ type: {:custom, __MODULE__, :type_ack_option, [[{:name, :on_failure}]]}, - default: :noop, + default: {:nack, 0}, doc: """ Configures the acknowledgement behaviour for failed messages. - Defaults to `:noop`. + Defaults to `{:nack, 0}`, which makes failed messages immediately + available for redelivery. This matches the behaviour of the official + Google Cloud Pub/Sub client libraries. """ ], on_shutdown: [ diff --git a/test/broadway_cloud_pub_sub/pull/acknowledger_test.exs b/test/broadway_cloud_pub_sub/pull/acknowledger_test.exs index eb52500..3182c18 100644 --- a/test/broadway_cloud_pub_sub/pull/acknowledger_test.exs +++ b/test/broadway_cloud_pub_sub/pull/acknowledger_test.exs @@ -34,7 +34,7 @@ defmodule BroadwayCloudPubSub.Pull.AcknowledgerTest do :persistent_term.put(ack_ref, %{ base_url: "http://localhost:8085", client: CallerClient, - on_failure: opts[:on_failure] || :noop, + on_failure: opts[:on_failure] || {:nack, 0}, on_success: opts[:on_success] || :ack, subscription: "projects/test/subscriptions/test-subscription", # Required for the CallerClient @@ -76,14 +76,14 @@ defmodule BroadwayCloudPubSub.Pull.AcknowledgerTest do test "sets defaults" do ack_data = %{ack_id: "1"} - expected = %{ack_id: "1", on_success: :ack, on_failure: :noop} + expected = %{ack_id: "1", on_success: :ack, on_failure: {:nack, 0}} assert {:ok, expected} == Acknowledger.configure(:ack_ref, ack_data, []) end test "set on_success with ignore" do ack_data = %{ack_id: "1"} - expected = %{ack_id: "1", on_success: :noop, on_failure: :noop} + expected = %{ack_id: "1", on_success: :noop, on_failure: {:nack, 0}} assert {:ok, expected} == Acknowledger.configure(:ack_ref, ack_data, on_success: :noop) diff --git a/test/broadway_cloud_pub_sub/pull/finch_client_test.exs b/test/broadway_cloud_pub_sub/pull/finch_client_test.exs index d71d90a..68153de 100644 --- a/test/broadway_cloud_pub_sub/pull/finch_client_test.exs +++ b/test/broadway_cloud_pub_sub/pull/finch_client_test.exs @@ -652,7 +652,7 @@ defmodule BroadwayCloudPubSub.Pull.FinchClientTest do base_url: Keyword.fetch!(base_opts, :base_url), client: FinchClient, finch: Keyword.fetch!(base_opts, :finch), - on_failure: base_opts[:on_failure] || :noop, + on_failure: base_opts[:on_failure] || {:nack, 0}, on_success: base_opts[:on_success] || :ack, subscription: "projects/test/subscriptions/test-subscription", token_generator: {__MODULE__, :generate_token, []}, diff --git a/test/broadway_cloud_pub_sub/pull/producer_test.exs b/test/broadway_cloud_pub_sub/pull/producer_test.exs index becbe2b..95177c2 100644 --- a/test/broadway_cloud_pub_sub/pull/producer_test.exs +++ b/test/broadway_cloud_pub_sub/pull/producer_test.exs @@ -408,7 +408,7 @@ defmodule BroadwayCloudPubSub.Pull.ProducerTest do assert producer_opts[:on_success] == :ack end - test ":on_failure defaults to :noop" do + test ":on_failure defaults to {:nack, 0}" do assert {_, [ producer: [ @@ -422,7 +422,7 @@ defmodule BroadwayCloudPubSub.Pull.ProducerTest do subscription: "projects/foo/subscriptions/bar" ) - assert producer_opts[:on_failure] == :noop + assert producer_opts[:on_failure] == {:nack, 0} end test ":on_success should be a valid action" do diff --git a/test/broadway_cloud_pub_sub/streaming/acknowledger_test.exs b/test/broadway_cloud_pub_sub/streaming/acknowledger_test.exs index bb9ae67..47b0123 100644 --- a/test/broadway_cloud_pub_sub/streaming/acknowledger_test.exs +++ b/test/broadway_cloud_pub_sub/streaming/acknowledger_test.exs @@ -39,7 +39,7 @@ defmodule BroadwayCloudPubSub.Streaming.AcknowledgerTest do {:ok, stub_pid} = StubManager.start_link(self()) ack_ref = make_ref() - config = %{on_success: :ack, on_failure: :noop} + config = %{on_success: :ack, on_failure: {:nack, 0}} :persistent_term.put(ack_ref, {stub_pid, config}) on_exit(fn -> :persistent_term.erase(ack_ref) end) @@ -73,7 +73,7 @@ defmodule BroadwayCloudPubSub.Streaming.AcknowledgerTest do test "merges on_success into ack_data", %{ack_ref: ack_ref} do {:ok, data} = Acknowledger.configure(ack_ref, %{ack_id: "x"}, on_success: :noop) - assert data == %{ack_id: "x", on_success: :noop, on_failure: :noop} + assert data == %{ack_id: "x", on_success: :noop, on_failure: {:nack, 0}} end test "normalises :nack on_failure to {:nack, 0}", %{ack_ref: ack_ref} do @@ -98,7 +98,8 @@ defmodule BroadwayCloudPubSub.Streaming.AcknowledgerTest do assert Enum.sort(ack_ids) == ["id-1", "id-2"] end - test "does not ack failed messages when on_failure is :noop (default)", %{ack_ref: ack_ref} do + test "does not ack failed messages when on_failure is :noop", %{ack_ref: ack_ref, stub_pid: stub_pid} do + :persistent_term.put(ack_ref, {stub_pid, %{on_success: :ack, on_failure: :noop}}) success = [build_message("ok-1", ack_ref)] failure = [build_message("fail-1", ack_ref)] @@ -109,6 +110,16 @@ defmodule BroadwayCloudPubSub.Streaming.AcknowledgerTest do refute_receive {:modify_deadline, _, _} end + test "nacks failed messages by default (on_failure: {:nack, 0})", %{ack_ref: ack_ref} do + success = [build_message("ok-1", ack_ref)] + failure = [build_message("fail-1", ack_ref)] + + Acknowledger.ack(ack_ref, success, failure) + + assert_receive {:acknowledge, ["ok-1"]} + assert_receive {:modify_deadline, ["fail-1"], 0} + end + test "does not send anything when on_success is :noop", %{ ack_ref: ack_ref, stub_pid: stub_pid @@ -169,7 +180,7 @@ defmodule BroadwayCloudPubSub.Streaming.AcknowledgerTest do test "respects per-message on_failure override", %{ack_ref: ack_ref} do nack_msg = build_message("nack-id", ack_ref, %{on_failure: {:nack, 10}}) - noop_msg = build_message("noop-id", ack_ref) + noop_msg = build_message("noop-id", ack_ref, %{on_failure: :noop}) Acknowledger.ack(ack_ref, [], [nack_msg, noop_msg]) diff --git a/test/broadway_cloud_pub_sub/streaming/options_test.exs b/test/broadway_cloud_pub_sub/streaming/options_test.exs index 98f1093..e70aa9a 100644 --- a/test/broadway_cloud_pub_sub/streaming/options_test.exs +++ b/test/broadway_cloud_pub_sub/streaming/options_test.exs @@ -93,9 +93,9 @@ defmodule BroadwayCloudPubSub.Streaming.OptionsTest do assert opts[:on_success] == :ack end - test "on_failure defaults to :noop" do + test "on_failure defaults to {:nack, 0}" do {:ok, opts} = validate(subscription: "projects/p/subscriptions/s") - assert opts[:on_failure] == :noop + assert opts[:on_failure] == {:nack, 0} end test "accepts :ack" do From c9e8167666654216beb64205b3c7b7d68f417fcd Mon Sep 17 00:00:00 2001 From: Rock Date: Thu, 28 May 2026 17:17:01 +0200 Subject: [PATCH 20/29] README and docs --- CHANGELOG.md | 80 +++++++++---- README.md | 126 ++++++++++++--------- docs/upgrade_to_2.0.md | 251 +++++++++++++++++++++++++++++++++++++++++ mix.exs | 5 +- 4 files changed, 385 insertions(+), 77 deletions(-) create mode 100644 docs/upgrade_to_2.0.md diff --git a/CHANGELOG.md b/CHANGELOG.md index ef74c29..466e150 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,42 +7,80 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +## [2.0.0-rc.0] - unreleased + +2.0 introduces a new default producer based on the gRPC StreamingPull API. +The previous HTTP pull producer is still fully supported under +`BroadwayCloudPubSub.Pull.Producer`. See the +[2.0 upgrade guide](docs/upgrade_to_2.0.md) for step-by-step migration +instructions. + ### Added -- `BroadwayCloudPubSub.Streaming.Producer` — a new Broadway producer that uses the - gRPC StreamingPull API for low-latency, push-based message delivery instead of - HTTP pull requests +- **`BroadwayCloudPubSub.Producer`**: a new Broadway producer that uses the + gRPC StreamingPull API for low-latency, push-based message delivery. This is + the recommended producer going forward. + + - Persistent bidirectional gRPC stream; messages are pushed by the server + rather than fetched on demand + - Server-side flow control via `:max_outstanding_messages` and + `:max_outstanding_bytes` + - Automatic lease extension with adaptive p99 ack deadlines, preventing + premature redelivery without manual `ackDeadlineSeconds` tuning + - Batched ack/nack via a separate unary gRPC connection, independent of the + streaming connection + - Exactly-once delivery support, auto-detected from subscription properties + at runtime + - Message ordering via `:enable_message_ordering` + - Graceful shutdown with configurable drain timeout (`:drain_timeout_ms`) + - Comprehensive telemetry: stream lifecycle, ack batching, and gRPC spans + - Gun and Mint HTTP/2 adapters via the `:adapter` option + - Pub/Sub emulator support via `:grpc_endpoint` and `:use_ssl` - - Server-side flow control via `:max_outstanding_messages` and `:max_outstanding_bytes` +- **`BroadwayCloudPubSub.Streaming.Client`**: behaviour for custom gRPC client + implementations, analogous to `BroadwayCloudPubSub.Pull.Client` on the pull + side. - - Automatic lease extension with adaptive p99 ack deadlines to prevent premature - message redelivery +- **`BroadwayCloudPubSub.Streaming.GrpcClient`**: default gRPC client + implementation using the `grpc` library. - - Batched ack/nack via a separate unary gRPC connection, independent of the streaming - connection +### Changed - - Exactly-once delivery support, auto-detected from subscription properties at runtime +- Modernize GitHub Actions CI. - - Message ordering support via the `:enable_message_ordering` option +### Breaking changes - - Graceful shutdown with configurable drain timeout (`:drain_timeout_ms`) +- Bump minimum Elixir to 1.15. - - Telemetry events for stream lifecycle, ack batching, and gRPC spans +- **`BroadwayCloudPubSub.Producer` is now the gRPC streaming producer.** The + 1.x HTTP pull producer has moved to `BroadwayCloudPubSub.Pull.Producer`. + Update the module name in your pipeline to keep the pull behaviour: - - Support for both Gun and Mint HTTP/2 adapters via the `:adapter` option + ```elixir + # 1.x + {BroadwayCloudPubSub.Producer, goth: MyApp.Goth, subscription: "..."} + + # 2.0, keep pull + {BroadwayCloudPubSub.Pull.Producer, goth: MyApp.Goth, subscription: "..."} - - Pub/Sub emulator support via `:grpc_endpoint` and `:use_ssl` options + # 2.0, switch to streaming (recommended) + {BroadwayCloudPubSub.Producer, goth: MyApp.Goth, subscription: "...", + max_outstanding_messages: 1000} + ``` - - `BroadwayCloudPubSub.Streaming.Client` — behaviour for custom gRPC client - implementations +- **`BroadwayCloudPubSub.PullClient` renamed to + `BroadwayCloudPubSub.Pull.FinchClient`.** Only affects you if you referenced + it directly - - `BroadwayCloudPubSub.Streaming.GrpcClient` — default gRPC client using the - `grpc` library +- **`BroadwayCloudPubSub.Client` behaviour renamed to + `BroadwayCloudPubSub.Pull.Client`.** Only affects you if you implemented a + custom HTTP pull client to override the `:client` option.. -### Changed +- **`on_failure` default changed from `:noop` to `{:nack, 0}`** in both + producers. Failed messages are now immediately made available for redelivery, + matching the behaviour of the official Google Cloud Pub/Sub client libraries. + Set `on_failure: :noop` explicitly to preserve the 1.x behaviour. -- Bump minimum elixir to 1.15 -- Modernize github actions ## [1.0.0] - 2026-05-26 diff --git a/README.md b/README.md index 815d658..d5a0ea1 100644 --- a/README.md +++ b/README.md @@ -6,44 +6,60 @@ A Google Cloud Pub/Sub connector for [Broadway](https://github.com/dashbitco/bro Documentation can be found at [https://hexdocs.pm/broadway_cloud_pub_sub](https://hexdocs.pm/broadway_cloud_pub_sub). -This project provides: - -* `BroadwayCloudPubSub.Producer` - A GenStage producer that continuously receives messages from a Pub/Sub subscription and acknowledges them after being successfully processed. -* `BroadwayCloudPubSub.Streaming.Producer` - A GenStage producer that uses the gRPC StreamingPull API for low-latency, push-based message delivery. -* `BroadwayCloudPubSub.Client` - A generic behaviour to implement Pub/Sub clients. -* `BroadwayCloudPubSub.PullClient` - Default REST client used by `BroadwayCloudPubSub.Producer`. +## What's in the box + +* `BroadwayCloudPubSub.Producer`: Broadway producer using the gRPC + [StreamingPull][gcp-streamingpull] API. Messages are pushed by the server over + a persistent bidirectional stream, giving low latency and high throughput with + automatic lease extension and server-side flow control. **This is the + recommended producer**, in line with Google's own [guidance][gcp-streamingpull] + that StreamingPull is what their first-party client libraries use "where + possible". +* `BroadwayCloudPubSub.Pull.Producer`: Broadway producer using the unary HTTP + [Pull][gcp-pull-api] API. Retained for environments where gRPC is unavailable + or undesired, and for the cases Google lists as Pull-only: when you need + strict control over the number of messages pulled per request, tight control + over client memory and CPU, or when your subscriber acts as a proxy to + another pull-oriented system. +* `BroadwayCloudPubSub.Streaming.Client`: Behaviour for custom gRPC client implementations. +* `BroadwayCloudPubSub.Pull.Client`: Behaviour for custom HTTP pull client implementations. + +[gcp-streamingpull]: https://cloud.google.com/pubsub/docs/pull#streamingpull_api +[gcp-pull-api]: https://cloud.google.com/pubsub/docs/pull#pull_api ## Installation -Add `:broadway_cloud_pub_sub` to the list of dependencies in `mix.exs`: +Add `:broadway_cloud_pub_sub` to your dependencies, along with an HTTP/2 adapter +for `:grpc`: ```elixir def deps do [ - {:broadway_cloud_pub_sub, "~> 1.0"}, - {:goth, "~> 1.3"} - ] -end -``` - -> Note the [goth](https://hexdocs.pm/goth) package, which handles Google Authentication, is required for the default token generator. - -If you are using `BroadwayCloudPubSub.Streaming.Producer`, also add the gRPC dependencies: - -```elixir -def deps do - [ - {:broadway_cloud_pub_sub, "~> 0.10.0"}, + {:broadway_cloud_pub_sub, "~> 2.0"}, {:goth, "~> 1.3"}, {:grpc, "~> 1.0"}, - {:protobuf, "~> 0.12"} + {:protobuf, "~> 0.12"}, + # Pick one HTTP/2 adapter: + {:gun, "~> 2.0"}, + # or + # {:mint, "~> 1.5"}, + # {:castore, "~> 1.0"} ] end ``` -## Usage +> The [goth](https://hexdocs.pm/goth) package handles Google Authentication and +> is required for the default token generator. +> +> The `grpc` and `protobuf` packages are required by +> `BroadwayCloudPubSub.Producer`. You must pick one HTTP/2 adapter for the gRPC +> connection and add it to your `mix.exs`: either `:gun`, or `:mint` together +> with `:castore`. +> +> If you only use `BroadwayCloudPubSub.Pull.Producer` you may omit `:grpc`, +> `:protobuf`, and the adapter packages. -Configure Broadway with one or more producers using `BroadwayCloudPubSub.Producer`: +## Usage ```elixir Broadway.start_link(MyBroadway, @@ -51,56 +67,58 @@ Broadway.start_link(MyBroadway, producer: [ module: {BroadwayCloudPubSub.Producer, goth: MyGoth, - subscription: "projects/my-project/subscriptions/my-subscription" + subscription: "projects/my-project/subscriptions/my-subscription", + max_outstanding_messages: 1000 } - ] + ], + processors: [default: [concurrency: 10]] ) ``` -## Streaming Usage +See `BroadwayCloudPubSub.Producer` for the full option reference, including flow +control, reconnection backoff, graceful shutdown, and telemetry. + +### HTTP/2 adapter -For lower latency and higher throughput workloads, use `BroadwayCloudPubSub.Streaming.Producer`. -It opens a persistent bidirectional gRPC stream to Pub/Sub and receives messages as the server -pushes them, rather than polling via HTTP. +The producer supports two adapters. Both are optional dependencies of `:grpc`, +so you select one by adding it to your application's `mix.exs` (see +[Installation](#installation)). + +- `:gun` (default): [Gun](https://github.com/ninenines/gun) HTTP/2 client. + Add `{:gun, "~> 2.0"}` to your deps. +- `:mint`: [Mint](https://github.com/elixir-mint/mint) HTTP/2 client. + Add `{:mint, "~> 1.5"}` and `{:castore, "~> 1.0"}` to your deps. + +Then select the adapter in your producer config: ```elixir -Broadway.start_link(MyBroadway, - name: MyBroadway, - producer: [ - module: {BroadwayCloudPubSub.Streaming.Producer, - goth: MyGoth, - subscription: "projects/my-project/subscriptions/my-subscription", - max_outstanding_messages: 1000 - } - ] -) +{BroadwayCloudPubSub.Producer, + goth: MyGoth, + subscription: "projects/my-project/subscriptions/my-subscription", + adapter: :mint} ``` -### gRPC adapter - -The streaming producer supports two HTTP/2 adapters, both provided by the `grpc` dependency: +### Using the HTTP pull producer -- `:gun` (default) — Uses the [Gun](https://github.com/ninenines/gun) HTTP/2 client. This is the - traditional adapter and works out of the box with the standard `grpc` dependency. -- `:mint` — Uses the [Mint](https://github.com/elixir-mint/mint) HTTP/2 client. Mint may be - preferable in environments where Gun is not available or not desired. +If gRPC is not available in your environment or you prefer to use the HTTP pull method, use `BroadwayCloudPubSub.Pull.Producer`: ```elixir Broadway.start_link(MyBroadway, name: MyBroadway, producer: [ - module: {BroadwayCloudPubSub.Streaming.Producer, + module: {BroadwayCloudPubSub.Pull.Producer, goth: MyGoth, - subscription: "projects/my-project/subscriptions/my-subscription", - adapter: :mint + subscription: "projects/my-project/subscriptions/my-subscription" } - ] + ], + processors: [default: [concurrency: 10]] ) ``` -See `BroadwayCloudPubSub.Streaming.Producer` for the full list of configuration options, -including flow control (`max_outstanding_messages`, `max_outstanding_bytes`), reconnection -backoff, and shutdown behaviour. +### Upgrading from 1.x + +See the [2.0 upgrade guide](docs/upgrade_to_2.0.md) for the full list of breaking +changes and step-by-step migration instructions from pull producer to gRPC streaming producer. ## License diff --git a/docs/upgrade_to_2.0.md b/docs/upgrade_to_2.0.md new file mode 100644 index 0000000..891c5e7 --- /dev/null +++ b/docs/upgrade_to_2.0.md @@ -0,0 +1,251 @@ +# Upgrading to broadway_cloud_pub_sub 2.0 + +2.0 introduces a new default producer built on the gRPC StreamingPull API. The +previous HTTP pull producer is still fully supported, but has moved to its own +sub-namespace as a fallback for environments where gRPC is unavailable. + +## Overview of breaking changes + +| # | What changed | Migration action | +|---|---|---| +| 1 | `BroadwayCloudPubSub.Producer` is now the **streaming** producer | Switch to streaming, or rename to `Pull.Producer` to keep pull | +| 2 | Old pull producer moved to `BroadwayCloudPubSub.Pull.Producer` | Rename the module in your pipeline | +| 3 | `BroadwayCloudPubSub.PullClient` → `BroadwayCloudPubSub.Pull.FinchClient` | Rename if referenced directly | +| 4 | `BroadwayCloudPubSub.Client` behaviour → `BroadwayCloudPubSub.Pull.Client` | Rename if you implemented a custom pull client | +| 5 | `on_failure` default: `:noop` → `{:nack, 0}` | Set `on_failure: :noop` explicitly to keep old behaviour | + +--- + +## Should you switch to streaming? + +The short answer from Google's [Pub/Sub documentation][gcp-pull] is: yes, in +almost all cases. + +**StreamingPull** ([reference][gcp-streamingpull]) is what Google's own +first-party client libraries use "where possible" because it minimises latency +and maximises throughput. It uses a persistent bidirectional gRPC connection: +the server pushes messages as they become available, applies flow control via +outstanding-message and outstanding-byte limits, and the client library extends +ack deadlines automatically. + +**Unary Pull** ([reference][gcp-pull-api]) is a traditional request/response +RPC. Google notes that to get high throughput with the Pull API you would need +to maintain many simultaneous outstanding requests, which is "error-prone and +hard to maintain", and recommends StreamingPull instead. Google lists only +these cases where unary Pull is the right choice: + +- You need strict control over the number of messages the subscriber processes + per request. +- You need fine-grained control over client memory, CPU, or network usage. +- Your subscriber is a proxy between Pub/Sub and another service that operates + in a pull-oriented way. +- gRPC is unavailable or undesired in your environment (for example, an + HTTP-only network policy). + +If none of those apply, switch to the streaming producer. + +[gcp-pull]: https://cloud.google.com/pubsub/docs/pull +[gcp-streamingpull]: https://cloud.google.com/pubsub/docs/pull#streamingpull_api +[gcp-pull-api]: https://cloud.google.com/pubsub/docs/pull#pull_api + +--- + +## 1. New default producer + +In 2.0, `BroadwayCloudPubSub.Producer` is a brand-new producer that uses the +gRPC StreamingPull API. Instead of polling Pub/Sub over HTTP, it opens a +persistent bidirectional gRPC stream and receives messages as the server pushes +them. This gives lower latency, higher throughput, and removes the need to tune +`ackDeadlineSeconds`. Leases are extended automatically. + +You have two migration paths: + +### Path A: switch to the new streaming producer (recommended) + +Add the gRPC dependencies to `mix.exs`. You must pick one HTTP/2 adapter for +the gRPC connection: either `:gun`, or `:mint` together with `:castore`: + +```elixir +def deps do + [ + {:broadway_cloud_pub_sub, "~> 2.0"}, + {:goth, "~> 1.3"}, + {:grpc, "~> 1.0"}, + {:protobuf, "~> 0.12"}, + # Pick one HTTP/2 adapter: + {:gun, "~> 2.0"}, + # or + # {:mint, "~> 1.5"}, + # {:castore, "~> 1.0"} + ] +end +``` + +Then update your pipeline config. + +```elixir +# 1.x +producer: [ + module: {BroadwayCloudPubSub.Producer, + goth: MyApp.Goth, + subscription: "projects/my-project/subscriptions/my-sub", + max_number_of_messages: 100, + receive_interval: 500} +] + +# 2.0, streaming producer +producer: [ + module: {BroadwayCloudPubSub.Producer, + goth: MyApp.Goth, + subscription: "projects/my-project/subscriptions/my-sub", + max_outstanding_messages: 1000} +] +``` + +The tables below map every 1.x pull option to its 2.0 streaming equivalent. + +#### Options that work unchanged + +These options have the same name and semantics in the streaming producer: + +| Option | Notes | +|---|---| +| `:subscription` | Required. Same format. | +| `:goth` | Same. | +| `:token_generator` | Same MFA tuple interface. | +| `:on_success` | Same values (`:ack`, `:noop`, `{:nack, seconds}`). | +| `:on_failure` | Same values. Default changed to `{:nack, 0}` (see breaking change #5). | + +#### Options that have a replacement + +| 1.x pull option | 2.0 streaming replacement | Notes | +|---|---|---| +| `:max_number_of_messages` | `:max_outstanding_messages` | Controls how many unacknowledged messages the server pushes at once, across the whole stream rather than per-request. | +| `:base_url` | `:grpc_endpoint` | Override the service endpoint. The format differs: `:base_url` takes an HTTP URL (`"https://pubsub.googleapis.com"`), while `:grpc_endpoint` takes a bare `host:port` string (`"localhost:8085"`). `:grpc_endpoint` pairs with `:use_ssl` (boolean, default `true`) to control TLS. | +| `:client` | `:grpc_client` | Plug-in a custom client implementation. Now accepts `Module` or `{Module, opts}`. See `BroadwayCloudPubSub.Streaming.Client`. | + +#### Options with no streaming equivalent + +The streaming producer manages its own connection lifecycle and flow control, so +these pull options have no direct replacement: + +| 1.x pull option | Why it does not apply | +|---|---| +| `:receive_interval` | The stream is persistent; the producer does not poll on a timer. | +| `:receive_timeout` | Timeouts are handled at the gRPC transport level. Use `:backoff_*` options to control reconnection. | +| `:finch` | The streaming producer uses gRPC over Gun or Mint, not Finch. | + +The streaming producer also exposes many options that have no pull counterpart, +covering message ordering, flow control tuning, ack batching, reconnection +backoff, graceful shutdown, and more. See `BroadwayCloudPubSub.Producer` for +the full option reference. + +### Path B: keep the HTTP pull producer + +If gRPC is not available in your environment, you want to continue using the pull producer, or want to do progresive rollout supporting both, simply rename the module: + +```elixir +# 1.x +producer: [ + module: {BroadwayCloudPubSub.Producer, + goth: MyApp.Goth, + subscription: "projects/my-project/subscriptions/my-sub"} +] + +# 2.0, pull producer +producer: [ + module: {BroadwayCloudPubSub.Pull.Producer, + goth: MyApp.Goth, + subscription: "projects/my-project/subscriptions/my-sub"} +] +``` + +All existing options (`:goth`, `:subscription`, `:token_generator`, `:base_url`, +`:max_number_of_messages`, `:receive_interval`, `:on_success`, `:on_failure`, +`:client`) are unchanged. + +The `grpc`, `protobuf`, `gun`, `mint` and `castore` dependencies are **not** required when using only +`BroadwayCloudPubSub.Pull.Producer`. + +--- + +## 2. `BroadwayCloudPubSub.PullClient` renamed + +`BroadwayCloudPubSub.PullClient` is now `BroadwayCloudPubSub.Pull.FinchClient`. + +This only affects you if you reference it directly, for example when overriding +the `:client` option or in tests: + +```elixir +# 1.x +client: BroadwayCloudPubSub.PullClient + +# 2.0 +client: BroadwayCloudPubSub.Pull.FinchClient +``` + +--- + +## 3. `BroadwayCloudPubSub.Client` behaviour renamed + +`BroadwayCloudPubSub.Client` is now `BroadwayCloudPubSub.Pull.Client`. + +This only affects you if you implemented a custom HTTP pull client: + +```elixir +# 1.x +defmodule MyApp.CustomPullClient do + @behaviour BroadwayCloudPubSub.Client + ... +end + +# 2.0 +defmodule MyApp.CustomPullClient do + @behaviour BroadwayCloudPubSub.Pull.Client + ... +end +``` + +The callback signatures are unchanged. + +--- + +## 4. `on_failure` default changed: `:noop` → `{:nack, 0}` + +In 1.x, failed messages were left to expire and be redelivered after the +subscription's `ackDeadlineSeconds`. In 2.0 the default is `{:nack, 0}`, making +them immediately available for redelivery, matching the behaviour of the official +Google Cloud Pub/Sub client libraries. + +**If you relied on the old default**, add `on_failure: :noop` explicitly: + +```elixir +# Pull producer +{BroadwayCloudPubSub.Pull.Producer, + goth: MyApp.Goth, + subscription: "projects/my-project/subscriptions/my-sub", + on_failure: :noop} + +# Streaming producer +{BroadwayCloudPubSub.Producer, + goth: MyApp.Goth, + subscription: "projects/my-project/subscriptions/my-sub", + on_failure: :noop} +``` + +For most applications the new default is the right behaviour. A failed message +is retried immediately rather than holding up the subscription until its deadline +expires. + +--- + +## Quick-reference: all renamed modules + +| 1.x name | 2.0 name | +|---|---| +| `BroadwayCloudPubSub.Producer` | `BroadwayCloudPubSub.Pull.Producer` (pull, fallback) | +| *(new in 2.0)* | `BroadwayCloudPubSub.Producer` (streaming, recommended) | +| `BroadwayCloudPubSub.PullClient` | `BroadwayCloudPubSub.Pull.FinchClient` | +| `BroadwayCloudPubSub.Client` | `BroadwayCloudPubSub.Pull.Client` | +| `BroadwayCloudPubSub.Options` | BroadwayCloudPubSub.Pull.Options (internal) | +| `BroadwayCloudPubSub.Acknowledger` | BroadwayCloudPubSub.Pull.Acknowledger (internal) | diff --git a/mix.exs b/mix.exs index ac07519..75f91b1 100644 --- a/mix.exs +++ b/mix.exs @@ -1,7 +1,7 @@ defmodule BroadwayCloudPubSub.MixProject do use Mix.Project - @version "1.0.0" + @version "2.0.0-rc.0" @description "A Google Cloud Pub/Sub connector for Broadway" @repo_url "https://github.com/dashbitco/broadway_cloud_pub_sub" @@ -54,7 +54,8 @@ defmodule BroadwayCloudPubSub.MixProject do source_url: @repo_url, extras: [ "README.md", - "CHANGELOG.md" + "CHANGELOG.md", + "docs/upgrade_to_2.0.md" ], groups_for_modules: [ Pull: [ From 6498fdbfbe4971070edd85f9bc7f629b22e026fd Mon Sep 17 00:00:00 2001 From: Rock Date: Fri, 29 May 2026 11:29:09 +0200 Subject: [PATCH 21/29] Documentation improvements --- docs/upgrade_to_2.0.md | 13 +- lib/broadway_cloud_pub_sub/producer.ex | 251 +++++++------ .../streaming/ack_result.ex | 2 +- .../streaming/options.ex | 337 +++++++++--------- mix.exs | 18 +- 5 files changed, 328 insertions(+), 293 deletions(-) diff --git a/docs/upgrade_to_2.0.md b/docs/upgrade_to_2.0.md index 891c5e7..6b12c06 100644 --- a/docs/upgrade_to_2.0.md +++ b/docs/upgrade_to_2.0.md @@ -1,4 +1,4 @@ -# Upgrading to broadway_cloud_pub_sub 2.0 +# Upgrading to 2.0 2.0 introduces a new default producer built on the gRPC StreamingPull API. The previous HTTP pull producer is still fully supported, but has moved to its own @@ -8,11 +8,10 @@ sub-namespace as a fallback for environments where gRPC is unavailable. | # | What changed | Migration action | |---|---|---| -| 1 | `BroadwayCloudPubSub.Producer` is now the **streaming** producer | Switch to streaming, or rename to `Pull.Producer` to keep pull | -| 2 | Old pull producer moved to `BroadwayCloudPubSub.Pull.Producer` | Rename the module in your pipeline | -| 3 | `BroadwayCloudPubSub.PullClient` → `BroadwayCloudPubSub.Pull.FinchClient` | Rename if referenced directly | -| 4 | `BroadwayCloudPubSub.Client` behaviour → `BroadwayCloudPubSub.Pull.Client` | Rename if you implemented a custom pull client | -| 5 | `on_failure` default: `:noop` → `{:nack, 0}` | Set `on_failure: :noop` explicitly to keep old behaviour | +| [1](#1-new-default-producer) | `BroadwayCloudPubSub.Producer` is now the **streaming** producer | Switch to streaming, or rename to `Pull.Producer` to keep pull | +| [2](#2-broadwaycloudpubsubpullclient-renamed) | `BroadwayCloudPubSub.PullClient` → `BroadwayCloudPubSub.Pull.FinchClient` | Rename if referenced directly | +| [3](#3-broadwaycloudpubsubclient-behaviour-renamed) | `BroadwayCloudPubSub.Client` behaviour → `BroadwayCloudPubSub.Pull.Client` | Rename if you implemented a custom pull client | +| [4](#4-on_failure-default-changed-noop--nack-0) | `on_failure` default: `:noop` → `{:nack, 0}` | Set `on_failure: :noop` explicitly to keep old behaviour | --- @@ -114,7 +113,7 @@ These options have the same name and semantics in the streaming producer: | `:goth` | Same. | | `:token_generator` | Same MFA tuple interface. | | `:on_success` | Same values (`:ack`, `:noop`, `{:nack, seconds}`). | -| `:on_failure` | Same values. Default changed to `{:nack, 0}` (see breaking change #5). | +| `:on_failure` | Same values. Default changed to `{:nack, 0}` (see [breaking change #4](#4-on_failure-default-changed-noop--nack-0)). | #### Options that have a replacement diff --git a/lib/broadway_cloud_pub_sub/producer.ex b/lib/broadway_cloud_pub_sub/producer.ex index 08c2088..de5a480 100644 --- a/lib/broadway_cloud_pub_sub/producer.ex +++ b/lib/broadway_cloud_pub_sub/producer.ex @@ -12,19 +12,19 @@ defmodule BroadwayCloudPubSub.Producer do Each producer process (N = `producer: [concurrency: N]`) starts and links its own **StreamManager** (GenServer), giving N independent gRPC streams - that mirror the Go client’s N `messageIterator`s sharing a single `clientID`. + sharing a single `clientID`. Key components: - * **StreamManager** — GenServer that owns the gRPC bidirectional stream, + * **StreamManager** - GenServer that owns the gRPC bidirectional stream, manages connection lifecycle (connect/reconnect/backoff), extends message leases, and dispatches messages to the linked Producer when demand is available. Started via `start_link` from `Producer.init/1`. - * **Producer** — GenStage process that bridges StreamManager to Broadway. + * **Producer** - GenStage process that bridges StreamManager to Broadway. Tracks downstream demand and forwards messages to processors. - * **UnaryAckSupervisor** — shared across all producers. Supervises + * **UnaryAckSupervisor** - shared across all producers. Supervises AckBatcher and UnaryRpcClient, which batch and send ack/nack/modifyAckDeadline requests via separate unary RPCs (not on the streaming connection). @@ -54,13 +54,13 @@ defmodule BroadwayCloudPubSub.Producer do Supported values: - * `:ack` — acknowledge the message; Pub/Sub removes it from the subscription. - * `:noop` — do nothing; the message is redelivered after the subscription's + * `:ack` - acknowledge the message; Pub/Sub removes it from the subscription. + * `:noop` - do nothing; the message is redelivered after the subscription's `ackDeadlineSeconds` expires. - * `:nack` — equivalent to `{:nack, 0}`; makes the message immediately + * `:nack` - equivalent to `{:nack, 0}`; makes the message immediately available for redelivery. - * `{:nack, seconds}` — sets `ackDeadlineSeconds` to `seconds` for the - message, controlling when it becomes available for redelivery (0–600). + * `{:nack, seconds}` - sets `ackDeadlineSeconds` to `seconds` for the + message, controlling when it becomes available for redelivery (0-600). Acks and deadline modifications are batched by **AckBatcher** and flushed to Pub/Sub via unary RPCs at a configurable interval (`:ack_batch_interval_ms`, @@ -103,7 +103,7 @@ defmodule BroadwayCloudPubSub.Producer do For exactly-once subscriptions, increase `:retry_deadline_ms` to 600,000ms (10 minutes) to allow the unary RPC client enough time to retry transient - ack failures — the server requires successful ack receipt before guaranteeing + ack failures - the server requires successful ack receipt before guaranteeing exactly-once semantics. The library automatically adjusts `:retry_deadline_ms` when the subscription's exactly-once status changes at runtime. @@ -134,16 +134,45 @@ defmodule BroadwayCloudPubSub.Producer do gRPC stream errors are classified as retryable or terminal: - * **Retryable** (e.g. `DEADLINE_EXCEEDED`, `UNAVAILABLE`, `UNAUTHENTICATED`) — + * **Retryable** (e.g. `DEADLINE_EXCEEDED`, `UNAVAILABLE`, `UNAUTHENTICATED`) - the stream is closed and reconnected after a backoff delay. A new OAuth2 token is fetched on each reconnect. - * **Terminal** (e.g. `NOT_FOUND`, `PERMISSION_DENIED`, `INVALID_ARGUMENT`) — + * **Terminal** (e.g. `NOT_FOUND`, `PERMISSION_DENIED`, `INVALID_ARGUMENT`) - the StreamManager stops and Broadway's supervision restarts the pipeline. Reconnect backoff is configurable via `:backoff_type`, `:backoff_min`, and `:backoff_max`. The default is randomized exponential (`:rand_exp`) starting at 100ms and capped at 60s. + ## Pub/Sub Emulator + + To use with the local Pub/Sub emulator: + + {BroadwayCloudPubSub.Producer, + subscription: "projects/my-project/subscriptions/my-subscription", + grpc_endpoint: "localhost:8085", + use_ssl: false, + token_generator: {MyApp, :emulator_token, []}} + + ## Differences from `BroadwayCloudPubSub.Pull.Producer` + + * **Push-based**: Messages arrive via a persistent gRPC stream rather than + being fetched on demand via HTTP pull requests. + * **Flow control**: Controlled at the gRPC stream level via + `:max_outstanding_messages` and `:max_outstanding_bytes` rather than + per-request polling. See [Flow control](#module-flow-control). + * **Graceful shutdown**: The stream is closed immediately on shutdown to + stop new messages arriving; the unary channel stays up so in-flight + messages can still be acked or nacked during the drain window. The pull + producer has no drain phase. See [Graceful shutdown](#module-graceful-shutdown). + * **Lease extension**: Message deadlines are extended automatically to + prevent redelivery while processing. The pull producer relies on the + subscription-level ack deadline only. See + [Lease management](#module-lease-management). + * **Enhanced telemetry**: Emits a richer set of events covering connection + lifecycle, lease activity, ack/modack RPC spans, drain lifecycle, and + per-cycle pressure snapshots. See [Telemetry](#module-telemetry). + ## Telemetry This producer emits the following [Telemetry](https://github.com/beam-telemetry/telemetry) @@ -154,90 +183,118 @@ defmodule BroadwayCloudPubSub.Producer do is configured. Its value is the static term provided, or the return value of the MFA called at emission time. - ### Stream events — `[:broadway_cloud_pub_sub, :streaming, :stream, ...]` + ### Stream events - `[:broadway_cloud_pub_sub, :streaming, :stream, ...]` Emitted by `StreamManager`. Metadata: `%{name: broadway_name, subscription: subscription}` (plus `:extra` when `:telemetry_metadata` is set). - * `:connect` — gRPC StreamingPull stream successfully established. + #### Backpressure + + * `[:broadway_cloud_pub_sub, :streaming, :stream, :pressure_snapshot]` - + point-in-time snapshot of pipeline backpressure, emitted on every lease + extension cycle. Useful for diagnosing throughput bottlenecks without + enabling tracing. + + Measurements: `%{outstanding_count: non_neg_integer(), buffered_count: non_neg_integer(), pending_demand: non_neg_integer()}` + + * `outstanding_count` - messages received but not yet acked or nacked. + * `buffered_count` - messages waiting in the internal buffer for producer demand. + * `pending_demand` - units of GenStage demand currently unfulfilled. + + #### Connection lifecycle + + * `[:broadway_cloud_pub_sub, :streaming, :stream, :connect]` - gRPC + StreamingPull stream successfully established. Measurements: `%{}` - * `:disconnect` — gRPC stream closed or errored. + * `[:broadway_cloud_pub_sub, :streaming, :stream, :disconnect]` - gRPC + stream closed or errored. Measurements: `%{}` - Metadata includes: `reason: term()` — the error or close reason + Metadata includes: `reason: term()` - the error or close reason (e.g. a `GRPC.RPCError`, `:stream_closed`, `:connection_down`). - * `:receive_messages` — messages received from the stream and forwarded to - the producer. + * `[:broadway_cloud_pub_sub, :streaming, :stream, :connection_failure]` - + connection attempt failed before the stream was established. - Measurements: `%{count: pos_integer()}` + Measurements: `%{}` - * `:ack` — acknowledge request dispatched to AckBatcher. + Metadata includes: `reason: term()` - the connection error. - Measurements: `%{count: pos_integer()}` + * `[:broadway_cloud_pub_sub, :streaming, :stream, :reconnect]` - reconnect + scheduled after a disconnect or connection failure. The backoff delay + indicates how long the StreamManager will wait before the next connection + attempt. + + Measurements: `%{delay: pos_integer()}` - * `:terminal_error` — non-retryable gRPC error received. StreamManager stops - after this event. + * `[:broadway_cloud_pub_sub, :streaming, :stream, :terminal_error]` - + non-retryable gRPC error received. StreamManager stops after this event. Measurements: `%{}` - Metadata includes: `reason: term()` — the terminal gRPC error. + Metadata includes: `reason: term()` - the terminal gRPC error. - * `:connection_failure` — connection attempt failed before the stream was - established. + * `[:broadway_cloud_pub_sub, :streaming, :stream, :keepalive]` - keep-alive + ping sent on the gRPC connection. - Measurements: `%{}` + Measurements: `%{deadline: pos_integer()}` - Metadata includes: `reason: term()` — the connection error. + #### Messages - * `:reconnect` — reconnect scheduled after a disconnect or connection - failure. The backoff delay indicates how long the StreamManager will - wait before the next connection attempt. + * `[:broadway_cloud_pub_sub, :streaming, :stream, :receive_messages]` - + messages received from the stream and forwarded to the producer. - Measurements: `%{delay: pos_integer()}` + Measurements: `%{count: pos_integer()}` - * `:keepalive` — keep-alive ping sent on the gRPC connection. + * `[:broadway_cloud_pub_sub, :streaming, :stream, :ack]` - acknowledge + request dispatched to AckBatcher. - Measurements: `%{deadline: pos_integer()}` + Measurements: `%{count: pos_integer()}` + + #### Lease management - * `:extend_leases` — lease extension cycle ran; modack requests dispatched - for outstanding messages. + * `[:broadway_cloud_pub_sub, :streaming, :stream, :extend_leases]` - lease + extension cycle ran; modack requests dispatched for outstanding messages. Measurements: `%{count: non_neg_integer(), deadline: pos_integer()}` - * `:lease_expired` — outstanding messages dropped because they exceeded - `:max_extension_ms`. + * `[:broadway_cloud_pub_sub, :streaming, :stream, :lease_expired]` - + outstanding messages dropped because they exceeded `:max_extension_ms`. Measurements: `%{count: pos_integer()}` - * `:receipt_modack_stale` — pending receipt modack entries (exactly-once - delivery) that exceeded the 60-second staleness threshold were nacked - for fast redelivery. Emitted during the lease extension cycle. + #### Exactly-once delivery + + * `[:broadway_cloud_pub_sub, :streaming, :stream, :receipt_modack_stale]` - + pending receipt modack entries that exceeded the 60-second staleness + threshold were nacked for fast redelivery. Emitted during the lease + extension cycle. Measurements: `%{count: pos_integer()}` - * `:drain` — async span tracking the full graceful drain lifecycle, from - `prepare_for_draining/1` through completion, timeout, or unexpected - termination. Uses the same measurements convention as `:telemetry.span/3`. + #### Graceful shutdown - Events: + * `[:broadway_cloud_pub_sub, :streaming, :stream, :drain, :start | :stop | :exception]` - + span tracking the full graceful drain lifecycle, from + `prepare_for_draining/1` through completion, timeout, or unexpected + termination. Uses the same convention as `:telemetry.span/3`. - * `[:broadway_cloud_pub_sub, :streaming, :stream, :drain, :start]` — drain - initiated. Emitted before the reader is closed or any messages are nacked. + * `[:broadway_cloud_pub_sub, :streaming, :stream, :drain, :start]` - drain + initiated. Emitted before the stream is closed or any messages are nacked. Measurements: `%{system_time: integer(), monotonic_time: integer(), buffered_count: non_neg_integer(), outstanding_count: non_neg_integer(), pending_receipt_modack_count: non_neg_integer()}` - * `[:broadway_cloud_pub_sub, :streaming, :stream, :drain, :stop]` — all + * `[:broadway_cloud_pub_sub, :streaming, :stream, :drain, :stop]` - all in-flight messages were processed and stream closed cleanly. Measurements: `%{duration: non_neg_integer(), monotonic_time: integer()}` - * `[:broadway_cloud_pub_sub, :streaming, :stream, :drain, :exception]` — + * `[:broadway_cloud_pub_sub, :streaming, :stream, :drain, :exception]` - drain ended abnormally. Measurements: `%{duration: non_neg_integer(), monotonic_time: integer()}` @@ -245,110 +302,74 @@ defmodule BroadwayCloudPubSub.Producer do Metadata includes `kind` and `reason` identifying the cause: - * `kind: :timeout, reason: :drain_timeout` — `drain_timeout_ms` elapsed + * `kind: :timeout, reason: :drain_timeout` - `drain_timeout_ms` elapsed before all messages were acked. Remaining messages are nacked immediately. - * `kind: :terminate, reason: term()` — the GenServer was terminated while + * `kind: :terminate, reason: term()` - the GenServer was terminated while a drain was in progress. - * `kind: :error, reason: binary()` — an exception was raised inside + * `kind: :error, reason: binary()` - an exception was raised inside `prepare_for_draining/1` itself. - * `:pressure_snapshot` — a point-in-time snapshot of pipeline backpressure, - emitted on every lease extension cycle. Useful for diagnosing memory - or throughput bottlenecks without enabling tracing. - - Measurements: `%{outstanding_count: non_neg_integer(), buffered_count: non_neg_integer(), pending_demand: non_neg_integer()}` - - * `outstanding_count` — messages received but not yet acked or nacked. - * `buffered_count` — messages waiting in the internal buffer for producer demand. - * `pending_demand` — units of GenStage demand currently unfulfilled. - - ### AckBatcher events — `[:broadway_cloud_pub_sub, :streaming, :ack_batcher, ...]` + ### AckBatcher events - `[:broadway_cloud_pub_sub, :streaming, :ack_batcher, ...]` Emitted by `AckBatcher`. Metadata: `%{name: broadway_name, subscription: subscription}` (plus `:extra` when `:telemetry_metadata` is set). - * `:flush_deferred` — flush deferred because UnaryRpcClient was not yet - available (e.g. restarting after a crash). + * `[:broadway_cloud_pub_sub, :streaming, :ack_batcher, :flush_deferred]` - + flush deferred because UnaryRpcClient was not yet available (e.g. + restarting after a crash). Measurements: `%{ack_count: non_neg_integer(), modack_groups: non_neg_integer()}` - * `:modack_retry_exhausted` — modack ack_ids dropped after reaching the - maximum retry attempt count. + * `[:broadway_cloud_pub_sub, :streaming, :ack_batcher, :modack_retry_exhausted]` - + modack ack_ids dropped after reaching the maximum retry attempt count. Measurements: `%{count: pos_integer()}` - * `:ack_retry_expired` — ack ack_ids dropped because they exceeded the - exactly-once retry deadline. + * `[:broadway_cloud_pub_sub, :streaming, :ack_batcher, :ack_retry_expired]` - + ack ack_ids dropped because they exceeded the exactly-once retry deadline. Measurements: `%{count: pos_integer()}` - * `:modack_retry_expired` — modack ack_ids dropped because they exceeded the - exactly-once retry deadline. + * `[:broadway_cloud_pub_sub, :streaming, :ack_batcher, :modack_retry_expired]` - + modack ack_ids dropped because they exceeded the exactly-once retry deadline. Measurements: `%{count: pos_integer()}` - ### Unary RPC client events — `[:broadway_cloud_pub_sub, :streaming, :unary, ...]` + ### Unary RPC client events - `[:broadway_cloud_pub_sub, :streaming, :unary, ...]` Emitted by `UnaryRpcClient`. Metadata: `%{name: broadway_name, subscription: subscription}` (plus `:extra` when `:telemetry_metadata` is set). - * `:connect` — unary RPC channel reconnected after a failure. + * `[:broadway_cloud_pub_sub, :streaming, :unary, :connect]` - unary RPC + channel reconnected after a failure. Measurements: `%{}` - * `:connection_failure` — unary RPC channel connect attempt failed. + * `[:broadway_cloud_pub_sub, :streaming, :unary, :connection_failure]` - + unary RPC channel connect attempt failed. Measurements: `%{}` - Metadata includes: `reason: term()` — the connection error. + Metadata includes: `reason: term()` - the connection error. - * `:permanent_failure` — one or more ack_ids were permanently rejected by - the server (e.g. ack_id expired). These are dropped and not retried. + * `[:broadway_cloud_pub_sub, :streaming, :unary, :permanent_failure]` - + one or more ack_ids were permanently rejected by the server (e.g. ack_id + expired). These are dropped and not retried. Measurements: `%{count: pos_integer()}` - ### gRPC client spans — `[:broadway_cloud_pub_sub, :streaming, :grpc_client, ...]` + ### gRPC client spans - `[:broadway_cloud_pub_sub, :streaming, :grpc_client, ...]` Emitted by `GrpcClient` (the default `BroadwayCloudPubSub.Streaming.Client` implementation) as `:telemetry.span/3` spans. Metadata: `%{name: broadway_name, subscription: subscription, count: ack_count}` (plus `:extra` when `:telemetry_metadata` is set). - * `:ack` — wraps each `Acknowledge` unary RPC call. - - Events: `[:broadway_cloud_pub_sub, :streaming, :grpc_client, :ack, :start | :stop | :exception]` - - * `:modack` — wraps each `ModifyAckDeadline` unary RPC call. - - Events: `[:broadway_cloud_pub_sub, :streaming, :grpc_client, :modack, :start | :stop | :exception]` + * `[:broadway_cloud_pub_sub, :streaming, :grpc_client, :ack, :start | :stop | :exception]` - + wraps each `Acknowledge` unary RPC call. - ## Pub/Sub Emulator - - To use with the local Pub/Sub emulator: - - {BroadwayCloudPubSub.Producer, - subscription: "projects/my-project/subscriptions/my-subscription", - grpc_endpoint: "localhost:8085", - use_ssl: false, - token_generator: {MyApp, :emulator_token, []}} - - ## Differences from `BroadwayCloudPubSub.Pull.Producer` - - * **Push-based**: Messages arrive via a persistent gRPC stream rather than - being fetched on demand via HTTP pull requests. - * **Flow control**: Controlled by `:max_outstanding_messages` and - `:max_outstanding_bytes` on the gRPC stream level, rather than by - `:max_number_of_messages` per pull request. - * **Shutdown behaviour**: By default, unprocessed messages are returned to - Pub/Sub with a short delay (`on_shutdown: {:nack, 5}`) so they are - redelivered quickly on rolling deploys. The pull producer does not nack - on shutdown. - * **Ack path**: Acks are batched and sent via a separate unary RPC - connection managed by AckBatcher and UnaryRpcClient, not on the streaming - connection itself. - * **Lease extension**: The streaming producer automatically extends message - deadlines to prevent redelivery while messages are being processed. The - pull producer relies on the subscription-level ack deadline only. + * `[:broadway_cloud_pub_sub, :streaming, :grpc_client, :modack, :start | :stop | :exception]` - + wraps each `ModifyAckDeadline` unary RPC call. """ @@ -374,7 +395,7 @@ defmodule BroadwayCloudPubSub.Producer do broadway_name = opts[:broadway_name] - # Normalise :grpc_client — accept Module or {Module, inner_opts}. + # Normalise :grpc_client - accept Module or {Module, inner_opts}. # When a tuple is given, merge the inner opts into the producer opts so # that grpc_client.init/1 and all downstream components see them. {grpc_client, opts} = @@ -406,7 +427,7 @@ defmodule BroadwayCloudPubSub.Producer do |> maybe_inject_partition_by(opts) # Only the UnaryAckSupervisor is a shared child spec. Each producer starts - # its own StreamManager directly via start_link in init/1 — the natural + # its own StreamManager directly via start_link in init/1 - the natural # link means crashes propagate without needing a supervisor. {[unary_sup_spec], options} end @@ -424,7 +445,7 @@ defmodule BroadwayCloudPubSub.Producer do manager_name = Module.concat(broadway_name, "StreamManager_#{index}") # Start our own StreamManager directly. start_link creates a natural - # bidirectional link — if the manager crashes (terminal gRPC error), the + # bidirectional link - if the manager crashes (terminal gRPC error), the # producer receives an EXIT signal; if the producer dies, the manager does too. manager_opts = opts diff --git a/lib/broadway_cloud_pub_sub/streaming/ack_result.ex b/lib/broadway_cloud_pub_sub/streaming/ack_result.ex index 8ab3e57..3c1dffa 100644 --- a/lib/broadway_cloud_pub_sub/streaming/ack_result.ex +++ b/lib/broadway_cloud_pub_sub/streaming/ack_result.ex @@ -8,7 +8,7 @@ defmodule BroadwayCloudPubSub.Streaming.AckResult do # operation resolves to an AckResult describing whether the operation succeeded # or failed and why. # - # This matches Go's AckResult type in pubsub/message.go: + # This matches the AckResult type pattern used in official client libraries: # # type AckResult struct { # ready chan struct{} diff --git a/lib/broadway_cloud_pub_sub/streaming/options.ex b/lib/broadway_cloud_pub_sub/streaming/options.ex index e13ac2c..ee99cc4 100644 --- a/lib/broadway_cloud_pub_sub/streaming/options.ex +++ b/lib/broadway_cloud_pub_sub/streaming/options.ex @@ -5,7 +5,7 @@ defmodule BroadwayCloudPubSub.Streaming.Options do @default_max_outstanding_messages 1_000 @default_max_outstanding_bytes 100 * 1024 * 1024 @default_stream_ack_deadline_seconds 60 - # 60 minutes — matches Go's MaxExtension default. + # 60 minutes, matches the official client library default. @default_max_extension_ms 60 * 60 * 1_000 # gax defaults: https://github.com/googleapis/gax-go/blob/main/v2/call_option.go @default_backoff_min 100 @@ -18,6 +18,8 @@ defmodule BroadwayCloudPubSub.Streaming.Options do # Handled by Broadway. broadway: [type: :any, doc: false], broadway_name: [type: :atom, doc: false], + + # -- Connection -- subscription: [ type: {:custom, __MODULE__, :type_non_empty_string, [[{:name, :subscription}]]}, required: true, @@ -28,6 +30,23 @@ defmodule BroadwayCloudPubSub.Streaming.Options do `"projects/my-project/subscriptions/my-subscription"`. """ ], + goth: [ + type: :atom, + doc: """ + The `Goth` module to use for authentication. Note that this option only + applies to the default token generator. + """ + ], + token_generator: [ + type: :mfa, + doc: """ + An MFArgs tuple that will be called before each gRPC connection to fetch + an authentication token. Should return `{:ok, String.t()} | {:error, any()}`. + By default this will invoke `Goth.fetch/1` with the `:goth` option. + """ + ], + + # -- Flow control -- max_outstanding_messages: [ type: :pos_integer, default: @default_max_outstanding_messages, @@ -43,58 +62,25 @@ defmodule BroadwayCloudPubSub.Streaming.Options do doc: """ The maximum total size in bytes of outstanding messages. The server will not push more messages if the total byte size of outstanding - messages exceeds this limit. Defaults to 100 MiB. + messages exceeds this limit. """ ], - stream_ack_deadline_seconds: [ - type: - {:custom, __MODULE__, :type_integer_in_range, - [[{:name, :stream_ack_deadline_seconds}, {:min, 10}, {:max, 600}]]}, - default: @default_stream_ack_deadline_seconds, - doc: """ - The number of seconds the server will wait before re-delivering an - unacknowledged message. Must be between 10 and 600. Defaults to 60. - The producer will extend leases automatically before this deadline. - """ - ], - max_extension_ms: [ - type: :pos_integer, - default: @default_max_extension_ms, - doc: """ - The maximum total time in milliseconds that a message's ack deadline will - be extended from the moment of initial receipt. After this duration, the - message is dropped from lease management and the server will redeliver it. - This prevents a stuck consumer from holding messages indefinitely. - Matches the Go client's `MaxExtension` default of 60 minutes. - Defaults to #{div(@default_max_extension_ms, 60_000)} minutes. - """ - ], - client_id: [ - type: :string, - doc: """ - An identifier that can be used to distinguish individual instances of - the producer. If not provided, a unique ID will be generated. Using - a stable `client_id` across reconnections enables the server to use - sticky assignment for ordered subscriptions. - """ - ], + # -- Acknowledgement -- on_success: [ type: {:custom, __MODULE__, :type_ack_option, [[{:name, :on_success}]]}, default: :ack, doc: """ Configures the acknowledgement behaviour for successfully processed - messages. Defaults to `:ack`. + messages. """ ], on_failure: [ type: {:custom, __MODULE__, :type_ack_option, [[{:name, :on_failure}]]}, default: {:nack, 0}, doc: """ - Configures the acknowledgement behaviour for failed messages. - Defaults to `{:nack, 0}`, which makes failed messages immediately - available for redelivery. This matches the behaviour of the official - Google Cloud Pub/Sub client libraries. + Configures the acknowledgement behaviour for failed messages. The + default makes failed messages immediately available for redelivery. """ ], on_shutdown: [ @@ -104,85 +90,78 @@ defmodule BroadwayCloudPubSub.Streaming.Options do Configures what happens to messages received but not yet processed when the producer is shut down. - * `{:nack, seconds}` - Sends a `modifyAckDeadline` request with the + - `{:nack, seconds}` - Sends a `modifyAckDeadline` request with the given `seconds` for all outstanding messages, making them available - for redelivery after that delay. The default `{:nack, 5}` provides - a small delay to avoid thundering herd on rolling deploys. - * `:nack` - Equivalent to `{:nack, 0}`. Immediately makes unprocessed + for redelivery after that delay. + - `:nack` - Equivalent to `{:nack, 0}`. Immediately makes unprocessed messages available for redelivery. - * `:noop` - Does nothing. Messages become available after their ack + - `:noop` - Does nothing. Messages become available after their ack deadline expires naturally. - - Defaults to `{:nack, 5}`. """ ], - goth: [ - type: :atom, + + # -- Lease management -- + stream_ack_deadline_seconds: [ + type: + {:custom, __MODULE__, :type_integer_in_range, + [[{:name, :stream_ack_deadline_seconds}, {:min, 10}, {:max, 600}]]}, + default: @default_stream_ack_deadline_seconds, doc: """ - The `Goth` module to use for authentication. Note that this option only - applies to the default token generator. + The number of seconds the server will wait before re-delivering an + unacknowledged message. Must be between 10 and 600. + The producer will extend leases automatically before this deadline. """ ], - token_generator: [ - type: :mfa, + max_extension_ms: [ + type: :pos_integer, + default: @default_max_extension_ms, doc: """ - An MFArgs tuple that will be called before each gRPC connection to fetch - an authentication token. Should return `{:ok, String.t()} | {:error, any()}`. - By default this will invoke `Goth.fetch/1` with the `:goth` option. + The maximum total time in milliseconds that a message's ack deadline will + be extended from the moment of initial receipt. After this duration, the + message is dropped from lease management and the server will redeliver it. + This prevents a stuck consumer from holding messages indefinitely. """ ], - backoff_type: [ - type: {:in, [:rand_exp, :exp, :rand, :stop]}, - default: :rand_exp, + + # -- Message ordering -- + enable_message_ordering: [ + type: :boolean, + default: false, doc: """ - The backoff strategy used when reconnecting after a stream failure. + When `true`, messages with the same `ordering_key` are routed to the + same Broadway processor and processed sequentially. This guarantees + in-order delivery for ordered subscriptions. - * `:rand_exp` - Randomized exponential backoff (default). Adds jitter - to prevent thundering herd after mass disconnects. - * `:exp` - Pure exponential backoff. - * `:rand` - Random value between `backoff_min` and `backoff_max`. - * `:stop` - Do not reconnect. The producer will crash after one failure. + Ordering is enforced via Broadway's built-in `:partition_by` option, + which assigns messages with the same `orderingKey` metadata to the + same processor partition. The subscription itself must also have + message ordering enabled in Google Cloud Pub/Sub. - """ - ], - backoff_min: [ - type: :pos_integer, - default: @default_backoff_min, - doc: - "Minimum reconnection backoff in milliseconds. Matches the gax default of 100ms. Defaults to 100." - ], - backoff_max: [ - type: :pos_integer, - default: @default_backoff_max, - doc: - "Maximum reconnection backoff in milliseconds. Matches the gax default of 60s. Defaults to 60000." - ], - retry_deadline_ms: [ - type: :pos_integer, - default: 60_000, - doc: """ - Maximum total time in milliseconds to keep retrying a failed acknowledge or - modifyAckDeadline request before giving up and dropping the ack_ids. + When `false` (default), messages are distributed across processors + without regard to ordering key, matching the unordered behaviour of a + standard Pub/Sub subscription. - The default of 60,000ms (60 seconds) applies to standard delivery subscriptions. - When exactly-once delivery is detected from subscription properties, the library - automatically switches to 600,000ms (600 seconds) to match the Go client's - extended retry deadline for exactly-once acks. The configured value is restored - if exactly-once delivery is later disabled on the subscription. + Note: the server will also report whether the subscription has ordering + enabled in each `StreamingPullResponse.subscription_properties`. This + client-side option controls whether to enforce it in the Broadway + processing topology. """ ], - keepalive_interval_ms: [ - type: :pos_integer, - default: @default_keepalive_interval_ms, + client_id: [ + type: :string, doc: """ - Interval in milliseconds at which HTTP/2 PING frames are sent on the gRPC - connection to keep it alive. This prevents Google Cloud's load balancer - from closing idle connections (which it does after roughly 20 seconds by - default). Matches the 30-second keepalive interval used by the official - Python and Go Pub/Sub client libraries. Only applies to the `:gun` adapter. - Defaults to 30000. + An identifier shared across all streaming connections opened by this + pipeline. When a stream disconnects and reconnects with the same + `client_id`, the server transfers any guarantees (e.g. ordered delivery + assignment) from the old stream to the new one. If not provided, a + random ID is generated once at pipeline startup. This is sufficient for + most use cases: the ID is stable across gRPC reconnections within the + same process lifetime, which is all the server requires. You only need + to set this explicitly if you want a human-readable value for debugging. """ ], + + # -- Ack batching -- ack_batch_interval_ms: [ type: {:custom, __MODULE__, :type_integer_in_range, @@ -192,7 +171,7 @@ defmodule BroadwayCloudPubSub.Streaming.Options do Interval in milliseconds at which batched ack and modifyAckDeadline requests are flushed to the Pub/Sub service via unary RPCs. Lower values reduce end-to-end ack latency; higher values improve - batching efficiency. Defaults to 100. + batching efficiency. """ ], ack_batch_max_size: [ @@ -204,24 +183,80 @@ defmodule BroadwayCloudPubSub.Streaming.Options do Maximum number of ack_ids to accumulate before triggering an immediate flush, regardless of the timer. Each unary RPC carries at most 2,500 ack_ids (the Google API limit), so values above 2,500 - result in multiple RPCs per flush. Defaults to 2500. + result in multiple RPCs per flush. + """ + ], + retry_deadline_ms: [ + type: :pos_integer, + default: 60_000, + doc: """ + Maximum total time in milliseconds to keep retrying a failed acknowledge or + modifyAckDeadline request before giving up and dropping the ack_ids. + + The default of 60,000ms (60 seconds) applies to standard delivery subscriptions. + When exactly-once delivery is detected from subscription properties, the library + automatically switches to 600,000ms (600 seconds) for exactly-once acks. The + configured value is restored if exactly-once delivery is later disabled on the + subscription. + """ + ], + + # -- Reconnection -- + backoff_type: [ + type: {:in, [:rand_exp, :exp, :rand, :stop]}, + default: :rand_exp, + doc: """ + The backoff strategy used when reconnecting after a stream failure. + + - `:rand_exp` - Randomized exponential backoff. Adds jitter to + prevent thundering herd after mass disconnects. + - `:exp` - Pure exponential backoff. + - `:rand` - Random value between `backoff_min` and `backoff_max`. + - `:stop` - Do not reconnect. The producer will crash after one + failure. + """ + ], + backoff_min: [ + type: :pos_integer, + default: @default_backoff_min, + doc: "Minimum reconnection backoff in milliseconds." + ], + backoff_max: [ + type: :pos_integer, + default: @default_backoff_max, + doc: "Maximum reconnection backoff in milliseconds." + ], + + # -- Shutdown -- + drain_timeout_ms: [ + type: :pos_integer, + default: 30_000, + doc: """ + Maximum time in milliseconds to wait for in-flight messages to be + processed and acknowledged during graceful shutdown. After this timeout, + any remaining outstanding messages are nacked (per the `on_shutdown` + setting) and the connection is force-closed. + + This drain phase waits for all outstanding messages to be acked before + calling `CloseSend` on the stream. """ ], + + # -- Transport -- adapter: [ type: {:custom, __MODULE__, :type_adapter, [[{:name, :adapter}]]}, default: :gun, doc: """ The gRPC HTTP/2 adapter to use for the streaming connection. - * `:gun` — Uses the Gun HTTP/2 client (default). Gun is well-tested - and is the traditional adapter for the Elixir gRPC library. - * `:mint` — Uses the Mint HTTP/2 client. Mint may be preferable in - deployment environments where Gun is not available or not desired. - * Any module — A custom module implementing the `GRPC.Client.Adapter` - behaviour. Useful for test adapters and alternative implementations. + - `:gun` - Uses the Gun HTTP/2 client. Well-tested and the + traditional adapter for the Elixir gRPC library. + - `:mint` - Uses the Mint HTTP/2 client. May be preferable where + Gun is not available. + - Any module implementing the `GRPC.Client.Adapter` behaviour. - Both built-in adapters are provided by the `grpc` dependency. The - adapter choice does not affect the public API or message semantics. + Both built-in adapters are provided by the `grpc` dependency. The + adapter choice does not affect the public API or message semantics. """ ], grpc_endpoint: [ @@ -238,44 +273,16 @@ defmodule BroadwayCloudPubSub.Streaming.Options do doc: """ Whether to use TLS when connecting to the gRPC endpoint. Set to `false` when connecting to the Pub/Sub emulator, which does not use TLS. - Defaults to `true`. """ ], - drain_timeout_ms: [ + keepalive_interval_ms: [ type: :pos_integer, - default: 30_000, - doc: """ - Maximum time in milliseconds to wait for in-flight messages to be - processed and acknowledged during graceful shutdown. After this timeout, - any remaining outstanding messages are nacked (per the `on_shutdown` - setting) and the connection is force-closed. - - This drain phase mirrors Go's `iterator.stop()` which waits for the - `drained` channel to close (all outstanding messages acked) before - calling `CloseSend`. Defaults to 30 seconds. - """ - ], - enable_message_ordering: [ - type: :boolean, - default: false, + default: @default_keepalive_interval_ms, doc: """ - When `true`, messages with the same `ordering_key` are routed to the - same Broadway processor and processed sequentially. This guarantees - in-order delivery for ordered subscriptions. - - Ordering is enforced via Broadway's built-in `:partition_by` option, - which assigns messages with the same `orderingKey` metadata to the - same processor partition. The subscription itself must also have - message ordering enabled in Google Cloud Pub/Sub. - - When `false` (default), messages are distributed across processors - without regard to ordering key, matching the unordered behaviour of a - standard Pub/Sub subscription. - - Note: the server will also report whether the subscription has ordering - enabled in each `StreamingPullResponse.subscription_properties`. This - client-side option controls whether to enforce it in the Broadway - processing topology. + Interval in milliseconds at which HTTP/2 PING frames are sent on the gRPC + connection to keep it alive. This prevents Google Cloud's load balancer + from closing idle connections (which it does after roughly 20 seconds by + default). Only applies to the `:gun` adapter. """ ], interceptors: [ @@ -285,34 +292,36 @@ defmodule BroadwayCloudPubSub.Streaming.Options do A list of client-side gRPC interceptors attached to every channel opened by the producer (both the StreamingPull channel and the unary ack/modack channel). - Each entry is either a bare module or a `{module, opts}` tuple: - - * `MyInterceptor` — calls `MyInterceptor.init([])` to initialise. - * `{MyInterceptor, level: :debug}` — calls `MyInterceptor.init(level: :debug)`. + Each entry is either a bare module (e.g. `MyInterceptor`, which calls + `MyInterceptor.init([])`) or a `{module, opts}` tuple (e.g. + `{MyInterceptor, level: :debug}`, which calls + `MyInterceptor.init(level: :debug)`). - Modules must implement the `GRPC.Client.Interceptor` behaviour (`init/1` and `call/4`). + Modules must implement the `GRPC.Client.Interceptor` behaviour (`init/1` and `call/4`). - ## Example + ## Example - interceptors: [GRPC.Client.Interceptors.Logger] - interceptors: [{GRPC.Client.Interceptors.Logger, level: :warning}] + interceptors: [GRPC.Client.Interceptors.Logger] + interceptors: [{GRPC.Client.Interceptors.Logger, level: :warning}] """ ], + + # -- Advanced -- grpc_client: [ type: {:custom, __MODULE__, :type_grpc_client, [[]]}, default: BroadwayCloudPubSub.Streaming.GrpcClient, doc: """ The module implementing the `BroadwayCloudPubSub.Streaming.Client` behaviour. - Defaults to `BroadwayCloudPubSub.Streaming.GrpcClient`, which uses the - `grpc` library to communicate with Google Cloud Pub/Sub. + The built-in `BroadwayCloudPubSub.Streaming.GrpcClient` uses the `grpc` + library to communicate with Google Cloud Pub/Sub. - Accepts either a bare module or a `{module, opts}` tuple. When a tuple is - given, `opts` are merged into the producer options and passed to - `c:BroadwayCloudPubSub.Streaming.Client.init/1`: + Accepts either a bare module or a `{module, opts}` tuple. When a tuple is + given, `opts` are merged into the producer options and passed to + `c:BroadwayCloudPubSub.Streaming.Client.init/1`: - grpc_client: {MyGrpcClient, channel_opts: [transport_opts: []]} + grpc_client: {MyGrpcClient, channel_opts: [transport_opts: []]} - Swap this for testing or custom gRPC transports. + Swap this for testing or custom gRPC transports. """ ], telemetry_metadata: [ @@ -322,15 +331,13 @@ defmodule BroadwayCloudPubSub.Streaming.Options do producer. The value is included in the event metadata under the `:extra` key. - Accepts either: - - * A static term (e.g. a map or keyword list) — stored once and - included verbatim in every event. - * An `{module, function, args}` tuple — called on every event - emission; its return value is used as the `:extra` value. Useful - for attaching dynamic data such as node names or runtime counters. + Accepts either a static term (e.g. a map or keyword list), which is + stored once and included verbatim in every event, or an + `{module, function, args}` tuple that is called on every event emission + and whose return value is used as the `:extra` value (useful for + attaching dynamic data such as node names or runtime counters). - When not set, no `:extra` key is added to event metadata. + When not set, no `:extra` key is added to event metadata. """ ], diff --git a/mix.exs b/mix.exs index 75f91b1..780321e 100644 --- a/mix.exs +++ b/mix.exs @@ -49,7 +49,12 @@ defmodule BroadwayCloudPubSub.MixProject do defp docs do [ main: "BroadwayCloudPubSub.Producer", - nest_modules_by_prefix: [BroadwayCloudPubSub], + nest_modules_by_prefix: [ + BroadwayCloudPubSub, + BroadwayCloudPubSub.Streaming, + BroadwayCloudPubSub.Pull, + Google.Pubsub.V1 + ], source_ref: "v#{@version}", source_url: @repo_url, extras: [ @@ -58,15 +63,18 @@ defmodule BroadwayCloudPubSub.MixProject do "docs/upgrade_to_2.0.md" ], groups_for_modules: [ + Streaming: [ + BroadwayCloudPubSub.Producer, + BroadwayCloudPubSub.Streaming.Client, + BroadwayCloudPubSub.Streaming.GrpcClient + ], Pull: [ BroadwayCloudPubSub.Pull.Producer, BroadwayCloudPubSub.Pull.Client, BroadwayCloudPubSub.Pull.FinchClient ], - Streaming: [ - BroadwayCloudPubSub.Producer, - BroadwayCloudPubSub.Streaming.Client, - BroadwayCloudPubSub.Streaming.GrpcClient + "Protobuf (generated)": [ + ~r"Google.Pubsub.V1." ] ] ] From 9ca1b367ccb8cf08e3a1b37c410cdd61f3276ee1 Mon Sep 17 00:00:00 2001 From: Rock Date: Fri, 29 May 2026 11:52:49 +0200 Subject: [PATCH 22/29] Mix format --- lib/broadway_cloud_pub_sub/producer.ex | 10 +++++++--- test/broadway_cloud_pub_sub/pull/producer_test.exs | 3 ++- .../streaming/acknowledger_test.exs | 5 ++++- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/lib/broadway_cloud_pub_sub/producer.ex b/lib/broadway_cloud_pub_sub/producer.ex index de5a480..854cf7b 100644 --- a/lib/broadway_cloud_pub_sub/producer.ex +++ b/lib/broadway_cloud_pub_sub/producer.ex @@ -400,8 +400,11 @@ defmodule BroadwayCloudPubSub.Producer do # that grpc_client.init/1 and all downstream components see them. {grpc_client, opts} = case opts[:grpc_client] do - {mod, inner_opts} -> {mod, Keyword.merge(opts, inner_opts) |> Keyword.put(:grpc_client, mod)} - mod -> {mod, opts} + {mod, inner_opts} -> + {mod, Keyword.merge(opts, inner_opts) |> Keyword.put(:grpc_client, mod)} + + mod -> + {mod, opts} end # Add grpc_client_config to be used by stream manager and unary @@ -560,7 +563,8 @@ defmodule BroadwayCloudPubSub.Producer do validated {:error, err} -> - raise ArgumentError, "invalid BroadwayCloudPubSub.Producer options: #{Exception.message(err)}" + raise ArgumentError, + "invalid BroadwayCloudPubSub.Producer options: #{Exception.message(err)}" end end diff --git a/test/broadway_cloud_pub_sub/pull/producer_test.exs b/test/broadway_cloud_pub_sub/pull/producer_test.exs index 95177c2..d6736bb 100644 --- a/test/broadway_cloud_pub_sub/pull/producer_test.exs +++ b/test/broadway_cloud_pub_sub/pull/producer_test.exs @@ -568,7 +568,8 @@ defmodule BroadwayCloudPubSub.Pull.ProducerTest do test "with :client PullClient returns a child_spec for starting a Finch pool" do assert { [ - {Finch, name: BroadwayCloudPubSub.Pull.ProducerTest.BroadwayCloudPubSub.Pull.FinchClient} + {Finch, + name: BroadwayCloudPubSub.Pull.ProducerTest.BroadwayCloudPubSub.Pull.FinchClient} ], [ producer: [ diff --git a/test/broadway_cloud_pub_sub/streaming/acknowledger_test.exs b/test/broadway_cloud_pub_sub/streaming/acknowledger_test.exs index 47b0123..95db8ff 100644 --- a/test/broadway_cloud_pub_sub/streaming/acknowledger_test.exs +++ b/test/broadway_cloud_pub_sub/streaming/acknowledger_test.exs @@ -98,7 +98,10 @@ defmodule BroadwayCloudPubSub.Streaming.AcknowledgerTest do assert Enum.sort(ack_ids) == ["id-1", "id-2"] end - test "does not ack failed messages when on_failure is :noop", %{ack_ref: ack_ref, stub_pid: stub_pid} do + test "does not ack failed messages when on_failure is :noop", %{ + ack_ref: ack_ref, + stub_pid: stub_pid + } do :persistent_term.put(ack_ref, {stub_pid, %{on_success: :ack, on_failure: :noop}}) success = [build_message("ok-1", ack_ref)] failure = [build_message("fail-1", ack_ref)] From 85c8e2fa0dbf3739db98ba6182be2893934026c5 Mon Sep 17 00:00:00 2001 From: Rock Date: Tue, 2 Jun 2026 16:20:04 +0200 Subject: [PATCH 23/29] Add more info to the README --- README.md | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d5a0ea1..4fbe3fb 100644 --- a/README.md +++ b/README.md @@ -117,8 +117,61 @@ Broadway.start_link(MyBroadway, ### Upgrading from 1.x -See the [2.0 upgrade guide](docs/upgrade_to_2.0.md) for the full list of breaking -changes and step-by-step migration instructions from pull producer to gRPC streaming producer. +> **2.0 is a major release with breaking changes.** The three-line summary is +> below; the [full upgrade guide](docs/upgrade_to_2.0.md) has step-by-step +> instructions, option mapping tables, and rationale for every change. + +#### Breaking change 1: [`BroadwayCloudPubSub.Producer` is now the gRPC streaming producer](docs/upgrade_to_2.0.md#1-new-default-producer) + +The biggest change: the module name `BroadwayCloudPubSub.Producer` now refers to +the **new gRPC StreamingPull producer**. The 1.x HTTP pull producer lives on +under `BroadwayCloudPubSub.Pull.Producer`. + +```elixir +# 1.x — HTTP pull producer +{BroadwayCloudPubSub.Producer, goth: MyApp.Goth, subscription: "..."} + +# 2.0 option A — switch to streaming (recommended, lower latency) +{BroadwayCloudPubSub.Producer, + goth: MyApp.Goth, + subscription: "...", + max_outstanding_messages: 1000} + +# 2.0 option B — keep HTTP pull, one-line change +{BroadwayCloudPubSub.Pull.Producer, goth: MyApp.Goth, subscription: "..."} +``` + +The streaming producer requires `:grpc`, `:protobuf`, and an HTTP/2 adapter +(`:gun` or `:mint` + `:castore`). If you stay on the pull producer those +packages are not needed. + +#### Breaking change 2: [two modules renamed](docs/upgrade_to_2.0.md#2-broadwaycloudpubsubpullclient-renamed) (only if referenced directly) + +| 1.x | 2.0 | +|---|---| +| `BroadwayCloudPubSub.PullClient` | `BroadwayCloudPubSub.Pull.FinchClient` | +| `BroadwayCloudPubSub.Client` (behaviour) | `BroadwayCloudPubSub.Pull.Client` | + +These only matter if you passed the module explicitly (e.g. `client: +BroadwayCloudPubSub.PullClient`) or implemented a custom pull client with +`@behaviour BroadwayCloudPubSub.Client`. + +#### Breaking change 3: [`on_failure` default changed from `:noop` to `{:nack, 0}`](docs/upgrade_to_2.0.md#4-on_failure-default-changed-noop--nack-0) + +Failed messages are now immediately made available for redelivery instead of +waiting for the subscription's `ackDeadlineSeconds` to expire. This matches the +behaviour of Google's own first-party client libraries. + +To keep the 1.x behaviour, set the option explicitly: + +```elixir +{BroadwayCloudPubSub.Pull.Producer, + goth: MyApp.Goth, + subscription: "...", + on_failure: :noop} +``` + +See the [full upgrade guide](docs/upgrade_to_2.0.md) for all details. ## License From f7a6c9e121ed8d0f5f9f38011a31563e0405e1e3 Mon Sep 17 00:00:00 2001 From: Rock Date: Tue, 16 Jun 2026 08:47:43 +0200 Subject: [PATCH 24/29] Upload grpc to 1.0 --- mix.exs | 5 ++--- mix.lock | 18 +++++++++--------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/mix.exs b/mix.exs index 780321e..c364d3f 100644 --- a/mix.exs +++ b/mix.exs @@ -35,11 +35,10 @@ defmodule BroadwayCloudPubSub.MixProject do {:broadway, "~> 1.0"}, {:finch, "~> 0.9"}, {:jason, "~> 1.0"}, - {:nimble_options, "~> 0.3.7 or ~> 0.4 or ~> 1.0"}, + {:nimble_options, "~> 0.4 or ~> 1.0"}, {:telemetry, "~> 0.4.3 or ~> 1.0"}, {:goth, "~> 1.3", optional: true}, - # TODO: Replace with Hex versions when grpc 1.0 is released - {:grpc, "~> 1.0.0-rc.1", optional: true}, + {:grpc, "~> 1.0", optional: true}, {:protobuf, "~> 0.12 or ~> 0.13 or ~> 0.14 or ~> 0.15 or ~> 0.16", optional: true}, {:ex_doc, "~> 0.23", only: :docs}, {:bypass, "~> 2.1", only: :test} diff --git a/mix.lock b/mix.lock index 08e8dca..bc11658 100644 --- a/mix.lock +++ b/mix.lock @@ -1,34 +1,34 @@ %{ "broadway": {:hex, :broadway, "1.2.1", "83a1567423c26885e15f6cd8670ca790370af2fcff2ede7fa88c5ea793087a67", [:mix], [{:gen_stage, "~> 1.0", [hex: :gen_stage, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.3.7 or ~> 0.4 or ~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.3 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "68ae63d83b55bdca0f95cd49feee5fb74c5a6bec557caf940860fe07dbc8a4fb"}, "bypass": {:hex, :bypass, "2.1.0", "909782781bf8e20ee86a9cabde36b259d44af8b9f38756173e8f5e2e1fabb9b1", [:mix], [{:plug, "~> 1.7", [hex: :plug, repo: "hexpm", optional: false]}, {:plug_cowboy, "~> 2.0", [hex: :plug_cowboy, repo: "hexpm", optional: false]}, {:ranch, "~> 1.3", [hex: :ranch, repo: "hexpm", optional: false]}], "hexpm", "d9b5df8fa5b7a6efa08384e9bbecfe4ce61c77d28a4282f79e02f1ef78d96b80"}, - "castore": {:hex, :castore, "0.1.22", "4127549e411bedd012ca3a308dede574f43819fe9394254ca55ab4895abfa1a2", [:mix], [], "hexpm", "c17576df47eb5aa1ee40cc4134316a99f5cad3e215d5c77b8dd3cfef12a22cac"}, + "castore": {:hex, :castore, "1.0.19", "6903cabdfd9d1af46454126e7c8385186659dd33ecfb74a885cae52221ad6109", [:mix], [], "hexpm", "3669e6cab13f54c2df26b3e6833745d647f35b6e30d8ddd5975df0d5c842ca98"}, "cowboy": {:hex, :cowboy, "2.15.0", "9cfe86ed7117bf045e10adbedb0170af7be57f2a3637e7be143433d8dd267396", [:make, :rebar3], [{:cowlib, ">= 2.16.0 and < 3.0.0", [hex: :cowlib, repo: "hexpm", optional: false]}, {:ranch, ">= 1.8.0 and < 3.0.0", [hex: :ranch, repo: "hexpm", optional: false]}], "hexpm", "179fb65140fb440a17b767ad53b755081506f9596c4db5c49c0396d8c8643668"}, "cowboy_telemetry": {:hex, :cowboy_telemetry, "0.4.0", "f239f68b588efa7707abce16a84d0d2acf3a0f50571f8bb7f56a15865aae820c", [:rebar3], [{:cowboy, "~> 2.7", [hex: :cowboy, repo: "hexpm", optional: false]}, {:telemetry, "~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "7d98bac1ee4565d31b62d59f8823dfd8356a169e7fcbb83831b8a5397404c9de"}, - "cowlib": {:hex, :cowlib, "2.16.1", "318d385d55f657e9a5005838c4e426e13dcd724a691438384b6165a69687e531", [:make, :rebar3], [], "hexpm", "58f1e425a9e04176f1d30e20116f57c4e90ef0e187552e9741c465bdf4044f70"}, + "cowlib": {:hex, :cowlib, "2.17.1", "3e6053016d1ab245730f0af688755476dcedb1c25ed8fb5751f59a2bfdc0c9af", [:make, :rebar3], [], "hexpm", "ff08bd17e6dd931445b18af77315b9b5fe052407110964ad2588c686b57b5e3f"}, "earmark_parser": {:hex, :earmark_parser, "1.4.44", "f20830dd6b5c77afe2b063777ddbbff09f9759396500cdbe7523efd58d7a339c", [:mix], [], "hexpm", "4778ac752b4701a5599215f7030989c989ffdc4f6df457c5f36938cc2d2a2750"}, "ex_doc": {:hex, :ex_doc, "0.40.3", "4a972ffe64bc07dc605af487e98fc19b72a4185f55ca031b94c0552d6071c1d9", [:mix], [{:earmark_parser, "~> 1.4.44", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.0", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14 or ~> 1.0", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1 or ~> 1.0", [hex: :makeup_erlang, repo: "hexpm", optional: false]}, {:makeup_html, ">= 0.1.0", [hex: :makeup_html, repo: "hexpm", optional: true]}], "hexpm", "2756e357742fecd9749b489b85d67c9ce99c465f2e75728d9e6dc8d704b973de"}, - "finch": {:hex, :finch, "0.9.0", "8b772324aebafcaba763f1dffaa3e7f52f8c4e52485f50f48bbb2f42219a2e87", [:mix], [{:castore, "~> 0.1", [hex: :castore, repo: "hexpm", optional: false]}, {:mint, "~> 1.3", [hex: :mint, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.3.5", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 0.2", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "a93bfcad9ca50fa3cb2d459f27667d9a87cfbb7fecf9b29b2e78a50bc2ab445d"}, + "finch": {:hex, :finch, "0.18.0", "944ac7d34d0bd2ac8998f79f7a811b21d87d911e77a786bc5810adb75632ada4", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.3", [hex: :mint, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.4 or ~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 0.2.6 or ~> 1.0", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "69f5045b042e531e53edc2574f15e25e735b522c37e2ddb766e15b979e03aa65"}, "gen_stage": {:hex, :gen_stage, "1.2.1", "19d8b5e9a5996d813b8245338a28246307fd8b9c99d1237de199d21efc4c76a1", [:mix], [], "hexpm", "83e8be657fa05b992ffa6ac1e3af6d57aa50aace8f691fcf696ff02f8335b001"}, "googleapis": {:hex, :googleapis, "0.1.0", "13770f3f75f5b863fb9acf41633c7bc71bad788f3f553b66481a096d083ee20e", [:mix], [{:protobuf, "~> 0.12", [hex: :protobuf, repo: "hexpm", optional: false]}], "hexpm", "1989a7244fd17d3eb5f3de311a022b656c3736b39740db46506157c4604bd212"}, "goth": {:hex, :goth, "1.4.2", "a598dfbce6fe65db3f5f43b1ab2ce8fbe3b2fe20a7569ad62d71c11c0ddc3f41", [:mix], [{:finch, "~> 0.9", [hex: :finch, repo: "hexpm", optional: false]}, {:jason, "~> 1.1", [hex: :jason, repo: "hexpm", optional: false]}, {:jose, "~> 1.11", [hex: :jose, repo: "hexpm", optional: false]}], "hexpm", "d51bb6544dc551fe5754ab72e6cf194120b3c06d924282aaa3321a516ed3b98a"}, - "grpc": {:hex, :grpc, "1.0.0-rc.1", "790336fc827f0a22521d443c1c89e941502ce1e3ef09160c7e4798b2e148b53d", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:grpc_core, "~> 1.0.0-rc.1", [hex: :grpc_core, repo: "hexpm", optional: false]}, {:gun, "~> 2.0", [hex: :gun, repo: "hexpm", optional: false]}, {:mint, "~> 1.5", [hex: :mint, repo: "hexpm", optional: false]}], "hexpm", "c60dcda2fb143769ba496bd86d33420a9e44d633555bc781deaba3668138372b"}, - "grpc_core": {:hex, :grpc_core, "1.0.0-rc.1", "d82957bca32937bb52df06596cca7550783acc139a06b70202a982ef8b59490e", [:mix], [{:googleapis, "~> 0.1.0", [hex: :googleapis, repo: "hexpm", optional: false]}, {:jason, ">= 0.0.0", [hex: :jason, repo: "hexpm", optional: false]}, {:protobuf, "~> 0.14", [hex: :protobuf, repo: "hexpm", optional: false]}, {:telemetry, "~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "c76233ea374421da562b5b022c22614e81f9cf862da93543cff93c37c085f136"}, + "grpc": {:hex, :grpc, "1.0.0", "edb1cbfa752195934058fed0822da8e1cdf41ed47f5da94cea153cfc57e994da", [:mix], [{:castore, "~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:grpc_core, "~> 1.0.0", [hex: :grpc_core, repo: "hexpm", optional: false]}, {:gun, "~> 2.2.0", [hex: :gun, repo: "hexpm", optional: true]}, {:mint, "~> 1.9", [hex: :mint, repo: "hexpm", optional: true]}], "hexpm", "d3811809268817d46f7f4dedfe8eccd95cc31c148f02f1a35f0a775b34579605"}, + "grpc_core": {:hex, :grpc_core, "1.0.0", "58a6446dfa7cb96c1d2a4da2aa630a728a63d6a8252812aa5e6d51ce79e8c1c8", [:mix], [{:googleapis, "~> 0.1.0", [hex: :googleapis, repo: "hexpm", optional: false]}, {:jason, ">= 0.0.0", [hex: :jason, repo: "hexpm", optional: false]}, {:protobuf, "~> 0.17", [hex: :protobuf, repo: "hexpm", optional: false]}, {:telemetry, "~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "181c84e6fcd6ed456711503f47c238d4aa6fc2bdc5d1cf5851c779062b224a6a"}, "gun": {:hex, :gun, "2.3.0", "c1eb7be3b5178f6e67edd373f954360de7d7933f2d5a57686affd3b279d76cdf", [:make, :rebar3], [{:cowlib, ">= 2.15.0 and < 3.0.0", [hex: :cowlib, repo: "hexpm", optional: false]}], "hexpm", "c3bfbbb8f146a6c5ffb2c487f06a3ca4a57e90220b07a1f97eb69a4e7b176035"}, "hpax": {:hex, :hpax, "1.0.3", "ed67ef51ad4df91e75cc6a1494f851850c0bd98ebc0be6e81b026e765ee535aa", [:mix], [], "hexpm", "8eab6e1cfa8d5918c2ce4ba43588e894af35dbd8e91e6e55c817bca5847df34a"}, - "jason": {:hex, :jason, "1.4.1", "af1504e35f629ddcdd6addb3513c3853991f694921b1b9368b0bd32beb9f1b63", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "fbb01ecdfd565b56261302f7e1fcc27c4fb8f32d56eab74db621fc154604a7a1"}, + "jason": {:hex, :jason, "1.4.5", "2e3a008590b0b8d7388c20293e9dcc9cf3e5d642fd2a114e4cbbb52e595d940a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0 or ~> 3.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "b0c823996102bcd0239b3c2444eb00409b72f6a140c1950bc8b457d836b30684"}, "jose": {:hex, :jose, "1.11.12", "06e62b467b61d3726cbc19e9b5489f7549c37993de846dfb3ee8259f9ed208b3", [:mix, :rebar3], [], "hexpm", "31e92b653e9210b696765cdd885437457de1add2a9011d92f8cf63e4641bab7b"}, "makeup": {:hex, :makeup, "1.2.1", "e90ac1c65589ef354378def3ba19d401e739ee7ee06fb47f94c687016e3713d1", [:mix], [{:nimble_parsec, "~> 1.4", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "d36484867b0bae0fea568d10131197a4c2e47056a6fbe84922bf6ba71c8d17ce"}, "makeup_elixir": {:hex, :makeup_elixir, "1.0.1", "e928a4f984e795e41e3abd27bfc09f51db16ab8ba1aebdba2b3a575437efafc2", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "7284900d412a3e5cfd97fdaed4f5ed389b8f2b4cb49efc0eb3bd10e2febf9507"}, "makeup_erlang": {:hex, :makeup_erlang, "1.1.0", "835f7e60792e08824cda445639555d7bf1bbbddb1b60b306e33cb6f6db24dc74", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "1cd6780fb1dd1a03979abaed0fe82712b0625118fd5257d3ebbf73f960c73c3c"}, "mime": {:hex, :mime, "2.0.7", "b8d739037be7cd402aee1ba0306edfdef982687ee7e9859bee6198c1e7e2f128", [:mix], [], "hexpm", "6171188e399ee16023ffc5b76ce445eb6d9672e2e241d2df6050f3c771e80ccd"}, - "mint": {:hex, :mint, "1.8.0", "b964eaf4416f2dee2ba88968d52239fca5621b0402b9c95f55a08eb9d74803e9", [:mix], [{:castore, "~> 0.1.0 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:hpax, "~> 0.1.1 or ~> 0.2.0 or ~> 1.0", [hex: :hpax, repo: "hexpm", optional: false]}], "hexpm", "f3c572c11355eccf00f22275e9b42463bc17bd28db13be1e28f8e0bb4adbc849"}, - "nimble_options": {:hex, :nimble_options, "0.3.7", "1e52dd7673d36138b1a5dede183b5d86dff175dc46d104a8e98e396b85b04670", [:mix], [], "hexpm", "2086907e6665c6b6579be54ef5001928df5231f355f71ed258f80a55e9f63633"}, + "mint": {:hex, :mint, "1.9.0", "d6f534c2a3e98b2a8cc749b4796eb77e9e3af79a76f96e4c74035a827de0d318", [:mix], [{:castore, "~> 0.1.0 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:hpax, "~> 0.1.1 or ~> 0.2.0 or ~> 1.0", [hex: :hpax, repo: "hexpm", optional: false]}], "hexpm", "007154c7d8c43916aed3c93afd1f11aebbaa9c5ff4b7ba55ebe0d17ee0296042"}, + "nimble_options": {:hex, :nimble_options, "1.1.1", "e3a492d54d85fc3fd7c5baf411d9d2852922f66e69476317787a7b2bb000a61b", [:mix], [], "hexpm", "821b2470ca9442c4b6984882fe9bb0389371b8ddec4d45a9504f00a66f650b44"}, "nimble_parsec": {:hex, :nimble_parsec, "1.4.2", "8efba0122db06df95bfaa78f791344a89352ba04baedd3849593bfce4d0dc1c6", [:mix], [], "hexpm", "4b21398942dda052b403bbe1da991ccd03a053668d147d53fb8c4e0efe09c973"}, "nimble_pool": {:hex, :nimble_pool, "0.2.6", "91f2f4c357da4c4a0a548286c84a3a28004f68f05609b4534526871a22053cde", [:mix], [], "hexpm", "1c715055095d3f2705c4e236c18b618420a35490da94149ff8b580a2144f653f"}, "plug": {:hex, :plug, "1.19.2", "e4950525b22c6789dfb38a3f95d47171ba159da3fc5a33be9643b43d5e8adb98", [:mix], [{:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:plug_crypto, "~> 1.1.1 or ~> 1.2 or ~> 2.0", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.3 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "b6fce20a56af5e60fa5dfecf3f907bb98ec981be43c79a3809a499bc3d133de0"}, "plug_cowboy": {:hex, :plug_cowboy, "2.8.1", "5aa391a5e8d1ac3192e36a3bcaff12b5fd6ef6c7e29b53a38e63a860783e77d0", [:mix], [{:cowboy, "~> 2.7", [hex: :cowboy, repo: "hexpm", optional: false]}, {:cowboy_telemetry, "~> 0.3", [hex: :cowboy_telemetry, repo: "hexpm", optional: false]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm", "4c200288673d5bc86a0ab7dc6a2a069176a74e5d573ef62740a1c517458a5f26"}, "plug_crypto": {:hex, :plug_crypto, "2.1.1", "19bda8184399cb24afa10be734f84a16ea0a2bc65054e23a62bb10f06bc89491", [:mix], [], "hexpm", "6470bce6ffe41c8bd497612ffde1a7e4af67f36a15eea5f921af71cf3e11247c"}, - "protobuf": {:hex, :protobuf, "0.16.0", "d1878725105d49162977cf3408ccc3eac4f3532e26e5a9e250f2c624175d10f6", [:mix], [{:jason, "~> 1.2", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm", "f0d0d3edd8768130f24cc2cfc41320637d32c80110e80d13f160fa699102c828"}, + "protobuf": {:hex, :protobuf, "0.17.0", "39e24e43c9648e148feba16ed51100b5b2028ea900b55460377b0476f6e10613", [:mix], [{:jason, "~> 1.2", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm", "ca6c91f6f63e2c147b47f03eefd10b80538aa6fc55ff4b12b795efb786b0152f"}, "ranch": {:hex, :ranch, "1.8.1", "208169e65292ac5d333d6cdbad49388c1ae198136e4697ae2f474697140f201c", [:make, :rebar3], [], "hexpm", "aed58910f4e21deea992a67bf51632b6d60114895eb03bb392bb733064594dd0"}, "telemetry": {:hex, :telemetry, "1.4.2", "a0cb522801dffb1c49fe6e30561badffc7b6d0e180db1300df759faa22062855", [:rebar3], [], "hexpm", "928f6495066506077862c0d1646609eed891a4326bee3126ba54b60af61febb1"}, } From 12621f410c020a43597808c6beb76b7fc3ee6154 Mon Sep 17 00:00:00 2001 From: Rock Date: Tue, 16 Jun 2026 10:46:22 +0200 Subject: [PATCH 25/29] Remove lock and fix tests --- mix.lock | 1 - test/broadway_cloud_pub_sub/pull/producer_test.exs | 10 +++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/mix.lock b/mix.lock index bc11658..bc6ca48 100644 --- a/mix.lock +++ b/mix.lock @@ -13,7 +13,6 @@ "goth": {:hex, :goth, "1.4.2", "a598dfbce6fe65db3f5f43b1ab2ce8fbe3b2fe20a7569ad62d71c11c0ddc3f41", [:mix], [{:finch, "~> 0.9", [hex: :finch, repo: "hexpm", optional: false]}, {:jason, "~> 1.1", [hex: :jason, repo: "hexpm", optional: false]}, {:jose, "~> 1.11", [hex: :jose, repo: "hexpm", optional: false]}], "hexpm", "d51bb6544dc551fe5754ab72e6cf194120b3c06d924282aaa3321a516ed3b98a"}, "grpc": {:hex, :grpc, "1.0.0", "edb1cbfa752195934058fed0822da8e1cdf41ed47f5da94cea153cfc57e994da", [:mix], [{:castore, "~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:grpc_core, "~> 1.0.0", [hex: :grpc_core, repo: "hexpm", optional: false]}, {:gun, "~> 2.2.0", [hex: :gun, repo: "hexpm", optional: true]}, {:mint, "~> 1.9", [hex: :mint, repo: "hexpm", optional: true]}], "hexpm", "d3811809268817d46f7f4dedfe8eccd95cc31c148f02f1a35f0a775b34579605"}, "grpc_core": {:hex, :grpc_core, "1.0.0", "58a6446dfa7cb96c1d2a4da2aa630a728a63d6a8252812aa5e6d51ce79e8c1c8", [:mix], [{:googleapis, "~> 0.1.0", [hex: :googleapis, repo: "hexpm", optional: false]}, {:jason, ">= 0.0.0", [hex: :jason, repo: "hexpm", optional: false]}, {:protobuf, "~> 0.17", [hex: :protobuf, repo: "hexpm", optional: false]}, {:telemetry, "~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "181c84e6fcd6ed456711503f47c238d4aa6fc2bdc5d1cf5851c779062b224a6a"}, - "gun": {:hex, :gun, "2.3.0", "c1eb7be3b5178f6e67edd373f954360de7d7933f2d5a57686affd3b279d76cdf", [:make, :rebar3], [{:cowlib, ">= 2.15.0 and < 3.0.0", [hex: :cowlib, repo: "hexpm", optional: false]}], "hexpm", "c3bfbbb8f146a6c5ffb2c487f06a3ca4a57e90220b07a1f97eb69a4e7b176035"}, "hpax": {:hex, :hpax, "1.0.3", "ed67ef51ad4df91e75cc6a1494f851850c0bd98ebc0be6e81b026e765ee535aa", [:mix], [], "hexpm", "8eab6e1cfa8d5918c2ce4ba43588e894af35dbd8e91e6e55c817bca5847df34a"}, "jason": {:hex, :jason, "1.4.5", "2e3a008590b0b8d7388c20293e9dcc9cf3e5d642fd2a114e4cbbb52e595d940a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0 or ~> 3.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "b0c823996102bcd0239b3c2444eb00409b72f6a140c1950bc8b457d836b30684"}, "jose": {:hex, :jose, "1.11.12", "06e62b467b61d3726cbc19e9b5489f7549c37993de846dfb3ee8259f9ed208b3", [:mix, :rebar3], [], "hexpm", "31e92b653e9210b696765cdd885437457de1add2a9011d92f8cf63e4641bab7b"}, diff --git a/test/broadway_cloud_pub_sub/pull/producer_test.exs b/test/broadway_cloud_pub_sub/pull/producer_test.exs index d6736bb..cab0bee 100644 --- a/test/broadway_cloud_pub_sub/pull/producer_test.exs +++ b/test/broadway_cloud_pub_sub/pull/producer_test.exs @@ -148,7 +148,7 @@ defmodule BroadwayCloudPubSub.Pull.ProducerTest do test ":subscription should be a string" do assert_raise( ValidationError, - "required option :subscription not found, received options: [:client]", + ~r/required :subscription option not found, received options: /, fn -> prepare_for_start_module_opts([]) end @@ -266,7 +266,7 @@ defmodule BroadwayCloudPubSub.Pull.ProducerTest do assert_raise( ValidationError, - ~r/expected :max_number_of_messages to be a positive integer, got: 0/, + ~r/invalid value for :max_number_of_messages option: expected positive integer, got: 0/, fn -> prepare_for_start_module_opts( goth: FakeAuth, @@ -278,7 +278,7 @@ defmodule BroadwayCloudPubSub.Pull.ProducerTest do assert_raise( ValidationError, - ~r/expected :max_number_of_messages to be a positive integer, got: -1/, + ~r/invalid value for :max_number_of_messages option: expected positive integer, got: -1/, fn -> prepare_for_start_module_opts( goth: FakeAuth, @@ -326,7 +326,7 @@ defmodule BroadwayCloudPubSub.Pull.ProducerTest do assert producer_opts[:token_generator] == token_generator assert_raise ValidationError, - ~r/expected :token_generator to be a tuple {Mod, Fun, Args}, got: {1, 1, 1}/, + ~r/invalid value for :token_generator option: expected tuple {mod, fun, args}, got: {1, 1, 1}/, fn -> prepare_for_start_module_opts( subscription: "projects/foo/subscriptions/bar", @@ -335,7 +335,7 @@ defmodule BroadwayCloudPubSub.Pull.ProducerTest do end assert_raise ValidationError, - ~r/expected :token_generator to be a tuple {Mod, Fun, Args}, got: SomeModule/, + ~r/invalid value for :token_generator option: expected tuple {mod, fun, args}, got: SomeModule/, fn -> prepare_for_start_module_opts( subscription: "projects/foo/subscriptions/bar", From 3accfcf356723f3e6042ace1b004a7c7795908ff Mon Sep 17 00:00:00 2001 From: Rock Date: Tue, 16 Jun 2026 11:27:18 +0200 Subject: [PATCH 26/29] Protobuf 0.17 like grpc_core --- mix.exs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mix.exs b/mix.exs index c364d3f..2c3881e 100644 --- a/mix.exs +++ b/mix.exs @@ -39,7 +39,7 @@ defmodule BroadwayCloudPubSub.MixProject do {:telemetry, "~> 0.4.3 or ~> 1.0"}, {:goth, "~> 1.3", optional: true}, {:grpc, "~> 1.0", optional: true}, - {:protobuf, "~> 0.12 or ~> 0.13 or ~> 0.14 or ~> 0.15 or ~> 0.16", optional: true}, + {:protobuf, "~> 0.17", optional: true}, {:ex_doc, "~> 0.23", only: :docs}, {:bypass, "~> 2.1", only: :test} ] From 095662d39e41884ad7356ea2aab8fceec05930d5 Mon Sep 17 00:00:00 2001 From: Rock Date: Wed, 17 Jun 2026 11:38:22 +0200 Subject: [PATCH 27/29] Fix unary channel's childs being killed and treating it as a channel disconnect --- .../streaming/client.ex | 10 ++ .../streaming/grpc_client.ex | 11 ++ .../streaming/unary_rpc_client.ex | 29 +++- .../streaming/unary_rpc_client_test.exs | 138 ++++++++++++++++++ 4 files changed, 182 insertions(+), 6 deletions(-) diff --git a/lib/broadway_cloud_pub_sub/streaming/client.ex b/lib/broadway_cloud_pub_sub/streaming/client.ex index 85d6a62..2cb962e 100644 --- a/lib/broadway_cloud_pub_sub/streaming/client.ex +++ b/lib/broadway_cloud_pub_sub/streaming/client.ex @@ -72,6 +72,16 @@ defmodule BroadwayCloudPubSub.Streaming.Client do """ @callback disconnect(channel(), config()) :: :ok + @doc """ + Returns the pid that owns the underlying transport connection for `channel`, + or `nil` if the channel does not have one (e.g. an already-closed channel, + or a custom client whose transport is not pid-backed). + + Implementations should return `nil` rather than raise on unknown channel + shapes; callers treat `nil` as “no match possible” and ignore the EXIT. + """ + @callback connection_pid(channel()) :: pid() | nil + # --- Streaming RPCs --- @doc """ diff --git a/lib/broadway_cloud_pub_sub/streaming/grpc_client.ex b/lib/broadway_cloud_pub_sub/streaming/grpc_client.ex index 24076b5..6c3a85b 100644 --- a/lib/broadway_cloud_pub_sub/streaming/grpc_client.ex +++ b/lib/broadway_cloud_pub_sub/streaming/grpc_client.ex @@ -61,6 +61,17 @@ defmodule BroadwayCloudPubSub.Streaming.GrpcClient do _, _ -> :ok end + @impl BroadwayCloudPubSub.Streaming.Client + # Both the Mint and Gun adapters in the `grpc` library expose the connection- + # owning process under `channel.adapter_payload.conn_pid`. Returns nil for any + # other shape (e.g. a closed channel whose adapter_payload has been cleared) + # so callers can safely pattern-match without raising. + def connection_pid(%GRPC.Channel{adapter_payload: %{conn_pid: pid}}) when is_pid(pid) do + pid + end + + def connection_pid(_channel), do: nil + @impl BroadwayCloudPubSub.Streaming.Client def streaming_pull(channel, _config) do Stub.streaming_pull(channel, []) diff --git a/lib/broadway_cloud_pub_sub/streaming/unary_rpc_client.ex b/lib/broadway_cloud_pub_sub/streaming/unary_rpc_client.ex index 4a43b8f..4e0d266 100644 --- a/lib/broadway_cloud_pub_sub/streaming/unary_rpc_client.ex +++ b/lib/broadway_cloud_pub_sub/streaming/unary_rpc_client.ex @@ -205,19 +205,36 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClient do end # The Mint/Gun ConnectionProcess is linked to this GenServer (trap_exit in init/1). - # :normal = clean disconnect; nil out channel so ensure_channel/1 reopens it. + # :normal = clean disconnect; tear down the channel so ensure_channel/1 reopens it. # other = unexpected crash; schedule a reconnect. - def handle_info({:EXIT, _pid, :normal}, state) do - {:noreply, %{state | channel: nil}} + def handle_info({:EXIT, pid, :normal}, state) do + if channel_conn_pid(state) == pid do + {:noreply, disconnect_channel(state)} + else + {:noreply, state} + end end - def handle_info({:EXIT, _pid, _reason}, state) do - state = schedule_reconnect(%{state | channel: nil}) - {:noreply, state} + def handle_info({:EXIT, pid, _reason}, state) do + if channel_conn_pid(state) == pid do + state = state |> disconnect_channel() |> schedule_reconnect() + {:noreply, state} + else + {:noreply, state} + end end def handle_info(_msg, state), do: {:noreply, state} + # Resolves the held channel's owning pid via the configured grpc_client. + # Returns nil if no channel is held (so an EXIT signal can never match) or + # the client reports no pid for this channel. + defp channel_conn_pid(%{channel: nil}), do: nil + + defp channel_conn_pid(%{channel: channel, grpc_client: grpc_client}) do + grpc_client.connection_pid(channel) + end + @impl GenServer def terminate(_reason, %{channel: channel} = state) when not is_nil(channel) do state.grpc_client.disconnect(channel, state.grpc_client_config) diff --git a/test/broadway_cloud_pub_sub/streaming/unary_rpc_client_test.exs b/test/broadway_cloud_pub_sub/streaming/unary_rpc_client_test.exs index 8ad8a18..101a47a 100644 --- a/test/broadway_cloud_pub_sub/streaming/unary_rpc_client_test.exs +++ b/test/broadway_cloud_pub_sub/streaming/unary_rpc_client_test.exs @@ -403,4 +403,142 @@ defmodule BroadwayCloudPubSub.Streaming.UnaryRpcClientTest do end end end + + # ============================================================ + # handle_info({:EXIT, ...}) — channel lifecycle + # + # Regression coverage for the Mint-adapter leak: a stray EXIT from a + # spawn-helper (e.g. Mint's `StreamResponseProcess`, which links to the + # caller and exits :normal once the response is consumed) must NOT be + # treated as a channel teardown. Only an EXIT whose pid matches the held + # ConnectionProcess pid (`adapter_payload.conn_pid`) may tear down the + # channel. See UPSTREAM_BUG_REPORT.md. + # ============================================================ + + defmodule FakeGrpcClient do + @moduledoc false + @behaviour BroadwayCloudPubSub.Streaming.Client + + # Each connect/1 spawns a new long-lived linked pid to play the role of + # the Mint/Gun ConnectionProcess. The pid is exposed via the channel's + # adapter_payload, mirroring the real adapters. + @impl true + def init(opts), do: {:ok, %{test_pid: Keyword.fetch!(opts, :test_pid)}} + + @impl true + def connect(%{test_pid: test_pid}) do + conn_pid = spawn_link(fn -> Process.sleep(:infinity) end) + send(test_pid, {:fake_grpc, :connect, conn_pid}) + channel = %{adapter_payload: %{conn_pid: conn_pid}} + {:ok, channel} + end + + @impl true + def disconnect(%{adapter_payload: %{conn_pid: conn_pid}}, %{test_pid: test_pid}) do + send(test_pid, {:fake_grpc, :disconnect, conn_pid}) + if is_pid(conn_pid) and Process.alive?(conn_pid), do: Process.exit(conn_pid, :kill) + :ok + end + + @impl true + def connection_pid(%{adapter_payload: %{conn_pid: pid}}) when is_pid(pid), do: pid + def connection_pid(_), do: nil + + # Unused by the EXIT-handler regression tests but required by the behaviour. + @impl true + def streaming_pull(_channel, _config), do: {:error, :not_implemented} + @impl true + def send_request(_stream, _request, _config), do: {:ok, _stream = nil} + @impl true + def recv(_stream, _config), do: {:error, :not_implemented} + @impl true + def cancel(_stream, _config), do: :ok + @impl true + def acknowledge(_channel, _request, _config), do: {:ok, %{}} + @impl true + def modify_ack_deadline(_channel, _request, _config), do: {:ok, %{}} + end + + defp start_client_with_fake_grpc do + opts = [ + broadway_name: __MODULE__, + subscription: "projects/test/subscriptions/test-sub", + grpc_client: FakeGrpcClient, + grpc_client_config: %{test_pid: self()}, + backoff_type: :exp, + backoff_min: 50, + backoff_max: 1_000 + ] + + {:ok, pid} = UnaryRpcClient.start_link(opts) + pid + end + + describe "handle_info({:EXIT, ...}) — channel pid matching" do + test "spurious EXIT :normal from a non-channel pid does NOT tear down the channel" do + pid = start_client_with_fake_grpc() + + assert_receive {:fake_grpc, :connect, conn_pid} + state = :sys.get_state(pid) + assert state.channel.adapter_payload.conn_pid == conn_pid + + # Simulate Mint's per-RPC StreamResponseProcess: a foreign pid exiting :normal. + stray = spawn(fn -> :ok end) + send(pid, {:EXIT, stray, :normal}) + _ = :sys.get_state(pid) + + # Channel must be unchanged, and no extra connect/disconnect must have happened. + state_after = :sys.get_state(pid) + assert state_after.channel.adapter_payload.conn_pid == conn_pid + refute_received {:fake_grpc, :connect, _} + refute_received {:fake_grpc, :disconnect, _} + assert Process.alive?(conn_pid) + end + + test "spurious EXIT :crash from a non-channel pid does NOT tear down the channel" do + pid = start_client_with_fake_grpc() + + assert_receive {:fake_grpc, :connect, conn_pid} + + stray = spawn(fn -> :ok end) + send(pid, {:EXIT, stray, :some_crash}) + _ = :sys.get_state(pid) + + state_after = :sys.get_state(pid) + assert state_after.channel.adapter_payload.conn_pid == conn_pid + assert state_after.reconnect_pending == false + refute_received {:fake_grpc, :disconnect, _} + refute_received :reconnect + end + + test "EXIT :normal from the held ConnectionProcess pid tears down the channel via disconnect_channel/1" do + pid = start_client_with_fake_grpc() + + assert_receive {:fake_grpc, :connect, conn_pid} + + send(pid, {:EXIT, conn_pid, :normal}) + _ = :sys.get_state(pid) + + assert_received {:fake_grpc, :disconnect, ^conn_pid} + state_after = :sys.get_state(pid) + assert state_after.channel == nil + end + + test "EXIT :crash from the held ConnectionProcess pid disconnects AND schedules a reconnect" do + pid = start_client_with_fake_grpc() + + assert_receive {:fake_grpc, :connect, conn_pid} + + send(pid, {:EXIT, conn_pid, :boom}) + _ = :sys.get_state(pid) + + assert_received {:fake_grpc, :disconnect, ^conn_pid} + # disconnect_channel sets channel: nil, after which schedule_reconnect/1 + # short-circuits (channel-nil clause) — the next ack will lazily reopen + # via ensure_channel/1. State must remain consistent and alive. + state_after = :sys.get_state(pid) + assert state_after.channel == nil + assert Process.alive?(pid) + end + end end From 60e67529e4d9edd969b1cb52035e8981f37be072 Mon Sep 17 00:00:00 2001 From: Rock Date: Wed, 17 Jun 2026 11:47:48 +0200 Subject: [PATCH 28/29] Update deps --- mix.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mix.lock b/mix.lock index bc6ca48..3640943 100644 --- a/mix.lock +++ b/mix.lock @@ -7,10 +7,10 @@ "cowlib": {:hex, :cowlib, "2.17.1", "3e6053016d1ab245730f0af688755476dcedb1c25ed8fb5751f59a2bfdc0c9af", [:make, :rebar3], [], "hexpm", "ff08bd17e6dd931445b18af77315b9b5fe052407110964ad2588c686b57b5e3f"}, "earmark_parser": {:hex, :earmark_parser, "1.4.44", "f20830dd6b5c77afe2b063777ddbbff09f9759396500cdbe7523efd58d7a339c", [:mix], [], "hexpm", "4778ac752b4701a5599215f7030989c989ffdc4f6df457c5f36938cc2d2a2750"}, "ex_doc": {:hex, :ex_doc, "0.40.3", "4a972ffe64bc07dc605af487e98fc19b72a4185f55ca031b94c0552d6071c1d9", [:mix], [{:earmark_parser, "~> 1.4.44", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.0", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14 or ~> 1.0", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1 or ~> 1.0", [hex: :makeup_erlang, repo: "hexpm", optional: false]}, {:makeup_html, ">= 0.1.0", [hex: :makeup_html, repo: "hexpm", optional: true]}], "hexpm", "2756e357742fecd9749b489b85d67c9ce99c465f2e75728d9e6dc8d704b973de"}, - "finch": {:hex, :finch, "0.18.0", "944ac7d34d0bd2ac8998f79f7a811b21d87d911e77a786bc5810adb75632ada4", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.3", [hex: :mint, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.4 or ~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 0.2.6 or ~> 1.0", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "69f5045b042e531e53edc2574f15e25e735b522c37e2ddb766e15b979e03aa65"}, + "finch": {:hex, :finch, "0.23.0", "e3f9287ac25a8832f848b144c2b57346aac65b205e2e0629a52adfe6507fd837", [:mix], [{:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.8", [hex: :mint, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.4 or ~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 1.1", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "80e58d3f936f57e3fdf404f83a3642897ae6d9fb642934e46da4d8fe761b99d5"}, "gen_stage": {:hex, :gen_stage, "1.2.1", "19d8b5e9a5996d813b8245338a28246307fd8b9c99d1237de199d21efc4c76a1", [:mix], [], "hexpm", "83e8be657fa05b992ffa6ac1e3af6d57aa50aace8f691fcf696ff02f8335b001"}, "googleapis": {:hex, :googleapis, "0.1.0", "13770f3f75f5b863fb9acf41633c7bc71bad788f3f553b66481a096d083ee20e", [:mix], [{:protobuf, "~> 0.12", [hex: :protobuf, repo: "hexpm", optional: false]}], "hexpm", "1989a7244fd17d3eb5f3de311a022b656c3736b39740db46506157c4604bd212"}, - "goth": {:hex, :goth, "1.4.2", "a598dfbce6fe65db3f5f43b1ab2ce8fbe3b2fe20a7569ad62d71c11c0ddc3f41", [:mix], [{:finch, "~> 0.9", [hex: :finch, repo: "hexpm", optional: false]}, {:jason, "~> 1.1", [hex: :jason, repo: "hexpm", optional: false]}, {:jose, "~> 1.11", [hex: :jose, repo: "hexpm", optional: false]}], "hexpm", "d51bb6544dc551fe5754ab72e6cf194120b3c06d924282aaa3321a516ed3b98a"}, + "goth": {:hex, :goth, "1.4.5", "ee37f96e3519bdecd603f20e7f10c758287088b6d77c0147cd5ee68cf224aade", [:mix], [{:finch, "~> 0.17", [hex: :finch, repo: "hexpm", optional: false]}, {:jason, "~> 1.1", [hex: :jason, repo: "hexpm", optional: false]}, {:jose, "~> 1.11", [hex: :jose, repo: "hexpm", optional: false]}], "hexpm", "0fc2dce5bd710651ed179053d0300ce3a5d36afbdde11e500d57f05f398d5ed5"}, "grpc": {:hex, :grpc, "1.0.0", "edb1cbfa752195934058fed0822da8e1cdf41ed47f5da94cea153cfc57e994da", [:mix], [{:castore, "~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:grpc_core, "~> 1.0.0", [hex: :grpc_core, repo: "hexpm", optional: false]}, {:gun, "~> 2.2.0", [hex: :gun, repo: "hexpm", optional: true]}, {:mint, "~> 1.9", [hex: :mint, repo: "hexpm", optional: true]}], "hexpm", "d3811809268817d46f7f4dedfe8eccd95cc31c148f02f1a35f0a775b34579605"}, "grpc_core": {:hex, :grpc_core, "1.0.0", "58a6446dfa7cb96c1d2a4da2aa630a728a63d6a8252812aa5e6d51ce79e8c1c8", [:mix], [{:googleapis, "~> 0.1.0", [hex: :googleapis, repo: "hexpm", optional: false]}, {:jason, ">= 0.0.0", [hex: :jason, repo: "hexpm", optional: false]}, {:protobuf, "~> 0.17", [hex: :protobuf, repo: "hexpm", optional: false]}, {:telemetry, "~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "181c84e6fcd6ed456711503f47c238d4aa6fc2bdc5d1cf5851c779062b224a6a"}, "hpax": {:hex, :hpax, "1.0.3", "ed67ef51ad4df91e75cc6a1494f851850c0bd98ebc0be6e81b026e765ee535aa", [:mix], [], "hexpm", "8eab6e1cfa8d5918c2ce4ba43588e894af35dbd8e91e6e55c817bca5847df34a"}, @@ -23,7 +23,7 @@ "mint": {:hex, :mint, "1.9.0", "d6f534c2a3e98b2a8cc749b4796eb77e9e3af79a76f96e4c74035a827de0d318", [:mix], [{:castore, "~> 0.1.0 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:hpax, "~> 0.1.1 or ~> 0.2.0 or ~> 1.0", [hex: :hpax, repo: "hexpm", optional: false]}], "hexpm", "007154c7d8c43916aed3c93afd1f11aebbaa9c5ff4b7ba55ebe0d17ee0296042"}, "nimble_options": {:hex, :nimble_options, "1.1.1", "e3a492d54d85fc3fd7c5baf411d9d2852922f66e69476317787a7b2bb000a61b", [:mix], [], "hexpm", "821b2470ca9442c4b6984882fe9bb0389371b8ddec4d45a9504f00a66f650b44"}, "nimble_parsec": {:hex, :nimble_parsec, "1.4.2", "8efba0122db06df95bfaa78f791344a89352ba04baedd3849593bfce4d0dc1c6", [:mix], [], "hexpm", "4b21398942dda052b403bbe1da991ccd03a053668d147d53fb8c4e0efe09c973"}, - "nimble_pool": {:hex, :nimble_pool, "0.2.6", "91f2f4c357da4c4a0a548286c84a3a28004f68f05609b4534526871a22053cde", [:mix], [], "hexpm", "1c715055095d3f2705c4e236c18b618420a35490da94149ff8b580a2144f653f"}, + "nimble_pool": {:hex, :nimble_pool, "1.1.0", "bf9c29fbdcba3564a8b800d1eeb5a3c58f36e1e11d7b7fb2e084a643f645f06b", [:mix], [], "hexpm", "af2e4e6b34197db81f7aad230c1118eac993acc0dae6bc83bac0126d4ae0813a"}, "plug": {:hex, :plug, "1.19.2", "e4950525b22c6789dfb38a3f95d47171ba159da3fc5a33be9643b43d5e8adb98", [:mix], [{:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:plug_crypto, "~> 1.1.1 or ~> 1.2 or ~> 2.0", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.3 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "b6fce20a56af5e60fa5dfecf3f907bb98ec981be43c79a3809a499bc3d133de0"}, "plug_cowboy": {:hex, :plug_cowboy, "2.8.1", "5aa391a5e8d1ac3192e36a3bcaff12b5fd6ef6c7e29b53a38e63a860783e77d0", [:mix], [{:cowboy, "~> 2.7", [hex: :cowboy, repo: "hexpm", optional: false]}, {:cowboy_telemetry, "~> 0.3", [hex: :cowboy_telemetry, repo: "hexpm", optional: false]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm", "4c200288673d5bc86a0ab7dc6a2a069176a74e5d573ef62740a1c517458a5f26"}, "plug_crypto": {:hex, :plug_crypto, "2.1.1", "19bda8184399cb24afa10be734f84a16ea0a2bc65054e23a62bb10f06bc89491", [:mix], [], "hexpm", "6470bce6ffe41c8bd497612ffde1a7e4af67f36a15eea5f921af71cf3e11247c"}, From 95b9278da77a7c358e59d54c5879591d6f1e4c9d Mon Sep 17 00:00:00 2001 From: Rock Date: Wed, 17 Jun 2026 11:53:46 +0200 Subject: [PATCH 29/29] Unused castore --- mix.lock | 1 - 1 file changed, 1 deletion(-) diff --git a/mix.lock b/mix.lock index 3640943..5b4b9b7 100644 --- a/mix.lock +++ b/mix.lock @@ -1,7 +1,6 @@ %{ "broadway": {:hex, :broadway, "1.2.1", "83a1567423c26885e15f6cd8670ca790370af2fcff2ede7fa88c5ea793087a67", [:mix], [{:gen_stage, "~> 1.0", [hex: :gen_stage, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.3.7 or ~> 0.4 or ~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.3 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "68ae63d83b55bdca0f95cd49feee5fb74c5a6bec557caf940860fe07dbc8a4fb"}, "bypass": {:hex, :bypass, "2.1.0", "909782781bf8e20ee86a9cabde36b259d44af8b9f38756173e8f5e2e1fabb9b1", [:mix], [{:plug, "~> 1.7", [hex: :plug, repo: "hexpm", optional: false]}, {:plug_cowboy, "~> 2.0", [hex: :plug_cowboy, repo: "hexpm", optional: false]}, {:ranch, "~> 1.3", [hex: :ranch, repo: "hexpm", optional: false]}], "hexpm", "d9b5df8fa5b7a6efa08384e9bbecfe4ce61c77d28a4282f79e02f1ef78d96b80"}, - "castore": {:hex, :castore, "1.0.19", "6903cabdfd9d1af46454126e7c8385186659dd33ecfb74a885cae52221ad6109", [:mix], [], "hexpm", "3669e6cab13f54c2df26b3e6833745d647f35b6e30d8ddd5975df0d5c842ca98"}, "cowboy": {:hex, :cowboy, "2.15.0", "9cfe86ed7117bf045e10adbedb0170af7be57f2a3637e7be143433d8dd267396", [:make, :rebar3], [{:cowlib, ">= 2.16.0 and < 3.0.0", [hex: :cowlib, repo: "hexpm", optional: false]}, {:ranch, ">= 1.8.0 and < 3.0.0", [hex: :ranch, repo: "hexpm", optional: false]}], "hexpm", "179fb65140fb440a17b767ad53b755081506f9596c4db5c49c0396d8c8643668"}, "cowboy_telemetry": {:hex, :cowboy_telemetry, "0.4.0", "f239f68b588efa7707abce16a84d0d2acf3a0f50571f8bb7f56a15865aae820c", [:rebar3], [{:cowboy, "~> 2.7", [hex: :cowboy, repo: "hexpm", optional: false]}, {:telemetry, "~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "7d98bac1ee4565d31b62d59f8823dfd8356a169e7fcbb83831b8a5397404c9de"}, "cowlib": {:hex, :cowlib, "2.17.1", "3e6053016d1ab245730f0af688755476dcedb1c25ed8fb5751f59a2bfdc0c9af", [:make, :rebar3], [], "hexpm", "ff08bd17e6dd931445b18af77315b9b5fe052407110964ad2588c686b57b5e3f"},