-
Notifications
You must be signed in to change notification settings - Fork 11
Improvements to the ACR IPC protocol #1855
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,99 @@ | ||
| syntax = "proto3"; | ||
|
|
||
| package datadog.checks.v1; | ||
|
|
||
| import "checks/v1/checks.proto"; | ||
|
|
||
| // Handshake: client -> server | ||
| message Hello { | ||
| uint32 protocol_version = 1; | ||
| string client_id = 2; | ||
| string client_version = 3; | ||
| } | ||
|
|
||
| // Handshake: server -> client | ||
| message HelloResp { | ||
| uint32 protocol_version = 1; | ||
| string server_id = 2; | ||
| bool accepted = 3; | ||
| string reject_reason = 4; | ||
| } | ||
|
|
||
| // Configuration: server -> client (unsolicited) | ||
| message ConfigData { | ||
| EnrichmentData enrichment = 1; | ||
| repeated CheckInstance checks = 2; | ||
| map<string, string> agent_config = 3; | ||
| } | ||
|
|
||
| message EnrichmentData { | ||
| string hostname = 1; | ||
| map<string, string> host_tags = 2; | ||
| string cluster_name = 3; | ||
| string agent_version = 4; | ||
| map<string, bytes> config_values = 5; | ||
| uint64 process_start_time = 6; | ||
| K8sConnectionInfo k8s_connection_info = 7; | ||
| } | ||
|
|
||
| message K8sConnectionInfo { | ||
| string api_server_url = 1; | ||
| string bearer_token = 2; | ||
| } | ||
|
|
||
| message CheckInstance { | ||
| string check_name = 1; | ||
| string instance_name = 2; | ||
| string id = 3; | ||
| // The per-instance YAML/JSON block (a single entry from the | ||
| // integration's `instances:` list). | ||
| bytes config = 4; | ||
| // The integration-level YAML/JSON `init_config:` block, shared | ||
| // across every instance of this integration. Required for any check | ||
| // that reads defaults at Configure time (credentials, base URLs, | ||
| // shared tags, etc.). | ||
| bytes init_config = 5; | ||
| } | ||
|
|
||
| // Check data: client -> server (batched) | ||
| message CheckDataMsg { | ||
| uint64 sequence_id = 1; | ||
| string check_name = 2; | ||
| string instance_name = 3; | ||
| string check_id = 4; | ||
| repeated CheckData data = 5; | ||
| } | ||
|
|
||
| // Check data acknowledgement: server -> client | ||
| message CheckDataAck { | ||
| uint64 sequence_id = 1; | ||
| bool success = 2; | ||
| string error = 3; | ||
| } | ||
|
|
||
| // Check result: client -> server | ||
| message CheckResultMsg { | ||
| string check_name = 1; | ||
| string instance_name = 2; | ||
| string check_id = 3; | ||
| string error = 4; | ||
| } | ||
|
|
||
| // Trivial request/response types for RPCs that don't need a body. | ||
| message StreamConfigRequest {} | ||
| message SendCheckResultResponse {} | ||
|
|
||
| // gRPC service for ACR <-> ADP IPC. | ||
| service AcrIpc { | ||
| // Handshake — first call after connecting. | ||
| rpc Handshake(Hello) returns (HelloResp); | ||
|
|
||
| // Check data submission with acknowledgement. | ||
| rpc SendCheckData(CheckDataMsg) returns (CheckDataAck); | ||
|
|
||
| // Check result reporting. | ||
| rpc SendCheckResult(CheckResultMsg) returns (SendCheckResultResponse); | ||
|
|
||
| // Server pushes config updates to the client as a stream. | ||
| rpc StreamConfig(StreamConfigRequest) returns (stream ConfigData); | ||
|
Comment on lines
+89
to
+98
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I realize that this is unwritten, but: can we follow the existing naming scheme for gRPC request/response message types in the Agent ecosystem? As well, don't truncate words like request, response, or message. |
||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,25 +3,26 @@ syntax = "proto3"; | |
| package datadog.checks.v1; | ||
|
|
||
| import "checks/v1/event.proto"; | ||
| import "checks/v1/event_platform_event.proto"; | ||
| import "checks/v1/histogram_bucket.proto"; | ||
| import "checks/v1/log.proto"; | ||
| import "checks/v1/metric.proto"; | ||
| import "checks/v1/service_check.proto"; | ||
| import "checks/v1/sketch.proto"; | ||
|
|
||
| message CheckData { | ||
| oneof data { | ||
| datadog.checks.v1.metric.Metric metric = 1; | ||
| datadog.checks.v1.log.Log log = 2; | ||
| datadog.checks.v1.service_check.ServiceCheck service_check = 3; | ||
| datadog.checks.v1.event.Event event = 4; | ||
| // Slot 5 was previously `histogram` (an unused stub). Repurposed | ||
| // to carry pre-aggregated DDSketches; the wire bytes from any | ||
| // legacy producer of the old Histogram message would be | ||
| // mis-parsed here, so don't rely on cross-version compatibility | ||
| // for slot 5 specifically. | ||
|
Comment on lines
+19
to
+23
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: comments that describe a change are a code smell of not reviewing LLM output. We document to explain the current state of things, Git history is for everything that came before, unless there's a strong reason to couple them together.
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yup, there are some commits that are mostly AI generated. Most of the code has been reviewed piecemeal, but I opened the PR as a draft so that I could do a higher-level review (I like the ability to mark files as reviewed because it helps me remember where I left off). Sorry for sending you the slop!
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is in draft so I definitely don't see it as you asking us/me to review slop. 😅 If nothing else, this should mostly just serve as a reminder that LLMs love to do this sort of thing (code comments as a description of what they changed, rather than the thing they changed something to) and that we need to be vigilant in cleaning that up. |
||
| datadog.checks.v1.sketch.Sketch sketch = 5; | ||
| datadog.checks.v1.event_platform_event.EventPlatformEvent event_platform_event = 6; | ||
| datadog.checks.v1.histogram_bucket.HistogramBucket histogram_bucket = 7; | ||
| } | ||
| } | ||
|
|
||
| message SendCheckPayloadRequest { | ||
| repeated CheckData data = 1; | ||
| } | ||
|
|
||
| message SendCheckPayloadResponse {} | ||
|
|
||
| service Checks { | ||
| rpc SendCheckPayload(SendCheckPayloadRequest) returns (SendCheckPayloadResponse); | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,8 @@ | ||
| syntax = "proto3"; | ||
|
|
||
| package datadog.checks.v1.event_platform_event; | ||
|
|
||
| message EventPlatformEvent { | ||
| string event_data = 1; | ||
| string event_type = 2; | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,49 @@ | ||
| syntax = "proto3"; | ||
|
|
||
| package datadog.checks.v1.histogram_bucket; | ||
|
|
||
| // HistogramBucket carries a pre-aggregated histogram bucket produced | ||
| // by an OpenMetrics / Prometheus-style integration. It is the wire | ||
| // shape for both the agent's `Sender.HistogramBucket` and | ||
| // `Sender.OpenmetricsBucket` upcalls (see | ||
| // pkg/aggregator/sender/sender.go on the receiving side); the two | ||
| // differ only in whether the producer is emitting many buckets per | ||
| // (name, tags) context (`HistogramBucket`, multiple_buckets=true) or | ||
| // exactly one (`OpenmetricsBucket`, multiple_buckets=false, the | ||
| // default for Python `aggregator.submit_histogram_bucket`). | ||
| message HistogramBucket { | ||
| // Metric name (e.g. "http.request.duration.count"). | ||
| string name = 1; | ||
|
|
||
| // Number of observations counted by this bucket. | ||
| int64 value = 2; | ||
|
|
||
| // Bucket bounds. For point observations (single-value buckets), | ||
| // both bounds carry the same value. | ||
| double lower_bound = 3; | ||
| double upper_bound = 4; | ||
|
|
||
| // True when value increases monotonically across calls and the | ||
| // receiver should compute per-bucket deltas client-side. False | ||
| // when the producer already emits deltas. | ||
| bool monotonic = 5; | ||
|
|
||
| // Hostname this bucket was produced on. Empty means "use the | ||
| // receiving agent's default hostname". | ||
| string hostname = 6; | ||
|
|
||
| // Tags in `key:value` form. | ||
| repeated string tags = 7; | ||
|
|
||
| // When `monotonic=true`, indicates the receiver should emit the | ||
| // first observed value rather than wait for a second to compute a | ||
| // delta. Producers set this when they know the bucket started from | ||
| // zero recently (e.g. a freshly-scraped target). | ||
| bool flush_first_value = 8; | ||
|
|
||
| // Distinguishes the two sender upcalls. False (the default) | ||
| // routes to `Sender.OpenmetricsBucket`; true routes to | ||
| // `Sender.HistogramBucket`. Python `submit_histogram_bucket` | ||
| // always lands as false. | ||
|
Comment on lines
+44
to
+47
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: opaque comment that requires implicit cross-repository knowledge. It's also... confusing at face value. The naming implies multiple buckets, but this type is, definitionally, a value representing a single bucket. |
||
| bool multiple_buckets = 9; | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,46 @@ | ||
| syntax = "proto3"; | ||
|
|
||
| package datadog.checks.v1.sketch; | ||
|
|
||
| // Sketch carries a pre-aggregated DDSketch produced by a check. | ||
| // | ||
| // The wire format is aligned with the Datadog Agent's sketch_payload | ||
| // format (see DataDog/agent-payload v5 SketchPayload.Sketch.Dogsketch) | ||
| // so receivers can convert directly to the agent's internal Sketch | ||
| // type without a separate sketches-go round-trip. | ||
| message Sketch { | ||
| // Metric name (e.g. "http.request.duration"). | ||
| string name = 1; | ||
|
|
||
| // Tags in `key:value` form. | ||
| repeated string tags = 2; | ||
|
|
||
| // Hostname this sketch was produced on. Empty means "use the | ||
| // receiving agent's default hostname". | ||
| string hostname = 3; | ||
|
|
||
| // Unix-second timestamp at which the producer flushed the sketch. | ||
| int64 timestamp = 4; | ||
|
|
||
| // Flush interval the producer used; informational only — the | ||
| // receiving aggregator stamps its own flush boundary. | ||
| uint64 interval_secs = 5; | ||
|
|
||
| // The DDSketch body itself, in the agent's bin-encoded shape. | ||
| Dogsketch dogsketch = 6; | ||
| } | ||
|
|
||
| // Dogsketch is the bin-encoded DDSketch payload. Bin keys (k) and | ||
| // counts (n) are parallel arrays — k[i] is the bin index and n[i] the | ||
| // number of observations in that bin. This matches the Datadog Agent's | ||
| // SketchPayload.Sketch.Dogsketch definition exactly. | ||
| message Dogsketch { | ||
| int64 ts = 1; | ||
| int64 cnt = 2; | ||
| double min = 3; | ||
| double max = 4; | ||
| double avg = 5; | ||
| double sum = 6; | ||
| repeated sint32 k = 7; | ||
| repeated uint32 n = 8; | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could we just call this
CheckRunner, and drop the IPC part?More generic, fits the fact that realistically anything can implement it, avoids the stuttering of "IPC" in the name when that's already implied by how it's used, etc.