Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion lib/protos/datadog/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -147,16 +147,21 @@ fn main() {

tonic_prost_build::configure()
.build_server(true)
.build_client(true)
.include_file("checks.mod.rs")
.compile_protos(
&[
"proto/checks/v1/checks.proto",
"proto/checks/v1/acr_ipc.proto",
"proto/checks/v1/metric.proto",
"proto/checks/v1/log.proto",
"proto/checks/v1/service_check.proto",
"proto/checks/v1/event.proto",
"proto/checks/v1/sketch.proto",
"proto/checks/v1/event_platform_event.proto",
"proto/checks/v1/histogram_bucket.proto",
],
&["proto"],
)
.expect("Failed to build gRPC service definitions for Checks IPC.");
.expect("Failed to build checks proto definitions.");
}
99 changes: 99 additions & 0 deletions lib/protos/datadog/proto/checks/v1/acr_ipc.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
syntax = "proto3";

package datadog.checks.v1;

import "checks/v1/checks.proto";

// Handshake: client -> server
message Hello {
uint32 protocol_version = 1;
string client_id = 2;
string client_version = 3;
}

// Handshake: server -> client
message HelloResp {
uint32 protocol_version = 1;
string server_id = 2;
bool accepted = 3;
string reject_reason = 4;
}

// Configuration: server -> client (unsolicited)
message ConfigData {
EnrichmentData enrichment = 1;
repeated CheckInstance checks = 2;
map<string, string> agent_config = 3;
}

message EnrichmentData {
string hostname = 1;
map<string, string> host_tags = 2;
string cluster_name = 3;
string agent_version = 4;
map<string, bytes> config_values = 5;
uint64 process_start_time = 6;
K8sConnectionInfo k8s_connection_info = 7;
}

message K8sConnectionInfo {
string api_server_url = 1;
string bearer_token = 2;
}

message CheckInstance {
string check_name = 1;
string instance_name = 2;
string id = 3;
// The per-instance YAML/JSON block (a single entry from the
// integration's `instances:` list).
bytes config = 4;
// The integration-level YAML/JSON `init_config:` block, shared
// across every instance of this integration. Required for any check
// that reads defaults at Configure time (credentials, base URLs,
// shared tags, etc.).
bytes init_config = 5;
}

// Check data: client -> server (batched)
message CheckDataMsg {
uint64 sequence_id = 1;
string check_name = 2;
string instance_name = 3;
string check_id = 4;
repeated CheckData data = 5;
}

// Check data acknowledgement: server -> client
message CheckDataAck {
uint64 sequence_id = 1;
bool success = 2;
string error = 3;
}

// Check result: client -> server
message CheckResultMsg {
string check_name = 1;
string instance_name = 2;
string check_id = 3;
string error = 4;
}

// Trivial request/response types for RPCs that don't need a body.
message StreamConfigRequest {}
message SendCheckResultResponse {}

// gRPC service for ACR <-> ADP IPC.
service AcrIpc {

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we just call this CheckRunner, and drop the IPC part?

More generic, fits the fact that realistically anything can implement it, avoids the stuttering of "IPC" in the name when that's already implied by how it's used, etc.

// Handshake — first call after connecting.
rpc Handshake(Hello) returns (HelloResp);

// Check data submission with acknowledgement.
rpc SendCheckData(CheckDataMsg) returns (CheckDataAck);

// Check result reporting.
rpc SendCheckResult(CheckResultMsg) returns (SendCheckResultResponse);

// Server pushes config updates to the client as a stream.
rpc StreamConfig(StreamConfigRequest) returns (stream ConfigData);
Comment on lines +89 to +98

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I realize that this is unwritten, but: can we follow the existing naming scheme for gRPC request/response message types in the Agent ecosystem? FooRequest for requests and FooResponse for responses, such that they're always paired by prefix for the same RPC operation.

As well, don't truncate words like request, response, or message.

}
21 changes: 11 additions & 10 deletions lib/protos/datadog/proto/checks/v1/checks.proto
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,26 @@ syntax = "proto3";
package datadog.checks.v1;

import "checks/v1/event.proto";
import "checks/v1/event_platform_event.proto";
import "checks/v1/histogram_bucket.proto";
import "checks/v1/log.proto";
import "checks/v1/metric.proto";
import "checks/v1/service_check.proto";
import "checks/v1/sketch.proto";

message CheckData {
oneof data {
datadog.checks.v1.metric.Metric metric = 1;
datadog.checks.v1.log.Log log = 2;
datadog.checks.v1.service_check.ServiceCheck service_check = 3;
datadog.checks.v1.event.Event event = 4;
// Slot 5 was previously `histogram` (an unused stub). Repurposed
// to carry pre-aggregated DDSketches; the wire bytes from any
// legacy producer of the old Histogram message would be
// mis-parsed here, so don't rely on cross-version compatibility
// for slot 5 specifically.
Comment on lines +19 to +23

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: comments that describe a change are a code smell of not reviewing LLM output.

We document to explain the current state of things, Git history is for everything that came before, unless there's a strong reason to couple them together.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup, there are some commits that are mostly AI generated. Most of the code has been reviewed piecemeal, but I opened the PR as a draft so that I could do a higher-level review (I like the ability to mark files as reviewed because it helps me remember where I left off). Sorry for sending you the slop!

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is in draft so I definitely don't see it as you asking us/me to review slop. 😅

If nothing else, this should mostly just serve as a reminder that LLMs love to do this sort of thing (code comments as a description of what they changed, rather than the thing they changed something to) and that we need to be vigilant in cleaning that up.

datadog.checks.v1.sketch.Sketch sketch = 5;
datadog.checks.v1.event_platform_event.EventPlatformEvent event_platform_event = 6;
datadog.checks.v1.histogram_bucket.HistogramBucket histogram_bucket = 7;
}
}

message SendCheckPayloadRequest {
repeated CheckData data = 1;
}

message SendCheckPayloadResponse {}

service Checks {
rpc SendCheckPayload(SendCheckPayloadRequest) returns (SendCheckPayloadResponse);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
syntax = "proto3";

package datadog.checks.v1.event_platform_event;

message EventPlatformEvent {
string event_data = 1;
string event_type = 2;
}
49 changes: 49 additions & 0 deletions lib/protos/datadog/proto/checks/v1/histogram_bucket.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
syntax = "proto3";

package datadog.checks.v1.histogram_bucket;

// HistogramBucket carries a pre-aggregated histogram bucket produced
// by an OpenMetrics / Prometheus-style integration. It is the wire
// shape for both the agent's `Sender.HistogramBucket` and
// `Sender.OpenmetricsBucket` upcalls (see
// pkg/aggregator/sender/sender.go on the receiving side); the two
// differ only in whether the producer is emitting many buckets per
// (name, tags) context (`HistogramBucket`, multiple_buckets=true) or
// exactly one (`OpenmetricsBucket`, multiple_buckets=false, the
// default for Python `aggregator.submit_histogram_bucket`).
message HistogramBucket {
// Metric name (e.g. "http.request.duration.count").
string name = 1;

// Number of observations counted by this bucket.
int64 value = 2;

// Bucket bounds. For point observations (single-value buckets),
// both bounds carry the same value.
double lower_bound = 3;
double upper_bound = 4;

// True when value increases monotonically across calls and the
// receiver should compute per-bucket deltas client-side. False
// when the producer already emits deltas.
bool monotonic = 5;

// Hostname this bucket was produced on. Empty means "use the
// receiving agent's default hostname".
string hostname = 6;

// Tags in `key:value` form.
repeated string tags = 7;

// When `monotonic=true`, indicates the receiver should emit the
// first observed value rather than wait for a second to compute a
// delta. Producers set this when they know the bucket started from
// zero recently (e.g. a freshly-scraped target).
bool flush_first_value = 8;

// Distinguishes the two sender upcalls. False (the default)
// routes to `Sender.OpenmetricsBucket`; true routes to
// `Sender.HistogramBucket`. Python `submit_histogram_bucket`
// always lands as false.
Comment on lines +44 to +47

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: opaque comment that requires implicit cross-repository knowledge.

It's also... confusing at face value. The naming implies multiple buckets, but this type is, definitionally, a value representing a single bucket.

bool multiple_buckets = 9;
}
30 changes: 30 additions & 0 deletions lib/protos/datadog/proto/checks/v1/log.proto
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,37 @@ enum LogLevel {
LOG_LEVEL_CRITICAL = 50;
}

// Log carries a single log record produced by an integration check.
//
// Fields 1 and 2 mirror the original minimal shape and stay for backward
// compatibility. Fields 3-8 are additive: proto3 default-empty values are
// wire-compatible with peers that don't populate them.
message Log {
string message = 1;
LogLevel level = 2;

// Integration source name (e.g. "mysql", "http_check"). Used by the
// logs intake to route and tag.
string source = 3;

// Hostname the log was produced on. Empty means "use the receiving
// agent's default hostname".
string hostname = 4;

// Service the log relates to. Surfaces as `service` in the intake.
string service = 5;

// Log-level tags, in `key:value` form (matches the agent's wire
// format used elsewhere). Combined with host / integration tags
// downstream.
repeated string tags = 6;

// Unix-nanosecond timestamp at which the producer believes the log
// event happened. Zero means "stamp at receive time".
int64 timestamp = 7;

// Free-form additional structured fields. Each value is a JSON-encoded
// string so producers can convey arbitrary scalar / array / object
// shapes through a stable wire type.
map<string, string> additional_properties = 8;
}
2 changes: 2 additions & 0 deletions lib/protos/datadog/proto/checks/v1/metric.proto
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ enum MetricType {
METRIC_TYPE_RATE = 2;
METRIC_TYPE_GAUGE = 3;
METRIC_TYPE_HISTOGRAM = 4;
METRIC_TYPE_MONOTONIC_COUNT = 5;
METRIC_TYPE_HISTORATE = 6;
}

message Metric {
Expand Down
46 changes: 46 additions & 0 deletions lib/protos/datadog/proto/checks/v1/sketch.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
syntax = "proto3";

package datadog.checks.v1.sketch;

// Sketch carries a pre-aggregated DDSketch produced by a check.
//
// The wire format is aligned with the Datadog Agent's sketch_payload
// format (see DataDog/agent-payload v5 SketchPayload.Sketch.Dogsketch)
// so receivers can convert directly to the agent's internal Sketch
// type without a separate sketches-go round-trip.
message Sketch {
// Metric name (e.g. "http.request.duration").
string name = 1;

// Tags in `key:value` form.
repeated string tags = 2;

// Hostname this sketch was produced on. Empty means "use the
// receiving agent's default hostname".
string hostname = 3;

// Unix-second timestamp at which the producer flushed the sketch.
int64 timestamp = 4;

// Flush interval the producer used; informational only — the
// receiving aggregator stamps its own flush boundary.
uint64 interval_secs = 5;

// The DDSketch body itself, in the agent's bin-encoded shape.
Dogsketch dogsketch = 6;
}

// Dogsketch is the bin-encoded DDSketch payload. Bin keys (k) and
// counts (n) are parallel arrays — k[i] is the bin index and n[i] the
// number of observations in that bin. This matches the Datadog Agent's
// SketchPayload.Sketch.Dogsketch definition exactly.
message Dogsketch {
int64 ts = 1;
int64 cnt = 2;
double min = 3;
double max = 4;
double avg = 5;
double sum = 6;
repeated sint32 k = 7;
repeated uint32 n = 8;
}
Loading
Loading