diff --git a/.golangci.yml b/.golangci.yml index 875c5341f..36b5b3634 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -163,6 +163,10 @@ linters: - linters: - gosec text: "G118" + - linters: + - gosec + text: "G124" + path: service/http/client/handler\.go - linters: - revive text: "unexported-return" diff --git a/Makefile b/Makefile index ad3b72592..4c37d07fe 100644 --- a/Makefile +++ b/Makefile @@ -69,6 +69,11 @@ otel-up: otel-down: cd tests && docker-compose down +# otel-e2e starts only Jaeger and runs the OTLP round-trip test (needs docker). +otel-e2e: + cd tests && docker-compose up -d jaeger + go test -tags integration -run TestOTLP_TracesReachJaeger ./tests/ -timeout 120s + # Wippy CLI build targets WIPPY_VERSION ?= $(shell git describe --tags --always --dirty 2>/dev/null || echo "dev") WIPPY_COMMIT ?= $(shell git rev-parse --short HEAD 2>/dev/null || echo "unknown") diff --git a/api/metrics/metrics.go b/api/metrics/metrics.go index 0377d131e..a125cd0b4 100644 --- a/api/metrics/metrics.go +++ b/api/metrics/metrics.go @@ -6,7 +6,8 @@ package metrics // MetricType constants. 
const ( TypeCounter MetricType = "counter" - TypeGauge MetricType = "gauge" + TypeGauge MetricType = "gauge" // absolute set (GaugeSet) + TypeGaugeAdd MetricType = "gauge_add" // relative delta (GaugeInc/GaugeDec) TypeHistogram MetricType = "histogram" ) diff --git a/boot/components/dispatchers/sql_dispatcher.go b/boot/components/dispatchers/sql_dispatcher.go index 8e4d4c78d..8ca0577c6 100644 --- a/boot/components/dispatchers/sql_dispatcher.go +++ b/boot/components/dispatchers/sql_dispatcher.go @@ -7,6 +7,7 @@ import ( "github.com/wippyai/runtime/api/boot" dispatcherapi "github.com/wippyai/runtime/api/dispatcher" + metricsapi "github.com/wippyai/runtime/api/metrics" "github.com/wippyai/runtime/service/sql" ) @@ -20,6 +21,7 @@ func SQL() boot.Component { return ctx, ErrDispatcherNotFound } svc := sql.NewDispatcher() + svc.SetCollector(metricsapi.GetCollector(ctx)) svc.RegisterAll(reg.Register) return ctx, nil }, diff --git a/boot/components/metrics/all.go b/boot/components/metrics/all.go index 12d6e7bb5..c0ff44854 100644 --- a/boot/components/metrics/all.go +++ b/boot/components/metrics/all.go @@ -7,6 +7,7 @@ import "github.com/wippyai/runtime/api/boot" func All() []boot.Component { return []boot.Component{ Metrics(), - // MetricsInterceptor(), + Interceptor(), + Process(), } } diff --git a/boot/components/metrics/constants.go b/boot/components/metrics/constants.go index ebd83fc43..49b4e8929 100644 --- a/boot/components/metrics/constants.go +++ b/boot/components/metrics/constants.go @@ -6,3 +6,17 @@ import "github.com/wippyai/runtime/api/boot" // Name is the component name for metrics. const Name boot.Name = "metrics" + +// InterceptorName is the component name for the function-call metrics interceptor. +const InterceptorName boot.Name = "metrics-interceptor" + +// ProcessName is the component name for the process lifecycle metrics handler. 
+const ProcessName boot.Name = "metrics-process" + +// interceptorName is the external dependency name of the function interceptor +// registry component (boot/components/system.Interceptor). +const interceptorName boot.Name = "interceptor" + +// lifecycleName is the external dependency name of the process lifecycle +// registry component (boot/components/system.LifecycleName). +const lifecycleName boot.Name = "system.lifecycle" diff --git a/boot/components/metrics/interceptor.go b/boot/components/metrics/interceptor.go index 05dd08dea..f88d5abd9 100644 --- a/boot/components/metrics/interceptor.go +++ b/boot/components/metrics/interceptor.go @@ -1,3 +1,60 @@ // SPDX-License-Identifier: MPL-2.0 package metrics + +import ( + "context" + + "github.com/wippyai/runtime/api/boot" + apiinterceptor "github.com/wippyai/runtime/api/function" + metricsapi "github.com/wippyai/runtime/api/metrics" + metricsinterceptor "github.com/wippyai/runtime/service/metrics/interceptor" +) + +// Interceptor wires the function-call metrics interceptor +// (wippy_function_calls / wippy_function_duration / wippy_function_in_flight) +// into the function interceptor registry. It runs at order 50 so the duration +// timer wraps the full call, including the OTel tracing interceptor (order 100). 
+func Interceptor() boot.Component { + return boot.New(boot.P{ + Name: InterceptorName, + DependsOn: []boot.Name{Name, interceptorName}, + Load: func(ctx context.Context) (context.Context, error) { + if !loadInterceptorEnabled(ctx) { + return ctx, nil + } + + collector := metricsapi.GetCollector(ctx) + if collector == nil { + return ctx, nil + } + + registry := apiinterceptor.GetInterceptorRegistry(ctx) + if registry == nil { + return ctx, nil + } + + if err := registry.Register("metrics", metricsinterceptor.NewFunctionInterceptor(collector, true), 50); err != nil { + return ctx, err + } + + return ctx, nil + }, + }) +} + +// loadInterceptorEnabled reads metrics.interceptor.enabled, defaulting to true +// so function-call metrics emit out of the box. +func loadInterceptorEnabled(ctx context.Context) bool { + bootCfg := boot.GetConfig(ctx) + if bootCfg == nil { + return true + } + + metricsCfg := bootCfg.Sub("metrics") + if metricsCfg == nil { + return true + } + + return metricsCfg.GetBool("interceptor.enabled", true) +} diff --git a/boot/components/metrics/interceptor_test.go b/boot/components/metrics/interceptor_test.go new file mode 100644 index 000000000..3391b0369 --- /dev/null +++ b/boot/components/metrics/interceptor_test.go @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: MPL-2.0 + +package metrics + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/wippyai/runtime/api/boot" + ctxapi "github.com/wippyai/runtime/api/context" + "github.com/wippyai/runtime/api/function" + logapi "github.com/wippyai/runtime/api/logs" + metricsapi "github.com/wippyai/runtime/api/metrics" + "github.com/wippyai/runtime/api/registry" + "github.com/wippyai/runtime/api/runtime" + "github.com/wippyai/runtime/internal/telemetrytest" + metricsinterceptor "github.com/wippyai/runtime/service/metrics/interceptor" + "go.uber.org/zap" +) + +type mockInterceptorRegistry struct { + registrations 
[]registeredInterceptor +} + +type registeredInterceptor struct { + interceptor function.Interceptor + name string + order int +} + +func (m *mockInterceptorRegistry) Register(name string, i function.Interceptor, order int) error { + m.registrations = append(m.registrations, registeredInterceptor{name: name, interceptor: i, order: order}) + return nil +} + +func (m *mockInterceptorRegistry) Unregister(string) error { return nil } + +func (m *mockInterceptorRegistry) Execute(ctx context.Context, f function.Func, task runtime.Task) (*runtime.Result, error) { + return f(ctx, task) +} + +func TestMetricsInterceptor_RegistersWhenEnabled(t *testing.T) { + component := Interceptor() + assert.Equal(t, InterceptorName, component.Name()) + + ctx := ctxapi.NewRootContext() + ctx = logapi.WithLogger(ctx, zap.NewNop()) + ctx = boot.WithConfig(ctx, boot.NewConfig(boot.WithSection("metrics", map[string]any{ + "interceptor.enabled": true, + }))) + rec := telemetrytest.NewRecorder() + ctx = metricsapi.WithCollector(ctx, rec) + reg := &mockInterceptorRegistry{} + ctx = function.WithInterceptorRegistry(ctx, reg) + + newCtx, err := component.Load(ctx) + require.NoError(t, err) + require.NotNil(t, newCtx) + require.Len(t, reg.registrations, 1) + + r := reg.registrations[0] + assert.Equal(t, "metrics", r.name) + assert.Equal(t, 50, r.order) + + fi, ok := r.interceptor.(*metricsinterceptor.FunctionInterceptor) + require.True(t, ok, "registered interceptor must be *FunctionInterceptor") + + funcID := registry.NewID("ns", "test_func") + task := runtime.Task{ID: funcID} + next := func(_ context.Context, _ runtime.Task) (*runtime.Result, error) { + return &runtime.Result{}, nil + } + _, err = fi.Handle(context.Background(), task, next) + require.NoError(t, err) + assert.Equal(t, 1.0, rec.CounterValue(metricsinterceptor.FunctionCalls, + metricsapi.Labels{"function_id": funcID.String(), "status": "success"})) +} + +func TestMetricsInterceptor_NotRegisteredWhenDisabled(t *testing.T) { + 
component := Interceptor() + + ctx := ctxapi.NewRootContext() + ctx = logapi.WithLogger(ctx, zap.NewNop()) + ctx = boot.WithConfig(ctx, boot.NewConfig(boot.WithSection("metrics", map[string]any{ + "interceptor.enabled": false, + }))) + ctx = metricsapi.WithCollector(ctx, telemetrytest.NewRecorder()) + reg := &mockInterceptorRegistry{} + ctx = function.WithInterceptorRegistry(ctx, reg) + + _, err := component.Load(ctx) + require.NoError(t, err) + assert.Empty(t, reg.registrations) +} + +func TestMetricsInterceptor_NoCollector(t *testing.T) { + component := Interceptor() + + ctx := ctxapi.NewRootContext() + ctx = logapi.WithLogger(ctx, zap.NewNop()) + ctx = boot.WithConfig(ctx, boot.NewConfig(boot.WithSection("metrics", map[string]any{ + "interceptor.enabled": true, + }))) + reg := &mockInterceptorRegistry{} + ctx = function.WithInterceptorRegistry(ctx, reg) + + _, err := component.Load(ctx) + require.NoError(t, err) + assert.Empty(t, reg.registrations, "no registration without a collector") +} diff --git a/boot/components/metrics/process.go b/boot/components/metrics/process.go new file mode 100644 index 000000000..abf6384ab --- /dev/null +++ b/boot/components/metrics/process.go @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: MPL-2.0 + +package metrics + +import ( + "context" + + "github.com/wippyai/runtime/api/boot" + metricsapi "github.com/wippyai/runtime/api/metrics" + processapi "github.com/wippyai/runtime/api/process" + impl "github.com/wippyai/runtime/service/metrics" +) + +// Process registers the process lifecycle metrics handler, which emits +// wippy_process_started_total / wippy_process_terminated_total / +// wippy_process_active alongside the existing OTel lifecycle spans. 
+func Process() boot.Component { + return boot.New(boot.P{ + Name: ProcessName, + DependsOn: []boot.Name{Name, lifecycleName}, + Load: func(ctx context.Context) (context.Context, error) { + coll := metricsapi.GetCollector(ctx) + if coll == nil { + return ctx, nil + } + + reg := processapi.GetLifecycleRegistry(ctx) + if reg == nil { + return ctx, nil + } + + reg.Register("metrics", impl.NewProcessLifecycle(coll)) + return ctx, nil + }, + }) +} diff --git a/boot/components/otel/otel.go b/boot/components/otel/otel.go index f1765a0c3..120b180aa 100644 --- a/boot/components/otel/otel.go +++ b/boot/components/otel/otel.go @@ -9,10 +9,13 @@ import ( logapi "github.com/wippyai/runtime/api/logs" otelapi "github.com/wippyai/runtime/api/service/otel" "github.com/wippyai/runtime/service/otel" + "go.opentelemetry.io/otel/trace" "go.uber.org/zap" ) func OTel() boot.Component { + var tp trace.TracerProvider + return boot.New(boot.P{ Name: Name, Load: func(ctx context.Context) (context.Context, error) { @@ -35,7 +38,8 @@ func OTel() boot.Component { return ctx, nil } - tp, err := otel.InitializeProvider(ctx, cfg, logger) + var err error + tp, err = otel.InitializeProvider(ctx, cfg, logger) if err != nil { return ctx, NewOTELInitError(err) } @@ -53,8 +57,12 @@ func OTel() boot.Component { Start: func(_ context.Context) error { return nil }, - Stop: func(_ context.Context) error { - return nil + Stop: func(ctx context.Context) error { + logger := logapi.GetLogger(ctx).Named("otel") + if tp == nil { + return nil + } + return otel.ShutdownTracerProvider(ctx, tp, logger) }, }) } diff --git a/boot/components/otel/otel_temporal.go b/boot/components/otel/otel_temporal.go index c0553ec09..ef495c66f 100644 --- a/boot/components/otel/otel_temporal.go +++ b/boot/components/otel/otel_temporal.go @@ -4,6 +4,7 @@ package otel import ( "context" + "fmt" "github.com/wippyai/runtime/api/boot" logapi "github.com/wippyai/runtime/api/logs" @@ -11,9 +12,8 @@ import ( temporalapi 
"github.com/wippyai/runtime/api/service/temporal" temporalboot "github.com/wippyai/runtime/boot/components/service/temporal" "github.com/wippyai/runtime/service/otel" - "go.opentelemetry.io/otel/propagation" + gootel "go.opentelemetry.io/otel" temporalotel "go.temporal.io/sdk/contrib/opentelemetry" - "go.uber.org/zap" ) // Temporal creates the OTEL tracing interceptor for Temporal workflows and activities. @@ -60,17 +60,15 @@ func Temporal() boot.Component { return ctx, nil } - // Create Temporal tracing interceptor with the shared tracer + // Create Temporal tracing interceptor with the shared tracer and the + // globally-configured propagator so Temporal follows the same W3C + // propagation settings as every other surface. tracingInterceptor, err := temporalotel.NewTracingInterceptor(temporalotel.TracerOptions{ - Tracer: tracer, - TextMapPropagator: propagation.NewCompositeTextMapPropagator( - propagation.TraceContext{}, - propagation.Baggage{}, - ), + Tracer: tracer, + TextMapPropagator: gootel.GetTextMapPropagator(), }) if err != nil { - logger.Error("failed to create Temporal tracing interceptor", zap.Error(err)) - return ctx, nil + return ctx, fmt.Errorf("failed to create Temporal tracing interceptor: %w", err) } // Register with both client and worker registries diff --git a/boot/components/queue/consumers.go b/boot/components/queue/consumers.go index 07636b1d1..a85dfc75f 100644 --- a/boot/components/queue/consumers.go +++ b/boot/components/queue/consumers.go @@ -9,6 +9,7 @@ import ( "github.com/wippyai/runtime/api/event" "github.com/wippyai/runtime/api/function" logapi "github.com/wippyai/runtime/api/logs" + metricsapi "github.com/wippyai/runtime/api/metrics" "github.com/wippyai/runtime/api/payload" queueapi "github.com/wippyai/runtime/api/queue" regapi "github.com/wippyai/runtime/api/registry" @@ -34,6 +35,7 @@ func Consumers() boot.Component { handlers := bootpkg.GetHandlerRegistry(ctx) queueMgr := queueapi.GetManager(ctx) funcReg := 
function.GetRegistry(ctx) + coll := metricsapi.GetCollector(ctx) if reg := regapi.GetRegistry(ctx); reg != nil { consumerPatterns := []regapi.DependencyPattern{ @@ -53,6 +55,7 @@ func Consumers() boot.Component { funcReg, dtt, logger.Named("queue.consumer"), + coll, ) handlers.RegisterListener("queue.consumer", manager) diff --git a/boot/components/service/temporal/temporal.go b/boot/components/service/temporal/temporal.go index 120ac9205..c191a45db 100644 --- a/boot/components/service/temporal/temporal.go +++ b/boot/components/service/temporal/temporal.go @@ -11,6 +11,7 @@ import ( "github.com/wippyai/runtime/api/event" funcapi "github.com/wippyai/runtime/api/function" logapi "github.com/wippyai/runtime/api/logs" + metricsapi "github.com/wippyai/runtime/api/metrics" "github.com/wippyai/runtime/api/payload" regapi "github.com/wippyai/runtime/api/registry" "github.com/wippyai/runtime/api/resource" @@ -161,6 +162,7 @@ func Component() boot.Component { return dcRegistry.Build() }), client.WithInterceptors(clientInterceptors), + client.WithMetricsHandler(client.NewMetricsHandler(metricsapi.GetCollector(ctx))), ) if err != nil { return ctx, fmt.Errorf("failed to create client manager: %w", err) diff --git a/boot/components/store/memstore.go b/boot/components/store/memstore.go index c606b3139..c7bb3577e 100644 --- a/boot/components/store/memstore.go +++ b/boot/components/store/memstore.go @@ -8,6 +8,7 @@ import ( "github.com/wippyai/runtime/api/boot" "github.com/wippyai/runtime/api/event" logapi "github.com/wippyai/runtime/api/logs" + metricsapi "github.com/wippyai/runtime/api/metrics" "github.com/wippyai/runtime/api/payload" bootpkg "github.com/wippyai/runtime/boot" memorystore "github.com/wippyai/runtime/service/store/memory" @@ -22,11 +23,13 @@ func MemStore() boot.Component { dtt := payload.GetTranscoder(ctx) bus := event.GetBus(ctx) handlers := bootpkg.GetHandlerRegistry(ctx) + coll := metricsapi.GetCollector(ctx) manager := memorystore.NewManager( bus, dtt, 
logger.Named("memory"), + coll, ) handlers.RegisterListener("store.memory", manager) diff --git a/go.mod b/go.mod index 70db0c648..189f239a1 100644 --- a/go.mod +++ b/go.mod @@ -26,7 +26,6 @@ require ( github.com/charmbracelet/x/input v0.3.7 github.com/charmbracelet/x/term v0.2.2 github.com/coder/websocket v1.8.15 - github.com/docker/docker v28.5.2+incompatible github.com/expr-lang/expr v1.17.8 github.com/fatih/color v1.19.0 github.com/go-sql-driver/mysql v1.10.0 @@ -45,8 +44,10 @@ require ( github.com/lib/pq v1.12.3 github.com/mattn/go-sqlite3 v1.14.47 github.com/microcosm-cc/bluemonday v1.0.27 + github.com/moby/moby/api v1.55.0 + github.com/moby/moby/client v0.5.0 github.com/muesli/termenv v0.16.0 - github.com/prometheus/client_golang v1.23.0 + github.com/prometheus/client_golang v1.23.2 github.com/rabbitmq/amqp091-go v1.12.0 github.com/sergi/go-diff v1.4.0 github.com/spf13/cobra v1.10.2 @@ -68,6 +69,7 @@ require ( github.com/wippyai/wapp v0.1.2 github.com/wippyai/wasm-runtime v0.0.0-20260624004845-53368697b5c6 github.com/xuri/excelize/v2 v2.10.1 + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.69.0 go.opentelemetry.io/otel v1.44.0 go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.44.0 go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.44.0 @@ -122,6 +124,7 @@ require ( github.com/charmbracelet/harmonica v0.2.0 // indirect github.com/charmbracelet/x/cellbuf v0.0.15 // indirect github.com/charmbracelet/x/windows v0.2.1 // indirect + github.com/cilium/ebpf v0.17.3 // indirect github.com/clipperhouse/displaywidth v0.11.0 // indirect github.com/clipperhouse/uax29/v2 v2.7.0 // indirect github.com/containerd/errdefs v1.0.0 // indirect @@ -135,7 +138,7 @@ require ( github.com/dblohm7/wingoes v0.0.0-20240119213807-a09d6be7affa // indirect github.com/distribution/reference v0.6.0 // indirect github.com/dlclark/regexp2 v1.10.0 // indirect - github.com/docker/go-connections v0.6.0 // indirect + 
github.com/docker/go-connections v0.7.0 // indirect github.com/docker/go-units v0.5.0 // indirect github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect github.com/facebookgo/clock v0.0.0-20150410010913-600d898af40a // indirect @@ -146,7 +149,7 @@ require ( github.com/go-logr/logr v1.4.3 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/goccy/go-yaml v1.19.2 // indirect - github.com/godbus/dbus/v5 v5.1.1-0.20230522191255-76236955d466 // indirect + github.com/godbus/dbus/v5 v5.2.2 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect github.com/golang/mock v1.6.0 // indirect @@ -160,7 +163,7 @@ require ( github.com/hashicorp/go-metrics v0.5.4 // indirect github.com/hashicorp/go-multierror v1.1.1 // indirect github.com/hashicorp/go-sockaddr v1.0.7 // indirect - github.com/hashicorp/golang-lru v0.6.0 // indirect + github.com/hashicorp/golang-lru v1.0.2 // indirect github.com/hdevalence/ed25519consensus v0.2.0 // indirect github.com/huin/goupnp v1.3.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect @@ -179,10 +182,9 @@ require ( github.com/mattn/go-runewidth v0.0.23 // indirect github.com/mdlayher/netlink v1.7.3-0.20250113171957-fbb4dce95f42 // indirect github.com/mdlayher/socket v0.5.0 // indirect - github.com/miekg/dns v1.1.68 // indirect + github.com/miekg/dns v1.1.72 // indirect github.com/mitchellh/go-ps v1.0.0 // indirect github.com/moby/docker-image-spec v1.3.1 // indirect - github.com/moby/sys/atomicwriter v0.1.0 // indirect github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 // indirect github.com/muesli/cancelreader v0.2.2 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect @@ -191,12 +193,11 @@ require ( github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/image-spec v1.1.1 // indirect github.com/pires/go-proxyproto v0.8.1 // indirect - 
github.com/pkg/errors v0.9.1 // indirect github.com/pkoukk/tiktoken-go v0.1.6 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/prometheus/client_model v0.6.2 // indirect - github.com/prometheus/common v0.65.0 // indirect - github.com/prometheus/procfs v0.16.1 // indirect + github.com/prometheus/common v0.67.5 // indirect + github.com/prometheus/procfs v0.20.1 // indirect github.com/richardlehane/mscfb v1.0.6 // indirect github.com/richardlehane/msoleps v1.0.6 // indirect github.com/rivo/uniseg v0.4.7 // indirect @@ -223,17 +224,17 @@ require ( gitlab.com/golang-commonmark/mdurl v0.0.0-20191124015652-932350d1cb84 // indirect gitlab.com/golang-commonmark/puny v0.0.0-20191124015043-9f83538fa04f // indirect go.bytecodealliance.org v0.7.0 // indirect - go.etcd.io/bbolt v1.4.2 // indirect + go.etcd.io/bbolt v1.4.3 // indirect go.opentelemetry.io/auto/sdk v1.2.1 // indirect - go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.64.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.44.0 // indirect go.opentelemetry.io/proto/otlp v1.10.0 // indirect go.uber.org/mock v0.6.0 // indirect go.uber.org/multierr v1.11.0 // indirect + go.yaml.in/yaml/v2 v2.4.4 // indirect go4.org/mem v0.0.0-20240501181205-ae6ca9944745 // indirect go4.org/netipx v0.0.0-20231129151722-fdeea329fbba // indirect golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b // indirect - golang.org/x/mod v0.36.0 // indirect + golang.org/x/mod v0.37.0 // indirect golang.org/x/oauth2 v0.36.0 // indirect golang.org/x/sync v0.21.0 // indirect golang.org/x/text v0.38.0 // indirect @@ -242,8 +243,8 @@ require ( golang.zx2c4.com/wireguard/windows v0.5.3 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20260526163538-3dc84a4a5aaa // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20260526163538-3dc84a4a5aaa // indirect - gotest.tools/v3 v3.5.2 // indirect gvisor.dev/gvisor v0.0.0-20260224225140-573d5e7127a8 // indirect + 
pgregory.net/rapid v1.3.0 // indirect ) tool go.uber.org/mock/mockgen diff --git a/go.sum b/go.sum index dd016b635..037405ab4 100644 --- a/go.sum +++ b/go.sum @@ -9,8 +9,6 @@ filippo.io/edwards25519 v1.2.0 h1:crnVqOiS4jqYleHd9vaKZ+HKtHfllngJIiOpNpoJsjo= filippo.io/edwards25519 v1.2.0/go.mod h1:xzAOLCNug/yB62zG1bQ8uziwrIqIuxhctzJT18Q77mc= filippo.io/mkcert v1.4.4 h1:8eVbbwfVlaqUM7OwuftKc2nuYOoTDQWqsoXmzoXZdbc= filippo.io/mkcert v1.4.4/go.mod h1:VyvOchVuAye3BoUsPUOOofKygVwLV2KQMVFJNRq+1dA= -github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c h1:udKWzYgxTojEKWjV8V+WSxDXJ4NFATAsZjh8iIbsQIg= -github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= github.com/BurntSushi/toml v1.5.0 h1:W5quZX/G/csjUnuI8SUYlsHs9M38FC7znL0lIO+DvMg= github.com/BurntSushi/toml v1.5.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho= github.com/CloudyKit/fastprinter v0.0.0-20200109182630-33d98a066a53 h1:sR+/8Yb4slttB4vD+b9btVEnWgL3Q00OBTzVT8B9C0c= @@ -120,8 +118,8 @@ github.com/charmbracelet/x/term v0.2.2 h1:xVRT/S2ZcKdhhOuSP4t5cLi5o+JxklsoEObBSg github.com/charmbracelet/x/term v0.2.2/go.mod h1:kF8CY5RddLWrsgVwpw4kAa6TESp6EB5y3uxGLeCqzAI= github.com/charmbracelet/x/windows v0.2.1 h1:3x7vnbpQrjpuq/4L+I4gNsG5htYoCiA5oe9hLjAij5I= github.com/charmbracelet/x/windows v0.2.1/go.mod h1:ptZp16h40gDYqs5TSawSVW+yiLB13j4kSMA0lSCHL0M= -github.com/cilium/ebpf v0.16.0 h1:+BiEnHL6Z7lXnlGUsXQPPAE7+kenAd4ES8MQ5min0Ok= -github.com/cilium/ebpf v0.16.0/go.mod h1:L7u2Blt2jMM/vLAVgjxluxtBKlz3/GWjB0dMOEngfwE= +github.com/cilium/ebpf v0.17.3 h1:FnP4r16PWYSE4ux6zN+//jMcW4nMVRvuTLVTvCjyyjg= +github.com/cilium/ebpf v0.17.3/go.mod h1:G5EDHij8yiLzaqn0WjyfJHvRa+3aDlReIaLVRMvOyJk= github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag= github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I= github.com/clipperhouse/displaywidth v0.11.0 
h1:lBc6kY44VFw+TDx4I8opi/EtL9m20WSEFgwIwO+UVM8= @@ -134,8 +132,6 @@ github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M= github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151Xdx3ZPPE= github.com/containerd/errdefs/pkg v0.3.0/go.mod h1:NJw6s9HwNuRhnjJhM7pylWwMyAkmCQvQ4GpJHEqRLVk= -github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I= -github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo= github.com/coreos/etcd v3.3.27+incompatible h1:QIudLb9KeBsE5zyYxd1mjzRSkzLg9Wf9QlRwFgd6oTA= github.com/coreos/etcd v3.3.27+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= github.com/coreos/go-iptables v0.7.1-0.20240112124308-65c67c9f46e6 h1:8h5+bWd7R6AYUslN6c6iuZWTKsKxUFDlpnmilO6R2n0= @@ -171,10 +167,8 @@ github.com/djherbis/times v1.6.0 h1:w2ctJ92J8fBvWPxugmXIv7Nz7Q3iDMKNx9v5ocVH20c= github.com/djherbis/times v1.6.0/go.mod h1:gOHeRAz2h+VJNZ5Gmc/o7iD9k4wW7NMVqieYCY99oc0= github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq0= github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= -github.com/docker/docker v28.5.2+incompatible h1:DBX0Y0zAjZbSrm1uzOkdr1onVghKaftjlSWt4AFexzM= -github.com/docker/docker v28.5.2+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= -github.com/docker/go-connections v0.6.0 h1:LlMG9azAe1TqfR7sO+NJttz1gy6KO7VJBh+pMmjSD94= -github.com/docker/go-connections v0.6.0/go.mod h1:AahvXYshr6JgfUJGdDCs2b5EZG/vmaMAntpSFH5BFKE= +github.com/docker/go-connections v0.7.0 h1:6SsRfJddP22WMrCkj19x9WKjEDTB+ahsdiGYf0mN39c= +github.com/docker/go-connections v0.7.0/go.mod h1:no1qkHdjq7kLMGUXYAduOhYPSJxxvgWBh7ogVvptn3Q= github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= github.com/docker/go-units v0.5.0/go.mod 
h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6baUTXGLOoWe4PQhGxaX0KpnayAqC48p4= @@ -220,8 +214,8 @@ github.com/go4org/plan9netshell v0.0.0-20250324183649-788daa080737 h1:cf60tHxREO github.com/go4org/plan9netshell v0.0.0-20250324183649-788daa080737/go.mod h1:MIS0jDzbU/vuM9MC4YnBITCv+RYuTRq8dJzmCrFsK9g= github.com/goccy/go-yaml v1.19.2 h1:PmFC1S6h8ljIz6gMRBopkjP1TVT7xuwrButHID66PoM= github.com/goccy/go-yaml v1.19.2/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA= -github.com/godbus/dbus/v5 v5.1.1-0.20230522191255-76236955d466 h1:sQspH8M4niEijh3PFscJRLDnkL547IeP7kpPe3uUhEg= -github.com/godbus/dbus/v5 v5.1.1-0.20230522191255-76236955d466/go.mod h1:ZiQxhyQ+bbbfxUKVvjfO498oPYvtYhZzycal3G/NHmU= +github.com/godbus/dbus/v5 v5.2.2 h1:TUR3TgtSVDmjiXOgAAyaZbYmIeP3DPkld3jgKGV8mXQ= +github.com/godbus/dbus/v5 v5.2.2/go.mod h1:3AAv2+hPq5rdnr5txxxRwiGjPXamgoIHgz9FPBfOp3c= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= @@ -290,8 +284,8 @@ github.com/hashicorp/go-sockaddr v1.0.7/go.mod h1:FZQbEYa1pxkQ7WLpyXJ6cbjpT8q0Yg github.com/hashicorp/go-uuid v1.0.0 h1:RS8zrF7PhGwyNPOtxSClXXj9HA8feRnJzgnI1RJCSnM= github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= -github.com/hashicorp/golang-lru v0.6.0 h1:uL2shRDx7RTrOrTCUZEGP/wJUFiUI8QT6E7z5o8jga4= -github.com/hashicorp/golang-lru v0.6.0/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4= +github.com/hashicorp/golang-lru v1.0.2 h1:dV3g9Z/unq5DpblPpw+Oqcv4dU/1omnb4Ok8iPY6p1c= +github.com/hashicorp/golang-lru v1.0.2/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4= github.com/hashicorp/memberlist v0.5.4 
h1:40YY+3qq2tAUhZIMEK8kqusKZBBjdwJ3NUjvYkcxh74= github.com/hashicorp/memberlist v0.5.4/go.mod h1:OgN6xiIo6RlHUWk+ALjP9e32xWCoQrsOCmHrWCm2MWA= github.com/hashicorp/raft v1.7.3 h1:DxpEqZJysHN0wK+fviai5mFcSYsCkNpFUl1xpAW8Rbo= @@ -396,24 +390,20 @@ github.com/mdlayher/socket v0.5.0 h1:ilICZmJcQz70vrWVes1MFera4jGiWNocSkykwwoy3XI github.com/mdlayher/socket v0.5.0/go.mod h1:WkcBFfvyG8QENs5+hfQPl1X6Jpd2yeLIYgrGFmJiJxI= github.com/microcosm-cc/bluemonday v1.0.27 h1:MpEUotklkwCSLeH+Qdx1VJgNqLlpY2KXwXFM08ygZfk= github.com/microcosm-cc/bluemonday v1.0.27/go.mod h1:jFi9vgW+H7c3V0lb6nR74Ib/DIB5OBs92Dimizgw2cA= -github.com/miekg/dns v1.1.68 h1:jsSRkNozw7G/mnmXULynzMNIsgY2dHC8LO6U6Ij2JEA= -github.com/miekg/dns v1.1.68/go.mod h1:fujopn7TB3Pu3JM69XaawiU0wqjpL9/8xGop5UrTPps= +github.com/miekg/dns v1.1.72 h1:vhmr+TF2A3tuoGNkLDFK9zi36F2LS+hKTRW0Uf8kbzI= +github.com/miekg/dns v1.1.72/go.mod h1:+EuEPhdHOsfk6Wk5TT2CzssZdqkmFhf8r+aVyDEToIs= github.com/mitchellh/go-ps v1.0.0 h1:i6ampVEEF4wQFF+bkYfwYgY+F/uYJDktmvLPf7qIgjc= github.com/mitchellh/go-ps v1.0.0/go.mod h1:J4lOc8z8yJs6vUwklHw2XEIiT4z4C40KtWVN3nvg8Pg= github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0= github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo= -github.com/moby/sys/atomicwriter v0.1.0 h1:kw5D/EqkBwsBFi0ss9v1VG3wIkVhzGvLklJ+w3A14Sw= -github.com/moby/sys/atomicwriter v0.1.0/go.mod h1:Ul8oqv2ZMNHOceF643P6FKPXeCmYtlQMvpizfsSoaWs= -github.com/moby/sys/sequential v0.6.0 h1:qrx7XFUd/5DxtqcoH1h438hF5TmOvzC/lspjy7zgvCU= -github.com/moby/sys/sequential v0.6.0/go.mod h1:uyv8EUTrca5PnDsdMGXhZe6CCe8U/UiTWd+lL+7b/Ko= -github.com/moby/term v0.5.2 h1:6qk3FJAFDs6i/q3W/pQ97SX192qKfZgGjCQqfCJkgzQ= -github.com/moby/term v0.5.2/go.mod h1:d3djjFCrjnB+fl8NJux+EJzu0msscUP+f8it8hPkFLc= +github.com/moby/moby/api v1.55.0 h1:2/sexvQyqIWS8pRSCFddBfpW2qE7vR7FCL+vN8pxwMc= +github.com/moby/moby/api v1.55.0/go.mod h1:+RQ6wluLwtYaTd1WnPLykIDPekkuyD/ROWQClE83pzs= 
+github.com/moby/moby/client v0.5.0 h1:5XhyPk2fuOWf6RlSFa3MkIIgDZkF25xToXW8Q/BH7cc= +github.com/moby/moby/client v0.5.0/go.mod h1:rcVpF8ncl9vo5gaIBdol6CnbEtSj1uxMvEV/UrykF/s= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= -github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A= -github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc= github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 h1:ZK8zHtRHOkbHy6Mmr5D264iyp3TiX5OmNcI5cIARiQI= github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6/go.mod h1:CJlz5H+gyd6CUWT45Oy4q24RdLyn7Md9Vj2/ldJBSIo= github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA= @@ -442,7 +432,6 @@ github.com/pires/go-proxyproto v0.8.1 h1:9KEixbdJfhrbtjpz/ZwCdWDD2Xem0NZ38qMYaAS github.com/pires/go-proxyproto v0.8.1/go.mod h1:ZKAAyp3cgy5Y5Mo4n9AlScrkCZwUy0g3Jf+slqQVcuU= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/sftp v1.13.6 h1:JFZT4XbOU7l77xGSpOdW+pwIMqP044IyjXX6FGyEKFo= github.com/pkg/sftp v1.13.6/go.mod h1:tz1ryNURKu77RL+GuCzmoJYxQczL3wLNNpPWagdg4Qk= @@ -456,8 +445,8 @@ github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5Fsn github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU= 
github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= github.com/prometheus/client_golang v1.11.1/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0= -github.com/prometheus/client_golang v1.23.0 h1:ust4zpdl9r4trLY/gSjlm07PuiBq2ynaXXlptpfy8Uc= -github.com/prometheus/client_golang v1.23.0/go.mod h1:i/o0R9ByOnHX0McrTMTyhYvKE4haaf2mW08I+jGAjEE= +github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o= +github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= @@ -467,15 +456,15 @@ github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y8 github.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4= github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc= -github.com/prometheus/common v0.65.0 h1:QDwzd+G1twt//Kwj/Ww6E9FQq1iVMmODnILtW1t2VzE= -github.com/prometheus/common v0.65.0/go.mod h1:0gZns+BLRQ3V6NdaerOhMbwwRbNh9hkGINtQAsP5GS8= +github.com/prometheus/common v0.67.5 h1:pIgK94WWlQt1WLwAC5j2ynLaBRDiinoAb86HZHTUGI4= +github.com/prometheus/common v0.67.5/go.mod h1:SjE/0MzDEEAyrdr5Gqc6G+sXI67maCxzaT3A2+HqjUw= github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A= github.com/prometheus/procfs 
v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= -github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg= -github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is= +github.com/prometheus/procfs v0.20.1 h1:XwbrGOIplXW/AU3YhIhLODXMJYyC1isLFfYCsTEycfc= +github.com/prometheus/procfs v0.20.1/go.mod h1:o9EMBZGRyvDrSPH1RqdxhojkuXstoe4UlK79eF5TGGo= github.com/rabbitmq/amqp091-go v1.12.0 h1:V0v14Iqfs+MwHWihJt/nGS5Ulu0vw572b2Co3mwunkI= github.com/rabbitmq/amqp091-go v1.12.0/go.mod h1:Hy4jKW5kQART1u+JkDTF9YYOQUHXqMuhrgxOEeS7G4o= github.com/richardlehane/mscfb v1.0.6 h1:eN3bvvZCp00bs7Zf52bxNwAx5lJDBK1tCuH19qq5aC8= @@ -500,8 +489,6 @@ github.com/sergi/go-diff v1.4.0/go.mod h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepq github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= -github.com/sirupsen/logrus v1.9.4 h1:TsZE7l11zFCLZnZ+teH4Umoq5BhEIfIzfRDZ1Uzql2w= -github.com/sirupsen/logrus v1.9.4/go.mod h1:ftWc9WdOfJ0a92nsE2jF5u5ZwH8Bv2zdeOC42RjbV2g= github.com/spf13/cobra v1.10.2 h1:DMTTonx5m65Ic0GOoRY2c16WCbHxOOw6xxezuLaBpcU= github.com/spf13/cobra v1.10.2/go.mod h1:7C1pvHqHw5A4vrJfjNwvOdzYu0Gml16OCs2GRiTUUS4= github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= @@ -597,8 +584,6 @@ github.com/wippyai/tree-sitter-sql v0.0.4 h1:0hJyjkV6/lhoGA5FJAaf96od2xyo+OJINSE github.com/wippyai/tree-sitter-sql v0.0.4/go.mod h1:QN9CfIO55fwQNVNk1p/7pjxrLk7iKxrQs42Zh6lrr84= github.com/wippyai/wapp v0.1.2 h1:fCoxKr9s3gk+pWx4XcnIQmOVgPiuimXf3QcE9mHbdmw= github.com/wippyai/wapp v0.1.2/go.mod h1:ndCkYR80+osLGbd7AFWlP+3DxwooR+R6cxQYPZhksg4= -github.com/wippyai/wasm-runtime v0.0.0-20260623160937-4f371ee0f68a 
h1:hNZ5ujZpmQgWlvk9mCaN1Dzvdq2wokrGUVJ8w4OG+Zc= -github.com/wippyai/wasm-runtime v0.0.0-20260623160937-4f371ee0f68a/go.mod h1:1AVYdZibORqlBez081Seakxv2u9IR+KIMrvlBKsk2bQ= github.com/wippyai/wasm-runtime v0.0.0-20260624004845-53368697b5c6 h1:KXsgwoyjcw6rLpltno8Vb7eV+Np0gvJk8s6evZ/vSec= github.com/wippyai/wasm-runtime v0.0.0-20260624004845-53368697b5c6/go.mod h1:1AVYdZibORqlBez081Seakxv2u9IR+KIMrvlBKsk2bQ= github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= @@ -630,12 +615,12 @@ gitlab.com/opennota/wd v0.0.0-20180912061657-c5d65f63c638 h1:uPZaMiz6Sz0PZs3IZJW gitlab.com/opennota/wd v0.0.0-20180912061657-c5d65f63c638/go.mod h1:EGRJaqe2eO9XGmFtQCvV3Lm9NLico3UhFwUpCG/+mVU= go.bytecodealliance.org v0.7.0 h1:CTJ1eb5kFhBKHw1/xycxxz4SmVWNKXYHhrA78oLNXhY= go.bytecodealliance.org v0.7.0/go.mod h1:PCLMft5yTQsHT9oNPWlq0I6Qdmo6THvdky2AZHjNUkA= -go.etcd.io/bbolt v1.4.2 h1:IrUHp260R8c+zYx/Tm8QZr04CX+qWS5PGfPdevhdm1I= -go.etcd.io/bbolt v1.4.2/go.mod h1:Is8rSHO/b4f3XigBC0lL0+4FwAQv3HXEEIgFMuKHceM= +go.etcd.io/bbolt v1.4.3 h1:dEadXpI6G79deX5prL3QRNP6JB8UxVkqo4UPnHaNXJo= +go.etcd.io/bbolt v1.4.3/go.mod h1:tKQlpPaYCVFctUIgFKFnAlvbmB3tpy1vkTnDWohtc0E= go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.64.0 h1:ssfIgGNANqpVFCndZvcuyKbl0g+UAVcbBcqGkG28H0Y= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.64.0/go.mod h1:GQ/474YrbE4Jx8gZ4q5I4hrhUzM6UPzyrqJYV2AqPoQ= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.69.0 h1:8tvICD4vSTOOsNrsI4Ljf6C+6UKvpTEH5XY3JMoyPoo= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.69.0/go.mod h1:z9+yiacE0IHRqM4qFfkbt/JYlmYXgss8GY/jXoNuPJI= go.opentelemetry.io/otel v1.44.0 h1:JjwHmHpA4iZ3wBxluu2fbbE7j4kqlE8jXyAyPXH7HqU= go.opentelemetry.io/otel v1.44.0/go.mod 
h1:BMgjTHL9WPRlRjL2oZCBTL4whCGtXch2H4BhOPIAyYc= go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.44.0 h1:SUplec5dp06reu1zaXmOXdvqH398taqrDXqUl99jxSc= @@ -674,8 +659,8 @@ go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= go.uber.org/zap v1.28.0 h1:IZzaP1Fv73/T/pBMLk4VutPl36uNC+OSUh3JLG3FIjo= go.uber.org/zap v1.28.0/go.mod h1:rDLpOi171uODNm/mxFcuYWxDsqWSAVkFdX4XojSKg/Q= -go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI= -go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU= +go.yaml.in/yaml/v2 v2.4.4 h1:tuyd0P+2Ont/d6e2rl3be67goVK4R6deVxCUX5vyPaQ= +go.yaml.in/yaml/v2 v2.4.4/go.mod h1:gMZqIpDtDqOfM0uNfy0SkpRhvUryYH0Z6wdMYcacYXQ= go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= go4.org/mem v0.0.0-20240501181205-ae6ca9944745 h1:Tl++JLUCe4sxGu8cTpDzRLd3tN7US4hOxG5YpKCzkek= @@ -697,8 +682,8 @@ golang.org/x/image v0.41.0/go.mod h1:uIc348UZMSvS5Z65CVZ7iDPaNobNFEPeJ4kbqTOszmA golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.36.0 h1:JJjpVx6myfUsUdAzZuOSTTmRE0PfZeNWzzvKrP7amb4= -golang.org/x/mod v0.36.0/go.mod h1:moc6ELqsWcOw5Ef3xVprK5ul/MvtVvkIXLziUOICjUQ= +golang.org/x/mod v0.37.0 h1:vF1DjpVEshcIqoEaauuHebaLk1O1forxjxBaVn884JQ= +golang.org/x/mod v0.37.0/go.mod h1:m8S8VeM9r4dzDwjrKO0a1sZP3YjeMamRRlD+fmR2Q/0= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod 
h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -745,7 +730,6 @@ golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220817070843-5a390386f1f2/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.46.0 h1:noSf2Fq6F8DBgS+LysIkx7rIExoNHJsxOAtPp4rthXw= golang.org/x/sys v0.46.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= @@ -814,6 +798,8 @@ honnef.co/go/tools v0.7.0 h1:w6WUp1VbkqPEgLz4rkBzH/CSU6HkoqNLp6GstyTx3lU= honnef.co/go/tools v0.7.0/go.mod h1:pm29oPxeP3P82ISxZDgIYeOaf9ta6Pi0EWvCFoLG2vc= howett.net/plist v1.0.0 h1:7CrbWYbPPO/PyNy38b2EB/+gYbjCe2DXBxgtOOZbSQM= howett.net/plist v1.0.0/go.mod h1:lqaXoTrLY4hg8tnEzNru53gicrbv7rrk+2xJA/7hw9g= +pgregory.net/rapid v1.3.0 h1:vBvO0VSqti75J1jjYqpgPNBLKMd1+gxa9fYo7vk/Exc= +pgregory.net/rapid v1.3.0/go.mod h1:dPlE4OBBxgXPqkP79flB6sJL1dx5azpI7HQ9MY9Z7uk= sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs= sigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4= software.sslmate.com/src/go-pkcs12 v0.4.0 h1:H2g08FrTvSFKUj+D309j1DPfk5APnIdAQAB8aEykJ5k= diff --git a/internal/telemetrytest/otel.go b/internal/telemetrytest/otel.go new file mode 100644 index 000000000..463818e74 --- /dev/null +++ b/internal/telemetrytest/otel.go @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: MPL-2.0 + +package telemetrytest + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/codes" + sdktrace 
"go.opentelemetry.io/otel/sdk/trace" + "go.opentelemetry.io/otel/sdk/trace/tracetest" + "go.opentelemetry.io/otel/trace" +) + +// NewTracerProvider returns a real TracerProvider wired to an in-memory +// SpanRecorder, so tests can assert on the spans that were actually emitted +// instead of relying on noop tracers that only verify wiring. +func NewTracerProvider() (*sdktrace.TracerProvider, *tracetest.SpanRecorder) { + sr := tracetest.NewSpanRecorder() + tp := sdktrace.NewTracerProvider(sdktrace.WithSpanProcessor(sr)) + return tp, sr +} + +// SpanCount asserts the total number of ended spans recorded by sr. +func SpanCount(t *testing.T, sr *tracetest.SpanRecorder, want int) { + t.Helper() + assert.Lenf(t, sr.Ended(), want, "expected %d ended span(s)", want) +} + +// MustSpanNamed returns the single ended span with the given name, failing the +// test if there is not exactly one match. +func MustSpanNamed(t *testing.T, sr *tracetest.SpanRecorder, name string) sdktrace.ReadOnlySpan { + t.Helper() + var matches []sdktrace.ReadOnlySpan + for _, s := range sr.Ended() { + if s.Name() == name { + matches = append(matches, s) + } + } + require.Lenf(t, matches, 1, "expected exactly 1 span named %q, got %d", name, len(matches)) + return matches[0] +} + +// SpanKind asserts the span's kind. +func SpanKind(t *testing.T, span sdktrace.ReadOnlySpan, want trace.SpanKind) { + t.Helper() + assert.Equalf(t, want, span.SpanKind(), "span %q kind", span.Name()) +} + +// SpanStatus asserts the span's status code. +func SpanStatus(t *testing.T, span sdktrace.ReadOnlySpan, want codes.Code) { + t.Helper() + assert.Equalf(t, want, span.Status().Code, "span %q status code", span.Name()) +} + +// SpanAttr returns the value of the named attribute and whether it was present. 
+func SpanAttr(span sdktrace.ReadOnlySpan, key string) (attribute.Value, bool) { + for _, kv := range span.Attributes() { + if string(kv.Key) == key { + return kv.Value, true + } + } + return attribute.Value{}, false +} + +// SpanHasStringAttr asserts the span carries a string attribute key==want. +func SpanHasStringAttr(t *testing.T, span sdktrace.ReadOnlySpan, key, want string) { + t.Helper() + v, ok := SpanAttr(span, key) + if !ok { + t.Fatalf("span %q has no attribute %q", span.Name(), key) + } + assert.Equalf(t, want, v.AsString(), "span %q attribute %q", span.Name(), key) +} + +// SpanHasInt64Attr asserts the span carries an int64 attribute key==want. +func SpanHasInt64Attr(t *testing.T, span sdktrace.ReadOnlySpan, key string, want int64) { + t.Helper() + v, ok := SpanAttr(span, key) + if !ok { + t.Fatalf("span %q has no attribute %q", span.Name(), key) + } + assert.Equalf(t, want, v.AsInt64(), "span %q attribute %q", span.Name(), key) +} + +// TraceID returns the trace ID carried by a recorded span. 
+func TraceID(span sdktrace.ReadOnlySpan) trace.TraceID { + return span.SpanContext().TraceID() +} diff --git a/internal/telemetrytest/otel_test.go b/internal/telemetrytest/otel_test.go new file mode 100644 index 000000000..883704435 --- /dev/null +++ b/internal/telemetrytest/otel_test.go @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: MPL-2.0 + +package telemetrytest + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" +) + +func TestNewTracerProvider_RecordsSpans(t *testing.T) { + tp, sr := NewTracerProvider() + defer func() { _ = tp.Shutdown(context.Background()) }() + tracer := tp.Tracer("test") + + ctx, parent := tracer.Start(context.Background(), "root", trace.WithSpanKind(trace.SpanKindServer)) + parent.SetAttributes(attribute.String("http.method", "GET")) + parent.SetStatus(codes.Ok, "") + parent.End() + + _, child := tracer.Start(ctx, "child", trace.WithSpanKind(trace.SpanKindInternal)) + child.End() + + SpanCount(t, sr, 2) + root := MustSpanNamed(t, sr, "root") + SpanKind(t, root, trace.SpanKindServer) + SpanStatus(t, root, codes.Ok) + SpanHasStringAttr(t, root, "http.method", "GET") + + v, ok := SpanAttr(root, "absent") + assert.False(t, ok, "absent attr should not be found") + _ = v + + childSpan := MustSpanNamed(t, sr, "child") + assert.Equal(t, TraceID(root), TraceID(childSpan), "parent and child share a trace ID") +} diff --git a/internal/telemetrytest/recorder_test.go b/internal/telemetrytest/recorder_test.go index 3d3d255b2..9f899b02a 100644 --- a/internal/telemetrytest/recorder_test.go +++ b/internal/telemetrytest/recorder_test.go @@ -37,3 +37,26 @@ func TestRecorder_GaugeAndHistogram(t *testing.T) { t.Fatalf("want 2 observations, got %v", c) } } + +func TestRecorder_GaugeSequence(t *testing.T) { + r := NewRecorder() + labels := metrics.Labels{"pg": "g1"} + + r.GaugeSet("g", 5, labels) + r.GaugeInc("g", labels) + 
r.GaugeInc("g", labels) + r.GaugeDec("g", labels) + if v := r.GaugeValue("g", labels); v != 6 { + t.Fatalf("set(5)+inc+inc-dec: want 6, got %v", v) + } + + r.GaugeSet("g", 0, labels) + if v := r.GaugeValue("g", labels); v != 0 { + t.Fatalf("reset to 0: want 0, got %v", v) + } + + r.GaugeInc("g", labels) + if v := r.GaugeValue("g", labels); v != 1 { + t.Fatalf("inc after set(0): want 1, got %v", v) + } +} diff --git a/service/cdc/postgres/service.go b/service/cdc/postgres/service.go index 20f9753c9..09bfa7b1c 100644 --- a/service/cdc/postgres/service.go +++ b/service/cdc/postgres/service.go @@ -25,6 +25,7 @@ import ( const ( retainedWALGauge = "wippy_cdc_retained_wal_bytes" changesCounter = "wippy_cdc_changes_total" + errorsCounter = "wippy_cdc_errors_total" ) const ( @@ -58,6 +59,7 @@ type SourceOptions struct { type Source struct { log *zap.Logger + coll metrics.Collector injectedCP Checkpointer cancel context.CancelFunc done chan struct{} @@ -196,7 +198,8 @@ func (s *Source) Start(ctx context.Context) (<-chan any, error) { default: } - go s.run(runCtx, conn, adminDB, cp, startLSN, snapshotName, publication, metrics.GetCollector(ctx), status, done) + s.coll = metrics.GetCollector(ctx) + go s.run(runCtx, conn, adminDB, cp, startLSN, snapshotName, publication, s.coll, status, done) return status, nil } @@ -410,6 +413,9 @@ func (s *Source) reportLag(ctx context.Context, adminDB *sql.DB, mc metrics.Coll func (s *Source) fail(_ context.Context, status chan any, err error) { s.log.Error("cdc stream error", zap.String("slot", s.slot), zap.Error(err)) + if s.coll != nil { + s.coll.CounterInc(errorsCounter, metrics.Labels{"slot": s.slot}) + } select { case status <- err: default: diff --git a/service/cdc/postgres/service_metrics_test.go b/service/cdc/postgres/service_metrics_test.go new file mode 100644 index 000000000..31a1cff31 --- /dev/null +++ b/service/cdc/postgres/service_metrics_test.go @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: MPL-2.0 + +package postgres + 
+import ( + "context" + "errors" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/wippyai/runtime/api/metrics" + "github.com/wippyai/runtime/internal/telemetrytest" + "go.uber.org/zap" +) + +func TestSource_FailEmitsErrorCounter(t *testing.T) { + rec := telemetrytest.NewRecorder() + s := &Source{log: zap.NewNop(), slot: "test_slot", coll: rec} + + status := make(chan any, 1) + s.fail(context.Background(), status, errors.New("boom")) + + assert.Equal(t, 1.0, rec.CounterValue(errorsCounter, metrics.Labels{"slot": "test_slot"})) +} + +func TestSource_FailNilCollector(t *testing.T) { + s := &Source{log: zap.NewNop(), slot: "test_slot"} + status := make(chan any, 1) + s.fail(context.Background(), status, errors.New("boom")) +} diff --git a/service/exec/docker/docker.go b/service/exec/docker/docker.go index bef7c1f9e..a6233be4f 100644 --- a/service/exec/docker/docker.go +++ b/service/exec/docker/docker.go @@ -10,11 +10,11 @@ import ( "strings" "sync" - "github.com/docker/docker/api/types/container" - "github.com/docker/docker/api/types/mount" - "github.com/docker/docker/api/types/network" - "github.com/docker/docker/client" - "github.com/docker/docker/pkg/stdcopy" + "github.com/moby/moby/api/pkg/stdcopy" + "github.com/moby/moby/api/types/container" + "github.com/moby/moby/api/types/mount" + "github.com/moby/moby/api/types/network" + "github.com/moby/moby/client" execapi "github.com/wippyai/runtime/api/service/exec" "go.uber.org/zap" ) @@ -53,12 +53,12 @@ func NewDockerExecutor(log *zap.Logger, config *execapi.DockerExecutorConfig) (* return nil, err } - opts := []client.Opt{client.FromEnv, client.WithAPIVersionNegotiation()} + opts := []client.Opt{client.FromEnv} if config.Host != "" { opts = append(opts, client.WithHost(config.Host)) } - cli, err := client.NewClientWithOpts(opts...) + cli, err := client.New(opts...) 
if err != nil { return nil, NewDockerClientError(err) } @@ -231,7 +231,11 @@ func (p *Process) Start() error { Tty: false, } - resp, err := p.cli.ContainerCreate(ctx, config, hostConfig, &network.NetworkingConfig{}, nil, "") + resp, err := p.cli.ContainerCreate(ctx, client.ContainerCreateOptions{ + Config: config, + HostConfig: hostConfig, + NetworkingConfig: &network.NetworkingConfig{}, + }) if err != nil { return NewContainerCreateError(err) } @@ -239,14 +243,14 @@ func (p *Process) Start() error { p.containerID = resp.ID p.log.Debug("container created", zap.String("id", p.containerID)) - attachResp, err := p.cli.ContainerAttach(ctx, p.containerID, container.AttachOptions{ + attachResp, err := p.cli.ContainerAttach(ctx, p.containerID, client.ContainerAttachOptions{ Stream: true, Stdin: true, Stdout: true, Stderr: true, }) if err != nil { - _ = p.cli.ContainerRemove(ctx, p.containerID, container.RemoveOptions{Force: true}) + _, _ = p.cli.ContainerRemove(ctx, p.containerID, client.ContainerRemoveOptions{Force: true}) return NewContainerAttachError(err) } @@ -266,9 +270,9 @@ func (p *Process) Start() error { } }() - if err := p.cli.ContainerStart(ctx, p.containerID, container.StartOptions{}); err != nil { + if _, err := p.cli.ContainerStart(ctx, p.containerID, client.ContainerStartOptions{}); err != nil { attachResp.Close() - _ = p.cli.ContainerRemove(ctx, p.containerID, container.RemoveOptions{Force: true}) + _, _ = p.cli.ContainerRemove(ctx, p.containerID, client.ContainerRemoveOptions{Force: true}) return NewContainerStartError(err) } @@ -292,7 +296,7 @@ func (p *Process) Signal(sig int) error { p.mu.RUnlock() sigName := signalName(sig) - err := p.cli.ContainerKill(context.Background(), containerID, sigName) + _, err := p.cli.ContainerKill(context.Background(), containerID, client.ContainerKillOptions{Signal: sigName}) if err != nil { if strings.Contains(err.Error(), "is not running") { return ErrContainerStopped @@ -350,7 +354,11 @@ func (p *Process) Wait() 
error { containerID := p.containerID p.mu.RUnlock() - statusCh, errCh := p.cli.ContainerWait(context.Background(), containerID, container.WaitConditionNotRunning) + waitResult := p.cli.ContainerWait(context.Background(), containerID, client.ContainerWaitOptions{ + Condition: container.WaitConditionNotRunning, + }) + statusCh := waitResult.Result + errCh := waitResult.Error var exitCode int64 select { diff --git a/service/http/client/client_pool.go b/service/http/client/client_pool.go index 94c092a4a..87312061c 100644 --- a/service/http/client/client_pool.go +++ b/service/http/client/client_pool.go @@ -20,6 +20,7 @@ import ( netapi "github.com/wippyai/runtime/api/net" httpapi "github.com/wippyai/runtime/api/service/http" lru "github.com/wippyai/runtime/internal/cache" + "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" ) // clientKey identifies a unique client configuration. networkIdentity is a @@ -153,6 +154,34 @@ func (p *Pool) getOrCreate(key clientKey, overlayNetworkID string) *clientOnce { return co } +// closeIdler is implemented by transports that can release idle connections. +// Both *http.Transport and instrumentedTransport satisfy it. +type closeIdler interface { + CloseIdleConnections() +} + +// instrumentedTransport wraps an otelhttp-instrumented RoundTripper so outbound +// HTTP requests get client spans and W3C trace-context injection, while still +// exposing CloseIdleConnections on the underlying *http.Transport for the +// pool's eviction/cleanup path. +type instrumentedTransport struct { + traced gohttp.RoundTripper + base *gohttp.Transport +} + +func (t *instrumentedTransport) RoundTrip(req *gohttp.Request) (*gohttp.Response, error) { + return t.traced.RoundTrip(req) +} + +func (t *instrumentedTransport) CloseIdleConnections() { + t.base.CloseIdleConnections() +} + +// instrument returns an otelhttp-wrapped transport around base. 
+func instrument(base *gohttp.Transport) *instrumentedTransport { + return &instrumentedTransport{traced: otelhttp.NewTransport(base), base: base} +} + // closeIdle closes idle connections on co's client transport if co has been // initialized. Safe to call on a never-initialized clientOnce and from any // goroutine — the atomic Load synchronizes with the Store inside once.Do. @@ -164,7 +193,7 @@ func closeIdle(co *clientOnce) { if c == nil { return } - if tr, ok := c.Transport.(*gohttp.Transport); ok { + if tr, ok := c.Transport.(closeIdler); ok { tr.CloseIdleConnections() } } @@ -335,7 +364,7 @@ func createClientWithDialer(timeout time.Duration, dialFn func(ctx context.Conte DialContext: dialFn, } return &gohttp.Client{ - Transport: transport, + Transport: instrument(transport), Timeout: timeout, } } @@ -369,7 +398,7 @@ func createClient(timeout time.Duration, unixSocket string, maxIdleConns, maxIdl } return &gohttp.Client{ - Transport: transport, + Transport: instrument(transport), Timeout: timeout, } } diff --git a/service/http/client/client_pool_lru_test.go b/service/http/client/client_pool_lru_test.go index a885038e3..5b8081b00 100644 --- a/service/http/client/client_pool_lru_test.go +++ b/service/http/client/client_pool_lru_test.go @@ -87,8 +87,9 @@ func TestPoolLRU_EvictionClosesIdleConnections(t *testing.T) { pool := NewClientPoolWithConfig(PoolConfig{MaxClients: 1}) c1 := pool.GetClient(1*time.Second, "") - tr, ok := c1.Transport.(*gohttp.Transport) - require.True(t, ok, "expected *http.Transport") + tr, ok := c1.Transport.(*instrumentedTransport) + require.True(t, ok, "expected *instrumentedTransport") + base := tr.base // Force an idle connection into the transport by dialing a listener // and reading a tiny HTTP response. 
@@ -117,7 +118,7 @@ func TestPoolLRU_EvictionClosesIdleConnections(t *testing.T) { // observe this indirectly — a direct call must not panic and must // leave the transport in a state where subsequent CloseIdleConnections // is a no-op. The real guarantee is that eviction called it once. - tr.CloseIdleConnections() + base.CloseIdleConnections() assert.Equal(t, 1, pool.Size()) } diff --git a/service/http/client/outbound_trace_test.go b/service/http/client/outbound_trace_test.go new file mode 100644 index 000000000..c1628dc16 --- /dev/null +++ b/service/http/client/outbound_trace_test.go @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: MPL-2.0 + +package client + +import ( + "context" + "net/http" + "net/http/httptest" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/wippyai/runtime/internal/telemetrytest" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/propagation" + sdktrace "go.opentelemetry.io/otel/sdk/trace" + "go.opentelemetry.io/otel/trace" +) + +// TestOutbound_TracedWithClientSpan verifies the pooled HTTP client wraps its +// transport with otelhttp, so an outbound request issued within a span produces +// a client span and injects traceparent into the outgoing headers. +func TestOutbound_TracedWithClientSpan(t *testing.T) { + // otelhttp uses the global provider and propagator. 
+ tp, sr := telemetrytest.NewTracerProvider() + prevTP := otel.GetTracerProvider() + prevProp := otel.GetTextMapPropagator() + otel.SetTracerProvider(tp) + otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator(propagation.TraceContext{}, propagation.Baggage{})) + t.Cleanup(func() { + otel.SetTracerProvider(prevTP) + otel.SetTextMapPropagator(prevProp) + _ = tp.Shutdown(context.Background()) + }) + + var gotTraceparent string + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + gotTraceparent = r.Header.Get("traceparent") + w.WriteHeader(http.StatusOK) + })) + defer srv.Close() + + pool := NewClientPool() + cli := pool.GetClient(7*time.Second, "") + + tracer := tp.Tracer("test") + ctx, parent := tracer.Start(context.Background(), "caller") + defer parent.End() + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, srv.URL, nil) + require.NoError(t, err) + resp, err := cli.Do(req) + require.NoError(t, err) + _ = resp.Body.Close() + + var clientSpan sdktrace.ReadOnlySpan + for _, s := range sr.Ended() { + if s.SpanKind() == trace.SpanKindClient { + clientSpan = s + break + } + } + require.NotNil(t, clientSpan, "expected an outbound client span") + assert.Equal(t, parent.SpanContext().TraceID(), telemetrytest.TraceID(clientSpan), + "client span must continue the caller trace") + assert.NotEmpty(t, gotTraceparent, "outgoing request must carry a traceparent header") +} diff --git a/service/http/middleware/httpmetrics/middleware.go b/service/http/middleware/httpmetrics/middleware.go index a3457b1ea..9c586b839 100644 --- a/service/http/middleware/httpmetrics/middleware.go +++ b/service/http/middleware/httpmetrics/middleware.go @@ -3,6 +3,9 @@ package httpmetrics import ( + "bufio" + "errors" + "net" "net/http" "strconv" "time" @@ -84,8 +87,20 @@ func (w *statusWriter) Write(b []byte) (int, error) { return w.ResponseWriter.Write(b) } +// Unwrap exposes the underlying writer so http.ResponseController (and other 
+// wrappers stacked above, e.g. the OTel statusRecorder) can reach the real +// connection for Flush/Hijack used by SSE and websocket relays. +func (w *statusWriter) Unwrap() http.ResponseWriter { return w.ResponseWriter } + func (w *statusWriter) Flush() { if flusher, ok := w.ResponseWriter.(http.Flusher); ok { flusher.Flush() } } + +func (w *statusWriter) Hijack() (net.Conn, *bufio.ReadWriter, error) { + if hj, ok := w.ResponseWriter.(http.Hijacker); ok { + return hj.Hijack() + } + return nil, nil, errors.New("httpmetrics: underlying response writer does not support Hijack") +} diff --git a/service/http/router.go b/service/http/router.go index 4c412de14..ef3913e8b 100644 --- a/service/http/router.go +++ b/service/http/router.go @@ -349,6 +349,9 @@ func (rm *RouteManager) Build() error { if len(routerEntry.middleware) > 0 { handler = applyMiddlewareChain(routerEntry.middleware, handler) } + // Set the route label as the outermost wrapper so pre-match middleware + // (e.g., OTel, http metrics) observes the real route instead of "unmatched". + handler = withRouteLabel(routeLabelFor(route.funcID, routeID), handler) allPatterns = append(allPatterns, patternEntry{handler, pattern}) // Auto-generate OPTIONS handler so CORS middleware can intercept preflight @@ -399,6 +402,25 @@ func (rm *RouteManager) Build() error { return nil } +// routeLabelFor returns the route label used for observability, preferring the +// function ID and falling back to the route ID. +func routeLabelFor(funcID, routeID registry.ID) string { + if l := funcID.String(); l != "" { + return l + } + return routeID.String() +} + +// withRouteLabel wraps next so the route label is set in the request frame +// before next runs. Applied as the outermost route wrapper so pre-match +// middleware (OTel, http metrics) sees the real route instead of "unmatched". 
+func withRouteLabel(label string, next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + _ = httpapi.SetRouteLabel(r.Context(), label) + next.ServeHTTP(w, r) + }) +} + // createRouteHandler creates the handler for a route with param extraction and post-middleware func (rm *RouteManager) createRouteHandler(routeID registry.ID, route *RouteEntry, routerEntry *RouterEntry) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { @@ -418,15 +440,10 @@ func (rm *RouteManager) createRouteHandler(routeID registry.ID, route *RouteEntr routeInfo.Endpoint = routeID routeInfo.Func = route.funcID - // Extract route label (prefer Func over Endpoint) - routeLabel := route.funcID.String() - if routeLabel == "" { - routeLabel = routeID.String() - } - - // Add route info and label to FrameContext + // Add route info to FrameContext. The route label is set by the outer + // withRouteLabel wrapper so pre-match middleware (OTel, http metrics) + // already observes it; SetRouteInfo is still needed for downstream handlers. 
_ = httpapi.SetRouteInfo(r.Context(), routeInfo) - _ = httpapi.SetRouteLabel(r.Context(), routeLabel) // Apply post-match middleware and call endpoint handler finalHandler := route.handler diff --git a/service/http/router_test.go b/service/http/router_test.go index 732d85510..4d38ab0d7 100644 --- a/service/http/router_test.go +++ b/service/http/router_test.go @@ -80,7 +80,7 @@ func TestRouteManager_BasicOperations(t *testing.T) { err = rm.ReplaceMount("/static", replacement) require.NoError(t, err) rec := httptest.NewRecorder() - rm.mounts["/static"].ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/static/app.js", nil)) + rm.mounts["/static"].ServeHTTP(rec, httptest.NewRequestWithContext(context.Background(), http.MethodGet, "/static/app.js", nil)) assert.Equal(t, "replacement", rec.Body.String()) // Replace validates paths and only updates existing mounts diff --git a/service/metrics/collector.go b/service/metrics/collector.go index 059e222c3..bd9f07d07 100644 --- a/service/metrics/collector.go +++ b/service/metrics/collector.go @@ -55,11 +55,11 @@ func (c *collector) GaugeSet(name string, value float64, labels api.Labels) { } func (c *collector) GaugeInc(name string, labels api.Labels) { - c.record(name, api.TypeGauge, 1, labels) + c.record(name, api.TypeGaugeAdd, 1, labels) } func (c *collector) GaugeDec(name string, labels api.Labels) { - c.record(name, api.TypeGauge, -1, labels) + c.record(name, api.TypeGaugeAdd, -1, labels) } func (c *collector) HistogramObserve(name string, value float64, labels api.Labels) { diff --git a/service/metrics/process_lifecycle.go b/service/metrics/process_lifecycle.go new file mode 100644 index 000000000..42029769b --- /dev/null +++ b/service/metrics/process_lifecycle.go @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: MPL-2.0 + +package metrics + +import ( + "context" + + "github.com/wippyai/runtime/api/metrics" + "github.com/wippyai/runtime/api/pid" + processapi "github.com/wippyai/runtime/api/process" + 
"github.com/wippyai/runtime/api/runtime" +) + +const ( + ProcessStarted = "wippy_process_started_total" + ProcessTerminated = "wippy_process_terminated_total" + ProcessActive = "wippy_process_active" +) + +// ProcessLifecycle is a process.Lifecycle handler that emits process +// supervision metrics. A nil collector makes every method a no-op. +type ProcessLifecycle struct { + coll metrics.Collector +} + +func NewProcessLifecycle(coll metrics.Collector) *ProcessLifecycle { + return &ProcessLifecycle{coll: coll} +} + +func (p *ProcessLifecycle) OnStart(_ context.Context, _ pid.PID, _ processapi.Process) error { + if p == nil || p.coll == nil { + return nil + } + p.coll.CounterInc(ProcessStarted, metrics.Labels{}) + p.coll.GaugeInc(ProcessActive, metrics.Labels{}) + return nil +} + +func (p *ProcessLifecycle) OnComplete(_ context.Context, _ pid.PID, result *runtime.Result) { + if p == nil || p.coll == nil { + return + } + p.coll.GaugeDec(ProcessActive, metrics.Labels{}) + outcome := "completed" + if result != nil && result.Error != nil { + outcome = "error" + } + p.coll.CounterInc(ProcessTerminated, metrics.Labels{"result": outcome}) +} diff --git a/service/metrics/process_lifecycle_test.go b/service/metrics/process_lifecycle_test.go new file mode 100644 index 000000000..be0837a94 --- /dev/null +++ b/service/metrics/process_lifecycle_test.go @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: MPL-2.0 + +package metrics + +import ( + "context" + "errors" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/wippyai/runtime/api/metrics" + "github.com/wippyai/runtime/api/pid" + "github.com/wippyai/runtime/api/runtime" + "github.com/wippyai/runtime/internal/telemetrytest" +) + +func TestProcessLifecycle_StartAndComplete(t *testing.T) { + rec := telemetrytest.NewRecorder() + p := NewProcessLifecycle(rec) + + require.NoError(t, p.OnStart(context.Background(), pid.PID{}, nil)) + require.NoError(t, p.OnStart(context.Background(), 
pid.PID{}, nil)) + p.OnComplete(context.Background(), pid.PID{}, &runtime.Result{}) + + assert.Equal(t, 2.0, rec.CounterValue(ProcessStarted, metrics.Labels{})) + assert.Equal(t, 1.0, rec.CounterValue(ProcessTerminated, metrics.Labels{"result": "completed"})) + assert.Equal(t, 1.0, rec.GaugeValue(ProcessActive, metrics.Labels{})) +} + +func TestProcessLifecycle_ErrorResult(t *testing.T) { + rec := telemetrytest.NewRecorder() + p := NewProcessLifecycle(rec) + + p.OnComplete(context.Background(), pid.PID{}, &runtime.Result{Error: errors.New("boom")}) + + assert.Equal(t, 1.0, rec.CounterValue(ProcessTerminated, metrics.Labels{"result": "error"})) +} + +func TestProcessLifecycle_NilCollector(t *testing.T) { + p := NewProcessLifecycle(nil) + require.NoError(t, p.OnStart(context.Background(), pid.PID{}, nil)) + p.OnComplete(context.Background(), pid.PID{}, nil) +} diff --git a/service/metrics/prometheus/exporter.go b/service/metrics/prometheus/exporter.go index 43b814f7f..78781c3e6 100644 --- a/service/metrics/prometheus/exporter.go +++ b/service/metrics/prometheus/exporter.go @@ -76,9 +76,13 @@ func (e *Exporter) Record(name string, typ api.MetricType, value float64, labels counter := e.getOrCreateCounter(key, name, labelNames) counter.WithLabelValues(labelVals...).Add(value) - case api.TypeGauge: + case api.TypeGauge, api.TypeGaugeAdd: gauge := e.getOrCreateGauge(key, name, labelNames) - gauge.WithLabelValues(labelVals...).Set(value) + if typ == api.TypeGaugeAdd { + gauge.WithLabelValues(labelVals...).Add(value) + } else { + gauge.WithLabelValues(labelVals...).Set(value) + } case api.TypeHistogram: histo := e.getOrCreateHistogram(key, name, labelNames) diff --git a/service/metrics/prometheus/gauge_exporter_test.go b/service/metrics/prometheus/gauge_exporter_test.go new file mode 100644 index 000000000..ac2ee71da --- /dev/null +++ b/service/metrics/prometheus/gauge_exporter_test.go @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: MPL-2.0 + +package prometheus + +import ( + 
"context" + "net/http/httptest" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + api "github.com/wippyai/runtime/api/metrics" + apicfg "github.com/wippyai/runtime/api/service/metrics" + impl "github.com/wippyai/runtime/service/metrics" +) + +// TestExporter_GaugeIncDecBalancesToZero is the regression test for the gauge +// Inc/Dec anomaly: GaugeInc/GaugeDec must be treated as additive deltas by the +// exporter, not absolute sets. With the bug, a balanced Inc(+1)/Dec(-1) was +// emitted as Set(-1), leaving gauges like wippy_function_in_flight reading -1. +func TestExporter_GaugeIncDecBalancesToZero(t *testing.T) { + coll := impl.NewCollector(apicfg.Config{}) + exp := NewExporter() + require.NoError(t, coll.RegisterExporter(exp)) + + labels := api.Labels{"k": "v"} + coll.GaugeInc("wippy_test_in_flight", labels) + coll.GaugeInc("wippy_test_in_flight", labels) + coll.GaugeDec("wippy_test_in_flight", labels) + coll.GaugeDec("wippy_test_in_flight", labels) + + require.NoError(t, coll.Close()) + + rec := httptest.NewRecorder() + exp.Handler().ServeHTTP(rec, httptest.NewRequestWithContext(context.Background(), "GET", "/metrics", nil)) + body := rec.Body.String() + require.Contains(t, body, "wippy_test_in_flight", "gauge series must be present") + + for _, line := range strings.Split(body, "\n") { + if strings.HasPrefix(line, "wippy_test_in_flight{") { + assert.True(t, strings.HasSuffix(strings.TrimSpace(line), " 0"), + "balanced GaugeInc/GaugeDec must read 0, got: %s", line) + return + } + } + t.Fatalf("wippy_test_in_flight sample line not found in output") +} + +// GaugeSet is absolute and must overwrite the running delta total. 
+func TestExporter_GaugeSetIsAbsolute(t *testing.T) { + coll := impl.NewCollector(apicfg.Config{}) + exp := NewExporter() + require.NoError(t, coll.RegisterExporter(exp)) + + labels := api.Labels{"k": "v"} + coll.GaugeInc("wippy_test_gauge", labels) // delta +1 + coll.GaugeSet("wippy_test_gauge", 42, labels) + coll.GaugeInc("wippy_test_gauge", labels) // delta +1 -> 43 + + require.NoError(t, coll.Close()) + + rec := httptest.NewRecorder() + exp.Handler().ServeHTTP(rec, httptest.NewRequestWithContext(context.Background(), "GET", "/metrics", nil)) + for _, line := range strings.Split(rec.Body.String(), "\n") { + if strings.HasPrefix(line, "wippy_test_gauge{") { + assert.True(t, strings.HasSuffix(strings.TrimSpace(line), " 43"), + "Set(42) then Inc must read 43, got: %s", line) + return + } + } + t.Fatalf("wippy_test_gauge sample line not found") +} diff --git a/service/otel/config.go b/service/otel/config.go index 385b830f9..ddab48c7b 100644 --- a/service/otel/config.go +++ b/service/otel/config.go @@ -183,6 +183,31 @@ func ApplyEnvOverrides(cfg *otelapi.Config, logger *zap.Logger) { } } + // OTEL_TRACES_SAMPLER (sampler type). Applied after the ARG so explicit + // always_on/always_off win over a ratio argument. 
+ if sampler := os.Getenv("OTEL_TRACES_SAMPLER"); sampler != "" { + switch strings.ToLower(strings.TrimSpace(sampler)) { + case "always_on", "parentbased_always_on": + cfg.SampleRate = 1.0 + case "always_off", "parentbased_always_off": + cfg.SampleRate = 0.0 + case "traceidratio", "parentbased_traceidratio": + // ratio comes from OTEL_TRACES_SAMPLER_ARG (handled above) + default: + logger.Warn("unsupported traces sampler, ignoring", + zap.String("var", "OTEL_TRACES_SAMPLER"), + zap.String("value", sampler)) + } + } + + // OTEL_EXPORTER_OTLP_INSECURE + if insecure := os.Getenv("OTEL_EXPORTER_OTLP_INSECURE"); insecure != "" { + cfg.Insecure = strings.EqualFold(strings.TrimSpace(insecure), "true") + logger.Debug("using insecure from env", + zap.String("var", "OTEL_EXPORTER_OTLP_INSECURE"), + zap.Bool("value", cfg.Insecure)) + } + // OTEL_PROPAGATORS if propagators := os.Getenv("OTEL_PROPAGATORS"); propagators != "" { list := strings.Split(propagators, ",") diff --git a/service/otel/config_test.go b/service/otel/config_test.go index 2b959a518..2e1a2402c 100644 --- a/service/otel/config_test.go +++ b/service/otel/config_test.go @@ -133,6 +133,28 @@ func TestApplyEnvOverrides_SampleRateInvalid(t *testing.T) { assert.Equal(t, originalRate, cfg.SampleRate) } +func TestApplyEnvOverrides_TracesSamplerAlwaysOn(t *testing.T) { + cfg := DefaultConfig() + t.Setenv("OTEL_TRACES_SAMPLER", "always_on") + t.Setenv("OTEL_TRACES_SAMPLER_ARG", "0.25") + ApplyEnvOverrides(&cfg, zap.NewNop()) + assert.Equal(t, 1.0, cfg.SampleRate, "always_on must win over sampler arg") +} + +func TestApplyEnvOverrides_TracesSamplerAlwaysOff(t *testing.T) { + cfg := DefaultConfig() + t.Setenv("OTEL_TRACES_SAMPLER", "always_off") + ApplyEnvOverrides(&cfg, zap.NewNop()) + assert.Equal(t, 0.0, cfg.SampleRate) +} + +func TestApplyEnvOverrides_Insecure(t *testing.T) { + cfg := DefaultConfig() + t.Setenv("OTEL_EXPORTER_OTLP_INSECURE", "true") + ApplyEnvOverrides(&cfg, zap.NewNop()) + assert.True(t, cfg.Insecure) +} + func TestApplyEnvOverrides_Propagators(t *testing.T) { cfg :=
DefaultConfig() logger := zap.NewNop() diff --git a/service/otel/errors.go b/service/otel/errors.go index f77f57811..7f94425fc 100644 --- a/service/otel/errors.go +++ b/service/otel/errors.go @@ -40,3 +40,10 @@ func newShutdownMeterProviderError(cause error) error { WithDetails(attrs.NewBagFrom(map[string]any{"cause": cause.Error()})). WithCause(cause) } + +func newShutdownTracerProviderError(cause error) error { + return apierror.New(apierror.Internal, "failed to shutdown tracer provider"). + WithRetryable(apierror.False). + WithDetails(attrs.NewBagFrom(map[string]any{"cause": cause.Error()})). + WithCause(cause) +} diff --git a/service/otel/metrics.go b/service/otel/metrics.go index de1e4faec..9b6958fd8 100644 --- a/service/otel/metrics.go +++ b/service/otel/metrics.go @@ -15,15 +15,22 @@ type MetricsExporter struct { meter otelmetric.Meter counters map[string]otelmetric.Float64Counter gauges map[string]otelmetric.Float64Gauge + upDowns map[string]otelmetric.Float64UpDownCounter histos map[string]otelmetric.Float64Histogram mu sync.RWMutex } func NewMetricsExporter(provider otelmetric.MeterProvider) *MetricsExporter { + // Note: TypeGauge and TypeGaugeAdd map to different OTel instrument kinds + // (Float64Gauge vs Float64UpDownCounter). The OTel SDK keys instruments by + // name, so a metric must not mix GaugeSet and GaugeInc/Dec on the same + // name — the second instrument kind registration returns an error and that + // metric silently stops exporting. Use one or the other per metric name. 
return &MetricsExporter{ meter: provider.Meter("wippy-runtime"), counters: make(map[string]otelmetric.Float64Counter), gauges: make(map[string]otelmetric.Float64Gauge), + upDowns: make(map[string]otelmetric.Float64UpDownCounter), histos: make(map[string]otelmetric.Float64Histogram), } } @@ -50,6 +57,13 @@ func (e *MetricsExporter) Record(name string, typ api.MetricType, value float64, } gauge.Record(context.Background(), value, otelmetric.WithAttributes(attrs...)) + case api.TypeGaugeAdd: + udc, err := e.getOrCreateUpDownCounter(name) + if err != nil { + return err + } + udc.Add(context.Background(), value, otelmetric.WithAttributes(attrs...)) + case api.TypeHistogram: histo, err := e.getOrCreateHistogram(name) if err != nil { @@ -107,6 +121,29 @@ func (e *MetricsExporter) getOrCreateGauge(name string) (otelmetric.Float64Gauge return g, nil } +func (e *MetricsExporter) getOrCreateUpDownCounter(name string) (otelmetric.Float64UpDownCounter, error) { + e.mu.RLock() + u, ok := e.upDowns[name] + e.mu.RUnlock() + if ok { + return u, nil + } + + e.mu.Lock() + defer e.mu.Unlock() + + if u, ok = e.upDowns[name]; ok { + return u, nil + } + + u, err := e.meter.Float64UpDownCounter(name) + if err != nil { + return nil, err + } + e.upDowns[name] = u + return u, nil +} + // otelHistogramBuckets aligns OTel histogram bucket boundaries with the // Prometheus client_golang DefBuckets that our in-pod Prometheus exporter // uses (see service/metrics/prometheus/exporter.go). 
Without this, OTel's diff --git a/service/otel/metrics_doublecount_test.go b/service/otel/metrics_doublecount_test.go new file mode 100644 index 000000000..b5823c6ea --- /dev/null +++ b/service/otel/metrics_doublecount_test.go @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: MPL-2.0 + +package otel + +import ( + "context" + "net/http" + "net/http/httptest" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + metricsapi "github.com/wippyai/runtime/api/metrics" + apicfg "github.com/wippyai/runtime/api/service/metrics" + "github.com/wippyai/runtime/service/metrics" + "github.com/wippyai/runtime/service/metrics/prometheus" + sdkmetric "go.opentelemetry.io/otel/sdk/metric" + "go.opentelemetry.io/otel/sdk/metric/metricdata" +) + +// TestCollector_PrometheusAndOTel_NoDoubleCount verifies that a single +// observation fans out to both the in-pod Prometheus exporter and the OTel +// bridge exporter, each reporting value 1 (not 2, not conflicting series). +func TestCollector_PrometheusAndOTel_NoDoubleCount(t *testing.T) { + coll := metrics.NewCollector(apicfg.Config{}) + + promExp := prometheus.NewExporter() + require.NoError(t, coll.RegisterExporter(promExp)) + + reader := sdkmetric.NewManualReader() + mp := sdkmetric.NewMeterProvider(sdkmetric.WithReader(reader)) + defer func() { _ = mp.Shutdown(context.Background()) }() + require.NoError(t, coll.RegisterExporter(NewMetricsExporter(mp))) + + coll.CounterInc("wippy_dual_test_total", metricsapi.Labels{"k": "v"}) + + // Closing drains the buffer and runs a final flush to every exporter. + require.NoError(t, coll.Close()) + + // Prometheus side. 
+ rec := httptest.NewRecorder() + promExp.Handler().ServeHTTP(rec, httptest.NewRequestWithContext(context.Background(), http.MethodGet, "/metrics", nil)) + body := rec.Body.String() + assert.True(t, + strings.Contains(body, `wippy_dual_test_total{k="v"} 1`), + "prometheus side must report value 1 once, got:\n%s", body) + + // OTel side. + var rm metricdata.ResourceMetrics + require.NoError(t, reader.Collect(context.Background(), &rm)) + require.NotEmpty(t, rm.ScopeMetrics) + var total float64 + found := false + for _, m := range rm.ScopeMetrics[0].Metrics { + if m.Name != "wippy_dual_test_total" { + continue + } + sum, ok := m.Data.(metricdata.Sum[float64]) + if !ok { + continue + } + for _, dp := range sum.DataPoints { + for it := dp.Attributes.Iter(); it.Next(); { + kv := it.Attribute() + if string(kv.Key) == "k" && kv.Value.AsString() == "v" { + total, found = dp.Value, true + } + } + } + } + require.True(t, found, "OTel side must carry the k=v data point") + assert.Equal(t, 1.0, total, "OTel side must report value 1 (no double-count)") +} diff --git a/service/otel/metrics_test.go b/service/otel/metrics_test.go new file mode 100644 index 000000000..2089b162c --- /dev/null +++ b/service/otel/metrics_test.go @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: MPL-2.0 + +package otel + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + api "github.com/wippyai/runtime/api/metrics" + "go.opentelemetry.io/otel/attribute" + sdkmetric "go.opentelemetry.io/otel/sdk/metric" + "go.opentelemetry.io/otel/sdk/metric/metricdata" +) + +func newTestMeterProvider() (*sdkmetric.MeterProvider, *sdkmetric.ManualReader) { + reader := sdkmetric.NewManualReader() + mp := sdkmetric.NewMeterProvider(sdkmetric.WithReader(reader)) + return mp, reader +} + +func collectMetrics(t *testing.T, reader *sdkmetric.ManualReader) []metricdata.Metrics { + t.Helper() + var rm metricdata.ResourceMetrics + require.NoError(t, 
reader.Collect(context.Background(), &rm)) + require.NotEmpty(t, rm.ScopeMetrics, "expected at least one scope") + return rm.ScopeMetrics[0].Metrics +} + +func findMetric(t *testing.T, metrics []metricdata.Metrics, name string) metricdata.Metrics { + t.Helper() + for _, m := range metrics { + if m.Name == name { + return m + } + } + t.Fatalf("metric %q not found", name) + return metricdata.Metrics{} +} + +func assertAttributeValue(t *testing.T, set attribute.Set, key, want string) { + t.Helper() + for iter := set.Iter(); iter.Next(); { + kv := iter.Attribute() + if string(kv.Key) == key { + assert.Equalf(t, want, kv.Value.AsString(), "attribute %q", key) + return + } + } + t.Fatalf("attribute %q not found in data point", key) +} + +func TestMetricsExporter_Counter(t *testing.T) { + mp, reader := newTestMeterProvider() + defer func() { _ = mp.Shutdown(context.Background()) }() + exp := NewMetricsExporter(mp) + + require.NoError(t, exp.Record("wippy_x_total", api.TypeCounter, 5, api.Labels{"k": "v"})) + + m := findMetric(t, collectMetrics(t, reader), "wippy_x_total") + sum, ok := m.Data.(metricdata.Sum[float64]) + require.Truef(t, ok, "counter Data should be Sum[float64], got %T", m.Data) + require.Len(t, sum.DataPoints, 1) + assert.Equal(t, 5.0, sum.DataPoints[0].Value) + assertAttributeValue(t, sum.DataPoints[0].Attributes, "k", "v") +} + +func TestMetricsExporter_Gauge(t *testing.T) { + mp, reader := newTestMeterProvider() + defer func() { _ = mp.Shutdown(context.Background()) }() + exp := NewMetricsExporter(mp) + + require.NoError(t, exp.Record("wippy_q", api.TypeGauge, 42, api.Labels{"slot": "s1"})) + + m := findMetric(t, collectMetrics(t, reader), "wippy_q") + g, ok := m.Data.(metricdata.Gauge[float64]) + require.Truef(t, ok, "gauge Data should be Gauge[float64], got %T", m.Data) + require.Len(t, g.DataPoints, 1) + assert.Equal(t, 42.0, g.DataPoints[0].Value) + assertAttributeValue(t, g.DataPoints[0].Attributes, "slot", "s1") +} + +func 
TestMetricsExporter_HistogramBuckets(t *testing.T) { + mp, reader := newTestMeterProvider() + defer func() { _ = mp.Shutdown(context.Background()) }() + exp := NewMetricsExporter(mp) + + require.NoError(t, exp.Record("wippy_d_seconds", api.TypeHistogram, 0.7, api.Labels{})) + + m := findMetric(t, collectMetrics(t, reader), "wippy_d_seconds") + h, ok := m.Data.(metricdata.Histogram[float64]) + require.Truef(t, ok, "histogram Data should be Histogram[float64], got %T", m.Data) + require.Len(t, h.DataPoints, 1) + assert.Equal(t, uint64(1), h.DataPoints[0].Count) + assert.Equal(t, 0.7, h.DataPoints[0].Sum) + assert.Equal(t, otelHistogramBuckets, h.DataPoints[0].Bounds, + "histogram bounds must stay aligned with Prometheus DefBuckets to avoid histogram_quantile corruption") +} + +func TestMetricsExporter_InstrumentCaching(t *testing.T) { + mp, reader := newTestMeterProvider() + defer func() { _ = mp.Shutdown(context.Background()) }() + exp := NewMetricsExporter(mp) + + require.NoError(t, exp.Record("wippy_c_total", api.TypeCounter, 1, api.Labels{})) + require.NoError(t, exp.Record("wippy_c_total", api.TypeCounter, 2, api.Labels{})) + + var matches []metricdata.Metrics + for _, m := range collectMetrics(t, reader) { + if m.Name == "wippy_c_total" { + matches = append(matches, m) + } + } + require.Len(t, matches, 1, "two records for the same name must reuse one instrument") + + sum := matches[0].Data.(metricdata.Sum[float64]) + require.Len(t, sum.DataPoints, 1) + assert.Equal(t, 3.0, sum.DataPoints[0].Value, "counter must accumulate 1+2") +} diff --git a/service/otel/propagation_test.go b/service/otel/propagation_test.go new file mode 100644 index 000000000..4c554e20f --- /dev/null +++ b/service/otel/propagation_test.go @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: MPL-2.0 + +package otel + +import ( + "context" + "net/http" + "net/http/httptest" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + attrsapi 
"github.com/wippyai/runtime/api/attrs" + ctxapi "github.com/wippyai/runtime/api/context" + queueapi "github.com/wippyai/runtime/api/queue" + "github.com/wippyai/runtime/api/registry" + "github.com/wippyai/runtime/api/runtime" + httpapi "github.com/wippyai/runtime/api/service/http" + otelapi "github.com/wippyai/runtime/api/service/otel" + "github.com/wippyai/runtime/internal/telemetrytest" + "go.opentelemetry.io/otel/trace" + "go.uber.org/zap" +) + +// TestPropagation_HTTP_Function_Queue_Consume proves a single trace survives +// the full surface chain: HTTP server span -> function (Internal) -> queue +// publish (Producer, traceparent injected into the message) -> queue consume +// (Consumer, trace recovered from the message headers). +func TestPropagation_HTTP_Function_Queue_Consume(t *testing.T) { + useTraceContextPropagator(t) + tp, sr := telemetrytest.NewTracerProvider() + defer func() { _ = tp.Shutdown(context.Background()) }() + + svc := NewService(otelapi.Config{ + HTTP: otelapi.HTTPConfig{Enabled: true, ExtractHeaders: true, InjectHeaders: true}, + Interceptor: otelapi.InterceptorConfig{Enabled: true}, + Queue: otelapi.QueueConfig{Enabled: true}, + }, zap.NewNop(), tp) + httpMW := svc.HTTPMiddleware() + funcInter := svc.Interceptor() + require.NotNil(t, funcInter) + pi := NewPublishInterceptor(tp.Tracer("wippy-runtime")) + + var publishedMsg *queueapi.Message + handler := http.HandlerFunc(func(_ http.ResponseWriter, r *http.Request) { + task := runtime.Task{ID: registry.NewID("ns", "func")} + next := func(ctx context.Context, _ runtime.Task) (*runtime.Result, error) { + msg := &queueapi.Message{ID: "m1", Headers: attrsapi.NewBag()} + publishedMsg = msg + pubNext := func(_ context.Context, _ registry.ID, _ []*queueapi.Message) error { return nil } + if err := pi.Handle(ctx, registry.NewID("ns", "orders"), []*queueapi.Message{msg}, pubNext); err != nil { + return nil, err + } + return &runtime.Result{}, nil + } + _, _ = funcInter.Handle(r.Context(), task, 
next) + }) + wrapped := httpMW(handler) + + req := httptest.NewRequestWithContext(context.Background(), http.MethodGet, "/orders", nil) + fcCtx, fc := ctxapi.OpenFrameContext(req.Context()) + defer ctxapi.ReleaseFrameContext(fc) + require.NoError(t, httpapi.SetRouteLabel(fcCtx, "/orders")) + req = req.WithContext(fcCtx) + + wrapped.ServeHTTP(httptest.NewRecorder(), req) + + // Simulate the worker consuming the published message. + require.NotNil(t, publishedMsg) + consumeTask := runtime.Task{ + ID: registry.NewID("ns", "consumer"), + Context: []ctxapi.Pair{{Value: &queueapi.Delivery{Message: publishedMsg}}}, + } + consumeNext := func(_ context.Context, _ runtime.Task) (*runtime.Result, error) { return &runtime.Result{}, nil } + _, _ = funcInter.Handle(context.Background(), consumeTask, consumeNext) + + // Every span in the chain must share a single trace ID. + spans := sr.Ended() + require.GreaterOrEqual(t, len(spans), 4, "expected at least the 4 chain spans, got %d", len(spans)) + + wantNames := map[string]bool{ + "GET /orders": false, + "ns:func": false, + "ns:orders.publish": false, + "ns:consumer": false, + } + for _, s := range spans { + if _, ok := wantNames[s.Name()]; ok { + wantNames[s.Name()] = true + } + } + for name, seen := range wantNames { + assert.Truef(t, seen, "expected span %q in the chain", name) + } + + var traceID trace.TraceID + for i, s := range spans { + if i == 0 { + traceID = telemetrytest.TraceID(s) + continue + } + assert.Equal(t, traceID, telemetrytest.TraceID(s), + "span %q must share the single chain trace ID", s.Name()) + } +} diff --git a/service/otel/provider.go b/service/otel/provider.go index 310736d10..2d13d3d12 100644 --- a/service/otel/provider.go +++ b/service/otel/provider.go @@ -146,24 +146,37 @@ func createHTTPExporter(ctx context.Context, cfg otelapi.Config, logger *zap.Log return otlptracehttp.New(ctx, opts...) 
} -// createResource creates an OTEL resource with service information +// createResource creates an OTEL resource with service information, merged +// onto the SDK default resource so every exported span and metric also carries +// standard process/host/os semconv attributes (host.name, process.pid, +// os.type, etc.). func createResource(cfg otelapi.Config) (*resource.Resource, error) { - attrs := []resource.Option{ + opts := []resource.Option{ resource.WithAttributes( semconv.ServiceName(cfg.ServiceName), ), + resource.WithHost(), + resource.WithProcess(), + resource.WithOS(), + resource.WithTelemetrySDK(), } if cfg.ServiceVersion != "" { - attrs = append(attrs, resource.WithAttributes( + opts = append(opts, resource.WithAttributes( semconv.ServiceVersion(cfg.ServiceVersion), )) } - return resource.New( - context.Background(), - attrs..., - ) + res, err := resource.New(context.Background(), opts...) + if err != nil { + return nil, err + } + + merged, err := resource.Merge(resource.Default(), res) + if err != nil { + return nil, err + } + return merged, nil } // createSampler creates a trace sampler based on sample rate @@ -301,3 +314,20 @@ func ShutdownMeterProvider(ctx context.Context, mp metric.MeterProvider, logger } return nil } + +// ShutdownTracerProvider gracefully shuts down the tracer provider, flushing +// any spans still held by the BatchSpanProcessor. Without this call the +// processor's bounded queue is dropped on exit. 
+func ShutdownTracerProvider(ctx context.Context, tp trace.TracerProvider, logger *zap.Logger) error { + if logger == nil { + logger = zap.NewNop() + } + if sdkTP, ok := tp.(*sdktrace.TracerProvider); ok { + logger.Debug("shutting down OTEL tracer provider") + if err := sdkTP.Shutdown(ctx); err != nil { + return newShutdownTracerProviderError(err) + } + logger.Debug("OTEL tracer provider shutdown complete") + } + return nil +} diff --git a/service/otel/provider_test.go b/service/otel/provider_test.go index 53cd94ac6..5f52fda76 100644 --- a/service/otel/provider_test.go +++ b/service/otel/provider_test.go @@ -4,17 +4,42 @@ package otel import ( "context" + "fmt" + "sync" "testing" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" otelapi "github.com/wippyai/runtime/api/service/otel" + "go.opentelemetry.io/otel/attribute" metricnoop "go.opentelemetry.io/otel/metric/noop" sdktrace "go.opentelemetry.io/otel/sdk/trace" "go.opentelemetry.io/otel/trace/noop" "go.uber.org/zap" ) +// recordingExporter records every exported span and does not clear on shutdown, +// so a test can prove the BatchSpanProcessor flushed its queue. 
+type recordingExporter struct { + mu sync.Mutex + spans int +} + +func (r *recordingExporter) ExportSpans(_ context.Context, spans []sdktrace.ReadOnlySpan) error { + r.mu.Lock() + defer r.mu.Unlock() + r.spans += len(spans) + return nil +} + +func (r *recordingExporter) Shutdown(context.Context) error { return nil } + +func (r *recordingExporter) Count() int { + r.mu.Lock() + defer r.mu.Unlock() + return r.spans +} + func TestInitializeProvider_Disabled(t *testing.T) { cfg := otelapi.Config{ Enabled: false, @@ -187,6 +212,23 @@ func TestCreateResource_WithVersion(t *testing.T) { require.NotNil(t, res) } +func TestCreateResource_MergesStandardAttributes(t *testing.T) { + res, err := createResource(otelapi.Config{ServiceName: "test-service"}) + require.NoError(t, err) + + get := func(k string) string { + v, ok := res.Set().Value(attribute.Key(k)) + if !ok { + return "" + } + return v.String() + } + assert.Equal(t, "test-service", get("service.name")) + for _, key := range []string{"host.name", "process.pid", "os.type", "telemetry.sdk.name"} { + assert.NotEmpty(t, get(key), "resource should carry standard semconv %q", key) + } +} + func TestInitializeMeterProvider_Disabled(t *testing.T) { cfg := otelapi.Config{ Enabled: false, @@ -236,6 +278,32 @@ func TestShutdownMeterProvider_NoopProvider(t *testing.T) { assert.NoError(t, err) } +func TestShutdownTracerProvider_FlushesSpans(t *testing.T) { + exp := &recordingExporter{} + tp := sdktrace.NewTracerProvider( + sdktrace.WithBatcher(exp), + sdktrace.WithSampler(sdktrace.AlwaysSample()), + ) + tracer := tp.Tracer("test") + + ctx := context.Background() + for i := 0; i < 10; i++ { + _, span := tracer.Start(ctx, fmt.Sprintf("op-%d", i)) + span.End() + } + + require.Zero(t, exp.Count(), "spans must still be queued before shutdown") + + require.NoError(t, ShutdownTracerProvider(context.Background(), tp, zap.NewNop())) + + require.Equal(t, 10, exp.Count(), "shutdown must flush all queued spans to the exporter") +} + +func 
TestShutdownTracerProvider_NoopProvider(t *testing.T) { + err := ShutdownTracerProvider(context.Background(), noop.NewTracerProvider(), zap.NewNop()) + assert.NoError(t, err) +} + func TestProviderUsesBoundedBatcher(t *testing.T) { cfg := otelapi.Config{ Enabled: true, diff --git a/service/otel/queue_test.go b/service/otel/queue_test.go new file mode 100644 index 000000000..32fe01ca9 --- /dev/null +++ b/service/otel/queue_test.go @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: MPL-2.0 + +package otel + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + attrsapi "github.com/wippyai/runtime/api/attrs" + queueapi "github.com/wippyai/runtime/api/queue" + "github.com/wippyai/runtime/api/registry" + "github.com/wippyai/runtime/internal/telemetrytest" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/trace" +) + +func TestPublishInterceptor_ProducerSpan(t *testing.T) { + useTraceContextPropagator(t) + tp, sr := telemetrytest.NewTracerProvider() + defer func() { _ = tp.Shutdown(context.Background()) }() + + pi := NewPublishInterceptor(tp.Tracer("test")) + queueID := registry.NewID("ns", "orders") + msg := &queueapi.Message{ID: "m1", Headers: attrsapi.NewBag()} + + called := false + next := func(_ context.Context, _ registry.ID, _ []*queueapi.Message) error { + called = true + return nil + } + + require.NoError(t, pi.Handle(context.Background(), queueID, []*queueapi.Message{msg}, next)) + require.True(t, called) + + span := telemetrytest.MustSpanNamed(t, sr, "ns:orders.publish") + telemetrytest.SpanKind(t, span, trace.SpanKindProducer) + telemetrytest.SpanHasStringAttr(t, span, "messaging.operation", "publish") + telemetrytest.SpanHasStringAttr(t, span, "messaging.destination.name", "ns:orders") + telemetrytest.SpanHasStringAttr(t, span, "messaging.message.id", "m1") + + tpVal, ok := msg.Headers.Get("traceparent") + require.True(t, ok, "traceparent must be injected into the message headers") + tpStr, ok 
:= tpVal.(string) + require.True(t, ok) + assert.NotEmpty(t, tpStr) +} + +func TestExtractFromDelivery_LinksChild(t *testing.T) { + useTraceContextPropagator(t) + tp, sr := telemetrytest.NewTracerProvider() + defer func() { _ = tp.Shutdown(context.Background()) }() + + tracer := tp.Tracer("test") + parentCtx, parent := tracer.Start(context.Background(), "publish.parent") + defer parent.End() + + msg := &queueapi.Message{ID: "m1", Headers: attrsapi.NewBag()} + otel.GetTextMapPropagator().Inject(parentCtx, &MessageHeaderCarrier{headers: msg.Headers}) + + extractedCtx, hasSpan := extractFromDelivery(context.Background(), &queueapi.Delivery{Message: msg}) + require.True(t, hasSpan, "extract must recover a valid span context") + + _, child := tracer.Start(extractedCtx, "consume") + child.End() + + consumed := telemetrytest.MustSpanNamed(t, sr, "consume") + assert.Equal(t, parent.SpanContext().TraceID(), telemetrytest.TraceID(consumed), + "consumer span must share the producer trace ID") +} diff --git a/service/otel/service.go b/service/otel/service.go index a9a89b972..d2d53eabe 100644 --- a/service/otel/service.go +++ b/service/otel/service.go @@ -3,7 +3,10 @@ package otel import ( + "bufio" stdcontext "context" + "errors" + "net" "net/http" ctxapi "github.com/wippyai/runtime/api/context" @@ -86,21 +89,72 @@ func (s *Service) HTTPMiddleware() func(http.Handler) http.Handler { propagator.Inject(ctx, propagation.HeaderCarrier(w.Header())) } + rec := &statusRecorder{ResponseWriter: w, status: http.StatusOK} r = r.WithContext(ctx) - next.ServeHTTP(w, r) + next.ServeHTTP(rec, r) + + span.SetAttributes(attribute.Int("http.response.status_code", rec.status)) + if rec.status >= http.StatusInternalServerError { + span.SetStatus(codes.Error, http.StatusText(rec.status)) + } }) } } +// statusRecorder wraps http.ResponseWriter to capture the response status code +// without otherwise altering behavior. 
The first final WriteHeader call fixes the +// status; interim 1xx codes other than 101 are forwarded but not recorded. A +// handler that only calls Write defaults to 200 OK. Flush/Hijack/Unwrap forward +// to the underlying writer so SSE (Flush) and websocket (Hijack) middlewares +// stacked inside the OTel middleware keep working, and http.ResponseController reaches the real writer. +type statusRecorder struct { + http.ResponseWriter + status int + wrote bool +} + +func (r *statusRecorder) Unwrap() http.ResponseWriter { return r.ResponseWriter } + +func (r *statusRecorder) WriteHeader(code int) { + if !r.wrote && (code >= http.StatusOK || code == http.StatusSwitchingProtocols) { + r.status = code + r.wrote = true + } + r.ResponseWriter.WriteHeader(code) +} + +func (r *statusRecorder) Write(b []byte) (int, error) { + if !r.wrote { + r.wrote = true + } + return r.ResponseWriter.Write(b) +} + +func (r *statusRecorder) Flush() { + if flusher, ok := r.ResponseWriter.(http.Flusher); ok { + flusher.Flush() + } +} + +func (r *statusRecorder) Hijack() (net.Conn, *bufio.ReadWriter, error) { + if hj, ok := r.ResponseWriter.(http.Hijacker); ok { + return hj.Hijack() + } + return nil, nil, errors.New("otel: underlying response writer does not support Hijack") +} + // OnStart implements scheduler.Lifecycle. func (s *Service) OnStart(ctx stdcontext.Context, p pid.PID, _ process.Process) error { if !s.cfg.Process.Enabled || !s.cfg.Process.TraceLifecycle { return nil } + // Continue the spawning trace when a remote parent is present, otherwise + // start a root span so unsupervised spawns are still observable.
processSpanCtx, hasSpan := otelapi.GetRemoteSpanContext(ctx) - if !hasSpan || !processSpanCtx.IsValid() { - return nil + startCtx := ctx + if hasSpan && processSpanCtx.IsValid() { + startCtx = trace.ContextWithRemoteSpanContext(ctx, processSpanCtx) } sourceID, hasSource := runtime.GetFrameID(ctx) @@ -109,8 +163,7 @@ func (s *Service) OnStart(ctx stdcontext.Context, p pid.PID, _ process.Process) startEventName = sourceID.String() + ".started" } - ctxWithProcess := trace.ContextWithRemoteSpanContext(ctx, processSpanCtx) - _, startSpan := s.tracer.Start(ctxWithProcess, startEventName, + _, startSpan := s.tracer.Start(startCtx, startEventName, trace.WithSpanKind(trace.SpanKindInternal)) startSpan.SetAttributes( @@ -130,9 +183,12 @@ func (s *Service) OnComplete(ctx stdcontext.Context, p pid.PID, result *runtime. return } + // Continue the spawning trace when a remote parent is present, otherwise + // start a root span so unsupervised spawns are still observable. remoteSpanCtx, hasRemote := otelapi.GetRemoteSpanContext(ctx) - if !hasRemote || !remoteSpanCtx.IsValid() { - return + completeCtx := ctx + if hasRemote && remoteSpanCtx.IsValid() { + completeCtx = trace.ContextWithRemoteSpanContext(ctx, remoteSpanCtx) } sourceID, hasSource := runtime.GetFrameID(ctx) @@ -141,8 +197,7 @@ func (s *Service) OnComplete(ctx stdcontext.Context, p pid.PID, result *runtime. 
spanName = sourceID.String() + ".terminated" } - ctxWithRemote := trace.ContextWithRemoteSpanContext(ctx, remoteSpanCtx) - _, span := s.tracer.Start(ctxWithRemote, spanName, + _, span := s.tracer.Start(completeCtx, spanName, trace.WithSpanKind(trace.SpanKindInternal)) attrs := []attribute.KeyValue{ @@ -205,17 +260,19 @@ func (i *interceptor) Handle(ctx stdcontext.Context, task runtime.Task, next fun var delivery *queueapi.Delivery var isQueueMessage bool - // Priority 0: Check for queue delivery in task.Context (before it's written to frame) + // Priority 0: Check for queue delivery in task.Context (before it's written to frame). + // Any queue delivery is a Consumer span - parented to the producer trace when a + // traceparent is present, otherwise a root Consumer span - so consume-side + // telemetry exists even for messages from non-instrumented publishers. for _, pair := range task.Context { if d, ok := pair.Value.(*queueapi.Delivery); ok { delivery = d - extractedCtx, hasSpan := extractFromDelivery(ctx, delivery) - if hasSpan { + isQueueMessage = true + if extractedCtx, hasSpan := extractFromDelivery(ctx, delivery); hasSpan { ctx = extractedCtx - ctx, span = i.tracer.Start(ctx, spanName, - trace.WithSpanKind(trace.SpanKindConsumer)) - isQueueMessage = true } + ctx, span = i.tracer.Start(ctx, spanName, + trace.WithSpanKind(trace.SpanKindConsumer)) break } } diff --git a/service/otel/service_span_test.go b/service/otel/service_span_test.go new file mode 100644 index 000000000..84b52f066 --- /dev/null +++ b/service/otel/service_span_test.go @@ -0,0 +1,294 @@ +// SPDX-License-Identifier: MPL-2.0 + +package otel + +import ( + "context" + "errors" + "net/http" + "net/http/httptest" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + attrsapi "github.com/wippyai/runtime/api/attrs" + ctxapi "github.com/wippyai/runtime/api/context" + "github.com/wippyai/runtime/api/pid" + queueapi "github.com/wippyai/runtime/api/queue" + 
"github.com/wippyai/runtime/api/registry" + "github.com/wippyai/runtime/api/runtime" + httpapi "github.com/wippyai/runtime/api/service/http" + otelapi "github.com/wippyai/runtime/api/service/otel" + "github.com/wippyai/runtime/internal/telemetrytest" + "github.com/wippyai/runtime/service/http/middleware/httpmetrics" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/propagation" + "go.opentelemetry.io/otel/trace" + "go.uber.org/zap" +) + +// useTraceContextPropagator installs the W3C TraceContext+Baggage propagator +// as the global for the duration of a test, restoring the previous value after. +func useTraceContextPropagator(t *testing.T) { + t.Helper() + prev := otel.GetTextMapPropagator() + otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator(propagation.TraceContext{}, propagation.Baggage{})) + t.Cleanup(func() { otel.SetTextMapPropagator(prev) }) +} + +func TestHTTPMiddleware_ServerSpan(t *testing.T) { + useTraceContextPropagator(t) + tp, sr := telemetrytest.NewTracerProvider() + defer func() { _ = tp.Shutdown(context.Background()) }() + + svc := NewService(otelapi.Config{HTTP: otelapi.HTTPConfig{Enabled: true, ExtractHeaders: true, InjectHeaders: true}}, zap.NewNop(), tp) + wrapped := svc.HTTPMiddleware()(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + })) + + req := httptest.NewRequestWithContext(context.Background(), http.MethodGet, "/users/123", nil) + fcCtx, fc := ctxapi.OpenFrameContext(req.Context()) + defer ctxapi.ReleaseFrameContext(fc) + require.NoError(t, httpapi.SetRouteLabel(fcCtx, "/users/:id")) + req = req.WithContext(fcCtx) + + rec := httptest.NewRecorder() + wrapped.ServeHTTP(rec, req) + + telemetrytest.SpanCount(t, sr, 1) + span := telemetrytest.MustSpanNamed(t, sr, "GET /users/:id") + telemetrytest.SpanKind(t, span, trace.SpanKindServer) + telemetrytest.SpanHasStringAttr(t, span, "http.method", "GET") + telemetrytest.SpanHasStringAttr(t, 
span, "http.route", "/users/:id") + assert.NotEmpty(t, rec.Header().Get("traceparent"), "traceparent must be injected into response headers") +} + +func TestHTTPMiddleware_ExtractsParent(t *testing.T) { + useTraceContextPropagator(t) + tp, sr := telemetrytest.NewTracerProvider() + defer func() { _ = tp.Shutdown(context.Background()) }() + + tracer := tp.Tracer("test") + parentCtx, parent := tracer.Start(context.Background(), "remote.parent") + defer parent.End() + + svc := NewService(otelapi.Config{HTTP: otelapi.HTTPConfig{Enabled: true, ExtractHeaders: true}}, zap.NewNop(), tp) + wrapped := svc.HTTPMiddleware()(http.HandlerFunc(func(_ http.ResponseWriter, _ *http.Request) {})) + + req := httptest.NewRequestWithContext(context.Background(), http.MethodGet, "/", nil) + otel.GetTextMapPropagator().Inject(parentCtx, propagation.HeaderCarrier(req.Header)) + + wrapped.ServeHTTP(httptest.NewRecorder(), req) + + server := telemetrytest.MustSpanNamed(t, sr, "GET unmatched") + assert.Equal(t, parent.SpanContext().TraceID(), telemetrytest.TraceID(server), + "server span must continue the parent trace") +} + +func TestInterceptor_SpanKind_QueueDeliveryWithoutTraceparent(t *testing.T) { + tp, sr := telemetrytest.NewTracerProvider() + defer func() { _ = tp.Shutdown(context.Background()) }() + + // Delivery with no traceparent (message from a non-instrumented publisher). 
+ delivery := &queueapi.Delivery{Message: &queueapi.Message{ID: "m2", Headers: attrsapi.NewBag()}} + inter := &interceptor{tracer: tp.Tracer("test"), logger: zap.NewNop()} + task := runtime.Task{ + ID: registry.NewID("ns", "func"), + Context: []ctxapi.Pair{{Value: delivery}}, + } + next := func(_ context.Context, _ runtime.Task) (*runtime.Result, error) { return &runtime.Result{}, nil } + + _, err := inter.Handle(context.Background(), task, next) + require.NoError(t, err) + + span := telemetrytest.MustSpanNamed(t, sr, "ns:func") + telemetrytest.SpanKind(t, span, trace.SpanKindConsumer) + telemetrytest.SpanHasStringAttr(t, span, "messaging.operation", "process") + telemetrytest.SpanHasStringAttr(t, span, "messaging.message.id", "m2") +} + +func TestInterceptor_SpanKind_RootIsServer(t *testing.T) { + tp, sr := telemetrytest.NewTracerProvider() + defer func() { _ = tp.Shutdown(context.Background()) }() + + inter := &interceptor{tracer: tp.Tracer("test"), logger: zap.NewNop()} + task := runtime.Task{ID: registry.NewID("ns", "func")} + next := func(_ context.Context, _ runtime.Task) (*runtime.Result, error) { + return &runtime.Result{}, nil + } + + _, err := inter.Handle(context.Background(), task, next) + require.NoError(t, err) + + span := telemetrytest.MustSpanNamed(t, sr, "ns:func") + telemetrytest.SpanKind(t, span, trace.SpanKindServer) +} + +func TestInterceptor_SpanKind_ParentIsInternal(t *testing.T) { + tp, sr := telemetrytest.NewTracerProvider() + defer func() { _ = tp.Shutdown(context.Background()) }() + + tracer := tp.Tracer("test") + baseCtx, parent := tracer.Start(context.Background(), "parent") + defer parent.End() + + fcCtx, fc := ctxapi.OpenFrameContext(baseCtx) + defer ctxapi.ReleaseFrameContext(fc) + require.NoError(t, otelapi.SetSpan(fcCtx, parent)) + + inter := &interceptor{tracer: tracer, logger: zap.NewNop()} + task := runtime.Task{ID: registry.NewID("ns", "func")} + next := func(_ context.Context, _ runtime.Task) (*runtime.Result, error) { return 
&runtime.Result{}, nil } + + _, err := inter.Handle(fcCtx, task, next) + require.NoError(t, err) + + span := telemetrytest.MustSpanNamed(t, sr, "ns:func") + telemetrytest.SpanKind(t, span, trace.SpanKindInternal) +} + +func TestInterceptor_SpanKind_QueueDeliveryIsConsumer(t *testing.T) { + useTraceContextPropagator(t) + tp, sr := telemetrytest.NewTracerProvider() + defer func() { _ = tp.Shutdown(context.Background()) }() + + tracer := tp.Tracer("test") + parentCtx, parent := tracer.Start(context.Background(), "publish.parent") + defer parent.End() + + msg := &queueapi.Message{ID: "m1", Headers: attrsapi.NewBag()} + otel.GetTextMapPropagator().Inject(parentCtx, &MessageHeaderCarrier{headers: msg.Headers}) + delivery := &queueapi.Delivery{Message: msg} + + inter := &interceptor{tracer: tracer, logger: zap.NewNop()} + task := runtime.Task{ + ID: registry.NewID("ns", "func"), + Context: []ctxapi.Pair{{Value: delivery}}, + } + next := func(_ context.Context, _ runtime.Task) (*runtime.Result, error) { return &runtime.Result{}, nil } + + _, err := inter.Handle(context.Background(), task, next) + require.NoError(t, err) + + span := telemetrytest.MustSpanNamed(t, sr, "ns:func") + telemetrytest.SpanKind(t, span, trace.SpanKindConsumer) + telemetrytest.SpanHasStringAttr(t, span, "messaging.operation", "process") + telemetrytest.SpanHasStringAttr(t, span, "messaging.message.id", "m1") + assert.Equal(t, parent.SpanContext().TraceID(), telemetrytest.TraceID(span), + "consumer span must continue the producer trace") +} + +func TestInterceptor_ErrorStatus(t *testing.T) { + tp, sr := telemetrytest.NewTracerProvider() + defer func() { _ = tp.Shutdown(context.Background()) }() + + inter := &interceptor{tracer: tp.Tracer("test"), logger: zap.NewNop()} + task := runtime.Task{ID: registry.NewID("ns", "func")} + next := func(_ context.Context, _ runtime.Task) (*runtime.Result, error) { + return nil, errors.New("boom") + } + + _, _ = inter.Handle(context.Background(), task, next) + + span 
:= telemetrytest.MustSpanNamed(t, sr, "ns:func") + telemetrytest.SpanStatus(t, span, codes.Error) +} + +func TestProcessLifecycle_RootSpanWhenNoRemoteParent(t *testing.T) { + tp, sr := telemetrytest.NewTracerProvider() + defer func() { _ = tp.Shutdown(context.Background()) }() + + svc := NewService(otelapi.Config{Process: otelapi.ProcessConfig{Enabled: true, TraceLifecycle: true}}, zap.NewNop(), tp) + + require.NoError(t, svc.OnStart(context.Background(), pid.PID{UniqID: "p1"}, nil)) + svc.OnComplete(context.Background(), pid.PID{UniqID: "p1"}, &runtime.Result{}) + + started := false + terminated := false + for _, s := range sr.Ended() { + if s.Name() == "process.started" { + started = true + telemetrytest.SpanKind(t, s, trace.SpanKindInternal) + } + if s.Name() == "process.terminated" { + terminated = true + } + } + assert.True(t, started, "unsupervised spawn must emit process.started") + assert.True(t, terminated, "unsupervised spawn must emit process.terminated") +} + +func TestHTTPMiddleware_StackedWithHttpmetrics_PreservesHijackFlush(t *testing.T) { + tp, _ := telemetrytest.NewTracerProvider() + defer func() { _ = tp.Shutdown(context.Background()) }() + + svc := NewService(otelapi.Config{HTTP: otelapi.HTTPConfig{Enabled: true}}, zap.NewNop(), tp) + httpmw := httpmetrics.CreateHTTPMetricsMiddleware(telemetrytest.NewRecorder())(nil) + + inner := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + _, isFlusher := w.(http.Flusher) + assert.True(t, isFlusher, "inner handler must see http.Flusher through otel+httpmetrics") + _, isHijacker := w.(http.Hijacker) + assert.True(t, isHijacker, "inner handler must see http.Hijacker through otel+httpmetrics") + w.WriteHeader(http.StatusOK) + }) + + // Stack order: otel (outer) -> httpmetrics -> handler, matching production. 
+ stacked := svc.HTTPMiddleware()(httpmw(inner)) + stacked.ServeHTTP(httptest.NewRecorder(), + httptest.NewRequestWithContext(context.Background(), http.MethodGet, "/", nil)) +} + +func TestHTTPMiddleware_StatusRecorderPreservesFlusherAndHijacker(t *testing.T) { + tp, _ := telemetrytest.NewTracerProvider() + defer func() { _ = tp.Shutdown(context.Background()) }() + + svc := NewService(otelapi.Config{HTTP: otelapi.HTTPConfig{Enabled: true}}, zap.NewNop(), tp) + wrapped := svc.HTTPMiddleware()(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + _, isFlusher := w.(http.Flusher) + assert.True(t, isFlusher, "inner handler must see http.Flusher (SSE relay depends on it)") + _, isHijacker := w.(http.Hijacker) + assert.True(t, isHijacker, "inner handler must see http.Hijacker (websocket relay depends on it)") + w.WriteHeader(http.StatusOK) + })) + + wrapped.ServeHTTP(httptest.NewRecorder(), + httptest.NewRequestWithContext(context.Background(), http.MethodGet, "/", nil)) +} + +func TestHTTPMiddleware_CapturesStatusCode(t *testing.T) { + useTraceContextPropagator(t) + tp, sr := telemetrytest.NewTracerProvider() + defer func() { _ = tp.Shutdown(context.Background()) }() + + svc := NewService(otelapi.Config{HTTP: otelapi.HTTPConfig{Enabled: true}}, zap.NewNop(), tp) + + cases := []struct { + name string + status int + wantError bool + }{ + {"ok", http.StatusOK, false}, + {"not_found", http.StatusNotFound, false}, + {"server_error", http.StatusInternalServerError, true}, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + sr.Reset() + wrapped := svc.HTTPMiddleware()(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(c.status) + })) + req := httptest.NewRequestWithContext(context.Background(), http.MethodGet, "/x", nil) + wrapped.ServeHTTP(httptest.NewRecorder(), req) + + span := telemetrytest.MustSpanNamed(t, sr, "GET unmatched") + telemetrytest.SpanHasInt64Attr(t, span, "http.response.status_code", 
int64(c.status)) + if c.wantError { + telemetrytest.SpanStatus(t, span, codes.Error) + } else { + telemetrytest.SpanStatus(t, span, codes.Unset) + } + }) + } +} diff --git a/service/queue/consumer/consumer.go b/service/queue/consumer/consumer.go index 10a43cb15..607eed0d2 100644 --- a/service/queue/consumer/consumer.go +++ b/service/queue/consumer/consumer.go @@ -5,9 +5,11 @@ package consumer import ( "context" "sync" + "time" ctxapi "github.com/wippyai/runtime/api/context" "github.com/wippyai/runtime/api/function" + "github.com/wippyai/runtime/api/metrics" "github.com/wippyai/runtime/api/payload" queueapi "github.com/wippyai/runtime/api/queue" "github.com/wippyai/runtime/api/registry" @@ -23,6 +25,7 @@ type Consumer struct { funcReg function.Registry config *consumerapi.Config logger *zap.Logger + tel *telemetry deliveries chan *queueapi.Delivery cancel context.CancelFunc workerCancel context.CancelFunc @@ -41,6 +44,7 @@ func NewConsumer( driver queueapi.Driver, funcReg function.Registry, logger *zap.Logger, + coll metrics.Collector, ) *Consumer { if logger == nil { logger = zap.NewNop() @@ -53,6 +57,7 @@ func NewConsumer( driver: driver, funcReg: funcReg, logger: logger, + tel: newTelemetry(coll), } } @@ -173,9 +178,14 @@ func (c *Consumer) processDelivery(ctx context.Context, delivery *queueapi.Deliv queueapi.ReleaseMessage(msg) }() + queueID := c.queueID.String() + c.tel.inFlightInc(queueID) + defer c.tel.inFlightDec(queueID) + start := time.Now() + c.logger.Debug("processing message", zap.String("consumer", c.id.String()), - zap.String("queue", c.queueID.String()), + zap.String("queue", queueID), zap.String("func", c.funcID.String()), zap.Int("worker_id", workerID), zap.String("message_id", msg.ID)) @@ -195,15 +205,18 @@ func (c *Consumer) processDelivery(ctx context.Context, delivery *queueapi.Deliv err = result.Error } + outcome := "ack" + // Ack or Nack based on result. 
MarkSettled gates the broker call: if // the handler already called msg:ack()/msg:nack() via the Lua // wrapper, the settle slot is claimed and the consumer must skip its // own settle to avoid double-ack (AMQP PRECONDITION_FAILED) or // double-nack/visibility-timeout races. if err != nil { + outcome = "nack" c.logger.Error("message processing failed", zap.String("consumer", c.id.String()), - zap.String("queue", c.queueID.String()), + zap.String("queue", queueID), zap.String("func", c.funcID.String()), zap.Int("worker_id", workerID), zap.String("message_id", msg.ID), @@ -232,4 +245,6 @@ func (c *Consumer) processDelivery(ctx context.Context, delivery *queueapi.Deliv } } } + + c.tel.recordProcessed(queueID, outcome, time.Since(start)) } diff --git a/service/queue/consumer/consumer_test.go b/service/queue/consumer/consumer_test.go index 2f7a68197..b1f92e929 100644 --- a/service/queue/consumer/consumer_test.go +++ b/service/queue/consumer/consumer_test.go @@ -42,6 +42,7 @@ func TestConsumer_StartStop(t *testing.T) { driver, funcReg, zap.NewNop(), + nil, ) statusChan, err := consumer.Start(ctx) @@ -77,6 +78,7 @@ func TestConsumer_ProcessMessage(t *testing.T) { driver, funcReg, zap.NewNop(), + nil, ) _, err := consumer.Start(ctx) @@ -140,6 +142,7 @@ func TestConsumer_ProcessMessage_Error(t *testing.T) { driver, funcReg, zap.NewNop(), + nil, ) _, err := consumer.Start(ctx) @@ -210,6 +213,7 @@ func TestConsumer_StopTimeout(t *testing.T) { driver, funcReg, zap.NewNop(), + nil, ) statusChan, err := consumer.Start(ctx) @@ -270,6 +274,7 @@ func TestConsumer_StopWithNoMessages(t *testing.T) { driver, funcReg, zap.NewNop(), + nil, ) statusChan, err := consumer.Start(ctx) @@ -310,6 +315,7 @@ func TestConsumer_MultipleStopCalls(t *testing.T) { driver, funcReg, zap.NewNop(), + nil, ) _, err := consumer.Start(ctx) @@ -352,6 +358,7 @@ func TestConsumer_ConcurrentMessageProcessing(t *testing.T) { driver, funcReg, zap.NewNop(), + nil, ) _, err := consumer.Start(ctx) @@ -411,6 +418,7 
@@ func TestConsumer_StopDuringProcessing(t *testing.T) { driver, funcReg, zap.NewNop(), + nil, ) _, err := consumer.Start(ctx) @@ -464,6 +472,7 @@ func TestConsumer_ContextCancellationStopsWorkers(t *testing.T) { driver, funcReg, zap.NewNop(), + nil, ) statusChan, err := consumer.Start(ctx) @@ -504,6 +513,7 @@ func TestConsumer_AckNackAfterShutdown(t *testing.T) { driver, funcReg, zap.NewNop(), + nil, ) _, err := consumer.Start(ctx) @@ -572,6 +582,7 @@ func TestConsumer_SlowWorkers(t *testing.T) { driver, funcReg, zap.NewNop(), + nil, ) _, err := consumer.Start(ctx) @@ -636,6 +647,7 @@ func TestConsumer_DeadWorkerTimeout(t *testing.T) { driver, funcReg, zap.NewNop(), + nil, ) _, err := consumer.Start(ctx) @@ -699,6 +711,7 @@ func TestConsumer_MultipleWorkersOneBlocked(t *testing.T) { driver, funcReg, zap.NewNop(), + nil, ) _, err := consumer.Start(ctx) @@ -765,6 +778,7 @@ func TestConsumer_StopWithAllWorkersBlocked(t *testing.T) { driver, funcReg, zap.NewNop(), + nil, ) _, err := consumer.Start(ctx) @@ -903,6 +917,7 @@ func TestConsumer_StressHighThroughput(t *testing.T) { driver, funcReg, zap.NewNop(), + nil, ) _, err := consumer.Start(ctx) @@ -966,6 +981,7 @@ func TestConsumer_StressRapidStartStop(t *testing.T) { driver, funcReg, zap.NewNop(), + nil, ) _, err := consumer.Start(ctx) @@ -1011,6 +1027,7 @@ func TestConsumer_StressStartStopWithMessages(t *testing.T) { driver, funcReg, zap.NewNop(), + nil, ) _, err := consumer.Start(ctx) @@ -1078,6 +1095,7 @@ func TestConsumer_StressConcurrentConsumers(t *testing.T) { driver, funcReg, zap.NewNop(), + nil, ) _, err := consumer.Start(ctx) @@ -1145,6 +1163,7 @@ func TestConsumer_StressNackRequeue(t *testing.T) { driver, funcReg, zap.NewNop(), + nil, ) _, err := consumer.Start(ctx) @@ -1207,6 +1226,7 @@ func TestConsumer_StressResourceCleanup(t *testing.T) { driver, funcReg, zap.NewNop(), + nil, ) statusChan, err := consumer.Start(ctx) @@ -1258,6 +1278,7 @@ func TestConsumer_StressMixedAckNack(t *testing.T) { driver, 
funcReg, zap.NewNop(), + nil, ) _, err := consumer.Start(ctx) diff --git a/service/queue/consumer/manager.go b/service/queue/consumer/manager.go index a973b0280..425af0e7e 100644 --- a/service/queue/consumer/manager.go +++ b/service/queue/consumer/manager.go @@ -8,6 +8,7 @@ import ( "github.com/wippyai/runtime/api/event" "github.com/wippyai/runtime/api/function" + "github.com/wippyai/runtime/api/metrics" "github.com/wippyai/runtime/api/payload" queueapi "github.com/wippyai/runtime/api/queue" "github.com/wippyai/runtime/api/registry" @@ -24,6 +25,7 @@ type Manager struct { funcReg function.Registry dtt payload.Transcoder logger *zap.Logger + coll metrics.Collector consumers sync.Map } @@ -33,6 +35,7 @@ func NewManager( funcReg function.Registry, dtt payload.Transcoder, logger *zap.Logger, + coll metrics.Collector, ) *Manager { if logger == nil { logger = zap.NewNop() @@ -43,6 +46,7 @@ func NewManager( funcReg: funcReg, dtt: dtt, logger: logger, + coll: coll, } } @@ -94,6 +98,7 @@ func (m *Manager) addOrUpdate(ctx context.Context, entry registry.Entry, action driver, m.funcReg, m.logger, + m.coll, ) m.consumers.Store(entry.ID, consumer) diff --git a/service/queue/consumer/manager_test.go b/service/queue/consumer/manager_test.go index f79b43687..4a487be29 100644 --- a/service/queue/consumer/manager_test.go +++ b/service/queue/consumer/manager_test.go @@ -24,7 +24,7 @@ func TestManager_Add(t *testing.T) { funcReg := &mockFuncRegistry{} dtt := &mockDTT{} - manager := NewManager(bus, queueMgr, funcReg, dtt, zap.NewNop()) + manager := NewManager(bus, queueMgr, funcReg, dtt, zap.NewNop(), nil) config := &consumerapi.Config{ ConsumerOptions: queueapi.ConsumerOptions{ @@ -62,7 +62,7 @@ func TestManager_Add_QueueNotFound(t *testing.T) { funcReg := &mockFuncRegistry{} dtt := &mockDTT{} - manager := NewManager(bus, queueMgr, funcReg, dtt, zap.NewNop()) + manager := NewManager(bus, queueMgr, funcReg, dtt, zap.NewNop(), nil) config := &consumerapi.Config{ ConsumerOptions: 
queueapi.ConsumerOptions{ @@ -93,7 +93,7 @@ func TestManager_Add_DriverNotFound(t *testing.T) { funcReg := &mockFuncRegistry{} dtt := &mockDTT{} - manager := NewManager(bus, queueMgr, funcReg, dtt, zap.NewNop()) + manager := NewManager(bus, queueMgr, funcReg, dtt, zap.NewNop(), nil) config := &consumerapi.Config{ ConsumerOptions: queueapi.ConsumerOptions{ @@ -122,7 +122,7 @@ func TestManager_Update(t *testing.T) { funcReg := &mockFuncRegistry{} dtt := &mockDTT{} - manager := NewManager(bus, queueMgr, funcReg, dtt, zap.NewNop()) + manager := NewManager(bus, queueMgr, funcReg, dtt, zap.NewNop(), nil) consumerID := registry.NewID("test", "consumer") @@ -183,7 +183,7 @@ func TestManager_Delete(t *testing.T) { funcReg := &mockFuncRegistry{} dtt := &mockDTT{} - manager := NewManager(bus, queueMgr, funcReg, dtt, zap.NewNop()) + manager := NewManager(bus, queueMgr, funcReg, dtt, zap.NewNop(), nil) consumerID := registry.NewID("test", "consumer") diff --git a/service/queue/consumer/settle_coord_test.go b/service/queue/consumer/settle_coord_test.go index 3c3bee96d..0a71a61db 100644 --- a/service/queue/consumer/settle_coord_test.go +++ b/service/queue/consumer/settle_coord_test.go @@ -68,6 +68,7 @@ func TestConsumer_SkipsAutoAck_WhenHandlerAlreadySettled(t *testing.T) { driver, funcReg, zap.NewNop(), + nil, ) _, err := c.Start(ctx) @@ -138,6 +139,7 @@ func TestConsumer_SkipsAutoAck_WhenHandlerAlreadyNacked(t *testing.T) { driver, funcReg, zap.NewNop(), + nil, ) _, err := c.Start(ctx) diff --git a/service/queue/consumer/telemetry.go b/service/queue/consumer/telemetry.go new file mode 100644 index 000000000..62800c5ea --- /dev/null +++ b/service/queue/consumer/telemetry.go @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: MPL-2.0 + +package consumer + +import ( + "time" + + "github.com/wippyai/runtime/api/metrics" +) + +const ( + metricMessagesTotal = "wippy_queue_messages_total" + metricProcessDuration = "wippy_queue_process_duration_seconds" + metricInFlight = 
"wippy_queue_in_flight" +) + +// telemetry owns metric emission for the queue consumer subsystem. It is +// nil-safe so callers can ignore the absence of a configured collector. +type telemetry struct { + coll metrics.Collector +} + +func newTelemetry(coll metrics.Collector) *telemetry { + return &telemetry{coll: coll} +} + +func (t *telemetry) recordProcessed(queue, result string, duration time.Duration) { + if t == nil || t.coll == nil { + return + } + labels := metrics.Labels{"queue": queue, "result": result} + t.coll.CounterInc(metricMessagesTotal, labels) + t.coll.HistogramObserve(metricProcessDuration, duration.Seconds(), labels) +} + +func (t *telemetry) inFlightInc(queue string) { + if t == nil || t.coll == nil { + return + } + t.coll.GaugeInc(metricInFlight, metrics.Labels{"queue": queue}) +} + +func (t *telemetry) inFlightDec(queue string) { + if t == nil || t.coll == nil { + return + } + t.coll.GaugeDec(metricInFlight, metrics.Labels{"queue": queue}) +} diff --git a/service/queue/consumer/telemetry_test.go b/service/queue/consumer/telemetry_test.go new file mode 100644 index 000000000..51a15ebd3 --- /dev/null +++ b/service/queue/consumer/telemetry_test.go @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: MPL-2.0 + +package consumer + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/wippyai/runtime/api/metrics" + "github.com/wippyai/runtime/internal/telemetrytest" +) + +func TestTelemetry_RecordProcessed(t *testing.T) { + rec := telemetrytest.NewRecorder() + tel := newTelemetry(rec) + + tel.recordProcessed("ns:orders", "ack", 5*time.Millisecond) + + assert.Equal(t, 1.0, rec.CounterValue(metricMessagesTotal, + metrics.Labels{"queue": "ns:orders", "result": "ack"})) + assert.Equal(t, uint64(1), rec.HistogramCount(metricProcessDuration, + metrics.Labels{"queue": "ns:orders", "result": "ack"})) +} + +func TestTelemetry_NilCollector_NoPanic(t *testing.T) { + tel := newTelemetry(nil) + tel.recordProcessed("ns:orders", "nack", 
time.Millisecond) + tel.inFlightInc("ns:orders") + tel.inFlightDec("ns:orders") +} + +func TestTelemetry_InFlight(t *testing.T) { + rec := telemetrytest.NewRecorder() + tel := newTelemetry(rec) + + tel.inFlightInc("ns:orders") + tel.inFlightInc("ns:orders") + tel.inFlightDec("ns:orders") + + assert.Equal(t, 1.0, rec.GaugeValue(metricInFlight, + metrics.Labels{"queue": "ns:orders"})) +} diff --git a/service/sql/dispatcher.go b/service/sql/dispatcher.go index c9df1fc8d..d56807a4f 100644 --- a/service/sql/dispatcher.go +++ b/service/sql/dispatcher.go @@ -6,29 +6,26 @@ package sql import ( "context" "database/sql" + "time" "github.com/wippyai/runtime/api/dispatcher" + "github.com/wippyai/runtime/api/metrics" sqlapi "github.com/wippyai/runtime/api/service/sql" ) // Dispatcher handles SQL commands using a stateless goroutine pattern. -// -// Each command spawns a goroutine that executes the SQL operation and -// delivers results via the receiver. Goroutine lifecycle is tied to context: -// when context is canceled, operations check ctx.Err() and skip result -// delivery, allowing natural termination. -// -// Resource cleanup is handled by the Store layer (Store.Close releases -// connections) and FrameContext cleanup (commits/rollbacks transactions). -// This pattern is consistent with other stateless dispatchers in the system -// (contract, function) where explicit goroutine tracking isn't needed. -type Dispatcher struct{} +type Dispatcher struct { + coll metrics.Collector +} // NewDispatcher creates a new SQL dispatcher. func NewDispatcher() *Dispatcher { return &Dispatcher{} } +// SetCollector binds a metrics collector for telemetry. +func (d *Dispatcher) SetCollector(c metrics.Collector) { d.coll = c } + // Start is a no-op since this dispatcher has no background workers. 
func (d *Dispatcher) Start(_ context.Context) error { return nil @@ -58,7 +55,9 @@ func (d *Dispatcher) RegisterAll(register func(id dispatcher.CommandID, h dispat func (d *Dispatcher) handleQuery(ctx context.Context, cmd dispatcher.Command, tag uint64, receiver dispatcher.ResultReceiver) error { qc := cmd.(*sqlapi.QueryCmd) go func() { + start := time.Now() resp := executeQuery(ctx, qc.DB, qc.Query, qc.Params) + recordSQLOp(d.coll, "query", resp.Error, time.Since(start)) if ctx.Err() == nil { receiver.CompleteYield(tag, resp, nil) } @@ -69,7 +68,9 @@ func (d *Dispatcher) handleQuery(ctx context.Context, cmd dispatcher.Command, ta func (d *Dispatcher) handleExecute(ctx context.Context, cmd dispatcher.Command, tag uint64, receiver dispatcher.ResultReceiver) error { ec := cmd.(*sqlapi.ExecuteCmd) go func() { + start := time.Now() resp := executeExec(ctx, ec.DB, ec.Query, ec.Params) + recordSQLOp(d.coll, "exec", resp.Error, time.Since(start)) if ctx.Err() == nil { receiver.CompleteYield(tag, resp, nil) } @@ -110,7 +111,9 @@ func (d *Dispatcher) handleBegin(ctx context.Context, cmd dispatcher.Command, ta func (d *Dispatcher) handleStmtQuery(ctx context.Context, cmd dispatcher.Command, tag uint64, receiver dispatcher.ResultReceiver) error { sc := cmd.(*sqlapi.StmtQueryCmd) go func() { + start := time.Now() resp := executeStmtQuery(ctx, sc.Stmt, sc.Params) + recordSQLOp(d.coll, "query", resp.Error, time.Since(start)) if ctx.Err() == nil { receiver.CompleteYield(tag, resp, nil) } @@ -121,7 +124,9 @@ func (d *Dispatcher) handleStmtQuery(ctx context.Context, cmd dispatcher.Command func (d *Dispatcher) handleStmtExecute(ctx context.Context, cmd dispatcher.Command, tag uint64, receiver dispatcher.ResultReceiver) error { sc := cmd.(*sqlapi.StmtExecuteCmd) go func() { + start := time.Now() resp := executeStmtExec(ctx, sc.Stmt, sc.Params) + recordSQLOp(d.coll, "exec", resp.Error, time.Since(start)) if ctx.Err() == nil { receiver.CompleteYield(tag, resp, nil) } @@ -141,7 +146,9 
@@ func (d *Dispatcher) handleStmtClose(_ context.Context, cmd dispatcher.Command, func (d *Dispatcher) handleTxQuery(ctx context.Context, cmd dispatcher.Command, tag uint64, receiver dispatcher.ResultReceiver) error { tc := cmd.(*sqlapi.TxQueryCmd) go func() { + start := time.Now() resp := executeTxQuery(ctx, tc.Tx, tc.Query, tc.Params) + recordSQLOp(d.coll, "query", resp.Error, time.Since(start)) if ctx.Err() == nil { receiver.CompleteYield(tag, resp, nil) } @@ -152,7 +159,9 @@ func (d *Dispatcher) handleTxQuery(ctx context.Context, cmd dispatcher.Command, func (d *Dispatcher) handleTxExecute(ctx context.Context, cmd dispatcher.Command, tag uint64, receiver dispatcher.ResultReceiver) error { tc := cmd.(*sqlapi.TxExecuteCmd) go func() { + start := time.Now() resp := executeTxExec(ctx, tc.Tx, tc.Query, tc.Params) + recordSQLOp(d.coll, "exec", resp.Error, time.Since(start)) if ctx.Err() == nil { receiver.CompleteYield(tag, resp, nil) } diff --git a/service/sql/telemetry.go b/service/sql/telemetry.go new file mode 100644 index 000000000..dfe884f58 --- /dev/null +++ b/service/sql/telemetry.go @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: MPL-2.0 + +package sql + +import ( + "time" + + "github.com/wippyai/runtime/api/metrics" +) + +const ( + sqlOpsTotal = "wippy_sql_ops_total" + sqlOpDuration = "wippy_sql_op_duration_seconds" +) + +// recordSQLOp emits per-operation count and latency. Nil-safe. 
+func recordSQLOp(coll metrics.Collector, op string, err error, duration time.Duration) { + if coll == nil { + return + } + result := "ok" + if err != nil { + result = "error" + } + labels := metrics.Labels{"op": op, "result": result} + coll.CounterInc(sqlOpsTotal, labels) + coll.HistogramObserve(sqlOpDuration, duration.Seconds(), labels) +} diff --git a/service/store/kv/store.go b/service/store/kv/store.go index dd44ff1df..c9a8a3e78 100644 --- a/service/store/kv/store.go +++ b/service/store/kv/store.go @@ -7,7 +7,9 @@ import ( "errors" "sort" "sync" + "time" + "github.com/wippyai/runtime/api/metrics" "github.com/wippyai/runtime/api/payload" "github.com/wippyai/runtime/api/registry" "github.com/wippyai/runtime/api/resource" @@ -42,6 +44,7 @@ type Store struct { engine kvapi.Engine dtt payload.Transcoder log *zap.Logger + coll metrics.Collector statusChan chan any id registry.ID namespace string @@ -81,10 +84,13 @@ func NewStoreWithInfo(id registry.ID, namespace string, engine kvapi.Engine, dtt } // Start implements supervisor.Service. -func (s *Store) Start(_ context.Context) (<-chan any, error) { +func (s *Store) Start(ctx context.Context) (<-chan any, error) { s.mu.Lock() defer s.mu.Unlock() s.closed = false + if s.coll == nil { + s.coll = metrics.GetCollector(ctx) + } select { case s.statusChan <- "store.kv started": default: @@ -119,7 +125,9 @@ func (s *Store) StoreInfo(_ context.Context) store.Info { // Get implements store.Store. func (s *Store) Get(_ context.Context, key registry.ID) (payload.Payload, error) { + start := time.Now() ent, err := s.engine.Get(physicalKey(s.namespace, key)) + recordKVOp(s.coll, s.namespace, "get", kvResult(err), time.Since(start)) if err != nil { return nil, mapNotFound(err) } @@ -137,26 +145,34 @@ func (s *Store) Entry(_ context.Context, key registry.ID) (store.VersionedEntry, // Set implements store.Store. A non-zero TTL binds the key to a fresh lease. 
func (s *Store) Set(ctx context.Context, entry store.Entry) error { + start := time.Now() b, err := encodeValue(s.dtt, entry.Value) if err != nil { + recordKVOp(s.coll, s.namespace, "set", "error", time.Since(start)) return err } phys := physicalKey(s.namespace, entry.Key) if entry.TTL > 0 { lease, err := s.engine.GrantLease(ctx, entry.TTL) if err != nil { + recordKVOp(s.coll, s.namespace, "set", "error", time.Since(start)) return err } _, err = s.engine.SetWithLease(phys, b, lease.ID()) + recordKVOp(s.coll, s.namespace, "set", kvResult(err), time.Since(start)) return err } _, err = s.engine.Set(phys, b) + recordKVOp(s.coll, s.namespace, "set", kvResult(err), time.Since(start)) return err } // Delete implements store.Store. func (s *Store) Delete(_ context.Context, key registry.ID) error { - return mapNotFound(s.engine.Delete(physicalKey(s.namespace, key))) + start := time.Now() + err := s.engine.Delete(physicalKey(s.namespace, key)) + recordKVOp(s.coll, s.namespace, "delete", kvResult(err), time.Since(start)) + return mapNotFound(err) } // Has implements store.Store. @@ -241,7 +257,9 @@ func (s *Store) SetIfAbsent(ctx context.Context, entry store.Entry) (bool, error } // Put implements store.Putter. 
-func (s *Store) Put(ctx context.Context, key registry.ID, value payload.Payload, opts store.PutOptions) (store.VersionedEntry, error) { +func (s *Store) Put(ctx context.Context, key registry.ID, value payload.Payload, opts store.PutOptions) (ve store.VersionedEntry, err error) { + start := time.Now() + defer func() { recordKVOp(s.coll, s.namespace, "put", kvResult(err), time.Since(start)) }() if opts.OnlyIfAbsent && opts.HasVersion { return store.VersionedEntry{}, store.ErrInvalidOptions } diff --git a/service/store/kv/telemetry.go b/service/store/kv/telemetry.go new file mode 100644 index 000000000..8cc865a0d --- /dev/null +++ b/service/store/kv/telemetry.go @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: MPL-2.0 + +package kv + +import ( + "errors" + "time" + + "github.com/wippyai/runtime/api/metrics" + "github.com/wippyai/runtime/api/store/kv" +) + +const ( + kvOpsTotal = "wippy_kv_ops_total" + kvOpDuration = "wippy_kv_op_duration_seconds" +) + +// recordKVOp emits per-operation count and latency. Nil-safe. +func recordKVOp(coll metrics.Collector, namespace, op, result string, duration time.Duration) { + if coll == nil { + return + } + labels := metrics.Labels{"namespace": namespace, "op": op, "result": result} + coll.CounterInc(kvOpsTotal, labels) + coll.HistogramObserve(kvOpDuration, duration.Seconds(), labels) +} + +// kvResult classifies an engine error into a label value. 
+func kvResult(err error) string { + if err == nil { + return "ok" + } + if errors.Is(err, kv.ErrKeyNotFound) { + return "not_found" + } + return "error" +} diff --git a/service/store/kv/telemetry_test.go b/service/store/kv/telemetry_test.go new file mode 100644 index 000000000..649e5f469 --- /dev/null +++ b/service/store/kv/telemetry_test.go @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: MPL-2.0 + +package kv + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/wippyai/runtime/api/metrics" + "github.com/wippyai/runtime/api/registry" + "github.com/wippyai/runtime/api/store" + "github.com/wippyai/runtime/internal/telemetrytest" +) + +func TestStore_Telemetry_EmitsOpsMetrics(t *testing.T) { + ctx := context.Background() + s := newTestStore(t, "teltest") + rec := telemetrytest.NewRecorder() + s.coll = rec + + key := registry.ParseID("app:k1") + require.NoError(t, s.Set(ctx, store.Entry{Key: key, Value: jsonVal(`"v1"`)})) + _, err := s.Get(ctx, key) + require.NoError(t, err) + require.NoError(t, s.Delete(ctx, key)) + _, _ = s.Get(ctx, key) + + assert.Equal(t, 1.0, rec.CounterValue(kvOpsTotal, metrics.Labels{"namespace": "teltest", "op": "set", "result": "ok"})) + assert.Equal(t, 1.0, rec.CounterValue(kvOpsTotal, metrics.Labels{"namespace": "teltest", "op": "get", "result": "ok"})) + assert.Equal(t, 1.0, rec.CounterValue(kvOpsTotal, metrics.Labels{"namespace": "teltest", "op": "delete", "result": "ok"})) + assert.Equal(t, 1.0, rec.CounterValue(kvOpsTotal, metrics.Labels{"namespace": "teltest", "op": "get", "result": "not_found"})) + assert.Equal(t, uint64(1), rec.HistogramCount(kvOpDuration, metrics.Labels{"namespace": "teltest", "op": "get", "result": "ok"})) +} diff --git a/service/store/memory/manager.go b/service/store/memory/manager.go index 86ce2bba2..607e0651a 100644 --- a/service/store/memory/manager.go +++ b/service/store/memory/manager.go @@ -7,6 +7,7 @@ import ( "sync" 
"github.com/wippyai/runtime/api/event" + "github.com/wippyai/runtime/api/metrics" "github.com/wippyai/runtime/api/payload" "github.com/wippyai/runtime/api/registry" "github.com/wippyai/runtime/api/resource" @@ -22,6 +23,7 @@ type Manager struct { dtt payload.Transcoder bus event.Bus log *zap.Logger + coll metrics.Collector stores map[registry.ID]*Store mu sync.RWMutex } @@ -31,6 +33,7 @@ func NewManager( bus event.Bus, dtt payload.Transcoder, log *zap.Logger, + coll metrics.Collector, ) *Manager { if log == nil { log = zap.NewNop() @@ -39,6 +42,7 @@ func NewManager( log: log, dtt: dtt, bus: bus, + coll: coll, stores: make(map[registry.ID]*Store), } } @@ -64,6 +68,7 @@ func (m *Manager) Add(ctx context.Context, entry registry.Entry) error { // Create memory store store := NewStore(entry.ID, cfg, m.log) + store.coll = m.coll m.stores[entry.ID] = store // Register with supervisor @@ -129,6 +134,7 @@ func (m *Manager) Update(ctx context.Context, entry registry.Entry) error { // Create new store with updated config newStore := NewStore(entry.ID, cfg, m.log) + newStore.coll = m.coll m.stores[entry.ID] = newStore // Update supervisor entry diff --git a/service/store/memory/manager_test.go b/service/store/memory/manager_test.go index 1a0300fb6..586c27a31 100644 --- a/service/store/memory/manager_test.go +++ b/service/store/memory/manager_test.go @@ -59,7 +59,7 @@ func newTestManager(_ *testing.T) (*Manager, *mockBus) { json.Register(transcoder) bus := &mockBus{} log := zap.NewNop() - mgr := NewManager(bus, transcoder, log) + mgr := NewManager(bus, transcoder, log, nil) return mgr, bus } diff --git a/service/store/memory/memstore.go b/service/store/memory/memstore.go index 76955c75f..3ba973ce6 100644 --- a/service/store/memory/memstore.go +++ b/service/store/memory/memstore.go @@ -4,6 +4,7 @@ package memory import ( "context" + "errors" "sort" "strings" "sync" @@ -11,6 +12,7 @@ import ( memstore "github.com/wippyai/runtime/api/service/store/memory" + 
"github.com/wippyai/runtime/api/metrics" "github.com/wippyai/runtime/api/payload" "github.com/wippyai/runtime/api/registry" "github.com/wippyai/runtime/api/resource" @@ -36,6 +38,7 @@ var ( type Store struct { config *memstore.Config log *zap.Logger + coll metrics.Collector data map[string]*storeEntry statusChan chan any stopChan chan struct{} @@ -79,6 +82,10 @@ func (m *Store) Start(ctx context.Context) (<-chan any, error) { m.mu.Lock() defer m.mu.Unlock() + if m.coll == nil { + m.coll = metrics.GetCollector(ctx) + } + if m.closed { return nil, servicestore.ErrStoreClosed } @@ -131,7 +138,20 @@ func (m *Store) Stop(ctx context.Context) error { } // Get retrieves a value by key -func (m *Store) Get(_ context.Context, key registry.ID) (payload.Payload, error) { +func (m *Store) Get(_ context.Context, key registry.ID) (val payload.Payload, err error) { + start := time.Now() + defer func() { + result := "ok" + if err != nil { + if errors.Is(err, store.ErrKeyNotFound) { + result = "not_found" + } else { + result = "error" + } + } + recordOp(m.coll, m.id.String(), "get", result, time.Since(start)) + }() + m.mu.Lock() defer m.mu.Unlock() @@ -182,7 +202,10 @@ func (m *Store) Entry(_ context.Context, key registry.ID) (store.VersionedEntry, } // Set stores or updates a value with the given key -func (m *Store) Set(_ context.Context, entry store.Entry) error { +func (m *Store) Set(_ context.Context, entry store.Entry) (err error) { + start := time.Now() + defer func() { recordOp(m.coll, m.id.String(), "set", storeErrResult(err), time.Since(start)) }() + m.mu.Lock() defer m.mu.Unlock() @@ -220,7 +243,10 @@ func (m *Store) Set(_ context.Context, entry store.Entry) error { } // Put stores a value with optional absent/version preconditions. 
-func (m *Store) Put(_ context.Context, key registry.ID, value payload.Payload, opts store.PutOptions) (store.VersionedEntry, error) { +func (m *Store) Put(_ context.Context, key registry.ID, value payload.Payload, opts store.PutOptions) (ve store.VersionedEntry, err error) { + start := time.Now() + defer func() { recordOp(m.coll, m.id.String(), "put", storeErrResult(err), time.Since(start)) }() + m.mu.Lock() defer m.mu.Unlock() @@ -278,7 +304,10 @@ func (m *Store) Put(_ context.Context, key registry.ID, value payload.Payload, o } // Delete removes a value with the given key -func (m *Store) Delete(_ context.Context, key registry.ID) error { +func (m *Store) Delete(_ context.Context, key registry.ID) (err error) { + start := time.Now() + defer func() { recordOp(m.coll, m.id.String(), "delete", storeErrResult(err), time.Since(start)) }() + m.mu.Lock() defer m.mu.Unlock() diff --git a/service/store/memory/telemetry.go b/service/store/memory/telemetry.go new file mode 100644 index 000000000..2f3688493 --- /dev/null +++ b/service/store/memory/telemetry.go @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: MPL-2.0 + +package memory + +import ( + "errors" + "time" + + "github.com/wippyai/runtime/api/metrics" + "github.com/wippyai/runtime/api/store" +) + +const ( + memOpsTotal = "wippy_kv_ops_total" + memOpDuration = "wippy_kv_op_duration_seconds" +) + +func storeErrResult(err error) string { + if err == nil { + return "ok" + } + if errors.Is(err, store.ErrKeyNotFound) || errors.Is(err, store.ErrKeyExists) || errors.Is(err, store.ErrVersionMismatch) { + return "not_found" + } + return "error" +} + +// recordOp emits per-operation count and latency. Nil-safe. 
+func recordOp(coll metrics.Collector, namespace, op, result string, duration time.Duration) { + if coll == nil { + return + } + labels := metrics.Labels{"namespace": namespace, "op": op, "result": result} + coll.CounterInc(memOpsTotal, labels) + coll.HistogramObserve(memOpDuration, duration.Seconds(), labels) +} diff --git a/service/temporal/client/factory.go b/service/temporal/client/factory.go index 99df03d64..6cb6197ed 100644 --- a/service/temporal/client/factory.go +++ b/service/temporal/client/factory.go @@ -39,6 +39,7 @@ type Factory interface { // DefaultClientFactory implements Factory type DefaultClientFactory struct { env env.Registry + metricsHandler client.MetricsHandler dataConverter func() converter.DataConverter clientInterceptors []interceptor.ClientInterceptor } @@ -48,11 +49,13 @@ func NewDefaultClientFactory( env env.Registry, dataConverter func() converter.DataConverter, clientInterceptors []interceptor.ClientInterceptor, + metricsHandler client.MetricsHandler, ) *DefaultClientFactory { return &DefaultClientFactory{ env: env, dataConverter: dataConverter, clientInterceptors: clientInterceptors, + metricsHandler: metricsHandler, } } @@ -85,9 +88,10 @@ func (f *DefaultClientFactory) CreateClient(ctx context.Context, logger *zap.Log // buildClientOptions constructs Temporal client options from config func (f *DefaultClientFactory) buildClientOptions(ctx context.Context, logger *zap.Logger, config *api.ClientConfig) (client.Options, error) { opts := client.Options{ - HostPort: config.Address, - Namespace: config.Namespace, - Logger: NewZapAdapter(logger), + HostPort: config.Address, + Namespace: config.Namespace, + Logger: NewZapAdapter(logger), + MetricsHandler: f.metricsHandler, } // Set data converter if available diff --git a/service/temporal/client/factory_test.go b/service/temporal/client/factory_test.go index 87806fe5a..6493fe029 100644 --- a/service/temporal/client/factory_test.go +++ b/service/temporal/client/factory_test.go @@ -27,7 +27,7 
@@ import ( func TestNewDefaultClientFactory(t *testing.T) { env := &mockEnvRegistry{values: make(map[string]string)} - factory := NewDefaultClientFactory(env, nil, nil) + factory := NewDefaultClientFactory(env, nil, nil, nil) require.NotNil(t, factory) assert.Equal(t, env, factory.env) @@ -37,7 +37,7 @@ func TestNewDefaultClientFactory(t *testing.T) { func TestDefaultClientFactory_buildClientOptions(t *testing.T) { t.Run("basic options", func(t *testing.T) { - factory := NewDefaultClientFactory(nil, newTestDataConverterProvider(), nil) + factory := NewDefaultClientFactory(nil, newTestDataConverterProvider(), nil, nil) config := &api.ClientConfig{ Address: "localhost:7233", Namespace: "default", @@ -57,7 +57,7 @@ func TestDefaultClientFactory_buildClientOptions(t *testing.T) { t.Run("with data converter", func(t *testing.T) { dc := &mockDataConverter{} - factory := NewDefaultClientFactory(nil, func() converter.DataConverter { return dc }, nil) + factory := NewDefaultClientFactory(nil, func() converter.DataConverter { return dc }, nil, nil) config := &api.ClientConfig{ Address: "localhost:7233", Namespace: "test", @@ -75,7 +75,7 @@ func TestDefaultClientFactory_buildClientOptions(t *testing.T) { t.Run("with interceptors", func(t *testing.T) { interceptors := []interceptor.ClientInterceptor{&mockClientInterceptor{}} - factory := NewDefaultClientFactory(nil, newTestDataConverterProvider(), interceptors) + factory := NewDefaultClientFactory(nil, newTestDataConverterProvider(), interceptors, nil) config := &api.ClientConfig{ Address: "localhost:7233", Namespace: "test", @@ -99,7 +99,7 @@ func newTestDataConverterProvider() func() converter.DataConverter { func TestDefaultClientFactory_configureAuth(t *testing.T) { t.Run("no auth", func(t *testing.T) { - factory := NewDefaultClientFactory(nil, nil, nil) + factory := NewDefaultClientFactory(nil, nil, nil, nil) config := &api.ClientConfig{ Auth: api.AuthConfig{Type: api.AuthTypeNone}, } @@ -112,7 +112,7 @@ func 
TestDefaultClientFactory_configureAuth(t *testing.T) { }) t.Run("api key direct", func(t *testing.T) { - factory := NewDefaultClientFactory(nil, nil, nil) + factory := NewDefaultClientFactory(nil, nil, nil, nil) config := &api.ClientConfig{ Auth: api.AuthConfig{ Type: api.AuthTypeAPIKey, @@ -129,7 +129,7 @@ func TestDefaultClientFactory_configureAuth(t *testing.T) { t.Run("api key from env", func(t *testing.T) { env := &mockEnvRegistry{values: map[string]string{"TEMPORAL_API_KEY": "env-key"}} - factory := NewDefaultClientFactory(env, nil, nil) + factory := NewDefaultClientFactory(env, nil, nil, nil) config := &api.ClientConfig{ Auth: api.AuthConfig{ Type: api.AuthTypeAPIKey, @@ -145,7 +145,7 @@ func TestDefaultClientFactory_configureAuth(t *testing.T) { }) t.Run("api key from env without registry fails", func(t *testing.T) { - factory := NewDefaultClientFactory(nil, nil, nil) + factory := NewDefaultClientFactory(nil, nil, nil, nil) config := &api.ClientConfig{ Auth: api.AuthConfig{ Type: api.AuthTypeAPIKey, @@ -167,7 +167,7 @@ func TestDefaultClientFactory_configureAuth(t *testing.T) { err := os.WriteFile(keyFile, []byte("file-api-key\n"), 0600) require.NoError(t, err) - factory := NewDefaultClientFactory(nil, nil, nil) + factory := NewDefaultClientFactory(nil, nil, nil, nil) config := &api.ClientConfig{ Auth: api.AuthConfig{ Type: api.AuthTypeAPIKey, @@ -183,7 +183,7 @@ func TestDefaultClientFactory_configureAuth(t *testing.T) { }) t.Run("api key from missing file fails", func(t *testing.T) { - factory := NewDefaultClientFactory(nil, nil, nil) + factory := NewDefaultClientFactory(nil, nil, nil, nil) config := &api.ClientConfig{ Auth: api.AuthConfig{ Type: api.AuthTypeAPIKey, @@ -199,7 +199,7 @@ func TestDefaultClientFactory_configureAuth(t *testing.T) { }) t.Run("api key no source fails", func(t *testing.T) { - factory := NewDefaultClientFactory(nil, nil, nil) + factory := NewDefaultClientFactory(nil, nil, nil, nil) config := &api.ClientConfig{ Auth: 
api.AuthConfig{ Type: api.AuthTypeAPIKey, @@ -214,7 +214,7 @@ func TestDefaultClientFactory_configureAuth(t *testing.T) { }) t.Run("unsupported auth type fails", func(t *testing.T) { - factory := NewDefaultClientFactory(nil, nil, nil) + factory := NewDefaultClientFactory(nil, nil, nil, nil) config := &api.ClientConfig{ Auth: api.AuthConfig{ Type: "unknown", @@ -231,7 +231,7 @@ func TestDefaultClientFactory_configureAuth(t *testing.T) { func TestDefaultClientFactory_loadClientCertificate(t *testing.T) { t.Run("missing cert source fails", func(t *testing.T) { - factory := NewDefaultClientFactory(nil, nil, nil) + factory := NewDefaultClientFactory(nil, nil, nil, nil) auth := api.AuthConfig{ KeyPEM: "some-key", } @@ -243,7 +243,7 @@ func TestDefaultClientFactory_loadClientCertificate(t *testing.T) { }) t.Run("missing key source fails", func(t *testing.T) { - factory := NewDefaultClientFactory(nil, nil, nil) + factory := NewDefaultClientFactory(nil, nil, nil, nil) auth := api.AuthConfig{ CertPEM: "some-cert", } @@ -255,7 +255,7 @@ func TestDefaultClientFactory_loadClientCertificate(t *testing.T) { }) t.Run("key from env without registry fails", func(t *testing.T) { - factory := NewDefaultClientFactory(nil, nil, nil) + factory := NewDefaultClientFactory(nil, nil, nil, nil) auth := api.AuthConfig{ CertPEM: "some-cert", KeyPEMEnv: "TEMPORAL_KEY", @@ -268,7 +268,7 @@ func TestDefaultClientFactory_loadClientCertificate(t *testing.T) { }) t.Run("missing cert file fails", func(t *testing.T) { - factory := NewDefaultClientFactory(nil, nil, nil) + factory := NewDefaultClientFactory(nil, nil, nil, nil) auth := api.AuthConfig{ CertFile: "/nonexistent/cert.pem", KeyPEM: "some-key", @@ -285,7 +285,7 @@ func TestDefaultClientFactory_loadClientCertificate(t *testing.T) { certFile := filepath.Join(tmpDir, "cert.pem") require.NoError(t, os.WriteFile(certFile, []byte("test-cert"), 0600)) - factory := NewDefaultClientFactory(nil, nil, nil) + factory := NewDefaultClientFactory(nil, nil, 
nil, nil) auth := api.AuthConfig{ CertFile: certFile, KeyFile: "/nonexistent/key.pem", @@ -298,7 +298,7 @@ func TestDefaultClientFactory_loadClientCertificate(t *testing.T) { }) t.Run("invalid certificate fails", func(t *testing.T) { - factory := NewDefaultClientFactory(nil, nil, nil) + factory := NewDefaultClientFactory(nil, nil, nil, nil) auth := api.AuthConfig{ CertPEM: "invalid-cert", KeyPEM: "invalid-key", @@ -313,7 +313,7 @@ func TestDefaultClientFactory_loadClientCertificate(t *testing.T) { func TestDefaultClientFactory_configureTLS(t *testing.T) { t.Run("TLS disabled", func(t *testing.T) { - factory := NewDefaultClientFactory(nil, nil, nil) + factory := NewDefaultClientFactory(nil, nil, nil, nil) config := &api.ClientConfig{ TLS: nil, } @@ -325,7 +325,7 @@ func TestDefaultClientFactory_configureTLS(t *testing.T) { }) t.Run("TLS enabled but not active", func(t *testing.T) { - factory := NewDefaultClientFactory(nil, nil, nil) + factory := NewDefaultClientFactory(nil, nil, nil, nil) config := &api.ClientConfig{ TLS: &api.TLSConfig{Enabled: false}, } @@ -337,7 +337,7 @@ func TestDefaultClientFactory_configureTLS(t *testing.T) { }) t.Run("TLS with server name", func(t *testing.T) { - factory := NewDefaultClientFactory(nil, nil, nil) + factory := NewDefaultClientFactory(nil, nil, nil, nil) config := &api.ClientConfig{ TLS: &api.TLSConfig{ Enabled: true, @@ -354,7 +354,7 @@ func TestDefaultClientFactory_configureTLS(t *testing.T) { }) t.Run("TLS insecure skip verify", func(t *testing.T) { - factory := NewDefaultClientFactory(nil, nil, nil) + factory := NewDefaultClientFactory(nil, nil, nil, nil) config := &api.ClientConfig{ TLS: &api.TLSConfig{ Enabled: true, @@ -370,7 +370,7 @@ func TestDefaultClientFactory_configureTLS(t *testing.T) { }) t.Run("TLS with missing CA file fails", func(t *testing.T) { - factory := NewDefaultClientFactory(nil, nil, nil) + factory := NewDefaultClientFactory(nil, nil, nil, nil) config := &api.ClientConfig{ TLS: &api.TLSConfig{ Enabled: 
true, @@ -390,7 +390,7 @@ func TestDefaultClientFactory_configureTLS(t *testing.T) { caFile := filepath.Join(tmpDir, "ca.pem") require.NoError(t, os.WriteFile(caFile, []byte("invalid-ca"), 0600)) - factory := NewDefaultClientFactory(nil, nil, nil) + factory := NewDefaultClientFactory(nil, nil, nil, nil) config := &api.ClientConfig{ TLS: &api.TLSConfig{ Enabled: true, @@ -434,7 +434,7 @@ func TestRewriteClientHeadersInterceptor(t *testing.T) { } func TestConfigureTransportHeaders_AppendsInterceptor(t *testing.T) { - factory := NewDefaultClientFactory(nil, nil, nil) + factory := NewDefaultClientFactory(nil, nil, nil, nil) opts := &client.Options{} factory.configureTransportHeaders(opts) diff --git a/service/temporal/client/manager.go b/service/temporal/client/manager.go index 210268db9..e264376cc 100644 --- a/service/temporal/client/manager.go +++ b/service/temporal/client/manager.go @@ -18,6 +18,7 @@ import ( "github.com/wippyai/runtime/api/supervisor" "github.com/wippyai/runtime/internal/entry" "github.com/wippyai/runtime/service/temporal/peer" + "go.temporal.io/sdk/client" "go.temporal.io/sdk/converter" "go.temporal.io/sdk/interceptor" "go.uber.org/zap" @@ -30,6 +31,7 @@ type Manager struct { dtt payload.Transcoder bus event.Bus env env.Registry + metricsHandler client.MetricsHandler factory Factory dataConverter converter.DataConverter dataConverterProvider func() converter.DataConverter @@ -94,6 +96,14 @@ func WithInterceptors(interceptors []interceptor.ClientInterceptor) ManagerOptio } } +// WithMetricsHandler sets the Temporal SDK metrics handler for the Manager, +// bridging the SDK's workflow/worker metrics onto the wippy collector. 
+func WithMetricsHandler(h client.MetricsHandler) ManagerOption { + return func(m *Manager) { + m.metricsHandler = h + } +} + // WithFactory sets a custom client factory for the Manager func WithFactory(factory Factory) ManagerOption { return func(m *Manager) { @@ -131,7 +141,7 @@ func NewManager(opts ...ManagerOption) (*Manager, error) { if provider == nil && m.dataConverter != nil { provider = func() converter.DataConverter { return m.dataConverter } } - m.factory = NewDefaultClientFactory(m.env, provider, m.clientInterceptors) + m.factory = NewDefaultClientFactory(m.env, provider, m.clientInterceptors, m.metricsHandler) } return m, nil diff --git a/service/temporal/client/metrics_handler.go b/service/temporal/client/metrics_handler.go new file mode 100644 index 000000000..7e83c689a --- /dev/null +++ b/service/temporal/client/metrics_handler.go @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: MPL-2.0 + +package client + +import ( + "time" + + "github.com/wippyai/runtime/api/metrics" + "go.temporal.io/sdk/client" +) + +// metricsHandler bridges the Temporal SDK metrics.Handler interface onto the +// wippy metrics collector, so the SDK's own workflow/worker/poller metrics +// flow to both the Prometheus and OTel sinks. Tags become metric labels; a nil +// collector makes every method a no-op. +type metricsHandler struct { + coll metrics.Collector + tags metrics.Labels +} + +// NewMetricsHandler returns a Temporal metrics.Handler backed by coll. 
+func NewMetricsHandler(coll metrics.Collector) client.MetricsHandler { + return &metricsHandler{coll: coll} +} + +func (h *metricsHandler) WithTags(tags map[string]string) client.MetricsHandler { + merged := make(metrics.Labels, len(h.tags)+len(tags)) + for k, v := range h.tags { + merged[k] = v + } + for k, v := range tags { + merged[k] = v + } + return &metricsHandler{coll: h.coll, tags: merged} +} + +type metricsCounterFunc func(int64) + +func (f metricsCounterFunc) Inc(d int64) { f(d) } + +type metricsGaugeFunc func(float64) + +func (f metricsGaugeFunc) Update(d float64) { f(d) } + +type metricsTimerFunc func(time.Duration) + +func (f metricsTimerFunc) Record(d time.Duration) { f(d) } + +func (h *metricsHandler) Counter(name string) client.MetricsCounter { + return metricsCounterFunc(func(d int64) { + if h.coll != nil { + h.coll.CounterAdd(name, float64(d), h.tags) + } + }) +} + +func (h *metricsHandler) Gauge(name string) client.MetricsGauge { + return metricsGaugeFunc(func(d float64) { + if h.coll != nil { + h.coll.GaugeSet(name, d, h.tags) + } + }) +} + +func (h *metricsHandler) Timer(name string) client.MetricsTimer { + return metricsTimerFunc(func(d time.Duration) { + if h.coll != nil { + h.coll.HistogramObserve(name, d.Seconds(), h.tags) + } + }) +} diff --git a/service/temporal/client/metrics_handler_test.go b/service/temporal/client/metrics_handler_test.go new file mode 100644 index 000000000..55063e508 --- /dev/null +++ b/service/temporal/client/metrics_handler_test.go @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: MPL-2.0 + +package client + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/wippyai/runtime/api/metrics" + "github.com/wippyai/runtime/internal/telemetrytest" +) + +func TestMetricsHandler_CounterGaugeTimer(t *testing.T) { + rec := telemetrytest.NewRecorder() + h := NewMetricsHandler(rec) + + h.Counter("temporal_workflow_completed").Inc(1) + h.Counter("temporal_workflow_completed").Inc(2) + 
h.Gauge("temporal_worker_task_queue_active").Update(5) + h.Timer("temporal_workflow_task_latency").Record(100 * time.Millisecond) + + assert.Equal(t, 3.0, rec.CounterValue("temporal_workflow_completed", metrics.Labels{})) + assert.Equal(t, 5.0, rec.GaugeValue("temporal_worker_task_queue_active", metrics.Labels{})) + assert.Equal(t, uint64(1), rec.HistogramCount("temporal_workflow_task_latency", metrics.Labels{})) +} + +func TestMetricsHandler_WithTagsMergesLabels(t *testing.T) { + rec := telemetrytest.NewRecorder() + h := NewMetricsHandler(rec).WithTags(map[string]string{"namespace": "default", "task_queue": "orders"}) + + h.Counter("temporal_workflow_completed").Inc(1) + + assert.Equal(t, 1.0, rec.CounterValue("temporal_workflow_completed", + metrics.Labels{"namespace": "default", "task_queue": "orders"})) +} + +func TestMetricsHandler_NilCollector(t *testing.T) { + h := NewMetricsHandler(nil) + h.Counter("x").Inc(1) + h.Gauge("y").Update(1) + h.Timer("z").Record(time.Millisecond) +} + +func TestMetricsHandler_WithTagsChains(t *testing.T) { + rec := telemetrytest.NewRecorder() + h := NewMetricsHandler(rec).WithTags(map[string]string{"a": "1"}).WithTags(map[string]string{"b": "2"}) + + h.Counter("c").Inc(1) + + assert.Equal(t, 1.0, rec.CounterValue("c", metrics.Labels{"a": "1", "b": "2"})) +} diff --git a/tests/otel_e2e_test.go b/tests/otel_e2e_test.go new file mode 100644 index 000000000..5d4c8125e --- /dev/null +++ b/tests/otel_e2e_test.go @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: MPL-2.0
+ +//go:build integration + +package tests + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net" + "net/http" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + otelapi "github.com/wippyai/runtime/api/service/otel" + "github.com/wippyai/runtime/service/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" + "go.uber.org/zap" +) + +// TestOTLP_TracesReachJaeger validates the runtime's real OTLP trace pipeline +// end to end: service/otel.InitializeProvider builds a TracerProvider whose +// OTLP/HTTP exporter ships spans to a live Jaeger collector, and Jaeger's API +// then serves them back. Run with Jaeger up (`make otel-up`). +func TestOTLP_TracesReachJaeger(t *testing.T) { + const ( + otlpEndpoint = "localhost:4318" + jaegerAPI = "http://localhost:16686" + service = "wippy-e2e-test" + op = "e2e.root" + ) + + if !reachable(otlpEndpoint, 2*time.Second) { + t.Skipf("jaeger OTLP endpoint %s not reachable - run `make otel-up`", otlpEndpoint) + } + + tp, err := otel.InitializeProvider(context.Background(), otelapi.Config{ + Enabled: true, + TracesEnabled: true, + Endpoint: otlpEndpoint, + Protocol: "http/protobuf", + Insecure: true, + ServiceName: service, + SampleRate: 1.0, + }, zap.NewNop()) + require.NoError(t, err) + + tracer := tp.Tracer("wippy-runtime") + _, span := tracer.Start(context.Background(), op, + trace.WithAttributes(attribute.String("e2e", "otel-traces"))) + span.End() + + // Shutdown flushes the BatchSpanProcessor so the span reaches Jaeger. 
+ require.NoError(t, otel.ShutdownTracerProvider(context.Background(), tp, zap.NewNop())) + + var found bool + for i := 0; i < 40; i++ { + if jaegerHasOperation(jaegerAPI, service, op) { + found = true + break + } + time.Sleep(500 * time.Millisecond) + } + assert.True(t, found, "span %q for service %q never appeared in Jaeger", op, service) +} + +func jaegerHasOperation(jaegerAPI, service, op string) bool { + url := fmt.Sprintf("%s/api/traces?service=%s&limit=50", jaegerAPI, service) + req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, url, nil) + if err != nil { + return false + } + resp, err := http.DefaultClient.Do(req) + if err != nil { + return false + } + defer func() { _ = resp.Body.Close() }() + if resp.StatusCode != http.StatusOK { + return false + } + body, err := io.ReadAll(resp.Body) + if err != nil { + return false + } + var parsed struct { + Data []struct { + Spans []struct { + OperationName string `json:"operationName"` + } `json:"spans"` + } `json:"data"` + } + if json.Unmarshal(body, &parsed) != nil { + return false + } + for _, tr := range parsed.Data { + for _, sp := range tr.Spans { + if sp.OperationName == op { + return true + } + } + } + return false +} + +func reachable(addr string, timeout time.Duration) bool { + dialer := net.Dialer{Timeout: timeout} + conn, err := dialer.DialContext(context.Background(), "tcp", addr) + if err != nil { + return false + } + _ = conn.Close() + return true +}