diff --git a/python/cerebrium/.env.example b/python/cerebrium/.env.example new file mode 100644 index 0000000..8727610 --- /dev/null +++ b/python/cerebrium/.env.example @@ -0,0 +1,21 @@ +# Cerebrium secrets — set these in the Cerebrium dashboard under +# Project Settings → Secrets, or with `cerebrium secrets`. They surface +# inside the container as environment variables. + +# Last9 OTLP HTTP endpoint base — see Integrations → OpenTelemetry in app.last9.io +OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp-aps1.last9.io + +# Last9 auth header in OTEL env-var format (note the inner '='): +# HEADER_NAME=HEADER_VALUE +# Copy the full Basic value from Last9's OpenTelemetry integration page. +OTEL_EXPORTER_OTLP_HEADERS=Authorization=Basic YOUR_LAST9_BASIC_TOKEN + +# OTLP over HTTP/protobuf (4318 path semantics, not 4317 gRPC). +OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf + +# Optional: override what shows up as service.name in Last9. Defaults +# to the value of CEREBRIUM_APP_NAME if present. +OTEL_SERVICE_NAME=otel-autoinstrument-last9 + +# Optional: surface deployment.environment on every span/metric. +OTEL_RESOURCE_ATTRIBUTES=deployment.environment=production diff --git a/python/cerebrium/.gitignore b/python/cerebrium/.gitignore new file mode 100644 index 0000000..8ee566b --- /dev/null +++ b/python/cerebrium/.gitignore @@ -0,0 +1,27 @@ +# Environment/secrets +.env +.env.local +.env.*.local + +# Cerebrium build artifacts +.cerebrium/ +build/ + +# Python +__pycache__/ +*.pyc +*.pyo +.venv/ +venv/ + +# IDE +.idea/ +.vscode/ +*.swp + +# OS +.DS_Store +Thumbs.db + +# Logs +*.log diff --git a/python/cerebrium/README.md b/python/cerebrium/README.md new file mode 100644 index 0000000..fd0cacb --- /dev/null +++ b/python/cerebrium/README.md @@ -0,0 +1,83 @@ +# Cerebrium + OpenTelemetry Auto-Instrumentation → Last9 + +Deploy a FastAPI app on Cerebrium with zero OpenTelemetry code in your source files. `opentelemetry-instrument` runs in the entrypoint and auto-patches FastAPI, `requests`, `httpx`, and logging at process startup. 
Traces ship to Last9 over OTLP/HTTP. + +This complements Cerebrium's platform-side **metrics export** (resource and execution metrics shipped from the platform itself). Together you get metrics + traces in Last9. + +## Prerequisites + +- Cerebrium account with the CLI installed: `pip install cerebrium` +- Last9 account — grab the OTLP HTTP endpoint and Auth Header from **Integrations → OpenTelemetry** in [app.last9.io](https://app.last9.io) + +## Quick Start + +1. **Clone this directory and set up secrets in Cerebrium.** + + ```bash + cerebrium login + cerebrium secrets set OTEL_EXPORTER_OTLP_ENDPOINT https://otlp-aps1.last9.io + cerebrium secrets set OTEL_EXPORTER_OTLP_HEADERS 'Authorization=Basic YOUR_LAST9_BASIC_TOKEN' + cerebrium secrets set OTEL_EXPORTER_OTLP_PROTOCOL http/protobuf + cerebrium secrets set OTEL_SERVICE_NAME otel-autoinstrument-last9 + ``` + + See `.env.example` for the full list of variables. + +2. **Deploy.** + + ```bash + cerebrium deploy + ``` + + Build runs `opentelemetry-bootstrap --action=install` which detects every supported library in your pip deps and pulls the matching instrumentation package. Look for it in the build logs. + +3. **Invoke the app.** + + ```bash + curl -X POST https://api.cortex.cerebrium.ai/v4/p-XXXXXX/otel-autoinstrument-last9/predict \ + -H "Authorization: Bearer YOUR_CEREBRIUM_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{"prompt": "trace me", "run_id": "test-1"}' + ``` + +4. **View traces in Last9.** + + Open [Traces Explorer](https://app.last9.io/traces), filter by `service.name = otel-autoinstrument-last9`. You should see a `POST /predict` parent span with child `GET` spans for the two `httpbin.org` calls. + +## How it works + +- `cerebrium.toml` defines `entrypoint = ["opentelemetry-instrument", "uvicorn", "main:app", ...]`. Cerebrium runs that command directly, so the OTel launcher gets first-class control of process startup. 
+- `opentelemetry-instrument` reads `OTEL_*` env vars, sets `PYTHONPATH` to OTel's `sitecustomize.py`, then `exec`s your command. From that point every Python import gets the OTel-patched version of the library. +- `main.py` is just FastAPI. Nothing else. + +## What gets auto-instrumented + +Anything in the dependencies that has an `opentelemetry-instrumentation-*` package on PyPI. Common picks for AI workloads: + +| Library | Captures | +|---|---| +| `fastapi` | Request span per route, status code, route template | +| `requests`, `httpx`, `urllib3` | Outbound HTTP spans with method, URL, status | +| `logging` | Trace and span IDs injected into log records | +| `openai`, `anthropic` | Per-call LLM spans with model, token counts | +| `sqlalchemy`, `psycopg2`, `redis` | DB and cache spans | + +To add more, list them in `[cerebrium.dependencies.pip]` and re-deploy — `opentelemetry-bootstrap` picks them up. + +## Combining with Cerebrium's metrics export + +This example handles **traces only**. For platform-side **metrics** (CPU, GPU, container counts, run latencies), configure Cerebrium's dashboard: + +- **Integrations → Metrics Export → Custom OTLP** +- Endpoint: `https://otlp-aps1.last9.io` (base URL — Cerebrium auto-appends `/v1/metrics`) +- Auth Header Name: `Authorization` +- Auth Header Value: `Basic YOUR_LAST9_BASIC_TOKEN` + +Full guide: [Cerebrium integration on docs.last9.io](https://last9.io/docs/integrations/cerebrium) + +## Troubleshooting + +- **No traces in Last9.** Check Cerebrium build logs for the `opentelemetry-bootstrap` output — it must list at least `fastapi` and `requests`. If absent, the deps didn't install or the shell command failed. +- **`AttributeError: ... has no attribute 'instrument'` at startup.** A library was imported before `opentelemetry-instrument` could patch it — make sure no `pre_build_commands` or custom shell commands import your app modules. 
+- **Header malformed errors from OTLP.** `OTEL_EXPORTER_OTLP_HEADERS` uses `=` between header name and value, not `:`. Correct format: `Authorization=Basic YOUR_LAST9_BASIC_TOKEN`. +- **Spans appear but with no parent/child link.** A library is being imported at module top level before the sitecustomize fires. Move the import inside the function, or add an explicit `*Instrumentor().instrument()` call in a small bootstrap module loaded first. diff --git a/python/cerebrium/cerebrium.toml b/python/cerebrium/cerebrium.toml new file mode 100644 index 0000000..31911a9 --- /dev/null +++ b/python/cerebrium/cerebrium.toml @@ -0,0 +1,51 @@ +# Cerebrium app with OpenTelemetry auto-instrumentation exporting to Last9. +# main.py contains zero OTel code — opentelemetry-instrument in the entrypoint +# does all the patching at process startup. + +[cerebrium.deployment] +name = "otel-autoinstrument-last9" +python_version = "3.12" +include = ["*"] +exclude = [".*", "__pycache__", "*.pyc"] + +# Install all available instrumentors after pip deps are resolved. +# opentelemetry-bootstrap detects every supported library in the env and +# pins the matching instrumentation package for opentelemetry-instrument. +shell_commands = [ + "opentelemetry-bootstrap --action=install", +] + +[cerebrium.hardware] +cpu = 1 +memory = 2 +compute = "CPU" + +[cerebrium.scaling] +min_replicas = 0 +max_replicas = 1 +cooldown = 60 + +[cerebrium.dependencies.pip] +fastapi = "*" +uvicorn = "*" +requests = "*" +httpx = "*" +# Pin OTel core + semconv together. Without this, pip can resolve a newer +# exporter-otlp-proto-common (which imports semconv._incubating) against an +# older semantic-conventions wheel — startup fails with ModuleNotFoundError. 
+opentelemetry-distro = ">=0.51b0"
+opentelemetry-exporter-otlp = ">=1.30.0"
+opentelemetry-semantic-conventions = ">=0.51b0"
+
+[cerebrium.runtime.custom]
+port = 8000
+healthcheck_endpoint = "/health"
+readycheck_endpoint = "/ready"
+# Prepending opentelemetry-instrument is the auto-instrumentation hook.
+# It sets PYTHONPATH to the OTel sitecustomize, then execs uvicorn.
+entrypoint = [
+    "opentelemetry-instrument",
+    "uvicorn", "main:app",
+    "--host", "0.0.0.0",
+    "--port", "8000",
+]
diff --git a/python/cerebrium/main.py b/python/cerebrium/main.py
new file mode 100644
index 0000000..a093182
--- /dev/null
+++ b/python/cerebrium/main.py
@@ -0,0 +1,72 @@
+"""Cerebrium app — zero OpenTelemetry code.
+
+All tracing is wired by `opentelemetry-instrument` in the entrypoint
+(see cerebrium.toml). FastAPI, requests, and httpx are auto-patched.
+"""
+
+import logging
+import time
+
+import requests
+from fastapi import FastAPI
+from pydantic import BaseModel
+
+logging.basicConfig(level=logging.INFO)
+log = logging.getLogger(__name__)
+
+# Downstream endpoints called on every /predict so that each request makes
+# outbound HTTP calls alongside the inbound one.
+DOWNSTREAM_URLS = ("https://httpbin.org/uuid", "https://httpbin.org/headers")
+DOWNSTREAM_TIMEOUT_S = 5
+# Reported in place of an HTTP status when a downstream call fails outright.
+DOWNSTREAM_FAILED = -1
+
+app = FastAPI()
+
+
+class PredictRequest(BaseModel):
+    """Request body for POST /predict."""
+
+    prompt: str = "hello world"
+    run_id: str | None = None
+
+
+@app.get("/health")
+def health():
+    """Health probe; matches healthcheck_endpoint in cerebrium.toml."""
+    return {"status": "ok"}
+
+
+@app.get("/ready")
+def ready():
+    """Readiness probe; matches readycheck_endpoint in cerebrium.toml."""
+    return {"status": "ok"}
+
+
+@app.post("/predict")
+def predict(req: PredictRequest):
+    """Call each downstream URL and report its status and the total latency.
+
+    A failed downstream call is logged and recorded as DOWNSTREAM_FAILED
+    instead of failing the whole request.
+    """
+    log.info("predict: prompt_len=%d run_id=%s", len(req.prompt), req.run_id)
+    start = time.perf_counter()
+    statuses = []
+    for url in DOWNSTREAM_URLS:
+        try:
+            r = requests.get(url, timeout=DOWNSTREAM_TIMEOUT_S)
+        except requests.RequestException as e:
+            # Only request-level failures are expected here; anything else
+            # is a programming error and must not be masked as a -1 status.
+            log.warning("downstream call failed: url=%s err=%s", url, e)
+            statuses.append(DOWNSTREAM_FAILED)
+        else:
+            statuses.append(r.status_code)
+    latency_ms = (time.perf_counter() - start) * 1000.0
+    log.info("predict: done in %.1fms statuses=%s", latency_ms, statuses)
+    return {
+        "prompt": req.prompt,
+        "downstream_status": statuses,
+        "latency_ms": round(latency_ms, 2),
+    }