Skip to content

Commit bb1bd01

Browse files
committed
DLPX-96312 Add InfluxDB/Telegraf infrastructure for Engine Performance Analytics
PR URL: https://www.github.com/delphix/performance-diagnostics/pull/119
1 parent 1a46d8d commit bb1bd01

8 files changed

Lines changed: 296 additions & 7 deletions

File tree

debian/control

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,6 @@ Standards-Version: 4.1.2
1313

1414
Package: performance-diagnostics
1515
Architecture: any
16-
Depends: python3-bpfcc, python3-minimal, python3-psutil, telegraf, docker.io
16+
Depends: python3-bpfcc, python3-minimal, python3-psutil, telegraf, docker.io, influxdb2
1717
Description: eBPF-based Performance Diagnostic Tools
1818
A collection of eBPF-based tools for diagnosing performance issues.

debian/rules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,3 +26,6 @@ override_dh_auto_install:
2626
dh_install telegraf/delphix-telegraf-service telegraf/perf_playbook /usr/bin
2727
dh_install telegraf/delphix-telegraf.service /lib/systemd/system
2828
dh_install telegraf/telegraf* telegraf/*.sh /etc/telegraf
29+
dh_install influxdb/delphix-influxdb-service influxdb/delphix-influxdb-init /usr/bin
30+
dh_install influxdb/delphix-influxdb.service /lib/systemd/system
31+
dh_install influxdb/influxdb.conf influxdb/influxdb-init.conf /etc/influxdb

influxdb/delphix-influxdb-init

Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
#!/bin/bash -eu
2+
#
3+
# Copyright (c) 2026 by Delphix. All rights reserved.
4+
#
5+
# SPDX-License-Identifier: GPL-2.0-or-later
6+
#
7+
# One-time InfluxDB initialization: creates org, bucket, admin token,
8+
# a read-only token for DCT Smart Proxy, and appends the
9+
# [[outputs.influxdb_v2]] stanza to /etc/telegraf/telegraf.base.
10+
# Skips setup if InfluxDB is already initialized.
11+
#
12+
13+
# Local InfluxDB endpoint and the files this script reads or writes.
INFLUXDB_URL='http://127.0.0.1:8086'
INFLUXDB_CONFIG_DIR='/etc/influxdb'
TELEGRAF_BASE='/etc/telegraf/telegraf.base'
INFLUXDB_INIT_CONF="${INFLUXDB_CONFIG_DIR}/influxdb-init.conf"
INFLUXDB_META_FILE="${INFLUXDB_CONFIG_DIR}/influxdb_meta"
# Resume marker: written right after /api/v2/setup succeeds so that an
# interrupted run can pick up again before the metadata file exists.
INFLUXDB_SETUP_STATE_FILE="${INFLUXDB_CONFIG_DIR}/influxdb_setup_state"

# Pull in the tunables (org, bucket, retention, wait parameters).
# shellcheck source=/etc/influxdb/influxdb-init.conf
# shellcheck disable=SC1091
. "${INFLUXDB_INIT_CONF}"

# Assigned after the configuration is sourced, so these values always win.
INFLUXDB_ADMIN_USER='admin'
INFLUXDB_ADMIN_PASSWORD="$(openssl rand -hex 16)"
29+
30+
#
31+
# Log a message to stderr with a timestamp.
32+
#
33+
log() {
    # All arguments are joined with spaces into one message; the timestamp
    # is UTC in ISO-8601 form and the line goes to stderr only.
    printf '[%s] %s\n' "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$*" >&2
}
36+
37+
#
38+
# Extract a field from a JSON string using python3.
39+
#
40+
#
# Print one field of a JSON document.
#
# Arguments:
#   $1 - JSON text.
#   $2 - field selector written as a chain of subscripts, e.g.
#        "['auth']['token']" or "['items'][0]['id']".
# Outputs:
#   The selected value on stdout.
# Returns:
#   0 on success; 1 (after logging an error) when the JSON is invalid, the
#   selector is malformed, the field is missing, or the value is null.
#
# The selector is handed to python3 as an argument and walked step by step;
# it is never spliced into the Python source, so it cannot execute code.
# The JSON itself travels on stdin so it does not show up in process
# listings.
#
json_field() {
    local json="$1"
    local field="$2"

    if ! printf '%s' "$json" | python3 -c '
import json
import re
import sys

path = sys.argv[1]
# One subscript: a single-quoted key, a double-quoted key, or an integer.
step = re.compile(r"\[(?:\x27([^\x27]*)\x27|\x22([^\x22]*)\x22|(-?[0-9]+))\]")
try:
    value = json.load(sys.stdin)
    pos = 0
    while pos < len(path):
        match = step.match(path, pos)
        if match is None:
            raise ValueError("unsupported field selector")
        single, double, index = match.groups()
        if index is not None:
            value = value[int(index)]
        elif single is not None:
            value = value[single]
        else:
            value = value[double]
        pos = match.end()
    # A JSON null would otherwise be printed as the literal text "None".
    if value is None:
        raise ValueError("field is null")
except (KeyError, IndexError, TypeError, ValueError) as err:
    sys.exit("json_field: {}: {!r}".format(path, err))
print(value)
' "$field"; then
        log "ERROR: Failed to parse field '$field' from JSON response."
        return 1
    fi
}
46+
47+
#
48+
# POST to the InfluxDB HTTP API. Exits with an error if the request fails.
49+
#
50+
#
# POST a JSON document to the InfluxDB HTTP API.
#
# Globals:
#   INFLUXDB_URL           (read) base URL of the InfluxDB instance.
#   INFLUXDB_HTTP_TIMEOUT  (read, optional) per-request limit in seconds;
#                          defaults to 30.
# Arguments:
#   $1 - endpoint path appended to INFLUXDB_URL, e.g. /api/v2/setup.
#   $2 - JSON request body.
#   $3 - optional API token; sent as "Authorization: Token <token>".
# Outputs:
#   The response body on stdout.
# Returns:
#   0 on success; 1 (after logging an error) when curl fails or the server
#   answers with an HTTP error status.
#
# The body and the token are secrets (the setup body carries the admin
# password), so neither is placed on curl's command line where other local
# users could read them from the process list: the body is fed on stdin and
# the header is read from a process-substitution file.
#
influx_post() {
    local endpoint="$1"
    local data="$2"
    local auth_header="${3:-}"

    local curl_args=(
        -sf -X POST "$INFLUXDB_URL$endpoint"
        # Bound the request so a hung server cannot block the caller forever.
        --max-time "${INFLUXDB_HTTP_TIMEOUT:-30}"
        -H 'Content-Type: application/json'
        --data-binary @-
    )

    local response
    local status=0
    if [[ -n "$auth_header" ]]; then
        response=$(printf '%s' "$data" |
            curl "${curl_args[@]}" \
                -H @<(printf 'Authorization: Token %s\n' "$auth_header")) || status=$?
    else
        response=$(printf '%s' "$data" | curl "${curl_args[@]}") || status=$?
    fi

    if ((status != 0)); then
        log "ERROR: HTTP POST to '$endpoint' failed."
        return 1
    fi
    printf '%s\n' "$response"
}
63+
64+
mkdir -p "$INFLUXDB_CONFIG_DIR"

# The metadata file is the last thing a successful run writes, so finding it
# means there is nothing left to do.
if [[ -f "$INFLUXDB_META_FILE" ]]; then
    log "InfluxDB already initialized, skipping."
    exit 0
fi

#
# Poll the health endpoint until it answers or the retry budget is spent.
#
log "Waiting for InfluxDB to be ready..."
influx_up=0
attempt=1
while ((attempt <= INFLUXDB_WAIT_RETRIES)); do
    if curl -sf "$INFLUXDB_URL/health" >/dev/null 2>&1; then
        influx_up=1
        break
    fi
    log "InfluxDB not ready yet (attempt $attempt/$INFLUXDB_WAIT_RETRIES), retrying in ${INFLUXDB_WAIT_INTERVAL}s..."
    sleep "$INFLUXDB_WAIT_INTERVAL"
    attempt=$((attempt + 1))
done

if ((influx_up == 0)); then
    log "ERROR: InfluxDB did not become ready after $((INFLUXDB_WAIT_RETRIES * INFLUXDB_WAIT_INTERVAL))s."
    exit 1
fi
log "InfluxDB is ready."
91+
92+
#
93+
# Initial setup — creates org, bucket, and returns admin token + IDs.
94+
# /api/v2/setup is a one-shot operation; if the script is interrupted after
95+
# this point and re-run, the state file lets us skip setup and reuse the
96+
# already-created admin token.
97+
#
98+
ADMIN_TOKEN=""
ORG_ID=""
BUCKET_ID=""

if [[ -f "$INFLUXDB_SETUP_STATE_FILE" ]]; then
    # Resume path: /api/v2/setup already ran in an earlier, interrupted run.
    log "Found existing setup state, loading admin token and IDs..."
    saved_admin_password=""
    # "|| [[ -n ... ]]" keeps a final line that lacks a trailing newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
        key="${line%%=*}"
        value="${line#*=}"
        case "$key" in
        ADMIN_TOKEN) ADMIN_TOKEN="$value" ;;
        ORG_ID) ORG_ID="$value" ;;
        BUCKET_ID) BUCKET_ID="$value" ;;
        ADMIN_PASSWORD) saved_admin_password="$value" ;;
        esac
    done <"$INFLUXDB_SETUP_STATE_FILE"

    # Without all three values the token requests below cannot succeed.
    if [[ -z "$ADMIN_TOKEN" || -z "$ORG_ID" || -z "$BUCKET_ID" ]]; then
        log "ERROR: Setup state file '$INFLUXDB_SETUP_STATE_FILE' is incomplete; cannot resume."
        exit 1
    fi

    # The password generated at the top of this run was never sent to
    # InfluxDB; the one that counts is the one used by the original setup
    # call. Restore it so the metadata file records the real credential.
    if [[ -n "$saved_admin_password" ]]; then
        INFLUXDB_ADMIN_PASSWORD="$saved_admin_password"
    else
        log "WARNING: Setup state has no admin password; it will be left empty in the metadata file."
        INFLUXDB_ADMIN_PASSWORD=""
    fi
else
    log "Running initial InfluxDB setup..."
    SETUP_RESPONSE=$(influx_post "/api/v2/setup" "{
\"username\": \"$INFLUXDB_ADMIN_USER\",
\"password\": \"$INFLUXDB_ADMIN_PASSWORD\",
\"org\": \"$INFLUXDB_ORG\",
\"bucket\": \"$INFLUXDB_BUCKET\",
\"retentionPeriodSeconds\": $INFLUXDB_RETENTION_SECONDS
}") || exit 1

    ADMIN_TOKEN=$(json_field "$SETUP_RESPONSE" "['auth']['token']") || exit 1
    ORG_ID=$(json_field "$SETUP_RESPONSE" "['org']['id']") || exit 1
    BUCKET_ID=$(json_field "$SETUP_RESPONSE" "['bucket']['id']") || exit 1

    # Persist the admin token, IDs and admin password immediately so a
    # subsequent re-run can resume without repeating the one-shot setup call
    # and still report the password that was actually configured.
    old_umask="$(umask)"
    umask 077
    tmp_state="$(mktemp "${INFLUXDB_SETUP_STATE_FILE}.XXXXXX")"
    printf 'ADMIN_TOKEN=%s\nORG_ID=%s\nBUCKET_ID=%s\nADMIN_PASSWORD=%s\n' \
        "$ADMIN_TOKEN" "$ORG_ID" "$BUCKET_ID" "$INFLUXDB_ADMIN_PASSWORD" >"$tmp_state"
    chmod 600 "$tmp_state"
    # Rename within the same directory so the state file appears complete or
    # not at all.
    mv "$tmp_state" "$INFLUXDB_SETUP_STATE_FILE"
    umask "$old_umask"
fi
138+
139+
#
140+
# Create a write-only token for Telegraf.
141+
#
142+
log "Creating Telegraf write token..."
write_token_request="{
\"orgID\": \"$ORG_ID\",
\"description\": \"telegraf-write-token\",
\"permissions\": [
{\"action\": \"write\", \"resource\": {\"type\": \"buckets\", \"id\": \"$BUCKET_ID\", \"orgID\": \"$ORG_ID\"}}
]
}"
WRITE_TOKEN_RESPONSE=$(influx_post "/api/v2/authorizations" "$write_token_request" "$ADMIN_TOKEN") || exit 1
WRITE_TOKEN=$(json_field "$WRITE_TOKEN_RESPONSE" "['token']") || exit 1

#
# Same request shape with a "read" permission: the token handed to
# DCT Smart Proxy.
#
log "Creating DCT Smart Proxy read token..."
read_token_request="{
\"orgID\": \"$ORG_ID\",
\"description\": \"dct-read-token\",
\"permissions\": [
{\"action\": \"read\", \"resource\": {\"type\": \"buckets\", \"id\": \"$BUCKET_ID\", \"orgID\": \"$ORG_ID\"}}
]
}"
READ_TOKEN_RESPONSE=$(influx_post "/api/v2/authorizations" "$read_token_request" "$ADMIN_TOKEN") || exit 1
READ_TOKEN=$(json_field "$READ_TOKEN_RESPONSE" "['token']") || exit 1
164+
165+
#
166+
# Append the [[outputs.influxdb_v2]] stanza to telegraf.base so Telegraf
167+
# knows where to ship metrics. Input stanzas already exist in the file.
168+
#
169+
log "Appending InfluxDB output stanza to $TELEGRAF_BASE..."
if [[ ! -f "$TELEGRAF_BASE" ]]; then
    # Re-running this script cannot complete the step later: the metadata
    # file is still written below, and its presence makes every later run
    # exit early. Point the operator at the manual step instead.
    log "WARNING: Telegraf base config not found at '$TELEGRAF_BASE'; skipping stanza append." \
        "Once Telegraf is installed, add an [[outputs.influxdb_v2]] stanza to that file" \
        "using INFLUXDB_WRITE_TOKEN from '$INFLUXDB_META_FILE'."
# NOTE(review): the optional '#' means a commented-out stanza also counts as
# "present". The packaged telegraf.base ships such a commented template, so
# with that file the append branch below is never taken and the write token
# has to be filled in by hand — confirm this opt-in behavior is intended.
elif grep -q "^[[:space:]]*#\{0,1\}\[\[outputs\.influxdb_v2\]\]" "$TELEGRAF_BASE"; then
    log "InfluxDB output stanza already present in $TELEGRAF_BASE, skipping."
    chmod 640 "$TELEGRAF_BASE"
else
    cat >>"$TELEGRAF_BASE" <<EOF

[[outputs.influxdb_v2]]
urls = ["http://127.0.0.1:8086"]
token = "$WRITE_TOKEN"
organization = "$INFLUXDB_ORG"
bucket = "$INFLUXDB_BUCKET"
namepass = ["cpu", "disk", "diskio", "mem", "net", "procstat", "processes", "swap", "system", "zfs"]
EOF
    # Enforce restrictive permissions so the write token is not world-readable.
    chmod 640 "$TELEGRAF_BASE"
fi
189+
190+
#
191+
# Persist org/bucket/admin credentials/tokens so DE APIs can expose them to DCT
192+
# and so the admin can access the InfluxDB UI. File is chmod 600 (root-only).
193+
#
194+
log "Writing InfluxDB metadata to $INFLUXDB_META_FILE..."
# Stage the file under a tight umask next to its final location, then rename
# it into place so readers never observe a partial or permissive file.
saved_umask="$(umask)"
umask 077
meta_staging="$(mktemp "${INFLUXDB_META_FILE}.XXXXXX")"
# One KEY=value line per pair; the format string repeats for each pair.
printf '%s=%s\n' \
    INFLUXDB_ORG "$INFLUXDB_ORG" \
    INFLUXDB_BUCKET "$INFLUXDB_BUCKET" \
    INFLUXDB_ADMIN_USER "$INFLUXDB_ADMIN_USER" \
    INFLUXDB_ADMIN_PASSWORD "$INFLUXDB_ADMIN_PASSWORD" \
    INFLUXDB_WRITE_TOKEN "$WRITE_TOKEN" \
    INFLUXDB_READ_TOKEN "$READ_TOKEN" >"$meta_staging"
chmod 600 "$meta_staging"
mv "$meta_staging" "$INFLUXDB_META_FILE"
umask "$saved_umask"

# The resume marker is only needed until the metadata file exists.
rm -f "$INFLUXDB_SETUP_STATE_FILE"
log "InfluxDB initialized successfully."

influxdb/delphix-influxdb-service

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
#!/bin/bash
2+
#
3+
# Copyright (c) 2026 by Delphix. All rights reserved.
4+
#
5+
# SPDX-License-Identifier: GPL-2.0-or-later
6+
#
7+
# Wrapper script to start InfluxDB 2.x and run first-time initialization.
8+
#
9+
10+
INFLUXDB_CONFIG=/etc/influxdb/influxdb.conf
11+
INFLUXDB_INIT=/usr/bin/delphix-influxdb-init
12+
13+
log() {
    # Arguments are joined with spaces; the line carries a UTC ISO-8601
    # timestamp and this wrapper's name, and is written to stderr.
    printf '[%s] delphix-influxdb-service: %s\n' \
        "$(date -u '+%Y-%m-%dT%H:%M:%SZ')" "$*" >&2
}
16+
17+
# Start influxd in the background
18+
/usr/bin/influxd --config-path "$INFLUXDB_CONFIG" &
INFLUXDB_PID=$!

log "Started influxd (PID ${INFLUXDB_PID})"

# Run initialization (the init script handles waiting for InfluxDB to be ready).
# The path is quoted so it is executed as a single word.
if ! "$INFLUXDB_INIT"; then
    log "ERROR: Initialization failed, stopping influxd"
    kill "$INFLUXDB_PID" 2>/dev/null
    # Reap influxd so it has finished shutting down before this wrapper
    # exits; its own exit status does not matter here.
    wait "$INFLUXDB_PID" 2>/dev/null
    exit 1
fi

# Stay in the foreground for as long as influxd runs; as the last command,
# its exit status becomes the wrapper's exit status.
wait "$INFLUXDB_PID"

influxdb/delphix-influxdb.service

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
[Unit]
2+
Description=Delphix InfluxDB Time Series Database
3+
Documentation=https://docs.influxdata.com/influxdb/v2/
4+
PartOf=delphix.target
5+
After=delphix-platform.service
6+
PartOf=delphix-platform.service
7+
8+
[Service]
9+
User=root
10+
ExecStart=/usr/bin/delphix-influxdb-service
11+
Restart=on-failure
12+
RestartForceExitStatus=SIGPIPE
13+
KillMode=control-group
14+
15+
[Install]
16+
WantedBy=delphix.target

influxdb/influxdb-init.conf

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#
2+
# Copyright (c) 2026 by Delphix. All rights reserved.
3+
#
4+
# SPDX-License-Identifier: GPL-2.0-or-later
5+
#
6+
# Configuration for delphix-influxdb-init.
7+
# Sourced by /usr/bin/delphix-influxdb-init at runtime.
8+
#
9+
10+
INFLUXDB_ORG="delphix"
11+
INFLUXDB_BUCKET="default"
12+
INFLUXDB_RETENTION_SECONDS=2592000 # 30 days (720h)
13+
INFLUXDB_WAIT_RETRIES=30
14+
INFLUXDB_WAIT_INTERVAL=2

influxdb/influxdb.conf

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#
2+
# Copyright 2024 Delphix. All rights reserved.
3+
#
4+
# SPDX-License-Identifier: GPL-2.0-or-later
5+
#
6+
# InfluxDB 2.x Configuration
7+
#
8+
9+
bolt-path = "/var/lib/influxdb2/influxdb.bolt"
10+
engine-path = "/var/lib/influxdb2/engine"
11+
http-bind-address = "127.0.0.1:8086"
12+
log-level = "warn"

telegraf/telegraf.base

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -44,12 +44,13 @@
4444
data_format = "json"
4545
namepass = ["agg_*"]
4646

47-
# Enable Live Monitoring, intended for internal Delphix use only:
48-
#[[outputs.influxdb]]
49-
# urls = ["http://dbsvr.company.com:8086"]
50-
# database = "live_metrics"
51-
# skip_database_creation = true
52-
# data_format = "influx"
47+
# Update token from /etc/influxdb/influxdb_meta (INFLUXDB_WRITE_TOKEN), then uncomment and restart telegraf.
48+
#[[outputs.influxdb_v2]]
49+
# urls = ["http://127.0.0.1:8086"]
50+
# token = ""
51+
# organization = "delphix"
52+
# bucket = "default"
53+
# namepass = ["cpu", "disk", "diskio", "mem", "net", "procstat", "processes", "swap", "system", "zfs"]
5354

5455
###############################################################################
5556
# INPUT PLUGINS #

0 commit comments

Comments
 (0)