Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 46 additions & 6 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -712,7 +712,7 @@ jobs:
memory-soak-pr:
if: github.event_name == 'pull_request'
runs-on: ubuntu-latest
timeout-minutes: 90
timeout-minutes: 300
needs: [build-dockers-amd]
strategy:
fail-fast: false
Expand All @@ -721,7 +721,9 @@ jobs:
env:
DOCKER_TAG: pr-${{ github.event.pull_request.number }}
ESPRESSO_NODE_GENESIS_FILE: genesis/${{ matrix.genesis }}.toml
DURATION_SECONDS: 3600
DURATION_SECONDS: 14400
RUST_LOG: error
ESPRESSO_NODE_TELEMETRY_LOG: error
steps:
- uses: actions/checkout@v6
- uses: astral-sh/setup-uv@v8.1.0
Expand All @@ -742,8 +744,44 @@ jobs:
- name: Start demo + smoke test
run: just soak::up

- name: Sample
run: just soak::sample
# Sample in 1h chunks, re-uploading (overwrite) after each so partial data
# survives a host-level OOM/kill: when the runner host dies, `if: always()`
# steps do not run, so only artifacts already uploaded persist.
- name: Sample chunk 1/4 (1h)
run: DURATION_SECONDS=3600 just soak::sample
- name: Upload partial soak samples (1h)
if: always()
uses: actions/upload-artifact@v7
with:
name: memory-soak-${{ matrix.genesis }}
path: ./soak-samples
retention-days: 90
overwrite: true

- name: Sample chunk 2/4 (1h)
run: DURATION_SECONDS=3600 just soak::sample --append
- name: Upload partial soak samples (2h)
if: always()
uses: actions/upload-artifact@v7
with:
name: memory-soak-${{ matrix.genesis }}
path: ./soak-samples
retention-days: 90
overwrite: true

- name: Sample chunk 3/4 (1h)
run: DURATION_SECONDS=3600 just soak::sample --append
- name: Upload partial soak samples (3h)
if: always()
uses: actions/upload-artifact@v7
with:
name: memory-soak-${{ matrix.genesis }}
path: ./soak-samples
retention-days: 90
overwrite: true

- name: Sample chunk 4/4 (1h)
run: DURATION_SECONDS=3600 just soak::sample --append

- name: Render summary + chart
if: always()
Expand All @@ -760,6 +798,7 @@ jobs:
name: memory-soak-${{ matrix.genesis }}
path: ./soak-samples
retention-days: 90
overwrite: true

- name: Upload soak logs
if: always()
Expand All @@ -772,15 +811,16 @@ jobs:
memory-soak-non-pr:
if: github.event_name != 'pull_request'
runs-on: ubuntu-latest
timeout-minutes: 90
timeout-minutes: 300
needs: [build-dockers-amd, create-multiplatform-docker-image]
strategy:
fail-fast: false
matrix:
genesis: [demo-drb-header, demo-epoch-reward]
env:
ESPRESSO_NODE_GENESIS_FILE: genesis/${{ matrix.genesis }}.toml
DURATION_SECONDS: 3600
DURATION_SECONDS: 14400
RUST_LOG: error
steps:
- uses: actions/checkout@v6
- uses: astral-sh/setup-uv@v8.1.0
Expand Down
21 changes: 20 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/espresso/node/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ serde_json = { workspace = true }
serialization-api = { path = "../../serialization/api" }
sha2 = { workspace = true }
snafu = { workspace = true }
snmalloc-rs = "0.7.4"
sqlx = { workspace = true }
staking-cli = { workspace = true }
static_assertions = { workspace = true }
Expand Down
3 changes: 3 additions & 0 deletions crates/espresso/node/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
#[global_allocator]
static ALLOC: snmalloc_rs::SnMalloc = snmalloc_rs::SnMalloc;

mod external_event_handler;
mod message_compat_tests;
mod proposal_fetcher;
Expand Down
18 changes: 13 additions & 5 deletions crates/process-metrics/scripts/soak.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,11 +114,12 @@ def _append_jsonl(f, ts: int, rows: list[dict]) -> None:
f.write(json.dumps(row) + "\n")


def run_sampling(output_dir: Path, duration_seconds: int) -> None:
def run_sampling(output_dir: Path, duration_seconds: int, append: bool = False) -> None:
docker_path = output_dir / "docker-stats.jsonl"
metrics_path = output_dir / "node-metrics.jsonl"
docker_path.write_text("")
metrics_path.write_text("")
if not append:
docker_path.write_text("")
metrics_path.write_text("")
if duration_seconds <= 0:
log.warning(f"duration_seconds={duration_seconds}; skipping sampling")
return
Expand Down Expand Up @@ -360,15 +361,22 @@ def cli(log_level: str) -> None:
default=Path("./soak-samples"),
type=PathOpt,
)
def sample(duration_seconds: int, output_dir: Path) -> None:
@opt(
"--append",
is_flag=True,
envvar="SOAK_APPEND",
default=False,
help="append to existing JSONL instead of truncating (for chunked sampling)",
)
def sample(duration_seconds: int, output_dir: Path, append: bool) -> None:
"""Scrape docker stats + each node's /v0/status/metrics into JSONL."""
output_dir.mkdir(parents=True, exist_ok=True)
env_path = REPO_ROOT / ".env"
if not env_path.exists():
log.error(".env not found. Copy .env.docker.example to .env first.")
sys.exit(1)
load_dotenv(env_path, override=False)
run_sampling(output_dir, duration_seconds)
run_sampling(output_dir, duration_seconds, append=append)


@cli.command()
Expand Down
4 changes: 2 additions & 2 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,7 @@ services:
- ESPRESSO_NODE_TELEMETRY_LOGS_ENABLE=true
- ESPRESSO_NODE_TELEMETRY_METRICS_ENABLE=true
- ESPRESSO_NODE_TELEMETRY_ENDPOINT=http://intentionally-unreachable-telemetry-endpoint
- ESPRESSO_NODE_TELEMETRY_LOG=info
- ESPRESSO_NODE_TELEMETRY_LOG=${ESPRESSO_NODE_TELEMETRY_LOG:-info}
- ESPRESSO_NODE_EMBEDDED_DB=false
- ESPRESSO_NODE_GENESIS_FILE=${ESPRESSO_NODE_GENESIS_FILE:-genesis/demo.toml}
- ESPRESSO_NODE_ORCHESTRATOR_URL=http://orchestrator:${ESPRESSO_ORCHESTRATOR_PORT}
Expand Down Expand Up @@ -579,7 +579,7 @@ services:
environment:
- ESPRESSO_NODE_TELEMETRY_LOGS_ENABLE=true
- ESPRESSO_NODE_TELEMETRY_ENDPOINT=http://vector:${ESPRESSO_DEMO_TELEMETRY_OTLP_PORT}
- ESPRESSO_NODE_TELEMETRY_LOG=info
- ESPRESSO_NODE_TELEMETRY_LOG=${ESPRESSO_NODE_TELEMETRY_LOG:-info}
- ESPRESSO_NODE_EMBEDDED_DB=false
- ESPRESSO_NODE_GENESIS_FILE=${ESPRESSO_NODE_GENESIS_FILE:-genesis/demo.toml}
- ESPRESSO_NODE_ORCHESTRATOR_URL=http://orchestrator:${ESPRESSO_ORCHESTRATOR_PORT}
Expand Down
Loading