Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions bin/network-monitor/assets/index.css
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,7 @@ body {
justify-content: center;
transition: all 0.2s;
color: var(--color-text-faint);
flex-shrink: 0;
}

.copy-button:hover {
Expand Down Expand Up @@ -429,12 +430,16 @@ body {

/* When the value carries an inline action (currently only the copy button), keep them on the
same line. Without this `.metric-value` is inline, and the whitespace between the value text
and the button is a wrap opportunity — so long values (URLs in particular) push the button
onto its own row below. */
and the button is a wrap opportunity, so long values (URLs in particular) push the button
onto its own row below. `min-width: 0` plus `overflow-wrap: anywhere` let values with no
natural break points (e.g. URLs without hyphens) wrap inside the card instead of overflowing
it and dragging the button outside the box. */
/* Applies only to metric values that contain a copy button. */
.metric-value:has(.copy-button) {
display: inline-flex;
align-items: center;
gap: 4px;
/* These two declarations together let a value with no natural break points wrap inside the
card instead of overflowing it. */
min-width: 0;
overflow-wrap: anywhere;
}

.metric-value.warning-delta,
Expand Down
8 changes: 2 additions & 6 deletions bin/network-monitor/src/commands/start.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,7 @@ pub async fn start_monitor(config: MonitorConfig) -> Result<()> {

let rpc_rx = tasks.spawn_rpc_checker(&config);

let prover_rxs = if config.remote_prover_urls.is_empty() {
Vec::new()
} else {
tasks.spawn_prover_tasks(&config).await
};
let prover_rxs = tasks.spawn_prover_tasks(&config);

let faucet_rx = config.faucet_url.is_some().then(|| tasks.spawn_faucet(&config));

Expand All @@ -48,7 +44,7 @@ pub async fn start_monitor(config: MonitorConfig) -> Result<()> {
let (ntx_increment_rx, ntx_tracking_rx) = if config.disable_ntx_service {
(None, None)
} else {
let (increment_rx, tracking_rx) = tasks.spawn_ntx_service(&config).await?;
let (increment_rx, tracking_rx) = tasks.spawn_ntx_service(&config);
(Some(increment_rx), Some(tracking_rx))
};

Expand Down
26 changes: 17 additions & 9 deletions bin/network-monitor/src/counter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,10 @@ pub struct IncrementService {
}

impl IncrementService {
/// Display name of the service, shared with the bootstrap seeding code in
/// [`crate::monitor::tasks`].
pub const NAME: &'static str = "Local Transactions";

pub async fn new(
config: MonitorConfig,
wallet_account: Account,
Expand Down Expand Up @@ -330,7 +334,7 @@ impl IncrementService {

impl Service for IncrementService {
fn name(&self) -> &'static str {
"Local Transactions"
Self::NAME
}

fn interval(&self) -> Duration {
Expand Down Expand Up @@ -413,6 +417,10 @@ pub struct CounterTrackingService {
}

impl CounterTrackingService {
/// Display name of the service, shared with the bootstrap seeding code in
/// [`crate::monitor::tasks`].
pub const NAME: &'static str = "Network Transactions";

pub async fn new(
config: MonitorConfig,
counter_receiver: watch::Receiver<Account>,
Expand Down Expand Up @@ -552,7 +560,7 @@ impl CounterTrackingService {

impl Service for CounterTrackingService {
fn name(&self) -> &'static str {
"Network Transactions"
Self::NAME
}

fn interval(&self) -> Duration {
Expand Down Expand Up @@ -664,15 +672,15 @@ fn build_increment_status(details: &IncrementDetails, last_error: Option<String>
let service_details = ServiceDetails::NtxIncrement(details.clone());

if let Some(err) = last_error {
ServiceStatus::unhealthy("Local Transactions", err, service_details)
ServiceStatus::unhealthy(IncrementService::NAME, err, service_details)
} else if details.success_count == 0 && details.failure_count > 0 {
ServiceStatus::unhealthy(
"Local Transactions",
IncrementService::NAME,
format!("no successful increments ({} failures)", details.failure_count),
service_details,
)
} else {
ServiceStatus::healthy("Local Transactions", service_details)
ServiceStatus::healthy(IncrementService::NAME, service_details)
}
}

Expand All @@ -694,7 +702,7 @@ fn build_tracking_status(
let service_details = ServiceDetails::NtxTracking(details.clone());

if let Some(err) = last_error {
return ServiceStatus::unhealthy("Network Transactions", err, service_details);
return ServiceStatus::unhealthy(CounterTrackingService::NAME, err, service_details);
}

if over_threshold_streak >= PENDING_UNHEALTHY_CONFIRMATION_POLLS {
Expand All @@ -703,13 +711,13 @@ fn build_tracking_status(
"counter trailing expected by {pending} (> {threshold}) for {over_threshold_streak} \
consecutive polls",
);
return ServiceStatus::unhealthy("Network Transactions", err, service_details);
return ServiceStatus::unhealthy(CounterTrackingService::NAME, err, service_details);
}

if details.current_value.is_some() {
ServiceStatus::healthy("Network Transactions", service_details)
ServiceStatus::healthy(CounterTrackingService::NAME, service_details)
} else {
ServiceStatus::unknown("Network Transactions", service_details)
ServiceStatus::unknown(CounterTrackingService::NAME, service_details)
}
}

Expand Down
7 changes: 4 additions & 3 deletions bin/network-monitor/src/deploy/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,10 @@ pub mod wallet;
///
/// At startup the monitor may come up before the node's RPC endpoint is accepting connections, so
/// the eager `connect()` (and the follow-up `get_block_header_by_number` request) is retried with
/// exponential backoff instead of aborting the binary on the first refused connection. The schedule
/// is bounded so a genuinely unreachable or misconfigured endpoint still surfaces as a fatal error
/// rather than hanging forever.
/// exponential backoff instead of failing on the first refused connection. The schedule is bounded
/// so a single handshake attempt returns within a few minutes; callers that must survive a
/// genuinely unreachable endpoint (e.g. the NTX bootstrap in `monitor::tasks`) wrap it in their
/// own unbounded retry loop.
const GENESIS_DISCOVERY_BACKOFF_INITIAL: Duration = Duration::from_secs(1);
// NOTE(review): presumably the ceiling the exponential delay grows to; confirm at the retry site.
const GENESIS_DISCOVERY_BACKOFF_MAX: Duration = Duration::from_secs(30);
// NOTE(review): presumably the number of retries before the handshake gives up; confirm at the
// retry site.
const GENESIS_DISCOVERY_MAX_RETRIES: usize = 10;
Expand Down
61 changes: 49 additions & 12 deletions bin/network-monitor/src/faucet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
//! This module contains the logic for periodically testing faucet functionality
//! by requesting proof-of-work challenges, solving them, and submitting token requests.

use std::time::Duration;
use std::time::{Duration, Instant};

use anyhow::Context;
use hex;
use miden_node_utils::spawn::spawn_blocking_in_current_span;
use reqwest::Client;
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
Expand Down Expand Up @@ -88,6 +89,8 @@ pub struct FaucetService {
url: Url,
client: Client,
interval: Duration,
/// Wall-clock cap on solving a single `PoW` challenge.
solve_timeout: Duration,
/// A valid public account ID used as the recipient for faucet token requests. Generated once at
/// construction from a throwaway wallet account; the minted tokens are never spent.
account_id: String,
Expand All @@ -109,6 +112,7 @@ impl FaucetService {
url,
client,
interval,
solve_timeout: request_timeout,
account_id: wallet_account.id().to_string(),
success_count: 0,
failure_count: 0,
Expand Down Expand Up @@ -145,7 +149,9 @@ impl Service for FaucetService {
let start_time = std::time::Instant::now();
let mut last_error: Option<String> = None;

match perform_faucet_test(&self.client, &self.url, &self.account_id).await {
match perform_faucet_test(&self.client, &self.url, &self.account_id, self.solve_timeout)
.await
{
Ok((minted_tokens, metadata)) => {
self.success_count += 1;
self.last_tx_id = Some(minted_tokens.tx_id.clone());
Expand Down Expand Up @@ -198,6 +204,7 @@ async fn perform_faucet_test(
client: &Client,
faucet_url: &Url,
account_id: &str,
solve_timeout: Duration,
) -> anyhow::Result<(GetTokensResponse, GetMetadataResponse)> {
debug!("Using recipient account ID: {} (length: {})", account_id, account_id.len());

Expand All @@ -210,7 +217,7 @@ async fn perform_faucet_test(

let response = client.get(pow_url).send().await?;

let response_text: String = response.text().await?;
let response_text = read_success_body(response).await.context("/pow request failed")?;
debug!("Faucet PoW response: {}", response_text);

let challenge_response: PowChallengeResponse =
Expand All @@ -222,9 +229,16 @@ async fn perform_faucet_test(
&challenge_response.challenge[..16.min(challenge_response.challenge.len())]
);

// Step 2: Solve the PoW challenge
let nonce = solve_pow_challenge(&challenge_response.challenge, challenge_response.target)
.context("Failed to solve PoW challenge")?;
// Step 2: Solve the PoW challenge off the async runtime; hashing is CPU-bound and would
// otherwise stall every other checker task scheduled on this worker thread.
let challenge = challenge_response.challenge.clone();
let target = challenge_response.target;
let nonce = spawn_blocking_in_current_span(move || {
solve_pow_challenge(&challenge, target, solve_timeout)
})
.await
.context("PoW solver task panicked")?
.context("Failed to solve PoW challenge")?;

debug!("Solved PoW challenge with nonce: {}", nonce);

Expand All @@ -240,7 +254,7 @@ async fn perform_faucet_test(

let response = client.get(tokens_url).send().await?;

let response_text: String = response.text().await?;
let response_text = read_success_body(response).await.context("/get_tokens request failed")?;
debug!("Faucet /get_tokens response: {}", response_text);

let tokens_response: GetTokensResponse =
Expand All @@ -251,7 +265,8 @@ async fn perform_faucet_test(

let response = client.get(metadata_url).send().await?;

let response_text = response.text().await?;
let response_text =
read_success_body(response).await.context("/get_metadata request failed")?;
debug!("Faucet /get_metadata response: {}", response_text);

let metadata: GetMetadataResponse =
Expand All @@ -260,6 +275,16 @@ async fn perform_faucet_test(
Ok((tokens_response, metadata))
}

/// Consumes `response` and returns its body as text, but only for a successful HTTP status.
///
/// A non-success status produces an error of the form `HTTP <status>: <body>`, so server-side
/// failures (e.g. 429 or 500) surface directly on the card instead of as a deserialization
/// failure.
///
/// # Errors
///
/// Fails if the body cannot be read, or if the status is not a success status.
async fn read_success_body(response: reqwest::Response) -> anyhow::Result<String> {
    // Capture the status before reading the body: `text()` consumes the response.
    let status = response.status();
    let body = response.text().await?;

    if !status.is_success() {
        anyhow::bail!("HTTP {status}: {body}");
    }

    Ok(body)
}

/// Deserialize a faucet response using [`serde_path_to_error`] so that the failing JSON path (e.g.
/// `max_supply`, `explorer_url`) is included in the error message. Combined with
/// `#[serde(deny_unknown_fields)]` on each response type, this means renamed, removed, or newly
Expand All @@ -274,15 +299,19 @@ where

/// Solves a proof-of-work challenge using SHA-256 hashing.
///
/// This is CPU-bound and must run on a blocking thread (see the
/// `spawn_blocking_in_current_span` call in `perform_faucet_test`).
///
/// # Arguments
///
/// * `challenge` - The challenge string in hexadecimal format.
/// * `target` - The target value. A solution is valid if H(challenge, nonce) < target.
/// * `timeout` - Wall-clock cap; checked every 100k attempts so a pathological difficulty cannot
/// pin the blocking thread indefinitely.
///
/// # Returns
///
/// The nonce that solves the challenge, or an error if no solution is found within reasonable
/// bounds.
/// The nonce that solves the challenge, or an error if no solution is found within the attempt
/// and time bounds.
#[instrument(
parent = None,
target = COMPONENT,
Expand All @@ -292,8 +321,9 @@ where
ret(level = "debug"),
err
)]
fn solve_pow_challenge(challenge: &str, target: u64) -> anyhow::Result<u64> {
fn solve_pow_challenge(challenge: &str, target: u64, timeout: Duration) -> anyhow::Result<u64> {
let challenge_bytes = hex::decode(challenge).context("Failed to decode challenge from hex")?;
let started = Instant::now();

// Try up to 100 million nonces.
for nonce in 0..MAX_CHALLENGE_ATTEMPTS {
Expand All @@ -316,8 +346,15 @@ fn solve_pow_challenge(challenge: &str, target: u64) -> anyhow::Result<u64> {
return Ok(nonce);
}

// Log progress every 100k attempts
// Check the deadline and log progress every 100k attempts
if nonce % 100_000 == 0 && nonce > 0 {
let elapsed = started.elapsed();
if elapsed >= timeout {
anyhow::bail!(
"Failed to solve PoW challenge within {timeout:?} ({nonce} attempts, target \
{target})"
);
}
debug!(
"PoW attempt {}: current_hash={}, target={} (~{} bits)",
nonce,
Expand Down
Loading
Loading