Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions rock/admin/metrics/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ class MetricsConstants:
SANDBOX_REQUEST_TOTAL = "request.total"
SANDBOX_REQUEST_SUCCESS = "request.success"
SANDBOX_REQUEST_FAILURE = "request.failure"
SANDBOX_REQUEST_CLIENT_ERROR = "request.client_error"

SANDBOX_REQUEST_RT = "request.rt"

Expand Down
6 changes: 5 additions & 1 deletion rock/admin/metrics/decorator.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from rock.admin.metrics.constants import MetricsConstants
from rock.admin.metrics.monitor import MetricsMonitor
from rock.sdk.common.exceptions import BadRequestRockError

if TYPE_CHECKING:
from rock.sandbox.sandbox_meta_store import SandboxMetaStore
Expand Down Expand Up @@ -130,7 +131,10 @@ def _record_metrics(metrics_monitor: MetricsMonitor, result, attributes: dict, s
is_exception = isinstance(result, Exception)
if is_exception:
metric_attrs = {**attributes, "error_type": type(result).__name__}
metrics_monitor.record_counter_by_name(f"{metric_prefix}.failure", 1, metric_attrs)
if isinstance(result, BadRequestRockError):
metrics_monitor.record_counter_by_name(f"{metric_prefix}.client_error", 1, metric_attrs)
else:
metrics_monitor.record_counter_by_name(f"{metric_prefix}.failure", 1, metric_attrs)
else:
metric_attrs = attributes
metrics_monitor.record_counter_by_name(f"{metric_prefix}.success", 1, metric_attrs)
Expand Down
1 change: 1 addition & 0 deletions rock/admin/metrics/monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ def _register_metrics(self):
# Request count metrics
self._register_counter(MetricsConstants.SANDBOX_REQUEST_SUCCESS, "Number of successful requests")
self._register_counter(MetricsConstants.SANDBOX_REQUEST_FAILURE, "Number of failed requests")
self._register_counter(MetricsConstants.SANDBOX_REQUEST_CLIENT_ERROR, "Number of client error requests")
self._register_counter(MetricsConstants.SANDBOX_REQUEST_TOTAL, "Number of total requests")
# Sandbox metrics
self._register_gauge(MetricsConstants.SANDBOX_TOTAL_COUNT, "Number of sandbox")
Expand Down
34 changes: 30 additions & 4 deletions tests/unit/admin/metrics/test_decorator.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,9 +189,9 @@ async def do_something(self, sandbox_id):
assert attrs["namespace"] == "n1"


def test_record_metrics_bad_request_goes_to_failure():
"""BadRequestRockError lands in the generic `.failure` bucket along with
every other exception type — there is no separate client-fault bucket."""
def test_record_metrics_bad_request_goes_to_client_error():
"""BadRequestRockError lands in the `.client_error` bucket, not `.failure`,
so client faults don't pollute server failure metrics."""
mock_metrics_monitor = Mock(spec=MetricsMonitor)
attributes = {"operation": "test_op"}
start_time = 0
Expand All @@ -202,6 +202,32 @@ def test_record_metrics_bad_request_goes_to_failure():
_record_metrics(mock_metrics_monitor, exception, attributes, start_time, "test")

error_attrs = {**attributes, "error_type": "BadRequestRockError"}
mock_metrics_monitor.record_counter_by_name.assert_any_call("test.failure", 1, error_attrs)
mock_metrics_monitor.record_counter_by_name.assert_any_call("test.client_error", 1, error_attrs)
mock_metrics_monitor.record_gauge_by_name.assert_called_once_with("test.rt", 1000.0, error_attrs)
mock_metrics_monitor.record_counter_by_name.assert_any_call("test.total", 1, error_attrs)
# Verify it does NOT go to .failure
failure_calls = [c for c in mock_metrics_monitor.record_counter_by_name.call_args_list if c[0][0] == "test.failure"]
assert len(failure_calls) == 0


def test_record_metrics_server_error_goes_to_failure():
"""Non-client exceptions (e.g. InternalServerRockError, generic Exception)
still land in the `.failure` bucket."""
from rock.sdk.common.exceptions import InternalServerRockError

mock_metrics_monitor = Mock(spec=MetricsMonitor)
attributes = {"operation": "test_op"}
start_time = 0
exception = InternalServerRockError("internal error")

with patch("rock.admin.metrics.decorator.time.perf_counter", return_value=1.0):
with pytest.raises(InternalServerRockError):
_record_metrics(mock_metrics_monitor, exception, attributes, start_time, "test")

error_attrs = {**attributes, "error_type": "InternalServerRockError"}
mock_metrics_monitor.record_counter_by_name.assert_any_call("test.failure", 1, error_attrs)
# Verify it does NOT go to .client_error
client_error_calls = [
c for c in mock_metrics_monitor.record_counter_by_name.call_args_list if c[0][0] == "test.client_error"
]
assert len(client_error_calls) == 0
Loading