Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions rock/actions/sandbox/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ class SandboxStatusResponse(BaseModel):
start_time: str | None = None
stop_time: str | None = None
create_time: str | None = None
state_history: list[dict[str, str]] = []


class CommandResponse(BaseModel):
Expand Down
1 change: 1 addition & 0 deletions rock/actions/sandbox/sandbox_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ class SandboxInfo(TypedDict, total=False):
stop_time: str
delete_time: str
extended_params: dict[str, str]
state_history: list[dict[str, str]]


_SANDBOX_INFO_KEYS = frozenset(SandboxInfo.__annotations__.keys())
Expand Down
9 changes: 9 additions & 0 deletions rock/admin/proto/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,13 @@ class SandboxStartResponse(SandboxResponse):
disk_limit_rootfs: str | None = None


class StateTransitionRecord(BaseModel):
from_state: str
to_state: str
event: str
timestamp: str


# TODO: inherit from SandboxStartResponse
class SandboxStatusResponse(BaseModel):
sandbox_id: str = None
Expand All @@ -36,6 +43,7 @@ class SandboxStatusResponse(BaseModel):
start_time: str | None = None
stop_time: str | None = None
create_time: str | None = None
state_history: list[StateTransitionRecord] = []

@classmethod
def from_sandbox_info(cls, sandbox_info: "SandboxInfo") -> "SandboxStatusResponse":
Expand All @@ -53,6 +61,7 @@ def from_sandbox_info(cls, sandbox_info: "SandboxInfo") -> "SandboxStatusRespons
cpus=sandbox_info.get("cpus"),
memory=sandbox_info.get("memory"),
disk_limit_rootfs=sandbox_info.get("disk_limit_rootfs"),
state_history=sandbox_info.get("state_history", []),
)


Expand Down
4 changes: 4 additions & 0 deletions rock/deployments/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -964,6 +964,10 @@ async def restart(self):

logger.info(f"Restarting container {self._container_name} with docker start")

self._service_status.update_status(
phase_name="image_pull", status=Status.SUCCESS, message="skip image pull on restart"
)

# Reuse the same Popen-based attached start used by start(), so the
# restart path also produces a valid self._container_process. Without
# this, _stop() would skip its `if self._container_process is not None`
Expand Down
1 change: 1 addition & 0 deletions rock/sandbox/sandbox_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,7 @@ async def get_status(self, sandbox_id, include_all_states: bool = False) -> Sand
start_time=sandbox_info.get("start_time"),
stop_time=sandbox_info.get("stop_time"),
create_time=sandbox_info.get("create_time"),
state_history=sm.sandbox_info.get("state_history", []) if sm.sandbox_info else [],
)

async def build_sandbox_info_from_redis(self, sandbox_id: str, deployment_info: SandboxInfo) -> SandboxInfo | None:
Expand Down
21 changes: 21 additions & 0 deletions rock/sandbox/sandbox_statemachine.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,24 @@ def __init__(self, **kwargs):

# Callbacks

def before_transition(self, event, source, target):
if source.value == target.value:
return
if self.sandbox_info is None:
self.sandbox_info = {}
history = self.sandbox_info.setdefault("state_history", [])
history.append(
{
"from_state": source.value.value,
"to_state": target.value.value,
"event": str(event),
"timestamp": get_iso8601_timestamp(),
}
)
# Cap history to avoid unbounded growth in long-lived sandboxes
if len(history) > 100:
del history[:-100]

async def on_stop(self, sandbox_id: str, operator, meta_store, reason: StopReason = StopReason.MANUAL) -> None:
logger.info(f"stop sandbox {sandbox_id} (reason={reason.value})")
sandbox_info = self.sandbox_info or {}
Expand Down Expand Up @@ -98,6 +116,8 @@ async def on_alive(self, sandbox_id: str, meta_store, sandbox_info: SandboxInfo)
sandbox_info["state"] = RockState.RUNNING
if not sandbox_info.get("start_time"):
sandbox_info["start_time"] = get_iso8601_timestamp()
if self.sandbox_info and "state_history" in self.sandbox_info:
sandbox_info["state_history"] = self.sandbox_info["state_history"]
await meta_store.update(sandbox_id, sandbox_info)

async def on_restart(self, sandbox_id: str, operator, meta_store) -> None:
Expand Down Expand Up @@ -136,6 +156,7 @@ async def on_restart(self, sandbox_id: str, operator, meta_store) -> None:
new_info = dict(info)
new_info["state"] = RockState.PENDING
new_info.pop("stop_time", None)
new_info.pop("phases", None)
await meta_store.update(sandbox_id, new_info)
await meta_store.update_timeout(sandbox_id, timeout_info)

Expand Down
52 changes: 52 additions & 0 deletions tests/unit/deployments/test_docker_restart.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
"""Tests for DockerDeployment.restart() phase handling."""

import json
from unittest.mock import AsyncMock, MagicMock, patch

import pytest

from rock.deployments.constants import Status
from rock.deployments.docker import DockerDeployment


@pytest.fixture
def deployment():
with patch("rock.deployments.docker.env_vars") as mock_env:
mock_env.ROCK_WORKER_ENV_TYPE = "docker"
mock_env.ROCK_SERVICE_STATUS_DIR = "/tmp/test_status"
d = DockerDeployment(container_name="sb-1", image="python:3.11", port=22555)
return d


class TestRestartPhases:
@pytest.mark.asyncio
async def test_image_pull_is_success_after_restart(self, deployment):
"""Restart reuses existing container — image_pull should be marked SUCCESS."""
status_data = {
"phases": {
"image_pull": {"status": "success", "message": "image pull success"},
"docker_run": {"status": "success", "message": "docker run success"},
},
"port_mapping": {"22555": 22555},
}

mock_popen = MagicMock()
with (
patch.object(deployment, "_docker_start", return_value=mock_popen),
patch("os.path.exists", return_value=True),
patch("builtins.open", create=True) as mock_open,
patch.object(deployment, "_wait_until_alive", new_callable=AsyncMock),
patch("rock.deployments.docker.RemoteSandboxRuntime") as mock_runtime_cls,
):
mock_open.return_value.__enter__ = lambda s: s
mock_open.return_value.__exit__ = MagicMock(return_value=False)
mock_open.return_value.read = MagicMock(return_value=json.dumps(status_data))
mock_runtime = MagicMock()
mock_runtime_cls.from_config.return_value = mock_runtime

# Patch json.load to return status_data
with patch("json.load", return_value=status_data):
await deployment.restart()

phase = deployment._service_status.get_phase("image_pull")
assert phase.status == Status.SUCCESS, f"Expected image_pull to be SUCCESS after restart, got {phase.status}"
105 changes: 105 additions & 0 deletions tests/unit/sandbox/test_sandbox_statemachine.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,16 @@ async def test_updates_state_to_pending(self, mock_meta_store):
updated_info = mock_meta_store.update.call_args[0][1]
assert updated_info["state"] == State.PENDING

@pytest.mark.asyncio
async def test_clears_stale_phases_on_restart(self, mock_meta_store):
info = dict(_VALID_RESTART_INFO)
info["phases"] = {"image_pull": {"status": "done", "message": "ok"}}
info["stop_time"] = "2026-01-01T01:00:00+08:00"
await self._send_restart(mock_meta_store, sandbox_info=info)
updated_info = mock_meta_store.update.call_args[0][1]
assert "phases" not in updated_info
assert "stop_time" not in updated_info

@pytest.mark.asyncio
async def test_writes_timeout_built_from_spec(self, mock_meta_store):
# auto_clear_time_minutes=30 in spec → make_timeout_info uses 30
Expand Down Expand Up @@ -411,3 +421,98 @@ async def test_missing_spec_skips_operator_but_still_archives(self, mock_meta_st
async def test_restores_deleted_from_state_value(self):
sm = await SandboxStateMachine.from_state_value(State.DELETED, sandbox_info={})
assert sm.deleted.is_active


# ---------------------------------------------------------------------------
# state_history recording
# ---------------------------------------------------------------------------


class TestStateHistory:
@pytest.mark.asyncio
async def test_alive_records_history(self):
sm = SandboxStateMachine(sandbox_info={"sandbox_id": "sb-1"})
await sm.activate_initial_state()
await sm.send("alive", sandbox_id="sb-1", meta_store=AsyncMock(), sandbox_info={})
history = sm.sandbox_info["state_history"]
assert len(history) == 1
assert history[0]["from_state"] == "pending"
assert history[0]["to_state"] == "running"
assert history[0]["event"] == "alive"
assert "timestamp" in history[0]

@pytest.mark.asyncio
async def test_stop_records_history(self):
sm = await SandboxStateMachine.from_state_value(State.RUNNING, sandbox_info={"sandbox_id": "sb-1"})
await sm.send("stop", sandbox_id="sb-1", operator=AsyncMock(), meta_store=AsyncMock())
history = sm.sandbox_info["state_history"]
assert len(history) == 1
assert history[0]["from_state"] == "running"
assert history[0]["to_state"] == "stopped"
assert history[0]["event"] == "stop"

@pytest.mark.asyncio
async def test_full_lifecycle_accumulates_history(self):
sm = SandboxStateMachine(sandbox_info={"sandbox_id": "sb-1"})
await sm.activate_initial_state()
await sm.send("alive", sandbox_id="sb-1", meta_store=AsyncMock(), sandbox_info={})
await sm.send("stop", sandbox_id="sb-1", operator=AsyncMock(), meta_store=AsyncMock())
history = sm.sandbox_info["state_history"]
assert len(history) == 2
assert history[0]["from_state"] == "pending"
assert history[0]["to_state"] == "running"
assert history[1]["from_state"] == "running"
assert history[1]["to_state"] == "stopped"

@pytest.mark.asyncio
async def test_stop_noop_skips_history(self):
sm = await SandboxStateMachine.from_state_value(State.STOPPED, sandbox_info={"sandbox_id": "sb-1"})
await sm.send("stop_noop", sandbox_id="sb-1")
history = sm.sandbox_info.get("state_history", [])
assert len(history) == 0

@pytest.mark.asyncio
async def test_restart_records_history(self):
sm = await SandboxStateMachine.from_state_value(State.STOPPED, sandbox_info=dict(_VALID_RESTART_INFO))
await sm.send("restart", sandbox_id="sb-1", operator=AsyncMock(), meta_store=AsyncMock())
history = sm.sandbox_info["state_history"]
assert len(history) == 1
assert history[0]["from_state"] == "stopped"
assert history[0]["to_state"] == "pending"
assert history[0]["event"] == "restart"

@pytest.mark.asyncio
async def test_delete_records_history(self):
sm = await SandboxStateMachine.from_state_value(State.STOPPED, sandbox_info=dict(_VALID_DELETE_INFO))
await sm.send("delete", sandbox_id="sb-1", operator=AsyncMock(), meta_store=AsyncMock())
history = sm.sandbox_info["state_history"]
assert len(history) == 1
assert history[0]["from_state"] == "stopped"
assert history[0]["to_state"] == "deleted"
assert history[0]["event"] == "delete"

@pytest.mark.asyncio
async def test_history_preserved_from_state_value(self):
existing_history = [
{"from_state": "pending", "to_state": "running", "event": "alive", "timestamp": "2026-01-01T00:00:00+08:00"}
]
sm = await SandboxStateMachine.from_state_value(
State.RUNNING, sandbox_info={"sandbox_id": "sb-1", "state_history": existing_history}
)
await sm.send("stop", sandbox_id="sb-1", operator=AsyncMock(), meta_store=AsyncMock())
history = sm.sandbox_info["state_history"]
assert len(history) == 2
assert history[0]["event"] == "alive"
assert history[1]["event"] == "stop"

@pytest.mark.asyncio
async def test_alive_copies_history_to_operator_sandbox_info(self):
sm = SandboxStateMachine(sandbox_info={"sandbox_id": "sb-1"})
await sm.activate_initial_state()
operator_info = {"sandbox_id": "sb-1"}
mock_meta_store = AsyncMock()
await sm.send("alive", sandbox_id="sb-1", meta_store=mock_meta_store, sandbox_info=operator_info)
updated_info = mock_meta_store.update.call_args[0][1]
assert "state_history" in updated_info
assert len(updated_info["state_history"]) == 1
assert updated_info["state_history"][0]["event"] == "alive"
Loading