fix: add LRU limit to SESSION_AGENT_CACHE to prevent memory bloat

The agent cache stores full AIAgent instances (each holding its complete
conversation history) without a size limit. On long-running servers with
many sessions, its memory usage can grow without bound.

Changes:
- Replace the dict with an OrderedDict for LRU tracking
- Add a SESSION_AGENT_CACHE_MAX = 50 limit
- Evict least-recently-used entries when the cache exceeds the limit
- Call move_to_end() on cache hits and inserts to maintain LRU order

This prevents memory exhaustion on servers with many active sessions.
This commit is contained in:
happy5318
2026-04-29 17:35:12 +08:00
parent 72b4ff66f0
commit 65e5690772
2 changed files with 10 additions and 1 deletions
+4 -1
View File
@@ -1978,7 +1978,10 @@ SERVER_START_TIME = time.time()
# Agent cache: reuse AIAgent across messages in the same WebUI session so that
# _user_turn_count survives between turns. This mirrors the gateway's
# _agent_cache pattern and is required for injectionFrequency: "first-turn".
SESSION_AGENT_CACHE: dict = {} # session_id -> (AIAgent, config_sig)
# Bounded LRU cache to prevent memory bloat: the OrderedDict's order doubles
# as recency order. Entries are moved to the end on every hit and insert, and
# the first (least recently used) entry is evicted once the cache holds more
# than SESSION_AGENT_CACHE_MAX agents.
import collections
SESSION_AGENT_CACHE: collections.OrderedDict = collections.OrderedDict() # LRU cache: session_id -> (AIAgent, config_sig), least recently used first
SESSION_AGENT_CACHE_MAX = 50 # Maximum cached agents (each holds full conversation history)
SESSION_AGENT_CACHE_LOCK = threading.Lock()
+6
View File
@@ -1592,6 +1592,7 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
_cached = SESSION_AGENT_CACHE.get(session_id)
if _cached and _cached[1] == _agent_sig:
agent = _cached[0]
SESSION_AGENT_CACHE.move_to_end(session_id) # LRU: mark as recently used
logger.debug('[webui] Reusing cached agent for session %s', session_id)
if agent is not None:
@@ -1617,6 +1618,11 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
agent = _AIAgent(**_agent_kwargs)
with SESSION_AGENT_CACHE_LOCK:
SESSION_AGENT_CACHE[session_id] = (agent, _agent_sig)
SESSION_AGENT_CACHE.move_to_end(session_id) # LRU: mark as recently used
from api.config import SESSION_AGENT_CACHE_MAX
while len(SESSION_AGENT_CACHE) > SESSION_AGENT_CACHE_MAX:
evicted_sid, _ = SESSION_AGENT_CACHE.popitem(last=False)
logger.debug('[webui] Evicted LRU agent from cache: %s', evicted_sid)
logger.debug('[webui] Created new agent for session %s', session_id)
# Store agent instance for cancel/interrupt propagation