# Patch summary: turn SESSION_AGENT_CACHE from a plain dict into an OrderedDict
# kept in least-recently-used order and capped at SESSION_AGENT_CACHE_MAX
# entries. api/streaming.py moves an entry to the end when its cached agent is
# reused, and pops entries from the front after caching a newly created agent.
#
# NOTE(review): this patch was recovered from a single collapsed line. Line
# breaks follow the hunk headers' line counts; indentation and the position of
# blank context lines are reconstructed -- confirm against the target files
# before applying.
diff --git a/api/config.py b/api/config.py
index eeebb540..34fce4ba 100644
--- a/api/config.py
+++ b/api/config.py
@@ -1978,7 +1978,10 @@ SERVER_START_TIME = time.time()
 # Agent cache: reuse AIAgent across messages in the same WebUI session so that
 # _user_turn_count survives between turns. This mirrors the gateway's
 # _agent_cache pattern and is required for injectionFrequency: "first-turn".
-SESSION_AGENT_CACHE: dict = {}  # session_id -> (AIAgent, config_sig)
+# Bounded LRU cache: entries are kept oldest-first and capped at SESSION_AGENT_CACHE_MAX to limit memory use.
+import collections  # NOTE(review): mid-module import; consider moving it to the top-of-file import block
+SESSION_AGENT_CACHE: collections.OrderedDict = collections.OrderedDict()  # session_id -> (AIAgent, config_sig), least recently used first
+SESSION_AGENT_CACHE_MAX = 50  # Maximum cached agents (each holds full conversation history); NOTE(review): assumed >= 0 by the eviction loop
 SESSION_AGENT_CACHE_LOCK = threading.Lock()
 
 
diff --git a/api/streaming.py b/api/streaming.py
index 044a7548..a2d2ffe9 100644
--- a/api/streaming.py
+++ b/api/streaming.py
@@ -1592,6 +1592,7 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
                 _cached = SESSION_AGENT_CACHE.get(session_id)
                 if _cached and _cached[1] == _agent_sig:
                     agent = _cached[0]
+                    SESSION_AGENT_CACHE.move_to_end(session_id)  # LRU: mark as most recently used. NOTE(review): raises KeyError if the entry was evicted since the get() above -- confirm this runs under SESSION_AGENT_CACHE_LOCK (enclosing `with` is outside this hunk)
                     logger.debug('[webui] Reusing cached agent for session %s', session_id)
 
             if agent is not None:
@@ -1617,6 +1618,11 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
                 agent = _AIAgent(**_agent_kwargs)
                 with SESSION_AGENT_CACHE_LOCK:
                     SESSION_AGENT_CACHE[session_id] = (agent, _agent_sig)
+                    SESSION_AGENT_CACHE.move_to_end(session_id)  # LRU: overwriting an existing key keeps its old position, so move it to the end explicitly
+                    from api.config import SESSION_AGENT_CACHE_MAX  # NOTE(review): function-scope import executed while the lock is held; consider importing it alongside SESSION_AGENT_CACHE
+                    while len(SESSION_AGENT_CACHE) > SESSION_AGENT_CACHE_MAX:  # trim from the front until the cache is within the cap
+                        evicted_sid, _ = SESSION_AGENT_CACHE.popitem(last=False)  # last=False pops the oldest (least recently used) entry; raises KeyError on an empty cache
+                        logger.debug('[webui] Evicted LRU agent from cache: %s', evicted_sid)
                 logger.debug('[webui] Created new agent for session %s', session_id)
 
             # Store agent instance for cancel/interrupt propagation