mirror of
https://github.com/nesquena/hermes-webui.git
synced 2026-05-30 21:50:16 +00:00
fix: add LRU limit to SESSION_AGENT_CACHE to prevent memory bloat
The agent cache stores full AIAgent instances (each holding the complete conversation history) without any size limit. Long-running servers with many sessions can therefore accumulate unbounded memory.

Changes:
- Replace dict with OrderedDict for LRU tracking
- Add a SESSION_AGENT_CACHE_MAX = 50 limit
- Evict least-recently-used entries when the cache exceeds the limit
- Call move_to_end() on cache hits to maintain LRU order

This prevents memory exhaustion on servers with many active sessions.
This commit is contained in:
+4
-1
@@ -1978,7 +1978,10 @@ SERVER_START_TIME = time.time()
|
||||
# Agent cache: reuse AIAgent across messages in the same WebUI session so that
|
||||
# _user_turn_count survives between turns. This mirrors the gateway's
|
||||
# _agent_cache pattern and is required for injectionFrequency: "first-turn".
|
||||
SESSION_AGENT_CACHE: dict = {} # session_id -> (AIAgent, config_sig)
|
||||
# LRU cache with size limit to prevent memory bloat
|
||||
import collections
|
||||
SESSION_AGENT_CACHE: collections.OrderedDict = collections.OrderedDict() # LRU cache
|
||||
SESSION_AGENT_CACHE_MAX = 50 # Maximum cached agents (each holds full conversation history)
|
||||
SESSION_AGENT_CACHE_LOCK = threading.Lock()
|
||||
|
||||
|
||||
|
||||
@@ -1592,6 +1592,7 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
|
||||
_cached = SESSION_AGENT_CACHE.get(session_id)
|
||||
if _cached and _cached[1] == _agent_sig:
|
||||
agent = _cached[0]
|
||||
SESSION_AGENT_CACHE.move_to_end(session_id) # LRU: mark as recently used
|
||||
logger.debug('[webui] Reusing cached agent for session %s', session_id)
|
||||
|
||||
if agent is not None:
|
||||
@@ -1617,6 +1618,11 @@ def _run_agent_streaming(session_id, msg_text, model, workspace, stream_id, atta
|
||||
agent = _AIAgent(**_agent_kwargs)
|
||||
with SESSION_AGENT_CACHE_LOCK:
|
||||
SESSION_AGENT_CACHE[session_id] = (agent, _agent_sig)
|
||||
SESSION_AGENT_CACHE.move_to_end(session_id) # LRU: mark as recently used
|
||||
from api.config import SESSION_AGENT_CACHE_MAX
|
||||
while len(SESSION_AGENT_CACHE) > SESSION_AGENT_CACHE_MAX:
|
||||
evicted_sid, _ = SESSION_AGENT_CACHE.popitem(last=False)
|
||||
logger.debug('[webui] Evicted LRU agent from cache: %s', evicted_sid)
|
||||
logger.debug('[webui] Created new agent for session %s', session_id)
|
||||
|
||||
# Store agent instance for cancel/interrupt propagation
|
||||
|
||||
Reference in New Issue
Block a user