Remove LLM query router; agent is client-specified (default: analytics)

balogh.adam@icloud.com · balogh.adam@icloud.com · commit 3f3547a4b8c8 · 2026-02-17T16:46:06.000+01:00
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -28,7 +28,7 @@ python3.13 -m pytest path/to/test_file.py -v
 
 ### Request Flow
 ```
-User Query → FastAPI (/api/v2/agent/run) → Firebase Auth → Query Router (Gemini)
+User Query → FastAPI (/api/v2/agent/run) → Firebase Auth → Agent Selection (client-specified, defaults to analytics)
     ↓
     ├→ Analytics Agent: portfolio analysis, token research, market trends
     └→ Investor Agent: DeFi opportunity finding, yield optimization
@@ -41,7 +41,7 @@ Agent executes tools → LLM inference → Post-process response → Return Agen
 - **agent/**: Agent orchestration - `agent_executors.py` creates LangGraph ReAct agents, `tools.py` defines agent tools, `prompts.py` loads Jinja2 templates
 - **server/**: FastAPI app in `fastapi_server.py`, auth, validation, activity tracking
 - **onchain/**: Blockchain data layer - `pools/` for DeFi protocol abstraction, `tokens/` for metadata, `portfolio/` for wallet analysis, `analytics/` for metrics
-- **templates/**: Jinja2 prompt templates for agents (`analyst_agent.jinja2`, `investor_agent.jinja2`, `router.jinja2`)
+- **templates/**: Jinja2 prompt templates for agents (`analytics_agent.jinja2`, `investor_agent.jinja2`)
 - **api/api_types.py**: All Pydantic models (Token, Pool, Portfolio, Message types)
 
 ### Protocol System
diff --git a/agent/agent_executors.py b/agent/agent_executors.py
@@ -40,9 +40,6 @@
 GOOGLE_GEMINI_20_FLASH_MODEL = (
     "gemini-2.0-flash"  # $0.1/M input tokens; $0.4/M output tokens
 )
-GOOGLE_GEMINI_FLASH_15_8B_MODEL = (
-    "gemini-2.5-flash-lite"  # $0.0375/M input tokens; $0.15/M output tokens
-)
 LLAMA_3_1_405B_MODEL = (
     "meta-llama/llama-3.1-405b-instruct"  # $0.8/M input tokens; $0.8/M output tokens
 )
@@ -63,25 +60,11 @@
 
 # Select model based on configuration
 SUGGESTIONS_MODEL = GOOGLE_GEMINI_20_FLASH_MODEL
-ROUTING_MODEL = GOOGLE_GEMINI_FLASH_15_8B_MODEL
 REASONING_MODEL = GOOGLE_GEMINI_20_FLASH_MODEL
 BASE_URL = "https://generativelanguage.googleapis.com/v1beta/"
 API_KEY = os.getenv("GEMINI_API_KEY")
 
 
-def create_routing_model() -> BaseChatModel:
-    return ChatOpenAI(
-        model=ROUTING_MODEL,
-        temperature=0.0,
-        max_tokens=500,
-        api_key=config.DUMMY_X402_API_KEY,
-        http_async_client=x402_http_client,
-        stream_usage=True,
-        streaming=True,
-        base_url=config.LLM_SERVER_URL,
-    )
-
-
 def create_suggestions_model() -> BaseChatModel:
     return ChatOpenAI(
         model=SUGGESTIONS_MODEL,
diff --git a/agent/prompts.py b/agent/prompts.py
@@ -14,7 +14,6 @@
 investor_agent_template = env.get_template("investor_agent.jinja2")
 analytics_agent_template = env.get_template("analytics_agent.jinja2")
 suggestions_template = env.get_template("suggestions.jinja2")
-router_template = env.get_template("router.jinja2")
 
 
 # We ignore token holdings with a total value of less than $1
@@ -108,27 +107,3 @@ def get_analytics_prompt(
     return analytics_agent_prompt
 
 
-def get_router_prompt(message_history: List[Message], current_message: str) -> str:
-    """Get the router prompt to determine which agent should handle the request."""
-
-    MAX_AGENT_MESSAGE_LENGTH = 400
-
-    # Truncate assistant response to 400 characters, also include the message type
-    message_history = [
-        {
-            "type": message.type,
-            "message": (
-                message.message[:MAX_AGENT_MESSAGE_LENGTH] + "..."
-                if message.type == "assistant"
-                and len(message.message) > MAX_AGENT_MESSAGE_LENGTH
-                else message.message
-            ),
-        }
-        for message in message_history
-    ]
-
-    router_prompt = router_template.render(
-        message_history=message_history,
-        current_message=current_message,
-    )
-    return router_prompt
diff --git a/api/api_types.py b/api/api_types.py
@@ -123,7 +123,7 @@ class Context(BaseModel):
 class AgentChatRequest(BaseModel):
     context: Context
     message: UserMessage
-    agent: Optional[AgentType] = None
+    agent: AgentType = AgentType.ANALYTICS
     captchaToken: Optional[str] = None
 
 
diff --git a/server/fastapi_server.py b/server/fastapi_server.py
@@ -36,13 +36,11 @@
     create_investor_executor,
     create_suggestions_model,
     create_analytics_executor,
-    create_routing_model,
 )
 from agent.prompts import (
     get_investor_agent_prompt,
     get_suggestions_prompt,
     get_analytics_prompt,
-    get_router_prompt,
 )
 from agent.tools import (
     create_investor_agent_toolkit,
@@ -138,7 +136,6 @@ async def shutdown_event():
         await cow_validator.close()
 
     # Initialize agents
-    router_model = create_routing_model()
     suggestions_model = create_suggestions_model()
     analytics_agent = create_analytics_executor(token_metadata_repo)
     investor_agent = create_investor_executor()
@@ -150,7 +147,6 @@ async def shutdown_event():
     protocol_registry.register_protocol(KaminoProtocol())
 
     # Store agents in app state
-    app.state.router_model = router_model
     app.state.suggestions_model = suggestions_model
     app.state.analytics_agent = analytics_agent
     app.state.investor_agent = investor_agent
@@ -320,7 +316,6 @@ async def run_agent(
                 portfolio=portfolio,
                 investor_agent=investor_agent,
                 analytics_agent=analytics_agent,
-                router_model=router_model,
             )
 
             return (
@@ -536,49 +531,15 @@ async def handle_agent_chat_request(
     token_metadata_repo: TokenMetadataRepo,
     investor_agent: any,
     analytics_agent: any,
-    router_model: ChatOpenAI,
 ) -> AgentMessage:
-    # If agent is explicitly specified, bypass router
-    if request.agent is not None:
-        if request.agent == AgentType.ANALYTICS:
-            return await handle_analytics_chat_request(
-                request, token_metadata_repo, portfolio, analytics_agent
-            )
-        elif request.agent == AgentType.INVESTOR:
-            return await handle_investor_chat_request(
-                request, portfolio, investor_agent, protocol_registry
-            )
-        else:
-            raise ValueError(f"Invalid agent type specified: {request.agent}")
-
-    # Otherwise use router to determine agent
-    router_prompt = get_router_prompt(
-        message_history=request.context.conversationHistory[-NUM_MESSAGES_TO_KEEP:],
-        current_message=request.message.message,
-    )
-
-    router_response = await router_model.ainvoke(router_prompt)
-    selected_agent = router_response.content.strip().lower()
-
-    # Extract agent type from response if it contains additional text
-    if "yield_agent" in selected_agent:
-        selected_agent = AgentType.YIELD
-    elif "analytics_agent" in selected_agent:
-        selected_agent = AgentType.ANALYTICS
-    else:
-        # Default to analytics agent if no clear choice
-        selected_agent = AgentType.ANALYTICS
-
-    if selected_agent == AgentType.ANALYTICS:
-        return await handle_analytics_chat_request(
-            request, token_metadata_repo, portfolio, analytics_agent
-        )
-    elif selected_agent == AgentType.YIELD:
+    if request.agent == AgentType.YIELD:
         return await handle_investor_chat_request(
             request, portfolio, investor_agent, protocol_registry
         )
     else:
-        raise ValueError(f"Invalid agent selection from router: {selected_agent}")
+        return await handle_analytics_chat_request(
+            request, token_metadata_repo, portfolio, analytics_agent
+        )
 
 
 async def handle_investor_chat_request(
diff --git a/templates/router.jinja2 b/templates/router.jinja2