From 9a380bc19c74e857194a9e6444ffe73be9ab5d67 Mon Sep 17 00:00:00 2001 From: Heiko Date: Sat, 13 Jun 2026 13:56:08 +0200 Subject: [PATCH] Normalize i18n and prompt baseline --- backend/app/api/simulation.py | 29 +- backend/app/services/ontology_generator.py | 192 ++----- backend/app/services/report_agent.py | 503 ++++++------------ .../services/simulation_config_generator.py | 134 ++--- backend/app/utils/llm_client.py | 5 +- backend/app/utils/locale.py | 37 +- frontend/index.html | 8 +- frontend/src/i18n/index.js | 15 +- 8 files changed, 335 insertions(+), 588 deletions(-) diff --git a/backend/app/api/simulation.py b/backend/app/api/simulation.py index 3a8e1e3fc8..b1f1e2eb6a 100644 --- a/backend/app/api/simulation.py +++ b/backend/app/api/simulation.py @@ -14,33 +14,30 @@ from ..services.simulation_manager import SimulationManager, SimulationStatus from ..services.simulation_runner import SimulationRunner, RunnerStatus from ..utils.logger import get_logger -from ..utils.locale import t, get_locale, set_locale +from ..utils.locale import t, get_locale, set_locale, get_language_instruction from ..models.project import ProjectManager logger = get_logger('mirofish.api.simulation') -# Interview prompt 优化前缀 -# 添加此前缀可以避免Agent调用工具,直接用文本回复 -INTERVIEW_PROMPT_PREFIX = "结合你的人设、所有的过往记忆与行动,不调用任何工具直接用文本回复我:" +# Interview prompt prefix. +# This keeps interview calls in plain text and lets the requested locale drive the answer language. +INTERVIEW_PROMPT_PREFIX = ( + "Answer directly in plain text. Use your persona, all prior memories, prior actions, " + "and the simulation context. Do not call tools. Do not return JSON. " + "Follow this language instruction: {language_instruction}\n\nQuestion: " +) def optimize_interview_prompt(prompt: str) -> str: - """ - 优化Interview提问,添加前缀避免Agent调用工具 - - Args: - prompt: 原始提问 - - Returns: - 优化后的提问 - """ + """Add a plain-text interview prefix so agents answer instead of calling tools.""" if not prompt: return prompt - # 避免重复添加前缀 - if prompt.startswith(INTERVIEW_PROMPT_PREFIX): + if prompt.startswith("Answer directly in plain text."): return prompt - return f"{INTERVIEW_PROMPT_PREFIX}{prompt}" + return INTERVIEW_PROMPT_PREFIX.format( + language_instruction=get_language_instruction() + ) + prompt # ============== 实体读取接口 ============== diff --git a/backend/app/services/ontology_generator.py b/backend/app/services/ontology_generator.py index 01a3d799a5..8be1dbb3ae 100644 --- a/backend/app/services/ontology_generator.py +++ b/backend/app/services/ontology_generator.py @@ -27,149 +27,45 @@ def _to_pascal_case(name: str) -> str: # 本体生成的系统提示词 -ONTOLOGY_SYSTEM_PROMPT = """你是一个专业的知识图谱本体设计专家。你的任务是分析给定的文本内容和模拟需求,设计适合**社交媒体舆论模拟**的实体类型和关系类型。 +ONTOLOGY_SYSTEM_PROMPT = """You are a knowledge-graph ontology designer for social simulation. Analyze the uploaded material and the simulation requirement, then design entity types and relationship types that fit a social-media-style multi-agent simulation. -**重要:你必须输出有效的JSON格式数据,不要输出任何其他内容。** +You must return valid JSON only. Do not include Markdown, comments, or explanatory text outside JSON. -## 核心任务背景 +Context: every entity type should represent an actor that can speak, react, influence others, or be represented by an account in the simulation. Suitable entities include people, roles, companies, institutions, regulators, media organizations, platforms, professional groups, client groups, and other real-world actor categories. Avoid abstract concepts, topics, emotions, and attitudes as entity types. -我们正在构建一个**社交媒体舆论模拟系统**。在这个系统中: -- 每个实体都是一个可以在社交媒体上发声、互动、传播信息的"账号"或"主体" -- 实体之间会相互影响、转发、评论、回应 -- 我们需要模拟舆论事件中各方的反应和信息传播路径 - -因此,**实体必须是现实中真实存在的、可以在社媒上发声和互动的主体**: - -**可以是**: -- 具体的个人(公众人物、当事人、意见领袖、专家学者、普通人) -- 公司、企业(包括其官方账号) -- 组织机构(大学、协会、NGO、工会等) -- 政府部门、监管机构 -- 媒体机构(报纸、电视台、自媒体、网站) -- 社交媒体平台本身 -- 特定群体代表(如校友会、粉丝团、维权群体等) - -**不可以是**: -- 抽象概念(如"舆论"、"情绪"、"趋势") -- 主题/话题(如"学术诚信"、"教育改革") -- 观点/态度(如"支持方"、"反对方") - -## 输出格式 - -请输出JSON格式,包含以下结构: - -```json +Output schema: { - "entity_types": [ - { - "name": "实体类型名称(英文,PascalCase)", - "description": "简短描述(英文,不超过100字符)", - "attributes": [ - { - "name": "属性名(英文,snake_case)", - "type": "text", - "description": "属性描述" - } - ], - "examples": ["示例实体1", "示例实体2"] - } - ], - "edge_types": [ - { - "name": "关系类型名称(英文,UPPER_SNAKE_CASE)", - "description": "简短描述(英文,不超过100字符)", - "source_targets": [ - {"source": "源实体类型", "target": "目标实体类型"} - ], - "attributes": [] - } - ], - "analysis_summary": "对文本内容的简要分析说明" + "entity_types": [ + { + "name": "EntityTypeNameInEnglishPascalCase", + "description": "Short description in the target language", + "attributes": [ + {"name": "attribute_name", "type": "text", "description": "Attribute description in the target language"} + ], + "examples": ["Example 1", "Example 2"] + } + ], + "edge_types": [ + { + "name": "RELATIONSHIP_TYPE_IN_UPPER_SNAKE_CASE", + "description": "Short description in the target language", + "source_targets": [{"source": "SourceEntityType", "target": "TargetEntityType"}], + "attributes": [] + } + ], + "analysis_summary": "Brief analysis in the target language" } -``` - -## 设计指南(极其重要!) - -### 1. 实体类型设计 - 必须严格遵守 - -**数量要求:必须正好10个实体类型** - -**层次结构要求(必须同时包含具体类型和兜底类型)**: - -你的10个实体类型必须包含以下层次: -A. **兜底类型(必须包含,放在列表最后2个)**: - - `Person`: 任何自然人个体的兜底类型。当一个人不属于其他更具体的人物类型时,归入此类。 - - `Organization`: 任何组织机构的兜底类型。当一个组织不属于其他更具体的组织类型时,归入此类。 - -B. **具体类型(8个,根据文本内容设计)**: - - 针对文本中出现的主要角色,设计更具体的类型 - - 例如:如果文本涉及学术事件,可以有 `Student`, `Professor`, `University` - - 例如:如果文本涉及商业事件,可以有 `Company`, `CEO`, `Employee` - -**为什么需要兜底类型**: -- 文本中会出现各种人物,如"中小学教师"、"路人甲"、"某位网友" -- 如果没有专门的类型匹配,他们应该被归入 `Person` -- 同理,小型组织、临时团体等应该归入 `Organization` - -**具体类型的设计原则**: -- 从文本中识别出高频出现或关键的角色类型 -- 每个具体类型应该有明确的边界,避免重叠 -- description 必须清晰说明这个类型和兜底类型的区别 - -### 2. 关系类型设计 - -- 数量:6-10个 -- 关系应该反映社媒互动中的真实联系 -- 确保关系的 source_targets 涵盖你定义的实体类型 - -### 3. 属性设计 - -- 每个实体类型1-3个关键属性 -- **注意**:属性名不能使用 `name`、`uuid`、`group_id`、`created_at`、`summary`(这些是系统保留字) -- 推荐使用:`full_name`, `title`, `role`, `position`, `location`, `description` 等 - -## 实体类型参考 - -**个人类(具体)**: -- Student: 学生 -- Professor: 教授/学者 -- Journalist: 记者 -- Celebrity: 明星/网红 -- Executive: 高管 -- Official: 政府官员 -- Lawyer: 律师 -- Doctor: 医生 - -**个人类(兜底)**: -- Person: 任何自然人(不属于上述具体类型时使用) - -**组织类(具体)**: -- University: 高校 -- Company: 公司企业 -- GovernmentAgency: 政府机构 -- MediaOutlet: 媒体机构 -- Hospital: 医院 -- School: 中小学 -- NGO: 非政府组织 - -**组织类(兜底)**: -- Organization: 任何组织机构(不属于上述具体类型时使用) - -## 关系类型参考 - -- WORKS_FOR: 工作于 -- STUDIES_AT: 就读于 -- AFFILIATED_WITH: 隶属于 -- REPRESENTS: 代表 -- REGULATES: 监管 -- REPORTS_ON: 报道 -- COMMENTS_ON: 评论 -- RESPONDS_TO: 回应 -- SUPPORTS: 支持 -- OPPOSES: 反对 -- COLLABORATES_WITH: 合作 -- COMPETES_WITH: 竞争 +Design rules: +- Create exactly 10 entity types. +- The last two entity types must be Person and Organization as fallback types. +- Create 8 specific entity types that fit the uploaded material. +- Create 6 to 10 relationship types. +- Entity type names must be English PascalCase. +- Relationship names must be English UPPER_SNAKE_CASE. +- Attribute names must be English snake_case. +- Do not use reserved attribute names: name, uuid, group_id, created_at, summary. +- Natural-language descriptions, examples where possible, and analysis_summary must follow the requested response language. """ @@ -245,31 +141,31 @@ def _build_user_message( combined_text = combined_text[:self.MAX_TEXT_LENGTH_FOR_LLM] combined_text += f"\n\n...(原文共{original_length}字,已截取前{self.MAX_TEXT_LENGTH_FOR_LLM}字用于本体分析)..." - message = f"""## 模拟需求 + message = f"""## Simulation requirement {simulation_requirement} -## 文档内容 +## Uploaded material {combined_text} """ if additional_context: message += f""" -## 额外说明 +## Additional context {additional_context} """ message += """ -请根据以上内容,设计适合社会舆论模拟的实体类型和关系类型。 - -**必须遵守的规则**: -1. 必须正好输出10个实体类型 -2. 最后2个必须是兜底类型:Person(个人兜底)和 Organization(组织兜底) -3. 前8个是根据文本内容设计的具体类型 -4. 所有实体类型必须是现实中可以发声的主体,不能是抽象概念 -5. 属性名不能使用 name、uuid、group_id 等保留字,用 full_name、org_name 等替代 +Based on the material above, design entity and relationship types for the simulation. Return JSON only. + +Mandatory rules: +1. Output exactly 10 entity types. +2. The last 2 entity types must be fallback types: Person and Organization. +3. The first 8 entity types must be specific to the provided material. +4. Every entity type must be a real-world actor that can plausibly speak or act, not an abstract concept. +5. Do not use reserved attribute names such as name, uuid, or group_id. Use alternatives such as full_name or org_name. """ return message diff --git a/backend/app/services/report_agent.py b/backend/app/services/report_agent.py index cecd70b464..a0fb928016 100644 --- a/backend/app/services/report_agent.py +++ b/backend/app/services/report_agent.py @@ -474,388 +474,209 @@ def to_dict(self) -> Dict[str, Any]: # ── 工具描述 ── TOOL_DESC_INSIGHT_FORGE = """\ -【深度洞察检索 - 强大的检索工具】 -这是我们强大的检索函数,专为深度分析设计。它会: -1. 自动将你的问题分解为多个子问题 -2. 从多个维度检索模拟图谱中的信息 -3. 整合语义搜索、实体分析、关系链追踪的结果 -4. 返回最全面、最深度的检索内容 - -【使用场景】 -- 需要深入分析某个话题 -- 需要了解事件的多个方面 -- 需要获取支撑报告章节的丰富素材 - -【返回内容】 -- 相关事实原文(可直接引用) -- 核心实体洞察 -- 关系链分析""" +[Deep insight search] +Use this tool for important report sections and strategic analysis. It decomposes a question into several focused sub-questions, searches simulation facts, entities, and relationship chains, then returns evidence-rich material for the report. + +Best use cases: +- Core drivers, tensions, causal mechanisms, winners and losers +- Lead scenario, weak signals, contradictions, decision points +- Any section that needs more than a simple lookup + +Input: +- query: the question or topic to analyze +- report_context: optional context from the current report section + +Output: +- generated sub-questions +- simulation facts suitable for citation +- relevant entities and relationship chains +""" TOOL_DESC_PANORAMA_SEARCH = """\ -【广度搜索 - 获取全貌视图】 -这个工具用于获取模拟结果的完整全貌,特别适合了解事件演变过程。它会: -1. 获取所有相关节点和关系 -2. 区分当前有效的事实和历史/过期的事实 -3. 帮助你了解舆情是如何演变的 - -【使用场景】 -- 需要了解事件的完整发展脉络 -- 需要对比不同阶段的舆情变化 -- 需要获取全面的实体和关系信息 - -【返回内容】 -- 当前有效事实(模拟最新结果) -- 历史/过期事实(演变记录) -- 所有涉及的实体""" +[Panorama search] +Use this tool to understand the full simulation picture and event development. It retrieves current and historical facts, including expired or superseded facts. + +Best use cases: +- What happened over time +- Which actors appeared in the simulation +- How the discussion, positions, or conflicts evolved + +Output: +- active facts +- historical facts +- involved entities +- basic counts +""" TOOL_DESC_QUICK_SEARCH = """\ -【简单搜索 - 快速检索】 -轻量级的快速检索工具,适合简单、直接的信息查询。 +[Quick search] +Use this lightweight tool for simple, direct lookups. It is useful for checking one fact, one actor, one term, or a narrow topic. -【使用场景】 -- 需要快速查找某个具体信息 -- 需要验证某个事实 -- 简单的信息检索 +Input: +- query: a precise search phrase -【返回内容】 -- 与查询最相关的事实列表""" +Output: +- matching facts from the simulation memory +""" TOOL_DESC_INTERVIEW_AGENTS = """\ -【深度采访 - 真实Agent采访(双平台)】 -调用OASIS模拟环境的采访API,对正在运行的模拟Agent进行真实采访! -这不是LLM模拟,而是调用真实的采访接口获取模拟Agent的原始回答。 -默认在Twitter和Reddit两个平台同时采访,获取更全面的观点。 - -功能流程: -1. 自动读取人设文件,了解所有模拟Agent -2. 智能选择与采访主题最相关的Agent(如学生、媒体、官方等) -3. 自动生成采访问题 -4. 调用 /api/simulation/interview/batch 接口在双平台进行真实采访 -5. 整合所有采访结果,提供多视角分析 - -【使用场景】 -- 需要从不同角色视角了解事件看法(学生怎么看?媒体怎么看?官方怎么说?) -- 需要收集多方意见和立场 -- 需要获取模拟Agent的真实回答(来自OASIS模拟环境) -- 想让报告更生动,包含"采访实录" - -【返回内容】 -- 被采访Agent的身份信息 -- 各Agent在Twitter和Reddit两个平台的采访回答 -- 关键引言(可直接引用) -- 采访摘要和观点对比 - -【重要】需要OASIS模拟环境正在运行才能使用此功能!""" +[Agent interviews] +Use this tool to interview simulated agents and add direct perspectives to the report. The tool selects relevant agents, generates interview questions if needed, calls the simulation interview API on both Twitter and Reddit contexts, and summarizes the responses. + +Best use cases: +- Need direct voice from simulated actors +- Need contrasting perspectives +- Need report sections with interview excerpts or stakeholder views + +Input: +- interview_topic: interview topic or requirement +- max_agents: optional number of agents to interview, default 5, max 10 + +Output: +- selected agents and rationale +- interview questions +- raw answers from the simulated platforms +- interview summary and key viewpoints +""" # ── 大纲规划 prompt ── PLAN_SYSTEM_PROMPT = """\ -你是一个「未来预测报告」的撰写专家,拥有对模拟世界的「上帝视角」——你可以洞察模拟中每一位Agent的行为、言论和互动。 - -【核心理念】 -我们构建了一个模拟世界,并向其中注入了特定的「模拟需求」作为变量。模拟世界的演化结果,就是对未来可能发生情况的预测。你正在观察的不是"实验数据",而是"未来的预演"。 - -【你的任务】 -撰写一份「未来预测报告」,回答: -1. 在我们设定的条件下,未来发生了什么? -2. 各类Agent(人群)是如何反应和行动? -3. 这个模拟揭示了哪些值得关注的未来趋势和风险? - -【报告定位】 -- ✅ 这是一份基于模拟的未来预测报告,揭示"如果这样,未来会怎样" -- ✅ 聚焦于预测结果:事件走向、群体反应、涌现现象、潜在风险 -- ✅ 模拟世界中的Agent言行就是对未来人群行为的预测 -- ❌ 不是对现实世界现状的分析 -- ❌ 不是泛泛而谈的舆情综述 - -【章节数量限制】 -- 最少2个章节,最多5个章节 -- 不需要子章节,每个章节直接撰写完整内容 -- 内容要精炼,聚焦于核心预测发现 -- 章节结构由你根据预测结果自主设计 - -请输出JSON格式的报告大纲,格式如下: -{ - "title": "报告标题", - "summary": "报告摘要(一句话概括核心预测发现)", - "sections": [ - { - "title": "章节标题", - "description": "章节内容描述" - } - ] -} +You are an expert writer of future-prediction management reports. You observe a simulated world from a full-system perspective: agent profiles, actions, statements, interactions, and graph facts. -注意:sections数组最少2个,最多5个元素!""" +Core idea: the simulation is a rehearsal of a possible future under the supplied conditions. The report should explain what happened in that future rehearsal, how actor groups reacted, and which strategic patterns, risks, and opportunities emerged. -PLAN_USER_PROMPT_TEMPLATE = """\ -【预测场景设定】 -我们向模拟世界注入的变量(模拟需求):{simulation_requirement} +Your task: +1. Create a concise report outline. +2. Focus on predicted dynamics, actor reactions, emerging conflicts, alliances, market shifts, risks, and options. +3. Keep the report management-oriented and useful for decisions. -【模拟世界规模】 -- 参与模拟的实体数量: {total_nodes} -- 实体间产生的关系数量: {total_edges} -- 实体类型分布: {entity_types} -- 活跃Agent数量: {total_entities} +Rules: +- Produce 2 to 5 sections. +- Each section should be a substantial report section, not a subheading placeholder. +- Return valid JSON only. +- Use the requested response language for title, summary, section titles, and descriptions. -【模拟预测到的部分未来事实样本】 -{related_facts_json} +JSON schema: +{ + "title": "Report title", + "summary": "One-sentence core prediction", + "sections": [ + {"title": "Section title", "description": "What this section covers"} + ] +} +""" -请以「上帝视角」审视这个未来预演: -1. 在我们设定的条件下,未来呈现出了什么样的状态? -2. 各类人群(Agent)是如何反应和行动的? -3. 这个模拟揭示了哪些值得关注的未来趋势? +PLAN_USER_PROMPT_TEMPLATE = """\ +Simulation requirement: +{simulation_requirement} -根据预测结果,设计最合适的报告章节结构。 +Simulation world size: +- Total graph nodes: {total_nodes} +- Total graph edges: {total_edges} +- Entity type distribution: {entity_types} +- Active agents/entities: {total_entities} -【再次提醒】报告章节数量:最少2个,最多5个,内容要精炼聚焦于核心预测发现。""" +Sample facts from the simulated future: +{related_facts_json} -# ── 章节生成 prompt ── +Design the best report structure for this future rehearsal. Focus on the most decision-relevant findings. Return JSON only. +""" SECTION_SYSTEM_PROMPT_TEMPLATE = """\ -你是一个「未来预测报告」的撰写专家,正在撰写报告的一个章节。 - -报告标题: {report_title} -报告摘要: {report_summary} -预测场景(模拟需求): {simulation_requirement} - -当前要撰写的章节: {section_title} - -═══════════════════════════════════════════════════════════════ -【核心理念】 -═══════════════════════════════════════════════════════════════ - -模拟世界是对未来的预演。我们向模拟世界注入了特定条件(模拟需求), -模拟中Agent的行为和互动,就是对未来人群行为的预测。 - -你的任务是: -- 揭示在设定条件下,未来发生了什么 -- 预测各类人群(Agent)是如何反应和行动的 -- 发现值得关注的未来趋势、风险和机会 - -❌ 不要写成对现实世界现状的分析 -✅ 要聚焦于"未来会怎样"——模拟结果就是预测的未来 - -═══════════════════════════════════════════════════════════════ -【最重要的规则 - 必须遵守】 -═══════════════════════════════════════════════════════════════ - -1. 【必须调用工具观察模拟世界】 - - 你正在以「上帝视角」观察未来的预演 - - 所有内容必须来自模拟世界中发生的事件和Agent言行 - - 禁止使用你自己的知识来编写报告内容 - - 每个章节至少调用3次工具(最多5次)来观察模拟的世界,它代表了未来 - -2. 【必须引用Agent的原始言行】 - - Agent的发言和行为是对未来人群行为的预测 - - 在报告中使用引用格式展示这些预测,例如: - > "某类人群会表示:原文内容..." - - 这些引用是模拟预测的核心证据 - -3. 【语言一致性 - 引用内容必须翻译为报告语言】 - - 工具返回的内容可能包含与报告语言不同的表述 - - 报告必须全部使用与用户指定语言一致的语言撰写 - - 当你引用工具返回的其他语言内容时,必须将其翻译为报告语言后再写入 - - 翻译时保持原意不变,确保表述自然通顺 - - 这一规则同时适用于正文和引用块(> 格式)中的内容 +You are writing one section of a future-prediction management report. -4. 【忠实呈现预测结果】 - - 报告内容必须反映模拟世界中的代表未来的模拟结果 - - 不要添加模拟中不存在的信息 - - 如果某方面信息不足,如实说明 +Report title: {report_title} +Report summary: {report_summary} +Simulation requirement: {simulation_requirement} +Current section: {section_title} -═══════════════════════════════════════════════════════════════ -【⚠️ 格式规范 - 极其重要!】 -═══════════════════════════════════════════════════════════════ +The simulated world is evidence. Agent behavior, posts, comments, interviews, and graph facts represent the predicted future under the scenario conditions. -【一个章节 = 最小内容单位】 -- 每个章节是报告的最小分块单位 -- ❌ 禁止在章节内使用任何 Markdown 标题(#、##、###、#### 等) -- ❌ 禁止在内容开头添加章节主标题 -- ✅ 章节标题由系统自动添加,你只需撰写纯正文内容 -- ✅ 使用**粗体**、段落分隔、引用、列表来组织内容,但不要用标题 - -【正确示例】 -``` -本章节分析了事件的舆论传播态势。通过对模拟数据的深入分析,我们发现... - -**首发引爆阶段** - -微博作为舆情的第一现场,承担了信息首发的核心功能: - -> "微博贡献了68%的首发声量..." - -**情绪放大阶段** - -抖音平台进一步放大了事件影响力: - -- 视觉冲击力强 -- 情绪共鸣度高 -``` - -【错误示例】 -``` -## 执行摘要 ← 错误!不要添加任何标题 -### 一、首发阶段 ← 错误!不要用###分小节 -#### 1.1 详细分析 ← 错误!不要用####细分 - -本章节分析了... -``` - -═══════════════════════════════════════════════════════════════ -【可用检索工具】(每章节调用3-5次) -═══════════════════════════════════════════════════════════════ +Rules: +1. Use tools to inspect the simulation before writing. Each section should use at least 3 tool calls and at most 5. +2. Base claims on tool results. Do not invent facts. +3. Translate any quoted or retrieved material into the requested report language before including it. +4. Write the entire section in the requested report language. +5. Do not use Markdown headings inside the section. The system adds section titles. Use paragraphs, bullets, bold emphasis, and quote blocks only. +6. If evidence is thin, state that clearly and explain the implication. +Available tools: {tools_description} -【工具使用建议 - 请混合使用不同工具,不要只用一种】 -- insight_forge: 深度洞察分析,自动分解问题并多维度检索事实和关系 -- panorama_search: 广角全景搜索,了解事件全貌、时间线和演变过程 -- quick_search: 快速验证某个具体信息点 -- interview_agents: 采访模拟Agent,获取不同角色的第一人称观点和真实反应 +Use a ReACT loop. In each response do exactly one of these: -═══════════════════════════════════════════════════════════════ -【工作流程】 -═══════════════════════════════════════════════════════════════ - -每次回复你只能做以下两件事之一(不可同时做): - -选项A - 调用工具: -输出你的思考,然后用以下格式调用一个工具: +A) Call one tool: -{{"name": "工具名称", "parameters": {{"参数名": "参数值"}}}} +{{"name": "tool_name", "parameters": {{"parameter": "value"}}}} -系统会执行工具并把结果返回给你。你不需要也不能自己编写工具返回结果。 - -选项B - 输出最终内容: -当你已通过工具获取了足够信息,以 "Final Answer:" 开头输出章节内容。 - -⚠️ 严格禁止: -- 禁止在一次回复中同时包含工具调用和 Final Answer -- 禁止自己编造工具返回结果(Observation),所有工具结果由系统注入 -- 每次回复最多调用一个工具 - -═══════════════════════════════════════════════════════════════ -【章节内容要求】 -═══════════════════════════════════════════════════════════════ - -1. 内容必须基于工具检索到的模拟数据 -2. 大量引用原文来展示模拟效果 -3. 使用Markdown格式(但禁止使用标题): - - 使用 **粗体文字** 标记重点(代替子标题) - - 使用列表(-或1.2.3.)组织要点 - - 使用空行分隔不同段落 - - ❌ 禁止使用 #、##、###、#### 等任何标题语法 -4. 【引用格式规范 - 必须单独成段】 - 引用必须独立成段,前后各有一个空行,不能混在段落中: - - ✅ 正确格式: - ``` - 校方的回应被认为缺乏实质内容。 - - > "校方的应对模式在瞬息万变的社交媒体环境中显得僵化和迟缓。" - - 这一评价反映了公众的普遍不满。 - ``` - - ❌ 错误格式: - ``` - 校方的回应被认为缺乏实质内容。> "校方的应对模式..." 这一评价反映了... - ``` -5. 保持与其他章节的逻辑连贯性 -6. 【避免重复】仔细阅读下方已完成的章节内容,不要重复描述相同的信息 -7. 【再次强调】不要添加任何标题!用**粗体**代替小节标题""" -SECTION_USER_PROMPT_TEMPLATE = """\ -已完成的章节内容(请仔细阅读,避免重复): -{previous_content} - -═══════════════════════════════════════════════════════════════ -【当前任务】撰写章节: {section_title} -═══════════════════════════════════════════════════════════════ +B) Write the final section content: +Final Answer: ... -【重要提醒】 -1. 仔细阅读上方已完成的章节,避免重复相同的内容! -2. 开始前必须先调用工具获取模拟数据 -3. 请混合使用不同工具,不要只用一种 -4. 报告内容必须来自检索结果,不要使用自己的知识 +Never include a tool call and Final Answer in the same response. Never fabricate tool output. +""" -【⚠️ 格式警告 - 必须遵守】 -- ❌ 不要写任何标题(#、##、###、####都不行) -- ❌ 不要写"{section_title}"作为开头 -- ✅ 章节标题由系统自动添加 -- ✅ 直接写正文,用**粗体**代替小节标题 +SECTION_USER_PROMPT_TEMPLATE = """\ +Previously completed sections, for continuity and to avoid repetition: +{previous_content} -请开始: -1. 首先思考(Thought)这个章节需要什么信息 -2. 然后调用工具(Action)获取模拟数据 -3. 收集足够信息后输出 Final Answer(纯正文,无任何标题)""" +Current task: write the section "{section_title}". -# ── ReACT 循环内消息模板 ── +Start by thinking about which simulation evidence is needed, then call a tool. After enough evidence has been gathered, output Final Answer with section body text only. Do not repeat the section title. Do not use Markdown headings. +""" REACT_OBSERVATION_TEMPLATE = """\ -Observation(检索结果): - -═══ 工具 {tool_name} 返回 ═══ +Observation from tool {tool_name}: {result} -═══════════════════════════════════════════════════════════════ -已调用工具 {tool_calls_count}/{max_tool_calls} 次(已用: {used_tools_str}){unused_hint} -- 如果信息充分:以 "Final Answer:" 开头输出章节内容(必须引用上述原文) -- 如果需要更多信息:调用一个工具继续检索 -═══════════════════════════════════════════════════════════════""" +Tool calls used: {tool_calls_count}/{max_tool_calls}. Used tools: {used_tools_str}. {unused_hint} +If the evidence is sufficient, respond with Final Answer and write the section. If more evidence is needed, call one additional tool. +""" REACT_INSUFFICIENT_TOOLS_MSG = ( - "【注意】你只调用了{tool_calls_count}次工具,至少需要{min_tool_calls}次。" - "请再调用工具获取更多模拟数据,然后再输出 Final Answer。{unused_hint}" + "You have used {tool_calls_count} tool calls. At least {min_tool_calls} are required before Final Answer. " + "Call another tool to inspect more simulation evidence. {unused_hint}" ) REACT_INSUFFICIENT_TOOLS_MSG_ALT = ( - "当前只调用了 {tool_calls_count} 次工具,至少需要 {min_tool_calls} 次。" - "请调用工具获取模拟数据。{unused_hint}" + "You have only used {tool_calls_count} tool calls. At least {min_tool_calls} are required. " + "Call a tool now. {unused_hint}" ) REACT_TOOL_LIMIT_MSG = ( - "工具调用次数已达上限({tool_calls_count}/{max_tool_calls}),不能再调用工具。" - '请立即基于已获取的信息,以 "Final Answer:" 开头输出章节内容。' + "The tool-call limit has been reached ({tool_calls_count}/{max_tool_calls}). " + "Now write the section using Final Answer." ) -REACT_UNUSED_TOOLS_HINT = "\n💡 你还没有使用过: {unused_list},建议尝试不同工具获取多角度信息" - -REACT_FORCE_FINAL_MSG = "已达到工具调用限制,请直接输出 Final Answer: 并生成章节内容。" +REACT_UNUSED_TOOLS_HINT = "Unused tools worth considering: {unused_list}." -# ── Chat prompt ── +REACT_FORCE_FINAL_MSG = "Tool-call or iteration limit reached. Write Final Answer now." CHAT_SYSTEM_PROMPT_TEMPLATE = """\ -你是一个简洁高效的模拟预测助手。 +You are a concise simulation-analysis assistant. -【背景】 -预测条件: {simulation_requirement} +Simulation requirement: {simulation_requirement} -【已生成的分析报告】 +Existing report: {report_content} -【规则】 -1. 优先基于上述报告内容回答问题 -2. 直接回答问题,避免冗长的思考论述 -3. 仅在报告内容不足以回答时,才调用工具检索更多数据 -4. 回答要简洁、清晰、有条理 +Rules: +1. Prefer the existing report when it answers the question. +2. Answer directly and concisely. +3. Use tools only if the report is insufficient. +4. Use the requested response language for the entire answer. -【可用工具】(仅在需要时使用,最多调用1-2次) +Available tools: {tools_description} -【工具调用格式】 +Tool-call format: -{{"name": "工具名称", "parameters": {{"参数名": "参数值"}}}} +{{"name": "tool_name", "parameters": {{"parameter": "value"}}}} +""" -【回答风格】 -- 简洁直接,不要长篇大论 -- 使用 > 格式引用关键内容 -- 优先给出结论,再解释原因""" - -CHAT_OBSERVATION_SUFFIX = "\n\n请简洁回答问题。" - +CHAT_OBSERVATION_SUFFIX = "\n\nAnswer the user concisely in the requested response language." # ═══════════════════════════════════════════════════════════════ # ReportAgent 主类 @@ -923,8 +744,8 @@ def _define_tools(self) -> Dict[str, Dict[str, Any]]: "name": "insight_forge", "description": TOOL_DESC_INSIGHT_FORGE, "parameters": { - "query": "你想深入分析的问题或话题", - "report_context": "当前报告章节的上下文(可选,有助于生成更精准的子问题)" + "query": "Question or topic to analyze deeply", + "report_context": "Optional current section context for better sub-questions" } }, "panorama_search": { @@ -947,8 +768,8 @@ def _define_tools(self) -> Dict[str, Dict[str, Any]]: "name": "interview_agents", "description": TOOL_DESC_INTERVIEW_AGENTS, "parameters": { - "interview_topic": "采访主题或需求描述(如:'了解学生对宿舍甲醛事件的看法')", - "max_agents": "最多采访的Agent数量(可选,默认5,最大10)" + "interview_topic": "Interview topic or requirement", + "max_agents": "Maximum number of agents to interview, default 5, max 10" } } } @@ -1126,7 +947,7 @@ def _is_valid_tool_call(self, data: dict) -> bool: def _get_tools_description(self) -> str: """生成工具描述文本""" - desc_parts = ["可用工具:"] + desc_parts = ["Available tools:"] for name, tool in self.tools.items(): params_desc = ", ".join([f"{k}: {v}" for k, v in tool["parameters"].items()]) desc_parts.append(f"- {name}: {tool['description']}") @@ -1194,7 +1015,7 @@ def plan_outline( )) outline = ReportOutline( - title=response.get("title", "模拟分析报告"), + title=response.get("title", "Simulation Analysis Report"), summary=response.get("summary", ""), sections=sections ) @@ -1209,12 +1030,12 @@ def plan_outline( logger.error(t('report.outlinePlanFailed', error=str(e))) # 返回默认大纲(3个章节,作为fallback) return ReportOutline( - title="未来预测报告", - summary="基于模拟预测的未来趋势与风险分析", + title="Future Prediction Report", + summary="Future trends and risk analysis based on simulation evidence", sections=[ - ReportSection(title="预测场景与核心发现"), - ReportSection(title="人群行为预测分析"), - ReportSection(title="趋势展望与风险提示") + ReportSection(title="Scenario and Key Findings"), + ReportSection(title="Actor Behavior Analysis"), + ReportSection(title="Trends and Risks") ] ) @@ -1291,7 +1112,7 @@ def _generate_section_react( all_tools = {"insight_forge", "panorama_search", "quick_search", "interview_agents"} # 报告上下文,用于InsightForge的子问题生成 - report_context = f"章节标题: {section.title}\n模拟需求: {self.simulation_requirement}" + report_context = f"Section title: {section.title}\nSimulation requirement: {self.simulation_requirement}" for iteration in range(max_iterations): if progress_callback: @@ -1313,8 +1134,8 @@ def _generate_section_react( logger.warning(t('report.sectionIterNone', title=section.title, iteration=iteration + 1)) # 如果还有迭代次数,添加消息并重试 if iteration < max_iterations - 1: - messages.append({"role": "assistant", "content": "(响应为空)"}) - messages.append({"role": "user", "content": "请继续生成内容。"}) + messages.append({"role": "assistant", "content": "(empty response)"}) + messages.append({"role": "user", "content": "Please continue in the requested response language."}) continue # 最后一次迭代也返回 None,跳出循环进入强制收尾 break @@ -1339,11 +1160,11 @@ def _generate_section_react( messages.append({ "role": "user", "content": ( - "【格式错误】你在一次回复中同时包含了工具调用和 Final Answer,这是不允许的。\n" - "每次回复只能做以下两件事之一:\n" - "- 调用一个工具(输出一个 块,不要写 Final Answer)\n" - "- 输出最终内容(以 'Final Answer:' 开头,不要包含 )\n" - "请重新回复,只做其中一件事。" + "Format error: your response contains both a tool call and Final Answer.\n" + "Each response must do exactly one of these two things:\n" + "- Call one tool by outputting one block.\n" + "- Output final content starting with 'Final Answer:' and no .\n" + "Reply again and do only one of the two." ), }) continue @@ -1377,7 +1198,7 @@ def _generate_section_react( if tool_calls_count < min_tool_calls: messages.append({"role": "assistant", "content": response}) unused_tools = all_tools - used_tools - unused_hint = f"(这些工具还未使用,推荐用一下他们: {', '.join(unused_tools)})" if unused_tools else "" + unused_hint = f"(Unused tools worth considering: {', '.join(unused_tools)})" if unused_tools else "" messages.append({ "role": "user", "content": REACT_INSUFFICIENT_TOOLS_MSG.format( @@ -1451,7 +1272,7 @@ def _generate_section_react( unused_tools = all_tools - used_tools unused_hint = "" if unused_tools and tool_calls_count < self.MAX_TOOL_CALLS_PER_SECTION: - unused_hint = REACT_UNUSED_TOOLS_HINT.format(unused_list="、".join(unused_tools)) + unused_hint = REACT_UNUSED_TOOLS_HINT.format(unused_list=", ".join(unused_tools)) messages.append({"role": "assistant", "content": response}) messages.append({ @@ -1473,7 +1294,7 @@ def _generate_section_react( if tool_calls_count < min_tool_calls: # 工具调用次数不足,推荐未用过的工具 unused_tools = all_tools - used_tools - unused_hint = f"(这些工具还未使用,推荐用一下他们: {', '.join(unused_tools)})" if unused_tools else "" + unused_hint = f"(Unused tools worth considering: {', '.join(unused_tools)})" if unused_tools else "" messages.append({ "role": "user", diff --git a/backend/app/services/simulation_config_generator.py b/backend/app/services/simulation_config_generator.py index cb77f6b6cd..127921c01e 100644 --- a/backend/app/services/simulation_config_generator.py +++ b/backend/app/services/simulation_config_generator.py @@ -540,28 +540,28 @@ def _generate_time_config(self, context: str, num_entities: int) -> Dict[str, An # 计算最大允许值(80%的agent数) max_agents_allowed = max(1, int(num_entities * 0.9)) - prompt = f"""基于以下模拟需求,生成时间模拟配置。 + prompt = f"""Generate a time simulation configuration based on the following simulation requirement. {context_truncated} -## 任务 -请生成时间配置JSON。 +## Task +Generate the time configuration JSON. -### 基本原则(仅供参考,需根据具体事件和参与群体灵活调整): -- 请根据模拟场景推断目标用户群体所在时区和作息习惯,以下为东八区(UTC+8)的参考示例 -- 凌晨0-5点几乎无人活动(活跃度系数0.05) -- 早上6-8点逐渐活跃(活跃度系数0.4) -- 工作时间9-18点中等活跃(活跃度系数0.7) -- 晚间19-22点是高峰期(活跃度系数1.5) -- 23点后活跃度下降(活跃度系数0.5) -- 一般规律:凌晨低活跃、早间渐增、工作时段中等、晚间高峰 -- **重要**:以下示例值仅供参考,你需要根据事件性质、参与群体特点来调整具体时段 - - 例如:学生群体高峰可能是21-23点;媒体全天活跃;官方机构只在工作时间 - - 例如:突发热点可能导致深夜也有讨论,off_peak_hours 可适当缩短 +### Basic principles, adapt them to the event and actor groups: +- Infer the target actor group's time zone and daily rhythm from the simulation scenario. UTC+8 examples: +- 00:00-05:00 is usually very quiet, activity multiplier 0.05 +- 06:00-08:00 activity increases, activity multiplier 0.4 +- 09:00-18:00 moderate workday activity, activity multiplier 0.7 +- 19:00-22:00 often peak activity, activity multiplier 1.5 +- After 23:00 activity declines, activity multiplier 0.5 +- General rhythm: low at night, rising in the morning, moderate during work hours, high in the evening +- Important: these are examples only. Adjust the exact hours to the event type and actor group. + - Example: students may peak at 21:00-23:00; media may be active all day; institutions may post during office hours + - Example: breaking issues may stay active late at night, so off_peak_hours can be shorter -### 返回JSON格式(不要markdown) +### Return JSON only, no Markdown -示例: +Example: {{ "total_simulation_hours": 72, "minutes_per_round": 60, @@ -571,27 +571,27 @@ def _generate_time_config(self, context: str, num_entities: int) -> Dict[str, An "off_peak_hours": [0, 1, 2, 3, 4, 5], "morning_hours": [6, 7, 8], "work_hours": [9, 10, 11, 12, 13, 14, 15, 16, 17, 18], - "reasoning": "针对该事件的时间配置说明" + "reasoning": "Brief explanation of this time configuration" }} -字段说明: -- total_simulation_hours (int): 模拟总时长,24-168小时,突发事件短、持续话题长 -- minutes_per_round (int): 每轮时长,30-120分钟,建议60分钟 -- agents_per_hour_min (int): 每小时最少激活Agent数(取值范围: 1-{max_agents_allowed}) -- agents_per_hour_max (int): 每小时最多激活Agent数(取值范围: 1-{max_agents_allowed}) -- peak_hours (int数组): 高峰时段,根据事件参与群体调整 -- off_peak_hours (int数组): 低谷时段,通常深夜凌晨 -- morning_hours (int数组): 早间时段 -- work_hours (int数组): 工作时段 -- reasoning (string): 简要说明为什么这样配置""" - - system_prompt = "你是社交媒体模拟专家。返回纯JSON格式,时间配置需符合模拟场景中目标用户群体的作息习惯。" +Field guide: +- total_simulation_hours (int): total simulation duration, 24-168 hours; shorter for acute events, longer for ongoing topics +- minutes_per_round (int): duration per round, 30-120 minutes, recommended 60 +- agents_per_hour_min (int): minimum active agents per hour, range 1-{max_agents_allowed} +- agents_per_hour_max (int): maximum active agents per hour, range 1-{max_agents_allowed} +- peak_hours (array of int): peak activity hours, adapted to actor groups +- off_peak_hours (array of int): low-activity hours, usually late night +- morning_hours (array of int): morning hours +- work_hours (array of int): working hours +- reasoning (string): brief explanation of the configuration""" + + system_prompt = "You are a social simulation configuration expert. Return pure JSON only. The time configuration must match the scenario geography, target actor groups, and working/living rhythm." system_prompt = f"{system_prompt}\n\n{get_language_instruction()}" try: return self._call_llm_with_retry(prompt, system_prompt) except Exception as e: - logger.warning(f"时间配置LLM生成失败: {e}, 使用默认配置") + logger.warning(f"时间配置LLM生成失败: {e}, Fallback default configuration") return self._get_default_time_config(num_entities) def _get_default_time_config(self, num_entities: int) -> Dict[str, Any]: @@ -605,7 +605,7 @@ def _get_default_time_config(self, num_entities: int) -> Dict[str, Any]: "off_peak_hours": [0, 1, 2, 3, 4, 5], "morning_hours": [6, 7, 8], "work_hours": [9, 10, 11, 12, 13, 14, 15, 16, 17, 18], - "reasoning": "使用默认中国人作息配置(每轮1小时)" + "reasoning": "Fallback default rhythm configuration, one hour per round" } def _parse_time_config(self, result: Dict[str, Any], num_entities: int) -> TimeSimulationConfig: @@ -673,47 +673,47 @@ def _generate_event_config( # 使用配置的上下文截断长度 context_truncated = context[:self.EVENT_CONFIG_CONTEXT_LENGTH] - prompt = f"""基于以下模拟需求,生成事件配置。 + prompt = f"""Generate an event configuration based on the following simulation requirement. -模拟需求: {simulation_requirement} +Simulation requirement: {simulation_requirement} {context_truncated} -## 可用实体类型及示例 +## Available entity types and examples {type_info} -## 任务 -请生成事件配置JSON: -- 提取热点话题关键词 -- 描述舆论发展方向 -- 设计初始帖子内容,**每个帖子必须指定 poster_type(发布者类型)** +## Task +Generate event configuration JSON: +- Extract hot-topic keywords +- Describe the expected narrative direction +- Design initial posts. Each post must specify poster_type. -**重要**: poster_type 必须从上面的"可用实体类型"中选择,这样初始帖子才能分配给合适的 Agent 发布。 -例如:官方声明应由 Official/University 类型发布,新闻由 MediaOutlet 发布,学生观点由 Student 发布。 +**Important**: poster_type must be selected from the available entity types above, so initial posts can be assigned to suitable agents. +Example: official statements should be posted by Official/University types, news by MediaOutlet, student views by Student. -返回JSON格式(不要markdown): +Return JSON only, no Markdown: {{ - "hot_topics": ["关键词1", "关键词2", ...], - "narrative_direction": "<舆论发展方向描述>", + "hot_topics": ["keyword 1", "keyword 2", ...], + "narrative_direction": "", "initial_posts": [ - {{"content": "帖子内容", "poster_type": "实体类型(必须从可用类型中选择)"}}, + {{"content": "post content", "poster_type": "entity type, must be selected from available types"}}, ... ], - "reasoning": "<简要说明>" + "reasoning": "" }}""" - system_prompt = "你是舆论分析专家。返回纯JSON格式。注意 poster_type 必须精确匹配可用实体类型。" + system_prompt = "You are a social dynamics analyst. Return pure JSON only. The poster_type value must exactly match one of the available entity types." system_prompt = f"{system_prompt}\n\n{get_language_instruction()}\nIMPORTANT: The 'poster_type' field value MUST be in English PascalCase exactly matching the available entity types. Only 'content', 'narrative_direction', 'hot_topics' and 'reasoning' fields should use the specified language." try: return self._call_llm_with_retry(prompt, system_prompt) except Exception as e: - logger.warning(f"事件配置LLM生成失败: {e}, 使用默认配置") + logger.warning(f"事件配置LLM生成失败: {e}, Fallback default configuration") return { "hot_topics": [], "narrative_direction": "", "initial_posts": [], - "reasoning": "使用默认配置" + "reasoning": "Fallback default configuration" } def _parse_event_config(self, result: Dict[str, Any]) -> EventConfig: @@ -830,34 +830,34 @@ def _generate_agent_configs_batch( "summary": e.summary[:summary_len] if e.summary else "" }) - prompt = f"""基于以下信息,为每个实体生成社交媒体活动配置。 + prompt = f"""Generate social-media activity settings for each entity based on the following information. -模拟需求: {simulation_requirement} +Simulation requirement: {simulation_requirement} -## 实体列表 +## Entity list ```json {json.dumps(entity_list, ensure_ascii=False, indent=2)} ``` -## 任务 -为每个实体生成活动配置,注意: -- **时间符合目标用户群体作息**:以下为参考(东八区),请根据模拟场景调整 -- **官方机构**(University/GovernmentAgency):活跃度低(0.1-0.3),工作时间(9-17)活动,响应慢(60-240分钟),影响力高(2.5-3.0) -- **媒体**(MediaOutlet):活跃度中(0.4-0.6),全天活动(8-23),响应快(5-30分钟),影响力高(2.0-2.5) -- **个人**(Student/Person/Alumni):活跃度高(0.6-0.9),主要晚间活动(18-23),响应快(1-15分钟),影响力低(0.8-1.2) -- **公众人物/专家**:活跃度中(0.4-0.6),影响力中高(1.5-2.0) +## Task +Generate activity configuration for every entity. Pay attention to: +- Match activity times to the target actor group's daily rhythm. The examples below use UTC+8 and must be adapted to the scenario. +- Institutions (University/GovernmentAgency): low activity 0.1-0.3, office-hour activity 09:00-17:00, slow response 60-240 minutes, high influence 2.5-3.0 +- MediaOutlet: medium activity 0.4-0.6, broad daily activity 08:00-23:00, fast response 5-30 minutes, high influence 2.0-2.5 +- Individuals (Student/Person/Alumni): high activity 0.6-0.9, mainly evening activity 18:00-23:00, fast response 1-15 minutes, lower influence 0.8-1.2 +- Public figures/experts: medium activity 0.4-0.6, medium-to-high influence 1.5-2.0 -返回JSON格式(不要markdown): +Return JSON only, no Markdown: {{ "agent_configs": [ {{ - "agent_id": <必须与输入一致>, + "agent_id": , "activity_level": <0.0-1.0>, - "posts_per_hour": <发帖频率>, - "comments_per_hour": <评论频率>, - "active_hours": [<活跃小时列表,考虑中国人作息>], - "response_delay_min": <最小响应延迟分钟>, - "response_delay_max": <最大响应延迟分钟>, + "posts_per_hour": , + "comments_per_hour": , + "active_hours": [], + "response_delay_min": , + "response_delay_max": , "sentiment_bias": <-1.0到1.0>, "stance": "", "influence_weight": <影响力权重> @@ -866,7 +866,7 @@ def _generate_agent_configs_batch( ] }}""" - system_prompt = "你是社交媒体行为分析专家。返回纯JSON,配置需符合模拟场景中目标用户群体的作息习惯。" + system_prompt = "You are a social-media behavior analyst. Return pure JSON only. Agent activity patterns must match the scenario geography, roles, incentives, and likely communication habits." system_prompt = f"{system_prompt}\n\n{get_language_instruction()}\nIMPORTANT: The 'stance' field value MUST be one of the English strings: 'supportive', 'opposing', 'neutral', 'observer'. All JSON field names and numeric values must remain unchanged. Only natural language text fields should use the specified language." try: diff --git a/backend/app/utils/llm_client.py b/backend/app/utils/llm_client.py index 6c1a81f49b..bb8f3c45db 100644 --- a/backend/app/utils/llm_client.py +++ b/backend/app/utils/llm_client.py @@ -55,8 +55,11 @@ def chat( "model": self.model, "messages": messages, "temperature": temperature, - "max_tokens": max_tokens, } + if self.model.startswith("gpt-5"): + kwargs["max_completion_tokens"] = max_tokens + else: + kwargs["max_tokens"] = max_tokens if response_format: kwargs["response_format"] = response_format diff --git a/backend/app/utils/locale.py b/backend/app/utils/locale.py index 23d04aa9d5..1ca3987866 100644 --- a/backend/app/utils/locale.py +++ b/backend/app/utils/locale.py @@ -22,19 +22,34 @@ def set_locale(locale: str): """Set locale for current thread. Call at the start of background threads.""" - _thread_local.locale = locale + _thread_local.locale = _normalize_locale(locale) + + +def _normalize_locale(raw: str | None) -> str: + if not raw: + return 'en' + candidates = [] + for part in raw.split(','): + code = part.split(';')[0].strip().lower().replace('_', '-') + if code: + candidates.append(code) + if '-' in code: + candidates.append(code.split('-')[0]) + for code in candidates: + if code in _languages or code in _translations: + return code + return 'en' def get_locale() -> str: if has_request_context(): - raw = request.headers.get('Accept-Language', 'zh') - return raw if raw in _translations else 'zh' - return getattr(_thread_local, 'locale', 'zh') + return _normalize_locale(request.headers.get('Accept-Language', 'en')) + return getattr(_thread_local, 'locale', 'en') def t(key: str, **kwargs) -> str: locale = get_locale() - messages = _translations.get(locale, _translations.get('zh', {})) + messages = _translations.get(locale, _translations.get('en', _translations.get('zh', {}))) value = messages for part in key.split('.'): @@ -45,7 +60,7 @@ def t(key: str, **kwargs) -> str: break if value is None: - value = _translations.get('zh', {}) + value = _translations.get('en', _translations.get('zh', {})) for part in key.split('.'): if isinstance(value, dict): value = value.get(part) @@ -65,5 +80,11 @@ def t(key: str, **kwargs) -> str: def get_language_instruction() -> str: locale = get_locale() - lang_config = _languages.get(locale, _languages.get('zh', {})) - return lang_config.get('llmInstruction', '请使用中文回答。') + lang_config = _languages.get(locale, _languages.get('en', {})) + instruction = lang_config.get('llmInstruction', 'Please respond in English.') + return ( + f"{instruction} Use this language for every natural-language field, report title, " + f"section title, explanation, quote translation, generated social post, interview question, " + f"interview answer, and reasoning text. Keep JSON keys, enum values, IDs, entity type names, " + f"relationship type names, and code-like values in their required machine-readable format." + ) diff --git a/frontend/index.html b/frontend/index.html index 0b80095c6d..ef4194d6a2 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -1,15 +1,15 @@ - + - + - - MiroFish - 预测万物 + + MiroFish - Predict Everything
diff --git a/frontend/src/i18n/index.js b/frontend/src/i18n/index.js index aa26553527..9ada40deab 100644 --- a/frontend/src/i18n/index.js +++ b/frontend/src/i18n/index.js @@ -5,21 +5,30 @@ const localeFiles = import.meta.glob('../../../locales/!(languages).json', { eag const messages = {} const availableLocales = [] +const enabledLocales = new Set(['en']) for (const path in localeFiles) { const key = path.match(/\/([^/]+)\.json$/)[1] - if (languages[key]) { + if (languages[key] && enabledLocales.has(key)) { messages[key] = localeFiles[path].default availableLocales.push({ key, label: languages[key].label }) } } -const savedLocale = localStorage.getItem('locale') || 'zh' +const rawSavedLocale = localStorage.getItem('locale') +const supportedLocaleKeys = Object.keys(messages) +const savedLocale = rawSavedLocale && rawSavedLocale !== 'zh' && supportedLocaleKeys.includes(rawSavedLocale) + ? rawSavedLocale + : (supportedLocaleKeys.includes('en') ? 'en' : availableLocales[0]?.key || 'en') + +if (rawSavedLocale !== savedLocale) { + localStorage.setItem('locale', savedLocale) +} const i18n = createI18n({ legacy: false, locale: savedLocale, - fallbackLocale: 'zh', + fallbackLocale: 'en', messages })