diff --git a/.env.example b/.env.example index 78a3b72c07..2aa57f90b2 100644 --- a/.env.example +++ b/.env.example @@ -13,4 +13,36 @@ ZEP_API_KEY=your_zep_api_key_here # 注意如果不使用加速配置,env文件中就不要出现下面的配置项 LLM_BOOST_API_KEY=your_api_key_here LLM_BOOST_BASE_URL=your_base_url_here -LLM_BOOST_MODEL_NAME=your_model_name_here \ No newline at end of file +LLM_BOOST_MODEL_NAME=your_model_name_here + +# ===== 安全配置(C1)===== +# 生产模式下必须设置自定义 SECRET_KEY(否则启动校验失败)。生成示例:python -c "import secrets;print(secrets.token_hex(32))" +SECRET_KEY=change_me_to_a_random_secret +# 调试模式默认关闭;设为 true 会启用 Werkzeug 交互式调试器(可远程 RCE),切勿在联网/生产开启 +FLASK_DEBUG=false + +# ===== API 鉴权(C2)===== +# 所有 /api/* 端点需携带 `X-API-Key: `(或 `Authorization: Bearer `) +# AUTH_ENABLED=true(默认)时必须设置 API_KEY;本地开发可设 AUTH_ENABLED=false 关闭鉴权 +AUTH_ENABLED=true +API_KEY=change_me_to_a_strong_api_key +# 前端构建期变量:必须等于 API_KEY。docker compose 会把它作为 build-arg 注入前端构建, +# 使打包后的 UI 自动带上 X-API-Key。注意:它会被打进客户端包、可被任何访问者提取(见 README 安全说明)。 +VITE_API_KEY=change_me_to_a_strong_api_key + +# ===== CORS 允许来源(H4)===== +# 逗号分隔的前端来源;不再用通配 '*'。生产填前端域名,例如 https://app.example.com +ALLOWED_ORIGINS=http://localhost:3000,http://127.0.0.1:3000 + +# ===== 模拟成本上限(C3,denial-of-wallet 防护)===== +# 客户端未传 max_rounds 时的默认轮数上限(完整长度模拟请按请求传 max_rounds 或调高此值) +# 默认 150 覆盖典型配置(72h/30min=144 轮)以免悄悄截断标准演示 +OASIS_DEFAULT_MAX_ROUNDS=150 +# 硬上限:无论客户端传入何值都不得超过 +OASIS_MAX_ROUNDS_CAP=200 +OASIS_MAX_AGENTS_CAP=1000 + +# ===== 模拟超时(C4,秒)===== +# 单轮 env.step 超时 + 整轮模拟总超时,防止 LLM/网络挂起导致 run 永久 wedge +OASIS_ROUND_TIMEOUT_SEC=600 +OASIS_RUN_TIMEOUT_SEC=7200 \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index e656468603..0660dbc228 100644 --- a/Dockerfile +++ b/Dockerfile @@ -23,7 +23,17 @@ RUN npm ci \ # 复制项目源码 COPY . . 
+# C2:前端 API Key 是 Vite 构建期变量,必须在 `npm run build` 之前进入构建环境,否则打包后的 +# UI 带空 key、在 AUTH_ENABLED=true 下所有 /api/* 会 401。由 docker compose 经 build-arg 注入。 +ARG VITE_API_KEY="" +ENV VITE_API_KEY=$VITE_API_KEY + +# C1:构建前端静态产物,生产用 `vite preview` 提供(不再运行 Vite 开发服务器) +RUN npm run build + EXPOSE 3000 5001 -# 同时启动前后端(开发模式) -CMD ["npm", "run", "dev"] \ No newline at end of file +# C1:生产启动 —— 后端用 gunicorn(单 worker 多线程,保留进程内模拟态), +# 前端用 vite preview 提供已构建产物。开发请改用 `npm run dev`。 +# 安全前提:须经 .env 设置 SECRET_KEY 与 API_KEY(FLASK_DEBUG 默认 false)。 +CMD ["npm", "run", "start"] \ No newline at end of file diff --git a/README.md b/README.md index de082935a7..29621f4538 100644 --- a/README.md +++ b/README.md @@ -127,6 +127,48 @@ LLM_MODEL_NAME=qwen-plus ZEP_API_KEY=your_zep_api_key ``` +#### Security configuration (required for production) + +The backend ships secure-by-default. When `FLASK_DEBUG=false` (the default) the app +runs under **gunicorn** (no Werkzeug debugger) and refuses to start unless these are set: + +```env +# A random secret (required when FLASK_DEBUG=false): +# python -c "import secrets;print(secrets.token_hex(32))" +SECRET_KEY=your_random_secret + +# API-key auth is ON by default — every /api/* request must carry the key. +# Clients send X-API-Key: (or Authorization: Bearer ). +AUTH_ENABLED=true +API_KEY=your_strong_api_key +``` + +- The bundled web UI reads the key from the **build-time** var **`VITE_API_KEY`** (set it equal to + `API_KEY`) and sends it automatically as `X-API-Key`. It must be present **before** the frontend + is built: + - **Docker:** put `VITE_API_KEY` in the root `.env`; `docker compose up --build` injects it as a + build-arg (docker-compose.yml `build.args` → Dockerfile `ARG VITE_API_KEY` → `npm run build`). + The pre-built `ghcr.io` image (used by a bare `docker compose up` without `--build`) bakes no + custom key — rebuild, or use `AUTH_ENABLED=false` for that path. 
+ - **Local frontend build:** copy `frontend/.env.example` → `frontend/.env` and set `VITE_API_KEY`, + then `npm run build`. (The root `.env` is read by the backend only, not by Vite.) + - ⚠️ A key baked into the client bundle is extractable by anyone who loads the page — for + multi-tenant/public exposure replace this with session login or a gateway that injects per-user + tokens. For local/internal/VPN or behind-a-gateway single-host use it is sufficient. +- Local development / simplest single-host demo: set `AUTH_ENABLED=false` to disable the key + requirement entirely (the API is then protected only by your network boundary). +- Cost controls (denial-of-wallet): a run is bounded by `OASIS_DEFAULT_MAX_ROUNDS` (when the + client omits `max_rounds`), the hard ceilings `OASIS_MAX_ROUNDS_CAP` / `OASIS_MAX_AGENTS_CAP`, + and per-round / total timeouts `OASIS_ROUND_TIMEOUT_SEC` / `OASIS_RUN_TIMEOUT_SEC`. See + `.env.example` for defaults. +- Run the production server with a **single worker** (`gunicorn -w 1 --threads N`); simulation + run-state is held in-process, so multiple workers break stop/status routing. +- **CSP / API origin:** the frontend ships a Content-Security-Policy whose `connect-src` allows + `'self'` + `http://localhost:5001` (the default API). If you point the UI at a different API + host (set `VITE_API_BASE_URL`), you **must** add that origin to `connect-src` in + `frontend/index.html` too, or the browser will silently block all API calls. Restrict + `ALLOWED_ORIGINS` (backend) to your real frontend origin in production. + #### 2. 
@app.before_request
def require_api_key():
    """Reject unauthenticated requests to ``/api/*`` with a 401 JSON response.

    Registered as a Flask ``before_request`` hook: returning ``None`` lets the
    request continue, returning a response short-circuits it.

    The key is taken from the ``X-API-Key`` header or, when that header is
    absent or empty, from ``Authorization: Bearer <key>``. Settings are read
    from the ``config_class`` handed to ``create_app`` (the same object that
    is validated and loaded into ``app.config``) rather than from the global
    ``Config``, so a non-default configuration is honoured by the auth check.

    Returns:
        ``None`` when the request may proceed, otherwise a
        ``(response, 401)`` tuple.
    """
    # Fail closed: a config object lacking the attribute keeps auth enabled.
    if not getattr(config_class, 'AUTH_ENABLED', True):
        return None

    path = request.path or ''
    # Only the API surface is protected; every other path is exempt.
    if not path.startswith('/api/'):
        return None

    # CORS preflight requests do not carry custom headers, so let them through.
    if request.method == 'OPTIONS':
        return None

    provided = request.headers.get('X-API-Key', '')
    if not provided:
        auth_header = request.headers.get('Authorization', '')
        scheme, _, credentials = auth_header.partition(' ')
        # HTTP authentication scheme names are case-insensitive, so accept
        # "bearer", "BEARER", ... as well as "Bearer".
        if scheme.lower() == 'bearer':
            provided = credentials

    expected = getattr(config_class, 'API_KEY', None) or ''
    if not expected:
        # No key configured: never treat an empty key as a match.
        return jsonify({"success": False, "error": "Unauthorized"}), 401

    # Constant-time comparison to avoid a timing side channel. Both sides are
    # encoded to bytes because compare_digest raises TypeError for str values
    # containing non-ASCII characters. Any failure here is deliberately
    # treated as "no match" so the rejection path can never become a 500.
    try:
        ok = hmac.compare_digest(provided.encode('utf-8'), expected.encode('utf-8'))
    except Exception:
        ok = False

    if not ok:
        return jsonify({"success": False, "error": "Unauthorized"}), 401
    return None
except Exception as e: return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -495,17 +496,17 @@ def wait_progress_callback(msg, progress_ratio): except Exception as e: # 更新项目状态为失败 build_logger.error(f"[{task_id}] 图谱构建失败: {str(e)}") - build_logger.debug(traceback.format_exc()) + build_logger.debug(safe_traceback()) project.status = ProjectStatus.FAILED - project.error = str(e) + project.error = safe_error(e) ProjectManager.save_project(project) task_manager.update_task( task_id, status=TaskStatus.FAILED, - message=t('progress.buildFailed', error=str(e)), - error=traceback.format_exc() + message=t('progress.buildFailed', error=safe_error(e)), + error=safe_traceback() ) # 启动后台线程 @@ -524,8 +525,8 @@ def wait_progress_callback(msg, progress_ratio): except Exception as e: return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -589,8 +590,8 @@ def get_graph_data(graph_id: str): except Exception as e: return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -617,6 +618,6 @@ def delete_graph(graph_id: str): except Exception as e: return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 diff --git a/backend/app/api/report.py b/backend/app/api/report.py index d7f2a4d03a..ccd414ddcc 100644 --- a/backend/app/api/report.py +++ b/backend/app/api/report.py @@ -4,7 +4,7 @@ """ import os -import traceback +from ..utils.security import safe_traceback, safe_error import threading from flask import request, jsonify, send_file @@ -173,7 +173,7 @@ def progress_callback(stage, progress, message): except Exception as e: logger.error(f"报告生成失败: {str(e)}") - 
task_manager.fail_task(task_id, str(e)) + task_manager.fail_task(task_id, safe_error(e)) # 启动后台线程 thread = threading.Thread(target=run_generate, daemon=True) @@ -195,8 +195,8 @@ def progress_callback(stage, progress, message): logger.error(f"启动报告生成任务失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -268,7 +268,7 @@ def get_generate_status(): logger.error(f"查询任务状态失败: {str(e)}") return jsonify({ "success": False, - "error": str(e) + "error": safe_error(e) }), 500 @@ -311,8 +311,8 @@ def get_report(report_id: str): logger.error(f"获取报告失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -350,8 +350,8 @@ def get_report_by_simulation(simulation_id: str): logger.error(f"获取报告失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -390,8 +390,8 @@ def list_reports(): logger.error(f"列出报告失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -436,8 +436,8 @@ def download_report(report_id: str): logger.error(f"下载报告失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -462,8 +462,8 @@ def delete_report(report_id: str): logger.error(f"删除报告失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -559,8 +559,8 @@ def chat_with_report_agent(): logger.error(f"对话失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() 
+ "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -602,8 +602,8 @@ def get_report_progress(report_id: str): logger.error(f"获取报告进度失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -653,8 +653,8 @@ def get_report_sections(report_id: str): logger.error(f"获取章节列表失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -697,8 +697,8 @@ def get_single_section(report_id: str, section_index: int): logger.error(f"获取章节内容失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -748,8 +748,8 @@ def check_report_status(simulation_id: str): logger.error(f"检查报告状态失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -809,8 +809,8 @@ def get_agent_log(report_id: str): logger.error(f"获取Agent日志失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -843,8 +843,8 @@ def stream_agent_log(report_id: str): logger.error(f"获取Agent日志失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -891,8 +891,8 @@ def get_console_log(report_id: str): logger.error(f"获取控制台日志失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -925,8 +925,8 @@ def stream_console_log(report_id: str): logger.error(f"获取控制台日志失败: {str(e)}") return jsonify({ "success": False, 
- "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -975,8 +975,8 @@ def search_graph_tool(): logger.error(f"图谱搜索失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -1015,6 +1015,6 @@ def get_graph_statistics_tool(): logger.error(f"获取图谱统计失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 diff --git a/backend/app/api/simulation.py b/backend/app/api/simulation.py index 3a8e1e3fc8..f76b2fc914 100644 --- a/backend/app/api/simulation.py +++ b/backend/app/api/simulation.py @@ -4,7 +4,7 @@ """ import os -import traceback +from ..utils.security import safe_traceback, safe_error from flask import request, jsonify, send_file from . import simulation_bp @@ -85,8 +85,8 @@ def get_graph_entities(graph_id: str): logger.error(f"获取图谱实体失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -118,8 +118,8 @@ def get_entity_detail(graph_id: str, entity_uuid: str): logger.error(f"获取实体详情失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -155,8 +155,8 @@ def get_entities_by_type(graph_id: str, entity_type: str): logger.error(f"获取实体失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -232,8 +232,8 @@ def create_simulation(): logger.error(f"创建模拟失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 
@@ -597,14 +597,14 @@ def progress_callback(stage, progress, message, **kwargs): ) except Exception as e: - logger.error(f"准备模拟失败: {str(e)}") - task_manager.fail_task(task_id, str(e)) + logger.error(f"准备模拟失败: {e}", exc_info=True) + task_manager.fail_task(task_id, safe_error(e)) # 更新模拟状态为失败 state = manager.get_simulation(simulation_id) if state: state.status = SimulationStatus.FAILED - state.error = str(e) + state.error = safe_error(e) manager._save_simulation_state(state) # 启动后台线程 @@ -634,8 +634,8 @@ def progress_callback(stage, progress, message, **kwargs): logger.error(f"启动准备任务失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -748,7 +748,7 @@ def get_prepare_status(): logger.error(f"查询任务状态失败: {str(e)}") return jsonify({ "success": False, - "error": str(e) + "error": safe_error(e) }), 500 @@ -780,8 +780,8 @@ def get_simulation(simulation_id: str): logger.error(f"获取模拟状态失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -809,8 +809,8 @@ def list_simulations(): logger.error(f"列出模拟失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -982,8 +982,8 @@ def get_simulation_history(): logger.error(f"获取历史模拟失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -1020,8 +1020,8 @@ def get_simulation_profiles(simulation_id: str): logger.error(f"获取Profile失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -1130,8 +1130,8 @@ def 
get_simulation_profiles_realtime(simulation_id: str): logger.error(f"实时获取Profile失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -1250,8 +1250,8 @@ def get_simulation_config_realtime(simulation_id: str): logger.error(f"实时获取Config失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -1286,8 +1286,8 @@ def get_simulation_config(simulation_id: str): logger.error(f"获取配置失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -1315,8 +1315,8 @@ def download_simulation_config(simulation_id: str): logger.error(f"下载配置失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -1367,8 +1367,8 @@ def download_simulation_script(script_name: str): logger.error(f"下载脚本失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -1441,8 +1441,8 @@ def generate_profiles(): logger.error(f"生成Profile失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -1636,8 +1636,8 @@ def start_simulation(): logger.error(f"启动模拟失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -1695,8 +1695,8 @@ def stop_simulation(): logger.error(f"停止模拟失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), 
+ "traceback": safe_traceback() }), 500 @@ -1755,8 +1755,8 @@ def get_run_status(simulation_id: str): logger.error(f"获取运行状态失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -1856,8 +1856,8 @@ def get_run_status_detail(simulation_id: str): logger.error(f"获取详细状态失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -1910,8 +1910,8 @@ def get_simulation_actions(simulation_id: str): logger.error(f"获取动作历史失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -1950,8 +1950,8 @@ def get_simulation_timeline(simulation_id: str): logger.error(f"获取时间线失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -1977,8 +1977,8 @@ def get_agent_stats(simulation_id: str): logger.error(f"获取Agent统计失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -2057,8 +2057,8 @@ def get_simulation_posts(simulation_id: str): logger.error(f"获取帖子失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -2132,8 +2132,8 @@ def get_simulation_comments(simulation_id: str): logger.error(f"获取评论失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -2263,8 +2263,8 @@ def interview_agent(): logger.error(f"Interview失败: {str(e)}") return jsonify({ "success": False, - "error": 
str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -2401,8 +2401,8 @@ def interview_agents_batch(): logger.error(f"批量Interview失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -2504,8 +2504,8 @@ def interview_all_agents(): logger.error(f"全局Interview失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -2576,8 +2576,8 @@ def get_interview_history(): logger.error(f"获取Interview历史失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -2641,8 +2641,8 @@ def get_env_status(): logger.error(f"获取环境状态失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 @@ -2711,6 +2711,6 @@ def close_simulation_env(): logger.error(f"关闭环境失败: {str(e)}") return jsonify({ "success": False, - "error": str(e), - "traceback": traceback.format_exc() + "error": safe_error(e), + "traceback": safe_traceback() }), 500 diff --git a/backend/app/config.py b/backend/app/config.py index de63e2b4b0..5c27a3a991 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -21,9 +21,27 @@ class Config: """Flask配置类""" # Flask配置 + # 注意:SECRET_KEY 的默认值是公开值,仅供 DEBUG 模式使用;生产模式(DEBUG=false) + # 必须通过环境变量设置自定义值(见 validate())。 SECRET_KEY = os.environ.get('SECRET_KEY', 'mirofish-secret-key') - DEBUG = os.environ.get('FLASK_DEBUG', 'True').lower() == 'true' - + # 安全默认(C1):DEBUG 默认关闭,避免误把 Werkzeug 交互式调试器(可远程 RCE)暴露到网络。 + DEBUG = os.environ.get('FLASK_DEBUG', 'False').lower() == 'true' + + # 认证配置(C2):所有 /api/* 端点强制 API Key 鉴权 + # AUTH_ENABLED 默认开启;本地开发可显式设 AUTH_ENABLED=false 
关闭。 + # fail-closed 解析:仅显式 false/0/no/off 才关闭鉴权;其余任何值(含空白、拼写错误、 + # 带尾换行的 'true\n'、'1'、'yes' 等)一律视为开启,避免 env 配置失误悄悄回到零鉴权。 + API_KEY = os.environ.get('API_KEY') + AUTH_ENABLED = os.environ.get('AUTH_ENABLED', 'true').strip().lower() not in ('false', '0', 'no', 'off') + + # CORS 允许来源(H4):不再用通配 '*'。默认仅本地前端开发/预览源;生产用逗号分隔的 + # ALLOWED_ORIGINS 指定前端域名(例如 https://app.example.com)。'*' 仍可显式设置但不推荐。 + ALLOWED_ORIGINS = [ + o.strip() for o in os.environ.get( + 'ALLOWED_ORIGINS', 'http://localhost:3000,http://127.0.0.1:3000' + ).split(',') if o.strip() + ] + # JSON配置 - 禁用ASCII转义,让中文直接显示(而不是 \uXXXX 格式) JSON_AS_ASCII = False @@ -45,7 +63,18 @@ class Config: DEFAULT_CHUNK_OVERLAP = 50 # 默认重叠大小 # OASIS模拟配置 - OASIS_DEFAULT_MAX_ROUNDS = int(os.environ.get('OASIS_DEFAULT_MAX_ROUNDS', '10')) + # OASIS_DEFAULT_MAX_ROUNDS(C3):客户端未显式传 max_rounds 时应用的默认轮数上限。 + # 之前此常量从未被引用(dead config),现已在 SimulationRunner 中生效。默认 150 覆盖 + # 典型配置(72h/30min = 144 轮)以免悄悄截断标准演示;更长的配置会被截到此值,且无论如何 + # 都不会超过硬上限 OASIS_MAX_ROUNDS_CAP。匿名 denial-of-wallet 已由 C2 鉴权堵住,此处 + # 仅约束“已鉴权客户端”单次运行的成本上界。 + OASIS_DEFAULT_MAX_ROUNDS = int(os.environ.get('OASIS_DEFAULT_MAX_ROUNDS', '150')) + # 硬上限(C3,denial-of-wallet 防护):无论客户端传入何值,轮数/agent 数都不得超过这些上限。 + OASIS_MAX_ROUNDS_CAP = int(os.environ.get('OASIS_MAX_ROUNDS_CAP', '200')) + OASIS_MAX_AGENTS_CAP = int(os.environ.get('OASIS_MAX_AGENTS_CAP', '1000')) + # 模拟超时(C4,秒):每轮 env.step 超时 + 整轮模拟总超时。子进程读取同名环境变量。 + OASIS_ROUND_TIMEOUT_SEC = int(os.environ.get('OASIS_ROUND_TIMEOUT_SEC', '600')) + OASIS_RUN_TIMEOUT_SEC = int(os.environ.get('OASIS_RUN_TIMEOUT_SEC', '7200')) OASIS_SIMULATION_DATA_DIR = os.path.join(os.path.dirname(__file__), '../uploads/simulations') # OASIS平台可用动作配置 @@ -71,5 +100,11 @@ def validate(cls) -> list[str]: errors.append("LLM_API_KEY 未配置") if not cls.ZEP_API_KEY: errors.append("ZEP_API_KEY 未配置") + # C1:生产模式必须设置自定义 SECRET_KEY(默认值是公开值,可伪造签名 / 削弱调试器 PIN) + if not cls.DEBUG and cls.SECRET_KEY == 'mirofish-secret-key': + errors.append("生产模式(FLASK_DEBUG=false)必须设置自定义 
@classmethod
def _get_project_dir(cls, project_id: str) -> str:
    """Return the directory path for ``project_id`` under ``PROJECTS_DIR``.

    The id is passed through ``validate_id`` before it is joined into the
    path. (Presumably ``validate_id`` raises on unsafe ids such as path
    traversal attempts -- confirm in ``utils.security``.)
    """
    # Validate first: the joined path is later used for filesystem operations.
    validate_id(project_id, 'project_id')
    projects_root = cls.PROJECTS_DIR
    return os.path.join(projects_root, project_id)
@classmethod
def _get_report_folder(cls, report_id: str) -> str:
    """Return the folder path for ``report_id`` under ``REPORTS_DIR``.

    The id is passed through ``validate_id`` before it is joined into the
    path. (Presumably ``validate_id`` raises on unsafe ids such as path
    traversal attempts -- confirm in ``utils.security``.)
    """
    # Validate first: the joined path is later used for filesystem operations.
    validate_id(report_id, 'report_id')
    reports_root = cls.REPORTS_DIR
    return os.path.join(reports_root, report_id)
def __init__(self): def _get_simulation_dir(self, simulation_id: str) -> str: """获取模拟数据目录""" + validate_id(simulation_id, 'simulation_id') # 路径校验,阻断穿越后再 join/makedirs sim_dir = os.path.join(self.SIMULATION_DATA_DIR, simulation_id) os.makedirs(sim_dir, exist_ok=True) return sim_dir diff --git a/backend/app/services/simulation_runner.py b/backend/app/services/simulation_runner.py index e86021f808..94c3823c08 100644 --- a/backend/app/services/simulation_runner.py +++ b/backend/app/services/simulation_runner.py @@ -21,6 +21,7 @@ from ..config import Config from ..utils.logger import get_logger from ..utils.locale import get_locale, set_locale +from ..utils.security import validate_id from .zep_graph_memory_updater import ZepGraphMemoryManager from .simulation_ipc import SimulationIPCClient, CommandType, IPCResponse @@ -239,10 +240,16 @@ def get_run_state(cls, simulation_id: str) -> Optional[SimulationRunState]: cls._run_states[simulation_id] = state return state + @classmethod + def _run_dir(cls, simulation_id: str) -> str: + """Validated RUN_STATE_DIR/ -- blocks path traversal before any fs op.""" + validate_id(simulation_id, 'simulation_id') + return os.path.join(cls.RUN_STATE_DIR, simulation_id) + @classmethod def _load_run_state(cls, simulation_id: str) -> Optional[SimulationRunState]: """从文件加载运行状态""" - state_file = os.path.join(cls.RUN_STATE_DIR, simulation_id, "run_state.json") + state_file = os.path.join(cls._run_dir(simulation_id), "run_state.json") if not os.path.exists(state_file): return None @@ -298,7 +305,7 @@ def _load_run_state(cls, simulation_id: str) -> Optional[SimulationRunState]: @classmethod def _save_run_state(cls, state: SimulationRunState): """保存运行状态到文件""" - sim_dir = os.path.join(cls.RUN_STATE_DIR, state.simulation_id) + sim_dir = cls._run_dir(state.simulation_id) os.makedirs(sim_dir, exist_ok=True) state_file = os.path.join(sim_dir, "run_state.json") @@ -331,13 +338,16 @@ def start_simulation( Returns: SimulationRunState """ + # 
路径校验:simulation_id 会进入 RUN_STATE_DIR 下的 join/makedirs 与子进程参数 + validate_id(simulation_id, 'simulation_id') + # 检查是否已在运行 existing = cls.get_run_state(simulation_id) if existing and existing.runner_status in [RunnerStatus.RUNNING, RunnerStatus.STARTING]: raise ValueError(f"模拟已在运行中: {simulation_id}") # 加载模拟配置 - sim_dir = os.path.join(cls.RUN_STATE_DIR, simulation_id) + sim_dir = cls._run_dir(simulation_id) config_path = os.path.join(sim_dir, "simulation_config.json") if not os.path.exists(config_path): @@ -351,14 +361,27 @@ def start_simulation( total_hours = time_config.get("total_simulation_hours", 72) minutes_per_round = time_config.get("minutes_per_round", 30) total_rounds = int(total_hours * 60 / minutes_per_round) - - # 如果指定了最大轮数,则截断 - if max_rounds is not None and max_rounds > 0: - original_rounds = total_rounds - total_rounds = min(total_rounds, max_rounds) - if total_rounds < original_rounds: - logger.info(f"轮数已截断: {original_rounds} -> {total_rounds} (max_rounds={max_rounds})") - + + # C3(denial-of-wallet 防护):限制总轮数。 + # - max_rounds 未指定时,应用服务端默认上限 OASIS_DEFAULT_MAX_ROUNDS(之前为 dead config) + # - 无论是否指定,都不得超过硬上限 OASIS_MAX_ROUNDS_CAP + effective_max = max_rounds if (max_rounds is not None and max_rounds > 0) else Config.OASIS_DEFAULT_MAX_ROUNDS + effective_max = min(effective_max, Config.OASIS_MAX_ROUNDS_CAP) + if total_rounds > effective_max: + logger.info( + f"轮数已限制: {total_rounds} -> {effective_max} " + f"(max_rounds={max_rounds}, default={Config.OASIS_DEFAULT_MAX_ROUNDS}, cap={Config.OASIS_MAX_ROUNDS_CAP})" + ) + total_rounds = effective_max + + # C3(denial-of-wallet 防护):限制 agent 数量。超过硬上限直接拒绝(避免巨额并发 LLM 调用)。 + agent_count = len(config.get("agent_configs", [])) + if agent_count > Config.OASIS_MAX_AGENTS_CAP: + raise ValueError( + f"Agent 数量 {agent_count} 超过上限 {Config.OASIS_MAX_AGENTS_CAP}," + f"请减少种子实体或调高 OASIS_MAX_AGENTS_CAP 环境变量" + ) + state = SimulationRunState( simulation_id=simulation_id, runner_status=RunnerStatus.STARTING, @@ -419,9 +442,9 @@ def 
start_simulation( "--config", config_path, # 使用完整配置文件路径 ] - # 如果指定了最大轮数,添加到命令行参数 - if max_rounds is not None and max_rounds > 0: - cmd.extend(["--max-rounds", str(max_rounds)]) + # C3:始终把已限制的有效轮数传给子进程,确保子进程按 default/cap 截断 + # (total_rounds 此处已应用 OASIS_DEFAULT_MAX_ROUNDS 与 OASIS_MAX_ROUNDS_CAP) + cmd.extend(["--max-rounds", str(total_rounds)]) # 创建主日志文件,避免 stdout/stderr 管道缓冲区满导致进程阻塞 main_log_path = os.path.join(sim_dir, "simulation.log") @@ -482,7 +505,7 @@ def start_simulation( def _monitor_simulation(cls, simulation_id: str, locale: str = 'zh'): """监控模拟进程,解析动作日志""" set_locale(locale) - sim_dir = os.path.join(cls.RUN_STATE_DIR, simulation_id) + sim_dir = cls._run_dir(simulation_id) # 新的日志结构:分平台的动作日志 twitter_actions_log = os.path.join(sim_dir, "twitter", "actions.jsonl") @@ -700,7 +723,7 @@ def _check_all_platforms_completed(cls, state: SimulationRunState) -> bool: Returns: True 如果所有启用的平台都已完成 """ - sim_dir = os.path.join(cls.RUN_STATE_DIR, state.simulation_id) + sim_dir = cls._run_dir(state.simulation_id) twitter_log = os.path.join(sim_dir, "twitter", "actions.jsonl") reddit_log = os.path.join(sim_dir, "reddit", "actions.jsonl") @@ -910,7 +933,7 @@ def get_all_actions( Returns: 完整的动作列表(按时间戳排序,新的在前) """ - sim_dir = os.path.join(cls.RUN_STATE_DIR, simulation_id) + sim_dir = cls._run_dir(simulation_id) actions = [] # 读取 Twitter 动作文件(根据文件路径自动设置 platform 为 twitter) @@ -1124,7 +1147,7 @@ def cleanup_simulation_logs(cls, simulation_id: str) -> Dict[str, Any]: """ import shutil - sim_dir = os.path.join(cls.RUN_STATE_DIR, simulation_id) + sim_dir = cls._run_dir(simulation_id) if not os.path.exists(sim_dir): return {"success": True, "message": "模拟目录不存在,无需清理"} @@ -1242,7 +1265,7 @@ def cleanup_all_simulations(cls): # 同时更新 state.json,将状态设为 stopped try: - sim_dir = os.path.join(cls.RUN_STATE_DIR, simulation_id) + sim_dir = cls._run_dir(simulation_id) state_file = os.path.join(sim_dir, "state.json") logger.info(f"尝试更新 state.json: {state_file}") if os.path.exists(state_file): @@ 
-1381,7 +1404,7 @@ def check_env_alive(cls, simulation_id: str) -> bool: Returns: True 表示环境存活,False 表示环境已关闭 """ - sim_dir = os.path.join(cls.RUN_STATE_DIR, simulation_id) + sim_dir = cls._run_dir(simulation_id) if not os.path.exists(sim_dir): return False @@ -1399,7 +1422,7 @@ def get_env_status_detail(cls, simulation_id: str) -> Dict[str, Any]: Returns: 状态详情字典,包含 status, twitter_available, reddit_available, timestamp """ - sim_dir = os.path.join(cls.RUN_STATE_DIR, simulation_id) + sim_dir = cls._run_dir(simulation_id) status_file = os.path.join(sim_dir, "env_status.json") default_status = { @@ -1453,7 +1476,7 @@ def interview_agent( ValueError: 模拟不存在或环境未运行 TimeoutError: 等待响应超时 """ - sim_dir = os.path.join(cls.RUN_STATE_DIR, simulation_id) + sim_dir = cls._run_dir(simulation_id) if not os.path.exists(sim_dir): raise ValueError(f"模拟不存在: {simulation_id}") @@ -1515,7 +1538,7 @@ def interview_agents_batch( ValueError: 模拟不存在或环境未运行 TimeoutError: 等待响应超时 """ - sim_dir = os.path.join(cls.RUN_STATE_DIR, simulation_id) + sim_dir = cls._run_dir(simulation_id) if not os.path.exists(sim_dir): raise ValueError(f"模拟不存在: {simulation_id}") @@ -1572,7 +1595,7 @@ def interview_all_agents( Returns: 全局采访结果字典 """ - sim_dir = os.path.join(cls.RUN_STATE_DIR, simulation_id) + sim_dir = cls._run_dir(simulation_id) if not os.path.exists(sim_dir): raise ValueError(f"模拟不存在: {simulation_id}") @@ -1625,7 +1648,7 @@ def close_simulation_env( Returns: 操作结果字典 """ - sim_dir = os.path.join(cls.RUN_STATE_DIR, simulation_id) + sim_dir = cls._run_dir(simulation_id) if not os.path.exists(sim_dir): raise ValueError(f"模拟不存在: {simulation_id}") @@ -1736,7 +1759,7 @@ def get_interview_history( Returns: Interview历史记录列表 """ - sim_dir = os.path.join(cls.RUN_STATE_DIR, simulation_id) + sim_dir = cls._run_dir(simulation_id) results = [] diff --git a/backend/app/utils/logger.py b/backend/app/utils/logger.py index 93422afafb..ee4e7d1263 100644 --- a/backend/app/utils/logger.py +++ b/backend/app/utils/logger.py @@ 
-27,17 +27,23 @@ def _ensure_utf8_stdout(): LOG_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'logs') -def setup_logger(name: str = 'mirofish', level: int = logging.DEBUG) -> logging.Logger: +def setup_logger(name: str = 'mirofish', level: int = None) -> logging.Logger: """ 设置日志器 - + Args: name: 日志器名称 - level: 日志级别 - + level: 日志级别(None 时按 FLASK_DEBUG 自动选择) + Returns: 配置好的日志器 """ + # H5:生产模式(FLASK_DEBUG=false)下用 INFO —— 避免把请求体/调试细节(可能含上传内容、 + # 提示词、客户端传入的凭据)以明文写入轮转日志文件。开发模式仍用 DEBUG。 + if level is None: + from ..config import Config + level = logging.DEBUG if Config.DEBUG else logging.INFO + # 确保日志目录存在 os.makedirs(LOG_DIR, exist_ok=True) @@ -71,7 +77,7 @@ def setup_logger(name: str = 'mirofish', level: int = logging.DEBUG) -> logging. backupCount=5, encoding='utf-8' ) - file_handler.setLevel(logging.DEBUG) + file_handler.setLevel(level) file_handler.setFormatter(detailed_formatter) # 2. 控制台处理器 - 简洁日志(INFO及以上) diff --git a/backend/app/utils/security.py b/backend/app/utils/security.py new file mode 100644 index 0000000000..bf0af2d22e --- /dev/null +++ b/backend/app/utils/security.py @@ -0,0 +1,69 @@ +""" +安全辅助函数(评审 H5 / 上传嗅探 / 路径校验)。 +- safe_traceback: 仅在 DEBUG 时把堆栈返回客户端,生产环境只记到服务端日志 +- validate_id: 校验 URL 传入的 id,避免落入文件系统 sink(路径穿越/异常字符) +- upload_content_ok: 按魔术字节嗅探上传内容,防止改扩展名的二进制混入 +""" + +import os +import re +import traceback as _traceback + +from .logger import get_logger + +# 允许的 id 字符集:字母数字 + 下划线 + 连字符,长度 1-64。 +# 排除 '/'、'.'、'\\' 等可用于路径穿越或越级的字符。 +_ID_RE = re.compile(r'^[A-Za-z0-9_-]{1,64}$') + + +def safe_traceback() -> str: + """ + H5:完整堆栈始终写入服务端日志;仅在 DEBUG 模式才把堆栈返回客户端, + 生产环境返回通用提示,避免向客户端泄露内部路径/栈帧。 + """ + # 延迟导入 Config,避免与配置模块的潜在循环依赖 + from ..config import Config + tb = _traceback.format_exc() + get_logger('mirofish.error').error(tb) + return tb if Config.DEBUG else 'Internal server error (see server logs)' + + +def safe_error(e) -> str: + """ + 客户端可见的错误文案:DEBUG 模式返回异常消息,生产环境返回通用提示, + 避免异常消息本身(如 FileNotFoundError 
的路径、ValueError 里的配置值)泄露给客户端。 + 完整异常仍由各调用点的 logger.error / safe_traceback 记到服务端日志。 + """ + from ..config import Config + return str(e) if Config.DEBUG else 'Internal server error (see server logs)' + + +def validate_id(value: str, kind: str = 'id') -> str: + """ + 路径校验:拒绝任何不匹配 _ID_RE 的 id(含 '..'、'/'、空值), + 在 id 进入 os.path.join / makedirs / rmtree 之前阻断路径穿越。 + """ + if not isinstance(value, str) or not _ID_RE.match(value): + raise ValueError(f'Invalid {kind}: {value!r}') + return value + + +def upload_content_ok(file_storage, filename: str) -> bool: + """ + 上传嗅探:按扩展名校验文件头部内容,使改名的二进制无法通过扩展名白名单。 + - pdf:必须以 %PDF- 开头 + - txt/md/markdown:头部不得含 NUL 字节(典型二进制特征) + 读取后将流指针复位,避免影响后续保存。 + """ + ext = os.path.splitext(filename)[1].lower().lstrip('.') + head = file_storage.read(512) + file_storage.seek(0) + if ext == 'pdf': + return head[:5] == b'%PDF-' + if ext in ('txt', 'md', 'markdown'): + # 接受带 BOM 的 UTF-16/UTF-32/UTF-8 文本(这些合法文本会含 NUL 字节, + # 与 file_parser 的多编码支持一致);否则按头部含 NUL 判定为二进制并拒绝。 + if head.startswith((b'\xff\xfe', b'\xfe\xff', b'\xef\xbb\xbf')): + return True + return b'\x00' not in head + return False diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 8c65b7294a..de9e2f45cc 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -12,7 +12,9 @@ dependencies = [ # 核心框架 "flask>=3.0.0", "flask-cors>=6.0.0", - + # 生产 WSGI 服务器(C1:替代 Werkzeug 开发服务器/调试器;须以 -w 1 --threads N 单进程启动) + "gunicorn>=21.0.0", + # LLM 相关 "openai>=1.0.0", diff --git a/backend/requirements.txt b/backend/requirements.txt index 4f146296ba..5853199f16 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -8,6 +8,9 @@ # ============= 核心框架 ============= flask>=3.0.0 flask-cors>=6.0.0 +# 生产 WSGI 服务器(C1:替代 Flask 自带的 Werkzeug 开发服务器/调试器) +# 注意:因模拟运行态保存在进程内类变量,必须以单 worker 多线程启动(-w 1 --threads N) +gunicorn>=21.0.0 # ============= LLM 相关 ============= # OpenAI SDK(统一使用 OpenAI 格式调用 LLM) diff --git a/backend/scripts/run_parallel_simulation.py 
b/backend/scripts/run_parallel_simulation.py index 2a627ffd04..3017c0b7e6 100644 --- a/backend/scripts/run_parallel_simulation.py +++ b/backend/scripts/run_parallel_simulation.py @@ -81,6 +81,11 @@ def _utf8_open(file, mode='r', buffering=-1, encoding=None, errors=None, _shutdown_event = None _cleanup_done = False +# C4:模拟超时(秒)。从环境变量读取(子进程继承 Flask 父进程的环境)。 +# 每轮 env.step 超时防止单轮因 LLM/网络挂起而永久 wedge;总超时为整轮模拟的硬墙钟上限。 +_ROUND_TIMEOUT_SEC = int(os.environ.get("OASIS_ROUND_TIMEOUT_SEC", "600")) +_RUN_TIMEOUT_SEC = int(os.environ.get("OASIS_RUN_TIMEOUT_SEC", "7200")) + # 添加 backend 目录到路径 # 脚本固定位于 backend/scripts/ 目录 _scripts_dir = os.path.dirname(os.path.abspath(__file__)) @@ -333,8 +338,10 @@ async def _interview_single_platform(self, agent_id: int, prompt: str, platform: action_args={"prompt": prompt} ) actions = {agent: interview_action} - await env.step(actions) - + # C4:采访 env.step 加超时;TimeoutError 会被本方法的 except Exception 捕获并返回错误响应, + # 不会让持久化命令循环永久卡死。 + await asyncio.wait_for(env.step(actions), timeout=_ROUND_TIMEOUT_SEC) + result = self._get_interview_result(agent_id, actual_platform) result["platform"] = actual_platform return result @@ -466,7 +473,7 @@ async def handle_batch_interview(self, command_id: str, interviews: List[Dict], print(f" 警告: 无法获取Twitter Agent {agent_id}: {e}") if twitter_actions: - await self.twitter_env.step(twitter_actions) + await asyncio.wait_for(self.twitter_env.step(twitter_actions), timeout=_ROUND_TIMEOUT_SEC) # C4:批量采访超时 for interview in twitter_interviews: agent_id = interview.get("agent_id") @@ -493,7 +500,7 @@ async def handle_batch_interview(self, command_id: str, interviews: List[Dict], print(f" 警告: 无法获取Reddit Agent {agent_id}: {e}") if reddit_actions: - await self.reddit_env.step(reddit_actions) + await asyncio.wait_for(self.reddit_env.step(reddit_actions), timeout=_ROUND_TIMEOUT_SEC) # C4:批量采访超时 for interview in reddit_interviews: agent_id = interview.get("agent_id") @@ -1203,8 +1210,12 @@ def log_info(msg): pass if 
initial_actions: - await result.env.step(initial_actions) - log_info(f"已发布 {len(initial_actions)} 条初始帖子") + # C4:初始帖子的 env.step 也加超时 —— 否则在进入受保护的主循环前就可能因 LLM/网络挂起而 wedge + try: + await asyncio.wait_for(result.env.step(initial_actions), timeout=_ROUND_TIMEOUT_SEC) + log_info(f"已发布 {len(initial_actions)} 条初始帖子") + except asyncio.TimeoutError: + log_info(f"[超时] 初始帖子 env.step 超过 {_ROUND_TIMEOUT_SEC}s,跳过初始帖子继续") # 记录 round 0 结束 if action_logger: @@ -1250,8 +1261,20 @@ def log_info(msg): action_logger.log_round_end(round_num + 1, 0) continue + # C4:总时长上限 —— 超过则优雅停止(保留环境,后续仍可 close/interview) + if (datetime.now() - start_time).total_seconds() > _RUN_TIMEOUT_SEC: + log_info(f"[超时] 模拟总时长超过 {_RUN_TIMEOUT_SEC}s,在第 {round_num + 1} 轮停止") + break + actions = {agent: LLMAction() for _, agent in active_agents} - await result.env.step(actions) + # C4:每轮超时 —— 防止 env.step 因 LLM/网络挂起而永久 wedge + try: + await asyncio.wait_for(result.env.step(actions), timeout=_ROUND_TIMEOUT_SEC) + except asyncio.TimeoutError: + log_info(f"[超时] 第 {round_num + 1} 轮 env.step 超过 {_ROUND_TIMEOUT_SEC}s,跳过并停止循环") + if action_logger: + action_logger.log_round_end(round_num + 1, 0) + break # 从数据库获取实际执行的动作并记录 actual_actions, last_rowid = fetch_new_actions_from_db( @@ -1402,8 +1425,12 @@ def log_info(msg): pass if initial_actions: - await result.env.step(initial_actions) - log_info(f"已发布 {len(initial_actions)} 条初始帖子") + # C4:初始帖子的 env.step 也加超时 —— 否则在进入受保护的主循环前就可能因 LLM/网络挂起而 wedge + try: + await asyncio.wait_for(result.env.step(initial_actions), timeout=_ROUND_TIMEOUT_SEC) + log_info(f"已发布 {len(initial_actions)} 条初始帖子") + except asyncio.TimeoutError: + log_info(f"[超时] 初始帖子 env.step 超过 {_ROUND_TIMEOUT_SEC}s,跳过初始帖子继续") # 记录 round 0 结束 if action_logger: @@ -1449,8 +1476,20 @@ def log_info(msg): action_logger.log_round_end(round_num + 1, 0) continue + # C4:总时长上限 —— 超过则优雅停止(保留环境,后续仍可 close/interview) + if (datetime.now() - start_time).total_seconds() > _RUN_TIMEOUT_SEC: + log_info(f"[超时] 模拟总时长超过 {_RUN_TIMEOUT_SEC}s,在第 
{round_num + 1} 轮停止") + break + actions = {agent: LLMAction() for _, agent in active_agents} - await result.env.step(actions) + # C4:每轮超时 —— 防止 env.step 因 LLM/网络挂起而永久 wedge + try: + await asyncio.wait_for(result.env.step(actions), timeout=_ROUND_TIMEOUT_SEC) + except asyncio.TimeoutError: + log_info(f"[超时] 第 {round_num + 1} 轮 env.step 超过 {_ROUND_TIMEOUT_SEC}s,跳过并停止循环") + if action_logger: + action_logger.log_round_end(round_num + 1, 0) + break # 从数据库获取实际执行的动作并记录 actual_actions, last_rowid = fetch_new_actions_from_db( @@ -1577,16 +1616,34 @@ async def main(): reddit_result: Optional[PlatformSimulation] = None if args.twitter_only: - twitter_result = await run_twitter_simulation(config, simulation_dir, twitter_logger, log_manager, args.max_rounds) + # C4:与并行路径对称 —— 单平台非超时异常也隔离为 None 并记录,确保后续 env.close 块仍可达 + try: + twitter_result = await run_twitter_simulation(config, simulation_dir, twitter_logger, log_manager, args.max_rounds) + except Exception as e: + log_manager.error(f"[Twitter] 模拟异常,已隔离: {e}") + twitter_result = None elif args.reddit_only: - reddit_result = await run_reddit_simulation(config, simulation_dir, reddit_logger, log_manager, args.max_rounds) + try: + reddit_result = await run_reddit_simulation(config, simulation_dir, reddit_logger, log_manager, args.max_rounds) + except Exception as e: + log_manager.error(f"[Reddit] 模拟异常,已隔离: {e}") + reddit_result = None else: # 并行运行(每个平台使用独立的日志记录器) + # C4:return_exceptions=True —— 一个平台抛异常(如 LLM 401/429/配额)不再取消另一平台, + # 也不会绕过下方的 env.close(否则环境泄漏,违背“优雅停止、保留环境”的设计)。 results = await asyncio.gather( run_twitter_simulation(config, simulation_dir, twitter_logger, log_manager, args.max_rounds), run_reddit_simulation(config, simulation_dir, reddit_logger, log_manager, args.max_rounds), + return_exceptions=True, ) twitter_result, reddit_result = results + if isinstance(twitter_result, BaseException): + log_manager.error(f"[Twitter] 模拟异常,已隔离: {twitter_result}") + twitter_result = None + if isinstance(reddit_result, 
BaseException): + log_manager.error(f"[Reddit] 模拟异常,已隔离: {reddit_result}") + reddit_result = None total_elapsed = (datetime.now() - start_time).total_seconds() log_manager.info("=" * 60) diff --git a/backend/scripts/run_reddit_simulation.py b/backend/scripts/run_reddit_simulation.py index 14907cbda5..c5196082b4 100644 --- a/backend/scripts/run_reddit_simulation.py +++ b/backend/scripts/run_reddit_simulation.py @@ -29,6 +29,10 @@ _shutdown_event = None _cleanup_done = False +# C4:模拟超时(秒),从环境变量读取(子进程继承父进程环境) +_ROUND_TIMEOUT_SEC = int(os.environ.get("OASIS_ROUND_TIMEOUT_SEC", "600")) +_RUN_TIMEOUT_SEC = int(os.environ.get("OASIS_RUN_TIMEOUT_SEC", "7200")) + # 添加项目路径 _scripts_dir = os.path.dirname(os.path.abspath(__file__)) _backend_dir = os.path.abspath(os.path.join(_scripts_dir, '..')) @@ -230,7 +234,7 @@ async def handle_interview(self, command_id: str, agent_id: int, prompt: str) -> # 执行Interview actions = {agent: interview_action} - await self.env.step(actions) + await asyncio.wait_for(self.env.step(actions), timeout=_ROUND_TIMEOUT_SEC) # C4:采访超时(TimeoutError 由本块 except 捕获) # 从数据库获取结果 result = self._get_interview_result(agent_id) @@ -276,7 +280,7 @@ async def handle_batch_interview(self, command_id: str, interviews: List[Dict]) return False # 执行批量Interview - await self.env.step(actions) + await asyncio.wait_for(self.env.step(actions), timeout=_ROUND_TIMEOUT_SEC) # C4:采访超时(TimeoutError 由本块 except 捕获) # 获取所有结果 results = {} @@ -616,8 +620,12 @@ async def run(self, max_rounds: int = None): print(f" 警告: 无法为Agent {agent_id}创建初始帖子: {e}") if initial_actions: - await self.env.step(initial_actions) - print(f" 已发布 {len(initial_actions)} 条初始帖子") + # C4:初始帖子的 env.step 也加超时,避免进入主循环前就因 LLM/网络挂起而 wedge + try: + await asyncio.wait_for(self.env.step(initial_actions), timeout=_ROUND_TIMEOUT_SEC) + print(f" 已发布 {len(initial_actions)} 条初始帖子") + except asyncio.TimeoutError: + print(f" [超时] 初始帖子 env.step 超过 {_ROUND_TIMEOUT_SEC}s,跳过继续", flush=True) # 主模拟循环 print("\n开始模拟循环...") @@ -634,13 
+642,23 @@ async def run(self, max_rounds: int = None): if not active_agents: continue - + + # C4:总时长上限 —— 超过则优雅停止 + if (datetime.now() - start_time).total_seconds() > _RUN_TIMEOUT_SEC: + print(f"[超时] 模拟总时长超过 {_RUN_TIMEOUT_SEC}s,在第 {round_num + 1} 轮停止", flush=True) + break + actions = { agent: LLMAction() for _, agent in active_agents } - - await self.env.step(actions) + + # C4:每轮超时 —— 防止 env.step 因 LLM/网络挂起而永久 wedge + try: + await asyncio.wait_for(self.env.step(actions), timeout=_ROUND_TIMEOUT_SEC) + except asyncio.TimeoutError: + print(f"[超时] 第 {round_num + 1} 轮 env.step 超过 {_ROUND_TIMEOUT_SEC}s,跳过并停止循环", flush=True) + break if (round_num + 1) % 10 == 0 or round_num == 0: elapsed = (datetime.now() - start_time).total_seconds() diff --git a/backend/scripts/run_twitter_simulation.py b/backend/scripts/run_twitter_simulation.py index caab9e9d35..48f5893745 100644 --- a/backend/scripts/run_twitter_simulation.py +++ b/backend/scripts/run_twitter_simulation.py @@ -29,6 +29,10 @@ _shutdown_event = None _cleanup_done = False +# C4:模拟超时(秒),从环境变量读取(子进程继承父进程环境) +_ROUND_TIMEOUT_SEC = int(os.environ.get("OASIS_ROUND_TIMEOUT_SEC", "600")) +_RUN_TIMEOUT_SEC = int(os.environ.get("OASIS_RUN_TIMEOUT_SEC", "7200")) + # 添加项目路径 _scripts_dir = os.path.dirname(os.path.abspath(__file__)) _backend_dir = os.path.abspath(os.path.join(_scripts_dir, '..')) @@ -230,7 +234,7 @@ async def handle_interview(self, command_id: str, agent_id: int, prompt: str) -> # 执行Interview actions = {agent: interview_action} - await self.env.step(actions) + await asyncio.wait_for(self.env.step(actions), timeout=_ROUND_TIMEOUT_SEC) # C4:采访超时(TimeoutError 由本块 except 捕获) # 从数据库获取结果 result = self._get_interview_result(agent_id) @@ -276,7 +280,7 @@ async def handle_batch_interview(self, command_id: str, interviews: List[Dict]) return False # 执行批量Interview - await self.env.step(actions) + await asyncio.wait_for(self.env.step(actions), timeout=_ROUND_TIMEOUT_SEC) # C4:采访超时(TimeoutError 由本块 except 捕获) # 获取所有结果 results = 
{} @@ -623,8 +627,12 @@ async def run(self, max_rounds: int = None): print(f" 警告: 无法为Agent {agent_id}创建初始帖子: {e}") if initial_actions: - await self.env.step(initial_actions) - print(f" 已发布 {len(initial_actions)} 条初始帖子") + # C4:初始帖子的 env.step 也加超时,避免进入主循环前就因 LLM/网络挂起而 wedge + try: + await asyncio.wait_for(self.env.step(initial_actions), timeout=_ROUND_TIMEOUT_SEC) + print(f" 已发布 {len(initial_actions)} 条初始帖子") + except asyncio.TimeoutError: + print(f" [超时] 初始帖子 env.step 超过 {_ROUND_TIMEOUT_SEC}s,跳过继续", flush=True) # 主模拟循环 print("\n开始模拟循环...") @@ -643,15 +651,25 @@ async def run(self, max_rounds: int = None): if not active_agents: continue - + + # C4:总时长上限 —— 超过则优雅停止 + if (datetime.now() - start_time).total_seconds() > _RUN_TIMEOUT_SEC: + print(f"[超时] 模拟总时长超过 {_RUN_TIMEOUT_SEC}s,在第 {round_num + 1} 轮停止", flush=True) + break + # 构建动作 actions = { agent: LLMAction() for _, agent in active_agents } - + # 执行动作 - await self.env.step(actions) + # C4:每轮超时 —— 防止 env.step 因 LLM/网络挂起而永久 wedge + try: + await asyncio.wait_for(self.env.step(actions), timeout=_ROUND_TIMEOUT_SEC) + except asyncio.TimeoutError: + print(f"[超时] 第 {round_num + 1} 轮 env.step 超过 {_ROUND_TIMEOUT_SEC}s,跳过并停止循环", flush=True) + break # 打印进度 if (round_num + 1) % 10 == 0 or round_num == 0: diff --git a/backend/uv.lock b/backend/uv.lock index 642dd9c363..1b3132fb3e 100644 --- a/backend/uv.lock +++ b/backend/uv.lock @@ -511,6 +511,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/51/c7/b64cae5dba3a1b138d7123ec36bb5ccd39d39939f18454407e5468f4763f/fsspec-2025.12.0-py3-none-any.whl", hash = "sha256:8bf1fe301b7d8acfa6e8571e3b1c3d158f909666642431cc78a1b7b4dbc5ec5b", size = 201422, upload-time = "2025-12-03T15:23:41.434Z" }, ] +[[package]] +name = "gunicorn" +version = "26.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "packaging" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/6d/b7/a4a3f632f823e432ce6bc65f62961b7980c898c77f075a2f7118cb3846fe/gunicorn-26.0.0.tar.gz", hash = "sha256:ca9346f85e3a4aeeb64d491045c16b9a35647abd37ea15efe53080eb8b090baf", size = 727286, upload-time = "2026-05-05T06:38:25.529Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e6/40/9c2384fc2be4ad25dd4a49decd5ad9ea5a3639814c11bd40ab77cb9f0a14/gunicorn-26.0.0-py3-none-any.whl", hash = "sha256:40233d26a5f0d1872916188c276e21641155111c2853f0c2cd55260aec0d24fc", size = 212009, upload-time = "2026-05-05T06:38:23.007Z" }, +] + [[package]] name = "h11" version = "0.16.0" @@ -994,6 +1006,7 @@ dependencies = [ { name = "charset-normalizer" }, { name = "flask" }, { name = "flask-cors" }, + { name = "gunicorn" }, { name = "openai" }, { name = "pydantic" }, { name = "pymupdf" }, @@ -1022,6 +1035,7 @@ requires-dist = [ { name = "charset-normalizer", specifier = ">=3.0.0" }, { name = "flask", specifier = ">=3.0.0" }, { name = "flask-cors", specifier = ">=6.0.0" }, + { name = "gunicorn", specifier = ">=21.0.0" }, { name = "openai", specifier = ">=1.0.0" }, { name = "pipreqs", marker = "extra == 'dev'", specifier = ">=0.5.0" }, { name = "pydantic", specifier = ">=2.0.0" }, diff --git a/docker-compose.yml b/docker-compose.yml index 637f1dfaee..96a2168712 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,6 +3,13 @@ services: image: ghcr.io/666ghj/mirofish:latest # 加速镜像(如拉取缓慢可替换上方地址) # image: ghcr.nju.edu.cn/666ghj/mirofish:latest + # C2:本地构建以把前端 API Key 烤进打包产物。`docker compose up --build` 会用根 .env 里的 + # VITE_API_KEY 作为 build-arg;若直接拉取上方预构建镜像(不 --build),其内置的 key 不可控, + # 此时请改用 AUTH_ENABLED=false(单机/内网)或自行构建带 VITE_API_KEY 的镜像。 + build: + context: . 
+ args: + VITE_API_KEY: ${VITE_API_KEY:-} container_name: mirofish env_file: - .env diff --git a/frontend/.env.example b/frontend/.env.example new file mode 100644 index 0000000000..63e0a3b88f --- /dev/null +++ b/frontend/.env.example @@ -0,0 +1,9 @@ +# 前端构建期变量(Vite 读取,仅 VITE_ 前缀会暴露到客户端包) +# 本地前端构建:复制为 frontend/.env 并填写;Docker 构建则由 docker compose 经 build-arg 注入(见根 .env 的 VITE_API_KEY)。 + +# 必须等于后端 .env 的 API_KEY —— 打包后的 UI 会用它作为 X-API-Key 调用 /api/*。 +# ⚠️ 会被打进客户端包、可被任何访问者提取,不能当作多租户隔离手段(见 README 安全说明)。 +VITE_API_KEY= + +# 后端 API 基地址(默认走前端预览/开发服务器的 /api 代理到后端;如直连后端请填写完整地址) +# VITE_API_BASE_URL=http://localhost:5001 diff --git a/frontend/index.html b/frontend/index.html index 0b80095c6d..0eda42d063 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -6,6 +6,11 @@ + + diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 3e56d752df..7570ce3873 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -10,6 +10,7 @@ "dependencies": { "axios": "^1.14.0", "d3": "^7.9.0", + "dompurify": "^3.4.10", "vue": "^3.5.24", "vue-i18n": "^11.3.0", "vue-router": "^4.6.3" @@ -938,6 +939,13 @@ "dev": true, "license": "MIT" }, + "node_modules/@types/trusted-types": { + "version": "2.0.7", + "resolved": "https://registry.npmjs.org/@types/trusted-types/-/trusted-types-2.0.7.tgz", + "integrity": "sha512-ScaPdn1dQczgbl0QFTeTOmVHFULt394XJgOQNoyVhZ6r2vLnMLJfBPd53SB52T/3G36VI1/g2MZaX0cwDuXsfw==", + "license": "MIT", + "optional": true + }, "node_modules/@vitejs/plugin-vue": { "version": "6.0.2", "resolved": "https://registry.npmjs.org/@vitejs/plugin-vue/-/plugin-vue-6.0.2.tgz", @@ -1435,7 +1443,6 @@ "resolved": "https://registry.npmjs.org/d3-selection/-/d3-selection-3.0.0.tgz", "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==", "license": "ISC", - "peer": true, "engines": { "node": ">=12" } @@ -1538,6 +1545,15 @@ "node": ">=0.4.0" } }, + "node_modules/dompurify": { + "version": 
"3.4.10", + "resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.4.10.tgz", + "integrity": "sha512-0xzNv0e7oYC6yyuOGZIABPM4qtg3QxLFniDNPP4ZP90wR8Yq3zgwpRbrNiT4N3IKqDbbYFEJLV+JWEs19aZ//w==", + "license": "(MPL-2.0 OR Apache-2.0)", + "optionalDependencies": { + "@types/trusted-types": "^2.0.7" + } + }, "node_modules/dunder-proto": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", @@ -1913,7 +1929,6 @@ "integrity": "sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==", "dev": true, "license": "MIT", - "peer": true, "engines": { "node": ">=12" }, @@ -2053,7 +2068,6 @@ "integrity": "sha512-ITcnkFeR3+fI8P1wMgItjGrR10170d8auB4EpMLPqmx6uxElH3a/hHGQabSHKdqd4FXWO1nFIp9rRn7JQ34ACQ==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "esbuild": "^0.25.0", "fdir": "^6.5.0", @@ -2128,7 +2142,6 @@ "resolved": "https://registry.npmjs.org/vue/-/vue-3.5.25.tgz", "integrity": "sha512-YLVdgv2K13WJ6n+kD5owehKtEXwdwXuj2TTyJMsO7pSeKw2bfRNZGjhB7YzrpbMYj5b5QsUebHpOqR3R3ziy/g==", "license": "MIT", - "peer": true, "dependencies": { "@vue/compiler-dom": "3.5.25", "@vue/compiler-sfc": "3.5.25", diff --git a/frontend/package.json b/frontend/package.json index 1501b628f9..65bcdacbac 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -11,6 +11,7 @@ "dependencies": { "axios": "^1.14.0", "d3": "^7.9.0", + "dompurify": "^3.4.10", "vue": "^3.5.24", "vue-i18n": "^11.3.0", "vue-router": "^4.6.3" diff --git a/frontend/src/api/index.js b/frontend/src/api/index.js index e840e1166a..d29f22521a 100644 --- a/frontend/src/api/index.js +++ b/frontend/src/api/index.js @@ -10,10 +10,19 @@ const service = axios.create({ } }) +// API Key(C2):后端默认对 /api/* 强制鉴权,前端需在每个请求带上 X-API-Key。 +// 注意:构建到客户端包里的 VITE_API_KEY 是【可被任何访问者从 JS 包中提取】的,因此对“公开部署” +// 它只能挡住不加载页面的脚本式滥用,不能当作多租户隔离手段。真正的多租户场景应改为会话登录鉴权, +// 或在网关处注入按用户签发的 token;单机/内网/网关后部署时此值足够。 +const API_KEY = import.meta.env.VITE_API_KEY 
|| '' + // 请求拦截器 service.interceptors.request.use( config => { config.headers['Accept-Language'] = i18n.global.locale.value + if (API_KEY) { + config.headers['X-API-Key'] = API_KEY + } return config }, error => { diff --git a/frontend/src/components/Step4Report.vue b/frontend/src/components/Step4Report.vue index 8e53ceb53b..402092fb9d 100644 --- a/frontend/src/components/Step4Report.vue +++ b/frontend/src/components/Step4Report.vue @@ -391,6 +391,7 @@