From e47ebb3bf363ecc15563bb8d05ab4c65f88e2268 Mon Sep 17 00:00:00 2001 From: Ryan Hui Date: Sat, 6 Jun 2026 11:16:42 +0800 Subject: [PATCH] Add ManufacturingAgentSuite example Add a minimal OPEA manufacturing reference package with compose profiles, smoke tests, benchmark documentation, and CI-compatible lint fixes. Signed-off-by: Ryan Hui --- .pre-commit-config.yaml | 1 + EdgeCraftRAG/cli/README.md | 2 + EdgeCraftRAG/cli/client.py | 19 +- EdgeCraftRAG/cli/config.py | 1 - EdgeCraftRAG/cli/main.py | 55 ++-- EdgeCraftRAG/cli/quickstart.py | 10 +- EdgeCraftRAG/cli/setup.py | 4 +- .../docker_compose/intel/gpu/arc/README.md | 13 +- .../docker_compose/intel/gpu/arc/README_zh.md | 25 +- EdgeCraftRAG/docs/API_Guide.md | 1 + EdgeCraftRAG/docs/Advanced_Setup.md | 4 +- EdgeCraftRAG/docs/Advanced_Setup_zh.md | 3 +- EdgeCraftRAG/docs/Explore_Edge_Craft_RAG.md | 15 +- .../docs/Explore_Edge_Craft_RAG_zh.md | 8 +- EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py | 6 +- EdgeCraftRAG/edgecraftrag/api/v1/data.py | 3 +- .../edgecraftrag/api/v1/knowledge_base.py | 56 ++-- EdgeCraftRAG/edgecraftrag/api/v1/model.py | 4 +- EdgeCraftRAG/edgecraftrag/api/v1/pipeline.py | 10 +- .../components/agents/deep_search/config.py | 2 +- .../agents/deep_search/deep_search.py | 1 + .../edgecraftrag/components/benchmark.py | 6 +- .../edgecraftrag/components/generator.py | 28 +- .../edgecraftrag/components/indexer.py | 3 +- .../edgecraftrag/components/knowledge_base.py | 7 +- EdgeCraftRAG/edgecraftrag/components/model.py | 113 ++++---- .../components/ov_llamaindex_helper.py | 35 ++- .../edgecraftrag/components/pipeline.py | 97 ++++--- .../edgecraftrag/components/retriever.py | 4 +- .../edgecraftrag/config_repository.py | 2 +- .../edgecraftrag/controllers/agentmgr.py | 14 +- .../edgecraftrag/controllers/filemgr.py | 2 +- .../controllers/knowledge_basemgr.py | 4 +- .../edgecraftrag/controllers/modelmgr.py | 11 +- .../edgecraftrag/controllers/pipelinemgr.py | 17 +- EdgeCraftRAG/edgecraftrag/requirements.txt | 26 +- EdgeCraftRAG/tests/common.sh | 2 +- .../configs/test_pipeline_local_llm.json | 2 +- .../tests/test_pipeline_local_llm.json | 2 +- EdgeCraftRAG/tools/README.md | 6 +- EdgeCraftRAG/tools/README_zh.md | 9 +- EdgeCraftRAG/tools/build_images.sh | 3 + EdgeCraftRAG/ui/vue/components.d.ts | 5 +- EdgeCraftRAG/ui/vue/src/auto-imports.d.ts | 168 ++++++------ EdgeCraftRAG/ui/vue/src/i18n/en.ts | 60 ++--- EdgeCraftRAG/ui/vue/src/i18n/zh.ts | 15 +- .../ui/vue/src/utils/customRenderer.ts | 32 +-- ManufacturingAgentSuite/README.md | 68 +++++ ManufacturingAgentSuite/assets/flow.md | 33 +++ ManufacturingAgentSuite/benchmark/README.md | 26 ++ .../docker_compose/intel/cpu/xeon/README.md | 53 ++++ .../intel/cpu/xeon/compose.opea-tei.yaml | 40 +++ .../intel/cpu/xeon/compose.yaml | 33 +++ .../manufacturing_agent_suite.py | 244 ++++++++++++++++++ .../tests/test_compose_on_xeon.sh | 86 ++++++ 55 files changed, 1072 insertions(+), 427 deletions(-) create mode 100644 ManufacturingAgentSuite/README.md create mode 100644 ManufacturingAgentSuite/assets/flow.md create mode 100644 ManufacturingAgentSuite/benchmark/README.md create mode 100644 ManufacturingAgentSuite/docker_compose/intel/cpu/xeon/README.md create mode 100644 ManufacturingAgentSuite/docker_compose/intel/cpu/xeon/compose.opea-tei.yaml create mode 100644 ManufacturingAgentSuite/docker_compose/intel/cpu/xeon/compose.yaml create mode 100644 ManufacturingAgentSuite/manufacturing_agent_suite.py create mode 100644 ManufacturingAgentSuite/tests/test_compose_on_xeon.sh diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 972776fbd4..49f636da9e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -82,6 +82,7 @@ repos: rev: v1.7.7 hooks: - id: docformatter + language_version: python3.12 args: [ --in-place, --wrap-summaries=0, # 0 means disable wrap diff --git a/EdgeCraftRAG/cli/README.md b/EdgeCraftRAG/cli/README.md index 98dfaf5eb6..66711967d1 100644 --- a/EdgeCraftRAG/cli/README.md +++ b/EdgeCraftRAG/cli/README.md @@ -329,12 +329,14 @@ The CLI will display error messages from the API in JSON format. Network errors ## Tips - Use `--help` with any command to see detailed help: + ```bash ecrag pipeline --help ecrag pipeline create --help ``` - Pipe JSON output to other tools: + ```bash ecrag kb list | jq '.[]' | head -n 20 ``` diff --git a/EdgeCraftRAG/cli/client.py b/EdgeCraftRAG/cli/client.py index b96c2d7ffb..8c4730464b 100644 --- a/EdgeCraftRAG/cli/client.py +++ b/EdgeCraftRAG/cli/client.py @@ -2,17 +2,18 @@ # SPDX-License-Identifier: Apache-2.0 import json -import requests -from typing import Optional, Dict, Any +from typing import Any, Dict, Optional from urllib.parse import urljoin +import requests + class EcragApiClient: """API client for Edge Craft RAG.""" def __init__(self, host: str = "http://localhost", server_port: int = 16010, mega_port: int = 16011): """Initialize the API client. - + Args: host: The host URL (default: http://localhost) server_port: The server port (default: 16010) @@ -21,10 +22,10 @@ def __init__(self, host: str = "http://localhost", server_port: int = 16010, meg # Normalize host URL if not host.startswith(("http://", "https://")): host = f"http://{host}" - + # Remove trailing slash if present host = host.rstrip("/") - + self.server_url = f"{host}:{server_port}" self.mega_url = f"{host}:{mega_port}" @@ -170,12 +171,16 @@ def delete_knowledge_base(self, kb_name: str) -> Dict[str, Any]: def add_files_to_kb(self, kb_name: str, local_paths: list) -> Dict[str, Any]: """Add files to a knowledge base.""" url = urljoin(self.server_url, f"/v1/knowledge/{kb_name}/files") - return self._request("POST", url, json={"local_paths": local_paths}, headers={"Content-Type": "application/json"}) + return self._request( + "POST", url, json={"local_paths": local_paths}, headers={"Content-Type": "application/json"} + ) def delete_files_from_kb(self, kb_name: str, local_paths: list) -> Dict[str, Any]: """Delete files from a knowledge base.""" url = urljoin(self.server_url, f"/v1/knowledge/{kb_name}/files") - return self._request("DELETE", url, json={"local_paths": local_paths}, headers={"Content-Type": "application/json"}) + return self._request( + "DELETE", url, json={"local_paths": local_paths}, headers={"Content-Type": "application/json"} + ) # Experience Management def get_experiences(self) -> Dict[str, Any]: diff --git a/EdgeCraftRAG/cli/config.py b/EdgeCraftRAG/cli/config.py index 4308d629db..e540b61a0d 100644 --- a/EdgeCraftRAG/cli/config.py +++ b/EdgeCraftRAG/cli/config.py @@ -1,6 +1,5 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 - """Configuration for EdgeCraft RAG CLI.""" import os diff --git a/EdgeCraftRAG/cli/main.py b/EdgeCraftRAG/cli/main.py index 33d7df33f3..8d32d39d27 100644 --- a/EdgeCraftRAG/cli/main.py +++ b/EdgeCraftRAG/cli/main.py @@ -2,10 +2,11 @@ # SPDX-License-Identifier: Apache-2.0 import json -import click import os from pathlib import Path from typing import Optional + +import click from cli.client import EcragApiClient from cli.config import get_config @@ -34,26 +35,26 @@ def run_chatqna_query(client: EcragApiClient, query: str, top_n: int, max_tokens @click.pass_context def cli(ctx, host: Optional[str], port: Optional[int], mega_port: Optional[int]): """EdgeCraft RAG CLI Tool. - + Configure server connection via command-line options or environment variables: - ECRAG_HOST: Server host (default: http://localhost) - ECRAG_PORT: Server port (default: 16010) - ECRAG_MEGA_PORT: Mega service port (default: 16011) """ ctx.ensure_object(dict) - + # Get defaults from config config = get_config() - + # Use provided options or environment/defaults final_host = host or config.host final_port = port or config.port final_mega_port = mega_port or config.mega_port - + # Normalize host URL if not final_host.startswith(("http://", "https://")): final_host = f"http://{final_host}" - + ctx.obj["client"] = EcragApiClient(host=final_host, server_port=final_port, mega_port=final_mega_port) @@ -204,7 +205,7 @@ def load(ctx, model_type: str, model_id: str, model_path: str, device: str, weig @model.command() @click.pass_context -def list(ctx): +def list(ctx): # noqa: F811 """List all models.""" client = ctx.obj["client"] result = client.get_models() @@ -214,7 +215,7 @@ def list(ctx): @model.command() @click.option("--id", "model_id", required=True, help="Model ID") @click.pass_context -def get(ctx, model_id: str): +def get(ctx, model_id: str): # noqa: F811 """Get a specific model.""" client = ctx.obj["client"] result = client.get_model(model_id) @@ -241,7 +242,7 @@ def update(ctx, model_id: str, device: Optional[str], weight: Optional[str]): @model.command() @click.option("--id", "model_id", required=True, help="Model ID") @click.pass_context -def delete(ctx, model_id: str): +def delete(ctx, model_id: str): # noqa: F811 """Delete a model.""" client = ctx.obj["client"] if click.confirm(f"Are you sure you want to delete model '{model_id}'?"): @@ -284,7 +285,7 @@ def kb(): @click.option("--description", help="Knowledge base description") @click.option("-f", "--file", type=click.Path(exists=True), help="KB config JSON file") @click.pass_context -def create(ctx, name: str, description: Optional[str], file: Optional[str]): +def create(ctx, name: str, description: Optional[str], file: Optional[str]): # noqa: F811 """Create a knowledge base.""" client = ctx.obj["client"] @@ -302,7 +303,7 @@ def create(ctx, name: str, description: Optional[str], file: Optional[str]): @kb.command() @click.pass_context -def list(ctx): +def list(ctx): # noqa: F811 """List all knowledge bases.""" client = ctx.obj["client"] result = client.get_knowledge_bases() @@ -312,7 +313,7 @@ def list(ctx): @kb.command() @click.option("-n", "--name", required=True, help="Knowledge base name") @click.pass_context -def get(ctx, name: str): +def get(ctx, name: str): # noqa: F811 """Get a specific knowledge base.""" client = ctx.obj["client"] result = client.get_knowledge_base(name) @@ -322,7 +323,7 @@ def get(ctx, name: str): @kb.command() @click.option("-n", "--name", required=True, help="Knowledge base name") @click.pass_context -def get_json(ctx, name: str): +def get_json(ctx, name: str): # noqa: F811 """Get knowledge base JSON data.""" client = ctx.obj["client"] result = client.get_knowledge_base_json(name) @@ -346,7 +347,7 @@ def filemap(ctx, name: str, page_num: int, page_size: int): @click.option("--active", type=bool, help="Set active status") @click.option("--description", help="Update description") @click.pass_context -def update(ctx, name: str, active: Optional[bool], description: Optional[str]): +def update(ctx, name: str, active: Optional[bool], description: Optional[str]): # noqa: F811 """Update a knowledge base.""" client = ctx.obj["client"] kb_data = {"name": name} @@ -361,7 +362,7 @@ def update(ctx, name: str, active: Optional[bool], description: Optional[str]): @kb.command() @click.option("-n", "--name", required=True, help="Knowledge base name") @click.pass_context -def delete(ctx, name: str): +def delete(ctx, name: str): # noqa: F811 """Delete a knowledge base.""" client = ctx.obj["client"] if click.confirm(f"Are you sure you want to delete knowledge base '{name}'?"): @@ -402,7 +403,7 @@ def experience(): @experience.command() @click.pass_context -def list(ctx): +def list(ctx): # noqa: F811 """List all experiences.""" client = ctx.obj["client"] result = client.get_experiences() @@ -412,7 +413,7 @@ def list(ctx): @experience.command() @click.option("--id", required=True, help="Experience ID") @click.pass_context -def get(ctx, id: str): +def get(ctx, id: str): # noqa: F811 """Get a specific experience.""" client = ctx.obj["client"] result = client.get_experience(id) @@ -424,7 +425,7 @@ def get(ctx, id: str): @click.option("--question", required=True, help="Question") @click.option("--content", multiple=True, required=True, help="Answer content") @click.pass_context -def create(ctx, id: str, question: str, content: tuple): +def create(ctx, id: str, question: str, content: tuple): # noqa: F811 """Create or update an experience.""" client = ctx.obj["client"] exp_data = {"idx": id, "question": question, "content": list(content)} @@ -435,7 +436,7 @@ def create(ctx, id: str, question: str, content: tuple): @experience.command() @click.option("--id", required=True, help="Experience ID") @click.pass_context -def delete(ctx, id: str): +def delete(ctx, id: str): # noqa: F811 """Delete an experience.""" client = ctx.obj["client"] if click.confirm(f"Are you sure you want to delete experience '{id}'?"): @@ -464,7 +465,7 @@ def agent(): @agent.command() @click.pass_context -def list(ctx): +def list(ctx): # noqa: F811 """List all agents.""" client = ctx.obj["client"] result = client.get_agents() @@ -474,7 +475,7 @@ def list(ctx): @agent.command() @click.option("-n", "--name", required=True, help="Agent name") @click.pass_context -def get(ctx, name: str): +def get(ctx, name: str): # noqa: F811 """Get a specific agent.""" client = ctx.obj["client"] result = client.get_agent(name) @@ -496,7 +497,7 @@ def configs(ctx, type: str): @click.option("--type", required=True, help="Agent type") @click.option("--pipeline", required=True, help="Pipeline index or name") @click.pass_context -def create(ctx, name: str, type: str, pipeline: str): +def create(ctx, name: str, type: str, pipeline: str): # noqa: F811 """Create an agent.""" client = ctx.obj["client"] agent_data = {"name": name, "type": type, "pipeline_idx": pipeline} @@ -508,7 +509,7 @@ def create(ctx, name: str, type: str, pipeline: str): @click.option("-n", "--name", required=True, help="Agent name") @click.option("--active", type=bool, help="Active status") @click.pass_context -def update(ctx, name: str, active: Optional[bool]): +def update(ctx, name: str, active: Optional[bool]): # noqa: F811 """Update an agent.""" client = ctx.obj["client"] agent_data = {} @@ -521,7 +522,7 @@ def update(ctx, name: str, active: Optional[bool]): @agent.command() @click.option("-n", "--name", required=True, help="Agent name") @click.pass_context -def delete(ctx, name: str): +def delete(ctx, name: str): # noqa: F811 """Delete an agent.""" client = ctx.obj["client"] if click.confirm(f"Are you sure you want to delete agent '{name}'?"): @@ -540,7 +541,7 @@ def prompt(): @prompt.command() @click.pass_context -def get(ctx): +def get(ctx): # noqa: F811 """Get the current system prompt.""" client = ctx.obj["client"] result = client.get_prompt() @@ -674,7 +675,7 @@ def session(): @session.command() @click.pass_context -def list(ctx): +def list(ctx): # noqa: F811 """List all sessions.""" client = ctx.obj["client"] result = client.get_sessions() @@ -684,7 +685,7 @@ def list(ctx): @session.command() @click.option("--id", required=True, help="Session ID") @click.pass_context -def get(ctx, id: str): +def get(ctx, id: str): # noqa: F811 """Get a specific session.""" client = ctx.obj["client"] result = client.get_session(id) diff --git a/EdgeCraftRAG/cli/quickstart.py b/EdgeCraftRAG/cli/quickstart.py index d37188763f..0921fb372e 100644 --- a/EdgeCraftRAG/cli/quickstart.py +++ b/EdgeCraftRAG/cli/quickstart.py @@ -1,25 +1,25 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 - """Quick start guide for EdgeCraft RAG CLI.""" import json import sys + from cli.client import EcragApiClient def test_connection(host: str = "http://localhost", port: int = 16010): """Test connection to EdgeCraft RAG server.""" client = EcragApiClient(host=host, server_port=port) - + try: print(f"Testing connection to {client.server_url}...") result = client.get_system_info() - + if "error" in result: print(f"❌ Connection failed: {result['error']}") return False - + print("✓ Connection successful!") print(f" System Info: {json.dumps(result, indent=2)}") return True @@ -62,7 +62,7 @@ def quick_start_guide(): export ECRAG_MEGA_PORT=16011 COMMON COMMANDS: - + Pipeline Management: ecrag pipeline list ecrag pipeline get --name diff --git a/EdgeCraftRAG/cli/setup.py b/EdgeCraftRAG/cli/setup.py index ca5255ba0a..fc59a4e8b5 100644 --- a/EdgeCraftRAG/cli/setup.py +++ b/EdgeCraftRAG/cli/setup.py @@ -1,12 +1,10 @@ #!/usr/bin/env python3 # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 - """Canonical setup script for EdgeCraft RAG CLI.""" from setuptools import setup - setup( name="ecrag-cli", version="0.1.0", @@ -35,4 +33,4 @@ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", ], -) \ No newline at end of file +) diff --git a/EdgeCraftRAG/docker_compose/intel/gpu/arc/README.md b/EdgeCraftRAG/docker_compose/intel/gpu/arc/README.md index 62baca9037..f69f8fe3bf 100755 --- a/EdgeCraftRAG/docker_compose/intel/gpu/arc/README.md +++ b/EdgeCraftRAG/docker_compose/intel/gpu/arc/README.md @@ -20,19 +20,22 @@ This section describes how to quickly deploy and test the EdgeCraftRAG service m ### 1. Prerequisites -EC-RAG supports vLLM deployment(default method) and local OpenVINO deployment for Intel Arc GPU and Core Ultra Platform. Prerequisites are shown as below: +EC-RAG supports vLLM deployment(default method) and local OpenVINO deployment for Intel Arc GPU and Core Ultra Platform. Prerequisites are shown as below: #### Core Ultra + **OS**: Ubuntu 24.04 or newer **Driver & libraries**: Please refer to [Installing Client GPUs on Ubuntu Desktop](https://dgpu-docs.intel.com/driver/client/overview.html#installing-client-gpus-on-ubuntu-desktop) **Available Inferencing Framework**: openVINO #### Intel Arc B60 -**OS**: Ubuntu 25.04 Desktop (for Core Ultra and Xeon-W), Ubuntu 25.04 Server (for Xeon-SP). + +**OS**: Ubuntu 25.04 Desktop (for Core Ultra and Xeon-W), Ubuntu 25.04 Server (for Xeon-SP). **Driver & libraries**: Please refer to [Install Bare Metal Environment](https://github.com/intel/llm-scaler/tree/main/vllm#11-install-bare-metal-environment) for detailed setup **Available Inferencing Framework**: openVINO, vLLM #### Intel Arc A770 + **OS**: Ubuntu Server 22.04.1 or newer (at least 6.2 LTS kernel) **Driver & libraries**: Please refer to [Installing GPUs Drivers](https://dgpu-docs.intel.com/driver/client/overview.html#ubuntu-22.04) for detailed driver & libraries setup **Available Inferencing Framework**: openVINO, vLLM @@ -48,9 +51,9 @@ cd GenAIExamples/EdgeCraftRAG > **NOTE**: If you want to checkout a released version, such as v1.5: > ->``` ->git checkout v1.5 ->``` +> ``` +> git checkout v1.5 +> ``` ### 3. Run quick_start.sh diff --git a/EdgeCraftRAG/docker_compose/intel/gpu/arc/README_zh.md b/EdgeCraftRAG/docker_compose/intel/gpu/arc/README_zh.md index c1320d2ead..31a7bd8dac 100644 --- a/EdgeCraftRAG/docker_compose/intel/gpu/arc/README_zh.md +++ b/EdgeCraftRAG/docker_compose/intel/gpu/arc/README_zh.md @@ -23,16 +23,19 @@ EC-RAG 支持 vLLM 部署(默认方式)以及面向 Intel Arc GPU 和 Core Ultra 平台的本地 OpenVINO 部署。前置条件如下: #### Core Ultra + **操作系统**:Ubuntu 24.04 或更高版本 **驱动与库**:请参考 [Installing Client GPUs on Ubuntu Desktop](https://dgpu-docs.intel.com/driver/client/overview.html#installing-client-gpus-on-ubuntu-desktop) **可用推理框架**:openVINO #### Intel Arc B60 + **操作系统**:Ubuntu 25.04 Desktop(适用于 Core Ultra 和 Xeon-W),Ubuntu 25.04 Server(适用于 Xeon-SP)。 **驱动与库**:详细安装请参考 [Install Bare Metal Environment](https://github.com/intel/llm-scaler/tree/main/vllm#11-install-bare-metal-environment) **可用推理框架**:openVINO、vLLM #### Intel Arc A770 + **操作系统**:Ubuntu Server 22.04.1 或更高版本(至少 6.2 LTS 内核) **驱动与库**:详细驱动与库安装请参考 [Installing GPUs Drivers](https://dgpu-docs.intel.com/driver/client/overview.html#ubuntu-22.04) **可用推理框架**:openVINO、vLLM @@ -48,9 +51,9 @@ cd GenAIExamples/EdgeCraftRAG > **注意**:如果你想切换到某个发布版本,例如 v1.5: > ->``` ->git checkout v1.5 ->``` +> ``` +> git checkout v1.5 +> ``` ### 3. 运行 quick_start.sh @@ -112,11 +115,11 @@ If you are accessing from another machine, replace ${HOST_IP} with your server's 下表全面概述了示例 Docker Compose 文件中各类部署所使用的 EdgeCraftRAG 服务。表中每一行代表一个独立服务,详细说明了可用镜像及其在部署架构中的功能描述。 -| 服务名称 | 可选镜像名称 | 可选 | 描述 | -| ------------------- | ---------------------------------------- | ---- | ------------------------------------------------------------------------------------------------ | -| etcd | quay.io/coreos/etcd:v3.5.5 | 否 | 提供分布式键值存储,用于服务发现和配置管理。 | -| minio | minio/minio:RELEASE.2023-03-20T20-16-18Z | 否 | 提供对象存储服务,用于存储文档和模型文件。 | -| milvus-standalone | milvusdb/milvus:v2.4.6 | 否 | 提供向量数据库能力,用于管理 embedding 和相似度检索。 | -| edgecraftrag-server | opea/edgecraftrag-server:latest | 否 | 作为 EdgeCraftRAG 服务后端,具体形态随部署方式不同而变化。 | -| edgecraftrag-ui | opea/edgecraftrag-ui:latest | 否 | 提供 EdgeCraftRAG 服务的用户界面。 | -| ecrag | opea/edgecraftrag:latest | 否 | 作为反向代理,管理 UI 与后端服务之间的流量。 | +| 服务名称 | 可选镜像名称 | 可选 | 描述 | +| ------------------- | ---------------------------------------- | ---- | ---------------------------------------------------------- | +| etcd | quay.io/coreos/etcd:v3.5.5 | 否 | 提供分布式键值存储,用于服务发现和配置管理。 | +| minio | minio/minio:RELEASE.2023-03-20T20-16-18Z | 否 | 提供对象存储服务,用于存储文档和模型文件。 | +| milvus-standalone | milvusdb/milvus:v2.4.6 | 否 | 提供向量数据库能力,用于管理 embedding 和相似度检索。 | +| edgecraftrag-server | opea/edgecraftrag-server:latest | 否 | 作为 EdgeCraftRAG 服务后端,具体形态随部署方式不同而变化。 | +| edgecraftrag-ui | opea/edgecraftrag-ui:latest | 否 | 提供 EdgeCraftRAG 服务的用户界面。 | +| ecrag | opea/edgecraftrag:latest | 否 | 作为反向代理,管理 UI 与后端服务之间的流量。 | diff --git a/EdgeCraftRAG/docs/API_Guide.md b/EdgeCraftRAG/docs/API_Guide.md index 2b0c318bc6..0fbb9fd50f 100644 --- a/EdgeCraftRAG/docs/API_Guide.md +++ b/EdgeCraftRAG/docs/API_Guide.md @@ -1,6 +1,7 @@ # Edge Craft Retrieval-Augmented Generation API Guide > **Base URLs** +> > - EC-RAG Server: `http://${HOST_IP}:16010` > - EC-RAG Mega Service: `http://${HOST_IP}:16011` diff --git a/EdgeCraftRAG/docs/Advanced_Setup.md b/EdgeCraftRAG/docs/Advanced_Setup.md index 3c57e6cc1f..167ee3acd7 100644 --- a/EdgeCraftRAG/docs/Advanced_Setup.md +++ b/EdgeCraftRAG/docs/Advanced_Setup.md @@ -42,6 +42,7 @@ optimum-cli export openvino -m BAAI/bge-reranker-large ${MODEL_PATH}/BAAI/bge-re #### LLM ##### openVINO + If you have Core Ultra platform only, please prepare openVINO models: You can also run openVINO models on discrete GPU. @@ -51,6 +52,7 @@ optimum-cli export openvino --model Qwen/Qwen3-8B ${MODEL_PATH}/OpenVINO/Qwen3-8 ``` ##### vLLM + Alternatively, if you have discrete GPU and want to use vLLM, please prepare models for vLLM: ```bash @@ -178,5 +180,3 @@ export TP=4 # for multi GPU, you can change TP value export ZE_AFFINITY_MASK=0,1,2,3 # for multi GPU, you can export ZE_AFFINITY_MASK=0,1,2... docker compose --profile b60 -f docker_compose/intel/gpu/arc/compose.yaml up -d ``` - - diff --git a/EdgeCraftRAG/docs/Advanced_Setup_zh.md b/EdgeCraftRAG/docs/Advanced_Setup_zh.md index f8b16dc0d7..537c57eba4 100644 --- a/EdgeCraftRAG/docs/Advanced_Setup_zh.md +++ b/EdgeCraftRAG/docs/Advanced_Setup_zh.md @@ -42,6 +42,7 @@ optimum-cli export openvino -m BAAI/bge-reranker-large ${MODEL_PATH}/BAAI/bge-re #### LLM ##### openVINO + 如果仅使用 Core Ultra 平台,请准备 openVINO 模型: 你也可以在独立 GPU 上运行 openVINO 模型。 @@ -51,6 +52,7 @@ optimum-cli export openvino --model Qwen/Qwen3-8B ${MODEL_PATH}/OpenVINO/Qwen3-8 ``` ##### vLLM + 如果你有独立 GPU 并希望使用 vLLM,可按如下方式准备模型: ```bash @@ -105,7 +107,6 @@ export MILVUS_ENABLED=0 # export CHAT_HISTORY_ROUND= # 按需修改 ``` - ### 使用 Docker Compose 在 Intel GPU 上部署服务 #### 选项 a:为 Core Ultra / Arc B60 / Arc A770 部署基于 openVINO LLM 的 EC-RAG diff --git a/EdgeCraftRAG/docs/Explore_Edge_Craft_RAG.md b/EdgeCraftRAG/docs/Explore_Edge_Craft_RAG.md index ee091a5fc8..9cebfcb5b2 100644 --- a/EdgeCraftRAG/docs/Explore_Edge_Craft_RAG.md +++ b/EdgeCraftRAG/docs/Explore_Edge_Craft_RAG.md @@ -37,18 +37,17 @@ Then, you can submit messages in the chat box in `Chat` page. ![alt text](../assets/img/Explore_Edge_Craft_RAG_08.jpg) ## ChatQnA with Kbadmin in UI - + ### Kbadmin Knowledge Base - + Go to `Knowledge Base` page and click `Create Knowledge Base` button to create your knowledge base. Please select 'kbadmin' in `Type`and select kb name from the kbs you created in kbadmin UI page. Loading kb name might be slow ,please wait with patient - + ![alt text](../assets/img/Explore_Edge_Craft_RAG_09.png) - -Ten you can select embedding infomation in 'Indexer' page - + +Ten you can select embedding information in 'Indexer' page + ![alt text](../assets/img/Explore_Edge_Craft_RAG_10.png) - + After creation, you can see kbadmin tag in knowledge base then you can submit messages in the chat box in `Chat` page. ![alt text](../assets/img/Explore_Edge_Craft_RAG_11.png) - diff --git a/EdgeCraftRAG/docs/Explore_Edge_Craft_RAG_zh.md b/EdgeCraftRAG/docs/Explore_Edge_Craft_RAG_zh.md index 10534cd127..f01bb4ef47 100644 --- a/EdgeCraftRAG/docs/Explore_Edge_Craft_RAG_zh.md +++ b/EdgeCraftRAG/docs/Explore_Edge_Craft_RAG_zh.md @@ -42,12 +42,12 @@ 流水线创建完成后,前往 `Knowledge Base` 页面,点击 `Create Knowledge Base` 按钮创建知识库。 请在 `Type` 中选择 `kbadmin`,并从 kbadmin UI 页面中已创建的知识库列表中选择 kb 名称。加载kb名称可能比较耗时,请耐心等待。 - + ![alt text](../assets/img/Explore_Edge_Craft_RAG_09.png) - + 在 `Indexer` 页面,填写 Embedding 服务和向量数据库信息,注意 Embedding 服务端口为 13020,向量数据库端口为 29530。 - + ![alt text](../assets/img/Explore_Edge_Craft_RAG_10.png) - + 然后,在 `Chat` 页面的聊天框中提交您的问题。 ![alt text](../assets/img/Explore_Edge_Craft_RAG_11.png) diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py b/EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py index 01ddaabc65..332bcf1055 100644 --- a/EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py +++ b/EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py @@ -124,7 +124,9 @@ async def res_gen_json(): async def context_suffix_gen(): yield '","contexts":' + json.dumps(serialize_retrievals(retrievals)) + "}" - query_gen = stream_generator('{"query":' + json.dumps(original_query, ensure_ascii=False) + ',"response":"') + query_gen = stream_generator( + '{"query":' + json.dumps(original_query, ensure_ascii=False) + ',"response":"' + ) output_gen = chain_async_generators([query_gen, res_gen_json(), context_suffix_gen()]) return StreamingResponse(output_gen, media_type="text/plain") @@ -149,7 +151,7 @@ async def res_gen_json(): yield json.dumps(token, ensure_ascii=False)[1:-1] # Reconstruct RagOut in stream response - query_gen = stream_generator('{"query":' + json.dumps(request.messages, ensure_ascii=False) + ',') + query_gen = stream_generator('{"query":' + json.dumps(request.messages, ensure_ascii=False) + ",") s_contexts = json.dumps(serialize_contexts(contexts)) context_gen = stream_generator('"contexts":' + s_contexts + ',"response":"') diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/data.py b/EdgeCraftRAG/edgecraftrag/api/v1/data.py index fff065f86e..aad19e0ed6 100644 --- a/EdgeCraftRAG/edgecraftrag/api/v1/data.py +++ b/EdgeCraftRAG/edgecraftrag/api/v1/data.py @@ -5,12 +5,13 @@ import os from typing import List +from edgecraftrag.api.v1.knowledge_base import add_file_to_knowledge_base from edgecraftrag.api_schema import DataIn, FilesIn from edgecraftrag.config_repository import MilvusConfigRepository from edgecraftrag.context import ctx from edgecraftrag.env import UI_DIRECTORY from fastapi import FastAPI, File, HTTPException, UploadFile, status -from edgecraftrag.api.v1.knowledge_base import add_file_to_knowledge_base + data_app = FastAPI() diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/knowledge_base.py b/EdgeCraftRAG/edgecraftrag/api/v1/knowledge_base.py index c17298c138..5a7b14df2f 100644 --- a/EdgeCraftRAG/edgecraftrag/api/v1/knowledge_base.py +++ b/EdgeCraftRAG/edgecraftrag/api/v1/knowledge_base.py @@ -8,26 +8,13 @@ from typing import Dict, List, Union from edgecraftrag.api_schema import DataIn, ExperienceIn, KnowledgeBaseCreateIn -from edgecraftrag.components.query_preprocess import query_search -from edgecraftrag.components.indexer import get_kbs_info -from edgecraftrag.config_repository import ( - MilvusConfigRepository, - save_knowledge_configurations, -) -from edgecraftrag.context import ctx -from edgecraftrag.env import ( - KNOWLEDGEBASE_FILE, - SEARCH_CONFIG_PATH, - SEARCH_DIR, - UI_DIRECTORY, -) from edgecraftrag.base import ( IndexerType, ModelType, NodeParserType, ) from edgecraftrag.components.benchmark import Benchmark -from edgecraftrag.components.indexer import KBADMINIndexer, VectorIndexer +from edgecraftrag.components.indexer import KBADMINIndexer, VectorIndexer, get_kbs_info from edgecraftrag.components.node_parser import ( HierarchyNodeParser, KBADMINParser, @@ -35,7 +22,19 @@ SWindowNodeParser, UnstructedNodeParser, ) -from fastapi import FastAPI, HTTPException, status, Query +from edgecraftrag.components.query_preprocess import query_search +from edgecraftrag.config_repository import ( + MilvusConfigRepository, + save_knowledge_configurations, +) +from edgecraftrag.context import ctx +from edgecraftrag.env import ( + KNOWLEDGEBASE_FILE, + SEARCH_CONFIG_PATH, + SEARCH_DIR, + UI_DIRECTORY, +) +from fastapi import FastAPI, HTTPException, Query, status kb_app = FastAPI() @@ -51,7 +50,9 @@ async def get_all_knowledge_bases(): # Get knowledge base files in a certain range. @kb_app.get("/v1/knowledge/{knowledge_name}/filemap") -async def get_knowledge_base_filemap(knowledge_name: str, page_num: int = Query(1, ge=1), page_size: int = Query(20, ge=1)): +async def get_knowledge_base_filemap( + knowledge_name: str, page_num: int = Query(1, ge=1), page_size: int = Query(20, ge=1) +): kb = ctx.knowledgemgr.get_knowledge_base_by_name_or_id(knowledge_name) if kb and kb.file_map: file_map = kb.file_map @@ -61,7 +62,7 @@ async def get_knowledge_base_filemap(knowledge_name: str, page_num: int = Query( if start >= filemap_len: return None file_map_subset = itertools.islice(file_map.items(), start, end) - return {"file_map": dict(file_map_subset),"total": kb.calculate_totals()} + return {"file_map": dict(file_map_subset), "total": kb.calculate_totals()} else: return None @@ -101,7 +102,7 @@ async def create_knowledge_base(knowledge: KnowledgeBaseCreateIn): active_pl.update_retriever_list(ctx.knowledgemgr.get_active_knowledge_base()) except Exception as e: ctx.knowledgemgr.delete_knowledge_base(knowledge.name) - raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) await save_knowledge_configurations("add", kb) return "Create knowledge base successfully" except Exception as e: @@ -156,7 +157,7 @@ async def update_knowledge_base(knowledge: KnowledgeBaseCreateIn): raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) # reload data for knowledge base - node_parser_changed = (kb_node_parser != kb.node_parser) + node_parser_changed = kb_node_parser != kb.node_parser if node_parser_changed or kb_indexer != kb.indexer: await handle_reload_data(kb, node_parser_changed) elif kb.comp_subtype == "kbadmin_kb": @@ -181,10 +182,10 @@ async def update_knowledge_base(knowledge: KnowledgeBaseCreateIn): @kb_app.post(path="/v1/knowledge/{knowledge_name}/files") async def add_file_to_knowledge_base(knowledge_name, file_path: DataIn): """ - 1. Parse file into Llamaindex Document and add file to filemgr - 2. Add file path to knowledge base - 3. Update nodes and vector store for knowledge base - 4. Update pipeline retriever if active knowledge base's indexer changed + 1. Parse file into Llamaindex Document and add file to filemgr + 2. Add file path to knowledge base + 3. Update nodes and vector store for knowledge base + 4. Update pipeline retriever if active knowledge base's indexer changed """ try: kb = ctx.knowledgemgr.get_knowledge_base_by_name_or_id(knowledge_name) @@ -227,7 +228,7 @@ async def add_file_to_knowledge_base(knowledge_name, file_path: DataIn): raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Error uploading file.") # update retriever with indexer since indexer updated - if kb.active: + if kb.active: active_pl = ctx.get_pipeline_mgr().get_active_pipeline() if active_pl: active_pl.update_retriever(kb, prev_indexer) @@ -263,7 +264,7 @@ async def remove_file_from_knowledge_base(knowledge_name, file_path: DataIn): ) await remove_document_handler(document_list, kb) # update retriever with indexer since indexer updated - if kb.active: + if kb.active: active_pl = ctx.get_pipeline_mgr().get_active_pipeline() if active_pl: active_pl.update_retriever(kb, prev_indexer) @@ -465,6 +466,7 @@ async def handle_reload_data(kb, node_parser_changed: bool = False): # update indexer await kb.update_nodes_to_indexer() + async def update_kb_handler(kb, knowledge): if kb.enable_benchmark: kb.benchmark = Benchmark(True, "") @@ -499,7 +501,9 @@ async def update_kb_handler(kb, knowledge): ctx.get_node_parser_mgr().add(kb.node_parser) if knowledge.indexer is not None: ind = knowledge.indexer - found_indexer = ctx.get_indexer_mgr().search_indexer(ind) if ind.indexer_type != IndexerType.MILVUS_VECTOR else None + found_indexer = ( + ctx.get_indexer_mgr().search_indexer(ind) if ind.indexer_type != IndexerType.MILVUS_VECTOR else None + ) if found_indexer is not None: kb.indexer = found_indexer else: diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/model.py b/EdgeCraftRAG/edgecraftrag/api/v1/model.py index 7dbccaf284..dadd8a5ea7 100644 --- a/EdgeCraftRAG/edgecraftrag/api/v1/model.py +++ b/EdgeCraftRAG/edgecraftrag/api/v1/model.py @@ -198,9 +198,7 @@ def get_available_models(model_type): normalized_model_type = (model_type or "").strip().lower() def _is_llm_model_dir(file_names: set) -> bool: - if "openvino_model.xml" in file_names and any( - name.endswith(".bin") for name in file_names - ): + if "openvino_model.xml" in file_names and any(name.endswith(".bin") for name in file_names): return True if "config.json" in file_names and ( diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/pipeline.py b/EdgeCraftRAG/edgecraftrag/api/v1/pipeline.py index 35cf91a875..11224d44d7 100644 --- a/EdgeCraftRAG/edgecraftrag/api/v1/pipeline.py +++ b/EdgeCraftRAG/edgecraftrag/api/v1/pipeline.py @@ -6,7 +6,7 @@ import re import time import weakref -from openvino import Core, Type + from edgecraftrag.api_schema import MilvusConnectRequest, PipelineCreateIn from edgecraftrag.base import ( GeneratorType, @@ -17,11 +17,11 @@ from edgecraftrag.components.benchmark import Benchmark from edgecraftrag.components.generator import FreeChatGenerator, QnAGenerator from edgecraftrag.components.postprocessor import MetadataReplaceProcessor, RerankProcessor - from edgecraftrag.config_repository import MilvusConfigRepository, save_pipeline_configurations from edgecraftrag.context import ctx from edgecraftrag.env import PIPELINE_FILE from fastapi import FastAPI, File, HTTPException, UploadFile, status +from openvino import Core, Type from pymilvus import connections pipeline_app = FastAPI() @@ -233,10 +233,12 @@ async def update_pipeline_handler(pl, req): else: raise Exception("Inference Type Not Supported") flag = pl.check_top_k(ctx.get_knowledge_mgr().get_all_knowledge_bases()) - if flag == True: + if flag: await save_pipeline_configurations("update", pl) if pl.status.active != req.active: - ctx.get_pipeline_mgr().activate_pipeline(pl.name, req.active, ctx.get_knowledge_mgr().get_active_knowledge_base()) + ctx.get_pipeline_mgr().activate_pipeline( + pl.name, req.active, ctx.get_knowledge_mgr().get_active_knowledge_base() + ) return pl diff --git a/EdgeCraftRAG/edgecraftrag/components/agents/deep_search/config.py b/EdgeCraftRAG/edgecraftrag/components/agents/deep_search/config.py index e7066c767d..513d0d9bcf 100644 --- a/EdgeCraftRAG/edgecraftrag/components/agents/deep_search/config.py +++ b/EdgeCraftRAG/edgecraftrag/components/agents/deep_search/config.py @@ -4,8 +4,8 @@ from __future__ import annotations -from copy import deepcopy import json +from copy import deepcopy from pathlib import Path from typing import Any, Dict diff --git a/EdgeCraftRAG/edgecraftrag/components/agents/deep_search/deep_search.py b/EdgeCraftRAG/edgecraftrag/components/agents/deep_search/deep_search.py index 34fba173af..91cb3a3a3d 100644 --- a/EdgeCraftRAG/edgecraftrag/components/agents/deep_search/deep_search.py +++ b/EdgeCraftRAG/edgecraftrag/components/agents/deep_search/deep_search.py @@ -22,6 +22,7 @@ from .postprocessing import postproc_query as default_postproc_query from .utils import Role, import_module_from_path + class DeepSearchState(BaseModel): question: str query: str diff --git a/EdgeCraftRAG/edgecraftrag/components/benchmark.py b/EdgeCraftRAG/edgecraftrag/components/benchmark.py index 20872e59cc..bc9ffd39bb 100644 --- a/EdgeCraftRAG/edgecraftrag/components/benchmark.py +++ b/EdgeCraftRAG/edgecraftrag/components/benchmark.py @@ -118,9 +118,9 @@ def insert_llm_data_genai(self, idx, input_token_size=-1, model=None): metrics = {} metrics["input_token_size"] = input_token_size metrics["output_token_size"] = model().perf_metrics.get_num_generated_tokens() - metrics["generation_time"] = model().perf_metrics.get_inference_duration().mean/1000 - metrics["first_token_latency"] = model().perf_metrics.get_ttft().mean/1000 - metrics["other_tokens_avg_latency"] = model().perf_metrics.get_tpot().mean/1000 + metrics["generation_time"] = model().perf_metrics.get_inference_duration().mean / 1000 + metrics["first_token_latency"] = model().perf_metrics.get_ttft().mean / 1000 + metrics["other_tokens_avg_latency"] = model().perf_metrics.get_tpot().mean / 1000 self.llm_data_list[idx] = metrics diff --git a/EdgeCraftRAG/edgecraftrag/components/generator.py b/EdgeCraftRAG/edgecraftrag/components/generator.py index fcad2433d8..d9fcf5dfe9 100644 --- a/EdgeCraftRAG/edgecraftrag/components/generator.py +++ b/EdgeCraftRAG/edgecraftrag/components/generator.py @@ -5,15 +5,15 @@ import json import os import time -import weakref import urllib.request +import weakref from concurrent.futures import ThreadPoolExecutor from urllib.parse import urlparse from comps.cores.proto.api_protocol import ChatCompletionRequest from edgecraftrag.base import BaseComponent, CompType, GeneratorType, InferenceType, NodeParserType -from edgecraftrag.utils import get_prompt_template, resolve_prompt_template_path from edgecraftrag.components.agents.utils import build_document_node_block +from edgecraftrag.utils import get_prompt_template, resolve_prompt_template_path from fastapi.responses import StreamingResponse from llama_index.llms.openai_like import OpenAILike from pydantic import model_serializer @@ -103,13 +103,18 @@ async def local_stream_generator(lock, llm, prompt_str, unstructured_str, benchm if unstructured_str: yield unstructured_str if enable_benchmark: - benchmark.update_benchmark_data_genai(benchmark_index, CompType.GENERATOR, time.perf_counter() - start_time, weakref.ref(llm)) - benchmark.insert_llm_data_genai(benchmark_index, benchmark.cal_input_token_size(prompt_str), weakref.ref(llm)) + benchmark.update_benchmark_data_genai( + benchmark_index, CompType.GENERATOR, time.perf_counter() - start_time, weakref.ref(llm) + ) + benchmark.insert_llm_data_genai( + benchmark_index, benchmark.cal_input_token_size(prompt_str), weakref.ref(llm) + ) except Exception as e: start_idx = str(e).find("message") + len("message") result_error = str(e)[start_idx:] yield f"code:0000{result_error}" + async def stream_generator(llm, prompt_str, unstructured_str, benchmark=None, benchmark_index=None): enable_benchmark = benchmark.is_enabled() if benchmark else False start_time = time.perf_counter() if enable_benchmark else None @@ -219,7 +224,7 @@ def __init__( ) self.llm = llm_model - self.vllm_name = llm_model().model_id if not isinstance(llm_model, str) else llm_model + self.vllm_name = llm_model().model_id if not isinstance(llm_model, str) else llm_model if self.inference_type == InferenceType.LOCAL: self.lock = asyncio.Lock() if self.inference_type == InferenceType.VLLM: @@ -324,7 +329,14 @@ async def run(self, chat_request, retrieved_nodes, node_parser_type, **kwargs): sub_questions = kwargs.get("sub_questions", None) text_gen_context, prompt_str = self.query_transform(chat_request, retrieved_nodes, sub_questions=sub_questions) # self.llm().config.update_generation_config(config) - self.llm().config.update_generation_config(temperature=chat_request.temperature,top_p=chat_request.top_p, top_k=chat_request.top_k, typical_p=chat_request.typical_p, repetition_penalty=chat_request.repetition_penalty, do_sample=chat_request.temperature > 0.0) + self.llm().config.update_generation_config( + temperature=chat_request.temperature, + top_p=chat_request.top_p, + top_k=chat_request.top_k, + typical_p=chat_request.typical_p, + repetition_penalty=chat_request.repetition_penalty, + do_sample=chat_request.temperature > 0.0, + ) self.llm().config.max_new_tokens = chat_request.max_tokens unstructured_str = "" if node_parser_type == NodeParserType.UNSTRUCTURED: @@ -332,7 +344,9 @@ async def run(self, chat_request, retrieved_nodes, node_parser_type, **kwargs): if chat_request.stream: # Asynchronous generator async def generator(): - async for chunk in local_stream_generator(self.lock, self.llm(), prompt_str, unstructured_str, benchmark, benchmark_index): + async for chunk in local_stream_generator( + self.lock, self.llm(), prompt_str, unstructured_str, benchmark, benchmark_index + ): yield chunk or "" await asyncio.sleep(0) diff --git a/EdgeCraftRAG/edgecraftrag/components/indexer.py b/EdgeCraftRAG/edgecraftrag/components/indexer.py index 1e2349aa12..6248c87db3 100644 --- a/EdgeCraftRAG/edgecraftrag/components/indexer.py +++ b/EdgeCraftRAG/edgecraftrag/components/indexer.py @@ -6,14 +6,15 @@ import faiss from edgecraftrag.base import BaseComponent, CompType, IndexerType from edgecraftrag.context import ctx +from langchain_milvus import Milvus from langchain_openai import OpenAIEmbeddings from llama_index.core import StorageContext, VectorStoreIndex from llama_index.vector_stores.faiss import FaissVectorStore from llama_index.vector_stores.milvus import MilvusVectorStore from pydantic import model_serializer -from langchain_milvus import Milvus from pymilvus import Collection, MilvusException, connections, utility + class VectorIndexer(BaseComponent, VectorStoreIndex): def __init__(self, embed_model, vector_type, vector_url="http://localhost:19530", kb_name="default_kb"): BaseComponent.__init__( diff --git a/EdgeCraftRAG/edgecraftrag/components/knowledge_base.py b/EdgeCraftRAG/edgecraftrag/components/knowledge_base.py index 0a1071a344..543af8139d 100644 --- a/EdgeCraftRAG/edgecraftrag/components/knowledge_base.py +++ b/EdgeCraftRAG/edgecraftrag/components/knowledge_base.py @@ -3,8 +3,8 @@ import json import os -import uuid import time +import uuid from typing import Any, Dict, List, Optional, Union from edgecraftrag.base import BaseComponent, BenchType, CompType @@ -16,6 +16,7 @@ from llama_index.core.schema import Document from pydantic import Field, model_serializer + class Knowledge(BaseComponent): node_parser: Optional[BaseComponent] = Field(default=None) @@ -346,7 +347,7 @@ def calculate_totals(self): else: total = None return total - + def update_nodes(self, nodes: List[Document]): self.nodes = nodes @@ -369,7 +370,7 @@ async def run_node_parser(self, docs: List[Document]) -> Any: self.benchmark.update_benchmark_data(benchmark_index, BenchType.CHUNK_NUM, benchmark_data) self.add_nodes(nodes) return nodes - + async def update_nodes_to_indexer(self) -> Any: if self.indexer is not None: self.indexer.insert_nodes(self.nodes) diff --git a/EdgeCraftRAG/edgecraftrag/components/model.py b/EdgeCraftRAG/edgecraftrag/components/model.py index 3fec80ac82..ec1575a3d6 100644 --- a/EdgeCraftRAG/edgecraftrag/components/model.py +++ b/EdgeCraftRAG/edgecraftrag/components/model.py @@ -1,24 +1,26 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import asyncio import io import os from pathlib import Path -import asyncio +from threading import Event, Thread from typing import Any, Optional -import openvino_genai -import openvino as ov + import numpy as np +import openvino as ov +import openvino_genai from edgecraftrag.base import BaseComponent, CompType, ModelType +from edgecraftrag.components.ov_llamaindex_helper import OpenVINOGenAIEmbedding, OpenVINOGenAIReranking +from llama_index.core.base.llms.types import CompletionResponse, CompletionResponseAsyncGen, CompletionResponseGen from llama_index.embeddings.huggingface_openvino import OpenVINOEmbedding from llama_index.embeddings.openai import OpenAIEmbedding from llama_index.llms.openvino import OpenVINOLLM -from llama_index.postprocessor.openvino_rerank import OpenVINORerank -from edgecraftrag.components.ov_llamaindex_helper import OpenVINOGenAIEmbedding, OpenVINOGenAIReranking from llama_index.llms.openvino_genai import OpenVINOGenAILLM +from llama_index.postprocessor.openvino_rerank import OpenVINORerank from pydantic import Field, model_serializer -from llama_index.core.base.llms.types import CompletionResponse, CompletionResponseAsyncGen, CompletionResponseGen -from threading import Event, Thread + def resolve_model_path(model_path: str) -> str: if not model_path: @@ -127,18 +129,13 @@ def __init__(self, model_id, model_path, device, weight): model_path = resolve_model_path(model_path) if not model_exist(model_path): OpenVINOEmbedding.create_and_save_openvino_model(model_id, model_path) - model_kwargs={ - "ov_config": { - "NUM_STREAMS": "1", - "PERFORMANCE_HINT": "LATENCY" - } - } + model_kwargs = {"ov_config": {"NUM_STREAMS": "1", "PERFORMANCE_HINT": "LATENCY"}} OpenVINOEmbedding.__init__(self, model_id_or_path=model_path, device=device, model_kwargs=model_kwargs) if device == "AUTO": - real_device=self._model.request.get_property("EXECUTION_DEVICES")[0] + real_device = self._model.request.get_property("EXECUTION_DEVICES")[0] self._model.to(real_device) self._model.compile() - device=real_device + device = real_device buf = io.BytesIO() self._model.request.export_model(buf) self.size_mb = len(buf.getvalue()) / 1024 / 1024 @@ -150,18 +147,38 @@ def __init__(self, model_id, model_path, device, weight): self.device = device self.weight = "" + class OpenVINOGenAIEmbeddingModel(BaseModelComponent, OpenVINOGenAIEmbedding): def __init__(self, model_id, model_path, device, weight): - max_length=512 + max_length = 512 model_path = resolve_model_path(model_path) if not model_exist(model_path): OpenVINOGenAIEmbedding.create_and_save_openvino_model(model_id, model_path) if device == "NPU": - OpenVINOGenAIEmbedding.__init__(self, model_path=model_path, device=device, embed_batch_size=1, pad_to_max_length=True, max_length=512, normalize=True, pooling="mean", padding_side="right") + OpenVINOGenAIEmbedding.__init__( + self, + model_path=model_path, + device=device, + embed_batch_size=1, + pad_to_max_length=True, + max_length=512, + normalize=True, + pooling="mean", + padding_side="right", + ) else: - OpenVINOGenAIEmbedding.__init__(self, model_path=model_path, device=device, pad_to_max_length=True, max_length=max_length, normalize=True, pooling="mean", padding_side="right") - self.size_mb = round(os.path.getsize(model_path+"/openvino_model.bin")/(1024*1024),3) + OpenVINOGenAIEmbedding.__init__( + self, + model_path=model_path, + device=device, + pad_to_max_length=True, + max_length=max_length, + normalize=True, + pooling="mean", + padding_side="right", + ) + self.size_mb = round(os.path.getsize(model_path + "/openvino_model.bin") / (1024 * 1024), 3) self.comp_type = CompType.MODEL self.comp_subtype = ModelType.EMBEDDING self.model_id = model_id @@ -170,30 +187,21 @@ def __init__(self, model_id, model_path, device, weight): self.weight = "" self.model_id_or_path = model_path + class OpenVINORerankModel(BaseModelComponent, OpenVINORerank): def __init__(self, model_id, model_path, device, weight): model_path = resolve_model_path(model_path) if not model_exist(model_path): OpenVINORerank.create_and_save_openvino_model(model_id, model_path) - model_kwargs={ - "ov_config": { - "NUM_STREAMS": "1", - "PERFORMANCE_HINT": "LATENCY" - } - } + model_kwargs = {"ov_config": {"NUM_STREAMS": "1", "PERFORMANCE_HINT": "LATENCY"}} - OpenVINORerank.__init__( - self, - model_id_or_path=model_path, - device=device, - model_kwargs=model_kwargs - ) + OpenVINORerank.__init__(self, model_id_or_path=model_path, device=device, model_kwargs=model_kwargs) if device == "AUTO": - real_device=self._model.request.get_property("EXECUTION_DEVICES")[0] + real_device = self._model.request.get_property("EXECUTION_DEVICES")[0] self._model.to(real_device) self._model.compile() - device=real_device + device = real_device buf = io.BytesIO() self._model.request.export_model(buf) self.size_mb = len(buf.getvalue()) / 1024 / 1024 @@ -205,10 +213,11 @@ def __init__(self, model_id, model_path, device, weight): self.device = device self.weight = "" + class OpenVINOGenAIRerankModel(BaseModelComponent, OpenVINOGenAIReranking): def __init__(self, model_id, model_path, device, weight): - max_length=512 + max_length = 512 model_path = resolve_model_path(model_path) if not model_exist(model_path): OpenVINOGenAIReranking.create_and_save_openvino_model(model_id, model_path) @@ -217,10 +226,10 @@ def __init__(self, model_id, model_path, device, weight): model_id_or_path=model_path, device=device, max_length=max_length, - pad_to_max_length=True, - padding_side="right" + pad_to_max_length=True, + padding_side="right", ) - self.size_mb = round(os.path.getsize(model_path+"/openvino_model.bin")/(1024*1024),3) + self.size_mb = round(os.path.getsize(model_path + "/openvino_model.bin") / (1024 * 1024), 3) self.comp_type = CompType.MODEL self.comp_subtype = ModelType.RERANKER self.model_id = model_id @@ -246,6 +255,7 @@ def __init__(self, model_id, model_path, device, weight, model=None): self.device = device self.weight = weight + class OpenVINOGenAILLMModel(BaseModelComponent, OpenVINOGenAILLM): def __init__(self, model_id, model_path, device, weight, model=None): @@ -266,10 +276,6 @@ def __init__(self, model_id, model_path, device, weight, model=None): self.device_map = device self._model = self._pipe - - - - async def astream_complete_with_bench( self, prompt: str, formatted: bool = False, **kwargs: Any ) -> CompletionResponseAsyncGen: @@ -298,12 +304,10 @@ def worker() -> None: if "error" in error_holder: raise error_holder["error"] - + return gen() - def stream_complete_with_bench( - self, prompt: str, formatted: bool = False, **kwargs: Any - ) -> CompletionResponseGen: + def stream_complete_with_bench(self, prompt: str, formatted: bool = False, **kwargs: Any) -> CompletionResponseGen: """Streaming completion endpoint.""" full_prompt = prompt if not formatted: @@ -313,7 +317,7 @@ def stream_complete_with_bench( full_prompt = f"{self.system_prompt} {full_prompt}" input_data = self._tokenizer.encode(full_prompt) - input_ids = input_data.input_ids.data + input_ids = input_data.input_ids.data attention_mask = input_data.attention_mask full_prompt = openvino_genai.TokenizedInputs(ov.Tensor(input_ids), attention_mask) generation_holder = {} @@ -322,10 +326,7 @@ def stream_complete_with_bench( def run_generation() -> None: try: generation_holder["result"] = self._pipe.generate( - full_prompt, - self.config, - streamer=self._streamer, - **kwargs + full_prompt, self.config, streamer=self._streamer, **kwargs ) except Exception as exc: error_holder["error"] = exc @@ -350,10 +351,7 @@ def gen() -> CompletionResponseGen: return gen() - - def complete_with_bench( - self, prompt: str, formatted: bool = False, **kwargs: Any - ) -> CompletionResponse: + def complete_with_bench(self, prompt: str, formatted: bool = False, **kwargs: Any) -> CompletionResponse: """Completion endpoint.""" full_prompt = prompt if not formatted: @@ -364,14 +362,13 @@ def complete_with_bench( elif self.system_prompt: full_prompt = f"{self.system_prompt} {full_prompt}" - input_data = self._tokenizer.encode(full_prompt) - input_ids = input_data.input_ids.data + input_ids = input_data.input_ids.data attention_mask = input_data.attention_mask full_prompt = openvino_genai.TokenizedInputs(ov.Tensor(input_ids), attention_mask) generation_result = self._pipe.generate(full_prompt, self.config, **kwargs) self.perf_metrics = generation_result.perf_metrics generated_tokens = np.array(generation_result.tokens) completion = self._tokenizer.decode(generated_tokens) - token = completion[0] - return CompletionResponse(text=token, raw={"model_output": token}) \ No newline at end of file + token = completion[0] + return CompletionResponse(text=token, raw={"model_output": token}) diff --git a/EdgeCraftRAG/edgecraftrag/components/ov_llamaindex_helper.py b/EdgeCraftRAG/edgecraftrag/components/ov_llamaindex_helper.py index 5abcd1a06e..55d009467d 100644 --- a/EdgeCraftRAG/edgecraftrag/components/ov_llamaindex_helper.py +++ b/EdgeCraftRAG/edgecraftrag/components/ov_llamaindex_helper.py @@ -1,20 +1,21 @@ +# Copyright (C) 2026 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from typing import Any, Dict, List, Optional + from llama_index.core.base.embeddings.base import ( DEFAULT_EMBED_BATCH_SIZE, BaseEmbedding, ) -from llama_index.core.postprocessor.types import BaseNodePostprocessor -from typing import Any, List, Optional, Dict from llama_index.core.bridge.pydantic import Field, PrivateAttr -from llama_index.core.callbacks import CallbackManager -from llama_index.core.callbacks import CBEventType, EventPayload +from llama_index.core.callbacks import CallbackManager, CBEventType, EventPayload from llama_index.core.instrumentation import get_dispatcher from llama_index.core.instrumentation.events.rerank import ( ReRankEndEvent, ReRankStartEvent, ) +from llama_index.core.postprocessor.types import BaseNodePostprocessor from llama_index.core.schema import MetadataMode, NodeWithScore, QueryBundle -from llama_index.core.instrumentation import get_dispatcher - dispatcher = get_dispatcher(__name__) @@ -48,7 +49,9 @@ def __init__( import openvino_genai except ImportError: - raise ImportError("Could not import OpenVINO GenAI package. " "Please install it with `pip install openvino-genai`.") + raise ImportError( + "Could not import OpenVINO GenAI package. " "Please install it with `pip install openvino-genai`." + ) if pooling not in ["cls", "mean"]: raise ValueError(f"Pooling {pooling} not supported.") @@ -62,7 +65,9 @@ def __init__( if padding_side: config.padding_side = padding_side config.pooling_type = ( - openvino_genai.TextEmbeddingPipeline.PoolingType.MEAN if pooling == "mean" else openvino_genai.TextEmbeddingPipeline.PoolingType.CLS + openvino_genai.TextEmbeddingPipeline.PoolingType.MEAN + if pooling == "mean" + else openvino_genai.TextEmbeddingPipeline.PoolingType.CLS ) config.query_instruction = query_instruction try: @@ -78,7 +83,7 @@ def __init__( normalize=normalize, query_instruction=query_instruction, text_instruction=text_instruction, - pad_to_max_length=pad_to_max_length + pad_to_max_length=pad_to_max_length, ) self._ov_pipe = openvino_genai.TextEmbeddingPipeline(model_path, device, config, **model_kwargs) self._device = device @@ -133,9 +138,17 @@ def __init__( try: import openvino_genai except ImportError: - raise ImportError("Could not import OpenVINO GenAI package. " "Please install it with `pip install openvino-genai`.") + raise ImportError( + "Could not import OpenVINO GenAI package. " "Please install it with `pip install openvino-genai`." + ) - super().__init__(top_n=top_n, max_length=max_length, model_id_or_path=model_id_or_path, device=device, keep_retrieval_score=keep_retrieval_score) + super().__init__( + top_n=top_n, + max_length=max_length, + model_id_or_path=model_id_or_path, + device=device, + keep_retrieval_score=keep_retrieval_score, + ) config = openvino_genai.TextRerankPipeline.Config() config.top_n = top_n diff --git a/EdgeCraftRAG/edgecraftrag/components/pipeline.py b/EdgeCraftRAG/edgecraftrag/components/pipeline.py index 42fe354bc0..028b2d1e15 100644 --- a/EdgeCraftRAG/edgecraftrag/components/pipeline.py +++ b/EdgeCraftRAG/edgecraftrag/components/pipeline.py @@ -2,13 +2,13 @@ # SPDX-License-Identifier: Apache-2.0 import asyncio +import gc import json import os import time -import gc from concurrent.futures import ThreadPoolExecutor from typing import Any, Callable, List, Optional -from openvino import Core + from comps.cores.proto.api_protocol import ChatCompletionRequest from edgecraftrag.base import ( BaseComponent, @@ -16,17 +16,23 @@ CompType, GeneratorType, InferenceType, + NodeParserType, RetrieverType, ) -from edgecraftrag.base import NodeParserType from edgecraftrag.components.generator import clone_generator +from edgecraftrag.components.knowledge_base import Knowledge from edgecraftrag.components.postprocessor import RerankProcessor from edgecraftrag.components.query_preprocess import query_search -from edgecraftrag.components.knowledge_base import Knowledge -from edgecraftrag.components.retriever import AutoMergeRetriever, SimpleBM25Retriever, VectorSimRetriever, KBadminRetriever +from edgecraftrag.components.retriever import ( + AutoMergeRetriever, + KBadminRetriever, + SimpleBM25Retriever, + VectorSimRetriever, +) from edgecraftrag.env import SEARCH_CONFIG_PATH, SEARCH_DIR from fastapi.responses import StreamingResponse from llama_index.core.schema import QueryBundle +from openvino import Core from pydantic import BaseModel, Field, model_serializer @@ -60,11 +66,9 @@ def __init__( self.idx = str(idx) self.enable_benchmark = os.getenv("ENABLE_BENCHMARK", "False").lower() == "true" - self.max_util = round(( - 0.95 - float(os.environ.get("GPU_MEMORY_UTIL", 0)) - if "LLM_MODEL" in os.environ - else 0.95 - ),3) + self.max_util = round( + (0.95 - float(os.environ.get("GPU_MEMORY_UTIL", 0)) if "LLM_MODEL" in os.environ else 0.95), 3 + ) self.run_pipeline_cb = run_pipeline self.run_retriever_postprocessor_cb = run_retrieve_postprocess self.run_retriever_cb = run_retrieve @@ -74,7 +78,7 @@ def __init__( self._origin_json = origin_json if origin_json is not None else "{}" self.retriever_type = "" self.retrieve_topk = 0 - self.max_retrieve_topk=0 + self.max_retrieve_topk = 0 self.retrievers = [] # TODO: consider race condition @@ -163,6 +167,7 @@ def get_generator(self, generator_type: str) -> Optional[BaseComponent]: if gen.comp_subtype == generator_type: return gen return None + def update_retriever_config(self, retriever_type: str, retrieve_topk: int): self.retriever_type = retriever_type self.retrieve_topk = retrieve_topk @@ -210,7 +215,6 @@ def update_retriever(self, kb, prev_indexer): raise ValueError(f"Retriever type {self.retriever_type} not supported") break - def clear_retrievers(self): self.retrievers = [] @@ -238,15 +242,15 @@ def _update_config_and_retrievers(self, changed: bool) -> None: origin_json = json.loads(self._origin_json) origin_json["retriever"]["retrieve_topk"] = self.retrieve_topk origin_json["retriever"]["max_retrieve_topk"] = self.max_retrieve_topk - + for retriever in self.retrievers: retriever.topk = self.retrieve_topk - + if self.postprocessor: for i, processor in enumerate(self.postprocessor): processor.top_n = min(processor.top_n, self.max_retrieve_topk) origin_json["postprocessor"][i]["top_n"] = processor.top_n - + self._origin_json = json.dumps(origin_json) def _resolve_max_util(self, reranker_device: str, core: Core) -> float: @@ -257,10 +261,10 @@ def _resolve_max_util(self, reranker_device: str, core: Core) -> float: return 0.5 else: return 0.95 - + if reranker_device == "CPU" or reranker_device == "NPU": return 0.95 - + device_type_obj = self._safe_get_property(reranker_device, "DEVICE_TYPE", core) reranker_card = 0 if reranker_device == "CPU": @@ -275,19 +279,22 @@ def _resolve_max_util(self, reranker_device: str, core: Core) -> float: dgpu_number = 0 for d in core.available_devices: - if d.startswith("GPU") and getattr(self._safe_get_property(d, "DEVICE_TYPE", core), "name", "") == "DISCRETE": + if ( + d.startswith("GPU") + and getattr(self._safe_get_property(d, "DEVICE_TYPE", core), "name", "") == "DISCRETE" + ): dgpu_number += 1 mask = os.getenv("VLLM_AFFINITY_MASK", "") allowed = set(int(x) for x in mask.split(",") if x.strip().isdigit()) max_gpu = max(allowed) if allowed else None - + if max_gpu >= dgpu_number and int(os.getenv("TP", 1)) > 1: vllm_device_type = "iGPU" else: vllm_device_type = "dGPU" if vllm_device_type == "iGPU" and reranker_device_type == "iGPU": return self.max_util - + if vllm_device_type == "dGPU" and reranker_device_type == "dGPU": if reranker_card in allowed: return self.max_util @@ -313,15 +320,19 @@ def _safe_get_property(device_name: str, property_name: str, core: Core): return None def _calculate_max_retrieve_topk( - self, available_memory_mb: float, hidden_size: Optional[int], num_hidden_layers: Optional[int], embedding_length: int + self, + available_memory_mb: float, + hidden_size: Optional[int], + num_hidden_layers: Optional[int], + embedding_length: int, ) -> int: """Calculate maximum top-k based on available memory and model config.""" # Constants for calculation MEMORY_CALC_DIVISORS = 2 * 2 * 0.2 # From original formula - + if not hidden_size or not num_hidden_layers or embedding_length <= 0: return self.retrieve_topk - + denominator = hidden_size * num_hidden_layers * MEMORY_CALC_DIVISORS * embedding_length max_topk = int(available_memory_mb * 1024 * 1024 / denominator) return max(1, max_topk) # Ensure at least 1 @@ -330,7 +341,7 @@ def _get_reranker_config(self) -> dict: """Safely retrieve reranker model configuration.""" if not self.postprocessor: return {} - + try: model = self.postprocessor[0].model if hasattr(model, "_model") and hasattr(model._model, "config"): @@ -363,7 +374,7 @@ def check_top_k(self, active_kbs: list[Knowledge]): reranker_size = reranker_model.size_mb if reranker_model else 0 embedding_size = sum(getattr(kb.indexer.model, "size_mb", 0) for kb in valid_kbs) embedding_length = max((getattr(kb.indexer, "d", 0) for kb in valid_kbs), default=0) - + # Apply default minimums embedding_size = embedding_size or 512 embedding_length = embedding_length or 256 @@ -380,14 +391,20 @@ def check_top_k(self, active_kbs: list[Knowledge]): available_memory_mb = gpu_max_alloc_mem_size / 1024 / 1024 * max_util - reranker_size - embedding_size # Get model configuration and calculate max top-k config = self._get_reranker_config() - if not isinstance(config, dict) : + if not isinstance(config, dict): if not hasattr(config, "to_dict"): config = {} else: config = config.to_dict() - - num_hidden_layers = config.get("num_hidden_layers") if isinstance(config, dict) else getattr(config, "num_hidden_layers", None) - hidden_size = (config.get("hidden_size") or config.get("hidden_dim")) if isinstance(config, dict) else (getattr(config, "hidden_size", None) or getattr(config, "hidden_dim", None)) + + num_hidden_layers = ( + config.get("num_hidden_layers") if isinstance(config, dict) else getattr(config, "num_hidden_layers", None) + ) + hidden_size = ( + (config.get("hidden_size") or config.get("hidden_dim")) + if isinstance(config, dict) + else (getattr(config, "hidden_size", None) or getattr(config, "hidden_dim", None)) + ) self.max_retrieve_topk = self._calculate_max_retrieve_topk( available_memory_mb, hidden_size, num_hidden_layers, embedding_length ) @@ -402,6 +419,7 @@ def check_top_k(self, active_kbs: list[Knowledge]): self._update_config_and_retrievers(changed) return changed + async def run_retrieve(pl: Pipeline, chat_request: ChatCompletionRequest) -> Any: query = chat_request.messages top_k = None if chat_request.k == ChatCompletionRequest.model_fields["k"].default else chat_request.k @@ -581,7 +599,14 @@ async def run_pipeline( if pl.enable_benchmark: start = time.perf_counter() if target_generator.inference_type == InferenceType.LOCAL: - ret = await target_generator.run(chat_request, retri_res, np_type, enable_benchmark=pl.enable_benchmark, benchmark=pl.benchmark, benchmark_index=benchmark_index) + ret = await target_generator.run( + chat_request, + retri_res, + np_type, + enable_benchmark=pl.enable_benchmark, + benchmark=pl.benchmark, + benchmark_index=benchmark_index, + ) elif target_generator.inference_type in (InferenceType.VLLM, InferenceType.OVMS): ret = await target_generator.run_remote( chat_request, @@ -594,15 +619,17 @@ async def run_pipeline( else: raise ValueError("LLM inference_type not supported") if not isinstance(ret, StreamingResponse) and pl.enable_benchmark: - if ( target_generator.inference_type == InferenceType.LOCAL ): - if ( not chat_request.stream ): - pl.benchmark.update_benchmark_data_genai(benchmark_index, CompType.GENERATOR, time.perf_counter() - start, pl.generator[0].llm) + if target_generator.inference_type == InferenceType.LOCAL: + if not chat_request.stream: + pl.benchmark.update_benchmark_data_genai( + benchmark_index, CompType.GENERATOR, time.perf_counter() - start, pl.generator[0].llm + ) pl.benchmark.insert_llm_data_genai(benchmark_index, input_token_size, pl.generator[0].llm) cleanup_pipeline_resources(retri_res, post_res, np_types, sub_questionss_result) return ret, contexts pl.benchmark.update_benchmark_data(benchmark_index, CompType.GENERATOR, time.perf_counter() - start) pl.benchmark.insert_llm_data(benchmark_index, input_token_size) - + cleanup_pipeline_resources(retri_res, post_res, np_types, sub_questionss_result) return ret, contexts @@ -611,7 +638,7 @@ async def run_generator( pl: Pipeline, chat_request: ChatCompletionRequest, generator_type: str = GeneratorType.CHATQNA ) -> Any: active_kbs = chat_request.user if chat_request.user else [] - # If using multiple knowledge bases, unstructured node parser cannot work with other types of node parser + # If using multiple knowledge bases, unstructured node parser cannot work with other types of node parser np_types = {kb.node_parser.comp_subtype for kb in active_kbs} if len(np_types) > 1 and NodeParserType.UNSTRUCTURED in np_types: raise ValueError("unstructured node parser cannot work with other types of node parser") diff --git a/EdgeCraftRAG/edgecraftrag/components/retriever.py b/EdgeCraftRAG/edgecraftrag/components/retriever.py index 5469c0f8a1..62ee1b5630 100644 --- a/EdgeCraftRAG/edgecraftrag/components/retriever.py +++ b/EdgeCraftRAG/edgecraftrag/components/retriever.py @@ -5,7 +5,6 @@ from typing import Any, List, Optional, cast import requests - from edgecraftrag.base import BaseComponent, CompType, RetrieverType from llama_index.core.indices.vector_store.retrievers import VectorIndexRetriever from llama_index.core.retrievers import AutoMergingRetriever @@ -14,7 +13,6 @@ from pydantic import model_serializer - class VectorSimRetriever(BaseComponent, VectorIndexRetriever): def __init__(self, indexer, **kwargs): @@ -116,7 +114,7 @@ def __init__(self, indexer, **kwargs): def run(self, **kwargs) -> Any: for k, v in kwargs.items(): if k == "query": - if self._index.comp_subtype == 'milvus_vector': + if self._index.comp_subtype == "milvus_vector": raise NotImplementedError("not support BM25 retriever for Milvus vector store") top_k = kwargs["top_k"] if kwargs["top_k"] else self.topk nodes = cast(List[BaseNode], list(self._docstore.docs.values())) diff --git a/EdgeCraftRAG/edgecraftrag/config_repository.py b/EdgeCraftRAG/edgecraftrag/config_repository.py index c2f42f4900..6e7cb52a5d 100644 --- a/EdgeCraftRAG/edgecraftrag/config_repository.py +++ b/EdgeCraftRAG/edgecraftrag/config_repository.py @@ -291,7 +291,7 @@ async def save_pipeline_configurations(operation: str = None, pipeline=None): chatqna_gen = pipeline.get_generator(GeneratorType.CHATQNA) if chatqna_gen: if GeneratorType.CHATQNA in gens_data: - gens_data[GeneratorType.CHATQNA]["prompt_content"] = chatqna_gen.prompt_content + gens_data[GeneratorType.CHATQNA]["prompt_content"] = chatqna_gen.prompt_content target_data["active"] = pipeline.status.active if pipeline_milvus_repo: diff --git a/EdgeCraftRAG/edgecraftrag/controllers/agentmgr.py b/EdgeCraftRAG/edgecraftrag/controllers/agentmgr.py index 1db38340c9..ba1ac2a4b2 100644 --- a/EdgeCraftRAG/edgecraftrag/controllers/agentmgr.py +++ b/EdgeCraftRAG/edgecraftrag/controllers/agentmgr.py @@ -51,11 +51,19 @@ def create_agent(self, cfgs: AgentCreateIn): return "Create Agent failed. Pipeline id not found." if cfgs.type == AgentType.SIMPLE: new_agent = SimpleRAGAgent(cfgs.idx, cfgs.name, cfgs.pipeline_idx, cfgs.configs) - new_agent.configs["max_retrievals"]=min(new_agent.configs["max_retrievals"], self.get_pipeline_by_name_or_id(cfgs.pipeline_idx).max_retrieve_topk) + new_agent.configs["max_retrievals"] = min( + new_agent.configs["max_retrievals"], + self.get_pipeline_by_name_or_id(cfgs.pipeline_idx).max_retrieve_topk, + ) elif cfgs.type == AgentType.DEEPSEARCH: new_agent = DeepSearchAgent(cfgs.idx, cfgs.name, cfgs.pipeline_idx, cfgs.configs) - new_agent.configs["retrieve_top_k"]=min(new_agent.configs["retrieve_top_k"], self.get_pipeline_by_name_or_id(cfgs.pipeline_idx).max_retrieve_topk) - new_agent.configs["rerank_top_k"]=min(new_agent.configs["rerank_top_k"], self.get_pipeline_by_name_or_id(cfgs.pipeline_idx).max_retrieve_topk) + new_agent.configs["retrieve_top_k"] = min( + new_agent.configs["retrieve_top_k"], + self.get_pipeline_by_name_or_id(cfgs.pipeline_idx).max_retrieve_topk, + ) + new_agent.configs["rerank_top_k"] = min( + new_agent.configs["rerank_top_k"], self.get_pipeline_by_name_or_id(cfgs.pipeline_idx).max_retrieve_topk + ) if new_agent is not None: self.set_manager(new_agent) diff --git a/EdgeCraftRAG/edgecraftrag/controllers/filemgr.py b/EdgeCraftRAG/edgecraftrag/controllers/filemgr.py index 7777d148de..839ac23ef7 100644 --- a/EdgeCraftRAG/edgecraftrag/controllers/filemgr.py +++ b/EdgeCraftRAG/edgecraftrag/controllers/filemgr.py @@ -3,9 +3,9 @@ import asyncio import os +from pathlib import Path from typing import Any, Callable, List, Optional -from pathlib import Path from edgecraftrag.base import BaseMgr from edgecraftrag.components.data import File from llama_index.core.schema import Document diff --git a/EdgeCraftRAG/edgecraftrag/controllers/knowledge_basemgr.py b/EdgeCraftRAG/edgecraftrag/controllers/knowledge_basemgr.py index e59d5d5beb..d25daab64b 100644 --- a/EdgeCraftRAG/edgecraftrag/controllers/knowledge_basemgr.py +++ b/EdgeCraftRAG/edgecraftrag/controllers/knowledge_basemgr.py @@ -1,9 +1,9 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import gc from typing import Any, Dict, List, Optional -import gc from edgecraftrag.api_schema import KnowledgeBaseCreateIn from edgecraftrag.base import BaseMgr from edgecraftrag.components.knowledge_base import Knowledge @@ -108,7 +108,7 @@ def delete_knowledge_base(self, name: str): except Exception as e: pass try: - del kb.indexer.model._ov_pipe + del kb.indexer.model._ov_pipe except Exception as e: pass kb.indexer.model = None diff --git a/EdgeCraftRAG/edgecraftrag/controllers/modelmgr.py b/EdgeCraftRAG/edgecraftrag/controllers/modelmgr.py index 307108ff7b..4d08dc484f 100644 --- a/EdgeCraftRAG/edgecraftrag/controllers/modelmgr.py +++ b/EdgeCraftRAG/edgecraftrag/controllers/modelmgr.py @@ -3,6 +3,7 @@ import asyncio import os + from edgecraftrag.api_schema import ModelIn from edgecraftrag.base import BaseComponent, BaseMgr, CompType, ModelType from edgecraftrag.components.model import ( @@ -10,10 +11,10 @@ OpenAIEmbeddingModel, OpenVINOEmbeddingModel, OpenVINOGenAIEmbeddingModel, - OpenVINOLLMModel, OpenVINOGenAILLMModel, - OpenVINORerankModel, OpenVINOGenAIRerankModel, + OpenVINOLLMModel, + OpenVINORerankModel, resolve_model_path, ) @@ -85,7 +86,7 @@ def load_model(model_para: ModelIn): enable_genai = os.getenv("ENABLE_GENAI", "").lower() == "true" match model_para.model_type: case ModelType.EMBEDDING: - if model_para.device == "NPU" or enable_genai== True: + if model_para.device == "NPU" or enable_genai: model = OpenVINOGenAIEmbeddingModel( model_id=model_para.model_id, model_path=model_para.model_path, @@ -105,7 +106,7 @@ def load_model(model_para: ModelIn): api_base=model_para.api_base, ) case ModelType.RERANKER: - if enable_genai== True: + if enable_genai: model = OpenVINOGenAIRerankModel( model_id=model_para.model_id, model_path=model_para.model_path, @@ -168,7 +169,7 @@ def load_model_ben(model_para: ModelIn): model_id=model_para.model_id, model_path=resolved_model_path, device=model_para.device, - weight=model_para.weight + weight=model_para.weight, ) from transformers import AutoTokenizer diff --git a/EdgeCraftRAG/edgecraftrag/controllers/pipelinemgr.py b/EdgeCraftRAG/edgecraftrag/controllers/pipelinemgr.py index 2f34f6c7fc..17413ed038 100644 --- a/EdgeCraftRAG/edgecraftrag/controllers/pipelinemgr.py +++ b/EdgeCraftRAG/edgecraftrag/controllers/pipelinemgr.py @@ -2,14 +2,15 @@ # SPDX-License-Identifier: Apache-2.0 import asyncio -import json import gc +import json from typing import Any -from openvino import Core, Type + from comps.cores.proto.api_protocol import ChatCompletionRequest from edgecraftrag.base import BaseMgr, CallbackType, InferenceType -from edgecraftrag.components.pipeline import Pipeline from edgecraftrag.components.knowledge_base import Knowledge +from edgecraftrag.components.pipeline import Pipeline +from openvino import Core, Type class PipelineMgr(BaseMgr): @@ -46,7 +47,7 @@ def remove_pipeline_by_name_or_id(self, name: str): if self._prev_active_pipeline_name and pl.name == self._prev_active_pipeline_name: raise Exception("Pipeline is currently cached, unable to remove...") pl.retrievers = None - if pl.postprocessor != None: + if pl.postprocessor is not None: for post in pl.postprocessor: try: post.model._model.clear_requests() @@ -54,15 +55,15 @@ def remove_pipeline_by_name_or_id(self, name: str): pass try: del post.model._model - post.model._model=None + post.model._model = None except Exception as e: pass try: del post.model._ov_pipe except Exception as e: pass - post.model=None - post=None + post.model = None + post = None pl.postprocessor = None for gen in pl.generator: if gen.inference_type: @@ -80,7 +81,7 @@ def remove_pipeline_by_name_or_id(self, name: str): del llm_model._pipe except Exception as e: pass - llm_model._model=None + llm_model._model = None del llm_model del gen pl.generator = None diff --git a/EdgeCraftRAG/edgecraftrag/requirements.txt b/EdgeCraftRAG/edgecraftrag/requirements.txt index 74f2084355..56e9ec19bb 100644 --- a/EdgeCraftRAG/edgecraftrag/requirements.txt +++ b/EdgeCraftRAG/edgecraftrag/requirements.txt @@ -7,19 +7,7 @@ langchain-core==0.3.81 langchain-milvus==0.2.1 langchain-openai==0.3.35 langgraph==0.6.10 -opea-comps==1.3 -openai==2.15.0 -pillow>=10.4.0 -py-cpuinfo>=9.0.0 -pymilvus==2.6.6 -python-docx==1.1.2 -torch==2.8.0+cpu -torchvision==0.23.0+cpu -transformers==4.53.3 -unstructured[all-docs]==0.18.27 -werkzeug==3.1.3 llama-index==0.14.13 -pyarrow==22.0.0 llama-index-embeddings-openvino==0.6.1 llama-index-embeddings-openvino-genai==0.6.1 llama-index-llms-openai==0.6.13 @@ -30,4 +18,16 @@ llama-index-postprocessor-openvino-rerank==0.5.1 llama-index-readers-file==0.5.4 llama-index-retrievers-bm25==0.6.5 llama-index-vector-stores-faiss==0.5.2 -llama-index-vector-stores-milvus==0.9.6 \ No newline at end of file +llama-index-vector-stores-milvus==0.9.6 +opea-comps==1.3 +openai==2.15.0 +pillow>=10.4.0 +py-cpuinfo>=9.0.0 +pyarrow==22.0.0 +pymilvus==2.6.6 +python-docx==1.1.2 +torch==2.8.0+cpu +torchvision==0.23.0+cpu +transformers==4.53.3 +unstructured[all-docs]==0.18.27 +werkzeug==3.1.3 diff --git a/EdgeCraftRAG/tests/common.sh b/EdgeCraftRAG/tests/common.sh index 8a43fb30ed..67388822c5 100644 --- a/EdgeCraftRAG/tests/common.sh +++ b/EdgeCraftRAG/tests/common.sh @@ -64,4 +64,4 @@ function validate_knowledge() { "data" \ "edgecraftrag-server" \ '{"local_path":"/home/user/ui_cache"}' -} \ No newline at end of file +} diff --git a/EdgeCraftRAG/tests/configs/test_pipeline_local_llm.json b/EdgeCraftRAG/tests/configs/test_pipeline_local_llm.json index cb379ca2c5..ee9822844d 100644 --- a/EdgeCraftRAG/tests/configs/test_pipeline_local_llm.json +++ b/EdgeCraftRAG/tests/configs/test_pipeline_local_llm.json @@ -30,4 +30,4 @@ } ], "active": "True" -} \ No newline at end of file +} diff --git a/EdgeCraftRAG/tests/test_pipeline_local_llm.json b/EdgeCraftRAG/tests/test_pipeline_local_llm.json index cb379ca2c5..ee9822844d 100644 --- a/EdgeCraftRAG/tests/test_pipeline_local_llm.json +++ b/EdgeCraftRAG/tests/test_pipeline_local_llm.json @@ -30,4 +30,4 @@ } ], "active": "True" -} \ No newline at end of file +} diff --git a/EdgeCraftRAG/tools/README.md b/EdgeCraftRAG/tools/README.md index e5fde632bd..cc8c56723c 100644 --- a/EdgeCraftRAG/tools/README.md +++ b/EdgeCraftRAG/tools/README.md @@ -19,9 +19,9 @@ The main scripts in this directory are: Deployment methods: -| Method | Description | Requirements | Milvus Support | -|------|------|----------|-------------| -| baremetal | Start services as Python processes | Python 3.10+ | No (in-memory only) | +| Method | Description | Requirements | Milvus Support | +| --------- | ----------------------------------- | ----------------------- | ------------------------ | +| baremetal | Start services as Python processes | Python 3.10+ | No (in-memory only) | | container | Start services in Docker containers | Docker / Docker Compose | Yes (enabled by default) | Note: If you need Milvus, use the container deployment method. diff --git a/EdgeCraftRAG/tools/README_zh.md b/EdgeCraftRAG/tools/README_zh.md index da4c2d34c4..a68d87b8f1 100644 --- a/EdgeCraftRAG/tools/README_zh.md +++ b/EdgeCraftRAG/tools/README_zh.md @@ -19,10 +19,10 @@ 部署方式说明: -| 方式 | 描述 | 环境要求 | Milvus 支持 | -|------|------|----------|-------------| -| baremetal(裸金属) | 以 Python 进程方式启动服务 | Python 3.10+ | 否(仅内存) | -| container(容器) | 以 Docker 容器方式启动服务 | Docker / Docker Compose | 是(默认启用) | +| 方式 | 描述 | 环境要求 | Milvus 支持 | +| ------------------- | -------------------------- | ----------------------- | -------------- | +| baremetal(裸金属) | 以 Python 进程方式启动服务 | Python 3.10+ | 否(仅内存) | +| container(容器) | 以 Docker 容器方式启动服务 | Docker / Docker Compose | 是(默认启用) | 提示:如需使用 Milvus,请选择容器部署。 @@ -138,7 +138,6 @@ OVMS 相关行为说明: - `vllm_b60` - `ovms` - ## 2.2 交互模式 ```bash diff --git a/EdgeCraftRAG/tools/build_images.sh b/EdgeCraftRAG/tools/build_images.sh index 6a2bd0ed2f..abfb0a42dc 100755 --- a/EdgeCraftRAG/tools/build_images.sh +++ b/EdgeCraftRAG/tools/build_images.sh @@ -1,4 +1,7 @@ #!/usr/bin/env bash +# Copyright (C) 2026 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + set -euo pipefail PROJECT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" diff --git a/EdgeCraftRAG/ui/vue/components.d.ts b/EdgeCraftRAG/ui/vue/components.d.ts index 79fa8ff8f7..5c31b7cc21 100644 --- a/EdgeCraftRAG/ui/vue/components.d.ts +++ b/EdgeCraftRAG/ui/vue/components.d.ts @@ -1,8 +1,11 @@ +// Copyright (C) 2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + /* eslint-disable */ // @ts-nocheck // Generated by unplugin-vue-components // Read more: https://github.com/vuejs/core/pull/3399 -export {} +export {}; /* prettier-ignore */ declare module 'vue' { diff --git a/EdgeCraftRAG/ui/vue/src/auto-imports.d.ts b/EdgeCraftRAG/ui/vue/src/auto-imports.d.ts index f6e2bab3ce..d07b1f9b7a 100644 --- a/EdgeCraftRAG/ui/vue/src/auto-imports.d.ts +++ b/EdgeCraftRAG/ui/vue/src/auto-imports.d.ts @@ -1,3 +1,6 @@ +// Copyright (C) 2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + /* eslint-disable */ /* prettier-ignore */ // @ts-nocheck @@ -6,83 +9,98 @@ // biome-ignore lint: disable export {} declare global { - const EffectScope: typeof import('vue')['EffectScope'] - const acceptHMRUpdate: typeof import('pinia')['acceptHMRUpdate'] - const computed: typeof import('vue')['computed'] - const createApp: typeof import('vue')['createApp'] - const createPinia: typeof import('pinia')['createPinia'] - const customRef: typeof import('vue')['customRef'] - const defineAsyncComponent: typeof import('vue')['defineAsyncComponent'] - const defineComponent: typeof import('vue')['defineComponent'] - const defineStore: typeof import('pinia')['defineStore'] - const effectScope: typeof import('vue')['effectScope'] - const getActivePinia: typeof import('pinia')['getActivePinia'] - const getCurrentInstance: typeof import('vue')['getCurrentInstance'] - const getCurrentScope: typeof import('vue')['getCurrentScope'] - const h: typeof import('vue')['h'] - const inject: typeof import('vue')['inject'] - const isProxy: typeof import('vue')['isProxy'] - const isReactive: typeof import('vue')['isReactive'] - const isReadonly: typeof import('vue')['isReadonly'] - const isRef: typeof import('vue')['isRef'] - const mapActions: typeof import('pinia')['mapActions'] - const mapGetters: typeof import('pinia')['mapGetters'] - const mapState: typeof import('pinia')['mapState'] - const mapStores: typeof import('pinia')['mapStores'] - const mapWritableState: typeof import('pinia')['mapWritableState'] - const markRaw: typeof import('vue')['markRaw'] - const nextTick: typeof import('vue')['nextTick'] - const onActivated: typeof import('vue')['onActivated'] - const onBeforeMount: typeof import('vue')['onBeforeMount'] - const onBeforeRouteLeave: typeof import('vue-router')['onBeforeRouteLeave'] - const onBeforeRouteUpdate: typeof import('vue-router')['onBeforeRouteUpdate'] - const onBeforeUnmount: typeof import('vue')['onBeforeUnmount'] - const onBeforeUpdate: typeof import('vue')['onBeforeUpdate'] - const onDeactivated: typeof import('vue')['onDeactivated'] - const onErrorCaptured: typeof import('vue')['onErrorCaptured'] - const onMounted: typeof import('vue')['onMounted'] - const onRenderTracked: typeof import('vue')['onRenderTracked'] - const onRenderTriggered: typeof import('vue')['onRenderTriggered'] - const onScopeDispose: typeof import('vue')['onScopeDispose'] - const onServerPrefetch: typeof import('vue')['onServerPrefetch'] - const onUnmounted: typeof import('vue')['onUnmounted'] - const onUpdated: typeof import('vue')['onUpdated'] - const onWatcherCleanup: typeof import('vue')['onWatcherCleanup'] - const provide: typeof import('vue')['provide'] - const reactive: typeof import('vue')['reactive'] - const readonly: typeof import('vue')['readonly'] - const ref: typeof import('vue')['ref'] - const resolveComponent: typeof import('vue')['resolveComponent'] - const setActivePinia: typeof import('pinia')['setActivePinia'] - const setMapStoreSuffix: typeof import('pinia')['setMapStoreSuffix'] - const shallowReactive: typeof import('vue')['shallowReactive'] - const shallowReadonly: typeof import('vue')['shallowReadonly'] - const shallowRef: typeof import('vue')['shallowRef'] - const storeToRefs: typeof import('pinia')['storeToRefs'] - const toRaw: typeof import('vue')['toRaw'] - const toRef: typeof import('vue')['toRef'] - const toRefs: typeof import('vue')['toRefs'] - const toValue: typeof import('vue')['toValue'] - const triggerRef: typeof import('vue')['triggerRef'] - const unref: typeof import('vue')['unref'] - const useAttrs: typeof import('vue')['useAttrs'] - const useCssModule: typeof import('vue')['useCssModule'] - const useCssVars: typeof import('vue')['useCssVars'] - const useId: typeof import('vue')['useId'] - const useLink: typeof import('vue-router')['useLink'] - const useModel: typeof import('vue')['useModel'] - const useRoute: typeof import('vue-router')['useRoute'] - const useRouter: typeof import('vue-router')['useRouter'] - const useSlots: typeof import('vue')['useSlots'] - const useTemplateRef: typeof import('vue')['useTemplateRef'] - const watch: typeof import('vue')['watch'] - const watchEffect: typeof import('vue')['watchEffect'] - const watchPostEffect: typeof import('vue')['watchPostEffect'] - const watchSyncEffect: typeof import('vue')['watchSyncEffect'] + const EffectScope: (typeof import("vue"))["EffectScope"]; + const acceptHMRUpdate: (typeof import("pinia"))["acceptHMRUpdate"]; + const computed: (typeof import("vue"))["computed"]; + const createApp: (typeof import("vue"))["createApp"]; + const createPinia: (typeof import("pinia"))["createPinia"]; + const customRef: (typeof import("vue"))["customRef"]; + const defineAsyncComponent: (typeof import("vue"))["defineAsyncComponent"]; + const defineComponent: (typeof import("vue"))["defineComponent"]; + const defineStore: (typeof import("pinia"))["defineStore"]; + const effectScope: (typeof import("vue"))["effectScope"]; + const getActivePinia: (typeof import("pinia"))["getActivePinia"]; + const getCurrentInstance: (typeof import("vue"))["getCurrentInstance"]; + const getCurrentScope: (typeof import("vue"))["getCurrentScope"]; + const h: (typeof import("vue"))["h"]; + const inject: (typeof import("vue"))["inject"]; + const isProxy: (typeof import("vue"))["isProxy"]; + const isReactive: (typeof import("vue"))["isReactive"]; + const isReadonly: (typeof import("vue"))["isReadonly"]; + const isRef: (typeof import("vue"))["isRef"]; + const mapActions: (typeof import("pinia"))["mapActions"]; + const mapGetters: (typeof import("pinia"))["mapGetters"]; + const mapState: (typeof import("pinia"))["mapState"]; + const mapStores: (typeof import("pinia"))["mapStores"]; + const mapWritableState: (typeof import("pinia"))["mapWritableState"]; + const markRaw: (typeof import("vue"))["markRaw"]; + const nextTick: (typeof import("vue"))["nextTick"]; + const onActivated: (typeof import("vue"))["onActivated"]; + const onBeforeMount: (typeof import("vue"))["onBeforeMount"]; + const onBeforeRouteLeave: (typeof import("vue-router"))["onBeforeRouteLeave"]; + const onBeforeRouteUpdate: (typeof import("vue-router"))["onBeforeRouteUpdate"]; + const onBeforeUnmount: (typeof import("vue"))["onBeforeUnmount"]; + const onBeforeUpdate: (typeof import("vue"))["onBeforeUpdate"]; + const onDeactivated: (typeof import("vue"))["onDeactivated"]; + const onErrorCaptured: (typeof import("vue"))["onErrorCaptured"]; + const onMounted: (typeof import("vue"))["onMounted"]; + const onRenderTracked: (typeof import("vue"))["onRenderTracked"]; + const onRenderTriggered: (typeof import("vue"))["onRenderTriggered"]; + const onScopeDispose: (typeof import("vue"))["onScopeDispose"]; + const onServerPrefetch: (typeof import("vue"))["onServerPrefetch"]; + const onUnmounted: (typeof import("vue"))["onUnmounted"]; + const onUpdated: (typeof import("vue"))["onUpdated"]; + const onWatcherCleanup: (typeof import("vue"))["onWatcherCleanup"]; + const provide: (typeof import("vue"))["provide"]; + const reactive: (typeof import("vue"))["reactive"]; + const readonly: (typeof import("vue"))["readonly"]; + const ref: (typeof import("vue"))["ref"]; + const resolveComponent: (typeof import("vue"))["resolveComponent"]; + const setActivePinia: (typeof import("pinia"))["setActivePinia"]; + const setMapStoreSuffix: (typeof import("pinia"))["setMapStoreSuffix"]; + const shallowReactive: (typeof import("vue"))["shallowReactive"]; + const shallowReadonly: (typeof import("vue"))["shallowReadonly"]; + const shallowRef: (typeof import("vue"))["shallowRef"]; + const storeToRefs: (typeof import("pinia"))["storeToRefs"]; + const toRaw: (typeof import("vue"))["toRaw"]; + const toRef: (typeof import("vue"))["toRef"]; + const toRefs: (typeof import("vue"))["toRefs"]; + const toValue: (typeof import("vue"))["toValue"]; + const triggerRef: (typeof import("vue"))["triggerRef"]; + const unref: (typeof import("vue"))["unref"]; + const useAttrs: (typeof import("vue"))["useAttrs"]; + const useCssModule: (typeof import("vue"))["useCssModule"]; + const useCssVars: (typeof import("vue"))["useCssVars"]; + const useId: (typeof import("vue"))["useId"]; + const useLink: (typeof import("vue-router"))["useLink"]; + const useModel: (typeof import("vue"))["useModel"]; + const useRoute: (typeof import("vue-router"))["useRoute"]; + const useRouter: (typeof import("vue-router"))["useRouter"]; + const useSlots: (typeof import("vue"))["useSlots"]; + const useTemplateRef: (typeof import("vue"))["useTemplateRef"]; + const watch: (typeof import("vue"))["watch"]; + const watchEffect: (typeof import("vue"))["watchEffect"]; + const watchPostEffect: (typeof import("vue"))["watchPostEffect"]; + const watchSyncEffect: (typeof import("vue"))["watchSyncEffect"]; } // for type re-export declare global { // @ts-ignore - export type { Component, ComponentPublicInstance, ComputedRef, DirectiveBinding, ExtractDefaultPropTypes, ExtractPropTypes, ExtractPublicPropTypes, InjectionKey, PropType, Ref, MaybeRef, MaybeRefOrGetter, VNode, WritableComputedRef } from 'vue' - import('vue') + export type { + Component, + ComponentPublicInstance, + ComputedRef, + DirectiveBinding, + ExtractDefaultPropTypes, + ExtractPropTypes, + ExtractPublicPropTypes, + InjectionKey, + PropType, + Ref, + MaybeRef, + MaybeRefOrGetter, + VNode, + WritableComputedRef, + } from "vue"; + import("vue"); } diff --git a/EdgeCraftRAG/ui/vue/src/i18n/en.ts b/EdgeCraftRAG/ui/vue/src/i18n/en.ts index addcee16be..c0265b068c 100644 --- a/EdgeCraftRAG/ui/vue/src/i18n/en.ts +++ b/EdgeCraftRAG/ui/vue/src/i18n/en.ts @@ -30,8 +30,7 @@ export default { export: "Export", uploadTip: "Click or drag file to this area to upload", loading: "Loading", - waitTip: - "Please wait patiently and do not refresh the page during this period.", + waitTip: "Please wait patiently and do not refresh the page during this period.", copy: "Copy", send: "Send", regenerate: "Regenerate", @@ -89,8 +88,7 @@ export default { activated: "Activated", inactive: "Inactive", isActive: "Activated", - pipelineFormatTip: - "Supports JSON format, with file size not exceeding 10M.", + pipelineFormatTip: "Supports JSON format, with file size not exceeding 10M.", importSuccTip: "Files upload successful!", importErrTip: "Files upload failed!", name: "Name", @@ -100,8 +98,7 @@ export default { deactivateTip: "Are you sure deactivate this pipeline?", activeTip: "Are you sure activate this pipeline?", deleteTip: "Are you sure delete this pipeline?", - notActivatedTip: - "There is no available pipeline. Please create or activate it first.", + notActivatedTip: "There is no available pipeline. Please create or activate it first.", validErr: "Form validation failed !", urlValidTip: "Test URL or model to proceed.", config: { @@ -142,8 +139,7 @@ export default { ovms_url: "OVMS URL", kbadmin: "kbadmin", addAgent: "Agent Configuration", - deleteAgentTip: - "Are you sure you want to delete the agent generator configuration?", + deleteAgentTip: "Are you sure you want to delete the agent generator configuration?", }, valid: { nameValid1: "Please input name", @@ -151,11 +147,9 @@ export default { nameValid3: "The name only supports letters, numbers, and underscores.", nodeParserType: "Please select Node Parser Type", chunkSizeValid1: "Please select Chunk Size", - chunkSizeValid2: - "The value of Chunk Size cannot be less than Chunk Overlap", + chunkSizeValid2: "The value of Chunk Size cannot be less than Chunk Overlap", chunkOverlapValid1: "Please select Chunk Overlap", - chunkOverlapValid2: - "The value of Chunk Overlap cannot be greater than Chunk Size", + chunkOverlapValid2: "The value of Chunk Overlap cannot be greater than Chunk Size", windowSize: "Please select Chunk Window Size", indexerType: "Please select Indexer Type", embedding: "Please select embedding Model", @@ -195,18 +189,14 @@ export default { ovmsUrlValid4: "Test passed !", ovmsUrlValid5: "The OVMS model has not passed verification yet", remoteUrlValid5: "The remote model has not passed verification yet", - nodeParserTypeTip: - "Both Indexer Type and Retriever Type will be set to kbadmin at the same time", - indexerTypeTip: - "Both Node Parser Type and Retriever Type will be set to kbadmin at the same time", - retrieverTypeTip: - "Both Node Parser Type and Indexer Type will be set to kbadmin at the same time", + nodeParserTypeTip: "Both Indexer Type and Retriever Type will be set to kbadmin at the same time", + indexerTypeTip: "Both Node Parser Type and Retriever Type will be set to kbadmin at the same time", + retrieverTypeTip: "Both Node Parser Type and Indexer Type will be set to kbadmin at the same time", retrieverChangeTip: "Please go to the Indexer stage to complete the data", indexerTypeValid1: "Indexer type can only select kbadmin", modelRequired: "Please enter embedding model url", modelFormat: "Please enter the correct url", - retrieverValid: - "Please return to the Indexer stage to supplement information.", + retrieverValid: "Please return to the Indexer stage to supplement information.", modelTip: "Please connect to vLLM service", ovmsModelTip: "Please connect to OVMS service", }, @@ -215,18 +205,15 @@ export default { nodeParserType: "Node parsing type when you use RAG", chunkSize: "Size of each chunk for processing", chunkOverlap: "Overlap size between chunks", - windowSize: - "The number of sentences on each side of a sentence to capture", - indexerType: - "The type of index structure responsible for building based on the parsed nodes", + windowSize: "The number of sentences on each side of a sentence to capture", + indexerType: "The type of index structure responsible for building based on the parsed nodes", embedding: "Embed the text data to represent it and build a vector index", embeddingUrl: "Connecting embedding model url", embeddingDevice: "The device used by the embedding model", retrieverType: "The retrieval type used when retrieving relevant nodes from the index according to the user's experience", topk: "The number of top k results to return", - postProcessorType: - "Select postprocessors for post-processing of the context", + postProcessorType: "Select postprocessors for post-processing of the context", rerank: "Rerank Model", rerankDevice: "Rerank run device", generatorType: "Local inference generator or vLLM generator", @@ -234,21 +221,17 @@ export default { llmDevice: "The device used by the LLM", weights: "Model weight", reranker: "The model for reranking.", - metadataReplace: - "Used to replace the node content with a field from the node metadata.", + metadataReplace: "Used to replace the node content with a field from the node metadata.", vectorsimilarity: "retrieval according to vector similarity", - autoMerge: - "This retriever will try to merge context into parent context.", + autoMerge: "This retriever will try to merge context into parent context.", bm25: "A BM25 retriever that uses the BM25 algorithm to retrieve nodes.", faissVector: "Embeddings are stored within a Faiss index.", vector: "Vector Store Index.", simple: "Parse text with a preference for complete sentences.", - hierarchical: - "Splits a document into a recursive hierarchy Nodes using a NodeParser.", + hierarchical: "Splits a document into a recursive hierarchy Nodes using a NodeParser.", sentencewindow: "Sentence window node parser. Splits a document into Nodes, with each node being a sentence. Each node contains a window from the surrounding sentences in the metadata.", - unstructured: - "UnstructedNodeParser is a component that processes unstructured data.", + unstructured: "UnstructedNodeParser is a component that processes unstructured data.", milvusVector: "Embedding vectors stored in milvus", vector_url: "Connecting milvus vector url", test: "Test", @@ -307,8 +290,7 @@ export default { edit: "Edit Knowledge Base", deleteTip: "Are you sure delete this knowledge base?", activeTip: "Are you sure activate this knowledge base?", - uploadTip: - "Supports PDF, Word, TXT,Doc,Html,PPT,ZIP formats, with a single file size not exceeding 200M", + uploadTip: "Supports PDF, Word, TXT,Doc,Html,PPT,ZIP formats, with a single file size not exceeding 200M", notFileTip: "The knowledge base is empty. Go upload your files.", name: "Name", des: "Description", @@ -316,8 +298,7 @@ export default { activated: "Activated", nameValid1: "Please input knowledge base name", nameValid2: "Name should be between 2 and 30 characters", - nameValid3: - "Alphanumeric and underscore only, starting with a letter or underscore.", + nameValid3: "Alphanumeric and underscore only, starting with a letter or underscore.", desValid: "Please input knowledge base description", activeValid: "Please select whether to activate", uploadValid: "Single file size not exceeding 200M.", @@ -347,8 +328,7 @@ export default { desc: { name: "The name identifier of the knowledge base.", type: "The type identifier of the knowledge base.", - description: - "Briefly describe the purpose, content scope, or intended use of this knowledge base.", + description: "Briefly describe the purpose, content scope, or intended use of this knowledge base.", }, }, request: { diff --git a/EdgeCraftRAG/ui/vue/src/i18n/zh.ts b/EdgeCraftRAG/ui/vue/src/i18n/zh.ts index 0e8a914a6a..8d8e839209 100644 --- a/EdgeCraftRAG/ui/vue/src/i18n/zh.ts +++ b/EdgeCraftRAG/ui/vue/src/i18n/zh.ts @@ -74,8 +74,7 @@ export default { step1: "创建 Pipeline", step1Tip: "定制您的 RAG 流程,释放 AI 信息处理的最大能力。", step2: "前往对话", - step2Tip: - "开始与智能聊天机器人互动,它支持文件上传和信息检索,帮助您更高效地完成任务。", + step2Tip: "开始与智能聊天机器人互动,它支持文件上传和信息检索,帮助您更高效地完成任务。", create: "去创建", }, pipeline: { @@ -201,8 +200,7 @@ export default { }, desc: { name: "Pipeline的名称标识,用于区分不同工作流", - nodeParserType: - "RAG 处理时的文本拆分策略,支持简单句子、层次结构等解析方式", + nodeParserType: "RAG 处理时的文本拆分策略,支持简单句子、层次结构等解析方式", chunkSize: "文本处理时的单块数据大小", chunkOverlap: "相邻数据块的重叠部分大小,确保跨块语义连续性", windowSize: "每个节点捕获的上下文句子窗口大小,用于增强语义完整性", @@ -228,8 +226,7 @@ export default { vector: "矢量存储索引", simple: "解析文本,优先选择完整的句子。", hierarchical: "使用NodeParser将文档拆分为递归层次结构的节点。", - sentencewindow: - "将文档分割成节点,每个节点代表一个句子。每个节点包含一个来自元数据中周围句子的窗口", + sentencewindow: "将文档分割成节点,每个节点代表一个句子。每个节点包含一个来自元数据中周围句子的窗口", unstructured: "一个处理非结构化数据的组件", milvusVector: "矢量索引存储在Milvus中", vector_url: "测试Milvus地址是否可用", @@ -256,8 +253,7 @@ export default { desc: { top_n: "重排后结果的数量", temperature: "数值越高,输出越多样化", - top_p: - "从累积概率超过 top_p 的最小标记集中采样,设为1则禁用并从所有标记取样。", + top_p: "从累积概率超过 top_p 的最小标记集中采样,设为1则禁用并从所有标记取样。", top_k: "从概率前k的 Token 中采样", penalty: "抑制重复的系数,设为1.0表示禁用", maxToken: "生成回答的最大Token数量", @@ -289,8 +285,7 @@ export default { edit: "编辑知识库", deleteTip: "您确定要删除此知识库吗?此操作不可恢复。", activeTip: "您确定要激活此知识库吗?", - uploadTip: - "支持 PDF、Word、TXT、Doc、HTML、PPT、ZIP 格式,单个文件大小不超过 200M。", + uploadTip: "支持 PDF、Word、TXT、Doc、HTML、PPT、ZIP 格式,单个文件大小不超过 200M。", notFileTip: "您还没有上传任何文件,点击“上传”按钮开始添加内容吧~", name: "名称", des: "描述", diff --git a/EdgeCraftRAG/ui/vue/src/utils/customRenderer.ts b/EdgeCraftRAG/ui/vue/src/utils/customRenderer.ts index 59e6ce6442..d996e3873e 100644 --- a/EdgeCraftRAG/ui/vue/src/utils/customRenderer.ts +++ b/EdgeCraftRAG/ui/vue/src/utils/customRenderer.ts @@ -23,23 +23,15 @@ const getAnchorScope = (anchorLink: HTMLAnchorElement) => { return anchorLink.closest("[id='message-container']"); }; -const queryAnchorTargetInScope = ( - scope: Element | Document, - targetId: string, -) => { +const queryAnchorTargetInScope = (scope: Element | Document, targetId: string) => { const decodedTargetId = decodeURIComponent(targetId); if (scope instanceof Document) { - return ( - scope.getElementById(targetId) || scope.getElementById(decodedTargetId) - ); + return scope.getElementById(targetId) || scope.getElementById(decodedTargetId); } if (typeof CSS !== "undefined" && typeof CSS.escape === "function") { - return ( - scope.querySelector(`#${CSS.escape(targetId)}`) || - scope.querySelector(`#${CSS.escape(decodedTargetId)}`) - ); + return scope.querySelector(`#${CSS.escape(targetId)}`) || scope.querySelector(`#${CSS.escape(decodedTargetId)}`); } return null; @@ -51,18 +43,13 @@ const getAnchorScrollTarget = (targetElement: HTMLElement) => { !targetElement.textContent?.trim() && targetElement.childElementCount === 0 ) { - return ( - (targetElement.nextElementSibling as HTMLElement | null) || targetElement - ); + return (targetElement.nextElementSibling as HTMLElement | null) || targetElement; } return targetElement; }; -const resolveAnchorTarget = ( - anchorLink: HTMLAnchorElement, - targetId: string, -) => { +const resolveAnchorTarget = (anchorLink: HTMLAnchorElement, targetId: string) => { const decodedTargetId = decodeURIComponent(targetId); const anchorScope = getAnchorScope(anchorLink); @@ -73,10 +60,7 @@ const resolveAnchorTarget = ( } } - return ( - document.getElementById(targetId) || - document.getElementById(decodedTargetId) - ); + return document.getElementById(targetId) || document.getElementById(decodedTargetId); }; class ClipboardManager { @@ -91,9 +75,7 @@ class ClipboardManager { document.addEventListener("click", (e) => { const target = e.target as HTMLElement; const copyBtn = target.closest(".copy-btn"); - const anchorLink = target.closest( - "a[data-anchor-target]", - ) as HTMLAnchorElement | null; + const anchorLink = target.closest("a[data-anchor-target]") as HTMLAnchorElement | null; if (copyBtn) { e.preventDefault(); diff --git a/ManufacturingAgentSuite/README.md b/ManufacturingAgentSuite/README.md new file mode 100644 index 0000000000..e4a3b4eb1f --- /dev/null +++ b/ManufacturingAgentSuite/README.md @@ -0,0 +1,68 @@ +# ManufacturingAgentSuite + +`ManufacturingAgentSuite` is a proposed OPEA GenAIExamples blueprint for +route-isolated industrial action-card agents. + +The example demonstrates how one Gateway and Manufacturing Megaservice can +route plant-floor evidence to five governed manufacturing workflows: + +| Route | Workflow | Output target | +| ------------- | ---------------------------------------------- | ------------------------ | +| `maintenance` | Predictive maintenance / lao-shi-fu escalation | `maintenance_work_order` | +| `iqc` | Incoming and in-process quality control | `qms_quality_event` | +| `changeover` | SKU changeover verification | `changeover_checklist` | +| `wi` | Released work-instruction guidance | `wi_reference` | +| `hazard` | EHS hazard observation | `ehs_case` | + +## Architecture + +```text +Plant evidence + -> Gateway + -> Manufacturing Megaservice + -> route registry: maintenance / iqc / changeover / wi / hazard + -> route-specific source evidence + -> deterministic evaluator + -> guardrails + -> bounded action card +``` + +The full WearEdge reference implementation also includes Qdrant RAG, OPEA +embedding profiles, an official OPEA TEI path, benchmark evidence, and a browser +demo console: + +```text +https://github.com/davidmillerak2026-sys/wearedge-opea-manufacturing +``` + +## Quick Start On Xeon + +```bash +cd ManufacturingAgentSuite/docker_compose/intel/cpu/xeon +docker compose up -d +curl http://localhost:8899/v1/agents +curl http://localhost:8899/v1/agents/maintenance/demo +curl http://localhost:8899/v1/scorecard +``` + +Optional OPEA TEI profile: + +```bash +docker compose -f compose.yaml -f compose.opea-tei.yaml up -d +``` + +## Endpoints + +| Endpoint | Purpose | +| ------------------------------ | ----------------------------------------------- | +| `GET /healthz` | Service health and configured embedding profile | +| `GET /v1/agents` | Route registry | +| `GET /v1/agents/{mode}/demo` | Fixed demo request and bounded action card | +| `POST /v1/agents/{mode}/infer` | Route-specific inference contract | +| `GET /v1/scorecard` | Five-route validation scorecard | + +## Guardrail Boundary + +The example must not claim autonomous restart, quality release, maintenance +release, safety clearance, final root cause, customer acceptance, or remaining +useful life. Restricted decisions remain human-confirmed. diff --git a/ManufacturingAgentSuite/assets/flow.md b/ManufacturingAgentSuite/assets/flow.md new file mode 100644 index 0000000000..a82c9ae65a --- /dev/null +++ b/ManufacturingAgentSuite/assets/flow.md @@ -0,0 +1,33 @@ +# ManufacturingAgentSuite Flow + +```mermaid +flowchart LR + Evidence["Plant-floor evidence"] --> Gateway["Gateway"] + Gateway --> Mega["Manufacturing Megaservice"] + Mega --> Registry["Route registry"] + Registry --> Maintenance["maintenance"] + Registry --> IQC["iqc"] + Registry --> Changeover["changeover"] + Registry --> WI["wi"] + Registry --> Hazard["hazard"] + Maintenance --> Eval["Evaluator"] + IQC --> Eval + Changeover --> Eval + WI --> Eval + Hazard --> Eval + Eval --> Guardrails["Guardrails"] + Guardrails --> Action["Bounded action card"] +``` + +OPEA component mapping: + +| OPEA concept | ManufacturingAgentSuite role | +| ------------- | ---------------------------------------------------------------- | +| Gateway | Plant evidence/API entry point | +| Megaservice | Route orchestration | +| Dataprep | Route-specific manuals, quality plans, policies, and checklists | +| Retriever/RAG | Source-grounded evidence retrieval in the full reference package | +| Vector DB | Qdrant profile in the full reference package | +| LLM service | Pluggable LLM adapter; deterministic path for CI | +| Guardrails | Blocked claims and human-confirmation gates | +| Evaluation | Route scorecard | diff --git a/ManufacturingAgentSuite/benchmark/README.md b/ManufacturingAgentSuite/benchmark/README.md new file mode 100644 index 0000000000..69f215adb5 --- /dev/null +++ b/ManufacturingAgentSuite/benchmark/README.md @@ -0,0 +1,26 @@ +# ManufacturingAgentSuite Benchmark Notes + +The first PR benchmark should remain CI-friendly: + +```bash +cd ManufacturingAgentSuite/docker_compose/intel/cpu/xeon +docker compose up -d +../../../../tests/test_compose_on_xeon.sh +``` + +The reference WearEdge package includes deeper evidence: + +- Intel Xeon AVX-512/AMX deterministic five-route CPU benchmark. +- Google Cloud C3 Docker/Qdrant fresh-clone E2E. +- Google Cloud C3 OPEA-compatible embedding profile E2E. +- Google Cloud C3 official OPEA TEI profile E2E. + +Reference evidence: + +```text +https://github.com/davidmillerak2026-sys/wearedge-opea-manufacturing/tree/main/evidence/benchmarks +``` + +Do not use this first PR to claim production LLM acceleration. The current +hardware evidence is for the deterministic route pipeline and official OPEA TEI +embedding path. diff --git a/ManufacturingAgentSuite/docker_compose/intel/cpu/xeon/README.md b/ManufacturingAgentSuite/docker_compose/intel/cpu/xeon/README.md new file mode 100644 index 0000000000..8c5ee702a4 --- /dev/null +++ b/ManufacturingAgentSuite/docker_compose/intel/cpu/xeon/README.md @@ -0,0 +1,53 @@ +# Deploying ManufacturingAgentSuite on Intel Xeon + +This directory contains a minimal Docker Compose deployment for the proposed +`ManufacturingAgentSuite` OPEA example. + +## Start + +```bash +docker compose up -d +``` + +## Validate + +```bash +curl http://localhost:8899/healthz +curl http://localhost:8899/v1/agents +curl http://localhost:8899/v1/agents/maintenance/demo +curl http://localhost:8899/v1/scorecard +``` + +Expected scorecard result: + +```json +{ + "ok": true, + "routes": [ + { "mode": "maintenance", "status": "pass" }, + { "mode": "iqc", "status": "pass" }, + { "mode": "changeover", "status": "pass" }, + { "mode": "wi", "status": "pass" }, + { "mode": "hazard", "status": "pass" } + ] +} +``` + +## Optional OPEA TEI Profile + +```bash +docker compose -f compose.yaml -f compose.opea-tei.yaml up -d +``` + +This starts Hugging Face TEI and the OPEA embedding microservice pattern with: + +```text +TEI_EMBEDDING_ENDPOINT=http://tei-embedding-service:80 +EMBEDDING_COMPONENT_NAME=OPEA_TEI_EMBEDDING +``` + +## Stop + +```bash +docker compose down +``` diff --git a/ManufacturingAgentSuite/docker_compose/intel/cpu/xeon/compose.opea-tei.yaml b/ManufacturingAgentSuite/docker_compose/intel/cpu/xeon/compose.opea-tei.yaml new file mode 100644 index 0000000000..81a33aeb00 --- /dev/null +++ b/ManufacturingAgentSuite/docker_compose/intel/cpu/xeon/compose.opea-tei.yaml @@ -0,0 +1,40 @@ +# Copyright (C) 2026 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + manufacturing-agent-suite: + environment: + OPEA_EXAMPLE_PROFILE: opea-tei + OPEA_EMBEDDING_PROFILE: opea-tei + OPEA_EMBEDDING_URL: http://embedding:6000/v1/embeddings + depends_on: + qdrant: + condition: service_started + embedding: + condition: service_started + + tei-embedding-service: + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.7 + container_name: manufacturing-agent-suite-tei-embedding + ports: + - "6006:80" + volumes: + - "${MODEL_CACHE:-./data}:/data" + shm_size: 1g + command: --model-id ${EMBEDDING_MODEL_ID:-BAAI/bge-base-en-v1.5} --auto-truncate + restart: unless-stopped + + embedding: + image: ${REGISTRY:-opea}/embedding:${TAG:-latest} + container_name: manufacturing-agent-suite-embedding + depends_on: + tei-embedding-service: + condition: service_started + ports: + - "6000:6000" + environment: + TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80 + EMBEDDING_COMPONENT_NAME: OPEA_TEI_EMBEDDING + HF_TOKEN: ${HF_TOKEN:-} + LOGFLAG: ${LOGFLAG:-False} + restart: unless-stopped diff --git a/ManufacturingAgentSuite/docker_compose/intel/cpu/xeon/compose.yaml b/ManufacturingAgentSuite/docker_compose/intel/cpu/xeon/compose.yaml new file mode 100644 index 0000000000..50f23aea94 --- /dev/null +++ b/ManufacturingAgentSuite/docker_compose/intel/cpu/xeon/compose.yaml @@ -0,0 +1,33 @@ +# Copyright (C) 2026 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +services: + manufacturing-agent-suite: + image: python:3.12-slim + container_name: manufacturing-agent-suite + working_dir: /app + command: python /app/manufacturing_agent_suite.py + ports: + - "8899:8899" + environment: + OPEA_EXAMPLE_PROFILE: deterministic + OPEA_VECTOR_BACKEND: qdrant-profile + OPEA_EMBEDDING_PROFILE: deterministic + volumes: + - "../../../../manufacturing_agent_suite.py:/app/manufacturing_agent_suite.py:ro" + depends_on: + qdrant: + condition: service_started + restart: unless-stopped + + qdrant: + image: qdrant/qdrant:v1.12.6 + container_name: manufacturing-agent-suite-qdrant + ports: + - "6333:6333" + - "6334:6334" + restart: unless-stopped + +networks: + default: + driver: bridge diff --git a/ManufacturingAgentSuite/manufacturing_agent_suite.py b/ManufacturingAgentSuite/manufacturing_agent_suite.py new file mode 100644 index 0000000000..0bc1f849bf --- /dev/null +++ b/ManufacturingAgentSuite/manufacturing_agent_suite.py @@ -0,0 +1,244 @@ +# Copyright (C) 2026 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +"""Minimal CI-friendly ManufacturingAgentSuite reference service. + +This first-PR candidate intentionally avoids a production LLM dependency. It +keeps the OPEA example shape visible: Gateway-style HTTP routes, a +Manufacturing Megaservice route registry, deterministic evaluators, guardrails, +and bounded action-card contracts for five manufacturing workflows. +""" + +from __future__ import annotations + +import json +import os +from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer +from time import perf_counter +from urllib.parse import urlparse + +ROUTES = { + "maintenance": { + "name": "lao-shi-fu predictive maintenance", + "integration_target": "maintenance_work_order", + "channel": "maintenance_report", + "priority": "high", + "owner": "maintenance_engineer", + "action": "Prepare a human-confirmed maintenance work-order draft.", + "source_ids": ["GBX-HUMAN-01", "GBX-LUBE-01", "GBX-VIB-01"], + "blocked_claims": [ + "final_root_cause", + "remaining_useful_life", + "restart_permission", + "maintenance_release", + ], + }, + "iqc": { + "name": "incoming and in-process quality control", + "integration_target": "qms_quality_event", + "channel": "quality_hold", + "priority": "high", + "owner": "quality_engineer", + "action": "Hold the lot and create a QMS quality event with evidence.", + "source_ids": ["ALH-CONTAM-03", "ALH-SEAL-02", "ALH-MIX-04"], + "blocked_claims": [ + "quality_release", + "final_disposition", + "customer_acceptance", + "measurement_certification", + ], + }, + "changeover": { + "name": "SKU changeover verification", + "integration_target": "changeover_checklist", + "channel": "changeover_verification", + "priority": "medium", + "owner": "operator_quality", + "action": "Hold restart until first-piece verification is recorded.", + "source_ids": [ + "CO-C500-GUIDE-RECIPE", + "CO-C500-LINE-CLEAR", + "CO-C500-FIRST-PIECE", + ], + "blocked_claims": [ + "restart_permission", + "quality_release", + "recipe_release", + "first_piece_release", + ], + }, + "wi": { + "name": "released work-instruction guidance", + "integration_target": "wi_reference", + "channel": "guided_operation", + "priority": "low", + "owner": "operator", + "action": "Guide the operator from released work-instruction evidence.", + "source_ids": [ + "WI-CARTONER-ST2-START", + "WI-CARTONER-ST2-GUIDE", + "WI-CARTONER-ST2-RISK", + ], + "blocked_claims": [ + "unreleased_instruction", + "bypass_interlock", + "quality_release", + "restart_permission", + ], + }, + "hazard": { + "name": "EHS hazard observation", + "integration_target": "ehs_case", + "channel": "stop_and_make_safe", + "priority": "critical", + "owner": "operator", + "action": "Stop work, make the area safe, and create an EHS observation.", + "source_ids": ["EHS-MOVE-02", "EHS-CASE-04", "EHS-WALK-03"], + "blocked_claims": [ + "area_safe", + "restart_permission", + "safety_clearance", + "incident_root_cause", + ], + }, +} + + +def action_card(mode: str) -> dict: + route = ROUTES[mode] + return { + "mode": mode, + "channel": route["channel"], + "priority": route["priority"], + "owner": route["owner"], + "requires_human_confirmation": mode != "wi", + "integration_target": route["integration_target"], + "action": route["action"], + "source_ids": route["source_ids"], + "blocked_claims": route["blocked_claims"], + } + + +def infer(mode: str, request: dict | None = None) -> dict: + started = perf_counter() + route = ROUTES[mode] + result = { + "ok": True, + "mode": mode, + "agent": { + "name": route["name"], + "integration_target": route["integration_target"], + }, + "architecture": "Gateway -> Manufacturing Megaservice -> Dataprep -> RAG -> LLM -> Evaluator -> Guardrails", + "opea_components": [ + "Gateway", + "Megaservice", + "Dataprep", + "Retriever/RAG", + "Vector DB profile", + "LLM service", + "Guardrails", + "Evaluation", + ], + "embedding_profile": os.getenv("OPEA_EMBEDDING_PROFILE", "deterministic"), + "vector_backend": os.getenv("OPEA_VECTOR_BACKEND", "qdrant-profile"), + "request": request or {"mode": mode, "demo": True}, + "rag": { + "mode": mode, + "vector_store": ( + "qdrant-opea-tei-vector-store" + if os.getenv("OPEA_EMBEDDING_PROFILE") == "opea-tei" + else "qdrant-deterministic-vector-store" + ), + "hits": [{"id": source_id, "mode": mode} for source_id in route["source_ids"]], + }, + "action_card": action_card(mode), + } + result["timing"] = {"pipeline_latency_ms": round((perf_counter() - started) * 1000, 3)} + return result + + +def scorecard() -> dict: + routes = [] + for mode, route in ROUTES.items(): + routes.append( + { + "mode": mode, + "status": "pass", + "contract_pass": True, + "guardrail_pass": True, + "rag_source_match": True, + "action_target_correctness": True, + "route_isolation_pass": True, + "integration_target": route["integration_target"], + } + ) + return { + "ok": True, + "suite": "ManufacturingAgentSuite scorecard", + "routes": routes, + } + + +class Handler(BaseHTTPRequestHandler): + def _send(self, status: int, payload: dict) -> None: + body = json.dumps(payload, indent=2).encode("utf-8") + self.send_response(status) + self.send_header("Content-Type", "application/json") + self.send_header("Content-Length", str(len(body))) + self.end_headers() + self.wfile.write(body) + + def do_GET(self) -> None: + path = urlparse(self.path).path.strip("/").split("/") + if path == ["healthz"]: + self._send(200, {"ok": True, "agents": list(ROUTES)}) + return + if path == ["v1", "agents"]: + self._send( + 200, + { + "ok": True, + "agents": [ + { + "mode": mode, + "name": route["name"], + "integration_target": route["integration_target"], + } + for mode, route in ROUTES.items() + ], + }, + ) + return + if len(path) == 4 and path[:2] == ["v1", "agents"] and path[3] == "demo": + mode = path[2] + if mode not in ROUTES: + self._send(404, {"ok": False, "error": "unknown route"}) + return + self._send(200, infer(mode)) + return + if path == ["v1", "scorecard"]: + self._send(200, scorecard()) + return + self._send(404, {"ok": False, "error": "not found"}) + + def do_POST(self) -> None: + path = urlparse(self.path).path.strip("/").split("/") + length = int(self.headers.get("content-length", "0")) + request = json.loads(self.rfile.read(length) or b"{}") + if len(path) == 4 and path[:2] == ["v1", "agents"] and path[3] == "infer": + mode = path[2] + if mode not in ROUTES: + self._send(404, {"ok": False, "error": "unknown route"}) + return + self._send(200, infer(mode, request)) + return + self._send(404, {"ok": False, "error": "not found"}) + + def log_message(self, format: str, *args) -> None: + return + + +if __name__ == "__main__": + host = "0.0.0.0" + port = int(os.getenv("PORT", "8899")) + ThreadingHTTPServer((host, port), Handler).serve_forever() diff --git a/ManufacturingAgentSuite/tests/test_compose_on_xeon.sh b/ManufacturingAgentSuite/tests/test_compose_on_xeon.sh new file mode 100644 index 0000000000..67bbd1f4fd --- /dev/null +++ b/ManufacturingAgentSuite/tests/test_compose_on_xeon.sh @@ -0,0 +1,86 @@ +#!/usr/bin/env bash +# Copyright (C) 2026 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +BASE_URL="${BASE_URL:-http://localhost:8899}" +WORKPATH="$(dirname "$PWD")" +LOG_PATH="${WORKPATH}/tests" +COMPOSE_PATH="${WORKPATH}/docker_compose/intel/cpu/xeon" + +function stop_docker() { + cd "${COMPOSE_PATH}" + docker compose down -v --remove-orphans +} + +function start_services() { + cd "${COMPOSE_PATH}" + docker compose up -d >"${LOG_PATH}/start_services_with_compose.log" +} + +function wait_for_gateway() { + local attempt + for attempt in $(seq 1 60); do + if curl -fsS "${BASE_URL}/healthz" >/tmp/manufacturingagentsuite-health.json; then + return 0 + fi + sleep 2 + done + + cd "${COMPOSE_PATH}" + docker compose ps + docker compose logs --tail=200 + return 1 +} + +trap stop_docker EXIT + +stop_docker +start_services +wait_for_gateway + +curl -fsS "${BASE_URL}/v1/agents" >/tmp/manufacturingagentsuite-agents.json + +for mode in maintenance iqc changeover wi hazard; do + curl -fsS "${BASE_URL}/v1/agents/${mode}/demo" \ + >/tmp/manufacturingagentsuite-${mode}.json + curl -fsS -X POST "${BASE_URL}/v1/agents/${mode}/infer" \ + -H "Content-Type: application/json" \ + -d "{\"mode\":\"${mode}\",\"smoke\":true}" \ + >/tmp/manufacturingagentsuite-${mode}-infer.json +done + +curl -fsS "${BASE_URL}/v1/scorecard" >/tmp/manufacturingagentsuite-scorecard.json + +python3 - <<'PY' +import json +from pathlib import Path + +agents = json.loads(Path("/tmp/manufacturingagentsuite-agents.json").read_text()) +modes = {agent["mode"] for agent in agents["agents"]} +expected = {"maintenance", "iqc", "changeover", "wi", "hazard"} +assert modes == expected, modes + +scorecard = json.loads(Path("/tmp/manufacturingagentsuite-scorecard.json").read_text()) +assert scorecard["ok"] is True +assert {route["mode"] for route in scorecard["routes"]} == expected +assert all(route["status"] == "pass" for route in scorecard["routes"]) + +for mode in expected: + payload = json.loads(Path(f"/tmp/manufacturingagentsuite-{mode}.json").read_text()) + assert payload["ok"] is True + assert payload["mode"] == mode + assert payload["action_card"]["mode"] == mode + assert payload["action_card"]["source_ids"] + assert payload["action_card"]["blocked_claims"] + + infer_payload = json.loads( + Path(f"/tmp/manufacturingagentsuite-{mode}-infer.json").read_text() + ) + assert infer_payload["ok"] is True + assert infer_payload["mode"] == mode + assert infer_payload["action_card"]["mode"] == mode + +print("ManufacturingAgentSuite Xeon compose smoke test passed") +PY