modelscope · Jintao-Huang · May 19, 2026 · May 19, 2026 · May 20, 2026 · gemini-code-assist
diff --git a/README.md b/README.md
@@ -52,7 +52,7 @@ You can contact us and communicate with us by adding our group:
 
 ## 📝 Introduction
 
-**mcore-bridge** is a large language model and multimodal large model definition library built on the Megatron-Core ecosystem, developed by the ModelScope community. It currently supports 300+ text-only models and 200+ multimodal models, including large language models such as Qwen3-Next, GLM5.1, DeepSeek-V3.2, Minimax2.7, Kimi K2.5, and GPT-OSS, as well as multimodal large models such as Qwen3.5, Qwen3-Omni, Gemma4, GLM4.6-V, InternVL3.5, and Ovis2.5.
+**mcore-bridge** is a large language model and multimodal large model definition library built on the Megatron-Core ecosystem, developed by the ModelScope community. It currently supports 300+ text-only models and 200+ multimodal models, including large language models such as Qwen3-Next, GLM-5.1, DeepSeek-V3.2, Minimax-2.7, Kimi-K2.5, and GPT-OSS, as well as multimodal large models such as Qwen3.5, Qwen3-Omni, Gemma4, GLM4.6-V, InternVL3.5, and Ovis2.5.
 
 ------
 

diff --git a/README_zh.md b/README_zh.md
@@ -51,7 +51,7 @@
 
 ## 📝 简介
 
-**mcore-bridge** 是由魔搭社区推出的、基于 Megatron-Core 生态构建的大模型与多模态大模型定义库。目前已支持 300+ 纯文本模型与 200+ 多模态模型。其中大语言模型包括 Qwen3-Next、GLM5.1、DeepSeek-V3.2、Minimax2.7、Kimi K2.5、GPT-OSS 等；多模态大模型包括 Qwen3.5、Qwen3-Omni、Gemma4、GLM4.6-V、InternVL3.5、Ovis2.5 等。
+**mcore-bridge** 是由魔搭社区推出的、基于 Megatron-Core 生态构建的大模型与多模态大模型定义库。目前已支持 300+ 纯文本模型与 200+ 多模态模型。其中大语言模型包括 Qwen3-Next、GLM-5.1、DeepSeek-V3.2、Minimax-2.7、Kimi-K2.5、GPT-OSS 等；多模态大模型包括 Qwen3.5、Qwen3-Omni、Gemma4、GLM4.6-V、InternVL3.5、Ovis2.5 等。
 
 ------
 

diff --git a/src/mcore_bridge/config/model_config.py b/src/mcore_bridge/config/model_config.py
@@ -314,6 +314,8 @@ def __post_init__(self):
             self.mtp_num_layers = 1
         else:
             self.mtp_unroll_steps = self.mtp_num_layers
+        if self.multi_latent_attention:
+            self.rotary_interleaved = False
         super().__post_init__()
 
         self._check_npu()

diff --git a/src/mcore_bridge/config/parser.py b/src/mcore_bridge/config/parser.py
@@ -216,6 +216,11 @@ def hf_to_mcore_config(hf_config: PretrainedConfig) -> Dict[str, Any]:
             res['moe_layer_freq'] = f"[{','.join(moe_layer_freq)}]"
     elif hf_model_type == 'glm4v':
         res['rotary_interleaved'] = True
+    elif llm_model_type == 'bailing_hybrid':
+        res['qk_layernorm'] = True
+        res['add_qkv_bias'] = False
+        res['moe_router_score_function'] = 'sigmoid'
+        res['moe_router_load_balancing_type'] = 'seq_aux_loss'
 
     if 'partial_rotary_factor' not in res and 'partial_rotary_factor' in rope_scaling:
         res['partial_rotary_factor'] = rope_scaling['partial_rotary_factor']

diff --git a/src/mcore_bridge/model/constant.py b/src/mcore_bridge/model/constant.py
@@ -9,6 +9,7 @@ class LLMModelType:
     minimax_m2 = 'minimax_m2'
     hy_v3 = 'hy_v3'
     bailing_moe = 'bailing_moe'
+    bailing_hybrid = 'bailing_hybrid'
 
     qwen3_emb = 'qwen3_emb'
 

diff --git a/src/mcore_bridge/model/gpts/__init__.py b/src/mcore_bridge/model/gpts/__init__.py
@@ -1,2 +1,2 @@
 # Copyright (c) ModelScope Contributors. All rights reserved.
-from . import bailing_moe, glm4, hunyuan, llm, minimax_m2, olmoe, qwen3_emb, qwen3_next
+from . import bailing_hybrid, bailing_moe, glm4, hunyuan, llm, minimax_m2, olmoe, qwen3_emb, qwen3_next
diff --git a/src/mcore_bridge/model/gpts/bailing_hybrid.py b/src/mcore_bridge/model/gpts/bailing_hybrid.py
@@ -0,0 +1,68 @@
+# Copyright (c) ModelScope Contributors. All rights reserved.
+from megatron.core.inference.contexts import BaseInferenceContext
+from megatron.core.packed_seq_params import PackedSeqParams
+from torch import Tensor
+
+from mcore_bridge.bridge import GPTBridge
+from ..constant import ModelType
+from ..register import ModelLoader, ModelMeta, register_model
+from typing import Optional, Union, Tuple
+from megatron.core.transformer.attention import SelfAttention
+from megatron.core.transformer.attention import SelfAttentionSubmodules
+from megatron.core.transformer.transformer_config import TransformerConfig
+
+
+class BailingHybridBridge(GPTBridge):
+    pass
+
+
+class LinearAttention(SelfAttention):
+    def __init__(
+        self,
+        config: TransformerConfig,
+        *args, **kwargs,
+    ):
+        super().__init__(config, *args, **kwargs)
+
+    def forward(
+        self,
+        hidden_states: Tensor,
+        attention_mask: Tensor,
+        **kwargs,
+    ) -> Tuple[Tensor, Tensor]:
+        return super().forward(hidden_states, attention_mask, **kwargs)
+
+
+class BailingHybridLoader(ModelLoader):
+
+    def get_transformer_layer_spec(self, vp_stage: Optional[int] = None):
+        hf_config = self.config.hf_config
+        num_layers = hf_config.num_hidden_layers
+        group_size = hf_config.layer_group_size
+        tail_start = num_layers // group_size * group_size
+        hf_config.attention_layer_type = [
+            "attention"
+            if (layer_idx + 1) % group_size == 0 or layer_idx >= tail_start
+            else "linear_attention"
+            for layer_idx in range(num_layers)
+        ]
+        layer_specs = super().get_transformer_layer_spec(vp_stage=vp_stage)
+        multi_latent_attention = self.config.multi_latent_attention
+        self.config.multi_latent_attention = False
+        linear_layer_specs = super().get_transformer_layer_spec(vp_stage=vp_stage)
+        self.config.multi_latent_attention = multi_latent_attention
-        layer_specs = super().get_transformer_layer_spec(vp_stage=vp_stage)
-        multi_latent_attention = self.config.multi_latent_attention
-        self.config.multi_latent_attention = False
-        linear_layer_specs = super().get_transformer_layer_spec(vp_stage=vp_stage)
-        self.config.multi_latent_attention = multi_latent_attention
+        multi_latent_attention = self.config.multi_latent_attention
+        if multi_latent_attention:
+            layer_specs = super().get_transformer_layer_spec(vp_stage=vp_stage)
+            try:
+                self.config.multi_latent_attention = False
+                linear_layer_specs = super().get_transformer_layer_spec(vp_stage=vp_stage)
+            finally:
+                self.config.multi_latent_attention = multi_latent_attention
+        else:
+            layer_specs = super().get_transformer_layer_spec(vp_stage=vp_stage)
+            linear_layer_specs = layer_specs
+        for i, layer_spec in enumerate(layer_specs.layer_specs):
+            if hf_config.attention_layer_type[i] == 'linear_attention':
+                linear_spec = linear_layer_specs.layer_specs[i].submodules.self_attention
+                linear_spec.module = LinearAttention
+                layer_spec.submodules.self_attention = linear_spec
+        return layer_specs
+
+
+register_model(
+    ModelMeta(
+        ModelType.bailing_hybrid,
+        ['bailing_hybrid'],
+        bridge_cls=BailingHybridBridge,
+        loader=BailingHybridLoader,
+    ))
-Original file line number
+Diff line change
@@ Expand Up @@
     ## 📝 Introduction
-    **mcore-bridge** is a large language model and multimodal large model definition library built on the Megatron-Core ecosystem, developed by the ModelScope community. It currently supports 300+ text-only models and 200+ multimodal models, including large language models such as Qwen3-Next, GLM5.1, DeepSeek-V3.2, Minimax2.7, Kimi K2.5, and GPT-OSS, as well as multimodal large models such as Qwen3.5, Qwen3-Omni, Gemma4, GLM4.6-V, InternVL3.5, and Ovis2.5.
+    **mcore-bridge** is a large language model and multimodal large model definition library built on the Megatron-Core ecosystem, developed by the ModelScope community. It currently supports 300+ text-only models and 200+ multimodal models, including large language models such as Qwen3-Next, GLM-5.1, DeepSeek-V3.2, Minimax-2.7, Kimi-K2.5, and GPT-OSS, as well as multimodal large models such as Qwen3.5, Qwen3-Omni, Gemma4, GLM4.6-V, InternVL3.5, and Ovis2.5.
     ------
@@ Expand Down @@