From c327356a34edabce398e5de41b6e13eed9927dd6 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Tue, 19 May 2026 21:30:01 +0800 Subject: [PATCH 1/3] support bailing v2_5 --- README.md | 2 +- README_zh.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ec9236d..c66364f 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,7 @@ You can contact us and communicate with us by adding our group: ## 📝 Introduction -**mcore-bridge** is a large language model and multimodal large model definition library built on the Megatron-Core ecosystem, developed by the ModelScope community. It currently supports 300+ text-only models and 200+ multimodal models, including large language models such as Qwen3-Next, GLM5.1, DeepSeek-V3.2, Minimax2.7, Kimi K2.5, and GPT-OSS, as well as multimodal large models such as Qwen3.5, Qwen3-Omni, Gemma4, GLM4.6-V, InternVL3.5, and Ovis2.5. +**mcore-bridge** is a large language model and multimodal large model definition library built on the Megatron-Core ecosystem, developed by the ModelScope community. It currently supports 300+ text-only models and 200+ multimodal models, including large language models such as Qwen3-Next, GLM-5.1, DeepSeek-V3.2, Minimax-2.7, Kimi-K2.5, and GPT-OSS, as well as multimodal large models such as Qwen3.5, Qwen3-Omni, Gemma4, GLM4.6-V, InternVL3.5, and Ovis2.5. 
------ diff --git a/README_zh.md b/README_zh.md index a91e9f2..445fffa 100644 --- a/README_zh.md +++ b/README_zh.md @@ -51,7 +51,7 @@ ## 📝 简介 -**mcore-bridge** 是由魔搭社区推出的、基于 Megatron-Core 生态构建的大模型与多模态大模型定义库。目前已支持 300+ 纯文本模型与 200+ 多模态模型。其中大语言模型包括 Qwen3-Next、GLM5.1、DeepSeek-V3.2、Minimax2.7、Kimi K2.5、GPT-OSS 等;多模态大模型包括 Qwen3.5、Qwen3-Omni、Gemma4、GLM4.6-V、InternVL3.5、Ovis2.5 等。 +**mcore-bridge** 是由魔搭社区推出的、基于 Megatron-Core 生态构建的大模型与多模态大模型定义库。目前已支持 300+ 纯文本模型与 200+ 多模态模型。其中大语言模型包括 Qwen3-Next、GLM-5.1、DeepSeek-V3.2、Minimax-2.7、Kimi-K2.5、GPT-OSS 等;多模态大模型包括 Qwen3.5、Qwen3-Omni、Gemma4、GLM4.6-V、InternVL3.5、Ovis2.5 等。 ------ From d86b366b545e83c108790a0f06af8155e7407a60 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Tue, 19 May 2026 21:47:56 +0800 Subject: [PATCH 2/3] update --- src/mcore_bridge/config/parser.py | 5 +++++ src/mcore_bridge/model/constant.py | 1 + src/mcore_bridge/model/gpts/__init__.py | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/mcore_bridge/config/parser.py b/src/mcore_bridge/config/parser.py index 842c1bb..41d1288 100644 --- a/src/mcore_bridge/config/parser.py +++ b/src/mcore_bridge/config/parser.py @@ -216,6 +216,11 @@ def hf_to_mcore_config(hf_config: PretrainedConfig) -> Dict[str, Any]: res['moe_layer_freq'] = f"[{','.join(moe_layer_freq)}]" elif hf_model_type == 'glm4v': res['rotary_interleaved'] = True + elif llm_model_type == 'bailing_hybrid': + res['qk_layernorm'] = True + res['add_qkv_bias'] = False + res['moe_router_score_function'] = 'sigmoid' + res['moe_router_load_balancing_type'] = 'seq_aux_loss' if 'partial_rotary_factor' not in res and 'partial_rotary_factor' in rope_scaling: res['partial_rotary_factor'] = rope_scaling['partial_rotary_factor'] diff --git a/src/mcore_bridge/model/constant.py b/src/mcore_bridge/model/constant.py index 9b8dc1b..7cc005d 100644 --- a/src/mcore_bridge/model/constant.py +++ b/src/mcore_bridge/model/constant.py @@ -9,6 +9,7 @@ class LLMModelType: minimax_m2 = 'minimax_m2' hy_v3 = 'hy_v3' 
bailing_moe = 'bailing_moe' + bailing_hybrid = 'bailing_hybrid' qwen3_emb = 'qwen3_emb' diff --git a/src/mcore_bridge/model/gpts/__init__.py b/src/mcore_bridge/model/gpts/__init__.py index 52b007f..c3fcc5b 100644 --- a/src/mcore_bridge/model/gpts/__init__.py +++ b/src/mcore_bridge/model/gpts/__init__.py @@ -1,2 +1,2 @@ # Copyright (c) ModelScope Contributors. All rights reserved. -from . import bailing_moe, glm4, hunyuan, llm, minimax_m2, olmoe, qwen3_emb, qwen3_next +from . import bailing_hybrid, bailing_moe, glm4, hunyuan, llm, minimax_m2, olmoe, qwen3_emb, qwen3_next From 558b4d449fda532dba9d2ff2d1e807ac0a1046ad Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Wed, 20 May 2026 09:53:46 +0800 Subject: [PATCH 3/3] update --- src/mcore_bridge/config/model_config.py | 2 + src/mcore_bridge/model/gpts/bailing_hybrid.py | 68 +++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 src/mcore_bridge/model/gpts/bailing_hybrid.py diff --git a/src/mcore_bridge/config/model_config.py b/src/mcore_bridge/config/model_config.py index 8b5438a..2553749 100644 --- a/src/mcore_bridge/config/model_config.py +++ b/src/mcore_bridge/config/model_config.py @@ -314,6 +314,8 @@ def __post_init__(self): self.mtp_num_layers = 1 else: self.mtp_unroll_steps = self.mtp_num_layers + if self.multi_latent_attention: + self.rotary_interleaved = False super().__post_init__() self._check_npu() diff --git a/src/mcore_bridge/model/gpts/bailing_hybrid.py b/src/mcore_bridge/model/gpts/bailing_hybrid.py new file mode 100644 index 0000000..2256e05 --- /dev/null +++ b/src/mcore_bridge/model/gpts/bailing_hybrid.py @@ -0,0 +1,68 @@ +# Copyright (c) ModelScope Contributors. All rights reserved. 
+from megatron.core.inference.contexts import BaseInferenceContext
+from megatron.core.packed_seq_params import PackedSeqParams
+from torch import Tensor
+
+from mcore_bridge.bridge import GPTBridge
+from ..constant import ModelType
+from ..register import ModelLoader, ModelMeta, register_model
+from typing import Optional, Union, Tuple
+from megatron.core.transformer.attention import SelfAttention
+from megatron.core.transformer.attention import SelfAttentionSubmodules
+from megatron.core.transformer.transformer_config import TransformerConfig
+
+
+class BailingHybridBridge(GPTBridge):
+    """Bridge registered for ``bailing_hybrid``; adds nothing on top of ``GPTBridge``."""
+
+
+class LinearAttention(SelfAttention):
+    """Module swapped into ``linear_attention`` layers; currently only delegates to ``SelfAttention``."""
+
+    def __init__(self, config: TransformerConfig, *args, **kwargs):
+        super().__init__(config, *args, **kwargs)
+
+    def forward(self, hidden_states: Tensor, attention_mask: Tensor, **kwargs) -> Tuple[Tensor, Tensor]:
+        return super().forward(hidden_states, attention_mask, **kwargs)
+
+
+class BailingHybridLoader(ModelLoader):
+    """Loader whose layer specs use ``LinearAttention`` on the ``linear_attention`` layers."""
+
+    def get_transformer_layer_spec(self, vp_stage: Optional[int] = None):
+        """Return the parent's layer specs with ``LinearAttention`` swapped into the linear layers.
+
+        The last layer of each ``layer_group_size`` group, plus any trailing partial group, keeps
+        the parent's attention spec; the plan is stored on ``hf_config.attention_layer_type``.
+        """
+        hf_config = self.config.hf_config
+        num_layers = hf_config.num_hidden_layers
+        group_size = hf_config.layer_group_size
+        tail_start = num_layers // group_size * group_size
+        hf_config.attention_layer_type = [
+            'attention' if (idx + 1) % group_size == 0 or idx >= tail_start else 'linear_attention'
+            for idx in range(num_layers)
+        ]
+        layer_specs = super().get_transformer_layer_spec(vp_stage=vp_stage)
+        # Rebuild the specs with multi_latent_attention off; restore the flag even if that fails.
+        multi_latent_attention = self.config.multi_latent_attention
+        self.config.multi_latent_attention = False
+        try:
+            linear_layer_specs = super().get_transformer_layer_spec(vp_stage=vp_stage)
+        finally:
+            self.config.multi_latent_attention = multi_latent_attention
+        # NOTE(review): ``i`` is treated as a global layer index; confirm the parent does not return
+        # only the current pipeline/vp stage's layers, otherwise an offset is needed here.
+        for i, layer_spec in enumerate(layer_specs.layer_specs):
+            if hf_config.attention_layer_type[i] == 'linear_attention':
+                linear_spec = linear_layer_specs.layer_specs[i].submodules.self_attention
+                linear_spec.module = LinearAttention
+                layer_spec.submodules.self_attention = linear_spec
+        return layer_specs
+
+
+# Register the loader/bridge pair under the ``bailing_hybrid`` model type.
+register_model(
+    ModelMeta(
+        ModelType.bailing_hybrid, ['bailing_hybrid'], bridge_cls=BailingHybridBridge, loader=BailingHybridLoader,
+    ))