Skip to content

Commit 6f9e010

Browse files
sxjeru and Copilot authored
💄 style: Update Kimi K2.5 & Qwen3 Max Thinking models (#11925)
* 🔨 feat(models): add new AI models and update pricing strategies
* 🐛 fix(models): remove deprecated Gemini 2.0 Flash Exp model from googleChatModels
* 🔨 fix(moonshot): update Kimi K2.5 model parameters and enhance payload handling
* ✨ feat: 添加新的聊天模型 Kimi-K2.5 和 PaddleOCR-VL 1.5 到 siliconcloud
* Update packages/model-bank/src/aiModels/qwen.ts

  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* ✨ feat: 添加 Kimi K2.5 模型,更新 Qwen 模型的思维预算处理
* ✨ feat: 添加 forceImageBase64 选项以支持强制将图像 URL 转换为 Base64

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent a76a630 commit 6f9e010

12 files changed

Lines changed: 310 additions & 166 deletions

File tree

packages/model-bank/src/aiModels/google.ts

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -651,25 +651,6 @@ const googleChatModels: AIChatModelCard[] = [
651651
releasedAt: '2025-02-05',
652652
type: 'chat',
653653
},
654-
{
655-
abilities: {
656-
imageOutput: true,
657-
vision: true,
658-
},
659-
contextWindowTokens: 1_048_576 + 8192,
660-
description: 'A Gemini 2.0 Flash variant optimized for cost efficiency and low latency.',
661-
displayName: 'Gemini 2.0 Flash Exp',
662-
id: 'gemini-2.0-flash-exp',
663-
maxOutput: 8192,
664-
pricing: {
665-
units: [
666-
{ name: 'textInput', rate: 0, strategy: 'fixed', unit: 'millionTokens' },
667-
{ name: 'textOutput', rate: 0, strategy: 'fixed', unit: 'millionTokens' },
668-
],
669-
},
670-
releasedAt: '2025-02-05',
671-
type: 'chat',
672-
},
673654
{
674655
abilities: {
675656
vision: true,

packages/model-bank/src/aiModels/moonshot.ts

Lines changed: 56 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,34 @@ import { AIChatModelCard } from '../types/aiModel';
22

33
// https://platform.moonshot.cn/docs/pricing/chat
44
const moonshotChatModels: AIChatModelCard[] = [
5+
{
6+
abilities: {
7+
functionCall: true,
8+
reasoning: true,
9+
structuredOutput: true,
10+
vision: true,
11+
},
12+
contextWindowTokens: 262_144,
13+
description:
14+
'Kimi K2.5 is Kimi\'s most versatile model to date, featuring a native multimodal architecture that supports both vision and text inputs, "thinking" and "non-thinking" modes, and both conversational and agent tasks.',
15+
displayName: 'Kimi K2.5',
16+
enabled: true,
17+
id: 'kimi-k2.5',
18+
maxOutput: 32_768,
19+
pricing: {
20+
currency: 'CNY',
21+
units: [
22+
{ name: 'textInput_cacheRead', rate: 0.7, strategy: 'fixed', unit: 'millionTokens' },
23+
{ name: 'textInput', rate: 4, strategy: 'fixed', unit: 'millionTokens' },
24+
{ name: 'textOutput', rate: 21, strategy: 'fixed', unit: 'millionTokens' },
25+
],
26+
},
27+
releasedAt: '2026-01-27',
28+
settings: {
29+
extendParams: ['enableReasoning'],
30+
},
31+
type: 'chat',
32+
},
533
{
634
abilities: {
735
functionCall: true,
@@ -12,7 +40,6 @@ const moonshotChatModels: AIChatModelCard[] = [
1240
description:
1341
'K2 long-thinking model with 256k context, supporting multi-step tool use and reasoning for complex problems.',
1442
displayName: 'Kimi K2 Thinking',
15-
enabled: true,
1643
id: 'kimi-k2-thinking',
1744
maxOutput: 65_536,
1845
pricing: {
@@ -58,7 +85,6 @@ const moonshotChatModels: AIChatModelCard[] = [
5885
description:
5986
'kimi-k2-0905-preview offers a 256k context window, stronger agentic coding, better front-end code quality, and improved context understanding.',
6087
displayName: 'Kimi K2 0905',
61-
enabled: true,
6288
id: 'kimi-k2-0905-preview',
6389
pricing: {
6490
currency: 'CNY',
@@ -126,8 +152,32 @@ const moonshotChatModels: AIChatModelCard[] = [
126152
currency: 'CNY',
127153
units: [
128154
{ name: 'textInput_cacheRead', rate: 1, strategy: 'fixed', unit: 'millionTokens' },
129-
{ name: 'textInput', rate: 10, strategy: 'fixed', unit: 'millionTokens' },
130-
{ name: 'textOutput', rate: 30, strategy: 'fixed', unit: 'millionTokens' },
155+
{
156+
lookup: {
157+
prices: {
158+
'[0, 0.008_192]': 2,
159+
'[0.008_193, 0.032_768]': 5,
160+
'[0.032_769, 0.131_072]': 10,
161+
},
162+
pricingParams: ['textInput'],
163+
},
164+
name: 'textInput',
165+
strategy: 'lookup',
166+
unit: 'millionTokens',
167+
},
168+
{
169+
lookup: {
170+
prices: {
171+
'[0, 0.008_192]': 10,
172+
'[0.008_193, 0.032_768]': 20,
173+
'[0.032_769, 0.131_072]': 30,
174+
},
175+
pricingParams: ['textInput'],
176+
},
177+
name: 'textOutput',
178+
strategy: 'lookup',
179+
unit: 'millionTokens',
180+
},
131181
],
132182
},
133183
releasedAt: '2025-02-17',
@@ -138,7 +188,8 @@ const moonshotChatModels: AIChatModelCard[] = [
138188
functionCall: true,
139189
},
140190
contextWindowTokens: 131_072,
141-
description: 'Moonshot V1 Auto selects the appropriate model based on current context token usage.',
191+
description:
192+
'Moonshot V1 Auto selects the appropriate model based on current context token usage.',
142193
displayName: 'Moonshot V1 Auto',
143194
id: 'moonshot-v1-auto',
144195
pricing: {

packages/model-bank/src/aiModels/ollamacloud.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,20 @@
11
import { AIChatModelCard } from '../types/aiModel';
22

33
const ollamaCloudModels: AIChatModelCard[] = [
4+
{
5+
abilities: {
6+
functionCall: true,
7+
reasoning: true,
8+
vision: true,
9+
},
10+
contextWindowTokens: 262_144,
11+
description:
12+
'Kimi K2.5 is an open-source, native multimodal agentic model that seamlessly integrates vision and language understanding with advanced agentic capabilities, instant and thinking modes, as well as conversational and agentic paradigms.',
13+
displayName: 'Kimi K2.5',
14+
enabled: true,
15+
id: 'kimi-k2.5',
16+
type: 'chat',
17+
},
418
{
519
abilities: {
620
functionCall: true,

packages/model-bank/src/aiModels/openrouter.ts

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1079,20 +1079,6 @@ const openrouterChatModels: AIChatModelCard[] = [
10791079
id: 'google/gemma-2-9b-it:free',
10801080
type: 'chat',
10811081
},
1082-
{
1083-
abilities: {
1084-
functionCall: true,
1085-
vision: true,
1086-
},
1087-
contextWindowTokens: 1_048_576 + 8192,
1088-
description:
1089-
'Gemini 2.0 Flash Experimental is Google’s latest experimental multimodal AI model with quality improvements over prior versions, especially in world knowledge, code, and long context.',
1090-
displayName: 'Gemini 2.0 Flash Experimental (Free)',
1091-
id: 'google/gemini-2.0-flash-exp:free',
1092-
maxOutput: 8192,
1093-
releasedAt: '2024-12-11',
1094-
type: 'chat',
1095-
},
10961082
];
10971083

10981084
export const allModels = [...openrouterChatModels];

packages/model-bank/src/aiModels/qwen.ts

Lines changed: 105 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,55 @@ import { AIChatModelCard, AIImageModelCard } from '../types/aiModel';
33
// https://help.aliyun.com/zh/model-studio/models?spm=a2c4g.11186623
44

55
const qwenChatModels: AIChatModelCard[] = [
6+
{
7+
abilities: {
8+
functionCall: true,
9+
reasoning: true,
10+
vision: true,
11+
},
12+
contextWindowTokens: 262_144,
13+
description:
14+
'Kimi K2.5 is the most capable Kimi model, delivering open-source SOTA in agent tasks, coding, and vision understanding. It supports multimodal inputs and both thinking and non-thinking modes.',
15+
displayName: 'Kimi K2.5',
16+
id: 'kimi-k2.5',
17+
maxOutput: 32_768,
18+
organization: 'Qwen',
19+
pricing: {
20+
currency: 'CNY',
21+
units: [
22+
{ name: 'textInput', rate: 4, strategy: 'fixed', unit: 'millionTokens' },
23+
{ name: 'textOutput', rate: 21, strategy: 'fixed', unit: 'millionTokens' },
24+
],
25+
},
26+
settings: {
27+
extendParams: ['enableReasoning', 'reasoningBudgetToken'],
28+
},
29+
type: 'chat',
30+
},
31+
{
32+
abilities: {
33+
functionCall: true,
34+
reasoning: true,
35+
search: true,
36+
},
37+
contextWindowTokens: 204_800,
38+
description:
39+
'MiniMax-M2.1 is a flagship open-source large model from MiniMax, focusing on solving complex real-world tasks. Its core strengths are multi-language programming capabilities and the ability to solve complex tasks as an Agent.',
40+
displayName: 'MiniMax-M2.1',
41+
id: 'MiniMax-M2.1',
42+
maxOutput: 32_768,
43+
pricing: {
44+
currency: 'CNY',
45+
units: [
46+
{ name: 'textInput', rate: 2.1, strategy: 'fixed', unit: 'millionTokens' },
47+
{ name: 'textOutput', rate: 8.4, strategy: 'fixed', unit: 'millionTokens' },
48+
],
49+
},
50+
settings: {
51+
searchImpl: 'params',
52+
},
53+
type: 'chat',
54+
},
655
{
756
abilities: {
857
reasoning: true,
@@ -61,7 +110,7 @@ const qwenChatModels: AIChatModelCard[] = [
61110
vision: true,
62111
},
63112
config: {
64-
deploymentName: 'qwen3-vl-flash-2025-10-15',
113+
deploymentName: 'qwen3-vl-flash-2026-01-22',
65114
},
66115
contextWindowTokens: 262_144,
67116
description:
@@ -101,7 +150,6 @@ const qwenChatModels: AIChatModelCard[] = [
101150
},
102151
],
103152
},
104-
releasedAt: '2025-10-15',
105153
settings: {
106154
extendParams: ['enableReasoning', 'reasoningBudgetToken'],
107155
},
@@ -1019,7 +1067,8 @@ const qwenChatModels: AIChatModelCard[] = [
10191067
deploymentName: 'qwen-plus-2025-12-01',
10201068
},
10211069
contextWindowTokens: 1_000_000,
1022-
description: 'Enhanced ultra-large Qwen model supporting Chinese, English, and other languages.',
1070+
description:
1071+
'Enhanced ultra-large Qwen model supporting Chinese, English, and other languages.',
10231072
displayName: 'Qwen Plus',
10241073
enabled: true,
10251074
id: 'qwen-plus',
@@ -1078,6 +1127,57 @@ const qwenChatModels: AIChatModelCard[] = [
10781127
},
10791128
type: 'chat',
10801129
},
1130+
{
1131+
abilities: {
1132+
functionCall: true,
1133+
search: true,
1134+
reasoning: true,
1135+
},
1136+
contextWindowTokens: 262_144,
1137+
description:
1138+
'Qwen3 Max models deliver large gains over the 2.5 series in general ability, Chinese/English understanding, complex instruction following, subjective open tasks, multilingual ability, and tool use, with fewer hallucinations. The latest qwen3-max improves agentic programming and tool use over qwen3-max-preview. This release reaches field SOTA and targets more complex agent needs.',
1139+
displayName: 'Qwen3 Max Thinking',
1140+
id: 'qwen3-max-2026-01-23',
1141+
maxOutput: 65_536,
1142+
organization: 'Qwen',
1143+
pricing: {
1144+
currency: 'CNY',
1145+
units: [
1146+
{
1147+
lookup: {
1148+
prices: {
1149+
'[0, 0.032]': 2.5,
1150+
'[0.032, 0.128]': 4,
1151+
'[0.128, 0.252]': 7,
1152+
},
1153+
pricingParams: ['textInputRange'],
1154+
},
1155+
name: 'textInput',
1156+
strategy: 'lookup',
1157+
unit: 'millionTokens',
1158+
},
1159+
{
1160+
lookup: {
1161+
prices: {
1162+
'[0, 0.032]': 10,
1163+
'[0.032, 0.128]': 16,
1164+
'[0.128, 0.252]': 28,
1165+
},
1166+
pricingParams: ['textInputRange'],
1167+
},
1168+
name: 'textOutput',
1169+
strategy: 'lookup',
1170+
unit: 'millionTokens',
1171+
},
1172+
],
1173+
},
1174+
releasedAt: '2026-01-23',
1175+
settings: {
1176+
extendParams: ['enableReasoning', 'reasoningBudgetToken'],
1177+
searchImpl: 'params',
1178+
},
1179+
type: 'chat',
1180+
},
10811181
{
10821182
abilities: {
10831183
functionCall: true,
@@ -1477,7 +1577,8 @@ const qwenChatModels: AIChatModelCard[] = [
14771577
vision: true,
14781578
},
14791579
contextWindowTokens: 131_072,
1480-
description: 'Qwen3 VL 8B non-thinking mode (Instruct) for standard multimodal generation and recognition.',
1580+
description:
1581+
'Qwen3 VL 8B non-thinking mode (Instruct) for standard multimodal generation and recognition.',
14811582
displayName: 'Qwen3 VL 8B Instruct',
14821583
id: 'qwen3-vl-8b-instruct',
14831584
maxOutput: 32_768,

packages/model-bank/src/aiModels/siliconcloud.ts

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,45 @@ import { AIChatModelCard, AIImageModelCard } from '../types/aiModel';
22

33
// https://siliconflow.cn/zh-cn/models
44
const siliconcloudChatModels: AIChatModelCard[] = [
5+
{
6+
abilities: {
7+
functionCall: true,
8+
reasoning: true,
9+
vision: true,
10+
},
11+
contextWindowTokens: 262_144,
12+
description:
13+
'Kimi K2.5 is an open-source native multimodal agent model, built on Kimi-K2-Base, trained on approximately 1.5 trillion mixed vision and text tokens. The model adopts an MoE architecture with 1T total parameters and 32B active parameters, supporting a 256K context window, seamlessly integrating vision and language understanding capabilities.',
14+
displayName: 'Kimi-K2.5 (Pro)',
15+
id: 'Pro/moonshotai/Kimi-K2.5',
16+
pricing: {
17+
currency: 'CNY',
18+
units: [
19+
{ name: 'textInput', rate: 4, strategy: 'fixed', unit: 'millionTokens' },
20+
{ name: 'textOutput', rate: 21, strategy: 'fixed', unit: 'millionTokens' },
21+
],
22+
},
23+
releasedAt: '2026-01-27',
24+
type: 'chat',
25+
},
26+
{
27+
abilities: {
28+
vision: true,
29+
},
30+
description:
31+
'PaddleOCR-VL-1.5 is an upgraded version of the PaddleOCR-VL series, achieving 94.5% accuracy on the OmniDocBench v1.5 document parsing benchmark, surpassing leading general large models and specialized document parsing models. It innovatively supports irregular bounding box localization for document elements, handling scanned, tilted, and screen-captured images effectively.',
32+
displayName: 'PaddleOCR-VL 1.5',
33+
id: 'PaddlePaddle/PaddleOCR-VL-1.5',
34+
pricing: {
35+
currency: 'CNY',
36+
units: [
37+
{ name: 'textInput', rate: 0, strategy: 'fixed', unit: 'millionTokens' },
38+
{ name: 'textOutput', rate: 0, strategy: 'fixed', unit: 'millionTokens' },
39+
],
40+
},
41+
releasedAt: '2026-01-29',
42+
type: 'chat',
43+
},
544
{
645
abilities: {
746
functionCall: true,

0 commit comments

Comments
 (0)