diff --git a/packages/types/src/providers/baseten.ts b/packages/types/src/providers/baseten.ts index 27b8cbff4a..6c275cda2c 100644 --- a/packages/types/src/providers/baseten.ts +++ b/packages/types/src/providers/baseten.ts @@ -83,6 +83,28 @@ export const basetenModels = { description: "DeepSeek's hybrid reasoning model with efficient long context scaling with GPT-5 level performance", }, + "deepseek-ai/DeepSeek-V4-Pro": { + maxTokens: 384_000, + contextWindow: 1_000_000, + supportsImages: false, + supportsPromptCache: true, + inputPrice: 1.74, + outputPrice: 3.48, + cacheWritesPrice: 1.74, + cacheReadsPrice: 0.145, + description: "DeepSeek V4 Pro", + }, + "deepseek-ai/DeepSeek-V4-Flash": { + maxTokens: 384_000, + contextWindow: 1_000_000, + supportsImages: false, + supportsPromptCache: true, + inputPrice: 0.14, + outputPrice: 0.28, + cacheWritesPrice: 0.14, + cacheReadsPrice: 0.028, + description: "DeepSeek V4 Flash", + }, "openai/gpt-oss-120b": { maxTokens: 16_384, contextWindow: 128_072, diff --git a/packages/types/src/providers/deepseek.ts b/packages/types/src/providers/deepseek.ts index 40722471cb..0e7267a3aa 100644 --- a/packages/types/src/providers/deepseek.ts +++ b/packages/types/src/providers/deepseek.ts @@ -32,6 +32,30 @@ export const deepSeekModels = { cacheReadsPrice: 0.028, // $0.028 per million tokens (cache hit) - Updated Dec 9, 2025 description: `DeepSeek-V3.2 (Thinking Mode) achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks. Supports Chain of Thought reasoning with up to 8K output tokens. 
Supports JSON output, tool calls, and chat prefix completion (beta).`, }, + "deepseek-v4-pro": { + maxTokens: 384_000, + contextWindow: 1_000_000, + supportsImages: false, + supportsPromptCache: true, + preserveReasoning: true, + inputPrice: 1.74, // $1.74 per million tokens (cache miss) + outputPrice: 3.48, // $3.48 per million tokens + cacheWritesPrice: 1.74, // $1.74 per million tokens (cache miss) + cacheReadsPrice: 0.145, // $0.145 per million tokens (cache hit) + description: `DeepSeek V4 Pro`, + }, + "deepseek-v4-flash": { + maxTokens: 384_000, + contextWindow: 1_000_000, + supportsImages: false, + supportsPromptCache: true, + preserveReasoning: true, + inputPrice: 0.14, // $0.14 per million tokens (cache miss) + outputPrice: 0.28, // $0.28 per million tokens + cacheWritesPrice: 0.14, // $0.14 per million tokens (cache miss) + cacheReadsPrice: 0.028, // $0.028 per million tokens (cache hit) + description: `DeepSeek V4 Flash`, + }, } as const satisfies Record // https://api-docs.deepseek.com/quick_start/parameter_settings diff --git a/packages/types/src/providers/fireworks.ts b/packages/types/src/providers/fireworks.ts index c9017c54cd..fb839c07e1 100644 --- a/packages/types/src/providers/fireworks.ts +++ b/packages/types/src/providers/fireworks.ts @@ -13,6 +13,8 @@ export type FireworksModelId = | "accounts/fireworks/models/deepseek-v3" | "accounts/fireworks/models/deepseek-v3p1" | "accounts/fireworks/models/deepseek-v3p2" + | "accounts/fireworks/models/deepseek-v4-pro" + | "accounts/fireworks/models/deepseek-v4-flash" | "accounts/fireworks/models/glm-4p5" | "accounts/fireworks/models/glm-4p5-air" | "accounts/fireworks/models/glm-4p6" @@ -200,6 +202,28 @@ export const fireworksModels = { description: "DeepSeek V3.2 is the latest iteration of the V3 model family with enhanced reasoning capabilities, improved code generation, and better instruction following.", }, + "accounts/fireworks/models/deepseek-v4-pro": { + maxTokens: 384_000, + contextWindow: 
1_000_000, + supportsImages: false, + supportsPromptCache: true, + inputPrice: 1.74, + outputPrice: 3.48, + cacheWritesPrice: 1.74, + cacheReadsPrice: 0.145, + description: "DeepSeek V4 Pro", + }, + "accounts/fireworks/models/deepseek-v4-flash": { + maxTokens: 384_000, + contextWindow: 1_000_000, + supportsImages: false, + supportsPromptCache: true, + inputPrice: 0.14, + outputPrice: 0.28, + cacheWritesPrice: 0.14, + cacheReadsPrice: 0.028, + description: "DeepSeek V4 Flash", + }, "accounts/fireworks/models/glm-4p7": { maxTokens: 25344, contextWindow: 198000, diff --git a/packages/types/src/providers/sambanova.ts b/packages/types/src/providers/sambanova.ts index 624a7eb8c7..8b2f88ea58 100644 --- a/packages/types/src/providers/sambanova.ts +++ b/packages/types/src/providers/sambanova.ts @@ -7,6 +7,8 @@ export type SambaNovaModelId = | "DeepSeek-R1" | "DeepSeek-V3-0324" | "DeepSeek-V3.1" + | "DeepSeek-V4-Pro" + | "DeepSeek-V4-Flash" | "Llama-4-Maverick-17B-128E-Instruct" | "Qwen3-32B" | "gpt-oss-120b" @@ -60,6 +62,28 @@ export const sambaNovaModels = { outputPrice: 4.5, description: "DeepSeek V3.1 model with 32K context window.", }, + "DeepSeek-V4-Pro": { + maxTokens: 384_000, + contextWindow: 1_000_000, + supportsImages: false, + supportsPromptCache: true, + inputPrice: 1.74, + outputPrice: 3.48, + cacheWritesPrice: 1.74, + cacheReadsPrice: 0.145, + description: "DeepSeek V4 Pro", + }, + "DeepSeek-V4-Flash": { + maxTokens: 384_000, + contextWindow: 1_000_000, + supportsImages: false, + supportsPromptCache: true, + inputPrice: 0.14, + outputPrice: 0.28, + cacheWritesPrice: 0.14, + cacheReadsPrice: 0.028, + description: "DeepSeek V4 Flash", + }, "Llama-4-Maverick-17B-128E-Instruct": { maxTokens: 8192, contextWindow: 131072, diff --git a/src/api/providers/__tests__/fireworks.spec.ts b/src/api/providers/__tests__/fireworks.spec.ts index 79f69f868b..6641292f53 100644 --- a/src/api/providers/__tests__/fireworks.spec.ts +++ b/src/api/providers/__tests__/fireworks.spec.ts @@ -245,6 
+245,52 @@ describe("FireworksHandler", () => { ) }) + it("should return DeepSeek V4 Pro model with correct configuration", () => { + const testModelId: FireworksModelId = "accounts/fireworks/models/deepseek-v4-pro" + const handlerWithModel = new FireworksHandler({ + apiModelId: testModelId, + fireworksApiKey: "test-fireworks-api-key", + }) + const model = handlerWithModel.getModel() + expect(model.id).toBe(testModelId) + expect(model.info).toEqual( + expect.objectContaining({ + maxTokens: 384_000, + contextWindow: 1_000_000, + supportsImages: false, + supportsPromptCache: true, + inputPrice: 1.74, + outputPrice: 3.48, + cacheWritesPrice: 1.74, + cacheReadsPrice: 0.145, + description: expect.stringContaining("DeepSeek V4 Pro"), + }), + ) + }) + + it("should return DeepSeek V4 Flash model with correct configuration", () => { + const testModelId: FireworksModelId = "accounts/fireworks/models/deepseek-v4-flash" + const handlerWithModel = new FireworksHandler({ + apiModelId: testModelId, + fireworksApiKey: "test-fireworks-api-key", + }) + const model = handlerWithModel.getModel() + expect(model.id).toBe(testModelId) + expect(model.info).toEqual( + expect.objectContaining({ + maxTokens: 384_000, + contextWindow: 1_000_000, + supportsImages: false, + supportsPromptCache: true, + inputPrice: 0.14, + outputPrice: 0.28, + cacheWritesPrice: 0.14, + cacheReadsPrice: 0.028, + description: expect.stringContaining("DeepSeek V4 Flash"), + }), + ) + }) + it("should return GLM-4.5 model with correct configuration", () => { const testModelId: FireworksModelId = "accounts/fireworks/models/glm-4p5" const handlerWithModel = new FireworksHandler({ diff --git a/src/api/providers/deepseek.ts b/src/api/providers/deepseek.ts index 84cd557de0..bced110a39 100644 --- a/src/api/providers/deepseek.ts +++ b/src/api/providers/deepseek.ts @@ -55,8 +55,11 @@ export class DeepSeekHandler extends OpenAiHandler { const modelId = this.options.apiModelId ?? 
deepSeekDefaultModelId const { info: modelInfo } = this.getModel() - // Check if this is a thinking-enabled model (deepseek-reasoner) - const isThinkingModel = modelId.includes("deepseek-reasoner") + // Check if this is a thinking-enabled model (deepseek-reasoner, deepseek-v4-pro, deepseek-v4-flash) + const isThinkingModel = + modelId.includes("deepseek-reasoner") || + modelId.includes("deepseek-v4-pro") || + modelId.includes("deepseek-v4-flash") // Convert messages to R1 format (merges consecutive same-role messages) // This is required for DeepSeek which does not support successive messages with the same role