diff --git a/packages/types/src/providers/deepseek.ts b/packages/types/src/providers/deepseek.ts
index 40722471cb..eb32599db8 100644
--- a/packages/types/src/providers/deepseek.ts
+++ b/packages/types/src/providers/deepseek.ts
@@ -32,6 +32,30 @@ export const deepSeekModels = {
 		cacheReadsPrice: 0.028, // $0.028 per million tokens (cache hit) - Updated Dec 9, 2025
 		description: `DeepSeek-V3.2 (Thinking Mode) achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks. Supports Chain of Thought reasoning with up to 8K output tokens. Supports JSON output, tool calls, and chat prefix completion (beta).`,
 	},
+	"deepseek-v4-pro": {
+		maxTokens: 384_000, // 384K max output
+		contextWindow: 1_000_000, // 1M context
+		supportsImages: true,
+		supportsPromptCache: true,
+		preserveReasoning: true,
+		inputPrice: 2.0, // $2.00 per million tokens (cache miss)
+		outputPrice: 8.0, // $8.00 per million tokens
+		cacheWritesPrice: 2.0, // $2.00 per million tokens (cache miss)
+		cacheReadsPrice: 0.5, // $0.50 per million tokens (cache hit)
+		description: `DeepSeek-V4-Pro is the flagship reasoning model with 1M context window and 384K max output. Features enhanced thinking mode, vision support, tool calls, and JSON output. Best suited for complex reasoning, code generation, and multi-step tasks.`,
+	},
+	"deepseek-v4-flash": {
+		maxTokens: 384_000, // 384K max output
+		contextWindow: 1_000_000, // 1M context
+		supportsImages: true,
+		supportsPromptCache: true,
+		preserveReasoning: true,
+		inputPrice: 1.0, // $1.00 per million tokens (cache miss)
+		outputPrice: 4.0, // $4.00 per million tokens
+		cacheWritesPrice: 1.0, // $1.00 per million tokens (cache miss)
+		cacheReadsPrice: 0.25, // $0.25 per million tokens (cache hit)
+		description: `DeepSeek-V4-Flash is a fast, cost-efficient reasoning model with 1M context window and 384K max output. Features thinking mode, vision support, tool calls, and JSON output. Optimized for speed while maintaining strong reasoning capabilities.`,
+	},
 } as const satisfies Record<string, ModelInfo>
 
 // https://api-docs.deepseek.com/quick_start/parameter_settings
diff --git a/src/api/providers/__tests__/deepseek.spec.ts b/src/api/providers/__tests__/deepseek.spec.ts
index cbbc61ad4d..1056c4593c 100644
--- a/src/api/providers/__tests__/deepseek.spec.ts
+++ b/src/api/providers/__tests__/deepseek.spec.ts
@@ -30,7 +30,8 @@ vi.mock("openai", () => {
 			}
 
 			// Check if this is a reasoning_content test by looking at model
-			const isReasonerModel = options.model?.includes("deepseek-reasoner")
+			const isReasonerModel =
+				options.model?.includes("deepseek-reasoner") || options.model?.includes("deepseek-v4-")
 			const isToolCallTest = options.tools?.length > 0
 
 			// Return async iterator for streaming
@@ -247,6 +248,36 @@ describe("DeepSeekHandler", () => {
 			expect((model.info as ModelInfo).preserveReasoning).toBeUndefined()
 		})
 
+		it("should return correct model info for deepseek-v4-pro", () => {
+			const handlerWithV4Pro = new DeepSeekHandler({
+				...mockOptions,
+				apiModelId: "deepseek-v4-pro",
+			})
+			const model = handlerWithV4Pro.getModel()
+			expect(model.id).toBe("deepseek-v4-pro")
+			expect(model.info).toBeDefined()
+			expect(model.info.maxTokens).toBe(384_000) // 384K max output
+			expect(model.info.contextWindow).toBe(1_000_000) // 1M context
+			expect(model.info.supportsImages).toBe(true)
+			expect(model.info.supportsPromptCache).toBe(true)
+			expect((model.info as ModelInfo).preserveReasoning).toBe(true)
+		})
+
+		it("should return correct model info for deepseek-v4-flash", () => {
+			const handlerWithV4Flash = new DeepSeekHandler({
+				...mockOptions,
+				apiModelId: "deepseek-v4-flash",
+			})
+			const model = handlerWithV4Flash.getModel()
+			expect(model.id).toBe("deepseek-v4-flash")
+			expect(model.info).toBeDefined()
+			expect(model.info.maxTokens).toBe(384_000) // 384K max output
+			expect(model.info.contextWindow).toBe(1_000_000) // 1M context
+			expect(model.info.supportsImages).toBe(true)
+			expect(model.info.supportsPromptCache).toBe(true)
+			expect((model.info as ModelInfo).preserveReasoning).toBe(true)
+		})
+
 		it("should return provided model ID with default model info if model does not exist", () => {
 			const handlerWithInvalidModel = new DeepSeekHandler({
 				...mockOptions,
@@ -475,6 +506,46 @@ describe("DeepSeekHandler", () => {
 			expect(callArgs.thinking).toBeUndefined()
 		})
 
+		it("should pass thinking parameter for deepseek-v4-pro model", async () => {
+			const v4ProHandler = new DeepSeekHandler({
+				...mockOptions,
+				apiModelId: "deepseek-v4-pro",
+			})
+
+			const stream = v4ProHandler.createMessage(systemPrompt, messages)
+			for await (const _chunk of stream) {
+				// Consume the stream
+			}
+
+			// Verify that the thinking parameter was passed to the API
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					thinking: { type: "enabled" },
+				}),
+				{},
+			)
+		})
+
+		it("should pass thinking parameter for deepseek-v4-flash model", async () => {
+			const v4FlashHandler = new DeepSeekHandler({
+				...mockOptions,
+				apiModelId: "deepseek-v4-flash",
+			})
+
+			const stream = v4FlashHandler.createMessage(systemPrompt, messages)
+			for await (const _chunk of stream) {
+				// Consume the stream
+			}
+
+			// Verify that the thinking parameter was passed to the API
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					thinking: { type: "enabled" },
+				}),
+				{},
+			)
+		})
+
 		it("should handle tool calls with reasoning_content", async () => {
 			const reasonerHandler = new DeepSeekHandler({
 				...mockOptions,
diff --git a/src/api/providers/deepseek.ts b/src/api/providers/deepseek.ts
index 84cd557de0..dec52482fe 100644
--- a/src/api/providers/deepseek.ts
+++ b/src/api/providers/deepseek.ts
@@ -55,8 +55,9 @@ export class DeepSeekHandler extends OpenAiHandler {
 		const modelId = this.options.apiModelId ?? deepSeekDefaultModelId
 		const { info: modelInfo } = this.getModel()
 
-		// Check if this is a thinking-enabled model (deepseek-reasoner)
-		const isThinkingModel = modelId.includes("deepseek-reasoner")
+		// Check if this is a thinking-enabled model using the preserveReasoning flag
+		// This covers deepseek-reasoner, deepseek-v4-pro, deepseek-v4-flash, and future thinking models
+		const isThinkingModel = "preserveReasoning" in modelInfo && modelInfo.preserveReasoning === true
 
 		// Convert messages to R1 format (merges consecutive same-role messages)
 		// This is required for DeepSeek which does not support successive messages with the same role