24 changes: 24 additions & 0 deletions packages/types/src/providers/deepseek.ts
@@ -32,6 +32,30 @@ export const deepSeekModels = {
cacheReadsPrice: 0.028, // $0.028 per million tokens (cache hit) - Updated Dec 9, 2025
description: `DeepSeek-V3.2 (Thinking Mode) achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks. Supports Chain of Thought reasoning with up to 8K output tokens. Supports JSON output, tool calls, and chat prefix completion (beta).`,
},
"deepseek-v4-pro": {
maxTokens: 384_000, // 384K max output
contextWindow: 1_000_000, // 1M context
supportsImages: true,
supportsPromptCache: true,
preserveReasoning: true,
inputPrice: 2.0, // $2.00 per million tokens (cache miss)
outputPrice: 8.0, // $8.00 per million tokens
cacheWritesPrice: 2.0, // $2.00 per million tokens (cache miss)
cacheReadsPrice: 0.5, // $0.50 per million tokens (cache hit)
description: `DeepSeek-V4-Pro is the flagship reasoning model with 1M context window and 384K max output. Features enhanced thinking mode, vision support, tool calls, and JSON output. Best suited for complex reasoning, code generation, and multi-step tasks.`,
},
"deepseek-v4-flash": {
maxTokens: 384_000, // 384K max output
contextWindow: 1_000_000, // 1M context
supportsImages: true,
supportsPromptCache: true,
preserveReasoning: true,
inputPrice: 1.0, // $1.00 per million tokens (cache miss)
outputPrice: 4.0, // $4.00 per million tokens
cacheWritesPrice: 1.0, // $1.00 per million tokens (cache miss)
cacheReadsPrice: 0.25, // $0.25 per million tokens (cache hit)
description: `DeepSeek-V4-Flash is a fast, cost-efficient reasoning model with 1M context window and 384K max output. Features thinking mode, vision support, tool calls, and JSON output. Optimized for speed while maintaining strong reasoning capabilities.`,
},
} as const satisfies Record<string, ModelInfo>

// https://api-docs.deepseek.com/quick_start/parameter_settings
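For orientation, a minimal sketch (not part of this diff) of how a caller could resolve one of the new entries from `deepSeekModels`; the import path and the `deepSeekDefaultModelId` fallback are assumptions based on how the handler below references them:

```ts
import { deepSeekModels, deepSeekDefaultModelId } from "@roo-code/types" // import path assumed

// Resolve the configured model, falling back to the default when the ID is unknown.
const configuredId: string = "deepseek-v4-pro"
const info =
	configuredId in deepSeekModels
		? deepSeekModels[configuredId as keyof typeof deepSeekModels]
		: deepSeekModels[deepSeekDefaultModelId]

console.log(info.contextWindow) // 1_000_000 for the new V4 entries
console.log(info.maxTokens) // 384_000
```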
73 changes: 72 additions & 1 deletion src/api/providers/__tests__/deepseek.spec.ts
@@ -30,7 +30,8 @@ vi.mock("openai", () => {
}

// Check if this is a reasoning_content test by looking at model
- const isReasonerModel = options.model?.includes("deepseek-reasoner")
+ const isReasonerModel =
+     options.model?.includes("deepseek-reasoner") || options.model?.includes("deepseek-v4-")
const isToolCallTest = options.tools?.length > 0

// Return async iterator for streaming
@@ -247,6 +248,36 @@ describe("DeepSeekHandler", () => {
expect((model.info as ModelInfo).preserveReasoning).toBeUndefined()
})

it("should return correct model info for deepseek-v4-pro", () => {
const handlerWithV4Pro = new DeepSeekHandler({
...mockOptions,
apiModelId: "deepseek-v4-pro",
})
const model = handlerWithV4Pro.getModel()
expect(model.id).toBe("deepseek-v4-pro")
expect(model.info).toBeDefined()
expect(model.info.maxTokens).toBe(384_000) // 384K max output
expect(model.info.contextWindow).toBe(1_000_000) // 1M context
expect(model.info.supportsImages).toBe(true)
expect(model.info.supportsPromptCache).toBe(true)
expect((model.info as ModelInfo).preserveReasoning).toBe(true)
})

it("should return correct model info for deepseek-v4-flash", () => {
const handlerWithV4Flash = new DeepSeekHandler({
...mockOptions,
apiModelId: "deepseek-v4-flash",
})
const model = handlerWithV4Flash.getModel()
expect(model.id).toBe("deepseek-v4-flash")
expect(model.info).toBeDefined()
expect(model.info.maxTokens).toBe(384_000) // 384K max output
expect(model.info.contextWindow).toBe(1_000_000) // 1M context
expect(model.info.supportsImages).toBe(true)
expect(model.info.supportsPromptCache).toBe(true)
expect((model.info as ModelInfo).preserveReasoning).toBe(true)
})

it("should return provided model ID with default model info if model does not exist", () => {
const handlerWithInvalidModel = new DeepSeekHandler({
...mockOptions,
@@ -475,6 +506,46 @@ describe("DeepSeekHandler", () => {
expect(callArgs.thinking).toBeUndefined()
})

it("should pass thinking parameter for deepseek-v4-pro model", async () => {
const v4ProHandler = new DeepSeekHandler({
...mockOptions,
apiModelId: "deepseek-v4-pro",
})

const stream = v4ProHandler.createMessage(systemPrompt, messages)
for await (const _chunk of stream) {
// Consume the stream
}

// Verify that the thinking parameter was passed to the API
expect(mockCreate).toHaveBeenCalledWith(
expect.objectContaining({
thinking: { type: "enabled" },
}),
{},
)
})

it("should pass thinking parameter for deepseek-v4-flash model", async () => {
const v4FlashHandler = new DeepSeekHandler({
...mockOptions,
apiModelId: "deepseek-v4-flash",
})

const stream = v4FlashHandler.createMessage(systemPrompt, messages)
for await (const _chunk of stream) {
// Consume the stream
}

// Verify that the thinking parameter was passed to the API
expect(mockCreate).toHaveBeenCalledWith(
expect.objectContaining({
thinking: { type: "enabled" },
}),
{},
)
})

it("should handle tool calls with reasoning_content", async () => {
const reasonerHandler = new DeepSeekHandler({
...mockOptions,
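The `deepseek-v4-` branch added to the mock above exists so that V4 models exercise the same reasoning-content streaming path as `deepseek-reasoner`. As a hypothetical sketch (the real mock body is not shown in this diff), a reasoner-style test double might stream chunks of roughly this shape:

```ts
// Hypothetical stand-in for the OpenAI-compatible stream the mock would yield
// for reasoner/V4 models; field names follow DeepSeek's reasoning_content delta format.
async function* fakeReasonerStream() {
	yield { choices: [{ delta: { reasoning_content: "Considering the request..." } }] }
	yield { choices: [{ delta: { content: "Final answer." } }] }
	yield { choices: [{ delta: {} }], usage: { prompt_tokens: 10, completion_tokens: 5 } }
}
```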
5 changes: 3 additions & 2 deletions src/api/providers/deepseek.ts
@@ -55,8 +55,9 @@ export class DeepSeekHandler extends OpenAiHandler {
const modelId = this.options.apiModelId ?? deepSeekDefaultModelId
const { info: modelInfo } = this.getModel()

- // Check if this is a thinking-enabled model (deepseek-reasoner)
- const isThinkingModel = modelId.includes("deepseek-reasoner")
+ // Check if this is a thinking-enabled model using the preserveReasoning flag
+ // This covers deepseek-reasoner, deepseek-v4-pro, deepseek-v4-flash, and future thinking models
+ const isThinkingModel = "preserveReasoning" in modelInfo && modelInfo.preserveReasoning === true

// Convert messages to R1 format (merges consecutive same-role messages)
// This is required for DeepSeek which does not support successive messages with the same role
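A minimal sketch of how the `preserveReasoning` check presumably feeds the request; the `thinking` parameter shape is taken from the test expectations above, and the R1-format helper name is an assumption (the full `createMessage` body is outside this diff):

```ts
// Sketch only; not the actual handler code.
const requestParams: Record<string, unknown> = {
	model: modelId,
	messages: convertToR1Format(messages), // assumed helper that merges consecutive same-role messages
	stream: true,
}

if (isThinkingModel) {
	// Matches the expectation asserted in deepseek.spec.ts
	requestParams.thinking = { type: "enabled" }
}
```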