Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions packages/types/src/model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ export type ReasoningEffortWithMinimal = z.infer<typeof reasoningEffortWithMinim
* Extended Reasoning Effort (includes "none" and "minimal")
* Note: "disable" is a UI/control value, not a value sent as effort
*/
export const reasoningEffortsExtended = ["none", "minimal", "low", "medium", "high", "xhigh"] as const
export const reasoningEffortsExtended = ["none", "minimal", "low", "medium", "high", "xhigh", "max"] as const

export const reasoningEffortExtendedSchema = z.enum(reasoningEffortsExtended)

Expand All @@ -32,7 +32,7 @@ export type ReasoningEffortExtended = z.infer<typeof reasoningEffortExtendedSche
/**
* Reasoning Effort user setting (includes "disable")
*/
export const reasoningEffortSettingValues = ["disable", "none", "minimal", "low", "medium", "high", "xhigh"] as const
export const reasoningEffortSettingValues = ["disable", "none", "minimal", "low", "medium", "high", "xhigh", "max"] as const
export const reasoningEffortSettingSchema = z.enum(reasoningEffortSettingValues)

/**
Expand Down Expand Up @@ -89,7 +89,7 @@ export const modelInfoSchema = z.object({
defaultTemperature: z.number().optional(),
requiredReasoningBudget: z.boolean().optional(),
supportsReasoningEffort: z
.union([z.boolean(), z.array(z.enum(["disable", "none", "minimal", "low", "medium", "high", "xhigh"]))])
.union([z.boolean(), z.array(z.enum(["disable", "none", "minimal", "low", "medium", "high", "xhigh", "max"]))])
.optional(),
requiredReasoningEffort: z.boolean().optional(),
preserveReasoning: z.boolean().optional(),
Expand Down
38 changes: 33 additions & 5 deletions packages/types/src/providers/deepseek.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import type { ModelInfo } from "../model.js"

// https://platform.deepseek.com/docs/api
// https://api-docs.deepseek.com/zh-cn/quick_start/pricing
// preserveReasoning enables interleaved thinking mode for tool calls:
// DeepSeek requires reasoning_content to be passed back during tool call
// continuation within the same turn. See: https://api-docs.deepseek.com/guides/thinking_mode
export type DeepSeekModelId = keyof typeof deepSeekModels

export const deepSeekDefaultModelId: DeepSeekModelId = "deepseek-chat"
export const deepSeekDefaultModelId: DeepSeekModelId = "deepseek-v4-flash"

export const deepSeekModels = {
"deepseek-chat": {
Expand All @@ -18,7 +18,7 @@ export const deepSeekModels = {
outputPrice: 0.42, // $0.42 per million tokens - Updated Dec 9, 2025
cacheWritesPrice: 0.28, // $0.28 per million tokens (cache miss) - Updated Dec 9, 2025
cacheReadsPrice: 0.028, // $0.028 per million tokens (cache hit) - Updated Dec 9, 2025
description: `DeepSeek-V3.2 (Non-thinking Mode) achieves a significant breakthrough in inference speed over previous models. It tops the leaderboard among open-source models and rivals the most advanced closed-source models globally. Supports JSON output, tool calls, chat prefix completion (beta), and FIM completion (beta).`,
description: `DeepSeek-V3.2 (Non-thinking Mode) - Legacy model. Use deepseek-v4-flash for better performance.`,
},
"deepseek-reasoner": {
maxTokens: 8192, // 8K max output
Expand All @@ -30,9 +30,37 @@ export const deepSeekModels = {
outputPrice: 0.42, // $0.42 per million tokens - Updated Dec 9, 2025
cacheWritesPrice: 0.28, // $0.28 per million tokens (cache miss) - Updated Dec 9, 2025
cacheReadsPrice: 0.028, // $0.028 per million tokens (cache hit) - Updated Dec 9, 2025
description: `DeepSeek-V3.2 (Thinking Mode) achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks. Supports Chain of Thought reasoning with up to 8K output tokens. Supports JSON output, tool calls, and chat prefix completion (beta).`,
description: `DeepSeek-V3.2 (Thinking Mode) - Legacy model. Use deepseek-v4-pro for better performance.`,
},
"deepseek-v4-flash": {
maxTokens: 384_000, // 384K max output
contextWindow: 1_000_000, // 1M context window
supportsImages: false,
supportsPromptCache: true,
preserveReasoning: true, // Also supports thinking mode
supportsReasoningEffort: ["disable", "high", "max"],
reasoningEffort: "high",
inputPrice: 0.14, // $0.14 per million tokens (cache miss, ¥1/M)
outputPrice: 0.28, // $0.28 per million tokens (¥2/M)
cacheWritesPrice: 0.14, // $0.14 per million tokens (cache miss, ¥1/M)
cacheReadsPrice: 0.03, // $0.03 per million tokens (cache hit, ¥0.2/M)
description: `DeepSeek-V4-Flash - Fast and efficient model with 1M context window and 384K max output. Supports thinking mode for better reasoning. Best for general tasks. Supports JSON output, tool calls, and prompt caching.`,
},
"deepseek-v4-pro": {
maxTokens: 384_000, // 384K max output
contextWindow: 1_000_000, // 1M context window
supportsImages: false,
supportsPromptCache: true,
preserveReasoning: true, // Enables interleaved thinking mode for tool calls
supportsReasoningEffort: ["disable", "high", "max"],
reasoningEffort: "high",
inputPrice: 1.68, // $1.68 per million tokens (cache miss, ¥12/M)
outputPrice: 3.36, // $3.36 per million tokens (¥24/M)
cacheWritesPrice: 1.68, // $1.68 per million tokens (cache miss, ¥12/M)
cacheReadsPrice: 0.14, // $0.14 per million tokens (cache hit, ¥1/M)
description: `DeepSeek-V4-Pro (Thinking Mode) - Advanced reasoning model with Chain of Thought capabilities. 1M context window, 384K max output. Supports reasoning_effort parameter (high/max) for deeper thinking. Ideal for complex reasoning, math, and code tasks.`,
},
} as const satisfies Record<string, ModelInfo>

// https://api-docs.deepseek.com/quick_start/parameter_settings
export const DEEP_SEEK_DEFAULT_TEMPERATURE = 0.3
export const DEEP_SEEK_DEFAULT_TEMPERATURE = 0.3
9 changes: 4 additions & 5 deletions src/api/providers/__tests__/deepseek.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -255,12 +255,11 @@ describe("DeepSeekHandler", () => {
const model = handlerWithInvalidModel.getModel()
expect(model.id).toBe("invalid-model") // Returns provided ID
expect(model.info).toBeDefined()
// With the current implementation, it's the same object reference when using default model info
expect(model.info).toBe(handler.getModel().info)
// Should have the same base properties
expect(model.info.contextWindow).toBe(handler.getModel().info.contextWindow)
// And should have supportsPromptCache set to true
// Falls back to the default model (deepseek-v4-flash) when ID is invalid
expect(model.info.maxTokens).toBe(384_000) // v4-flash: 384K max output
expect(model.info.contextWindow).toBe(1_000_000) // v4-flash: 1M context window
expect(model.info.supportsPromptCache).toBe(true)
expect(model.info.preserveReasoning).toBe(true) // v4-flash supports thinking mode
})

it("should return default model if no model ID is provided", () => {
Expand Down
120 changes: 114 additions & 6 deletions src/api/providers/deepseek.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import type { ApiHandlerCreateMessageMetadata } from "../index"
// Custom interface for DeepSeek params to support thinking mode
type DeepSeekChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParamsStreaming & {
thinking?: { type: "enabled" | "disabled" }
reasoning_effort?: "high" | "max"
}

export class DeepSeekHandler extends OpenAiHandler {
Expand Down Expand Up @@ -55,27 +56,44 @@ export class DeepSeekHandler extends OpenAiHandler {
const modelId = this.options.apiModelId ?? deepSeekDefaultModelId
const { info: modelInfo } = this.getModel()

// Check if this is a thinking-enabled model (deepseek-reasoner)
const isThinkingModel = modelId.includes("deepseek-reasoner")
// Whether the model inherently supports thinking mode via preserveReasoning
const hasThinkingCapability = modelInfo.preserveReasoning || modelId.includes("deepseek-v4-pro") || modelId.includes("deepseek-reasoner")
// Respect user's toggle: enableReasoningEffort=false means disable thinking entirely
// reasoningEffort="disable" also turns off thinking
const isThinkingDisabled = this.options.enableReasoningEffort === false || (this.options as any).reasoningEffort === "disable"
const isThinkingModel = hasThinkingCapability && !isThinkingDisabled

// Convert messages to R1 format (merges consecutive same-role messages)
// This is required for DeepSeek which does not support successive messages with the same role
// For thinking models (deepseek-reasoner), enable mergeToolResultText to preserve reasoning_content
// For thinking models, enable mergeToolResultText to preserve reasoning_content
// during tool call sequences. Without this, environment_details text after tool_results would
// create user messages that cause DeepSeek to drop all previous reasoning_content.
// See: https://api-docs.deepseek.com/guides/thinking_mode
const convertedMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages], {
mergeToolResultText: isThinkingModel,
})

// Pre-flight check: ensure reasoning_content is preserved on assistant messages
// when thinking mode is enabled. DeepSeek requires reasoning_content from previous
// turns to be passed back, otherwise it returns 400 error.
// See: https://api-docs.deepseek.com/guides/thinking_mode
if (isThinkingModel) {
ensureReasoningContentPreserved(convertedMessages, messages)
}

const requestOptions: DeepSeekChatCompletionParams = {
model: modelId,
temperature: this.options.modelTemperature ?? DEEP_SEEK_DEFAULT_TEMPERATURE,
messages: convertedMessages,
stream: true as const,
stream_options: { include_usage: true },
// Enable thinking mode for deepseek-reasoner or when tools are used with thinking model
// Enable thinking mode for thinking-enabled models (respects user toggle)
...(isThinkingModel && { thinking: { type: "enabled" } }),
// Add reasoning_effort for v4 models (can be "high" or "max")
// Only sent when thinking is enabled; user can set to "max" via settings
...((modelId.includes("deepseek-v4-flash") || modelId.includes("deepseek-v4-pro")) && isThinkingModel && {
reasoning_effort: (this.options as any).reasoningEffort === "max" ? "max" : "high",
}),
tools: this.convertToolsForOpenAI(metadata?.tools),
tool_choice: metadata?.tool_choice,
parallel_tool_calls: metadata?.parallelToolCalls ?? true,
Expand All @@ -94,8 +112,29 @@ export class DeepSeekHandler extends OpenAiHandler {
isAzureAiInference ? { path: OPENAI_AZURE_AI_INFERENCE_PATH } : {},
)
} catch (error) {
const { handleOpenAIError } = await import("./utils/openai-error-handler")
throw handleOpenAIError(error, "DeepSeek")
// Attempt graceful degradation for thinking-mode reasoning_content errors.
// This happens when DeepSeek requires reasoning_content to be passed back
// but it was lost during message conversion (e.g., after conversation condense).
// We retry without thinking enabled as a safe fallback.
const errorMessage = String(error)
if (
isThinkingModel &&
errorMessage.includes("reasoning_content") &&
errorMessage.includes("must be passed back")
) {
console.warn("[DeepSeek] reasoning_content missing, retrying without thinking mode")
const retryOptions: DeepSeekChatCompletionParams = {
...requestOptions,
thinking: undefined,
}
stream = await this.client.chat.completions.create(
retryOptions,
isAzureAiInference ? { path: OPENAI_AZURE_AI_INFERENCE_PATH } : {},
)
} else {
const { handleOpenAIError } = await import("./utils/openai-error-handler")
throw handleOpenAIError(error, "DeepSeek")
}
}

let lastUsage
Expand Down Expand Up @@ -154,3 +193,72 @@ export class DeepSeekHandler extends OpenAiHandler {
}
}
}

/**
* Pre-flight validation: ensures converted OpenAI messages retain reasoning_content
* from source Anthropic messages when thinking mode is enabled.
*
* DeepSeek's thinking mode requires reasoning_content from previous assistant
* responses to be passed back in subsequent requests within the same turn.
* If convertToR1Format failed to preserve it (e.g., edge cases with nested
* tool calls or conversation condense), we patch it here as a safety net.
*
* @param convertedMessages - The messages after convertToR1Format (will be mutated)
* @param sourceMessages - The original Anthropic messages before conversion
*/
function ensureReasoningContentPreserved(
convertedMessages: OpenAI.Chat.ChatCompletionMessageParam[],
sourceMessages: Anthropic.Messages.MessageParam[],
): void {
// Scan source messages for any assistant message that had reasoning
const sourceReasoning = extractReasoningFromMessages(sourceMessages)
if (!sourceReasoning) {
return // No reasoning in source, nothing to preserve
}

// Check if converted assistant messages already have reasoning_content
const assistantMsgs = convertedMessages.filter((m) => m.role === "assistant")
const hasReasoningInConverted = assistantMsgs.some(
(msg: any) => typeof msg.reasoning_content === "string" && msg.reasoning_content.trim().length > 0,
)

if (hasReasoningInConverted) {
return // Already preserved correctly
}

// Reasoning was lost during conversion — patch it onto the last assistant
// message that has tool_calls (this is the one DeepSeek requires it on).
const lastToolAssistant = [...assistantMsgs].reverse().find((msg: any) => msg.tool_calls)
if (lastToolAssistant) {
;(lastToolAssistant as any).reasoning_content = sourceReasoning
}
}

/**
 * Extracts reasoning_content from Anthropic messages.
 * Checks both message-level reasoning_content and content blocks with type "reasoning".
 *
 * Scans in REVERSE (newest-first) so the most recent assistant reasoning wins:
 * the caller patches the returned string onto the latest tool-call assistant
 * message, and DeepSeek's thinking mode expects the reasoning from the current
 * turn to be passed back during tool-call continuation. A forward scan would
 * return stale reasoning from an earlier turn whenever multiple assistant
 * messages carry reasoning.
 *
 * @param messages - Original Anthropic messages (searched newest-first)
 * @returns The most recent non-empty reasoning string, or undefined if none exists
 */
function extractReasoningFromMessages(
	messages: Anthropic.Messages.MessageParam[],
): string | undefined {
	// Walk newest-to-oldest so we surface the reasoning for the current turn.
	for (let i = messages.length - 1; i >= 0; i--) {
		const msg = messages[i]
		if (msg.role !== "assistant") continue

		// Check message-level reasoning_content (set by some providers directly)
		const msgReasoning = (msg as any).reasoning_content
		if (typeof msgReasoning === "string" && msgReasoning.trim().length > 0) {
			return msgReasoning
		}

		// Check content blocks for reasoning type (Task.ts stores it this way)
		if (Array.isArray(msg.content)) {
			for (const block of msg.content as any[]) {
				if (block.type === "reasoning" && typeof block.text === "string" && block.text.trim().length > 0) {
					return block.text
				}
			}
		}
	}

	return undefined
}
6 changes: 6 additions & 0 deletions src/shared/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@ export type ApiHandlerOptions = Omit<ProviderSettings, "apiProvider"> & {
* When undefined, Ollama will use the model's default num_ctx from the Modelfile.
*/
ollamaNumCtx?: number
/**
* Optional reasoning_effort parameter for DeepSeek v4-pro model.
* Controls the depth of reasoning: "high" or "max".
* When undefined, defaults to "high".
*/
reasoningEffort?: "high" | "max"
}

// RouterName
Expand Down
6 changes: 3 additions & 3 deletions webview-ui/src/components/settings/ThinkingBudget.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ export const ThinkingBudget = ({ apiConfiguration, setApiConfigurationField, mod
// 1. requiredReasoningEffort is not true, AND
// 2. supportsReasoningEffort is boolean true (not an explicit array)
// When the model provides an explicit array, respect those exact values.
type ReasoningEffortOption = ReasoningEffortWithMinimal | "none" | "disable"
type ReasoningEffortOption = ReasoningEffortWithMinimal | "none" | "disable" | "max"
const shouldAutoAddDisable =
!modelInfo?.requiredReasoningEffort && supports === true && !baseAvailableOptions.includes("disable" as any)
const availableOptions: ReadonlyArray<ReasoningEffortOption> = shouldAutoAddDisable
Expand Down Expand Up @@ -240,9 +240,9 @@ export const ThinkingBudget = ({ apiConfiguration, setApiConfigurationField, mod
setApiConfigurationField("enableReasoningEffort", false)
setApiConfigurationField("reasoningEffort", "disable")
} else {
// "none", "minimal", "low", "medium", "high" all enable reasoning
// "none", "minimal", "low", "medium", "high", "max" all enable reasoning
setApiConfigurationField("enableReasoningEffort", true)
setApiConfigurationField("reasoningEffort", value as ReasoningEffortWithMinimal)
setApiConfigurationField("reasoningEffort", value as any)
}
}}>
<SelectTrigger className="w-full">
Expand Down
3 changes: 2 additions & 1 deletion webview-ui/src/i18n/locales/en/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -601,7 +601,8 @@
"low": "Low",
"medium": "Medium",
"high": "High",
"xhigh": "Extra High"
"xhigh": "Extra High",
"max": "Max"
},
"verbosity": {
"label": "Output Verbosity",
Expand Down
3 changes: 2 additions & 1 deletion webview-ui/src/i18n/locales/zh-CN/settings.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading