diff --git a/packages/types/src/model.ts b/packages/types/src/model.ts
index 926f29a0a1a..85c957984ee 100644
--- a/packages/types/src/model.ts
+++ b/packages/types/src/model.ts
@@ -23,7 +23,7 @@ export type ReasoningEffortWithMinimal = z.infer
 // https://api-docs.deepseek.com/quick_start/parameter_settings
-export const DEEP_SEEK_DEFAULT_TEMPERATURE = 0.3
+export const DEEP_SEEK_DEFAULT_TEMPERATURE = 0.3
\ No newline at end of file
diff --git a/src/api/providers/__tests__/deepseek.spec.ts b/src/api/providers/__tests__/deepseek.spec.ts
index cbbc61ad4d0..b86a873a976 100644
--- a/src/api/providers/__tests__/deepseek.spec.ts
+++ b/src/api/providers/__tests__/deepseek.spec.ts
@@ -255,12 +255,11 @@ describe("DeepSeekHandler", () => {
 			const model = handlerWithInvalidModel.getModel()
 			expect(model.id).toBe("invalid-model") // Returns provided ID
 			expect(model.info).toBeDefined()
-			// With the current implementation, it's the same object reference when using default model info
-			expect(model.info).toBe(handler.getModel().info)
-			// Should have the same base properties
-			expect(model.info.contextWindow).toBe(handler.getModel().info.contextWindow)
-			// And should have supportsPromptCache set to true
+			// Falls back to the default model (deepseek-v4-flash) when ID is invalid
+			expect(model.info.maxTokens).toBe(384_000) // v4-flash: 384K max output
+			expect(model.info.contextWindow).toBe(1_000_000) // v4-flash: 1M context window
 			expect(model.info.supportsPromptCache).toBe(true)
+			expect(model.info.preserveReasoning).toBe(true) // v4-flash supports thinking mode
 		})

 		it("should return default model if no model ID is provided", () => {
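The rewritten assertions pin the fallback behavior to concrete v4-flash numbers instead of comparing object references. A minimal sketch of the fallback the test exercises, using an illustrative model map rather than the real definitions in packages/types (the names and shape below are assumptions drawn from the test, not the shipped code):

interface ModelInfo {
	maxTokens: number
	contextWindow: number
	supportsPromptCache: boolean
	preserveReasoning?: boolean
}

// Illustrative stand-in for the real model table in packages/types.
const deepSeekModels: Record<string, ModelInfo> = {
	"deepseek-v4-flash": {
		maxTokens: 384_000,
		contextWindow: 1_000_000,
		supportsPromptCache: true,
		preserveReasoning: true,
	},
}

const deepSeekDefaultModelId = "deepseek-v4-flash"

function getModel(id: string): { id: string; info: ModelInfo } {
	// Unknown IDs keep the caller's string but inherit the default model's info,
	// which is exactly what the updated assertions check.
	const info = deepSeekModels[id] ?? deepSeekModels[deepSeekDefaultModelId]
	return { id, info }
}

console.log(getModel("invalid-model").info.contextWindow) // 1_000_000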
diff --git a/src/api/providers/deepseek.ts b/src/api/providers/deepseek.ts
index 84cd557de05..9d66f1a434c 100644
--- a/src/api/providers/deepseek.ts
+++ b/src/api/providers/deepseek.ts
@@ -20,6 +20,7 @@ import type { ApiHandlerCreateMessageMetadata } from "../index"
 // Custom interface for DeepSeek params to support thinking mode
 type DeepSeekChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParamsStreaming & {
 	thinking?: { type: "enabled" | "disabled" }
+	reasoning_effort?: "high" | "max"
 }

 export class DeepSeekHandler extends OpenAiHandler {
@@ -55,12 +56,16 @@ export class DeepSeekHandler extends OpenAiHandler {
 		const modelId = this.options.apiModelId ?? deepSeekDefaultModelId
 		const { info: modelInfo } = this.getModel()

-		// Check if this is a thinking-enabled model (deepseek-reasoner)
-		const isThinkingModel = modelId.includes("deepseek-reasoner")
+		// Whether the model inherently supports thinking mode via preserveReasoning
+		const hasThinkingCapability = modelInfo.preserveReasoning || modelId.includes("deepseek-v4-pro") || modelId.includes("deepseek-reasoner")
+		// Respect the user's toggle: enableReasoningEffort=false disables thinking entirely;
+		// reasoningEffort="disable" also turns off thinking
+		const isThinkingDisabled = this.options.enableReasoningEffort === false || (this.options as any).reasoningEffort === "disable"
+		const isThinkingModel = hasThinkingCapability && !isThinkingDisabled

 		// Convert messages to R1 format (merges consecutive same-role messages)
 		// This is required for DeepSeek, which does not support successive messages with the same role
-		// For thinking models (deepseek-reasoner), enable mergeToolResultText to preserve reasoning_content
+		// For thinking models, enable mergeToolResultText to preserve reasoning_content
 		// during tool call sequences. Without this, environment_details text after tool_results would
 		// create user messages that cause DeepSeek to drop all previous reasoning_content.
 		// See: https://api-docs.deepseek.com/guides/thinking_mode
@@ -68,14 +73,27 @@ export class DeepSeekHandler extends OpenAiHandler {
 			mergeToolResultText: isThinkingModel,
 		})

+		// Pre-flight check: ensure reasoning_content is preserved on assistant messages
+		// when thinking mode is enabled. DeepSeek requires reasoning_content from previous
+		// turns to be passed back; otherwise it returns a 400 error.
+		// See: https://api-docs.deepseek.com/guides/thinking_mode
+		if (isThinkingModel) {
+			ensureReasoningContentPreserved(convertedMessages, messages)
+		}
+
 		const requestOptions: DeepSeekChatCompletionParams = {
 			model: modelId,
 			temperature: this.options.modelTemperature ?? DEEP_SEEK_DEFAULT_TEMPERATURE,
 			messages: convertedMessages,
 			stream: true as const,
 			stream_options: { include_usage: true },
-			// Enable thinking mode for deepseek-reasoner or when tools are used with thinking model
+			// Enable thinking mode for thinking-enabled models (respects user toggle)
 			...(isThinkingModel && { thinking: { type: "enabled" } }),
+			// Add reasoning_effort for v4 models (can be "high" or "max")
+			// Only sent when thinking is enabled; the user can set it to "max" via settings
+			...((modelId.includes("deepseek-v4-flash") || modelId.includes("deepseek-v4-pro")) && isThinkingModel && {
+				reasoning_effort: (this.options as any).reasoningEffort === "max" ? "max" : "high",
+			}),
 			tools: this.convertToolsForOpenAI(metadata?.tools),
 			tool_choice: metadata?.tool_choice,
 			parallel_tool_calls: metadata?.parallelToolCalls ?? true,
@@ -94,8 +112,29 @@ export class DeepSeekHandler extends OpenAiHandler {
 				isAzureAiInference ? { path: OPENAI_AZURE_AI_INFERENCE_PATH } : {},
 			)
 		} catch (error) {
-			const { handleOpenAIError } = await import("./utils/openai-error-handler")
-			throw handleOpenAIError(error, "DeepSeek")
+			// Attempt graceful degradation for thinking-mode reasoning_content errors.
+			// This happens when DeepSeek requires reasoning_content to be passed back
+			// but it was lost during message conversion (e.g., after conversation condense).
+			// We retry without thinking enabled as a safe fallback.
+			const errorMessage = String(error)
+			if (
+				isThinkingModel &&
+				errorMessage.includes("reasoning_content") &&
+				errorMessage.includes("must be passed back")
+			) {
+				console.warn("[DeepSeek] reasoning_content missing, retrying without thinking mode")
+				const retryOptions: DeepSeekChatCompletionParams = {
+					...requestOptions,
+					thinking: undefined,
+				}
+				stream = await this.client.chat.completions.create(
+					retryOptions,
+					isAzureAiInference ? { path: OPENAI_AZURE_AI_INFERENCE_PATH } : {},
+				)
+			} else {
+				const { handleOpenAIError } = await import("./utils/openai-error-handler")
+				throw handleOpenAIError(error, "DeepSeek")
+			}
 		}

 		let lastUsage
@@ -154,3 +193,72 @@ export class DeepSeekHandler extends OpenAiHandler {
 		}
 	}
 }
+
+/**
+ * Pre-flight validation: ensures converted OpenAI messages retain reasoning_content
+ * from source Anthropic messages when thinking mode is enabled.
+ *
+ * DeepSeek's thinking mode requires reasoning_content from previous assistant
+ * responses to be passed back in subsequent requests within the same turn.
+ * If convertToR1Format failed to preserve it (e.g., edge cases with nested
+ * tool calls or conversation condense), we patch it here as a safety net.
+ *
+ * @param convertedMessages - The messages after convertToR1Format (will be mutated)
+ * @param sourceMessages - The original Anthropic messages before conversion
+ */
+function ensureReasoningContentPreserved(
+	convertedMessages: OpenAI.Chat.ChatCompletionMessageParam[],
+	sourceMessages: Anthropic.Messages.MessageParam[],
+): void {
+	// Scan source messages for any assistant message that had reasoning
+	const sourceReasoning = extractReasoningFromMessages(sourceMessages)
+	if (!sourceReasoning) {
+		return // No reasoning in source, nothing to preserve
+	}
+
+	// Check if converted assistant messages already have reasoning_content
+	const assistantMsgs = convertedMessages.filter((m) => m.role === "assistant")
+	const hasReasoningInConverted = assistantMsgs.some(
+		(msg: any) => typeof msg.reasoning_content === "string" && msg.reasoning_content.trim().length > 0,
+	)

+	if (hasReasoningInConverted) {
+		return // Already preserved correctly
+	}
+
+	// Reasoning was lost during conversion; patch it onto the last assistant
+	// message that has tool_calls (this is the one DeepSeek requires it on).
+	const lastToolAssistant = [...assistantMsgs].reverse().find((msg: any) => msg.tool_calls)
+	if (lastToolAssistant) {
+		;(lastToolAssistant as any).reasoning_content = sourceReasoning
+	}
+}
+
+/**
+ * Extracts reasoning_content from Anthropic messages.
+ * Checks both message-level reasoning_content and content blocks with type "reasoning".
+ */
+function extractReasoningFromMessages(
+	messages: Anthropic.Messages.MessageParam[],
+): string | undefined {
+	for (const msg of messages) {
+		if (msg.role !== "assistant") continue
+
+		// Check message-level reasoning_content (set by some providers directly)
+		const msgReasoning = (msg as any).reasoning_content
+		if (typeof msgReasoning === "string" && msgReasoning.trim().length > 0) {
+			return msgReasoning
+		}
+
+		// Check content blocks for reasoning type (Task.ts stores it this way)
+		if (Array.isArray(msg.content)) {
+			for (const block of msg.content as any[]) {
+				if (block.type === "reasoning" && typeof block.text === "string" && block.text.trim().length > 0) {
+					return block.text
+				}
+			}
+		}
+	}
+
+	return undefined
+}
\ No newline at end of file
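The safety net above only mutates messages when reasoning is genuinely missing. A standalone illustration of that behavior, using simplified message shapes in place of the OpenAI and Anthropic SDK types (a sketch mirroring the patching rule, not the shipped code):

type SimpleMsg = {
	role: "user" | "assistant"
	content: string
	reasoning_content?: string
	tool_calls?: unknown[]
}

// If no assistant message kept its reasoning, attach the source reasoning to
// the last assistant message with tool_calls, since that is the message
// DeepSeek validates reasoning_content on.
function patchReasoning(converted: SimpleMsg[], sourceReasoning?: string): void {
	if (!sourceReasoning) return
	const assistants = converted.filter((m) => m.role === "assistant")
	if (assistants.some((m) => (m.reasoning_content ?? "").trim().length > 0)) return
	const target = [...assistants].reverse().find((m) => m.tool_calls)
	if (target) target.reasoning_content = sourceReasoning
}

const msgs: SimpleMsg[] = [
	{ role: "assistant", content: "", tool_calls: [{ id: "call_1" }] },
	{ role: "user", content: "tool result" },
]
patchReasoning(msgs, "reasoning from the previous turn")
console.log(msgs[0].reasoning_content) // "reasoning from the previous turn"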
diff --git a/src/shared/api.ts b/src/shared/api.ts
index a68abcc3adc..9a64c7bcf13 100644
--- a/src/shared/api.ts
+++ b/src/shared/api.ts
@@ -23,6 +23,12 @@ export type ApiHandlerOptions = Omit & {
 	 * When undefined, Ollama will use the model's default num_ctx from the Modelfile.
 	 */
 	ollamaNumCtx?: number
+	/**
+	 * Optional reasoning_effort parameter for the DeepSeek v4 models (v4-flash and v4-pro).
+	 * Controls the depth of reasoning: "high" or "max".
+	 * When undefined, defaults to "high".
+	 */
+	reasoningEffort?: "high" | "max"
 }

 // RouterName
diff --git a/webview-ui/src/components/settings/ThinkingBudget.tsx b/webview-ui/src/components/settings/ThinkingBudget.tsx
index c0c4a79bb2a..36c84bce593 100644
--- a/webview-ui/src/components/settings/ThinkingBudget.tsx
+++ b/webview-ui/src/components/settings/ThinkingBudget.tsx
@@ -91,7 +91,7 @@ export const ThinkingBudget = ({ apiConfiguration, setApiConfigurationField, mod
 	// 1. requiredReasoningEffort is not true, AND
 	// 2. supportsReasoningEffort is boolean true (not an explicit array)
 	// When the model provides an explicit array, respect those exact values.
-	type ReasoningEffortOption = ReasoningEffortWithMinimal | "none" | "disable"
+	type ReasoningEffortOption = ReasoningEffortWithMinimal | "none" | "disable" | "max"
 	const shouldAutoAddDisable = !modelInfo?.requiredReasoningEffort && supports === true && !baseAvailableOptions.includes("disable" as any)
 	const availableOptions: ReadonlyArray<ReasoningEffortOption> = shouldAutoAddDisable
@@ -240,9 +240,9 @@ export const ThinkingBudget = ({ apiConfiguration, setApiConfigurationField, mod
 					setApiConfigurationField("enableReasoningEffort", false)
 					setApiConfigurationField("reasoningEffort", "disable")
 				} else {
-					// "none", "minimal", "low", "medium", "high" all enable reasoning
+					// "none", "minimal", "low", "medium", "high", "max" all enable reasoning
 					setApiConfigurationField("enableReasoningEffort", true)
-					setApiConfigurationField("reasoningEffort", value as ReasoningEffortWithMinimal)
+					setApiConfigurationField("reasoningEffort", value as any)
 				}
 			}}>
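For reference, the select handler above reduces to a small mapping: "disable" switches reasoning off, while every other option, including the new "max", enables it and records the chosen effort. A simplified restatement with a stand-in setter (not the component code):

type ReasoningEffortOption = "minimal" | "low" | "medium" | "high" | "none" | "disable" | "max"

// Stand-in for setApiConfigurationField; the real one writes to the settings store.
function applyEffortSelection(
	value: ReasoningEffortOption,
	set: (field: "enableReasoningEffort" | "reasoningEffort", v: boolean | string) => void,
): void {
	if (value === "disable") {
		set("enableReasoningEffort", false)
		set("reasoningEffort", "disable")
	} else {
		// "none", "minimal", "low", "medium", "high", "max" all enable reasoning
		set("enableReasoningEffort", true)
		set("reasoningEffort", value)
	}
}

applyEffortSelection("max", (field, v) => console.log(field, "=", v))
// enableReasoningEffort = true
// reasoningEffort = max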
diff --git a/webview-ui/src/i18n/locales/en/settings.json b/webview-ui/src/i18n/locales/en/settings.json
index 8ec42367f14..9e21ae19e00 100644
--- a/webview-ui/src/i18n/locales/en/settings.json
+++ b/webview-ui/src/i18n/locales/en/settings.json
@@ -601,7 +601,8 @@
 			"low": "Low",
 			"medium": "Medium",
 			"high": "High",
-			"xhigh": "Extra High"
+			"xhigh": "Extra High",
+			"max": "Max"
 		},
 		"verbosity": {
 			"label": "Output Verbosity",
diff --git a/webview-ui/src/i18n/locales/zh-CN/settings.json b/webview-ui/src/i18n/locales/zh-CN/settings.json
index 80ca437ce77..c697168e952 100644
--- a/webview-ui/src/i18n/locales/zh-CN/settings.json
+++ b/webview-ui/src/i18n/locales/zh-CN/settings.json
@@ -538,7 +538,8 @@
 			"high": "高",
 			"xhigh": "超高",
 			"medium": "中",
-			"low": "低"
+			"low": "低",
+			"max": "最高"
 		},
 		"verbosity": {
 			"label": "输出详细程度",
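Taken together, the pieces wire up end to end: the webview stores the user's choice, and the handler emits reasoning_effort only for v4 model IDs while thinking stays enabled. A condensed sketch of that gating decision (option and function names below are distilled from the diff, not an exact copy of the handler):

interface EffortOptions {
	enableReasoningEffort?: boolean
	reasoningEffort?: "high" | "max" | "disable"
}

// Condensed restatement of the gating logic in deepseek.ts above.
function reasoningEffortParam(
	modelId: string,
	hasThinkingCapability: boolean,
	options: EffortOptions,
): { reasoning_effort: "high" | "max" } | undefined {
	const thinkingDisabled = options.enableReasoningEffort === false || options.reasoningEffort === "disable"
	const isThinkingModel = hasThinkingCapability && !thinkingDisabled
	const isV4 = modelId.includes("deepseek-v4-flash") || modelId.includes("deepseek-v4-pro")
	if (!isV4 || !isThinkingModel) return undefined
	return { reasoning_effort: options.reasoningEffort === "max" ? "max" : "high" }
}

console.log(reasoningEffortParam("deepseek-v4-pro", true, { reasoningEffort: "max" }))
// { reasoning_effort: "max" }
console.log(reasoningEffortParam("deepseek-reasoner", true, { reasoningEffort: "max" }))
// undefined: older reasoner models get thinking mode but no reasoning_effort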