Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions packages/types/src/model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ export type ReasoningEffortWithMinimal = z.infer<typeof reasoningEffortWithMinim
* Extended Reasoning Effort (includes "none" and "minimal")
* Note: "disable" is a UI/control value, not a value sent as effort
*/
export const reasoningEffortsExtended = ["none", "minimal", "low", "medium", "high", "xhigh"] as const
export const reasoningEffortsExtended = ["none", "minimal", "low", "medium", "high", "xhigh", "max"] as const

export const reasoningEffortExtendedSchema = z.enum(reasoningEffortsExtended)

Expand All @@ -32,7 +32,7 @@ export type ReasoningEffortExtended = z.infer<typeof reasoningEffortExtendedSche
/**
* Reasoning Effort user setting (includes "disable")
*/
export const reasoningEffortSettingValues = ["disable", "none", "minimal", "low", "medium", "high", "xhigh"] as const
export const reasoningEffortSettingValues = ["disable", "none", "minimal", "low", "medium", "high", "xhigh", "max"] as const
export const reasoningEffortSettingSchema = z.enum(reasoningEffortSettingValues)

/**
Expand Down Expand Up @@ -89,7 +89,7 @@ export const modelInfoSchema = z.object({
defaultTemperature: z.number().optional(),
requiredReasoningBudget: z.boolean().optional(),
supportsReasoningEffort: z
.union([z.boolean(), z.array(z.enum(["disable", "none", "minimal", "low", "medium", "high", "xhigh"]))])
.union([z.boolean(), z.array(z.enum(["disable", "none", "minimal", "low", "medium", "high", "xhigh", "max"]))])
.optional(),
requiredReasoningEffort: z.boolean().optional(),
preserveReasoning: z.boolean().optional(),
Expand Down
38 changes: 33 additions & 5 deletions packages/types/src/providers/deepseek.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import type { ModelInfo } from "../model.js"

// https://platform.deepseek.com/docs/api
// https://api-docs.deepseek.com/zh-cn/quick_start/pricing
// preserveReasoning enables interleaved thinking mode for tool calls:
// DeepSeek requires reasoning_content to be passed back during tool call
// continuation within the same turn. See: https://api-docs.deepseek.com/guides/thinking_mode
export type DeepSeekModelId = keyof typeof deepSeekModels

export const deepSeekDefaultModelId: DeepSeekModelId = "deepseek-chat"
export const deepSeekDefaultModelId: DeepSeekModelId = "deepseek-v4-flash"

export const deepSeekModels = {
"deepseek-chat": {
Expand All @@ -18,7 +18,7 @@ export const deepSeekModels = {
outputPrice: 0.42, // $0.42 per million tokens - Updated Dec 9, 2025
cacheWritesPrice: 0.28, // $0.28 per million tokens (cache miss) - Updated Dec 9, 2025
cacheReadsPrice: 0.028, // $0.028 per million tokens (cache hit) - Updated Dec 9, 2025
description: `DeepSeek-V3.2 (Non-thinking Mode) achieves a significant breakthrough in inference speed over previous models. It tops the leaderboard among open-source models and rivals the most advanced closed-source models globally. Supports JSON output, tool calls, chat prefix completion (beta), and FIM completion (beta).`,
description: `DeepSeek-V3.2 (Non-thinking Mode) - Legacy model. Use deepseek-v4-flash for better performance.`,
},
"deepseek-reasoner": {
maxTokens: 8192, // 8K max output
Expand All @@ -30,9 +30,37 @@ export const deepSeekModels = {
outputPrice: 0.42, // $0.42 per million tokens - Updated Dec 9, 2025
cacheWritesPrice: 0.28, // $0.28 per million tokens (cache miss) - Updated Dec 9, 2025
cacheReadsPrice: 0.028, // $0.028 per million tokens (cache hit) - Updated Dec 9, 2025
description: `DeepSeek-V3.2 (Thinking Mode) achieves performance comparable to OpenAI-o1 across math, code, and reasoning tasks. Supports Chain of Thought reasoning with up to 8K output tokens. Supports JSON output, tool calls, and chat prefix completion (beta).`,
description: `DeepSeek-V3.2 (Thinking Mode) - Legacy model. Use deepseek-v4-pro for better performance.`,
},
"deepseek-v4-flash": {
maxTokens: 384_000, // 384K max output
contextWindow: 1_000_000, // 1M context window
supportsImages: false,
supportsPromptCache: true,
preserveReasoning: true, // Also supports thinking mode
supportsReasoningEffort: ["disable", "high", "max"],
reasoningEffort: "high",
inputPrice: 0.14, // $0.14 per million tokens (cache miss, ¥1/M)
outputPrice: 0.28, // $0.28 per million tokens (¥2/M)
cacheWritesPrice: 0.14, // $0.14 per million tokens (cache miss, ¥1/M)
cacheReadsPrice: 0.03, // $0.03 per million tokens (cache hit, ¥0.2/M)
description: `DeepSeek-V4-Flash - Fast and efficient model with 1M context window and 384K max output. Supports thinking mode for better reasoning. Best for general tasks. Supports JSON output, tool calls, and prompt caching.`,
},
"deepseek-v4-pro": {
maxTokens: 384_000, // 384K max output
contextWindow: 1_000_000, // 1M context window
supportsImages: false,
supportsPromptCache: true,
preserveReasoning: true, // Enables interleaved thinking mode for tool calls
supportsReasoningEffort: ["disable", "high", "max"],
reasoningEffort: "high",
inputPrice: 1.68, // $1.68 per million tokens (cache miss, ¥12/M)
outputPrice: 3.36, // $3.36 per million tokens (¥24/M)
cacheWritesPrice: 1.68, // $1.68 per million tokens (cache miss, ¥12/M)
cacheReadsPrice: 0.14, // $0.14 per million tokens (cache hit, ¥1/M)
description: `DeepSeek-V4-Pro (Thinking Mode) - Advanced reasoning model with Chain of Thought capabilities. 1M context window, 384K max output. Supports reasoning_effort parameter (high/max) for deeper thinking. Ideal for complex reasoning, math, and code tasks.`,
},
} as const satisfies Record<string, ModelInfo>

// https://api-docs.deepseek.com/quick_start/parameter_settings
export const DEEP_SEEK_DEFAULT_TEMPERATURE = 0.3
export const DEEP_SEEK_DEFAULT_TEMPERATURE = 0.3
9 changes: 4 additions & 5 deletions src/api/providers/__tests__/deepseek.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -255,12 +255,11 @@ describe("DeepSeekHandler", () => {
const model = handlerWithInvalidModel.getModel()
expect(model.id).toBe("invalid-model") // Returns provided ID
expect(model.info).toBeDefined()
// With the current implementation, it's the same object reference when using default model info
expect(model.info).toBe(handler.getModel().info)
// Should have the same base properties
expect(model.info.contextWindow).toBe(handler.getModel().info.contextWindow)
// And should have supportsPromptCache set to true
// Falls back to the default model (deepseek-v4-flash) when ID is invalid
expect(model.info.maxTokens).toBe(384_000) // v4-flash: 384K max output
expect(model.info.contextWindow).toBe(1_000_000) // v4-flash: 1M context window
expect(model.info.supportsPromptCache).toBe(true)
expect(model.info.preserveReasoning).toBe(true) // v4-flash supports thinking mode
})

it("should return default model if no model ID is provided", () => {
Expand Down
120 changes: 114 additions & 6 deletions src/api/providers/deepseek.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import type { ApiHandlerCreateMessageMetadata } from "../index"
// Custom interface for DeepSeek params to support thinking mode
type DeepSeekChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParamsStreaming & {
thinking?: { type: "enabled" | "disabled" }
reasoning_effort?: "high" | "max"
}

export class DeepSeekHandler extends OpenAiHandler {
Expand Down Expand Up @@ -55,27 +56,44 @@ export class DeepSeekHandler extends OpenAiHandler {
const modelId = this.options.apiModelId ?? deepSeekDefaultModelId
const { info: modelInfo } = this.getModel()

// Check if this is a thinking-enabled model (deepseek-reasoner)
const isThinkingModel = modelId.includes("deepseek-reasoner")
// Whether the model inherently supports thinking mode via preserveReasoning
const hasThinkingCapability = modelInfo.preserveReasoning || modelId.includes("deepseek-v4-pro") || modelId.includes("deepseek-reasoner")
// Respect user's toggle: enableReasoningEffort=false means disable thinking entirely
// reasoningEffort="disable" also turns off thinking
const isThinkingDisabled = this.options.enableReasoningEffort === false || (this.options as any).reasoningEffort === "disable"
const isThinkingModel = hasThinkingCapability && !isThinkingDisabled

// Convert messages to R1 format (merges consecutive same-role messages)
// This is required for DeepSeek which does not support successive messages with the same role
// For thinking models (deepseek-reasoner), enable mergeToolResultText to preserve reasoning_content
// For thinking models, enable mergeToolResultText to preserve reasoning_content
// during tool call sequences. Without this, environment_details text after tool_results would
// create user messages that cause DeepSeek to drop all previous reasoning_content.
// See: https://api-docs.deepseek.com/guides/thinking_mode
const convertedMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages], {
mergeToolResultText: isThinkingModel,
})

// Pre-flight check: ensure reasoning_content is preserved on assistant messages
// when thinking mode is enabled. DeepSeek requires reasoning_content from previous
// turns to be passed back, otherwise it returns 400 error.
// See: https://api-docs.deepseek.com/guides/thinking_mode
if (isThinkingModel) {
ensureReasoningContentPreserved(convertedMessages, messages)
}

const requestOptions: DeepSeekChatCompletionParams = {
model: modelId,
temperature: this.options.modelTemperature ?? DEEP_SEEK_DEFAULT_TEMPERATURE,
messages: convertedMessages,
stream: true as const,
stream_options: { include_usage: true },
// Enable thinking mode for deepseek-reasoner or when tools are used with thinking model
// Enable thinking mode for thinking-enabled models (respects user toggle)
...(isThinkingModel && { thinking: { type: "enabled" } }),
// Add reasoning_effort for v4 models (can be "high" or "max")
// Only sent when thinking is enabled; user can set to "max" via settings
...((modelId.includes("deepseek-v4-flash") || modelId.includes("deepseek-v4-pro")) && isThinkingModel && {
reasoning_effort: (this.options as any).reasoningEffort === "max" ? "max" : "high",
}),
tools: this.convertToolsForOpenAI(metadata?.tools),
tool_choice: metadata?.tool_choice,
parallel_tool_calls: metadata?.parallelToolCalls ?? true,
Expand All @@ -94,8 +112,29 @@ export class DeepSeekHandler extends OpenAiHandler {
isAzureAiInference ? { path: OPENAI_AZURE_AI_INFERENCE_PATH } : {},
)
} catch (error) {
const { handleOpenAIError } = await import("./utils/openai-error-handler")
throw handleOpenAIError(error, "DeepSeek")
// Attempt graceful degradation for thinking-mode reasoning_content errors.
// This happens when DeepSeek requires reasoning_content to be passed back
// but it was lost during message conversion (e.g., after conversation condense).
// We retry without thinking enabled as a safe fallback.
const errorMessage = String(error)
if (
isThinkingModel &&
errorMessage.includes("reasoning_content") &&
errorMessage.includes("must be passed back")
) {
console.warn("[DeepSeek] reasoning_content missing, retrying without thinking mode")
const retryOptions: DeepSeekChatCompletionParams = {
...requestOptions,
thinking: undefined,
}
stream = await this.client.chat.completions.create(
retryOptions,
isAzureAiInference ? { path: OPENAI_AZURE_AI_INFERENCE_PATH } : {},
)
} else {
const { handleOpenAIError } = await import("./utils/openai-error-handler")
throw handleOpenAIError(error, "DeepSeek")
}
}

let lastUsage
Expand Down Expand Up @@ -154,3 +193,72 @@ export class DeepSeekHandler extends OpenAiHandler {
}
}
}

/**
* Pre-flight validation: ensures converted OpenAI messages retain reasoning_content
* from source Anthropic messages when thinking mode is enabled.
*
* DeepSeek's thinking mode requires reasoning_content from previous assistant
* responses to be passed back in subsequent requests within the same turn.
* If convertToR1Format failed to preserve it (e.g., edge cases with nested
* tool calls or conversation condense), we patch it here as a safety net.
*
* @param convertedMessages - The messages after convertToR1Format (will be mutated)
* @param sourceMessages - The original Anthropic messages before conversion
*/
function ensureReasoningContentPreserved(
convertedMessages: OpenAI.Chat.ChatCompletionMessageParam[],
sourceMessages: Anthropic.Messages.MessageParam[],
): void {
// Scan source messages for any assistant message that had reasoning
const sourceReasoning = extractReasoningFromMessages(sourceMessages)
if (!sourceReasoning) {
return // No reasoning in source, nothing to preserve
}

// Check if converted assistant messages already have reasoning_content
const assistantMsgs = convertedMessages.filter((m) => m.role === "assistant")
const hasReasoningInConverted = assistantMsgs.some(
(msg: any) => typeof msg.reasoning_content === "string" && msg.reasoning_content.trim().length > 0,
)

if (hasReasoningInConverted) {
return // Already preserved correctly
}

// Reasoning was lost during conversion — patch it onto the last assistant
// message that has tool_calls (this is the one DeepSeek requires it on).
const lastToolAssistant = [...assistantMsgs].reverse().find((msg: any) => msg.tool_calls)
if (lastToolAssistant) {
;(lastToolAssistant as any).reasoning_content = sourceReasoning
}
}

/**
 * Extracts reasoning_content from Anthropic messages.
 * Checks both message-level reasoning_content and content blocks with type "reasoning".
 *
 * Scans in REVERSE (newest-first) so the most recent assistant reasoning wins:
 * the caller patches the returned string onto the latest tool-call assistant
 * message, and DeepSeek's thinking mode expects the reasoning from the current
 * turn to be passed back during tool-call continuation. A forward scan would
 * return stale reasoning from an earlier turn whenever multiple assistant
 * messages carry reasoning.
 *
 * @param messages - Original Anthropic messages (searched newest-first)
 * @returns The most recent non-empty reasoning string, or undefined if none exists
 */
function extractReasoningFromMessages(
	messages: Anthropic.Messages.MessageParam[],
): string | undefined {
	// Walk newest-to-oldest so we surface the reasoning for the current turn.
	for (let i = messages.length - 1; i >= 0; i--) {
		const msg = messages[i]
		if (msg.role !== "assistant") continue

		// Check message-level reasoning_content (set by some providers directly)
		const msgReasoning = (msg as any).reasoning_content
		if (typeof msgReasoning === "string" && msgReasoning.trim().length > 0) {
			return msgReasoning
		}

		// Check content blocks for reasoning type (Task.ts stores it this way)
		if (Array.isArray(msg.content)) {
			for (const block of msg.content as any[]) {
				if (block.type === "reasoning" && typeof block.text === "string" && block.text.trim().length > 0) {
					return block.text
				}
			}
		}
	}

	return undefined
}
6 changes: 6 additions & 0 deletions src/shared/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@ export type ApiHandlerOptions = Omit<ProviderSettings, "apiProvider"> & {
* When undefined, Ollama will use the model's default num_ctx from the Modelfile.
*/
ollamaNumCtx?: number
/**
* Optional reasoning_effort parameter for DeepSeek v4-pro model.
* Controls the depth of reasoning: "high" or "max".
* When undefined, defaults to "high".
*/
reasoningEffort?: "high" | "max"
}

// RouterName
Expand Down
6 changes: 3 additions & 3 deletions webview-ui/src/components/settings/ThinkingBudget.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ export const ThinkingBudget = ({ apiConfiguration, setApiConfigurationField, mod
// 1. requiredReasoningEffort is not true, AND
// 2. supportsReasoningEffort is boolean true (not an explicit array)
// When the model provides an explicit array, respect those exact values.
type ReasoningEffortOption = ReasoningEffortWithMinimal | "none" | "disable"
type ReasoningEffortOption = ReasoningEffortWithMinimal | "none" | "disable" | "max"
const shouldAutoAddDisable =
!modelInfo?.requiredReasoningEffort && supports === true && !baseAvailableOptions.includes("disable" as any)
const availableOptions: ReadonlyArray<ReasoningEffortOption> = shouldAutoAddDisable
Expand Down Expand Up @@ -240,9 +240,9 @@ export const ThinkingBudget = ({ apiConfiguration, setApiConfigurationField, mod
setApiConfigurationField("enableReasoningEffort", false)
setApiConfigurationField("reasoningEffort", "disable")
} else {
// "none", "minimal", "low", "medium", "high" all enable reasoning
// "none", "minimal", "low", "medium", "high", "max" all enable reasoning
setApiConfigurationField("enableReasoningEffort", true)
setApiConfigurationField("reasoningEffort", value as ReasoningEffortWithMinimal)
setApiConfigurationField("reasoningEffort", value as any)
}
}}>
<SelectTrigger className="w-full">
Expand Down
3 changes: 2 additions & 1 deletion webview-ui/src/i18n/locales/en/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -601,7 +601,8 @@
"low": "Low",
"medium": "Medium",
"high": "High",
"xhigh": "Extra High"
"xhigh": "Extra High",
"max": "Max"
},
"verbosity": {
"label": "Output Verbosity",
Expand Down
3 changes: 2 additions & 1 deletion webview-ui/src/i18n/locales/zh-CN/settings.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading