diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index d5e9aa0cfb..abbded6329 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -133,6 +133,7 @@ import { AutoApprovalHandler, checkAutoApproval } from "../auto-approval" import { MessageManager } from "../message-manager" import { validateAndFixToolResultIds } from "./validateToolResultIds" import { mergeConsecutiveApiMessages } from "./mergeConsecutiveApiMessages" +import { appendEnvironmentDetails, removeEnvironmentDetailsBlocks } from "./appendEnvironmentDetails" const MAX_EXPONENTIAL_BACKOFF_SECONDS = 600 // 10 minutes const DEFAULT_USAGE_COLLECTION_TIMEOUT_MS = 5000 // 5 seconds @@ -2568,20 +2569,18 @@ export class Task extends EventEmitter implements TaskLike { if (lastUserMsgIndex >= 0) { const lastUserMsg = this.apiConversationHistory[lastUserMsgIndex] if (Array.isArray(lastUserMsg.content)) { - // Remove any existing environment_details blocks before adding fresh ones - const contentWithoutEnvDetails = lastUserMsg.content.filter( - (block: Anthropic.Messages.ContentBlockParam) => { - if (block.type === "text" && typeof block.text === "string") { - const isEnvironmentDetailsBlock = - block.text.trim().startsWith("") && - block.text.trim().endsWith("") - return !isEnvironmentDetailsBlock - } - return true - }, + // Remove any existing environment_details blocks before adding fresh ones, + // then append env details to the last text or tool_result block. + // This avoids creating standalone trailing text blocks which can break + // interleaved-thinking models like DeepSeek reasoner. 
+ const contentWithoutEnvDetails = removeEnvironmentDetailsBlocks( + lastUserMsg.content as ( + | Anthropic.Messages.TextBlockParam + | Anthropic.Messages.ImageBlockParam + | Anthropic.Messages.ToolResultBlockParam + )[], ) - // Add fresh environment details - lastUserMsg.content = [...contentWithoutEnvDetails, { type: "text" as const, text: environmentDetails }] + lastUserMsg.content = appendEnvironmentDetails(contentWithoutEnvDetails, environmentDetails) } } @@ -2748,23 +2747,12 @@ export class Task extends EventEmitter implements TaskLike { // Remove any existing environment_details blocks before adding fresh ones. // This prevents duplicate environment details when resuming tasks, // where the old user message content may already contain environment details from the previous session. - // We check for both opening and closing tags to ensure we're matching complete environment detail blocks, - // not just mentions of the tag in regular content. - const contentWithoutEnvDetails = parsedUserContent.filter((block) => { - if (block.type === "text" && typeof block.text === "string") { - // Check if this text block is a complete environment_details block - // by verifying it starts with the opening tag and ends with the closing tag - const isEnvironmentDetailsBlock = - block.text.trim().startsWith("") && - block.text.trim().endsWith("") - return !isEnvironmentDetailsBlock - } - return true - }) + const contentWithoutEnvDetails = removeEnvironmentDetailsBlocks(parsedUserContent) - // Add environment details as its own text block, separate from tool - // results. - let finalUserContent = [...contentWithoutEnvDetails, { type: "text" as const, text: environmentDetails }] + // Append environment details to the last text or tool_result block. + // This avoids creating standalone trailing text blocks which can break + // interleaved-thinking models like DeepSeek reasoner that expect specific message shapes. 
+ let finalUserContent = appendEnvironmentDetails(contentWithoutEnvDetails, environmentDetails) // Only add user message to conversation history if: // 1. This is the first attempt (retryAttempt === 0), AND // 2. The original userContent was not empty (empty signals delegation resume where diff --git a/src/core/task/__tests__/appendEnvironmentDetails.spec.ts b/src/core/task/__tests__/appendEnvironmentDetails.spec.ts new file mode 100644 index 0000000000..54ccb1ee1e --- /dev/null +++ b/src/core/task/__tests__/appendEnvironmentDetails.spec.ts @@ -0,0 +1,414 @@ +import { Anthropic } from "@anthropic-ai/sdk" +import { + appendEnvironmentDetails, + removeEnvironmentDetailsBlocks, + stripAppendedEnvironmentDetails, + UserContentBlock, +} from "../appendEnvironmentDetails" + +describe("appendEnvironmentDetails", () => { + const envDetails = "\n# Test\nSome details\n" + + describe("empty content", () => { + it("should return a text block when content is empty", () => { + const result = appendEnvironmentDetails([], envDetails) + + expect(result).toHaveLength(1) + expect(result[0].type).toBe("text") + expect((result[0] as Anthropic.Messages.TextBlockParam).text).toBe(envDetails) + }) + }) + + describe("text block handling", () => { + it("should append to the last text block", () => { + const content: UserContentBlock[] = [{ type: "text", text: "User message" }] + + const result = appendEnvironmentDetails(content, envDetails) + + expect(result).toHaveLength(1) + expect(result[0].type).toBe("text") + expect((result[0] as Anthropic.Messages.TextBlockParam).text).toBe("User message\n\n" + envDetails) + }) + + it("should append to the last text block when multiple text blocks exist", () => { + const content: UserContentBlock[] = [ + { type: "text", text: "First message" }, + { type: "text", text: "Second message" }, + ] + + const result = appendEnvironmentDetails(content, envDetails) + + expect(result).toHaveLength(2) + expect((result[0] as 
Anthropic.Messages.TextBlockParam).text).toBe("First message") + expect((result[1] as Anthropic.Messages.TextBlockParam).text).toBe("Second message\n\n" + envDetails) + }) + + it("should not mutate the original content array", () => { + const content: UserContentBlock[] = [{ type: "text", text: "Original" }] + + appendEnvironmentDetails(content, envDetails) + + expect((content[0] as Anthropic.Messages.TextBlockParam).text).toBe("Original") + }) + }) + + describe("tool_result block handling", () => { + it("should append to tool_result with string content", () => { + const content: UserContentBlock[] = [ + { + type: "tool_result", + tool_use_id: "tool-123", + content: "Tool result text", + }, + ] + + const result = appendEnvironmentDetails(content, envDetails) + + expect(result).toHaveLength(1) + expect(result[0].type).toBe("tool_result") + const toolResult = result[0] as Anthropic.Messages.ToolResultBlockParam + expect(toolResult.content).toBe("Tool result text\n\n" + envDetails) + expect(toolResult.tool_use_id).toBe("tool-123") + }) + + it("should append to tool_result with undefined content", () => { + const content: UserContentBlock[] = [ + { + type: "tool_result", + tool_use_id: "tool-123", + } as Anthropic.Messages.ToolResultBlockParam, + ] + + const result = appendEnvironmentDetails(content, envDetails) + + expect(result).toHaveLength(1) + const toolResult = result[0] as Anthropic.Messages.ToolResultBlockParam + expect(toolResult.content).toBe(envDetails) + }) + + it("should append to tool_result with array content containing text", () => { + const content: UserContentBlock[] = [ + { + type: "tool_result", + tool_use_id: "tool-123", + content: [{ type: "text", text: "Result line 1" }], + }, + ] + + const result = appendEnvironmentDetails(content, envDetails) + + expect(result).toHaveLength(1) + const toolResult = result[0] as Anthropic.Messages.ToolResultBlockParam + expect(Array.isArray(toolResult.content)).toBe(true) + const contentArray = toolResult.content 
as Anthropic.Messages.TextBlockParam[] + expect(contentArray).toHaveLength(1) + expect(contentArray[0].text).toBe("Result line 1\n\n" + envDetails) + }) + + it("should add text block to tool_result with array content containing only images", () => { + const content: UserContentBlock[] = [ + { + type: "tool_result", + tool_use_id: "tool-123", + content: [ + { + type: "image", + source: { type: "base64", media_type: "image/png", data: "abc123" }, + }, + ], + }, + ] + + const result = appendEnvironmentDetails(content, envDetails) + + expect(result).toHaveLength(1) + const toolResult = result[0] as Anthropic.Messages.ToolResultBlockParam + expect(Array.isArray(toolResult.content)).toBe(true) + const contentArray = toolResult.content as Array< + Anthropic.Messages.TextBlockParam | Anthropic.Messages.ImageBlockParam + > + expect(contentArray).toHaveLength(2) + expect(contentArray[0].type).toBe("image") + expect(contentArray[1].type).toBe("text") + expect((contentArray[1] as Anthropic.Messages.TextBlockParam).text).toBe(envDetails) + }) + + it("should append to the last text block in tool_result array content", () => { + const content: UserContentBlock[] = [ + { + type: "tool_result", + tool_use_id: "tool-123", + content: [ + { type: "text", text: "First text" }, + { + type: "image", + source: { type: "base64", media_type: "image/png", data: "abc123" }, + }, + { type: "text", text: "Last text" }, + ], + }, + ] + + const result = appendEnvironmentDetails(content, envDetails) + + const toolResult = result[0] as Anthropic.Messages.ToolResultBlockParam + const contentArray = toolResult.content as Array< + Anthropic.Messages.TextBlockParam | Anthropic.Messages.ImageBlockParam + > + expect(contentArray).toHaveLength(3) + expect((contentArray[0] as Anthropic.Messages.TextBlockParam).text).toBe("First text") + expect((contentArray[2] as Anthropic.Messages.TextBlockParam).text).toBe("Last text\n\n" + envDetails) + }) + + it("should preserve is_error flag on tool_result", () => { + 
const content: UserContentBlock[] = [ + { + type: "tool_result", + tool_use_id: "tool-123", + content: "Error message", + is_error: true, + }, + ] + + const result = appendEnvironmentDetails(content, envDetails) + + const toolResult = result[0] as Anthropic.Messages.ToolResultBlockParam + expect(toolResult.is_error).toBe(true) + expect(toolResult.content).toBe("Error message\n\n" + envDetails) + }) + }) + + describe("mixed content handling", () => { + it("should append to last text block when text comes after tool_result", () => { + const content: UserContentBlock[] = [ + { + type: "tool_result", + tool_use_id: "tool-123", + content: "Tool result", + }, + { type: "text", text: "User comment" }, + ] + + const result = appendEnvironmentDetails(content, envDetails) + + expect(result).toHaveLength(2) + expect((result[0] as Anthropic.Messages.ToolResultBlockParam).content).toBe("Tool result") + expect((result[1] as Anthropic.Messages.TextBlockParam).text).toBe("User comment\n\n" + envDetails) + }) + + it("should append to last tool_result when no text block follows", () => { + const content: UserContentBlock[] = [ + { type: "text", text: "User message" }, + { + type: "tool_result", + tool_use_id: "tool-123", + content: "Tool result", + }, + ] + + const result = appendEnvironmentDetails(content, envDetails) + + expect(result).toHaveLength(2) + expect((result[0] as Anthropic.Messages.TextBlockParam).text).toBe("User message") + expect((result[1] as Anthropic.Messages.ToolResultBlockParam).content).toBe("Tool result\n\n" + envDetails) + }) + + it("should add new text block when content only has images", () => { + const content: UserContentBlock[] = [ + { + type: "image", + source: { type: "base64", media_type: "image/png", data: "abc123" }, + }, + ] + + const result = appendEnvironmentDetails(content, envDetails) + + expect(result).toHaveLength(2) + expect(result[0].type).toBe("image") + expect(result[1].type).toBe("text") + expect((result[1] as 
Anthropic.Messages.TextBlockParam).text).toBe(envDetails) + }) + + it("should handle multiple tool_results and append to the last one", () => { + const content: UserContentBlock[] = [ + { + type: "tool_result", + tool_use_id: "tool-1", + content: "First result", + }, + { + type: "tool_result", + tool_use_id: "tool-2", + content: "Second result", + }, + ] + + const result = appendEnvironmentDetails(content, envDetails) + + expect(result).toHaveLength(2) + expect((result[0] as Anthropic.Messages.ToolResultBlockParam).content).toBe("First result") + expect((result[1] as Anthropic.Messages.ToolResultBlockParam).content).toBe( + "Second result\n\n" + envDetails, + ) + }) + }) +}) + +describe("removeEnvironmentDetailsBlocks", () => { + const envDetailsBlock: UserContentBlock = { + type: "text", + text: "\n# Test\nSome details\n", + } + + it("should remove standalone environment_details text blocks", () => { + const content: UserContentBlock[] = [{ type: "text", text: "User message" }, envDetailsBlock] + + const result = removeEnvironmentDetailsBlocks(content) + + expect(result).toHaveLength(1) + expect((result[0] as Anthropic.Messages.TextBlockParam).text).toBe("User message") + }) + + it("should not remove text blocks that mention environment_details but aren't complete blocks", () => { + const content: UserContentBlock[] = [ + { type: "text", text: "Let me explain tags" }, + { type: "text", text: "The closing tag is " }, + ] + + const result = removeEnvironmentDetailsBlocks(content) + + expect(result).toHaveLength(2) + }) + + it("should preserve non-text blocks", () => { + const content: UserContentBlock[] = [ + { + type: "tool_result", + tool_use_id: "tool-123", + content: "Result", + }, + envDetailsBlock, + { + type: "image", + source: { type: "base64", media_type: "image/png", data: "abc123" }, + }, + ] + + const result = removeEnvironmentDetailsBlocks(content) + + expect(result).toHaveLength(2) + expect(result[0].type).toBe("tool_result") + 
expect(result[1].type).toBe("image") + }) + + it("should handle empty content", () => { + const result = removeEnvironmentDetailsBlocks([]) + expect(result).toHaveLength(0) + }) + + it("should handle whitespace around environment_details tags", () => { + const content: UserContentBlock[] = [ + { + type: "text", + text: " \n# Test\nSome details\n ", + }, + ] + + const result = removeEnvironmentDetailsBlocks(content) + + expect(result).toHaveLength(0) + }) +}) + +describe("stripAppendedEnvironmentDetails", () => { + const envDetails = "\n# Test\nSome details\n" + + it("should strip environment details from the end of a text block", () => { + const content: UserContentBlock[] = [{ type: "text", text: "User message\n\n" + envDetails }] + + const result = stripAppendedEnvironmentDetails(content) + + expect(result).toHaveLength(1) + expect((result[0] as Anthropic.Messages.TextBlockParam).text).toBe("User message") + }) + + it("should strip environment details from tool_result string content", () => { + const content: UserContentBlock[] = [ + { + type: "tool_result", + tool_use_id: "tool-123", + content: "Tool result\n\n" + envDetails, + }, + ] + + const result = stripAppendedEnvironmentDetails(content) + + expect(result).toHaveLength(1) + expect((result[0] as Anthropic.Messages.ToolResultBlockParam).content).toBe("Tool result") + }) + + it("should strip environment details from tool_result array content", () => { + const content: UserContentBlock[] = [ + { + type: "tool_result", + tool_use_id: "tool-123", + content: [{ type: "text", text: "Result text\n\n" + envDetails }], + }, + ] + + const result = stripAppendedEnvironmentDetails(content) + + const toolResult = result[0] as Anthropic.Messages.ToolResultBlockParam + const contentArray = toolResult.content as Anthropic.Messages.TextBlockParam[] + expect(contentArray[0].text).toBe("Result text") + }) + + it("should also remove standalone environment_details blocks", () => { + const content: UserContentBlock[] = [ + { 
type: "text", text: "User message" }, + { type: "text", text: envDetails }, + ] + + const result = stripAppendedEnvironmentDetails(content) + + expect(result).toHaveLength(1) + expect((result[0] as Anthropic.Messages.TextBlockParam).text).toBe("User message") + }) + + it("should handle content without environment details", () => { + const content: UserContentBlock[] = [ + { type: "text", text: "User message" }, + { + type: "tool_result", + tool_use_id: "tool-123", + content: "Tool result", + }, + ] + + const result = stripAppendedEnvironmentDetails(content) + + expect(result).toEqual(content) + }) + + it("should handle empty content", () => { + const result = stripAppendedEnvironmentDetails([]) + expect(result).toHaveLength(0) + }) + + it("should preserve is_error flag when stripping from tool_result", () => { + const content: UserContentBlock[] = [ + { + type: "tool_result", + tool_use_id: "tool-123", + content: "Error\n\n" + envDetails, + is_error: true, + }, + ] + + const result = stripAppendedEnvironmentDetails(content) + + const toolResult = result[0] as Anthropic.Messages.ToolResultBlockParam + expect(toolResult.is_error).toBe(true) + expect(toolResult.content).toBe("Error") + }) +}) diff --git a/src/core/task/appendEnvironmentDetails.ts b/src/core/task/appendEnvironmentDetails.ts new file mode 100644 index 0000000000..09495d89e7 --- /dev/null +++ b/src/core/task/appendEnvironmentDetails.ts @@ -0,0 +1,238 @@ +import { Anthropic } from "@anthropic-ai/sdk" + +/** + * Type for content block params that can appear in messages. + * Using the broader ContentBlockParam type to handle all possible block types. + */ +export type UserContentBlock = Anthropic.Messages.ContentBlockParam + +/** + * Appends environment details to the last text block or tool_result block in user content. + * This avoids creating a standalone trailing text block, which can break interleaved-thinking + * models like DeepSeek reasoner that expect specific message shapes. 
/**
 * Appends environment details to the last text or tool_result block of a user message.
 *
 * Merging into an existing block avoids emitting a standalone trailing text block, which
 * can break interleaved-thinking models (e.g. DeepSeek reasoner) that expect specific
 * message shapes.
 *
 * Resolution order:
 * 1. Empty content: return a single text block holding the environment details.
 * 2. Scan from the end for the last block whose type is "text" or "tool_result".
 *    - text: append to its text, separated by a blank line.
 *    - tool_result: append to its content (see appendToToolResult).
 * 3. No such block (for example, images only): push a new trailing text block.
 *
 * Neither the input array nor any block inside it is mutated.
 *
 * @param content - Array of content blocks from a user message
 * @param environmentDetails - The environment details string to append
 * @returns New array with environment details merged into the appropriate block
 */
export function appendEnvironmentDetails(content: UserContentBlock[], environmentDetails: string): UserContentBlock[] {
	if (content.length === 0) {
		return [{ type: "text" as const, text: environmentDetails }]
	}

	// Shallow copy: only the slot that receives the details is replaced below.
	const result = [...content]

	let lastSuitableIndex = -1
	for (let i = result.length - 1; i >= 0; i--) {
		const blockType = result[i].type
		if (blockType === "text" || blockType === "tool_result") {
			lastSuitableIndex = i
			break
		}
	}

	if (lastSuitableIndex === -1) {
		// Nothing to merge into, so fall back to a standalone text block.
		result.push({ type: "text" as const, text: environmentDetails })
		return result
	}

	const lastBlock = result[lastSuitableIndex]

	if (lastBlock.type === "text") {
		// Spread first so any additional fields on the text block survive the rewrite.
		result[lastSuitableIndex] = {
			...lastBlock,
			text: lastBlock.text + "\n\n" + environmentDetails,
		}
	} else if (lastBlock.type === "tool_result") {
		result[lastSuitableIndex] = appendToToolResult(lastBlock, environmentDetails)
	}

	return result
}

/**
 * Returns a copy of a tool_result block with text appended to its content.
 *
 * - Missing content (undefined/null): the text becomes the content.
 * - String content: concatenated with a blank-line separator.
 * - Array content: appended to the last text entry, or pushed as a new text entry
 *   when the array holds no text.
 * - Any other content shape: the content is replaced by the text.
 *
 * All other fields of the tool_result (tool_use_id, is_error, ...) are carried over.
 */
function appendToToolResult(
	toolResult: Anthropic.Messages.ToolResultBlockParam,
	textToAppend: string,
): Anthropic.Messages.ToolResultBlockParam {
	const { content, ...rest } = toolResult

	if (content == null) {
		return { ...rest, content: textToAppend }
	}

	if (typeof content === "string") {
		return { ...rest, content: content + "\n\n" + textToAppend }
	}

	if (Array.isArray(content)) {
		const contentCopy = [...content]

		let lastTextIndex = -1
		for (let i = contentCopy.length - 1; i >= 0; i--) {
			if (contentCopy[i].type === "text") {
				lastTextIndex = i
				break
			}
		}

		if (lastTextIndex >= 0) {
			const lastTextBlock = contentCopy[lastTextIndex] as Anthropic.Messages.TextBlockParam
			// Spread first so additional fields on the nested text block are kept.
			contentCopy[lastTextIndex] = {
				...lastTextBlock,
				text: lastTextBlock.text + "\n\n" + textToAppend,
			}
		} else {
			contentCopy.push({ type: "text" as const, text: textToAppend })
		}

		return { ...rest, content: contentCopy }
	}

	// Unrecognized content shape: fall back to using the text as the content.
	return { ...rest, content: textToAppend }
}

/** Tag that opens an environment details section. */
const ENVIRONMENT_DETAILS_OPEN_TAG = "<environment_details>"

/** Tag that closes an environment details section. */
const ENVIRONMENT_DETAILS_CLOSE_TAG = "</environment_details>"

/**
 * Matches an environment details section sitting at the very end of a string,
 * together with the newlines that separate it from preceding text and any
 * trailing whitespace.
 */
const TRAILING_ENVIRONMENT_DETAILS_PATTERN = /\n*<environment_details>[\s\S]*<\/environment_details>\s*$/

/**
 * Removes standalone environment_details blocks from the content array.
 *
 * A block counts as an environment_details block only if it is a text block whose
 * trimmed text starts with `<environment_details>` AND ends with
 * `</environment_details>`. Text that merely mentions one of the tags is kept.
 *
 * @param content - Array of content blocks to filter
 * @returns New array with environment_details blocks removed
 */
export function removeEnvironmentDetailsBlocks(content: UserContentBlock[]): UserContentBlock[] {
	return content.filter((block) => {
		if (block.type !== "text" || typeof block.text !== "string") {
			return true
		}

		const trimmed = block.text.trim()
		const isEnvironmentDetailsBlock =
			trimmed.startsWith(ENVIRONMENT_DETAILS_OPEN_TAG) && trimmed.endsWith(ENVIRONMENT_DETAILS_CLOSE_TAG)
		return !isEnvironmentDetailsBlock
	})
}

/**
 * Strips environment details that were merged into an existing block.
 *
 * Standalone environment_details text blocks are removed first. Then the last text or
 * tool_result block is located by scanning from the end (the same block that
 * appendEnvironmentDetails merges into, even when images follow it) and a trailing
 * environment details section is removed from it.
 *
 * The input array and its blocks are never mutated; an empty input is returned as-is.
 *
 * @param content - Array of content blocks
 * @returns Array with environment details stripped from the last suitable block
 */
export function stripAppendedEnvironmentDetails(content: UserContentBlock[]): UserContentBlock[] {
	if (content.length === 0) {
		return content
	}

	// filter() always yields a fresh array, so replacing a slot below cannot touch `content`.
	const result = removeEnvironmentDetailsBlocks(content)

	let targetIndex = -1
	for (let i = result.length - 1; i >= 0; i--) {
		const blockType = result[i].type
		if (blockType === "text" || blockType === "tool_result") {
			targetIndex = i
			break
		}
	}

	if (targetIndex === -1) {
		return result
	}

	const target = result[targetIndex]

	if (target.type === "text") {
		const strippedText = stripEnvDetailsFromText(target.text)
		if (strippedText !== target.text) {
			result[targetIndex] = { ...target, text: strippedText }
		}
	} else if (target.type === "tool_result") {
		// The helper returns the same object when nothing was stripped.
		result[targetIndex] = stripEnvDetailsFromToolResult(target)
	}

	return result
}

/**
 * Removes a trailing environment details section (and the newlines preceding it)
 * from the end of a string. Strings without such a section are returned unchanged.
 */
function stripEnvDetailsFromText(text: string): string {
	return text.replace(TRAILING_ENVIRONMENT_DETAILS_PATTERN, "")
}

/**
 * Strips trailing environment details from a tool_result block's content.
 *
 * String content is stripped directly; for array content every text entry is stripped.
 * When nothing changes, the original block object is returned so callers can detect
 * "no change" by identity.
 */
function stripEnvDetailsFromToolResult(
	toolResult: Anthropic.Messages.ToolResultBlockParam,
): Anthropic.Messages.ToolResultBlockParam {
	const { content, ...rest } = toolResult

	if (content == null) {
		return toolResult
	}

	if (typeof content === "string") {
		const strippedContent = stripEnvDetailsFromText(content)
		return strippedContent === content ? toolResult : { ...rest, content: strippedContent }
	}

	if (Array.isArray(content)) {
		let changed = false
		const newContent = content.map((block) => {
			if (block.type === "text") {
				const strippedText = stripEnvDetailsFromText(block.text)
				if (strippedText !== block.text) {
					changed = true
					return { ...block, text: strippedText }
				}
			}
			return block
		})

		return changed ? { ...rest, content: newContent } : toolResult
	}

	return toolResult
}