From 16b938d5e3c4cc71225bb82df5a357077989cf8e Mon Sep 17 00:00:00 2001 From: Mark Sturdevant Date: Fri, 27 Mar 2026 16:23:53 -0700 Subject: [PATCH 1/2] fix: handle direct model answers in ReACT loop The ReACT framework now properly handles cases where the model provides a direct answer without calling tools. Previously, these answers were ignored and the loop would continue until exhausting the budget. Added test coverage for both scenarios (no tools, unused tools). Fixes: #762 Signed-off-by: Mark Sturdevant --- mellea/stdlib/frameworks/react.py | 18 ++++-- test/stdlib/test_react_direct_answer.py | 75 +++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 5 deletions(-) create mode 100644 test/stdlib/test_react_direct_answer.py diff --git a/mellea/stdlib/frameworks/react.py b/mellea/stdlib/frameworks/react.py index 810542295..b6a4bd8b6 100644 --- a/mellea/stdlib/frameworks/react.py +++ b/mellea/stdlib/frameworks/react.py @@ -105,9 +105,19 @@ async def react( if tool_res.name == MELLEA_FINALIZER_TOOL: is_final = True - if is_final: - assert len(tool_responses) == 1, "multiple tools were called with 'final'" + # Check if we should return: either finalizer was called or model gave direct answer + should_return = is_final or (step.tool_calls is None and step.value is not None) + if should_return: + if is_final: + assert len(tool_responses) == 1, ( + "multiple tools were called with 'final'" + ) + if format is None: + # The tool has already been called above. + step._underlying_value = str(tool_responses[0].content) + + # Apply format if requested (works for both finalizer and direct answer cases) if format is not None: step, next_context = await mfuncs.aact( action=ReactThought(), @@ -120,9 +130,7 @@ async def react( ) assert isinstance(next_context, ChatContext) context = next_context - else: - # The tool has already been called above. 
- step._underlying_value = str(tool_responses[0].content) + return step, context raise RuntimeError(f"could not complete react loop in {loop_budget} iterations") diff --git a/test/stdlib/test_react_direct_answer.py b/test/stdlib/test_react_direct_answer.py new file mode 100644 index 000000000..31f458896 --- /dev/null +++ b/test/stdlib/test_react_direct_answer.py @@ -0,0 +1,75 @@ +"""Test ReACT framework handling of direct answers without tool calls.""" + +import pytest + +from mellea.backends.tools import tool +from mellea.stdlib.context import ChatContext +from mellea.stdlib.frameworks.react import react +from mellea.stdlib.session import start_session + + +@pytest.mark.ollama +@pytest.mark.llm +async def test_react_direct_answer_without_tools(): + """Test that ReACT handles direct answers when model doesn't call tools. + + This tests the case where the model provides a direct answer in step.value + without making any tool calls. The fix ensures the loop terminates properly + instead of continuing until loop_budget is exhausted. + """ + m = start_session() + + # Ask a simple question that doesn't require tools + # The model should provide a direct answer without calling any tools + out, _ = await react( + goal="What is 2 + 2?", + context=ChatContext(), + backend=m.backend, + tools=[], # No tools provided + loop_budget=3, # Should complete in 1 iteration, not exhaust budget + ) + + # Verify we got an answer + assert out.value is not None + assert len(out.value) > 0 + + # The answer should contain "4" or "four" + answer_lower = out.value.lower() + assert "4" in answer_lower or "four" in answer_lower + + +@pytest.mark.ollama +@pytest.mark.llm +async def test_react_direct_answer_with_unused_tools(): + """Test that ReACT handles direct answers even when tools are available. + + This tests the case where tools are provided but the model chooses to + answer directly without using them. 
+ """ + m = start_session() + + # Create a dummy tool that won't be needed + @tool + def search_web(query: str) -> str: + """Search the web for information.""" + return "Search results" + + # Ask a question that doesn't need the tool + out, _ = await react( + goal="What is the capital of France?", + context=ChatContext(), + backend=m.backend, + tools=[search_web], + loop_budget=3, + ) + + # Verify we got an answer + assert out.value is not None + assert len(out.value) > 0 + + # The answer should mention Paris + answer_lower = out.value.lower() + assert "paris" in answer_lower + + +# Made with Bob From c46aff27c4014f9ca6c659da84094b30f862105e Mon Sep 17 00:00:00 2001 From: Mark Sturdevant Date: Tue, 31 Mar 2026 19:28:04 -0700 Subject: [PATCH 2/2] fix: before failing react loop ask LLM if it has the answer With some models (with Ollama, for example) the loop gets stuck: the model has the answer but won't call the finalize tool. Before failing due to the iteration limit, ask the model whether it has the answer and, if it responds True, use it. Note: This is only done at the end of the iterations because it is questionable to penalize other models on each iteration. When failure is the only other option, it seems to be worth a try. Fixes: #762 Signed-off-by: Mark Sturdevant --- mellea/stdlib/frameworks/react.py | 60 ++++++++++++++++++++++++------- 1 file changed, 48 insertions(+), 12 deletions(-) diff --git a/mellea/stdlib/frameworks/react.py b/mellea/stdlib/frameworks/react.py index b6a4bd8b6..b3d77d450 100644 --- a/mellea/stdlib/frameworks/react.py +++ b/mellea/stdlib/frameworks/react.py @@ -7,6 +7,8 @@ history tracking. Raises ``RuntimeError`` if the loop ends without a final answer. 
""" +import pydantic + # from PIL import Image as PILImage from mellea.backends.model_options import ModelOption from mellea.core.backend import Backend, BaseModelSubclass @@ -24,6 +26,14 @@ from mellea.stdlib.context import ChatContext +class TrueOrFalse(pydantic.BaseModel): + """Response indicating whether the ReACT agent has completed its task.""" + + answer: bool = pydantic.Field( + description="True if you have enough information to answer the user's question, False if you need more tool calls" + ) + + async def react( goal: str, context: ChatContext, @@ -105,19 +115,43 @@ async def react( if tool_res.name == MELLEA_FINALIZER_TOOL: is_final = True - # Check if we should return: either finalizer was called or model gave direct answer - should_return = is_final or (step.tool_calls is None and step.value is not None) - - if should_return: - if is_final: - assert len(tool_responses) == 1, ( - "multiple tools were called with 'final'" + # Check for special case where model already has the answer, but it won't call the finalize tool. + # Instead of letting this run out of iterations and fail, let's ask. + # Only do this before we fail on iteration limit as a last resort because it's hard to justify doing it earlier for now. + elif -1 < loop_budget <= turn_num and step.value: + # If the turn number has reached the end of loop budget (and budget is not unlimited), + # then it's time to check if the model is just loopy and already has the answer. + print("### Done Check") + print("STEP_TOOL_CALLS:", step.tool_calls) + print("STEP:", step) + print("CONTEXT:", context) + content = mfuncs.chat( + content=f"Do you know the answer to the user's original query ({goal})? If so, respond with True. 
If you need to take more actions, then respond False.", + context=context, + backend=backend, + format=TrueOrFalse, + )[0].content + have_answer = TrueOrFalse.model_validate_json(content).answer + + print("### Done Check ANSWER: ", have_answer) + if have_answer: + # Create a synthetic finalizer tool response to be consistent with normal loop + finalizer_response = ToolMessage( + role="tool", + content=step.value, + tool_output=step.value, + name=MELLEA_FINALIZER_TOOL, + args={}, + tool=None, # type: ignore ) - if format is None: - # The tool has already been called above. - step._underlying_value = str(tool_responses[0].content) + tool_responses = [finalizer_response] + context = context.add(finalizer_response) + is_final = True - # Apply format if requested (works for both finalizer and direct answer cases) + if is_final: + assert len(tool_responses) == 1, "multiple tools were called with 'final'" + + # Apply format if requested if format is not None: step, next_context = await mfuncs.aact( action=ReactThought(), @@ -130,7 +164,9 @@ async def react( ) assert isinstance(next_context, ChatContext) context = next_context - + else: + # The tool has already been called above. + step._underlying_value = str(tool_responses[0].content) return step, context raise RuntimeError(f"could not complete react loop in {loop_budget} iterations")