Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions crates/loopal-context/src/budget.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ pub struct ContextBudget {
pub safety_margin: u32,
/// Actual token budget available for messages.
pub message_budget: u32,
/// True max_output_tokens from model (uncapped, for API constraint validation).
pub max_output_tokens: u32,
}

impl ContextBudget {
Expand Down Expand Up @@ -48,6 +50,7 @@ impl ContextBudget {
output_reserve,
safety_margin,
message_budget,
max_output_tokens,
}
}

Expand Down Expand Up @@ -77,6 +80,19 @@ impl ContextBudget {
pub fn needs_emergency(&self, msg_tokens: u32) -> bool {
    // The emergency threshold is 19/20 (95%) of the message budget.
    // Widen to u64 before multiplying: `message_budget * 19` in u32 overflows
    // once the budget exceeds u32::MAX / 19 (~226M tokens), which panics in
    // debug builds and silently wraps in release builds.
    let threshold = u64::from(self.message_budget) * 19 / 20;
    u64::from(msg_tokens) > threshold
}

/// Pick a `max_tokens` value that fits in what is left of the context window.
///
/// Meant as a pre-flight step before an API call: as the input grows, the
/// output allowance is shrunk so the request avoids the
/// `input + max_tokens > context_window` rejection.
///
/// The headroom is `context_window - estimated_input - safety_margin`
/// (saturating at zero). The result is `max_output_tokens` when the headroom
/// is at least that large, otherwise the headroom itself.
///
/// The result is never below 1. When the estimated input plus the safety
/// margin already fills the window, 1 is returned, so in that case
/// `estimated_input + result` can still exceed `context_window`.
pub fn clamp_output_tokens(&self, estimated_input: u32) -> u32 {
    // Tokens already spoken for: the estimated prompt plus the safety margin.
    let reserved = estimated_input.saturating_add(self.safety_margin);
    let headroom = self.context_window.saturating_sub(reserved);

    // Cap at the model's own limit, then apply the floor of one token
    // (presumably because a zero `max_tokens` is not a valid request — confirm).
    let capped = std::cmp::min(self.max_output_tokens, headroom);
    std::cmp::max(capped, 1)
}
}

#[cfg(test)]
Expand Down Expand Up @@ -108,6 +124,7 @@ mod tests {
output_reserve: 0,
safety_margin: 0,
message_budget: 100_000,
max_output_tokens: 16_384,
};
assert!(!budget.needs_compaction(74_999));
assert!(budget.needs_compaction(75_001));
Expand All @@ -122,8 +139,36 @@ mod tests {
output_reserve: 0,
safety_margin: 0,
message_budget: 100_000,
max_output_tokens: 16_384,
};
assert!(!budget.needs_emergency(94_999));
assert!(budget.needs_emergency(95_001));
}

#[test]
fn calculate_preserves_max_output_tokens() {
    // The model's advertised output limit must be stored as-is on the budget,
    // while the output reserve is expected to be the smaller 16_384 value.
    let model_max_output = 64_000;
    let budget = ContextBudget::calculate(200_000, "sys", 0, model_max_output);

    assert_eq!(budget.max_output_tokens, model_max_output);
    assert_eq!(budget.output_reserve, 16_384);
}

#[test]
fn clamp_preserves_when_input_small() {
    let budget = ContextBudget::calculate(200_000, "", 0, 64_000);

    // With only 50K tokens of input there is ample headroom, so the full
    // model limit should come back untouched.
    let clamped = budget.clamp_output_tokens(50_000);
    assert_eq!(clamped, 64_000);
}

#[test]
fn clamp_reduces_when_input_large() {
    let budget = ContextBudget::calculate(200_000, "", 0, 64_000);
    let clamped = budget.clamp_output_tokens(180_000);

    // The input leaves less room than the model limit, so the result must shrink...
    assert!(clamped < 64_000, "should be reduced: {clamped}");

    // ...and must fit in what remains of the window after the safety margin.
    let ceiling = 200_000 - 180_000 - budget.safety_margin;
    assert!(clamped <= ceiling);
}

#[test]
fn clamp_saturates_to_one() {
    let budget = ContextBudget::calculate(200_000, "", 0, 64_000);

    // An input estimate larger than the whole window leaves no headroom;
    // the clamp must bottom out at 1 rather than 0 or an underflowed value.
    let clamped = budget.clamp_output_tokens(300_000);
    assert_eq!(clamped, 1);
}
}
1 change: 1 addition & 0 deletions crates/loopal-context/tests/suite/degradation_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ fn make_budget(message_budget: u32) -> ContextBudget {
output_reserve: 0,
safety_margin: 0,
message_budget,
max_output_tokens: 16_384,
}
}

Expand Down
1 change: 1 addition & 0 deletions crates/loopal-context/tests/suite/store_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ fn make_budget(message_budget: u32) -> ContextBudget {
output_reserve: 0,
safety_margin: 0,
message_budget,
max_output_tokens: 16_384,
}
}

Expand Down
7 changes: 5 additions & 2 deletions crates/loopal-error/src/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ impl ProviderError {
// non-overflow 400s (prefill rejection, malformed blocks, etc.).
if *status == 400
&& (message.contains("prompt is too long")
|| message.contains("maximum context length"))
|| message.contains("maximum context length")
|| message.contains("exceed context limit"))
{
return false;
}
Expand All @@ -34,7 +35,9 @@ impl ProviderError {
match self {
ProviderError::ContextOverflow { .. } => true,
ProviderError::Api { status, message } if *status == 400 => {
message.contains("prompt is too long") || message.contains("maximum context length")
message.contains("prompt is too long")
|| message.contains("maximum context length")
|| message.contains("exceed context limit")
}
_ => false,
}
Expand Down
14 changes: 14 additions & 0 deletions crates/loopal-error/tests/suite/error_edge_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,20 @@ fn test_api_400_prompt_too_long_is_context_overflow() {
assert!(!err.is_retryable());
}

#[test]
fn test_api_400_exceed_context_limit_is_context_overflow() {
    // A 400 response body whose message uses the "exceed context limit"
    // wording (input length plus `max_tokens` larger than the window).
    let payload = concat!(
        r#"{"error":{"message":"input length and `max_tokens` exceed context limit:"#,
        r#" 140795 + 64000 > 200000","type":"invalid_request_error"},"type":"error"}"#,
    );
    let overflow = ProviderError::Api {
        status: 400,
        message: payload.into(),
    };

    // It must be classified as a context overflow and must not be retryable.
    assert!(overflow.is_context_overflow());
    assert!(!overflow.is_retryable());
}

#[test]
fn test_api_400_max_context_is_context_overflow() {
let err = ProviderError::Api {
Expand Down
4 changes: 3 additions & 1 deletion crates/loopal-provider/src/anthropic/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,9 @@ impl AnthropicProvider {
// many 400 errors (prefill rejection, malformed blocks, etc.) and
// must not be conflated with context overflow.
if status.as_u16() == 400
&& (text.contains("prompt is too long") || text.contains("maximum context length"))
&& (text.contains("prompt is too long")
|| text.contains("maximum context length")
|| text.contains("exceed context limit"))
{
return ProviderError::ContextOverflow { message: text }.into();
}
Expand Down
22 changes: 16 additions & 6 deletions crates/loopal-runtime/src/agent_loop/llm_params.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
//!
//! Split from llm.rs to keep files under 200 lines.

use loopal_context::{estimate_messages_tokens, estimate_tokens};
use loopal_error::Result;
use loopal_message::Message;
use loopal_provider::{get_thinking_capability, resolve_thinking_config};
Expand Down Expand Up @@ -34,18 +35,27 @@ impl AgentLoopRunner {
tool_defs.retain(|t| plan_filter.contains(&t.name));
}

// Pre-flight: estimate input tokens and clamp max_tokens to avoid
// the API's `input + max_tokens > context_window` hard rejection.
let tool_token_count = loopal_context::ContextBudget::estimate_tool_tokens(&tool_defs);
let estimated_input = estimate_tokens(&full_system_prompt)
+ tool_token_count
+ estimate_messages_tokens(messages);
let safe_max_tokens = self
.params
.store
.budget()
.clamp_output_tokens(estimated_input);

let capability = get_thinking_capability(self.params.config.model());
let resolved_thinking = resolve_thinking_config(
&self.model_config.thinking,
capability,
self.model_config.max_output_tokens,
);
let resolved_thinking =
resolve_thinking_config(&self.model_config.thinking, capability, safe_max_tokens);
Ok(ChatParams {
model: self.params.config.model().to_string(),
messages: messages.to_vec(),
system_prompt: full_system_prompt,
tools: tool_defs,
max_tokens: self.model_config.max_output_tokens,
max_tokens: safe_max_tokens,
temperature: None,
thinking: resolved_thinking,
debug_dump_dir: Some(loopal_config::tmp_dir()),
Expand Down
13 changes: 13 additions & 0 deletions crates/loopal-runtime/src/agent_loop/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ impl AgentLoopRunner {
pub(super) async fn run_loop(&mut self) -> Result<AgentOutput> {
let mut last_output = String::new();
let mut server_block_retry = false;
let mut context_overflow_retry = false;
let mut needs_input = self.params.store.is_empty();

loop {
Expand Down Expand Up @@ -84,6 +85,17 @@ impl AgentLoopRunner {
needs_input = false;
continue;
}
if !context_overflow_retry && e.is_context_overflow() {
context_overflow_retry = true;
info!("context overflow detected, emergency compacting and retrying");
self.params.store.emergency_compact(5);
self.emit(AgentEventPayload::Error {
message: "Context overflow — compacting and retrying...".into(),
})
.await?;
needs_input = false;
continue;
}
if self.interrupt.take() {
self.emit_interrupted().await?;
continue;
Expand All @@ -93,6 +105,7 @@ impl AgentLoopRunner {
}
}
server_block_retry = false;
context_overflow_retry = false;
}

Ok(AgentOutput {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ fn make_test_budget() -> ContextBudget {
output_reserve: 16_384,
safety_margin: 10_000,
message_budget: 173_616,
max_output_tokens: 64_000,
}
}

Expand Down
1 change: 1 addition & 0 deletions crates/loopal-runtime/tests/agent_loop/integration_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ fn make_test_budget() -> ContextBudget {
output_reserve: 16_384,
safety_margin: 10_000,
message_budget: 173_616,
max_output_tokens: 64_000,
}
}

Expand Down
1 change: 1 addition & 0 deletions crates/loopal-runtime/tests/agent_loop/llm_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ fn test_prepare_chat_params_act_mode() {
!params.system_prompt.is_empty(),
"env section should be present"
);
// With empty messages and 200K window, max_tokens should be preserved (headroom is large).
assert_eq!(params.max_tokens, runner.model_config.max_output_tokens);
assert!(params.messages.is_empty());
// Builtin tools should be present
Expand Down
1 change: 1 addition & 0 deletions crates/loopal-runtime/tests/agent_loop/mock_provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ fn make_test_budget() -> ContextBudget {
output_reserve: 16_384,
safety_margin: 10_000,
message_budget: 173_616,
max_output_tokens: 64_000,
}
}

Expand Down
1 change: 1 addition & 0 deletions crates/loopal-runtime/tests/agent_loop/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ pub fn make_test_budget() -> ContextBudget {
output_reserve: 16_384,
safety_margin: 10_000,
message_budget: 173_616,
max_output_tokens: 64_000,
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ fn make_test_budget() -> ContextBudget {
output_reserve: 16_384,
safety_margin: 10_000,
message_budget: 173_616,
max_output_tokens: 64_000,
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ fn make_test_budget() -> ContextBudget {
output_reserve: 16_384,
safety_margin: 10_000,
message_budget: 173_616,
max_output_tokens: 64_000,
}
}

Expand Down
1 change: 1 addition & 0 deletions crates/loopal-tui/tests/suite/e2e_compact_edge_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ fn tiny_budget() -> ContextBudget {
output_reserve: 50,
safety_margin: 25,
message_budget: 425,
max_output_tokens: 50,
}
}

Expand Down
1 change: 1 addition & 0 deletions crates/loopal-tui/tests/suite/e2e_compact_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ fn tiny_budget() -> ContextBudget {
output_reserve: 50,
safety_margin: 25,
message_budget: 425,
max_output_tokens: 50,
}
}

Expand Down
Loading