From 973806b1cb35792555bead994cb3ed94656eb171 Mon Sep 17 00:00:00 2001 From: jif-oai Date: Fri, 15 May 2026 19:03:20 +0200 Subject: [PATCH 1/6] feat: add `body_after_prefix` --- .../codex_app_server_protocol.schemas.json | 29 ++++ .../codex_app_server_protocol.v2.schemas.json | 29 ++++ .../schema/json/v2/ConfigReadResponse.json | 29 ++++ .../typescript/AutoCompactTokenLimitScope.ts | 9 ++ .../schema/typescript/index.ts | 1 + .../schema/typescript/v2/Config.ts | 3 +- .../src/protocol/v2/config.rs | 2 + .../src/protocol/v2/tests.rs | 4 + codex-rs/config/src/config_toml.rs | 5 + codex-rs/core-api/src/lib.rs | 1 + codex-rs/core/config.schema.json | 27 ++++ codex-rs/core/src/config/config_tests.rs | 4 + codex-rs/core/src/config/mod.rs | 8 + codex-rs/core/src/session/mod.rs | 12 ++ codex-rs/core/src/session/turn.rs | 99 ++++++++---- codex-rs/core/src/state/session.rs | 15 ++ codex-rs/core/tests/suite/compact.rs | 145 ++++++++++++++++++ codex-rs/protocol/src/config_types.rs | 15 ++ codex-rs/thread-manager-sample/src/main.rs | 2 + 19 files changed, 411 insertions(+), 28 deletions(-) create mode 100644 codex-rs/app-server-protocol/schema/typescript/AutoCompactTokenLimitScope.ts diff --git a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json index 8b292f667df..751f4adcd60 100644 --- a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json +++ b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json @@ -6290,6 +6290,25 @@ } ] }, + "AutoCompactTokenLimitScope": { + "description": "Selects which part of the active context is charged against `model_auto_compact_token_limit`.", + "oneOf": [ + { + "description": "Count the full active context against the limit.", + "enum": [ + "total" + ], + "type": "string" + }, + { + "description": "Count sampled output and later growth after the carried window prefix.", + "enum": [ + "body_after_prefix" + ], + "type": "string" + } + ] + }, "AutoReviewDecisionSource": { "description": "[UNSTABLE] Source that produced a terminal approval auto-review decision.", "enum": [ @@ -7138,6 +7157,16 @@ "null" ] }, + "model_auto_compact_token_limit_scope": { + "anyOf": [ + { + "$ref": "#/definitions/v2/AutoCompactTokenLimitScope" + }, + { + "type": "null" + } + ] + }, "model_context_window": { "format": "int64", "type": [ diff --git a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.v2.schemas.json b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.v2.schemas.json index 16e548e8c20..8ae7b4ed204 100644 --- a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.v2.schemas.json +++ b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.v2.schemas.json @@ -826,6 +826,25 @@ } ] }, + "AutoCompactTokenLimitScope": { + "description": "Selects which part of the active context is charged against `model_auto_compact_token_limit`.", + "oneOf": [ + { + "description": "Count the full active context against the limit.", + "enum": [ + "total" + ], + "type": "string" + }, + { + "description": "Count sampled output and later growth after the carried window prefix.", + "enum": [ + "body_after_prefix" + ], + "type": "string" + } + ] + }, "AutoReviewDecisionSource": { "description": "[UNSTABLE] Source that produced a terminal approval auto-review decision.", "enum": [ @@ -3527,6 +3546,16 @@ "null" ] }, + "model_auto_compact_token_limit_scope": { + "anyOf": [ + { + "$ref": "#/definitions/AutoCompactTokenLimitScope" + }, + { + "type": "null" + } + ] + }, "model_context_window": { "format": "int64", "type": [ diff --git a/codex-rs/app-server-protocol/schema/json/v2/ConfigReadResponse.json b/codex-rs/app-server-protocol/schema/json/v2/ConfigReadResponse.json index 81364a6f403..7595f7fd009 100644 --- a/codex-rs/app-server-protocol/schema/json/v2/ConfigReadResponse.json +++ b/codex-rs/app-server-protocol/schema/json/v2/ConfigReadResponse.json @@ -188,6 +188,25 @@ } ] }, + "AutoCompactTokenLimitScope": { + "description": "Selects which part of the active context is charged against `model_auto_compact_token_limit`.", + "oneOf": [ + { + "description": "Count the full active context against the limit.", + "enum": [ + "total" + ], + "type": "string" + }, + { + "description": "Count sampled output and later growth after the carried window prefix.", + "enum": [ + "body_after_prefix" + ], + "type": "string" + } + ] + }, "Config": { "additionalProperties": true, "properties": { @@ -280,6 +299,16 @@ "null" ] }, + "model_auto_compact_token_limit_scope": { + "anyOf": [ + { + "$ref": "#/definitions/AutoCompactTokenLimitScope" + }, + { + "type": "null" + } + ] + }, "model_context_window": { "format": "int64", "type": [ diff --git a/codex-rs/app-server-protocol/schema/typescript/AutoCompactTokenLimitScope.ts b/codex-rs/app-server-protocol/schema/typescript/AutoCompactTokenLimitScope.ts new file mode 100644 index 00000000000..3d9a56c82a6 --- /dev/null +++ b/codex-rs/app-server-protocol/schema/typescript/AutoCompactTokenLimitScope.ts @@ -0,0 +1,9 @@ +// GENERATED CODE! DO NOT MODIFY BY HAND! + +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. + +/** + * Selects which part of the active context is charged against + * `model_auto_compact_token_limit`. + */ +export type AutoCompactTokenLimitScope = "total" | "body_after_prefix"; diff --git a/codex-rs/app-server-protocol/schema/typescript/index.ts b/codex-rs/app-server-protocol/schema/typescript/index.ts index 97ea4356019..8be75af546f 100644 --- a/codex-rs/app-server-protocol/schema/typescript/index.ts +++ b/codex-rs/app-server-protocol/schema/typescript/index.ts @@ -5,6 +5,7 @@ export type { AgentPath } from "./AgentPath"; export type { ApplyPatchApprovalParams } from "./ApplyPatchApprovalParams"; export type { ApplyPatchApprovalResponse } from "./ApplyPatchApprovalResponse"; export type { AuthMode } from "./AuthMode"; +export type { AutoCompactTokenLimitScope } from "./AutoCompactTokenLimitScope"; export type { ClientInfo } from "./ClientInfo"; export type { ClientNotification } from "./ClientNotification"; export type { ClientRequest } from "./ClientRequest"; diff --git a/codex-rs/app-server-protocol/schema/typescript/v2/Config.ts b/codex-rs/app-server-protocol/schema/typescript/v2/Config.ts index ba24663e879..29eae987741 100644 --- a/codex-rs/app-server-protocol/schema/typescript/v2/Config.ts +++ b/codex-rs/app-server-protocol/schema/typescript/v2/Config.ts @@ -1,6 +1,7 @@ // GENERATED CODE! DO NOT MODIFY BY HAND! // This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. +import type { AutoCompactTokenLimitScope } from "../AutoCompactTokenLimitScope"; import type { ForcedLoginMethod } from "../ForcedLoginMethod"; import type { ReasoningEffort } from "../ReasoningEffort"; import type { ReasoningSummary } from "../ReasoningSummary"; @@ -16,7 +17,7 @@ import type { SandboxMode } from "./SandboxMode"; import type { SandboxWorkspaceWrite } from "./SandboxWorkspaceWrite"; import type { ToolsV2 } from "./ToolsV2"; -export type Config = {model: string | null, review_model: string | null, model_context_window: bigint | null, model_auto_compact_token_limit: bigint | null, model_provider: string | null, approval_policy: AskForApproval | null, /** +export type Config = {model: string | null, review_model: string | null, model_context_window: bigint | null, model_auto_compact_token_limit: bigint | null, model_auto_compact_token_limit_scope: AutoCompactTokenLimitScope | null, model_provider: string | null, approval_policy: AskForApproval | null, /** * [UNSTABLE] Optional default for where approval requests are routed for * review. */ diff --git a/codex-rs/app-server-protocol/src/protocol/v2/config.rs b/codex-rs/app-server-protocol/src/protocol/v2/config.rs index b46515d8114..a34e6c530f9 100644 --- a/codex-rs/app-server-protocol/src/protocol/v2/config.rs +++ b/codex-rs/app-server-protocol/src/protocol/v2/config.rs @@ -3,6 +3,7 @@ use super::AskForApproval; use super::SandboxMode; use super::shared::default_enabled; use codex_experimental_api_macros::ExperimentalApi; +use codex_protocol::config_types::AutoCompactTokenLimitScope; use codex_protocol::config_types::ForcedLoginMethod; use codex_protocol::config_types::ReasoningSummary; use codex_protocol::config_types::Verbosity; @@ -251,6 +252,7 @@ pub struct Config { pub review_model: Option, pub model_context_window: Option, pub model_auto_compact_token_limit: Option, + pub model_auto_compact_token_limit_scope: Option, pub model_provider: Option, #[experimental(nested)] pub approval_policy: Option, diff --git a/codex-rs/app-server-protocol/src/protocol/v2/tests.rs b/codex-rs/app-server-protocol/src/protocol/v2/tests.rs index 50058cb6862..a0aefa0bf42 100644 --- a/codex-rs/app-server-protocol/src/protocol/v2/tests.rs +++ b/codex-rs/app-server-protocol/src/protocol/v2/tests.rs @@ -1560,6 +1560,7 @@ fn config_granular_approval_policy_is_marked_experimental() { review_model: None, model_context_window: None, model_auto_compact_token_limit: None, + model_auto_compact_token_limit_scope: None, model_provider: None, approval_policy: Some(AskForApproval::Granular { sandbox_approval: false, @@ -1600,6 +1601,7 @@ fn config_approvals_reviewer_is_marked_experimental() { review_model: None, model_context_window: None, model_auto_compact_token_limit: None, + model_auto_compact_token_limit_scope: None, model_provider: None, approval_policy: None, approvals_reviewer: Some(ApprovalsReviewer::AutoReview), @@ -1634,6 +1636,7 @@ fn config_nested_profile_granular_approval_policy_is_marked_experimental() { review_model: None, model_context_window: None, model_auto_compact_token_limit: None, + model_auto_compact_token_limit_scope: None, model_provider: None, approval_policy: None, approvals_reviewer: None, @@ -1690,6 +1693,7 @@ fn config_nested_profile_approvals_reviewer_is_marked_experimental() { review_model: None, model_context_window: None, model_auto_compact_token_limit: None, + model_auto_compact_token_limit_scope: None, model_provider: None, approval_policy: None, approvals_reviewer: None, diff --git a/codex-rs/config/src/config_toml.rs b/codex-rs/config/src/config_toml.rs index 72641150ddb..0d5db00badc 100644 --- a/codex-rs/config/src/config_toml.rs +++ b/codex-rs/config/src/config_toml.rs @@ -38,6 +38,7 @@ use codex_model_provider_info::ModelProviderInfo; use codex_model_provider_info::OLLAMA_CHAT_PROVIDER_REMOVED_ERROR; use codex_model_provider_info::OLLAMA_OSS_PROVIDER_ID; use codex_model_provider_info::OPENAI_PROVIDER_ID; +use codex_protocol::config_types::AutoCompactTokenLimitScope; use codex_protocol::config_types::ForcedLoginMethod; use codex_protocol::config_types::Personality; use codex_protocol::config_types::ReasoningSummary; @@ -156,6 +157,10 @@ pub struct ConfigToml { /// Token usage threshold triggering auto-compaction of conversation history. pub model_auto_compact_token_limit: Option, + /// Controls whether the auto-compaction limit applies to the full context or + /// only to tokens after the carried prefix in the current compaction window. + pub model_auto_compact_token_limit_scope: Option, + /// Default approval policy for executing commands. pub approval_policy: Option, diff --git a/codex-rs/core-api/src/lib.rs b/codex-rs/core-api/src/lib.rs index 04ebaf8e7e6..e87ee82f309 100644 --- a/codex-rs/core-api/src/lib.rs +++ b/codex-rs/core-api/src/lib.rs @@ -58,6 +58,7 @@ pub use codex_models_manager::manager::SharedModelsManager; pub use codex_protocol::ThreadId; pub use codex_protocol::config_types::AltScreenMode; pub use codex_protocol::config_types::ApprovalsReviewer; +pub use codex_protocol::config_types::AutoCompactTokenLimitScope; pub use codex_protocol::config_types::CollaborationModeMask; pub use codex_protocol::config_types::ShellEnvironmentPolicy; pub use codex_protocol::config_types::WebSearchMode; diff --git a/codex-rs/core/config.schema.json b/codex-rs/core/config.schema.json index 0387b2e4015..b4f847ae3b9 100644 --- a/codex-rs/core/config.schema.json +++ b/codex-rs/core/config.schema.json @@ -309,6 +309,25 @@ } ] }, + "AutoCompactTokenLimitScope": { + "description": "Selects which part of the active context is charged against `model_auto_compact_token_limit`.", + "oneOf": [ + { + "description": "Count the full active context against the limit.", + "enum": [ + "total" + ], + "type": "string" + }, + { + "description": "Count sampled output and later growth after the carried window prefix.", + "enum": [ + "body_after_prefix" + ], + "type": "string" + } + ] + }, "AutoReviewToml": { "properties": { "policy": { @@ -4534,6 +4553,14 @@ "format": "int64", "type": "integer" }, + "model_auto_compact_token_limit_scope": { + "allOf": [ + { + "$ref": "#/definitions/AutoCompactTokenLimitScope" + } + ], + "description": "Controls whether the auto-compaction limit applies to the full context or only to tokens after the carried prefix in the current compaction window." + }, "model_catalog_json": { "allOf": [ { diff --git a/codex-rs/core/src/config/config_tests.rs b/codex-rs/core/src/config/config_tests.rs index f197ccad71c..cc748138b96 100644 --- a/codex-rs/core/src/config/config_tests.rs +++ b/codex-rs/core/src/config/config_tests.rs @@ -7623,6 +7623,7 @@ async fn test_precedence_fixture_with_o3_profile() -> std::io::Result<()> { review_model: None, model_context_window: None, model_auto_compact_token_limit: None, + model_auto_compact_token_limit_scope: AutoCompactTokenLimitScope::Total, service_tier: None, model_provider_id: "openai".to_string(), model_provider: fixture.openai_provider.clone(), @@ -8073,6 +8074,7 @@ async fn test_precedence_fixture_with_gpt3_profile() -> std::io::Result<()> { review_model: None, model_context_window: None, model_auto_compact_token_limit: None, + model_auto_compact_token_limit_scope: AutoCompactTokenLimitScope::Total, service_tier: None, model_provider_id: "openai-custom".to_string(), model_provider: fixture.openai_custom_provider.clone(), @@ -8237,6 +8239,7 @@ async fn test_precedence_fixture_with_zdr_profile() -> std::io::Result<()> { review_model: None, model_context_window: None, model_auto_compact_token_limit: None, + model_auto_compact_token_limit_scope: AutoCompactTokenLimitScope::Total, service_tier: None, model_provider_id: "openai".to_string(), model_provider: fixture.openai_provider.clone(), @@ -8386,6 +8389,7 @@ async fn test_precedence_fixture_with_gpt5_profile() -> std::io::Result<()> { review_model: None, model_context_window: None, model_auto_compact_token_limit: None, + model_auto_compact_token_limit_scope: AutoCompactTokenLimitScope::Total, service_tier: None, model_provider_id: "openai".to_string(), model_provider: fixture.openai_provider.clone(), diff --git a/codex-rs/core/src/config/mod.rs b/codex-rs/core/src/config/mod.rs index 527c93b3f37..9812dd25fef 100644 --- a/codex-rs/core/src/config/mod.rs +++ b/codex-rs/core/src/config/mod.rs @@ -77,6 +77,7 @@ use codex_model_provider_info::built_in_model_providers; use codex_model_provider_info::merge_configured_model_providers; use codex_models_manager::ModelsManagerConfig; use codex_protocol::config_types::AltScreenMode; +use codex_protocol::config_types::AutoCompactTokenLimitScope; use codex_protocol::config_types::ForcedLoginMethod; use codex_protocol::config_types::Personality; use codex_protocol::config_types::ReasoningSummary; @@ -556,6 +557,10 @@ pub struct Config { /// Token usage threshold triggering auto-compaction of conversation history. pub model_auto_compact_token_limit: Option, + /// Controls whether `model_auto_compact_token_limit` applies to the full + /// active context or only tokens after the carried compaction-window prefix. + pub model_auto_compact_token_limit_scope: AutoCompactTokenLimitScope, + /// Key into the model_providers map that specifies which provider to use. pub model_provider_id: String, @@ -3299,6 +3304,9 @@ impl Config { review_model, model_context_window: cfg.model_context_window, model_auto_compact_token_limit: cfg.model_auto_compact_token_limit, + model_auto_compact_token_limit_scope: cfg + .model_auto_compact_token_limit_scope + .unwrap_or_default(), model_provider_id, model_provider, cwd: resolved_cwd, diff --git a/codex-rs/core/src/session/mod.rs b/codex-rs/core/src/session/mod.rs index 2dd97710e83..73b1d4b8a40 100644 --- a/codex-rs/core/src/session/mod.rs +++ b/codex-rs/core/src/session/mod.rs @@ -80,6 +80,7 @@ use codex_protocol::approvals::ExecPolicyAmendment; use codex_protocol::approvals::NetworkPolicyAmendment; use codex_protocol::approvals::NetworkPolicyRuleAction; use codex_protocol::config_types::ApprovalsReviewer; +use codex_protocol::config_types::AutoCompactTokenLimitScope; use codex_protocol::config_types::ModeKind; use codex_protocol::config_types::Settings; use codex_protocol::config_types::WebSearchMode; @@ -1106,6 +1107,11 @@ impl Session { state.get_total_token_usage(state.server_reasoning_included()) } + pub(crate) async fn auto_compact_window_prefix_input_tokens(&self) -> Option { + let state = self.state.lock().await; + state.auto_compact_window_prefix_input_tokens() + } + pub(crate) async fn get_total_token_usage_breakdown(&self) -> TotalTokenUsageBreakdown { let state = self.state.lock().await; state.history.get_total_token_usage_breakdown() @@ -2941,6 +2947,12 @@ impl Session { let mut state = self.state.lock().await; state .update_token_info_from_usage(token_usage, turn_context.model_context_window()); + if matches!( + turn_context.config.model_auto_compact_token_limit_scope, + AutoCompactTokenLimitScope::BodyAfterPrefix + ) { + state.ensure_auto_compact_window_prefix_input_tokens(token_usage); + } state.token_info() }; if let Some(token_info) = token_info.as_ref() { diff --git a/codex-rs/core/src/session/turn.rs b/codex-rs/core/src/session/turn.rs index aee4bd360ff..1226ebf81ff 100644 --- a/codex-rs/core/src/session/turn.rs +++ b/codex-rs/core/src/session/turn.rs @@ -75,6 +75,7 @@ use codex_hooks::HookEvent; use codex_hooks::HookEventAfterAgent; use codex_hooks::HookPayload; use codex_hooks::HookResult; +use codex_protocol::config_types::AutoCompactTokenLimitScope; use codex_protocol::config_types::ModeKind; use codex_protocol::config_types::ServiceTier; use codex_protocol::error::CodexErr; @@ -150,8 +151,6 @@ pub(crate) async fn run_turn( return None; } - let model_info = turn_context.model_info.clone(); - let auto_compact_limit = model_info.auto_compact_token_limit().unwrap_or(i64::MAX); let mut client_session = prewarmed_client_session.unwrap_or_else(|| sess.services.model_client.new_session()); // TODO(ccunningham): Pre-turn compaction runs before context updates and the @@ -476,17 +475,22 @@ pub(crate) async fn run_turn( can_drain_pending_input = true; let has_pending_input = sess.has_pending_input().await; let needs_follow_up = model_needs_follow_up || has_pending_input; - let total_usage_tokens = sess.get_total_token_usage().await; - let token_limit_reached = total_usage_tokens >= auto_compact_limit; + let token_status = + auto_compact_token_status(sess.as_ref(), turn_context.as_ref()).await; + let token_limit_reached = token_status.token_limit_reached; let estimated_token_count = sess.get_estimated_token_count(turn_context.as_ref()).await; trace!( turn_id = %turn_context.sub_id, - total_usage_tokens, + total_usage_tokens = token_status.active_context_tokens, + budgeted_tokens = token_status.budgeted_tokens, estimated_token_count = ?estimated_token_count, - auto_compact_limit, + auto_compact_limit = token_status.auto_compact_limit, + auto_compact_limit_scope = ?turn_context.config.model_auto_compact_token_limit_scope, + context_window_limit = ?token_status.context_window_limit, + context_window_limit_reached = token_status.context_window_limit_reached, token_limit_reached, model_needs_follow_up, has_pending_input, @@ -726,27 +730,72 @@ struct PreSamplingCompactResult { reset_client_session: bool, } +#[derive(Debug)] +struct AutoCompactTokenStatus { + active_context_tokens: i64, + budgeted_tokens: i64, + auto_compact_limit: i64, + context_window_limit: Option, + context_window_limit_reached: bool, + token_limit_reached: bool, +} + +async fn auto_compact_token_status( + sess: &Session, + turn_context: &TurnContext, +) -> AutoCompactTokenStatus { + let active_context_tokens = sess.get_total_token_usage().await; + let (budgeted_tokens, auto_compact_limit, context_window_limit) = + match turn_context.config.model_auto_compact_token_limit_scope { + AutoCompactTokenLimitScope::Total => ( + active_context_tokens, + turn_context + .model_info + .auto_compact_token_limit() + .unwrap_or(i64::MAX), + None, + ), + AutoCompactTokenLimitScope::BodyAfterPrefix => { + let baseline = sess + .auto_compact_window_prefix_input_tokens() + .await + .unwrap_or(active_context_tokens); + ( + active_context_tokens.saturating_sub(baseline), + turn_context + .config + .model_auto_compact_token_limit + .or_else(|| turn_context.model_info.auto_compact_token_limit()) + .unwrap_or(i64::MAX), + turn_context.model_context_window(), + ) + } + }; + let context_window_limit_reached = context_window_limit + .is_some_and(|context_window_limit| active_context_tokens >= context_window_limit); + let token_limit_reached = budgeted_tokens >= auto_compact_limit || context_window_limit_reached; + + AutoCompactTokenStatus { + active_context_tokens, + budgeted_tokens, + auto_compact_limit, + context_window_limit, + context_window_limit_reached, + token_limit_reached, + } +} + async fn run_pre_sampling_compact( sess: &Arc, turn_context: &Arc, client_session: &mut ModelClientSession, ) -> CodexResult { - let total_usage_tokens_before_compaction = sess.get_total_token_usage().await; - let mut pre_sampling_compacted = maybe_run_previous_model_inline_compact( - sess, - turn_context, - client_session, - total_usage_tokens_before_compaction, - ) - .await?; + let mut pre_sampling_compacted = + maybe_run_previous_model_inline_compact(sess, turn_context, client_session).await?; let mut reset_client_session = pre_sampling_compacted; - let total_usage_tokens = sess.get_total_token_usage().await; - let auto_compact_limit = turn_context - .model_info - .auto_compact_token_limit() - .unwrap_or(i64::MAX); - // Compact if the total usage tokens are greater than the auto compact limit - if total_usage_tokens >= auto_compact_limit { + let token_status = auto_compact_token_status(sess.as_ref(), turn_context.as_ref()).await; + // Compact if the configured auto-compaction budget or usable context window is exhausted. + if token_status.token_limit_reached { reset_client_session |= run_auto_compact( sess, turn_context, @@ -773,7 +822,6 @@ async fn maybe_run_previous_model_inline_compact( sess: &Arc, turn_context: &Arc, client_session: &mut ModelClientSession, - total_usage_tokens: i64, ) -> CodexResult { let Some(previous_turn_settings) = sess.previous_turn_settings().await else { return Ok(false); @@ -790,11 +838,8 @@ async fn maybe_run_previous_model_inline_compact( let Some(new_context_window) = turn_context.model_context_window() else { return Ok(false); }; - let new_auto_compact_limit = turn_context - .model_info - .auto_compact_token_limit() - .unwrap_or(i64::MAX); - let should_run = total_usage_tokens > new_auto_compact_limit + let token_status = auto_compact_token_status(sess.as_ref(), turn_context.as_ref()).await; + let should_run = token_status.token_limit_reached && previous_model_turn_context.model_info.slug != turn_context.model_info.slug && old_context_window > new_context_window; if should_run { diff --git a/codex-rs/core/src/state/session.rs b/codex-rs/core/src/state/session.rs index 3bd4b8a26e7..49006d40f4a 100644 --- a/codex-rs/core/src/state/session.rs +++ b/codex-rs/core/src/state/session.rs @@ -28,6 +28,9 @@ pub(crate) struct SessionState { /// model/realtime handling on subsequent regular turns (including full-context /// reinjection after resume or `/compact`). previous_turn_settings: Option, + /// Prefix size for the active compaction window when auto-compaction is + /// configured to count only tokens after the carried window prefix. + auto_compact_window_prefix_input_tokens: Option, /// Startup prewarmed session prepared during session initialization. pub(crate) startup_prewarm: Option, pub(crate) active_connector_selection: HashSet, @@ -48,6 +51,7 @@ impl SessionState { dependency_env: HashMap::new(), mcp_dependency_prompted: HashSet::new(), previous_turn_settings: None, + auto_compact_window_prefix_input_tokens: None, startup_prewarm: None, active_connector_selection: HashSet::new(), pending_session_start_source: None, @@ -97,6 +101,7 @@ impl SessionState { self.history.replace(items); self.history .set_reference_context_item(reference_context_item); + self.auto_compact_window_prefix_input_tokens = None; } pub(crate) fn set_token_info(&mut self, info: Option) { @@ -120,6 +125,16 @@ impl SessionState { self.history.update_token_info(usage, model_context_window); } + pub(crate) fn ensure_auto_compact_window_prefix_input_tokens(&mut self, usage: &TokenUsage) { + if self.auto_compact_window_prefix_input_tokens.is_none() { + self.auto_compact_window_prefix_input_tokens = Some(usage.input_tokens.max(0)); + } + } + + pub(crate) fn auto_compact_window_prefix_input_tokens(&self) -> Option { + self.auto_compact_window_prefix_input_tokens + } + pub(crate) fn token_info(&self) -> Option { self.history.token_info() } diff --git a/codex-rs/core/tests/suite/compact.rs b/codex-rs/core/tests/suite/compact.rs index 68ddf1691b9..204cae48459 100644 --- a/codex-rs/core/tests/suite/compact.rs +++ b/codex-rs/core/tests/suite/compact.rs @@ -7,6 +7,7 @@ use codex_login::CodexAuth; use codex_model_provider_info::ModelProviderInfo; use codex_model_provider_info::built_in_model_providers; use codex_models_manager::bundled_models_response; +use codex_protocol::config_types::AutoCompactTokenLimitScope; use codex_protocol::items::TurnItem; use codex_protocol::models::PermissionProfile; use codex_protocol::openai_models::ModelInfo; @@ -121,6 +122,22 @@ fn set_test_compact_prompt(config: &mut Config) { config.compact_prompt = Some(SUMMARIZATION_PROMPT.to_string()); } +fn ev_completed_with_usage(id: &str, input_tokens: i64, output_tokens: i64) -> Value { + json!({ + "type": "response.completed", + "response": { + "id": id, + "usage": { + "input_tokens": input_tokens, + "input_tokens_details": null, + "output_tokens": output_tokens, + "output_tokens_details": null, + "total_tokens": input_tokens + output_tokens + } + } + }) +} + fn body_contains_text(body: &str, text: &str) -> bool { body.contains(&json_fragment(text)) } @@ -2982,6 +2999,134 @@ async fn auto_compact_clamps_config_limit_to_context_window() { ); } +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn auto_compact_body_after_prefix_ignores_starting_window_prefix() { + skip_if_no_network!(); + + let server = start_mock_server().await; + + let first_turn = sse(vec![ + ev_assistant_message("m1", FIRST_REPLY), + ev_completed_with_usage("r1", /*input_tokens*/ 600, /*output_tokens*/ 50), + ]); + let second_turn = sse(vec![ + ev_assistant_message("m2", SECOND_LARGE_REPLY), + ev_completed_with_usage("r2", /*input_tokens*/ 700, /*output_tokens*/ 50), + ]); + let auto_compact_turn = sse(vec![ + ev_assistant_message("m3", AUTO_SUMMARY_TEXT), + ev_completed_with_tokens("r3", /*total_tokens*/ 20), + ]); + let third_turn = sse(vec![ + ev_assistant_message("m4", FINAL_REPLY), + ev_completed_with_usage("r4", /*input_tokens*/ 750, /*output_tokens*/ 20), + ]); + let request_log = mount_sse_sequence( + &server, + vec![first_turn, second_turn, auto_compact_turn, third_turn], + ) + .await; + + let model_provider = non_openai_model_provider(&server); + let test = test_codex() + .with_config(move |config| { + config.model_provider = model_provider; + set_test_compact_prompt(config); + config.model_context_window = Some(1_000); + config.model_auto_compact_token_limit = Some(100); + config.model_auto_compact_token_limit_scope = + AutoCompactTokenLimitScope::BodyAfterPrefix; + }) + .build(&server) + .await + .expect("build codex"); + + for user in ["PREFIX_FREE_ONE", "PREFIX_FREE_TWO"] { + test.submit_turn(user).await.expect("submit turn"); + } + + assert_eq!( + request_log.requests().len(), + 2, + "the first two turns should not compact just because the prefix exceeds the body budget" + ); + + test.submit_turn("PREFIX_FREE_THREE") + .await + .expect("submit third turn"); + + let requests = request_log.requests(); + assert_eq!( + requests.len(), + 4, + "third turn should include pre-turn compaction plus the post-compaction request" + ); + let compact_body = requests[2].body_json().to_string(); + assert!( + body_contains_text(&compact_body, SUMMARIZATION_PROMPT), + "body-after-prefix mode should compact once tokens after the first assistant sample exceed the configured budget" + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn auto_compact_body_after_prefix_still_caps_at_context_window() { + skip_if_no_network!(); + + let server = start_mock_server().await; + + let first_turn = sse(vec![ + ev_assistant_message("m1", FIRST_REPLY), + ev_completed_with_usage("r1", /*input_tokens*/ 80, /*output_tokens*/ 5), + ]); + let second_turn = sse(vec![ + ev_assistant_message("m2", SECOND_LARGE_REPLY), + ev_completed_with_usage("r2", /*input_tokens*/ 98, /*output_tokens*/ 1), + ]); + let auto_compact_turn = sse(vec![ + ev_assistant_message("m3", AUTO_SUMMARY_TEXT), + ev_completed_with_tokens("r3", /*total_tokens*/ 20), + ]); + let third_turn = sse(vec![ + ev_assistant_message("m4", FINAL_REPLY), + ev_completed_with_usage("r4", /*input_tokens*/ 80, /*output_tokens*/ 5), + ]); + let request_log = mount_sse_sequence( + &server, + vec![first_turn, second_turn, auto_compact_turn, third_turn], + ) + .await; + + let model_provider = non_openai_model_provider(&server); + let test = test_codex() + .with_config(move |config| { + config.model_provider = model_provider; + set_test_compact_prompt(config); + config.model_context_window = Some(100); + config.model_auto_compact_token_limit = Some(200); + config.model_auto_compact_token_limit_scope = + AutoCompactTokenLimitScope::BodyAfterPrefix; + }) + .build(&server) + .await + .expect("build codex"); + + for user in ["CONTEXT_CAP_ONE", "CONTEXT_CAP_TWO", "CONTEXT_CAP_THREE"] { + test.submit_turn(user).await.expect("submit turn"); + } + + let requests = request_log.requests(); + assert_eq!( + requests.len(), + 4, + "third turn should compact before sampling because total context hit the usable window" + ); + let compact_body = requests[2].body_json().to_string(); + assert!( + body_contains_text(&compact_body, SUMMARIZATION_PROMPT), + "body-after-prefix mode should still clamp the total threshold to the usable context window" + ); +} + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn auto_compact_counts_encrypted_reasoning_before_last_user() { skip_if_no_network!(); diff --git a/codex-rs/protocol/src/config_types.rs b/codex-rs/protocol/src/config_types.rs index 22c4f515480..b4b4759c9b7 100644 --- a/codex-rs/protocol/src/config_types.rs +++ b/codex-rs/protocol/src/config_types.rs @@ -21,6 +21,21 @@ use wildmatch::WildMatchPattern; use crate::openai_models::ReasoningEffort; +/// Selects which part of the active context is charged against +/// `model_auto_compact_token_limit`. +#[derive( + Debug, Serialize, Deserialize, Default, Clone, Copy, PartialEq, Eq, Display, JsonSchema, TS, +)] +#[serde(rename_all = "snake_case")] +#[strum(serialize_all = "snake_case")] +pub enum AutoCompactTokenLimitScope { + /// Count the full active context against the limit. + #[default] + Total, + /// Count sampled output and later growth after the carried window prefix. + BodyAfterPrefix, +} + /// A summary of the reasoning performed by the model. This can be useful for /// debugging and understanding the model's reasoning process. /// See https://platform.openai.com/docs/guides/reasoning?api-mode=responses#reasoning-summaries diff --git a/codex-rs/thread-manager-sample/src/main.rs b/codex-rs/thread-manager-sample/src/main.rs index 313971afdce..4a82f67c086 100644 --- a/codex-rs/thread-manager-sample/src/main.rs +++ b/codex-rs/thread-manager-sample/src/main.rs @@ -15,6 +15,7 @@ use codex_core_api::Arg0DispatchPaths; use codex_core_api::AskForApproval; use codex_core_api::AuthCredentialsStoreMode; use codex_core_api::AuthManager; +use codex_core_api::AutoCompactTokenLimitScope; use codex_core_api::CodexThread; use codex_core_api::Config; use codex_core_api::ConfigLayerStack; @@ -168,6 +169,7 @@ fn new_config(model: Option, arg0_paths: Arg0DispatchPaths) -> anyhow::R review_model: None, model_context_window: None, model_auto_compact_token_limit: None, + model_auto_compact_token_limit_scope: AutoCompactTokenLimitScope::Total, model_provider_id, model_provider, personality: None, From f3b80c51e783a7d3a8f41540adcfd0cbf068ffee Mon Sep 17 00:00:00 2001 From: jif-oai Date: Sun, 17 May 2026 14:42:35 +0200 Subject: [PATCH 2/6] Fix body-after-prefix compaction regressions --- codex-rs/core/src/guardian/review_session.rs | 31 ++++ codex-rs/core/src/session/mod.rs | 38 +++- codex-rs/core/src/session/turn.rs | 20 ++- codex-rs/core/src/state/session.rs | 23 ++- codex-rs/core/tests/suite/compact.rs | 179 +++++++++++++++++++ 5 files changed, 277 insertions(+), 14 deletions(-) diff --git a/codex-rs/core/src/guardian/review_session.rs b/codex-rs/core/src/guardian/review_session.rs index afb5882a69b..3e6eae5db36 100644 --- a/codex-rs/core/src/guardian/review_session.rs +++ b/codex-rs/core/src/guardian/review_session.rs @@ -7,6 +7,7 @@ use std::time::Duration; use anyhow::anyhow; use codex_analytics::GuardianReviewAnalyticsResult; use codex_analytics::GuardianReviewSessionKind; +use codex_protocol::config_types::AutoCompactTokenLimitScope; use codex_protocol::config_types::Personality; use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig; use codex_protocol::models::PermissionProfile; @@ -138,6 +139,7 @@ struct GuardianReviewSessionReuseKey { model_provider: ModelProviderInfo, model_context_window: Option, model_auto_compact_token_limit: Option, + model_auto_compact_token_limit_scope: AutoCompactTokenLimitScope, model_reasoning_effort: Option, model_reasoning_summary: Option, permissions: Permissions, @@ -162,6 +164,7 @@ impl GuardianReviewSessionReuseKey { model_provider: spawn_config.model_provider.clone(), model_context_window: spawn_config.model_context_window, model_auto_compact_token_limit: spawn_config.model_auto_compact_token_limit, + model_auto_compact_token_limit_scope: spawn_config.model_auto_compact_token_limit_scope, model_reasoning_effort: spawn_config.model_reasoning_effort, model_reasoning_summary: spawn_config.model_reasoning_summary, permissions: spawn_config.permissions.clone(), @@ -1148,6 +1151,34 @@ mod tests { ); } + #[tokio::test] + async fn guardian_review_session_compact_scope_change_invalidates_cached_session() { + let parent_config = crate::config::test_config().await; + let cached_spawn_config = build_guardian_review_session_config( + &parent_config, + /*live_network_config*/ None, + "active-model", + /*reasoning_effort*/ None, + ) + .expect("cached guardian config"); + let cached_reuse_key = + GuardianReviewSessionReuseKey::from_spawn_config(&cached_spawn_config); + + let mut changed_parent_config = parent_config; + changed_parent_config.model_auto_compact_token_limit_scope = + AutoCompactTokenLimitScope::BodyAfterPrefix; + let next_spawn_config = build_guardian_review_session_config( + &changed_parent_config, + /*live_network_config*/ None, + "active-model", + /*reasoning_effort*/ None, + ) + .expect("next guardian config"); + let next_reuse_key = GuardianReviewSessionReuseKey::from_spawn_config(&next_spawn_config); + + assert_ne!(cached_reuse_key, next_reuse_key); + } + #[tokio::test] async fn guardian_review_session_config_disables_hooks() { let mut parent_config = crate::config::test_config().await; diff --git a/codex-rs/core/src/session/mod.rs b/codex-rs/core/src/session/mod.rs index 0bc07398ea1..1d061b0dfc6 100644 --- a/codex-rs/core/src/session/mod.rs +++ b/codex-rs/core/src/session/mod.rs @@ -1108,9 +1108,9 @@ impl Session { state.get_total_token_usage(state.server_reasoning_included()) } - pub(crate) async fn auto_compact_window_prefix_input_tokens(&self) -> Option { + pub(crate) async fn auto_compact_window_prefix_tokens(&self) -> Option { let state = self.state.lock().await; - state.auto_compact_window_prefix_input_tokens() + state.auto_compact_window_prefix_tokens() } pub(crate) async fn get_total_token_usage_breakdown(&self) -> TotalTokenUsageBreakdown { @@ -1271,11 +1271,41 @@ impl Session { reconstructed_rollout.reference_context_item, ) .await; + let prefix_tokens = if matches!( + turn_context.config.model_auto_compact_token_limit_scope, + AutoCompactTokenLimitScope::BodyAfterPrefix + ) { + let history = self.clone_history().await; + let base_instructions = self.get_base_instructions().await; + history.estimate_token_count_with_base_instructions(&base_instructions) + } else { + None + }; + if let Some(prefix_tokens) = prefix_tokens { + self.set_auto_compact_window_prefix_tokens_for_scope(turn_context, prefix_tokens) + .await; + } self.set_previous_turn_settings(previous_turn_settings.clone()) .await; previous_turn_settings } + async fn set_auto_compact_window_prefix_tokens_for_scope( + &self, + turn_context: &TurnContext, + tokens: i64, + ) { + if !matches!( + turn_context.config.model_auto_compact_token_limit_scope, + AutoCompactTokenLimitScope::BodyAfterPrefix + ) { + return; + } + + let mut state = self.state.lock().await; + state.set_auto_compact_window_prefix_tokens(tokens); + } + fn last_token_info_from_rollout(rollout_items: &[RolloutItem]) -> Option { rollout_items.iter().rev().find_map(|item| match item { RolloutItem::EventMsg(EventMsg::TokenCount(ev)) => ev.info.clone(), @@ -2952,7 +2982,7 @@ impl Session { turn_context.config.model_auto_compact_token_limit_scope, AutoCompactTokenLimitScope::BodyAfterPrefix ) { - state.ensure_auto_compact_window_prefix_input_tokens(token_usage); + state.ensure_auto_compact_window_prefix_tokens_from_usage(token_usage); } state.token_info() }; @@ -2999,6 +3029,8 @@ impl Session { state.set_token_info(Some(info)); } + self.set_auto_compact_window_prefix_tokens_for_scope(turn_context, estimated_total_tokens) + .await; self.send_token_count_event(turn_context).await; } diff --git a/codex-rs/core/src/session/turn.rs b/codex-rs/core/src/session/turn.rs index 1226ebf81ff..9ee8c7c32b2 100644 --- a/codex-rs/core/src/session/turn.rs +++ b/codex-rs/core/src/session/turn.rs @@ -757,7 +757,7 @@ async fn auto_compact_token_status( ), AutoCompactTokenLimitScope::BodyAfterPrefix => { let baseline = sess - .auto_compact_window_prefix_input_tokens() + .auto_compact_window_prefix_tokens() .await .unwrap_or(active_context_tokens); ( @@ -838,8 +838,22 @@ async fn maybe_run_previous_model_inline_compact( let Some(new_context_window) = turn_context.model_context_window() else { return Ok(false); }; - let token_status = auto_compact_token_status(sess.as_ref(), turn_context.as_ref()).await; - let should_run = token_status.token_limit_reached + let active_context_tokens = sess.get_total_token_usage().await; + let previous_model_limit_reached = match turn_context + .config + .model_auto_compact_token_limit_scope + { + AutoCompactTokenLimitScope::Total => { + let new_auto_compact_limit = turn_context + .model_info + .auto_compact_token_limit() + .unwrap_or(i64::MAX); + active_context_tokens > new_auto_compact_limit + || active_context_tokens >= new_context_window + } + AutoCompactTokenLimitScope::BodyAfterPrefix => active_context_tokens >= new_context_window, + }; + let should_run = previous_model_limit_reached && previous_model_turn_context.model_info.slug != turn_context.model_info.slug && old_context_window > new_context_window; if should_run { diff --git a/codex-rs/core/src/state/session.rs b/codex-rs/core/src/state/session.rs index 49006d40f4a..3abe8140a54 100644 --- a/codex-rs/core/src/state/session.rs +++ b/codex-rs/core/src/state/session.rs @@ -30,7 +30,7 @@ pub(crate) struct SessionState { previous_turn_settings: Option, /// Prefix size for the active compaction window when auto-compaction is /// configured to count only tokens after the carried window prefix. - auto_compact_window_prefix_input_tokens: Option, + auto_compact_window_prefix_tokens: Option, /// Startup prewarmed session prepared during session initialization. pub(crate) startup_prewarm: Option, pub(crate) active_connector_selection: HashSet, @@ -51,7 +51,7 @@ impl SessionState { dependency_env: HashMap::new(), mcp_dependency_prompted: HashSet::new(), previous_turn_settings: None, - auto_compact_window_prefix_input_tokens: None, + auto_compact_window_prefix_tokens: None, startup_prewarm: None, active_connector_selection: HashSet::new(), pending_session_start_source: None, @@ -101,7 +101,7 @@ impl SessionState { self.history.replace(items); self.history .set_reference_context_item(reference_context_item); - self.auto_compact_window_prefix_input_tokens = None; + self.auto_compact_window_prefix_tokens = None; } pub(crate) fn set_token_info(&mut self, info: Option) { @@ -125,14 +125,21 @@ impl SessionState { self.history.update_token_info(usage, model_context_window); } - pub(crate) fn ensure_auto_compact_window_prefix_input_tokens(&mut self, usage: &TokenUsage) { - if self.auto_compact_window_prefix_input_tokens.is_none() { - self.auto_compact_window_prefix_input_tokens = Some(usage.input_tokens.max(0)); + pub(crate) fn ensure_auto_compact_window_prefix_tokens_from_usage( + &mut self, + usage: &TokenUsage, + ) { + if self.auto_compact_window_prefix_tokens.is_none() { + self.auto_compact_window_prefix_tokens = Some(usage.input_tokens.max(0)); } } - pub(crate) fn auto_compact_window_prefix_input_tokens(&self) -> Option { - self.auto_compact_window_prefix_input_tokens + pub(crate) fn set_auto_compact_window_prefix_tokens(&mut self, tokens: i64) { + self.auto_compact_window_prefix_tokens = Some(tokens.max(0)); + } + + pub(crate) fn auto_compact_window_prefix_tokens(&self) -> Option { + self.auto_compact_window_prefix_tokens } pub(crate) fn token_info(&self) -> Option { diff --git a/codex-rs/core/tests/suite/compact.rs b/codex-rs/core/tests/suite/compact.rs index 96455d84e75..6607b03c527 100644 --- a/codex-rs/core/tests/suite/compact.rs +++ b/codex-rs/core/tests/suite/compact.rs @@ -2072,6 +2072,100 @@ async fn pre_sampling_compact_runs_on_switch_to_smaller_context_model() { ); } +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn body_after_prefix_model_switch_budget_compacts_with_next_model() { + skip_if_no_network!(); + + let server = MockServer::start().await; + let previous_model = "gpt-5.3-codex"; + let next_model = "gpt-5.2"; + + let models_mock = mount_models_once( + &server, + ModelsResponse { + models: vec![ + model_info_with_context_window(previous_model, /*context_window*/ 273_000), + model_info_with_context_window(next_model, /*context_window*/ 125_000), + ], + }, + ) + .await; + + let request_log = mount_sse_sequence( + &server, + vec![ + sse(vec![ + ev_assistant_message("m1", "before switch"), + ev_completed_with_usage("r1", /*input_tokens*/ 100, /*output_tokens*/ 50), + ]), + sse(vec![ + ev_assistant_message("m2", "BODY_BUDGET_SUMMARY"), + ev_completed_with_tokens("r2", /*total_tokens*/ 10), + ]), + sse(vec![ + ev_assistant_message("m3", "after switch"), + ev_completed_with_tokens("r3", /*total_tokens*/ 100), + ]), + ], + ) + .await; + + let model_provider = non_openai_model_provider(&server); + let mut builder = test_codex() + .with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing()) + .with_model(previous_model) + .with_config(move |config| { + config.model_provider = model_provider; + set_test_compact_prompt(config); + let _ = config.features.enable(Feature::RemoteModels); + config.model_auto_compact_token_limit = Some(20); + config.model_auto_compact_token_limit_scope = + AutoCompactTokenLimitScope::BodyAfterPrefix; + }); + let test = builder.build(&server).await.expect("build test codex"); + + test.codex + .submit(disabled_permission_user_turn( + "before switch", + test.cwd.path().to_path_buf(), + previous_model.to_string(), + )) + .await + .expect("submit first user turn"); + wait_for_event(&test.codex, |event| { + matches!(event, EventMsg::TurnComplete(_)) + }) + .await; + + test.codex + .submit(disabled_permission_user_turn( + "after switch", + test.cwd.path().to_path_buf(), + next_model.to_string(), + )) + .await + .expect("submit second user turn"); + assert_compaction_uses_turn_lifecycle_id(&test.codex).await; + + let requests = request_log.requests(); + assert_eq!(models_mock.requests().len(), 1); + assert_eq!( + requests.len(), + 3, + "expected user, compact, and follow-up requests" + ); + assert_eq!( + requests[0].body_json()["model"].as_str(), + Some(previous_model) + ); + assert_eq!(requests[1].body_json()["model"].as_str(), Some(next_model)); + assert_eq!(requests[2].body_json()["model"].as_str(), Some(next_model)); + assert!( + body_contains_text(&requests[1].body_json().to_string(), SUMMARIZATION_PROMPT), + "body-budget compaction request should include summarization prompt" + ); +} + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn pre_sampling_compact_runs_after_resume_and_switch_to_smaller_model() { skip_if_no_network!(); @@ -3068,6 +3162,91 @@ async fn auto_compact_body_after_prefix_ignores_starting_window_prefix() { ); } +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn auto_compact_body_after_prefix_counts_growth_after_compaction() { + skip_if_no_network!(); + + let server = start_mock_server().await; + + let first_turn = sse(vec![ + ev_assistant_message("m1", FIRST_REPLY), + ev_completed_with_usage("r1", /*input_tokens*/ 100, /*output_tokens*/ 50), + ]); + let first_auto_compact_turn = sse(vec![ + ev_assistant_message("m2", AUTO_SUMMARY_TEXT), + ev_completed_with_tokens("r2", /*total_tokens*/ 20), + ]); + let second_turn = sse(vec![ + ev_assistant_message("m3", SECOND_LARGE_REPLY), + ev_completed_with_usage( + "r3", /*input_tokens*/ 100_000, /*output_tokens*/ 10, + ), + ]); + let second_auto_compact_turn = sse(vec![ + ev_assistant_message("m4", AUTO_SUMMARY_TEXT), + ev_completed_with_tokens("r4", /*total_tokens*/ 20), + ]); + let third_turn = sse(vec![ + ev_assistant_message("m5", FINAL_REPLY), + ev_completed_with_usage("r5", /*input_tokens*/ 80, /*output_tokens*/ 5), + ]); + let request_log = mount_sse_sequence( + &server, + vec![ + first_turn, + first_auto_compact_turn, + second_turn, + second_auto_compact_turn, + third_turn, + ], + ) + .await; + + let model_provider = non_openai_model_provider(&server); + let test = test_codex() + .with_config(move |config| { + config.model_provider = model_provider; + set_test_compact_prompt(config); + config.model_context_window = Some(200_000); + config.model_auto_compact_token_limit = Some(20); + config.model_auto_compact_token_limit_scope = + AutoCompactTokenLimitScope::BodyAfterPrefix; + }) + .build(&server) + .await + .expect("build codex"); + + test.submit_turn("WINDOW_PREFIX") + .await + .expect("submit first turn"); + test.submit_turn("GROWTH_AFTER_COMPACT") + .await + .expect("submit second turn"); + + let requests = request_log.requests(); + assert_eq!( + requests.len(), + 3, + "second turn should compact first and then sample the new growth" + ); + + test.submit_turn("AFTER_GROWTH") + .await + .expect("submit third turn"); + + let requests = request_log.requests(); + assert_eq!( + requests.len(), + 5, + "third turn should compact again because the post-compaction growth counted against the body budget" + ); + let compact_body = requests[3].body_json().to_string(); + assert!( + body_contains_text(&compact_body, SUMMARIZATION_PROMPT), + "post-compaction growth should trigger a second body-after-prefix compaction" + ); +} + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn auto_compact_body_after_prefix_still_caps_at_context_window() { skip_if_no_network!(); From 1519b002e3c8e84cf7be21d121e349a75656da7f Mon Sep 17 00:00:00 2001 From: jif-oai Date: Mon, 18 May 2026 19:10:52 +0200 Subject: [PATCH 3/6] make something cleaner --- codex-rs/core/src/session/mod.rs | 27 ++-- codex-rs/core/src/session/turn.rs | 16 +- .../core/src/state/auto_compact_window.rs | 137 ++++++++++++++++++ codex-rs/core/src/state/mod.rs | 2 + codex-rs/core/src/state/session.rs | 30 ++-- codex-rs/core/src/state/session_tests.rs | 19 +++ codex-rs/core/tests/suite/compact.rs | 38 +++-- 7 files changed, 232 insertions(+), 37 deletions(-) create mode 100644 codex-rs/core/src/state/auto_compact_window.rs diff --git a/codex-rs/core/src/session/mod.rs b/codex-rs/core/src/session/mod.rs index 1d061b0dfc6..0c70bfdf918 100644 --- a/codex-rs/core/src/session/mod.rs +++ b/codex-rs/core/src/session/mod.rs @@ -291,6 +291,7 @@ use crate::session_startup_prewarm::SessionStartupPrewarmHandle; use crate::shell; use crate::shell_snapshot::ShellSnapshot; use crate::state::ActiveTurn; +use crate::state::AutoCompactWindowSnapshot; use crate::state::MailboxDeliveryPhase; use crate::state::PendingRequestPermissions; use crate::state::SessionServices; @@ -1108,9 +1109,9 @@ impl Session { state.get_total_token_usage(state.server_reasoning_included()) } - pub(crate) async fn auto_compact_window_prefix_tokens(&self) -> Option { + pub(crate) async fn auto_compact_window_snapshot(&self) -> AutoCompactWindowSnapshot { let state = self.state.lock().await; - state.auto_compact_window_prefix_tokens() + state.auto_compact_window_snapshot() } pub(crate) async fn get_total_token_usage_breakdown(&self) -> TotalTokenUsageBreakdown { @@ -1282,7 +1283,7 @@ impl Session { None }; if let Some(prefix_tokens) = prefix_tokens { - self.set_auto_compact_window_prefix_tokens_for_scope(turn_context, prefix_tokens) + self.set_auto_compact_window_estimated_prefill_for_scope(turn_context, prefix_tokens) .await; } self.set_previous_turn_settings(previous_turn_settings.clone()) @@ -1290,7 +1291,7 @@ impl Session { previous_turn_settings } - async fn set_auto_compact_window_prefix_tokens_for_scope( + async fn set_auto_compact_window_estimated_prefill_for_scope( &self, turn_context: &TurnContext, tokens: i64, @@ -1303,7 +1304,7 @@ impl Session { } let mut state = self.state.lock().await; - state.set_auto_compact_window_prefix_tokens(tokens); + state.set_auto_compact_window_estimated_prefill(tokens); } fn last_token_info_from_rollout(rollout_items: &[RolloutItem]) -> Option { @@ -2619,8 +2620,11 @@ impl Session { reference_context_item: Option, compacted_item: CompactedItem, ) { - self.replace_history(items, reference_context_item.clone()) - .await; + { + let mut state = self.state.lock().await; + state.replace_history(items, reference_context_item.clone()); + state.start_next_auto_compact_window(); + } self.persist_rollout_items(&[RolloutItem::Compacted(compacted_item)]) .await; @@ -2982,7 +2986,7 @@ impl Session { turn_context.config.model_auto_compact_token_limit_scope, AutoCompactTokenLimitScope::BodyAfterPrefix ) { - state.ensure_auto_compact_window_prefix_tokens_from_usage(token_usage); + state.ensure_auto_compact_window_server_prefill_from_usage(token_usage); } state.token_info() }; @@ -3029,8 +3033,11 @@ impl Session { state.set_token_info(Some(info)); } - self.set_auto_compact_window_prefix_tokens_for_scope(turn_context, estimated_total_tokens) - .await; + self.set_auto_compact_window_estimated_prefill_for_scope( + turn_context, + estimated_total_tokens, + ) + .await; self.send_token_count_event(turn_context).await; } diff --git a/codex-rs/core/src/session/turn.rs b/codex-rs/core/src/session/turn.rs index 9ee8c7c32b2..39163678c10 100644 --- a/codex-rs/core/src/session/turn.rs +++ b/codex-rs/core/src/session/turn.rs @@ -489,6 +489,8 @@ pub(crate) async fn run_turn( estimated_token_count = ?estimated_token_count, auto_compact_limit = token_status.auto_compact_limit, auto_compact_limit_scope = ?turn_context.config.model_auto_compact_token_limit_scope, + auto_compact_window_ordinal = ?token_status.auto_compact_window_ordinal, + auto_compact_window_prefill_tokens = ?token_status.auto_compact_window_prefill_tokens, context_window_limit = ?token_status.context_window_limit, context_window_limit_reached = token_status.context_window_limit_reached, token_limit_reached, @@ -736,6 +738,8 @@ struct AutoCompactTokenStatus { budgeted_tokens: i64, auto_compact_limit: i64, context_window_limit: Option, + auto_compact_window_ordinal: Option, + auto_compact_window_prefill_tokens: Option, context_window_limit_reached: bool, token_limit_reached: bool, } @@ -745,6 +749,8 @@ async fn auto_compact_token_status( turn_context: &TurnContext, ) -> AutoCompactTokenStatus { let active_context_tokens = sess.get_total_token_usage().await; + let mut auto_compact_window_ordinal = None; + let mut auto_compact_window_prefill_tokens = None; let (budgeted_tokens, auto_compact_limit, context_window_limit) = match turn_context.config.model_auto_compact_token_limit_scope { AutoCompactTokenLimitScope::Total => ( @@ -756,10 +762,10 @@ async fn auto_compact_token_status( None, ), AutoCompactTokenLimitScope::BodyAfterPrefix => { - let baseline = sess - .auto_compact_window_prefix_tokens() - .await - .unwrap_or(active_context_tokens); + let window = sess.auto_compact_window_snapshot().await; + auto_compact_window_ordinal = Some(window.ordinal); + auto_compact_window_prefill_tokens = window.prefill_input_tokens; + let baseline = window.prefill_input_tokens.unwrap_or(active_context_tokens); ( active_context_tokens.saturating_sub(baseline), turn_context @@ -780,6 +786,8 @@ async fn auto_compact_token_status( budgeted_tokens, auto_compact_limit, context_window_limit, + auto_compact_window_ordinal, + auto_compact_window_prefill_tokens, context_window_limit_reached, token_limit_reached, } diff --git a/codex-rs/core/src/state/auto_compact_window.rs b/codex-rs/core/src/state/auto_compact_window.rs new file mode 100644 index 00000000000..e84200c3117 --- /dev/null +++ b/codex-rs/core/src/state/auto_compact_window.rs @@ -0,0 +1,137 @@ +use codex_protocol::protocol::TokenUsage; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) struct AutoCompactWindowSnapshot { + pub(crate) ordinal: u64, + pub(crate) prefill_input_tokens: Option, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum AutoCompactWindowPrefill { + ServerObserved(i64), + Estimated(i64), +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(super) struct AutoCompactWindow { + ordinal: u64, + prefill_input_tokens: Option, +} + +impl AutoCompactWindow { + pub(super) fn new() -> Self { + Self { + ordinal: 1, + prefill_input_tokens: None, + } + } + + pub(super) fn clear_prefill(&mut self) { + self.prefill_input_tokens = None; + } + + pub(super) fn start_next(&mut self) { + self.ordinal = self.ordinal.saturating_add(1); + self.clear_prefill(); + } + + pub(super) fn ensure_server_observed_prefill_from_usage(&mut self, usage: &TokenUsage) { + if matches!( + self.prefill_input_tokens, + Some(AutoCompactWindowPrefill::ServerObserved(_)) + ) { + return; + } + + self.prefill_input_tokens = Some(AutoCompactWindowPrefill::ServerObserved( + usage.input_tokens.max(0), + )); + } + + pub(super) fn set_estimated_prefill(&mut self, tokens: i64) { + if matches!( + self.prefill_input_tokens, + Some(AutoCompactWindowPrefill::ServerObserved(_)) + ) { + return; + } + + self.prefill_input_tokens = Some(AutoCompactWindowPrefill::Estimated(tokens.max(0))); + } + + pub(super) fn snapshot(&self) -> AutoCompactWindowSnapshot { + let prefill_input_tokens = match self.prefill_input_tokens { + Some(AutoCompactWindowPrefill::ServerObserved(tokens)) + | Some(AutoCompactWindowPrefill::Estimated(tokens)) => Some(tokens), + None => None, + }; + AutoCompactWindowSnapshot { + ordinal: self.ordinal, + prefill_input_tokens, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use pretty_assertions::assert_eq; + + #[test] + fn tracks_prefill_and_window_boundaries() { + let mut window = AutoCompactWindow::new(); + + assert_eq!( + window.snapshot(), + AutoCompactWindowSnapshot { + ordinal: 1, + prefill_input_tokens: None, + } + ); + + window.set_estimated_prefill(150); + assert_eq!( + window.snapshot(), + AutoCompactWindowSnapshot { + ordinal: 1, + prefill_input_tokens: Some(150), + } + ); + + window.ensure_server_observed_prefill_from_usage(&TokenUsage { + input_tokens: 120, + total_tokens: 170, + ..Default::default() + }); + assert_eq!( + window.snapshot(), + AutoCompactWindowSnapshot { + ordinal: 1, + prefill_input_tokens: Some(120), + } + ); + + window.ensure_server_observed_prefill_from_usage(&TokenUsage { + input_tokens: 130, + total_tokens: 180, + ..Default::default() + }); + window.set_estimated_prefill(90); + assert_eq!( + window.snapshot(), + AutoCompactWindowSnapshot { + ordinal: 1, + prefill_input_tokens: Some(120), + } + ); + + window.start_next(); + assert_eq!( + window.snapshot(), + AutoCompactWindowSnapshot { + ordinal: 2, + prefill_input_tokens: None, + } + ); + } +} diff --git a/codex-rs/core/src/state/mod.rs b/codex-rs/core/src/state/mod.rs index 13f3bf6c86f..3122ec5f259 100644 --- a/codex-rs/core/src/state/mod.rs +++ b/codex-rs/core/src/state/mod.rs @@ -1,7 +1,9 @@ +mod auto_compact_window; mod service; mod session; mod turn; +pub(crate) use auto_compact_window::AutoCompactWindowSnapshot; pub(crate) use service::SessionServices; pub(crate) use session::SessionState; pub(crate) use turn::ActiveTurn; diff --git a/codex-rs/core/src/state/session.rs b/codex-rs/core/src/state/session.rs index 3abe8140a54..6d34a737d44 100644 --- a/codex-rs/core/src/state/session.rs +++ b/codex-rs/core/src/state/session.rs @@ -6,6 +6,8 @@ use codex_sandboxing::policy_transforms::merge_permission_profiles; use std::collections::HashMap; use std::collections::HashSet; +use super::auto_compact_window::AutoCompactWindow; +use super::auto_compact_window::AutoCompactWindowSnapshot; use crate::context_manager::ContextManager; use crate::session::PreviousTurnSettings; use crate::session::session::SessionConfiguration; @@ -28,9 +30,8 @@ pub(crate) struct SessionState { /// model/realtime handling on subsequent regular turns (including full-context /// reinjection after resume or `/compact`). previous_turn_settings: Option, - /// Prefix size for the active compaction window when auto-compaction is - /// configured to count only tokens after the carried window prefix. - auto_compact_window_prefix_tokens: Option, + /// Runtime accounting state for the active auto-compaction window. + auto_compact_window: AutoCompactWindow, /// Startup prewarmed session prepared during session initialization. pub(crate) startup_prewarm: Option, pub(crate) active_connector_selection: HashSet, @@ -51,7 +52,7 @@ impl SessionState { dependency_env: HashMap::new(), mcp_dependency_prompted: HashSet::new(), previous_turn_settings: None, - auto_compact_window_prefix_tokens: None, + auto_compact_window: AutoCompactWindow::new(), startup_prewarm: None, active_connector_selection: HashSet::new(), pending_session_start_source: None, @@ -101,7 +102,7 @@ impl SessionState { self.history.replace(items); self.history .set_reference_context_item(reference_context_item); - self.auto_compact_window_prefix_tokens = None; + self.auto_compact_window.clear_prefill(); } pub(crate) fn set_token_info(&mut self, info: Option) { @@ -125,21 +126,24 @@ impl SessionState { self.history.update_token_info(usage, model_context_window); } - pub(crate) fn ensure_auto_compact_window_prefix_tokens_from_usage( + pub(crate) fn ensure_auto_compact_window_server_prefill_from_usage( &mut self, usage: &TokenUsage, ) { - if self.auto_compact_window_prefix_tokens.is_none() { - self.auto_compact_window_prefix_tokens = Some(usage.input_tokens.max(0)); - } + self.auto_compact_window + .ensure_server_observed_prefill_from_usage(usage); + } + + pub(crate) fn set_auto_compact_window_estimated_prefill(&mut self, tokens: i64) { + self.auto_compact_window.set_estimated_prefill(tokens); } - pub(crate) fn set_auto_compact_window_prefix_tokens(&mut self, tokens: i64) { - self.auto_compact_window_prefix_tokens = Some(tokens.max(0)); + pub(crate) fn start_next_auto_compact_window(&mut self) { + self.auto_compact_window.start_next(); } - pub(crate) fn auto_compact_window_prefix_tokens(&self) -> Option { - self.auto_compact_window_prefix_tokens + pub(crate) fn auto_compact_window_snapshot(&self) -> AutoCompactWindowSnapshot { + self.auto_compact_window.snapshot() } pub(crate) fn token_info(&self) -> Option { diff --git a/codex-rs/core/src/state/session_tests.rs b/codex-rs/core/src/state/session_tests.rs index 5e90cc881dd..30af41e84ac 100644 --- a/codex-rs/core/src/state/session_tests.rs +++ b/codex-rs/core/src/state/session_tests.rs @@ -1,5 +1,6 @@ use super::*; use crate::session::tests::make_session_configuration_for_tests; +use crate::state::AutoCompactWindowSnapshot; use codex_protocol::protocol::CreditsSnapshot; use codex_protocol::protocol::RateLimitWindow; use pretty_assertions::assert_eq; @@ -61,6 +62,24 @@ async fn set_rate_limits_defaults_limit_id_to_codex_when_missing() { ); } +#[tokio::test] +async fn replace_history_clears_auto_compact_window_prefill_without_advancing() { + let session_configuration = make_session_configuration_for_tests().await; + let mut state = SessionState::new(session_configuration); + + state.start_next_auto_compact_window(); + state.set_auto_compact_window_estimated_prefill(100); + state.replace_history(Vec::new(), /*reference_context_item*/ None); + + assert_eq!( + state.auto_compact_window_snapshot(), + AutoCompactWindowSnapshot { + ordinal: 2, + prefill_input_tokens: None, + } + ); +} + #[tokio::test] async fn set_rate_limits_defaults_to_codex_when_limit_id_missing_after_other_bucket() { let session_configuration = make_session_configuration_for_tests().await; diff --git a/codex-rs/core/tests/suite/compact.rs b/codex-rs/core/tests/suite/compact.rs index 6607b03c527..72393d23174 100644 --- a/codex-rs/core/tests/suite/compact.rs +++ b/codex-rs/core/tests/suite/compact.rs @@ -3182,13 +3182,19 @@ async fn auto_compact_body_after_prefix_counts_growth_after_compaction() { "r3", /*input_tokens*/ 100_000, /*output_tokens*/ 10, ), ]); + let third_turn = sse(vec![ + ev_assistant_message("m4", FINAL_REPLY), + ev_completed_with_usage( + "r4", /*input_tokens*/ 100_100, /*output_tokens*/ 5, + ), + ]); let second_auto_compact_turn = sse(vec![ - ev_assistant_message("m4", AUTO_SUMMARY_TEXT), - ev_completed_with_tokens("r4", /*total_tokens*/ 20), + ev_assistant_message("m5", AUTO_SUMMARY_TEXT), + ev_completed_with_tokens("r5", /*total_tokens*/ 20), ]); - let third_turn = sse(vec![ - ev_assistant_message("m5", FINAL_REPLY), - ev_completed_with_usage("r5", /*input_tokens*/ 80, /*output_tokens*/ 5), + let fourth_turn = sse(vec![ + ev_assistant_message("m6", FINAL_REPLY), + ev_completed_with_usage("r6", /*input_tokens*/ 80, /*output_tokens*/ 5), ]); let request_log = mount_sse_sequence( &server, @@ -3196,8 +3202,9 @@ async fn auto_compact_body_after_prefix_counts_growth_after_compaction() { first_turn, first_auto_compact_turn, second_turn, - second_auto_compact_turn, third_turn, + second_auto_compact_turn, + fourth_turn, ], ) .await; @@ -3208,7 +3215,7 @@ async fn auto_compact_body_after_prefix_counts_growth_after_compaction() { config.model_provider = model_provider; set_test_compact_prompt(config); config.model_context_window = Some(200_000); - config.model_auto_compact_token_limit = Some(20); + config.model_auto_compact_token_limit = Some(40); config.model_auto_compact_token_limit_scope = AutoCompactTokenLimitScope::BodyAfterPrefix; }) @@ -3237,10 +3244,21 @@ async fn auto_compact_body_after_prefix_counts_growth_after_compaction() { let requests = request_log.requests(); assert_eq!( requests.len(), - 5, - "third turn should compact again because the post-compaction growth counted against the body budget" + 4, + "the first server-observed input in the new window should become the prefill baseline" + ); + + test.submit_turn("AFTER_GROWTH_TRIGGER") + .await + .expect("submit fourth turn"); + + let requests = request_log.requests(); + assert_eq!( + requests.len(), + 6, + "fourth turn should compact because later post-compaction growth counted against the body budget" ); - let compact_body = requests[3].body_json().to_string(); + let compact_body = requests[4].body_json().to_string(); assert!( body_contains_text(&compact_body, SUMMARIZATION_PROMPT), "post-compaction growth should trigger a second body-after-prefix compaction" From 02d98f923f4b8a2db3be2cec47982404a7ba028e Mon Sep 17 00:00:00 2001 From: jif-oai Date: Mon, 18 May 2026 20:49:02 +0200 Subject: [PATCH 4/6] nit cleanings --- codex-rs/core/src/session/turn.rs | 37 +++++++++++-------- .../core/src/state/auto_compact_window.rs | 8 ++++ 2 files changed, 29 insertions(+), 16 deletions(-) diff --git a/codex-rs/core/src/session/turn.rs b/codex-rs/core/src/session/turn.rs index 39163678c10..6a7b022393b 100644 --- a/codex-rs/core/src/session/turn.rs +++ b/codex-rs/core/src/session/turn.rs @@ -485,14 +485,14 @@ pub(crate) async fn run_turn( trace!( turn_id = %turn_context.sub_id, total_usage_tokens = token_status.active_context_tokens, - budgeted_tokens = token_status.budgeted_tokens, + auto_compact_scope_tokens = token_status.auto_compact_scope_tokens, estimated_token_count = ?estimated_token_count, - auto_compact_limit = token_status.auto_compact_limit, + auto_compact_scope_limit = token_status.auto_compact_scope_limit, auto_compact_limit_scope = ?turn_context.config.model_auto_compact_token_limit_scope, auto_compact_window_ordinal = ?token_status.auto_compact_window_ordinal, auto_compact_window_prefill_tokens = ?token_status.auto_compact_window_prefill_tokens, - context_window_limit = ?token_status.context_window_limit, - context_window_limit_reached = token_status.context_window_limit_reached, + full_context_window_limit = ?token_status.full_context_window_limit, + full_context_window_limit_reached = token_status.full_context_window_limit_reached, token_limit_reached, model_needs_follow_up, has_pending_input, @@ -734,13 +734,15 @@ struct PreSamplingCompactResult { #[derive(Debug)] struct AutoCompactTokenStatus { + // Full active context usage, independent of the configured auto-compact scope. active_context_tokens: i64, - budgeted_tokens: i64, - auto_compact_limit: i64, - context_window_limit: Option, + // Usage counted against `model_auto_compact_token_limit` for the current scope. + auto_compact_scope_tokens: i64, + auto_compact_scope_limit: i64, + full_context_window_limit: Option, auto_compact_window_ordinal: Option, auto_compact_window_prefill_tokens: Option, - context_window_limit_reached: bool, + full_context_window_limit_reached: bool, token_limit_reached: bool, } @@ -751,7 +753,7 @@ async fn auto_compact_token_status( let active_context_tokens = sess.get_total_token_usage().await; let mut auto_compact_window_ordinal = None; let mut auto_compact_window_prefill_tokens = None; - let (budgeted_tokens, auto_compact_limit, context_window_limit) = + let (auto_compact_scope_tokens, auto_compact_scope_limit, full_context_window_limit) = match turn_context.config.model_auto_compact_token_limit_scope { AutoCompactTokenLimitScope::Total => ( active_context_tokens, @@ -777,18 +779,21 @@ async fn auto_compact_token_status( ) } }; - let context_window_limit_reached = context_window_limit - .is_some_and(|context_window_limit| active_context_tokens >= context_window_limit); - let token_limit_reached = budgeted_tokens >= auto_compact_limit || context_window_limit_reached; + let full_context_window_limit_reached = + full_context_window_limit.is_some_and(|full_context_window_limit| { + active_context_tokens >= full_context_window_limit + }); + let token_limit_reached = + auto_compact_scope_tokens >= auto_compact_scope_limit || full_context_window_limit_reached; AutoCompactTokenStatus { active_context_tokens, - budgeted_tokens, - auto_compact_limit, - context_window_limit, + auto_compact_scope_tokens, + auto_compact_scope_limit, + full_context_window_limit, auto_compact_window_ordinal, auto_compact_window_prefill_tokens, - context_window_limit_reached, + full_context_window_limit_reached, token_limit_reached, } } diff --git a/codex-rs/core/src/state/auto_compact_window.rs b/codex-rs/core/src/state/auto_compact_window.rs index e84200c3117..d8d436280f0 100644 --- a/codex-rs/core/src/state/auto_compact_window.rs +++ b/codex-rs/core/src/state/auto_compact_window.rs @@ -15,6 +15,11 @@ enum AutoCompactWindowPrefill { #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub(super) struct AutoCompactWindow { ordinal: u64, + /// Absolute input-token baseline for the current compaction window. + /// + /// `body_after_prefix` subtracts this from later active-context usage. It is + /// not the growth itself; server-observed usage replaces estimated + /// resume/recompute baselines when available. prefill_input_tokens: Option, } @@ -35,6 +40,9 @@ impl AutoCompactWindow { self.clear_prefill(); } + /// Records the request-input side of the first server usage sample. The + /// sampled output from that response is body growth and should remain + /// counted against the scoped auto-compact budget. pub(super) fn ensure_server_observed_prefill_from_usage(&mut self, usage: &TokenUsage) { if matches!( self.prefill_input_tokens, From 8eb781b4cf78e78ed90ae5233e60db108abb4d35 Mon Sep 17 00:00:00 2001 From: jif-oai Date: Mon, 18 May 2026 21:37:20 +0200 Subject: [PATCH 5/6] Fix argument comments for auto-compact tests --- codex-rs/core/src/state/auto_compact_window.rs | 4 ++-- codex-rs/core/src/state/session_tests.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/codex-rs/core/src/state/auto_compact_window.rs b/codex-rs/core/src/state/auto_compact_window.rs index d8d436280f0..d1deb3c2882 100644 --- a/codex-rs/core/src/state/auto_compact_window.rs +++ b/codex-rs/core/src/state/auto_compact_window.rs @@ -97,7 +97,7 @@ mod tests { } ); - window.set_estimated_prefill(150); + window.set_estimated_prefill(/*tokens*/ 150); assert_eq!( window.snapshot(), AutoCompactWindowSnapshot { @@ -124,7 +124,7 @@ mod tests { total_tokens: 180, ..Default::default() }); - window.set_estimated_prefill(90); + window.set_estimated_prefill(/*tokens*/ 90); assert_eq!( window.snapshot(), AutoCompactWindowSnapshot { diff --git a/codex-rs/core/src/state/session_tests.rs b/codex-rs/core/src/state/session_tests.rs index 30af41e84ac..b6b5a057a01 100644 --- a/codex-rs/core/src/state/session_tests.rs +++ b/codex-rs/core/src/state/session_tests.rs @@ -68,7 +68,7 @@ async fn replace_history_clears_auto_compact_window_prefill_without_advancing() let mut state = SessionState::new(session_configuration); state.start_next_auto_compact_window(); - state.set_auto_compact_window_estimated_prefill(100); + state.set_auto_compact_window_estimated_prefill(/*tokens*/ 100); state.replace_history(Vec::new(), /*reference_context_item*/ None); assert_eq!( From 253a9bea16ba031097e5e706b8f1ab0886f72fd6 Mon Sep 17 00:00:00 2001 From: jif-oai Date: Tue, 19 May 2026 11:20:22 +0200 Subject: [PATCH 6/6] Fix auto-compact CI build --- codex-rs/core/src/client.rs | 2 +- codex-rs/core/src/session/mod.rs | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/codex-rs/core/src/client.rs b/codex-rs/core/src/client.rs index cd1fcb6696d..f604a634581 100644 --- a/codex-rs/core/src/client.rs +++ b/codex-rs/core/src/client.rs @@ -931,7 +931,7 @@ impl Drop for ModelClientSession { } impl ModelClientSession { - fn reset_websocket_session(&mut self) { + pub(crate) fn reset_websocket_session(&mut self) { self.websocket_session.connection = None; self.websocket_session.last_request = None; self.websocket_session.last_response_rx = None; diff --git a/codex-rs/core/src/session/mod.rs b/codex-rs/core/src/session/mod.rs index 4ffd5c43e2d..fbffffbe41b 100644 --- a/codex-rs/core/src/session/mod.rs +++ b/codex-rs/core/src/session/mod.rs @@ -288,9 +288,7 @@ use crate::rollout::map_session_init_error; use crate::session_startup_prewarm::SessionStartupPrewarmHandle; use crate::shell; use crate::shell_snapshot::ShellSnapshot; -use crate::state::ActiveTurn; use crate::state::AutoCompactWindowSnapshot; -use crate::state::MailboxDeliveryPhase; use crate::state::PendingRequestPermissions; use crate::state::SessionServices; use crate::state::SessionState;