From 973806b1cb35792555bead994cb3ed94656eb171 Mon Sep 17 00:00:00 2001
From: jif-oai <jif@openai.com>
Date: Fri, 15 May 2026 19:03:20 +0200
Subject: [PATCH 1/6] feat: add `body_after_prefix`

---
 .../codex_app_server_protocol.schemas.json    |  29 ++++
 .../codex_app_server_protocol.v2.schemas.json |  29 ++++
 .../schema/json/v2/ConfigReadResponse.json    |  29 ++++
 .../typescript/AutoCompactTokenLimitScope.ts  |   9 ++
 .../schema/typescript/index.ts                |   1 +
 .../schema/typescript/v2/Config.ts            |   3 +-
 .../src/protocol/v2/config.rs                 |   2 +
 .../src/protocol/v2/tests.rs                  |   4 +
 codex-rs/config/src/config_toml.rs            |   5 +
 codex-rs/core-api/src/lib.rs                  |   1 +
 codex-rs/core/config.schema.json              |  27 ++++
 codex-rs/core/src/config/config_tests.rs      |   4 +
 codex-rs/core/src/config/mod.rs               |   8 +
 codex-rs/core/src/session/mod.rs              |  12 ++
 codex-rs/core/src/session/turn.rs             |  99 ++++++++----
 codex-rs/core/src/state/session.rs            |  15 ++
 codex-rs/core/tests/suite/compact.rs          | 145 ++++++++++++++++++
 codex-rs/protocol/src/config_types.rs         |  15 ++
 codex-rs/thread-manager-sample/src/main.rs    |   2 +
 19 files changed, 411 insertions(+), 28 deletions(-)
 create mode 100644 codex-rs/app-server-protocol/schema/typescript/AutoCompactTokenLimitScope.ts

diff --git a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json
index 8b292f667df..751f4adcd60 100644
--- a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json
+++ b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json
@@ -6290,6 +6290,25 @@
           }
         ]
       },
+      "AutoCompactTokenLimitScope": {
+        "description": "Selects which part of the active context is charged against `model_auto_compact_token_limit`.",
+        "oneOf": [
+          {
+            "description": "Count the full active context against the limit.",
+            "enum": [
+              "total"
+            ],
+            "type": "string"
+          },
+          {
+            "description": "Count sampled output and later growth after the carried window prefix.",
+            "enum": [
+              "body_after_prefix"
+            ],
+            "type": "string"
+          }
+        ]
+      },
       "AutoReviewDecisionSource": {
         "description": "[UNSTABLE] Source that produced a terminal approval auto-review decision.",
         "enum": [
@@ -7138,6 +7157,16 @@
               "null"
             ]
           },
+          "model_auto_compact_token_limit_scope": {
+            "anyOf": [
+              {
+                "$ref": "#/definitions/v2/AutoCompactTokenLimitScope"
+              },
+              {
+                "type": "null"
+              }
+            ]
+          },
           "model_context_window": {
             "format": "int64",
             "type": [
diff --git a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.v2.schemas.json b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.v2.schemas.json
index 16e548e8c20..8ae7b4ed204 100644
--- a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.v2.schemas.json
+++ b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.v2.schemas.json
@@ -826,6 +826,25 @@
         }
       ]
     },
+    "AutoCompactTokenLimitScope": {
+      "description": "Selects which part of the active context is charged against `model_auto_compact_token_limit`.",
+      "oneOf": [
+        {
+          "description": "Count the full active context against the limit.",
+          "enum": [
+            "total"
+          ],
+          "type": "string"
+        },
+        {
+          "description": "Count sampled output and later growth after the carried window prefix.",
+          "enum": [
+            "body_after_prefix"
+          ],
+          "type": "string"
+        }
+      ]
+    },
     "AutoReviewDecisionSource": {
       "description": "[UNSTABLE] Source that produced a terminal approval auto-review decision.",
       "enum": [
@@ -3527,6 +3546,16 @@
             "null"
           ]
         },
+        "model_auto_compact_token_limit_scope": {
+          "anyOf": [
+            {
+              "$ref": "#/definitions/AutoCompactTokenLimitScope"
+            },
+            {
+              "type": "null"
+            }
+          ]
+        },
         "model_context_window": {
           "format": "int64",
           "type": [
diff --git a/codex-rs/app-server-protocol/schema/json/v2/ConfigReadResponse.json b/codex-rs/app-server-protocol/schema/json/v2/ConfigReadResponse.json
index 81364a6f403..7595f7fd009 100644
--- a/codex-rs/app-server-protocol/schema/json/v2/ConfigReadResponse.json
+++ b/codex-rs/app-server-protocol/schema/json/v2/ConfigReadResponse.json
@@ -188,6 +188,25 @@
         }
       ]
     },
+    "AutoCompactTokenLimitScope": {
+      "description": "Selects which part of the active context is charged against `model_auto_compact_token_limit`.",
+      "oneOf": [
+        {
+          "description": "Count the full active context against the limit.",
+          "enum": [
+            "total"
+          ],
+          "type": "string"
+        },
+        {
+          "description": "Count sampled output and later growth after the carried window prefix.",
+          "enum": [
+            "body_after_prefix"
+          ],
+          "type": "string"
+        }
+      ]
+    },
     "Config": {
       "additionalProperties": true,
       "properties": {
@@ -280,6 +299,16 @@
             "null"
           ]
         },
+        "model_auto_compact_token_limit_scope": {
+          "anyOf": [
+            {
+              "$ref": "#/definitions/AutoCompactTokenLimitScope"
+            },
+            {
+              "type": "null"
+            }
+          ]
+        },
         "model_context_window": {
           "format": "int64",
           "type": [
diff --git a/codex-rs/app-server-protocol/schema/typescript/AutoCompactTokenLimitScope.ts b/codex-rs/app-server-protocol/schema/typescript/AutoCompactTokenLimitScope.ts
new file mode 100644
index 00000000000..3d9a56c82a6
--- /dev/null
+++ b/codex-rs/app-server-protocol/schema/typescript/AutoCompactTokenLimitScope.ts
@@ -0,0 +1,9 @@
+// GENERATED CODE! DO NOT MODIFY BY HAND!
+
+// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
+
+/**
+ * Selects which part of the active context is charged against
+ * `model_auto_compact_token_limit`.
+ */
+export type AutoCompactTokenLimitScope = "total" | "body_after_prefix";
diff --git a/codex-rs/app-server-protocol/schema/typescript/index.ts b/codex-rs/app-server-protocol/schema/typescript/index.ts
index 97ea4356019..8be75af546f 100644
--- a/codex-rs/app-server-protocol/schema/typescript/index.ts
+++ b/codex-rs/app-server-protocol/schema/typescript/index.ts
@@ -5,6 +5,7 @@ export type { AgentPath } from "./AgentPath";
 export type { ApplyPatchApprovalParams } from "./ApplyPatchApprovalParams";
 export type { ApplyPatchApprovalResponse } from "./ApplyPatchApprovalResponse";
 export type { AuthMode } from "./AuthMode";
+export type { AutoCompactTokenLimitScope } from "./AutoCompactTokenLimitScope";
 export type { ClientInfo } from "./ClientInfo";
 export type { ClientNotification } from "./ClientNotification";
 export type { ClientRequest } from "./ClientRequest";
diff --git a/codex-rs/app-server-protocol/schema/typescript/v2/Config.ts b/codex-rs/app-server-protocol/schema/typescript/v2/Config.ts
index ba24663e879..29eae987741 100644
--- a/codex-rs/app-server-protocol/schema/typescript/v2/Config.ts
+++ b/codex-rs/app-server-protocol/schema/typescript/v2/Config.ts
@@ -1,6 +1,7 @@
 // GENERATED CODE! DO NOT MODIFY BY HAND!
 
 // This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
+import type { AutoCompactTokenLimitScope } from "../AutoCompactTokenLimitScope";
 import type { ForcedLoginMethod } from "../ForcedLoginMethod";
 import type { ReasoningEffort } from "../ReasoningEffort";
 import type { ReasoningSummary } from "../ReasoningSummary";
@@ -16,7 +17,7 @@ import type { SandboxMode } from "./SandboxMode";
 import type { SandboxWorkspaceWrite } from "./SandboxWorkspaceWrite";
 import type { ToolsV2 } from "./ToolsV2";
 
-export type Config = {model: string | null, review_model: string | null, model_context_window: bigint | null, model_auto_compact_token_limit: bigint | null, model_provider: string | null, approval_policy: AskForApproval | null, /**
+export type Config = {model: string | null, review_model: string | null, model_context_window: bigint | null, model_auto_compact_token_limit: bigint | null, model_auto_compact_token_limit_scope: AutoCompactTokenLimitScope | null, model_provider: string | null, approval_policy: AskForApproval | null, /**
  * [UNSTABLE] Optional default for where approval requests are routed for
  * review.
  */
diff --git a/codex-rs/app-server-protocol/src/protocol/v2/config.rs b/codex-rs/app-server-protocol/src/protocol/v2/config.rs
index b46515d8114..a34e6c530f9 100644
--- a/codex-rs/app-server-protocol/src/protocol/v2/config.rs
+++ b/codex-rs/app-server-protocol/src/protocol/v2/config.rs
@@ -3,6 +3,7 @@ use super::AskForApproval;
 use super::SandboxMode;
 use super::shared::default_enabled;
 use codex_experimental_api_macros::ExperimentalApi;
+use codex_protocol::config_types::AutoCompactTokenLimitScope;
 use codex_protocol::config_types::ForcedLoginMethod;
 use codex_protocol::config_types::ReasoningSummary;
 use codex_protocol::config_types::Verbosity;
@@ -251,6 +252,7 @@ pub struct Config {
     pub review_model: Option<String>,
     pub model_context_window: Option<i64>,
     pub model_auto_compact_token_limit: Option<i64>,
+    pub model_auto_compact_token_limit_scope: Option<AutoCompactTokenLimitScope>,
     pub model_provider: Option<String>,
     #[experimental(nested)]
     pub approval_policy: Option<AskForApproval>,
diff --git a/codex-rs/app-server-protocol/src/protocol/v2/tests.rs b/codex-rs/app-server-protocol/src/protocol/v2/tests.rs
index 50058cb6862..a0aefa0bf42 100644
--- a/codex-rs/app-server-protocol/src/protocol/v2/tests.rs
+++ b/codex-rs/app-server-protocol/src/protocol/v2/tests.rs
@@ -1560,6 +1560,7 @@ fn config_granular_approval_policy_is_marked_experimental() {
         review_model: None,
         model_context_window: None,
         model_auto_compact_token_limit: None,
+        model_auto_compact_token_limit_scope: None,
         model_provider: None,
         approval_policy: Some(AskForApproval::Granular {
             sandbox_approval: false,
@@ -1600,6 +1601,7 @@ fn config_approvals_reviewer_is_marked_experimental() {
         review_model: None,
         model_context_window: None,
         model_auto_compact_token_limit: None,
+        model_auto_compact_token_limit_scope: None,
         model_provider: None,
         approval_policy: None,
         approvals_reviewer: Some(ApprovalsReviewer::AutoReview),
@@ -1634,6 +1636,7 @@ fn config_nested_profile_granular_approval_policy_is_marked_experimental() {
         review_model: None,
         model_context_window: None,
         model_auto_compact_token_limit: None,
+        model_auto_compact_token_limit_scope: None,
         model_provider: None,
         approval_policy: None,
         approvals_reviewer: None,
@@ -1690,6 +1693,7 @@ fn config_nested_profile_approvals_reviewer_is_marked_experimental() {
         review_model: None,
         model_context_window: None,
         model_auto_compact_token_limit: None,
+        model_auto_compact_token_limit_scope: None,
         model_provider: None,
         approval_policy: None,
         approvals_reviewer: None,
diff --git a/codex-rs/config/src/config_toml.rs b/codex-rs/config/src/config_toml.rs
index 72641150ddb..0d5db00badc 100644
--- a/codex-rs/config/src/config_toml.rs
+++ b/codex-rs/config/src/config_toml.rs
@@ -38,6 +38,7 @@ use codex_model_provider_info::ModelProviderInfo;
 use codex_model_provider_info::OLLAMA_CHAT_PROVIDER_REMOVED_ERROR;
 use codex_model_provider_info::OLLAMA_OSS_PROVIDER_ID;
 use codex_model_provider_info::OPENAI_PROVIDER_ID;
+use codex_protocol::config_types::AutoCompactTokenLimitScope;
 use codex_protocol::config_types::ForcedLoginMethod;
 use codex_protocol::config_types::Personality;
 use codex_protocol::config_types::ReasoningSummary;
@@ -156,6 +157,10 @@ pub struct ConfigToml {
     /// Token usage threshold triggering auto-compaction of conversation history.
     pub model_auto_compact_token_limit: Option<i64>,
 
+    /// Controls whether the auto-compaction limit applies to the full context or
+    /// only to tokens after the carried prefix in the current compaction window.
+    pub model_auto_compact_token_limit_scope: Option<AutoCompactTokenLimitScope>,
+
     /// Default approval policy for executing commands.
     pub approval_policy: Option<AskForApproval>,
 
diff --git a/codex-rs/core-api/src/lib.rs b/codex-rs/core-api/src/lib.rs
index 04ebaf8e7e6..e87ee82f309 100644
--- a/codex-rs/core-api/src/lib.rs
+++ b/codex-rs/core-api/src/lib.rs
@@ -58,6 +58,7 @@ pub use codex_models_manager::manager::SharedModelsManager;
 pub use codex_protocol::ThreadId;
 pub use codex_protocol::config_types::AltScreenMode;
 pub use codex_protocol::config_types::ApprovalsReviewer;
+pub use codex_protocol::config_types::AutoCompactTokenLimitScope;
 pub use codex_protocol::config_types::CollaborationModeMask;
 pub use codex_protocol::config_types::ShellEnvironmentPolicy;
 pub use codex_protocol::config_types::WebSearchMode;
diff --git a/codex-rs/core/config.schema.json b/codex-rs/core/config.schema.json
index 0387b2e4015..b4f847ae3b9 100644
--- a/codex-rs/core/config.schema.json
+++ b/codex-rs/core/config.schema.json
@@ -309,6 +309,25 @@
         }
       ]
     },
+    "AutoCompactTokenLimitScope": {
+      "description": "Selects which part of the active context is charged against `model_auto_compact_token_limit`.",
+      "oneOf": [
+        {
+          "description": "Count the full active context against the limit.",
+          "enum": [
+            "total"
+          ],
+          "type": "string"
+        },
+        {
+          "description": "Count sampled output and later growth after the carried window prefix.",
+          "enum": [
+            "body_after_prefix"
+          ],
+          "type": "string"
+        }
+      ]
+    },
     "AutoReviewToml": {
       "properties": {
         "policy": {
@@ -4534,6 +4553,14 @@
       "format": "int64",
       "type": "integer"
     },
+    "model_auto_compact_token_limit_scope": {
+      "allOf": [
+        {
+          "$ref": "#/definitions/AutoCompactTokenLimitScope"
+        }
+      ],
+      "description": "Controls whether the auto-compaction limit applies to the full context or only to tokens after the carried prefix in the current compaction window."
+    },
     "model_catalog_json": {
       "allOf": [
         {
diff --git a/codex-rs/core/src/config/config_tests.rs b/codex-rs/core/src/config/config_tests.rs
index f197ccad71c..cc748138b96 100644
--- a/codex-rs/core/src/config/config_tests.rs
+++ b/codex-rs/core/src/config/config_tests.rs
@@ -7623,6 +7623,7 @@ async fn test_precedence_fixture_with_o3_profile() -> std::io::Result<()> {
             review_model: None,
             model_context_window: None,
             model_auto_compact_token_limit: None,
+            model_auto_compact_token_limit_scope: AutoCompactTokenLimitScope::Total,
             service_tier: None,
             model_provider_id: "openai".to_string(),
             model_provider: fixture.openai_provider.clone(),
@@ -8073,6 +8074,7 @@ async fn test_precedence_fixture_with_gpt3_profile() -> std::io::Result<()> {
         review_model: None,
         model_context_window: None,
         model_auto_compact_token_limit: None,
+        model_auto_compact_token_limit_scope: AutoCompactTokenLimitScope::Total,
         service_tier: None,
         model_provider_id: "openai-custom".to_string(),
         model_provider: fixture.openai_custom_provider.clone(),
@@ -8237,6 +8239,7 @@ async fn test_precedence_fixture_with_zdr_profile() -> std::io::Result<()> {
         review_model: None,
         model_context_window: None,
         model_auto_compact_token_limit: None,
+        model_auto_compact_token_limit_scope: AutoCompactTokenLimitScope::Total,
         service_tier: None,
         model_provider_id: "openai".to_string(),
         model_provider: fixture.openai_provider.clone(),
@@ -8386,6 +8389,7 @@ async fn test_precedence_fixture_with_gpt5_profile() -> std::io::Result<()> {
         review_model: None,
         model_context_window: None,
         model_auto_compact_token_limit: None,
+        model_auto_compact_token_limit_scope: AutoCompactTokenLimitScope::Total,
         service_tier: None,
         model_provider_id: "openai".to_string(),
         model_provider: fixture.openai_provider.clone(),
diff --git a/codex-rs/core/src/config/mod.rs b/codex-rs/core/src/config/mod.rs
index 527c93b3f37..9812dd25fef 100644
--- a/codex-rs/core/src/config/mod.rs
+++ b/codex-rs/core/src/config/mod.rs
@@ -77,6 +77,7 @@ use codex_model_provider_info::built_in_model_providers;
 use codex_model_provider_info::merge_configured_model_providers;
 use codex_models_manager::ModelsManagerConfig;
 use codex_protocol::config_types::AltScreenMode;
+use codex_protocol::config_types::AutoCompactTokenLimitScope;
 use codex_protocol::config_types::ForcedLoginMethod;
 use codex_protocol::config_types::Personality;
 use codex_protocol::config_types::ReasoningSummary;
@@ -556,6 +557,10 @@ pub struct Config {
     /// Token usage threshold triggering auto-compaction of conversation history.
     pub model_auto_compact_token_limit: Option<i64>,
 
+    /// Controls whether `model_auto_compact_token_limit` applies to the full
+    /// active context or only tokens after the carried compaction-window prefix.
+    pub model_auto_compact_token_limit_scope: AutoCompactTokenLimitScope,
+
     /// Key into the model_providers map that specifies which provider to use.
     pub model_provider_id: String,
 
@@ -3299,6 +3304,9 @@ impl Config {
             review_model,
             model_context_window: cfg.model_context_window,
             model_auto_compact_token_limit: cfg.model_auto_compact_token_limit,
+            model_auto_compact_token_limit_scope: cfg
+                .model_auto_compact_token_limit_scope
+                .unwrap_or_default(),
             model_provider_id,
             model_provider,
             cwd: resolved_cwd,
diff --git a/codex-rs/core/src/session/mod.rs b/codex-rs/core/src/session/mod.rs
index 2dd97710e83..73b1d4b8a40 100644
--- a/codex-rs/core/src/session/mod.rs
+++ b/codex-rs/core/src/session/mod.rs
@@ -80,6 +80,7 @@ use codex_protocol::approvals::ExecPolicyAmendment;
 use codex_protocol::approvals::NetworkPolicyAmendment;
 use codex_protocol::approvals::NetworkPolicyRuleAction;
 use codex_protocol::config_types::ApprovalsReviewer;
+use codex_protocol::config_types::AutoCompactTokenLimitScope;
 use codex_protocol::config_types::ModeKind;
 use codex_protocol::config_types::Settings;
 use codex_protocol::config_types::WebSearchMode;
@@ -1106,6 +1107,11 @@ impl Session {
         state.get_total_token_usage(state.server_reasoning_included())
     }
 
+    pub(crate) async fn auto_compact_window_prefix_input_tokens(&self) -> Option<i64> {
+        let state = self.state.lock().await; // read under the session state lock
+        state.auto_compact_window_prefix_input_tokens() // `None` until a baseline is recorded
+    }
+
     pub(crate) async fn get_total_token_usage_breakdown(&self) -> TotalTokenUsageBreakdown {
         let state = self.state.lock().await;
         state.history.get_total_token_usage_breakdown()
@@ -2941,6 +2947,12 @@ impl Session {
                 let mut state = self.state.lock().await;
                 state
                     .update_token_info_from_usage(token_usage, turn_context.model_context_window());
+                if matches!(
+                    turn_context.config.model_auto_compact_token_limit_scope,
+                    AutoCompactTokenLimitScope::BodyAfterPrefix
+                ) {
+                    state.ensure_auto_compact_window_prefix_input_tokens(token_usage);
+                }
                 state.token_info()
             };
             if let Some(token_info) = token_info.as_ref() {
diff --git a/codex-rs/core/src/session/turn.rs b/codex-rs/core/src/session/turn.rs
index aee4bd360ff..1226ebf81ff 100644
--- a/codex-rs/core/src/session/turn.rs
+++ b/codex-rs/core/src/session/turn.rs
@@ -75,6 +75,7 @@ use codex_hooks::HookEvent;
 use codex_hooks::HookEventAfterAgent;
 use codex_hooks::HookPayload;
 use codex_hooks::HookResult;
+use codex_protocol::config_types::AutoCompactTokenLimitScope;
 use codex_protocol::config_types::ModeKind;
 use codex_protocol::config_types::ServiceTier;
 use codex_protocol::error::CodexErr;
@@ -150,8 +151,6 @@ pub(crate) async fn run_turn(
         return None;
     }
 
-    let model_info = turn_context.model_info.clone();
-    let auto_compact_limit = model_info.auto_compact_token_limit().unwrap_or(i64::MAX);
     let mut client_session =
         prewarmed_client_session.unwrap_or_else(|| sess.services.model_client.new_session());
     // TODO(ccunningham): Pre-turn compaction runs before context updates and the
@@ -476,17 +475,22 @@ pub(crate) async fn run_turn(
                 can_drain_pending_input = true;
                 let has_pending_input = sess.has_pending_input().await;
                 let needs_follow_up = model_needs_follow_up || has_pending_input;
-                let total_usage_tokens = sess.get_total_token_usage().await;
-                let token_limit_reached = total_usage_tokens >= auto_compact_limit;
+                let token_status =
+                    auto_compact_token_status(sess.as_ref(), turn_context.as_ref()).await;
+                let token_limit_reached = token_status.token_limit_reached;
 
                 let estimated_token_count =
                     sess.get_estimated_token_count(turn_context.as_ref()).await;
 
                 trace!(
                     turn_id = %turn_context.sub_id,
-                    total_usage_tokens,
+                    total_usage_tokens = token_status.active_context_tokens,
+                    budgeted_tokens = token_status.budgeted_tokens,
                     estimated_token_count = ?estimated_token_count,
-                    auto_compact_limit,
+                    auto_compact_limit = token_status.auto_compact_limit,
+                    auto_compact_limit_scope = ?turn_context.config.model_auto_compact_token_limit_scope,
+                    context_window_limit = ?token_status.context_window_limit,
+                    context_window_limit_reached = token_status.context_window_limit_reached,
                     token_limit_reached,
                     model_needs_follow_up,
                     has_pending_input,
@@ -726,27 +730,72 @@ struct PreSamplingCompactResult {
     reset_client_session: bool,
 }
 
+#[derive(Debug)]
+struct AutoCompactTokenStatus {
+    active_context_tokens: i64, // total from `Session::get_total_token_usage`
+    budgeted_tokens: i64, // portion of the total charged against `auto_compact_limit`
+    auto_compact_limit: i64, // `i64::MAX` when no limit is available
+    context_window_limit: Option<i64>, // always `None` for the `Total` scope
+    context_window_limit_reached: bool, // `active_context_tokens >= context_window_limit`
+    token_limit_reached: bool, // budget exhausted or context window reached
+}
+
+/// Reports how the session's token usage compares with the auto-compaction limit.
+///
+/// With the `Total` scope the whole active context is charged against
+/// `model_info.auto_compact_token_limit()`. With `BodyAfterPrefix` only tokens
+/// beyond the recorded window prefix are charged, and reaching the model
+/// context window also marks the limit as reached.
+///
+/// The returned status carries both the raw totals and the derived flags so
+/// callers can log them alongside the compaction decision.
+async fn auto_compact_token_status(
+    sess: &Session,
+    turn_context: &TurnContext,
+) -> AutoCompactTokenStatus {
+    let config = &turn_context.config;
+    let model_info = &turn_context.model_info;
+    let total = sess.get_total_token_usage().await;
+
+    let (charged, limit, window) = match &config.model_auto_compact_token_limit_scope {
+        AutoCompactTokenLimitScope::Total => {
+            let limit = model_info.auto_compact_token_limit().unwrap_or(i64::MAX);
+            (total, limit, None)
+        }
+        AutoCompactTokenLimitScope::BodyAfterPrefix => {
+            // No recorded prefix yet means nothing is charged to this window.
+            let prefix = sess.auto_compact_window_prefix_input_tokens().await;
+            let charged = prefix.map_or(0, |prefix| total.saturating_sub(prefix));
+            // An explicitly configured limit wins over the model-provided one.
+            let limit = config.model_auto_compact_token_limit.unwrap_or_else(|| {
+                model_info.auto_compact_token_limit().unwrap_or(i64::MAX)
+            });
+            (charged, limit, turn_context.model_context_window())
+        }
+    };
+
+    // `Total` never supplies a window, so it cannot trip this check.
+    let window_reached = matches!(window, Some(cap) if total >= cap);
+    AutoCompactTokenStatus {
+        active_context_tokens: total,
+        budgeted_tokens: charged,
+        auto_compact_limit: limit,
+        context_window_limit: window,
+        context_window_limit_reached: window_reached,
+        token_limit_reached: window_reached || charged >= limit,
+    }
+}
+
 async fn run_pre_sampling_compact(
     sess: &Arc<Session>,
     turn_context: &Arc<TurnContext>,
     client_session: &mut ModelClientSession,
 ) -> CodexResult<PreSamplingCompactResult> {
-    let total_usage_tokens_before_compaction = sess.get_total_token_usage().await;
-    let mut pre_sampling_compacted = maybe_run_previous_model_inline_compact(
-        sess,
-        turn_context,
-        client_session,
-        total_usage_tokens_before_compaction,
-    )
-    .await?;
+    let mut pre_sampling_compacted =
+        maybe_run_previous_model_inline_compact(sess, turn_context, client_session).await?;
     let mut reset_client_session = pre_sampling_compacted;
-    let total_usage_tokens = sess.get_total_token_usage().await;
-    let auto_compact_limit = turn_context
-        .model_info
-        .auto_compact_token_limit()
-        .unwrap_or(i64::MAX);
-    // Compact if the total usage tokens are greater than the auto compact limit
-    if total_usage_tokens >= auto_compact_limit {
+    let token_status = auto_compact_token_status(sess.as_ref(), turn_context.as_ref()).await;
+    // Compact if the configured auto-compaction budget or usable context window is exhausted.
+    if token_status.token_limit_reached {
         reset_client_session |= run_auto_compact(
             sess,
             turn_context,
@@ -773,7 +822,6 @@ async fn maybe_run_previous_model_inline_compact(
     sess: &Arc<Session>,
     turn_context: &Arc<TurnContext>,
     client_session: &mut ModelClientSession,
-    total_usage_tokens: i64,
 ) -> CodexResult<bool> {
     let Some(previous_turn_settings) = sess.previous_turn_settings().await else {
         return Ok(false);
@@ -790,11 +838,8 @@ async fn maybe_run_previous_model_inline_compact(
     let Some(new_context_window) = turn_context.model_context_window() else {
         return Ok(false);
     };
-    let new_auto_compact_limit = turn_context
-        .model_info
-        .auto_compact_token_limit()
-        .unwrap_or(i64::MAX);
-    let should_run = total_usage_tokens > new_auto_compact_limit
+    let token_status = auto_compact_token_status(sess.as_ref(), turn_context.as_ref()).await;
+    let should_run = token_status.token_limit_reached
         && previous_model_turn_context.model_info.slug != turn_context.model_info.slug
         && old_context_window > new_context_window;
     if should_run {
diff --git a/codex-rs/core/src/state/session.rs b/codex-rs/core/src/state/session.rs
index 3bd4b8a26e7..49006d40f4a 100644
--- a/codex-rs/core/src/state/session.rs
+++ b/codex-rs/core/src/state/session.rs
@@ -28,6 +28,9 @@ pub(crate) struct SessionState {
     /// model/realtime handling on subsequent regular turns (including full-context
     /// reinjection after resume or `/compact`).
     previous_turn_settings: Option<PreviousTurnSettings>,
+    /// Prefix size for the active compaction window when auto-compaction is
+    /// configured to count only tokens after the carried window prefix.
+    auto_compact_window_prefix_input_tokens: Option<i64>,
     /// Startup prewarmed session prepared during session initialization.
     pub(crate) startup_prewarm: Option<SessionStartupPrewarmHandle>,
     pub(crate) active_connector_selection: HashSet<String>,
@@ -48,6 +51,7 @@ impl SessionState {
             dependency_env: HashMap::new(),
             mcp_dependency_prompted: HashSet::new(),
             previous_turn_settings: None,
+            auto_compact_window_prefix_input_tokens: None,
             startup_prewarm: None,
             active_connector_selection: HashSet::new(),
             pending_session_start_source: None,
@@ -97,6 +101,7 @@ impl SessionState {
         self.history.replace(items);
         self.history
             .set_reference_context_item(reference_context_item);
+        self.auto_compact_window_prefix_input_tokens = None;
     }
 
     pub(crate) fn set_token_info(&mut self, info: Option<TokenUsageInfo>) {
@@ -120,6 +125,16 @@ impl SessionState {
         self.history.update_token_info(usage, model_context_window);
     }
 
+    pub(crate) fn ensure_auto_compact_window_prefix_input_tokens(&mut self, usage: &TokenUsage) {
+        // Record the baseline once; later usage reports must not move it.
+        self.auto_compact_window_prefix_input_tokens
+            .get_or_insert_with(|| usage.input_tokens.max(0));
+    }
+
+    pub(crate) fn auto_compact_window_prefix_input_tokens(&self) -> Option<i64> {
+        self.auto_compact_window_prefix_input_tokens // `None` until a baseline is recorded
+    }
+
     pub(crate) fn token_info(&self) -> Option<TokenUsageInfo> {
         self.history.token_info()
     }
diff --git a/codex-rs/core/tests/suite/compact.rs b/codex-rs/core/tests/suite/compact.rs
index 68ddf1691b9..204cae48459 100644
--- a/codex-rs/core/tests/suite/compact.rs
+++ b/codex-rs/core/tests/suite/compact.rs
@@ -7,6 +7,7 @@ use codex_login::CodexAuth;
 use codex_model_provider_info::ModelProviderInfo;
 use codex_model_provider_info::built_in_model_providers;
 use codex_models_manager::bundled_models_response;
+use codex_protocol::config_types::AutoCompactTokenLimitScope;
 use codex_protocol::items::TurnItem;
 use codex_protocol::models::PermissionProfile;
 use codex_protocol::openai_models::ModelInfo;
@@ -121,6 +122,22 @@ fn set_test_compact_prompt(config: &mut Config) {
     config.compact_prompt = Some(SUMMARIZATION_PROMPT.to_string());
 }
 
+/// Builds a `response.completed` event whose usage reports the given
+/// input/output token counts and their sum as `total_tokens`.
+fn ev_completed_with_usage(id: &str, input_tokens: i64, output_tokens: i64) -> Value {
+    let usage = json!({
+        "input_tokens": input_tokens,
+        "input_tokens_details": null,
+        "output_tokens": output_tokens,
+        "output_tokens_details": null,
+        "total_tokens": input_tokens + output_tokens
+    });
+    json!({
+        "type": "response.completed",
+        "response": { "id": id, "usage": usage }
+    })
+}
+
 fn body_contains_text(body: &str, text: &str) -> bool {
     body.contains(&json_fragment(text))
 }
@@ -2982,6 +2999,134 @@ async fn auto_compact_clamps_config_limit_to_context_window() {
     );
 }
 
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn auto_compact_body_after_prefix_ignores_starting_window_prefix() {
+    skip_if_no_network!();
+
+    let server = start_mock_server().await;
+
+    let first_turn = sse(vec![
+        ev_assistant_message("m1", FIRST_REPLY),
+        ev_completed_with_usage("r1", /*input_tokens*/ 600, /*output_tokens*/ 50),
+    ]);
+    let second_turn = sse(vec![
+        ev_assistant_message("m2", SECOND_LARGE_REPLY),
+        ev_completed_with_usage("r2", /*input_tokens*/ 700, /*output_tokens*/ 50),
+    ]);
+    let auto_compact_turn = sse(vec![
+        ev_assistant_message("m3", AUTO_SUMMARY_TEXT),
+        ev_completed_with_tokens("r3", /*total_tokens*/ 20),
+    ]);
+    let third_turn = sse(vec![
+        ev_assistant_message("m4", FINAL_REPLY),
+        ev_completed_with_usage("r4", /*input_tokens*/ 750, /*output_tokens*/ 20),
+    ]);
+    let request_log = mount_sse_sequence(
+        &server,
+        vec![first_turn, second_turn, auto_compact_turn, third_turn],
+    )
+    .await;
+
+    let model_provider = non_openai_model_provider(&server);
+    let test = test_codex()
+        .with_config(move |config| {
+            config.model_provider = model_provider;
+            set_test_compact_prompt(config);
+            config.model_context_window = Some(1_000);
+            config.model_auto_compact_token_limit = Some(100);
+            config.model_auto_compact_token_limit_scope =
+                AutoCompactTokenLimitScope::BodyAfterPrefix;
+        })
+        .build(&server)
+        .await
+        .expect("build codex");
+
+    for user in ["PREFIX_FREE_ONE", "PREFIX_FREE_TWO"] {
+        test.submit_turn(user).await.expect("submit turn");
+    }
+
+    assert_eq!(
+        request_log.requests().len(),
+        2,
+        "the first two turns should not compact just because the prefix exceeds the body budget"
+    );
+
+    test.submit_turn("PREFIX_FREE_THREE")
+        .await
+        .expect("submit third turn");
+
+    let requests = request_log.requests();
+    assert_eq!(
+        requests.len(),
+        4,
+        "third turn should include pre-turn compaction plus the post-compaction request"
+    );
+    let compact_body = requests[2].body_json().to_string();
+    assert!(
+        body_contains_text(&compact_body, SUMMARIZATION_PROMPT),
+        "body-after-prefix mode should compact once tokens after the first assistant sample exceed the configured budget"
+    );
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn auto_compact_body_after_prefix_still_caps_at_context_window() {
+    skip_if_no_network!();
+
+    let server = start_mock_server().await;
+
+    let first_turn = sse(vec![
+        ev_assistant_message("m1", FIRST_REPLY),
+        ev_completed_with_usage("r1", /*input_tokens*/ 80, /*output_tokens*/ 5),
+    ]);
+    let second_turn = sse(vec![
+        ev_assistant_message("m2", SECOND_LARGE_REPLY),
+        ev_completed_with_usage("r2", /*input_tokens*/ 98, /*output_tokens*/ 1),
+    ]);
+    let auto_compact_turn = sse(vec![
+        ev_assistant_message("m3", AUTO_SUMMARY_TEXT),
+        ev_completed_with_tokens("r3", /*total_tokens*/ 20),
+    ]);
+    let third_turn = sse(vec![
+        ev_assistant_message("m4", FINAL_REPLY),
+        ev_completed_with_usage("r4", /*input_tokens*/ 80, /*output_tokens*/ 5),
+    ]);
+    let request_log = mount_sse_sequence(
+        &server,
+        vec![first_turn, second_turn, auto_compact_turn, third_turn],
+    )
+    .await;
+
+    let model_provider = non_openai_model_provider(&server);
+    let test = test_codex()
+        .with_config(move |config| {
+            config.model_provider = model_provider;
+            set_test_compact_prompt(config);
+            config.model_context_window = Some(100);
+            config.model_auto_compact_token_limit = Some(200);
+            config.model_auto_compact_token_limit_scope =
+                AutoCompactTokenLimitScope::BodyAfterPrefix;
+        })
+        .build(&server)
+        .await
+        .expect("build codex");
+
+    for user in ["CONTEXT_CAP_ONE", "CONTEXT_CAP_TWO", "CONTEXT_CAP_THREE"] {
+        test.submit_turn(user).await.expect("submit turn");
+    }
+
+    let requests = request_log.requests();
+    assert_eq!(
+        requests.len(),
+        4,
+        "third turn should compact before sampling because total context hit the usable window"
+    );
+    let compact_body = requests[2].body_json().to_string();
+    assert!(
+        body_contains_text(&compact_body, SUMMARIZATION_PROMPT),
+        "body-after-prefix mode should still clamp the total threshold to the usable context window"
+    );
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn auto_compact_counts_encrypted_reasoning_before_last_user() {
     skip_if_no_network!();
diff --git a/codex-rs/protocol/src/config_types.rs b/codex-rs/protocol/src/config_types.rs
index 22c4f515480..b4b4759c9b7 100644
--- a/codex-rs/protocol/src/config_types.rs
+++ b/codex-rs/protocol/src/config_types.rs
@@ -21,6 +21,21 @@ use wildmatch::WildMatchPattern;
 
 use crate::openai_models::ReasoningEffort;
 
+/// Selects which part of the active context is charged against
+/// `model_auto_compact_token_limit`.
+#[derive(
+    Debug, Serialize, Deserialize, Default, Clone, Copy, PartialEq, Eq, Display, JsonSchema, TS,
+)]
+#[serde(rename_all = "snake_case")]
+#[strum(serialize_all = "snake_case")]
+pub enum AutoCompactTokenLimitScope {
+    /// Count the full active context against the limit.
+    #[default]
+    Total,
+    /// Count sampled output and later growth after the carried window prefix.
+    BodyAfterPrefix,
+}
+
 /// A summary of the reasoning performed by the model. This can be useful for
 /// debugging and understanding the model's reasoning process.
 /// See https://platform.openai.com/docs/guides/reasoning?api-mode=responses#reasoning-summaries
diff --git a/codex-rs/thread-manager-sample/src/main.rs b/codex-rs/thread-manager-sample/src/main.rs
index 313971afdce..4a82f67c086 100644
--- a/codex-rs/thread-manager-sample/src/main.rs
+++ b/codex-rs/thread-manager-sample/src/main.rs
@@ -15,6 +15,7 @@ use codex_core_api::Arg0DispatchPaths;
 use codex_core_api::AskForApproval;
 use codex_core_api::AuthCredentialsStoreMode;
 use codex_core_api::AuthManager;
+use codex_core_api::AutoCompactTokenLimitScope;
 use codex_core_api::CodexThread;
 use codex_core_api::Config;
 use codex_core_api::ConfigLayerStack;
@@ -168,6 +169,7 @@ fn new_config(model: Option<String>, arg0_paths: Arg0DispatchPaths) -> anyhow::R
         review_model: None,
         model_context_window: None,
         model_auto_compact_token_limit: None,
+        model_auto_compact_token_limit_scope: AutoCompactTokenLimitScope::Total,
         model_provider_id,
         model_provider,
         personality: None,

From f3b80c51e783a7d3a8f41540adcfd0cbf068ffee Mon Sep 17 00:00:00 2001
From: jif-oai <jif@openai.com>
Date: Sun, 17 May 2026 14:42:35 +0200
Subject: [PATCH 2/6] Fix body-after-prefix compaction regressions

---
 codex-rs/core/src/guardian/review_session.rs |  31 ++++
 codex-rs/core/src/session/mod.rs             |  38 +++-
 codex-rs/core/src/session/turn.rs            |  20 ++-
 codex-rs/core/src/state/session.rs           |  23 ++-
 codex-rs/core/tests/suite/compact.rs         | 179 +++++++++++++++++++
 5 files changed, 277 insertions(+), 14 deletions(-)

diff --git a/codex-rs/core/src/guardian/review_session.rs b/codex-rs/core/src/guardian/review_session.rs
index afb5882a69b..3e6eae5db36 100644
--- a/codex-rs/core/src/guardian/review_session.rs
+++ b/codex-rs/core/src/guardian/review_session.rs
@@ -7,6 +7,7 @@ use std::time::Duration;
 use anyhow::anyhow;
 use codex_analytics::GuardianReviewAnalyticsResult;
 use codex_analytics::GuardianReviewSessionKind;
+use codex_protocol::config_types::AutoCompactTokenLimitScope;
 use codex_protocol::config_types::Personality;
 use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig;
 use codex_protocol::models::PermissionProfile;
@@ -138,6 +139,7 @@ struct GuardianReviewSessionReuseKey {
     model_provider: ModelProviderInfo,
     model_context_window: Option<i64>,
     model_auto_compact_token_limit: Option<i64>,
+    model_auto_compact_token_limit_scope: AutoCompactTokenLimitScope,
     model_reasoning_effort: Option<ReasoningEffortConfig>,
     model_reasoning_summary: Option<ReasoningSummaryConfig>,
     permissions: Permissions,
@@ -162,6 +164,7 @@ impl GuardianReviewSessionReuseKey {
             model_provider: spawn_config.model_provider.clone(),
             model_context_window: spawn_config.model_context_window,
             model_auto_compact_token_limit: spawn_config.model_auto_compact_token_limit,
+            model_auto_compact_token_limit_scope: spawn_config.model_auto_compact_token_limit_scope,
             model_reasoning_effort: spawn_config.model_reasoning_effort,
             model_reasoning_summary: spawn_config.model_reasoning_summary,
             permissions: spawn_config.permissions.clone(),
@@ -1148,6 +1151,34 @@ mod tests {
         );
     }
 
+    #[tokio::test]
+    async fn guardian_review_session_compact_scope_change_invalidates_cached_session() {
+        let parent_config = crate::config::test_config().await;
+        let cached_spawn_config = build_guardian_review_session_config(
+            &parent_config,
+            /*live_network_config*/ None,
+            "active-model",
+            /*reasoning_effort*/ None,
+        )
+        .expect("cached guardian config");
+        let cached_reuse_key =
+            GuardianReviewSessionReuseKey::from_spawn_config(&cached_spawn_config);
+
+        let mut changed_parent_config = parent_config;
+        changed_parent_config.model_auto_compact_token_limit_scope =
+            AutoCompactTokenLimitScope::BodyAfterPrefix;
+        let next_spawn_config = build_guardian_review_session_config(
+            &changed_parent_config,
+            /*live_network_config*/ None,
+            "active-model",
+            /*reasoning_effort*/ None,
+        )
+        .expect("next guardian config");
+        let next_reuse_key = GuardianReviewSessionReuseKey::from_spawn_config(&next_spawn_config);
+
+        assert_ne!(cached_reuse_key, next_reuse_key);
+    }
+
     #[tokio::test]
     async fn guardian_review_session_config_disables_hooks() {
         let mut parent_config = crate::config::test_config().await;
diff --git a/codex-rs/core/src/session/mod.rs b/codex-rs/core/src/session/mod.rs
index 0bc07398ea1..1d061b0dfc6 100644
--- a/codex-rs/core/src/session/mod.rs
+++ b/codex-rs/core/src/session/mod.rs
@@ -1108,9 +1108,9 @@ impl Session {
         state.get_total_token_usage(state.server_reasoning_included())
     }
 
-    pub(crate) async fn auto_compact_window_prefix_input_tokens(&self) -> Option<i64> {
+    pub(crate) async fn auto_compact_window_prefix_tokens(&self) -> Option<i64> {
         let state = self.state.lock().await;
-        state.auto_compact_window_prefix_input_tokens()
+        state.auto_compact_window_prefix_tokens()
     }
 
     pub(crate) async fn get_total_token_usage_breakdown(&self) -> TotalTokenUsageBreakdown {
@@ -1271,11 +1271,41 @@ impl Session {
             reconstructed_rollout.reference_context_item,
         )
         .await;
+        let prefix_tokens = if matches!(
+            turn_context.config.model_auto_compact_token_limit_scope,
+            AutoCompactTokenLimitScope::BodyAfterPrefix
+        ) {
+            let history = self.clone_history().await;
+            let base_instructions = self.get_base_instructions().await;
+            history.estimate_token_count_with_base_instructions(&base_instructions)
+        } else {
+            None
+        };
+        if let Some(prefix_tokens) = prefix_tokens {
+            self.set_auto_compact_window_prefix_tokens_for_scope(turn_context, prefix_tokens)
+                .await;
+        }
         self.set_previous_turn_settings(previous_turn_settings.clone())
             .await;
         previous_turn_settings
     }
 
+    async fn set_auto_compact_window_prefix_tokens_for_scope(
+        &self,
+        turn_context: &TurnContext,
+        tokens: i64,
+    ) {
+        if !matches!(
+            turn_context.config.model_auto_compact_token_limit_scope,
+            AutoCompactTokenLimitScope::BodyAfterPrefix
+        ) {
+            return;
+        }
+
+        let mut state = self.state.lock().await;
+        state.set_auto_compact_window_prefix_tokens(tokens);
+    }
+
     fn last_token_info_from_rollout(rollout_items: &[RolloutItem]) -> Option<TokenUsageInfo> {
         rollout_items.iter().rev().find_map(|item| match item {
             RolloutItem::EventMsg(EventMsg::TokenCount(ev)) => ev.info.clone(),
@@ -2952,7 +2982,7 @@ impl Session {
                     turn_context.config.model_auto_compact_token_limit_scope,
                     AutoCompactTokenLimitScope::BodyAfterPrefix
                 ) {
-                    state.ensure_auto_compact_window_prefix_input_tokens(token_usage);
+                    state.ensure_auto_compact_window_prefix_tokens_from_usage(token_usage);
                 }
                 state.token_info()
             };
@@ -2999,6 +3029,8 @@ impl Session {
 
             state.set_token_info(Some(info));
         }
+        self.set_auto_compact_window_prefix_tokens_for_scope(turn_context, estimated_total_tokens)
+            .await;
         self.send_token_count_event(turn_context).await;
     }
 
diff --git a/codex-rs/core/src/session/turn.rs b/codex-rs/core/src/session/turn.rs
index 1226ebf81ff..9ee8c7c32b2 100644
--- a/codex-rs/core/src/session/turn.rs
+++ b/codex-rs/core/src/session/turn.rs
@@ -757,7 +757,7 @@ async fn auto_compact_token_status(
             ),
             AutoCompactTokenLimitScope::BodyAfterPrefix => {
                 let baseline = sess
-                    .auto_compact_window_prefix_input_tokens()
+                    .auto_compact_window_prefix_tokens()
                     .await
                     .unwrap_or(active_context_tokens);
                 (
@@ -838,8 +838,22 @@ async fn maybe_run_previous_model_inline_compact(
     let Some(new_context_window) = turn_context.model_context_window() else {
         return Ok(false);
     };
-    let token_status = auto_compact_token_status(sess.as_ref(), turn_context.as_ref()).await;
-    let should_run = token_status.token_limit_reached
+    let active_context_tokens = sess.get_total_token_usage().await;
+    let previous_model_limit_reached = match turn_context
+        .config
+        .model_auto_compact_token_limit_scope
+    {
+        AutoCompactTokenLimitScope::Total => {
+            let new_auto_compact_limit = turn_context
+                .model_info
+                .auto_compact_token_limit()
+                .unwrap_or(i64::MAX);
+            active_context_tokens > new_auto_compact_limit
+                || active_context_tokens >= new_context_window
+        }
+        AutoCompactTokenLimitScope::BodyAfterPrefix => active_context_tokens >= new_context_window,
+    };
+    let should_run = previous_model_limit_reached
         && previous_model_turn_context.model_info.slug != turn_context.model_info.slug
         && old_context_window > new_context_window;
     if should_run {
diff --git a/codex-rs/core/src/state/session.rs b/codex-rs/core/src/state/session.rs
index 49006d40f4a..3abe8140a54 100644
--- a/codex-rs/core/src/state/session.rs
+++ b/codex-rs/core/src/state/session.rs
@@ -30,7 +30,7 @@ pub(crate) struct SessionState {
     previous_turn_settings: Option<PreviousTurnSettings>,
     /// Prefix size for the active compaction window when auto-compaction is
     /// configured to count only tokens after the carried window prefix.
-    auto_compact_window_prefix_input_tokens: Option<i64>,
+    auto_compact_window_prefix_tokens: Option<i64>,
     /// Startup prewarmed session prepared during session initialization.
     pub(crate) startup_prewarm: Option<SessionStartupPrewarmHandle>,
     pub(crate) active_connector_selection: HashSet<String>,
@@ -51,7 +51,7 @@ impl SessionState {
             dependency_env: HashMap::new(),
             mcp_dependency_prompted: HashSet::new(),
             previous_turn_settings: None,
-            auto_compact_window_prefix_input_tokens: None,
+            auto_compact_window_prefix_tokens: None,
             startup_prewarm: None,
             active_connector_selection: HashSet::new(),
             pending_session_start_source: None,
@@ -101,7 +101,7 @@ impl SessionState {
         self.history.replace(items);
         self.history
             .set_reference_context_item(reference_context_item);
-        self.auto_compact_window_prefix_input_tokens = None;
+        self.auto_compact_window_prefix_tokens = None;
     }
 
     pub(crate) fn set_token_info(&mut self, info: Option<TokenUsageInfo>) {
@@ -125,14 +125,21 @@ impl SessionState {
         self.history.update_token_info(usage, model_context_window);
     }
 
-    pub(crate) fn ensure_auto_compact_window_prefix_input_tokens(&mut self, usage: &TokenUsage) {
-        if self.auto_compact_window_prefix_input_tokens.is_none() {
-            self.auto_compact_window_prefix_input_tokens = Some(usage.input_tokens.max(0));
+    pub(crate) fn ensure_auto_compact_window_prefix_tokens_from_usage(
+        &mut self,
+        usage: &TokenUsage,
+    ) {
+        if self.auto_compact_window_prefix_tokens.is_none() {
+            self.auto_compact_window_prefix_tokens = Some(usage.input_tokens.max(0));
         }
     }
 
-    pub(crate) fn auto_compact_window_prefix_input_tokens(&self) -> Option<i64> {
-        self.auto_compact_window_prefix_input_tokens
+    pub(crate) fn set_auto_compact_window_prefix_tokens(&mut self, tokens: i64) {
+        self.auto_compact_window_prefix_tokens = Some(tokens.max(0));
+    }
+
+    pub(crate) fn auto_compact_window_prefix_tokens(&self) -> Option<i64> {
+        self.auto_compact_window_prefix_tokens
     }
 
     pub(crate) fn token_info(&self) -> Option<TokenUsageInfo> {
diff --git a/codex-rs/core/tests/suite/compact.rs b/codex-rs/core/tests/suite/compact.rs
index 96455d84e75..6607b03c527 100644
--- a/codex-rs/core/tests/suite/compact.rs
+++ b/codex-rs/core/tests/suite/compact.rs
@@ -2072,6 +2072,100 @@ async fn pre_sampling_compact_runs_on_switch_to_smaller_context_model() {
     );
 }
 
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn body_after_prefix_model_switch_budget_compacts_with_next_model() {
+    skip_if_no_network!();
+
+    let server = MockServer::start().await;
+    let previous_model = "gpt-5.3-codex";
+    let next_model = "gpt-5.2";
+
+    let models_mock = mount_models_once(
+        &server,
+        ModelsResponse {
+            models: vec![
+                model_info_with_context_window(previous_model, /*context_window*/ 273_000),
+                model_info_with_context_window(next_model, /*context_window*/ 125_000),
+            ],
+        },
+    )
+    .await;
+
+    let request_log = mount_sse_sequence(
+        &server,
+        vec![
+            sse(vec![
+                ev_assistant_message("m1", "before switch"),
+                ev_completed_with_usage("r1", /*input_tokens*/ 100, /*output_tokens*/ 50),
+            ]),
+            sse(vec![
+                ev_assistant_message("m2", "BODY_BUDGET_SUMMARY"),
+                ev_completed_with_tokens("r2", /*total_tokens*/ 10),
+            ]),
+            sse(vec![
+                ev_assistant_message("m3", "after switch"),
+                ev_completed_with_tokens("r3", /*total_tokens*/ 100),
+            ]),
+        ],
+    )
+    .await;
+
+    let model_provider = non_openai_model_provider(&server);
+    let mut builder = test_codex()
+        .with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
+        .with_model(previous_model)
+        .with_config(move |config| {
+            config.model_provider = model_provider;
+            set_test_compact_prompt(config);
+            let _ = config.features.enable(Feature::RemoteModels);
+            config.model_auto_compact_token_limit = Some(20);
+            config.model_auto_compact_token_limit_scope =
+                AutoCompactTokenLimitScope::BodyAfterPrefix;
+        });
+    let test = builder.build(&server).await.expect("build test codex");
+
+    test.codex
+        .submit(disabled_permission_user_turn(
+            "before switch",
+            test.cwd.path().to_path_buf(),
+            previous_model.to_string(),
+        ))
+        .await
+        .expect("submit first user turn");
+    wait_for_event(&test.codex, |event| {
+        matches!(event, EventMsg::TurnComplete(_))
+    })
+    .await;
+
+    test.codex
+        .submit(disabled_permission_user_turn(
+            "after switch",
+            test.cwd.path().to_path_buf(),
+            next_model.to_string(),
+        ))
+        .await
+        .expect("submit second user turn");
+    assert_compaction_uses_turn_lifecycle_id(&test.codex).await;
+
+    let requests = request_log.requests();
+    assert_eq!(models_mock.requests().len(), 1);
+    assert_eq!(
+        requests.len(),
+        3,
+        "expected user, compact, and follow-up requests"
+    );
+    assert_eq!(
+        requests[0].body_json()["model"].as_str(),
+        Some(previous_model)
+    );
+    assert_eq!(requests[1].body_json()["model"].as_str(), Some(next_model));
+    assert_eq!(requests[2].body_json()["model"].as_str(), Some(next_model));
+    assert!(
+        body_contains_text(&requests[1].body_json().to_string(), SUMMARIZATION_PROMPT),
+        "body-budget compaction request should include summarization prompt"
+    );
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn pre_sampling_compact_runs_after_resume_and_switch_to_smaller_model() {
     skip_if_no_network!();
@@ -3068,6 +3162,91 @@ async fn auto_compact_body_after_prefix_ignores_starting_window_prefix() {
     );
 }
 
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn auto_compact_body_after_prefix_counts_growth_after_compaction() {
+    skip_if_no_network!();
+
+    let server = start_mock_server().await;
+
+    let first_turn = sse(vec![
+        ev_assistant_message("m1", FIRST_REPLY),
+        ev_completed_with_usage("r1", /*input_tokens*/ 100, /*output_tokens*/ 50),
+    ]);
+    let first_auto_compact_turn = sse(vec![
+        ev_assistant_message("m2", AUTO_SUMMARY_TEXT),
+        ev_completed_with_tokens("r2", /*total_tokens*/ 20),
+    ]);
+    let second_turn = sse(vec![
+        ev_assistant_message("m3", SECOND_LARGE_REPLY),
+        ev_completed_with_usage(
+            "r3", /*input_tokens*/ 100_000, /*output_tokens*/ 10,
+        ),
+    ]);
+    let second_auto_compact_turn = sse(vec![
+        ev_assistant_message("m4", AUTO_SUMMARY_TEXT),
+        ev_completed_with_tokens("r4", /*total_tokens*/ 20),
+    ]);
+    let third_turn = sse(vec![
+        ev_assistant_message("m5", FINAL_REPLY),
+        ev_completed_with_usage("r5", /*input_tokens*/ 80, /*output_tokens*/ 5),
+    ]);
+    let request_log = mount_sse_sequence(
+        &server,
+        vec![
+            first_turn,
+            first_auto_compact_turn,
+            second_turn,
+            second_auto_compact_turn,
+            third_turn,
+        ],
+    )
+    .await;
+
+    let model_provider = non_openai_model_provider(&server);
+    let test = test_codex()
+        .with_config(move |config| {
+            config.model_provider = model_provider;
+            set_test_compact_prompt(config);
+            config.model_context_window = Some(200_000);
+            config.model_auto_compact_token_limit = Some(20);
+            config.model_auto_compact_token_limit_scope =
+                AutoCompactTokenLimitScope::BodyAfterPrefix;
+        })
+        .build(&server)
+        .await
+        .expect("build codex");
+
+    test.submit_turn("WINDOW_PREFIX")
+        .await
+        .expect("submit first turn");
+    test.submit_turn("GROWTH_AFTER_COMPACT")
+        .await
+        .expect("submit second turn");
+
+    let requests = request_log.requests();
+    assert_eq!(
+        requests.len(),
+        3,
+        "second turn should compact first and then sample the new growth"
+    );
+
+    test.submit_turn("AFTER_GROWTH")
+        .await
+        .expect("submit third turn");
+
+    let requests = request_log.requests();
+    assert_eq!(
+        requests.len(),
+        5,
+        "third turn should compact again because the post-compaction growth counted against the body budget"
+    );
+    let compact_body = requests[3].body_json().to_string();
+    assert!(
+        body_contains_text(&compact_body, SUMMARIZATION_PROMPT),
+        "post-compaction growth should trigger a second body-after-prefix compaction"
+    );
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn auto_compact_body_after_prefix_still_caps_at_context_window() {
     skip_if_no_network!();

From 1519b002e3c8e84cf7be21d121e349a75656da7f Mon Sep 17 00:00:00 2001
From: jif-oai <jif@openai.com>
Date: Mon, 18 May 2026 19:10:52 +0200
Subject: [PATCH 3/6] Extract auto-compact window tracking into AutoCompactWindow

---
 codex-rs/core/src/session/mod.rs              |  27 ++--
 codex-rs/core/src/session/turn.rs             |  16 +-
 .../core/src/state/auto_compact_window.rs     | 137 ++++++++++++++++++
 codex-rs/core/src/state/mod.rs                |   2 +
 codex-rs/core/src/state/session.rs            |  30 ++--
 codex-rs/core/src/state/session_tests.rs      |  19 +++
 codex-rs/core/tests/suite/compact.rs          |  38 +++--
 7 files changed, 232 insertions(+), 37 deletions(-)
 create mode 100644 codex-rs/core/src/state/auto_compact_window.rs

diff --git a/codex-rs/core/src/session/mod.rs b/codex-rs/core/src/session/mod.rs
index 1d061b0dfc6..0c70bfdf918 100644
--- a/codex-rs/core/src/session/mod.rs
+++ b/codex-rs/core/src/session/mod.rs
@@ -291,6 +291,7 @@ use crate::session_startup_prewarm::SessionStartupPrewarmHandle;
 use crate::shell;
 use crate::shell_snapshot::ShellSnapshot;
 use crate::state::ActiveTurn;
+use crate::state::AutoCompactWindowSnapshot;
 use crate::state::MailboxDeliveryPhase;
 use crate::state::PendingRequestPermissions;
 use crate::state::SessionServices;
@@ -1108,9 +1109,9 @@ impl Session {
         state.get_total_token_usage(state.server_reasoning_included())
     }
 
-    pub(crate) async fn auto_compact_window_prefix_tokens(&self) -> Option<i64> {
+    pub(crate) async fn auto_compact_window_snapshot(&self) -> AutoCompactWindowSnapshot {
         let state = self.state.lock().await;
-        state.auto_compact_window_prefix_tokens()
+        state.auto_compact_window_snapshot()
     }
 
     pub(crate) async fn get_total_token_usage_breakdown(&self) -> TotalTokenUsageBreakdown {
@@ -1282,7 +1283,7 @@ impl Session {
             None
         };
         if let Some(prefix_tokens) = prefix_tokens {
-            self.set_auto_compact_window_prefix_tokens_for_scope(turn_context, prefix_tokens)
+            self.set_auto_compact_window_estimated_prefill_for_scope(turn_context, prefix_tokens)
                 .await;
         }
         self.set_previous_turn_settings(previous_turn_settings.clone())
@@ -1290,7 +1291,7 @@ impl Session {
         previous_turn_settings
     }
 
-    async fn set_auto_compact_window_prefix_tokens_for_scope(
+    async fn set_auto_compact_window_estimated_prefill_for_scope(
         &self,
         turn_context: &TurnContext,
         tokens: i64,
@@ -1303,7 +1304,7 @@ impl Session {
         }
 
         let mut state = self.state.lock().await;
-        state.set_auto_compact_window_prefix_tokens(tokens);
+        state.set_auto_compact_window_estimated_prefill(tokens);
     }
 
     fn last_token_info_from_rollout(rollout_items: &[RolloutItem]) -> Option<TokenUsageInfo> {
@@ -2619,8 +2620,11 @@ impl Session {
         reference_context_item: Option<TurnContextItem>,
         compacted_item: CompactedItem,
     ) {
-        self.replace_history(items, reference_context_item.clone())
-            .await;
+        {
+            let mut state = self.state.lock().await;
+            state.replace_history(items, reference_context_item.clone());
+            state.start_next_auto_compact_window();
+        }
 
         self.persist_rollout_items(&[RolloutItem::Compacted(compacted_item)])
             .await;
@@ -2982,7 +2986,7 @@ impl Session {
                     turn_context.config.model_auto_compact_token_limit_scope,
                     AutoCompactTokenLimitScope::BodyAfterPrefix
                 ) {
-                    state.ensure_auto_compact_window_prefix_tokens_from_usage(token_usage);
+                    state.ensure_auto_compact_window_server_prefill_from_usage(token_usage);
                 }
                 state.token_info()
             };
@@ -3029,8 +3033,11 @@ impl Session {
 
             state.set_token_info(Some(info));
         }
-        self.set_auto_compact_window_prefix_tokens_for_scope(turn_context, estimated_total_tokens)
-            .await;
+        self.set_auto_compact_window_estimated_prefill_for_scope(
+            turn_context,
+            estimated_total_tokens,
+        )
+        .await;
         self.send_token_count_event(turn_context).await;
     }
 
diff --git a/codex-rs/core/src/session/turn.rs b/codex-rs/core/src/session/turn.rs
index 9ee8c7c32b2..39163678c10 100644
--- a/codex-rs/core/src/session/turn.rs
+++ b/codex-rs/core/src/session/turn.rs
@@ -489,6 +489,8 @@ pub(crate) async fn run_turn(
                     estimated_token_count = ?estimated_token_count,
                     auto_compact_limit = token_status.auto_compact_limit,
                     auto_compact_limit_scope = ?turn_context.config.model_auto_compact_token_limit_scope,
+                    auto_compact_window_ordinal = ?token_status.auto_compact_window_ordinal,
+                    auto_compact_window_prefill_tokens = ?token_status.auto_compact_window_prefill_tokens,
                     context_window_limit = ?token_status.context_window_limit,
                     context_window_limit_reached = token_status.context_window_limit_reached,
                     token_limit_reached,
@@ -736,6 +738,8 @@ struct AutoCompactTokenStatus {
     budgeted_tokens: i64,
     auto_compact_limit: i64,
     context_window_limit: Option<i64>,
+    auto_compact_window_ordinal: Option<u64>,
+    auto_compact_window_prefill_tokens: Option<i64>,
     context_window_limit_reached: bool,
     token_limit_reached: bool,
 }
@@ -745,6 +749,8 @@ async fn auto_compact_token_status(
     turn_context: &TurnContext,
 ) -> AutoCompactTokenStatus {
     let active_context_tokens = sess.get_total_token_usage().await;
+    let mut auto_compact_window_ordinal = None;
+    let mut auto_compact_window_prefill_tokens = None;
     let (budgeted_tokens, auto_compact_limit, context_window_limit) =
         match turn_context.config.model_auto_compact_token_limit_scope {
             AutoCompactTokenLimitScope::Total => (
@@ -756,10 +762,10 @@ async fn auto_compact_token_status(
                 None,
             ),
             AutoCompactTokenLimitScope::BodyAfterPrefix => {
-                let baseline = sess
-                    .auto_compact_window_prefix_tokens()
-                    .await
-                    .unwrap_or(active_context_tokens);
+                let window = sess.auto_compact_window_snapshot().await;
+                auto_compact_window_ordinal = Some(window.ordinal);
+                auto_compact_window_prefill_tokens = window.prefill_input_tokens;
+                let baseline = window.prefill_input_tokens.unwrap_or(active_context_tokens);
                 (
                     active_context_tokens.saturating_sub(baseline),
                     turn_context
@@ -780,6 +786,8 @@ async fn auto_compact_token_status(
         budgeted_tokens,
         auto_compact_limit,
         context_window_limit,
+        auto_compact_window_ordinal,
+        auto_compact_window_prefill_tokens,
         context_window_limit_reached,
         token_limit_reached,
     }
diff --git a/codex-rs/core/src/state/auto_compact_window.rs b/codex-rs/core/src/state/auto_compact_window.rs
new file mode 100644
index 00000000000..e84200c3117
--- /dev/null
+++ b/codex-rs/core/src/state/auto_compact_window.rs
@@ -0,0 +1,137 @@
+use codex_protocol::protocol::TokenUsage;
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub(crate) struct AutoCompactWindowSnapshot {
+    pub(crate) ordinal: u64,
+    pub(crate) prefill_input_tokens: Option<i64>,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum AutoCompactWindowPrefill {
+    ServerObserved(i64),
+    Estimated(i64),
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub(super) struct AutoCompactWindow {
+    ordinal: u64,
+    prefill_input_tokens: Option<AutoCompactWindowPrefill>,
+}
+
+impl AutoCompactWindow {
+    pub(super) fn new() -> Self {
+        Self {
+            ordinal: 1,
+            prefill_input_tokens: None,
+        }
+    }
+
+    pub(super) fn clear_prefill(&mut self) {
+        self.prefill_input_tokens = None;
+    }
+
+    pub(super) fn start_next(&mut self) {
+        self.ordinal = self.ordinal.saturating_add(1);
+        self.clear_prefill();
+    }
+
+    pub(super) fn ensure_server_observed_prefill_from_usage(&mut self, usage: &TokenUsage) {
+        if matches!(
+            self.prefill_input_tokens,
+            Some(AutoCompactWindowPrefill::ServerObserved(_))
+        ) {
+            return;
+        }
+
+        self.prefill_input_tokens = Some(AutoCompactWindowPrefill::ServerObserved(
+            usage.input_tokens.max(0),
+        ));
+    }
+
+    /// Stores `tokens` (clamped to zero) as an estimate unless a server value exists.
+    pub(super) fn set_estimated_prefill(&mut self, tokens: i64) {
+        match self.prefill_input_tokens {
+            Some(AutoCompactWindowPrefill::ServerObserved(_)) => {}
+            Some(AutoCompactWindowPrefill::Estimated(_)) | None => {
+                let estimate = AutoCompactWindowPrefill::Estimated(tokens.max(0));
+                self.prefill_input_tokens = Some(estimate);
+            }
+        }
+    }
+
+    /// Returns the window ordinal and the prefill baseline, whatever its source.
+    pub(super) fn snapshot(&self) -> AutoCompactWindowSnapshot {
+        let prefill_input_tokens = self.prefill_input_tokens.map(|prefill| match prefill {
+            AutoCompactWindowPrefill::ServerObserved(tokens)
+            | AutoCompactWindowPrefill::Estimated(tokens) => tokens,
+        });
+        AutoCompactWindowSnapshot {
+            ordinal: self.ordinal,
+            prefill_input_tokens,
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use pretty_assertions::assert_eq;
+
+    #[test]
+    fn tracks_prefill_and_window_boundaries() {
+        let mut window = AutoCompactWindow::new();
+
+        assert_eq!(
+            window.snapshot(),
+            AutoCompactWindowSnapshot {
+                ordinal: 1,
+                prefill_input_tokens: None,
+            }
+        );
+
+        window.set_estimated_prefill(150);
+        assert_eq!(
+            window.snapshot(),
+            AutoCompactWindowSnapshot {
+                ordinal: 1,
+                prefill_input_tokens: Some(150),
+            }
+        );
+
+        window.ensure_server_observed_prefill_from_usage(&TokenUsage {
+            input_tokens: 120,
+            total_tokens: 170,
+            ..Default::default()
+        });
+        assert_eq!(
+            window.snapshot(),
+            AutoCompactWindowSnapshot {
+                ordinal: 1,
+                prefill_input_tokens: Some(120),
+            }
+        );
+
+        window.ensure_server_observed_prefill_from_usage(&TokenUsage {
+            input_tokens: 130,
+            total_tokens: 180,
+            ..Default::default()
+        });
+        window.set_estimated_prefill(90);
+        assert_eq!(
+            window.snapshot(),
+            AutoCompactWindowSnapshot {
+                ordinal: 1,
+                prefill_input_tokens: Some(120),
+            }
+        );
+
+        window.start_next();
+        assert_eq!(
+            window.snapshot(),
+            AutoCompactWindowSnapshot {
+                ordinal: 2,
+                prefill_input_tokens: None,
+            }
+        );
+    }
+}
diff --git a/codex-rs/core/src/state/mod.rs b/codex-rs/core/src/state/mod.rs
index 13f3bf6c86f..3122ec5f259 100644
--- a/codex-rs/core/src/state/mod.rs
+++ b/codex-rs/core/src/state/mod.rs
@@ -1,7 +1,9 @@
+mod auto_compact_window;
 mod service;
 mod session;
 mod turn;
 
+pub(crate) use auto_compact_window::AutoCompactWindowSnapshot;
 pub(crate) use service::SessionServices;
 pub(crate) use session::SessionState;
 pub(crate) use turn::ActiveTurn;
diff --git a/codex-rs/core/src/state/session.rs b/codex-rs/core/src/state/session.rs
index 3abe8140a54..6d34a737d44 100644
--- a/codex-rs/core/src/state/session.rs
+++ b/codex-rs/core/src/state/session.rs
@@ -6,6 +6,8 @@ use codex_sandboxing::policy_transforms::merge_permission_profiles;
 use std::collections::HashMap;
 use std::collections::HashSet;
 
+use super::auto_compact_window::AutoCompactWindow;
+use super::auto_compact_window::AutoCompactWindowSnapshot;
 use crate::context_manager::ContextManager;
 use crate::session::PreviousTurnSettings;
 use crate::session::session::SessionConfiguration;
@@ -28,9 +30,8 @@ pub(crate) struct SessionState {
     /// model/realtime handling on subsequent regular turns (including full-context
     /// reinjection after resume or `/compact`).
     previous_turn_settings: Option<PreviousTurnSettings>,
-    /// Prefix size for the active compaction window when auto-compaction is
-    /// configured to count only tokens after the carried window prefix.
-    auto_compact_window_prefix_tokens: Option<i64>,
+    /// Runtime accounting state for the active auto-compaction window.
+    auto_compact_window: AutoCompactWindow,
     /// Startup prewarmed session prepared during session initialization.
     pub(crate) startup_prewarm: Option<SessionStartupPrewarmHandle>,
     pub(crate) active_connector_selection: HashSet<String>,
@@ -51,7 +52,7 @@ impl SessionState {
             dependency_env: HashMap::new(),
             mcp_dependency_prompted: HashSet::new(),
             previous_turn_settings: None,
-            auto_compact_window_prefix_tokens: None,
+            auto_compact_window: AutoCompactWindow::new(),
             startup_prewarm: None,
             active_connector_selection: HashSet::new(),
             pending_session_start_source: None,
@@ -101,7 +102,7 @@ impl SessionState {
         self.history.replace(items);
         self.history
             .set_reference_context_item(reference_context_item);
-        self.auto_compact_window_prefix_tokens = None;
+        self.auto_compact_window.clear_prefill();
     }
 
     pub(crate) fn set_token_info(&mut self, info: Option<TokenUsageInfo>) {
@@ -125,21 +126,24 @@ impl SessionState {
         self.history.update_token_info(usage, model_context_window);
     }
 
-    pub(crate) fn ensure_auto_compact_window_prefix_tokens_from_usage(
+    pub(crate) fn ensure_auto_compact_window_server_prefill_from_usage(
         &mut self,
         usage: &TokenUsage,
     ) {
-        if self.auto_compact_window_prefix_tokens.is_none() {
-            self.auto_compact_window_prefix_tokens = Some(usage.input_tokens.max(0));
-        }
+        self.auto_compact_window
+            .ensure_server_observed_prefill_from_usage(usage);
+    }
+
+    pub(crate) fn set_auto_compact_window_estimated_prefill(&mut self, tokens: i64) {
+        self.auto_compact_window.set_estimated_prefill(tokens);
     }
 
-    pub(crate) fn set_auto_compact_window_prefix_tokens(&mut self, tokens: i64) {
-        self.auto_compact_window_prefix_tokens = Some(tokens.max(0));
+    pub(crate) fn start_next_auto_compact_window(&mut self) {
+        self.auto_compact_window.start_next();
     }
 
-    pub(crate) fn auto_compact_window_prefix_tokens(&self) -> Option<i64> {
-        self.auto_compact_window_prefix_tokens
+    pub(crate) fn auto_compact_window_snapshot(&self) -> AutoCompactWindowSnapshot {
+        self.auto_compact_window.snapshot()
     }
 
     pub(crate) fn token_info(&self) -> Option<TokenUsageInfo> {
diff --git a/codex-rs/core/src/state/session_tests.rs b/codex-rs/core/src/state/session_tests.rs
index 5e90cc881dd..30af41e84ac 100644
--- a/codex-rs/core/src/state/session_tests.rs
+++ b/codex-rs/core/src/state/session_tests.rs
@@ -1,5 +1,6 @@
 use super::*;
 use crate::session::tests::make_session_configuration_for_tests;
+use crate::state::AutoCompactWindowSnapshot;
 use codex_protocol::protocol::CreditsSnapshot;
 use codex_protocol::protocol::RateLimitWindow;
 use pretty_assertions::assert_eq;
@@ -61,6 +62,24 @@ async fn set_rate_limits_defaults_limit_id_to_codex_when_missing() {
     );
 }
 
+#[tokio::test]
+async fn replace_history_clears_auto_compact_window_prefill_without_advancing() {
+    let session_configuration = make_session_configuration_for_tests().await;
+    let mut state = SessionState::new(session_configuration);
+
+    state.start_next_auto_compact_window();
+    state.set_auto_compact_window_estimated_prefill(100);
+    state.replace_history(Vec::new(), /*reference_context_item*/ None);
+
+    assert_eq!(
+        state.auto_compact_window_snapshot(),
+        AutoCompactWindowSnapshot {
+            ordinal: 2,
+            prefill_input_tokens: None,
+        }
+    );
+}
+
 #[tokio::test]
 async fn set_rate_limits_defaults_to_codex_when_limit_id_missing_after_other_bucket() {
     let session_configuration = make_session_configuration_for_tests().await;
diff --git a/codex-rs/core/tests/suite/compact.rs b/codex-rs/core/tests/suite/compact.rs
index 6607b03c527..72393d23174 100644
--- a/codex-rs/core/tests/suite/compact.rs
+++ b/codex-rs/core/tests/suite/compact.rs
@@ -3182,13 +3182,19 @@ async fn auto_compact_body_after_prefix_counts_growth_after_compaction() {
             "r3", /*input_tokens*/ 100_000, /*output_tokens*/ 10,
         ),
     ]);
+    let third_turn = sse(vec![
+        ev_assistant_message("m4", FINAL_REPLY),
+        ev_completed_with_usage(
+            "r4", /*input_tokens*/ 100_100, /*output_tokens*/ 5,
+        ),
+    ]);
     let second_auto_compact_turn = sse(vec![
-        ev_assistant_message("m4", AUTO_SUMMARY_TEXT),
-        ev_completed_with_tokens("r4", /*total_tokens*/ 20),
+        ev_assistant_message("m5", AUTO_SUMMARY_TEXT),
+        ev_completed_with_tokens("r5", /*total_tokens*/ 20),
     ]);
-    let third_turn = sse(vec![
-        ev_assistant_message("m5", FINAL_REPLY),
-        ev_completed_with_usage("r5", /*input_tokens*/ 80, /*output_tokens*/ 5),
+    let fourth_turn = sse(vec![
+        ev_assistant_message("m6", FINAL_REPLY),
+        ev_completed_with_usage("r6", /*input_tokens*/ 80, /*output_tokens*/ 5),
     ]);
     let request_log = mount_sse_sequence(
         &server,
@@ -3196,8 +3202,9 @@ async fn auto_compact_body_after_prefix_counts_growth_after_compaction() {
             first_turn,
             first_auto_compact_turn,
             second_turn,
-            second_auto_compact_turn,
             third_turn,
+            second_auto_compact_turn,
+            fourth_turn,
         ],
     )
     .await;
@@ -3208,7 +3215,7 @@ async fn auto_compact_body_after_prefix_counts_growth_after_compaction() {
             config.model_provider = model_provider;
             set_test_compact_prompt(config);
             config.model_context_window = Some(200_000);
-            config.model_auto_compact_token_limit = Some(20);
+            config.model_auto_compact_token_limit = Some(40);
             config.model_auto_compact_token_limit_scope =
                 AutoCompactTokenLimitScope::BodyAfterPrefix;
         })
@@ -3237,10 +3244,21 @@ async fn auto_compact_body_after_prefix_counts_growth_after_compaction() {
     let requests = request_log.requests();
     assert_eq!(
         requests.len(),
-        5,
-        "third turn should compact again because the post-compaction growth counted against the body budget"
+        4,
+        "the first server-observed input in the new window should become the prefill baseline"
+    );
+
+    test.submit_turn("AFTER_GROWTH_TRIGGER")
+        .await
+        .expect("submit fourth turn");
+
+    let requests = request_log.requests();
+    assert_eq!(
+        requests.len(),
+        6,
+        "fourth turn should compact because later post-compaction growth counted against the body budget"
     );
-    let compact_body = requests[3].body_json().to_string();
+    let compact_body = requests[4].body_json().to_string();
     assert!(
         body_contains_text(&compact_body, SUMMARIZATION_PROMPT),
         "post-compaction growth should trigger a second body-after-prefix compaction"

From 02d98f923f4b8a2db3be2cec47982404a7ba028e Mon Sep 17 00:00:00 2001
From: jif-oai <jif@openai.com>
Date: Mon, 18 May 2026 20:49:02 +0200
Subject: [PATCH 4/6] nit cleanups

---
 codex-rs/core/src/session/turn.rs             | 37 +++++++++++--------
 .../core/src/state/auto_compact_window.rs     |  8 ++++
 2 files changed, 29 insertions(+), 16 deletions(-)

diff --git a/codex-rs/core/src/session/turn.rs b/codex-rs/core/src/session/turn.rs
index 39163678c10..6a7b022393b 100644
--- a/codex-rs/core/src/session/turn.rs
+++ b/codex-rs/core/src/session/turn.rs
@@ -485,14 +485,14 @@ pub(crate) async fn run_turn(
                 trace!(
                     turn_id = %turn_context.sub_id,
                     total_usage_tokens = token_status.active_context_tokens,
-                    budgeted_tokens = token_status.budgeted_tokens,
+                    auto_compact_scope_tokens = token_status.auto_compact_scope_tokens,
                     estimated_token_count = ?estimated_token_count,
-                    auto_compact_limit = token_status.auto_compact_limit,
+                    auto_compact_scope_limit = token_status.auto_compact_scope_limit,
                     auto_compact_limit_scope = ?turn_context.config.model_auto_compact_token_limit_scope,
                     auto_compact_window_ordinal = ?token_status.auto_compact_window_ordinal,
                     auto_compact_window_prefill_tokens = ?token_status.auto_compact_window_prefill_tokens,
-                    context_window_limit = ?token_status.context_window_limit,
-                    context_window_limit_reached = token_status.context_window_limit_reached,
+                    full_context_window_limit = ?token_status.full_context_window_limit,
+                    full_context_window_limit_reached = token_status.full_context_window_limit_reached,
                     token_limit_reached,
                     model_needs_follow_up,
                     has_pending_input,
@@ -734,13 +734,15 @@ struct PreSamplingCompactResult {
 
 #[derive(Debug)]
 struct AutoCompactTokenStatus {
+    /// Full active context usage, independent of the configured auto-compact scope.
     active_context_tokens: i64,
-    budgeted_tokens: i64,
-    auto_compact_limit: i64,
-    context_window_limit: Option<i64>,
+    /// Usage counted against `model_auto_compact_token_limit` for the current scope.
+    auto_compact_scope_tokens: i64,
+    auto_compact_scope_limit: i64,
+    full_context_window_limit: Option<i64>,
     auto_compact_window_ordinal: Option<u64>,
     auto_compact_window_prefill_tokens: Option<i64>,
-    context_window_limit_reached: bool,
+    full_context_window_limit_reached: bool,
     token_limit_reached: bool,
 }
 
@@ -751,7 +753,7 @@ async fn auto_compact_token_status(
     let active_context_tokens = sess.get_total_token_usage().await;
     let mut auto_compact_window_ordinal = None;
     let mut auto_compact_window_prefill_tokens = None;
-    let (budgeted_tokens, auto_compact_limit, context_window_limit) =
+    let (auto_compact_scope_tokens, auto_compact_scope_limit, full_context_window_limit) =
         match turn_context.config.model_auto_compact_token_limit_scope {
             AutoCompactTokenLimitScope::Total => (
                 active_context_tokens,
@@ -777,18 +779,21 @@ async fn auto_compact_token_status(
                 )
             }
         };
-    let context_window_limit_reached = context_window_limit
-        .is_some_and(|context_window_limit| active_context_tokens >= context_window_limit);
-    let token_limit_reached = budgeted_tokens >= auto_compact_limit || context_window_limit_reached;
+    let full_context_window_limit_reached =
+        full_context_window_limit.is_some_and(|full_context_window_limit| {
+            active_context_tokens >= full_context_window_limit
+        });
+    let token_limit_reached =
+        auto_compact_scope_tokens >= auto_compact_scope_limit || full_context_window_limit_reached;
 
     AutoCompactTokenStatus {
         active_context_tokens,
-        budgeted_tokens,
-        auto_compact_limit,
-        context_window_limit,
+        auto_compact_scope_tokens,
+        auto_compact_scope_limit,
+        full_context_window_limit,
         auto_compact_window_ordinal,
         auto_compact_window_prefill_tokens,
-        context_window_limit_reached,
+        full_context_window_limit_reached,
         token_limit_reached,
     }
 }
diff --git a/codex-rs/core/src/state/auto_compact_window.rs b/codex-rs/core/src/state/auto_compact_window.rs
index e84200c3117..d8d436280f0 100644
--- a/codex-rs/core/src/state/auto_compact_window.rs
+++ b/codex-rs/core/src/state/auto_compact_window.rs
@@ -15,6 +15,11 @@ enum AutoCompactWindowPrefill {
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub(super) struct AutoCompactWindow {
     ordinal: u64,
+    /// Absolute input-token baseline for the current compaction window.
+    ///
+    /// `body_after_prefix` subtracts this from later active-context usage. It is
+    /// not the growth itself; server-observed usage replaces estimated
+    /// resume/recompute baselines when available.
     prefill_input_tokens: Option<AutoCompactWindowPrefill>,
 }
 
@@ -35,6 +40,9 @@ impl AutoCompactWindow {
         self.clear_prefill();
     }
 
+    /// Records the request-input side of the first server usage sample. The
+    /// sampled output from that response is body growth and should remain
+    /// counted against the scoped auto-compact budget.
     pub(super) fn ensure_server_observed_prefill_from_usage(&mut self, usage: &TokenUsage) {
         if matches!(
             self.prefill_input_tokens,

From 8eb781b4cf78e78ed90ae5233e60db108abb4d35 Mon Sep 17 00:00:00 2001
From: jif-oai <jif@openai.com>
Date: Mon, 18 May 2026 21:37:20 +0200
Subject: [PATCH 5/6] Fix argument comments for auto-compact tests

---
 codex-rs/core/src/state/auto_compact_window.rs | 4 ++--
 codex-rs/core/src/state/session_tests.rs       | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/codex-rs/core/src/state/auto_compact_window.rs b/codex-rs/core/src/state/auto_compact_window.rs
index d8d436280f0..d1deb3c2882 100644
--- a/codex-rs/core/src/state/auto_compact_window.rs
+++ b/codex-rs/core/src/state/auto_compact_window.rs
@@ -97,7 +97,7 @@ mod tests {
             }
         );
 
-        window.set_estimated_prefill(150);
+        window.set_estimated_prefill(/*tokens*/ 150);
         assert_eq!(
             window.snapshot(),
             AutoCompactWindowSnapshot {
@@ -124,7 +124,7 @@ mod tests {
             total_tokens: 180,
             ..Default::default()
         });
-        window.set_estimated_prefill(90);
+        window.set_estimated_prefill(/*tokens*/ 90);
         assert_eq!(
             window.snapshot(),
             AutoCompactWindowSnapshot {
diff --git a/codex-rs/core/src/state/session_tests.rs b/codex-rs/core/src/state/session_tests.rs
index 30af41e84ac..b6b5a057a01 100644
--- a/codex-rs/core/src/state/session_tests.rs
+++ b/codex-rs/core/src/state/session_tests.rs
@@ -68,7 +68,7 @@ async fn replace_history_clears_auto_compact_window_prefill_without_advancing()
     let mut state = SessionState::new(session_configuration);
 
     state.start_next_auto_compact_window();
-    state.set_auto_compact_window_estimated_prefill(100);
+    state.set_auto_compact_window_estimated_prefill(/*tokens*/ 100);
     state.replace_history(Vec::new(), /*reference_context_item*/ None);
 
     assert_eq!(

From 253a9bea16ba031097e5e706b8f1ab0886f72fd6 Mon Sep 17 00:00:00 2001
From: jif-oai <jif@openai.com>
Date: Tue, 19 May 2026 11:20:22 +0200
Subject: [PATCH 6/6] Fix auto-compact CI build

---
 codex-rs/core/src/client.rs      | 2 +-
 codex-rs/core/src/session/mod.rs | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/codex-rs/core/src/client.rs b/codex-rs/core/src/client.rs
index cd1fcb6696d..f604a634581 100644
--- a/codex-rs/core/src/client.rs
+++ b/codex-rs/core/src/client.rs
@@ -931,7 +931,7 @@ impl Drop for ModelClientSession {
 }
 
 impl ModelClientSession {
-    fn reset_websocket_session(&mut self) {
+    pub(crate) fn reset_websocket_session(&mut self) {
         self.websocket_session.connection = None;
         self.websocket_session.last_request = None;
         self.websocket_session.last_response_rx = None;
diff --git a/codex-rs/core/src/session/mod.rs b/codex-rs/core/src/session/mod.rs
index 4ffd5c43e2d..fbffffbe41b 100644
--- a/codex-rs/core/src/session/mod.rs
+++ b/codex-rs/core/src/session/mod.rs
@@ -288,9 +288,7 @@ use crate::rollout::map_session_init_error;
 use crate::session_startup_prewarm::SessionStartupPrewarmHandle;
 use crate::shell;
 use crate::shell_snapshot::ShellSnapshot;
-use crate::state::ActiveTurn;
 use crate::state::AutoCompactWindowSnapshot;
-use crate::state::MailboxDeliveryPhase;
 use crate::state::PendingRequestPermissions;
 use crate::state::SessionServices;
 use crate::state::SessionState;