diff --git a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json
index fdaaf3e857d0..c7f963df36d7 100644
--- a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json
+++ b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json
@@ -6314,6 +6314,25 @@
           }
         ]
       },
+      "AutoCompactTokenLimitScope": {
+        "description": "Selects which part of the active context is charged against `model_auto_compact_token_limit`.",
+        "oneOf": [
+          {
+            "description": "Count the full active context against the limit.",
+            "enum": [
+              "total"
+            ],
+            "type": "string"
+          },
+          {
+            "description": "Count sampled output and later growth after the carried window prefix.",
+            "enum": [
+              "body_after_prefix"
+            ],
+            "type": "string"
+          }
+        ]
+      },
       "AutoReviewDecisionSource": {
         "description": "[UNSTABLE] Source that produced a terminal approval auto-review decision.",
         "enum": [
@@ -7162,6 +7181,16 @@
               "null"
             ]
           },
+          "model_auto_compact_token_limit_scope": {
+            "anyOf": [
+              {
+                "$ref": "#/definitions/v2/AutoCompactTokenLimitScope"
+              },
+              {
+                "type": "null"
+              }
+            ]
+          },
           "model_context_window": {
             "format": "int64",
             "type": [
diff --git a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.v2.schemas.json b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.v2.schemas.json
index e413d792349c..0fee6fda8c43 100644
--- a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.v2.schemas.json
+++ b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.v2.schemas.json
@@ -826,6 +826,25 @@
         }
       ]
     },
+    "AutoCompactTokenLimitScope": {
+      "description": "Selects which part of the active context is charged against `model_auto_compact_token_limit`.",
+      "oneOf": [
+        {
+          "description": "Count the full active context against the limit.",
+          "enum": [
+            "total"
+          ],
+          "type": "string"
+        },
+        {
+          "description": "Count sampled output and later growth after the carried window prefix.",
+          "enum": [
+            "body_after_prefix"
+          ],
+          "type": "string"
+        }
+      ]
+    },
     "AutoReviewDecisionSource": {
       "description": "[UNSTABLE] Source that produced a terminal approval auto-review decision.",
       "enum": [
@@ -3551,6 +3570,16 @@
             "null"
           ]
         },
+        "model_auto_compact_token_limit_scope": {
+          "anyOf": [
+            {
+              "$ref": "#/definitions/AutoCompactTokenLimitScope"
+            },
+            {
+              "type": "null"
+            }
+          ]
+        },
         "model_context_window": {
           "format": "int64",
           "type": [
diff --git a/codex-rs/app-server-protocol/schema/json/v2/ConfigReadResponse.json b/codex-rs/app-server-protocol/schema/json/v2/ConfigReadResponse.json
index 81364a6f4030..7595f7fd0093 100644
--- a/codex-rs/app-server-protocol/schema/json/v2/ConfigReadResponse.json
+++ b/codex-rs/app-server-protocol/schema/json/v2/ConfigReadResponse.json
@@ -188,6 +188,25 @@
         }
       ]
     },
+    "AutoCompactTokenLimitScope": {
+      "description": "Selects which part of the active context is charged against `model_auto_compact_token_limit`.",
+      "oneOf": [
+        {
+          "description": "Count the full active context against the limit.",
+          "enum": [
+            "total"
+          ],
+          "type": "string"
+        },
+        {
+          "description": "Count sampled output and later growth after the carried window prefix.",
+          "enum": [
+            "body_after_prefix"
+          ],
+          "type": "string"
+        }
+      ]
+    },
     "Config": {
       "additionalProperties": true,
       "properties": {
@@ -280,6 +299,16 @@
             "null"
           ]
         },
+        "model_auto_compact_token_limit_scope": {
+          "anyOf": [
+            {
+              "$ref": "#/definitions/AutoCompactTokenLimitScope"
+            },
+            {
+              "type": "null"
+            }
+          ]
+        },
         "model_context_window": {
           "format": "int64",
           "type": [
diff --git a/codex-rs/app-server-protocol/schema/typescript/AutoCompactTokenLimitScope.ts b/codex-rs/app-server-protocol/schema/typescript/AutoCompactTokenLimitScope.ts
new file mode 100644
index 000000000000..3d9a56c82a67
--- /dev/null
+++ b/codex-rs/app-server-protocol/schema/typescript/AutoCompactTokenLimitScope.ts
@@ -0,0 +1,9 @@
+// GENERATED CODE! DO NOT MODIFY BY HAND!
+
+// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
+
+/**
+ * Selects which part of the active context is charged against
+ * `model_auto_compact_token_limit`.
+ */
+export type AutoCompactTokenLimitScope = "total" | "body_after_prefix";
diff --git a/codex-rs/app-server-protocol/schema/typescript/index.ts b/codex-rs/app-server-protocol/schema/typescript/index.ts
index 97ea43560192..8be75af546fd 100644
--- a/codex-rs/app-server-protocol/schema/typescript/index.ts
+++ b/codex-rs/app-server-protocol/schema/typescript/index.ts
@@ -5,6 +5,7 @@ export type { AgentPath } from "./AgentPath";
 export type { ApplyPatchApprovalParams } from "./ApplyPatchApprovalParams";
 export type { ApplyPatchApprovalResponse } from "./ApplyPatchApprovalResponse";
 export type { AuthMode } from "./AuthMode";
+export type { AutoCompactTokenLimitScope } from "./AutoCompactTokenLimitScope";
 export type { ClientInfo } from "./ClientInfo";
 export type { ClientNotification } from "./ClientNotification";
 export type { ClientRequest } from "./ClientRequest";
diff --git a/codex-rs/app-server-protocol/schema/typescript/v2/Config.ts b/codex-rs/app-server-protocol/schema/typescript/v2/Config.ts
index ba24663e8798..29eae9877419 100644
--- a/codex-rs/app-server-protocol/schema/typescript/v2/Config.ts
+++ b/codex-rs/app-server-protocol/schema/typescript/v2/Config.ts
@@ -1,6 +1,7 @@
 // GENERATED CODE! DO NOT MODIFY BY HAND!
 
 // This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
+import type { AutoCompactTokenLimitScope } from "../AutoCompactTokenLimitScope";
 import type { ForcedLoginMethod } from "../ForcedLoginMethod";
 import type { ReasoningEffort } from "../ReasoningEffort";
 import type { ReasoningSummary } from "../ReasoningSummary";
@@ -16,7 +17,7 @@ import type { SandboxMode } from "./SandboxMode";
 import type { SandboxWorkspaceWrite } from "./SandboxWorkspaceWrite";
 import type { ToolsV2 } from "./ToolsV2";
 
-export type Config = {model: string | null, review_model: string | null, model_context_window: bigint | null, model_auto_compact_token_limit: bigint | null, model_provider: string | null, approval_policy: AskForApproval | null, /**
+export type Config = {model: string | null, review_model: string | null, model_context_window: bigint | null, model_auto_compact_token_limit: bigint | null, model_auto_compact_token_limit_scope: AutoCompactTokenLimitScope | null, model_provider: string | null, approval_policy: AskForApproval | null, /**
  * [UNSTABLE] Optional default for where approval requests are routed for
  * review.
  */
diff --git a/codex-rs/app-server-protocol/src/protocol/v2/config.rs b/codex-rs/app-server-protocol/src/protocol/v2/config.rs
index b46515d81142..a34e6c530f9a 100644
--- a/codex-rs/app-server-protocol/src/protocol/v2/config.rs
+++ b/codex-rs/app-server-protocol/src/protocol/v2/config.rs
@@ -3,6 +3,7 @@ use super::AskForApproval;
 use super::SandboxMode;
 use super::shared::default_enabled;
 use codex_experimental_api_macros::ExperimentalApi;
+use codex_protocol::config_types::AutoCompactTokenLimitScope;
 use codex_protocol::config_types::ForcedLoginMethod;
 use codex_protocol::config_types::ReasoningSummary;
 use codex_protocol::config_types::Verbosity;
@@ -251,6 +252,7 @@ pub struct Config {
     pub review_model: Option<String>,
     pub model_context_window: Option<i64>,
     pub model_auto_compact_token_limit: Option<i64>,
+    pub model_auto_compact_token_limit_scope: Option<AutoCompactTokenLimitScope>,
     pub model_provider: Option<String>,
     #[experimental(nested)]
     pub approval_policy: Option<AskForApproval>,
diff --git a/codex-rs/app-server-protocol/src/protocol/v2/tests.rs b/codex-rs/app-server-protocol/src/protocol/v2/tests.rs
index b445fc4d01af..fbc692c20161 100644
--- a/codex-rs/app-server-protocol/src/protocol/v2/tests.rs
+++ b/codex-rs/app-server-protocol/src/protocol/v2/tests.rs
@@ -1511,6 +1511,7 @@ fn config_granular_approval_policy_is_marked_experimental() {
         review_model: None,
         model_context_window: None,
         model_auto_compact_token_limit: None,
+        model_auto_compact_token_limit_scope: None,
         model_provider: None,
         approval_policy: Some(AskForApproval::Granular {
             sandbox_approval: false,
@@ -1551,6 +1552,7 @@ fn config_approvals_reviewer_is_marked_experimental() {
         review_model: None,
         model_context_window: None,
         model_auto_compact_token_limit: None,
+        model_auto_compact_token_limit_scope: None,
         model_provider: None,
         approval_policy: None,
         approvals_reviewer: Some(ApprovalsReviewer::AutoReview),
@@ -1585,6 +1587,7 @@ fn config_nested_profile_granular_approval_policy_is_marked_experimental() {
         review_model: None,
         model_context_window: None,
         model_auto_compact_token_limit: None,
+        model_auto_compact_token_limit_scope: None,
         model_provider: None,
         approval_policy: None,
         approvals_reviewer: None,
@@ -1641,6 +1644,7 @@ fn config_nested_profile_approvals_reviewer_is_marked_experimental() {
         review_model: None,
         model_context_window: None,
         model_auto_compact_token_limit: None,
+        model_auto_compact_token_limit_scope: None,
         model_provider: None,
         approval_policy: None,
         approvals_reviewer: None,
diff --git a/codex-rs/config/src/config_toml.rs b/codex-rs/config/src/config_toml.rs
index 76ba338fc0e7..43f75ffc289e 100644
--- a/codex-rs/config/src/config_toml.rs
+++ b/codex-rs/config/src/config_toml.rs
@@ -38,6 +38,7 @@ use codex_model_provider_info::ModelProviderInfo;
 use codex_model_provider_info::OLLAMA_CHAT_PROVIDER_REMOVED_ERROR;
 use codex_model_provider_info::OLLAMA_OSS_PROVIDER_ID;
 use codex_model_provider_info::OPENAI_PROVIDER_ID;
+use codex_protocol::config_types::AutoCompactTokenLimitScope;
 use codex_protocol::config_types::ForcedLoginMethod;
 use codex_protocol::config_types::Personality;
 use codex_protocol::config_types::ReasoningSummary;
@@ -156,6 +157,10 @@ pub struct ConfigToml {
     /// Token usage threshold triggering auto-compaction of conversation history.
     pub model_auto_compact_token_limit: Option<i64>,
 
+    /// Controls whether the auto-compaction limit applies to the full context or
+    /// only to tokens after the carried prefix in the current compaction window.
+    pub model_auto_compact_token_limit_scope: Option<AutoCompactTokenLimitScope>,
+
     /// Default approval policy for executing commands.
     pub approval_policy: Option<AskForApproval>,
 
diff --git a/codex-rs/core-api/src/lib.rs b/codex-rs/core-api/src/lib.rs
index 04ebaf8e7e6f..e87ee82f3090 100644
--- a/codex-rs/core-api/src/lib.rs
+++ b/codex-rs/core-api/src/lib.rs
@@ -58,6 +58,7 @@ pub use codex_models_manager::manager::SharedModelsManager;
 pub use codex_protocol::ThreadId;
 pub use codex_protocol::config_types::AltScreenMode;
 pub use codex_protocol::config_types::ApprovalsReviewer;
+pub use codex_protocol::config_types::AutoCompactTokenLimitScope;
 pub use codex_protocol::config_types::CollaborationModeMask;
 pub use codex_protocol::config_types::ShellEnvironmentPolicy;
 pub use codex_protocol::config_types::WebSearchMode;
diff --git a/codex-rs/core/config.schema.json b/codex-rs/core/config.schema.json
index 07f49a3cd630..f0eb76d62018 100644
--- a/codex-rs/core/config.schema.json
+++ b/codex-rs/core/config.schema.json
@@ -309,6 +309,25 @@
         }
       ]
     },
+    "AutoCompactTokenLimitScope": {
+      "description": "Selects which part of the active context is charged against `model_auto_compact_token_limit`.",
+      "oneOf": [
+        {
+          "description": "Count the full active context against the limit.",
+          "enum": [
+            "total"
+          ],
+          "type": "string"
+        },
+        {
+          "description": "Count sampled output and later growth after the carried window prefix.",
+          "enum": [
+            "body_after_prefix"
+          ],
+          "type": "string"
+        }
+      ]
+    },
     "AutoReviewToml": {
       "properties": {
         "policy": {
@@ -4544,6 +4563,14 @@
       "format": "int64",
       "type": "integer"
     },
+    "model_auto_compact_token_limit_scope": {
+      "allOf": [
+        {
+          "$ref": "#/definitions/AutoCompactTokenLimitScope"
+        }
+      ],
+      "description": "Controls whether the auto-compaction limit applies to the full context or only to tokens after the carried prefix in the current compaction window."
+    },
     "model_catalog_json": {
       "allOf": [
         {
diff --git a/codex-rs/core/src/client.rs b/codex-rs/core/src/client.rs
index cd1fcb6696d8..f604a634581e 100644
--- a/codex-rs/core/src/client.rs
+++ b/codex-rs/core/src/client.rs
@@ -931,7 +931,7 @@ impl Drop for ModelClientSession {
 }
 
 impl ModelClientSession {
-    fn reset_websocket_session(&mut self) {
+    pub(crate) fn reset_websocket_session(&mut self) {
         self.websocket_session.connection = None;
         self.websocket_session.last_request = None;
         self.websocket_session.last_response_rx = None;
diff --git a/codex-rs/core/src/config/config_tests.rs b/codex-rs/core/src/config/config_tests.rs
index 06ee405495de..850d9ab571b5 100644
--- a/codex-rs/core/src/config/config_tests.rs
+++ b/codex-rs/core/src/config/config_tests.rs
@@ -7711,6 +7711,7 @@ async fn test_precedence_fixture_with_o3_profile() -> std::io::Result<()> {
             review_model: None,
             model_context_window: None,
             model_auto_compact_token_limit: None,
+            model_auto_compact_token_limit_scope: AutoCompactTokenLimitScope::Total,
             service_tier: None,
             model_provider_id: "openai".to_string(),
             model_provider: fixture.openai_provider.clone(),
@@ -8164,6 +8165,7 @@ async fn test_precedence_fixture_with_gpt3_profile() -> std::io::Result<()> {
         review_model: None,
         model_context_window: None,
         model_auto_compact_token_limit: None,
+        model_auto_compact_token_limit_scope: AutoCompactTokenLimitScope::Total,
         service_tier: None,
         model_provider_id: "openai-custom".to_string(),
         model_provider: fixture.openai_custom_provider.clone(),
@@ -8331,6 +8333,7 @@ async fn test_precedence_fixture_with_zdr_profile() -> std::io::Result<()> {
         review_model: None,
         model_context_window: None,
         model_auto_compact_token_limit: None,
+        model_auto_compact_token_limit_scope: AutoCompactTokenLimitScope::Total,
         service_tier: None,
         model_provider_id: "openai".to_string(),
         model_provider: fixture.openai_provider.clone(),
@@ -8483,6 +8486,7 @@ async fn test_precedence_fixture_with_gpt5_profile() -> std::io::Result<()> {
         review_model: None,
         model_context_window: None,
         model_auto_compact_token_limit: None,
+        model_auto_compact_token_limit_scope: AutoCompactTokenLimitScope::Total,
         service_tier: None,
         model_provider_id: "openai".to_string(),
         model_provider: fixture.openai_provider.clone(),
diff --git a/codex-rs/core/src/config/mod.rs b/codex-rs/core/src/config/mod.rs
index 74fdb8383c36..dbd6cad470b5 100644
--- a/codex-rs/core/src/config/mod.rs
+++ b/codex-rs/core/src/config/mod.rs
@@ -77,6 +77,7 @@ use codex_model_provider_info::built_in_model_providers;
 use codex_model_provider_info::merge_configured_model_providers;
 use codex_models_manager::ModelsManagerConfig;
 use codex_protocol::config_types::AltScreenMode;
+use codex_protocol::config_types::AutoCompactTokenLimitScope;
 use codex_protocol::config_types::ForcedLoginMethod;
 use codex_protocol::config_types::Personality;
 use codex_protocol::config_types::ReasoningSummary;
@@ -527,6 +528,10 @@ pub struct Config {
     /// Token usage threshold triggering auto-compaction of conversation history.
     pub model_auto_compact_token_limit: Option<i64>,
 
+    /// Controls whether `model_auto_compact_token_limit` applies to the full
+    /// active context or only tokens after the carried compaction-window prefix.
+    pub model_auto_compact_token_limit_scope: AutoCompactTokenLimitScope,
+
     /// Key into the model_providers map that specifies which provider to use.
     pub model_provider_id: String,
 
@@ -3373,6 +3378,9 @@ impl Config {
             review_model,
             model_context_window: cfg.model_context_window,
             model_auto_compact_token_limit: cfg.model_auto_compact_token_limit,
+            model_auto_compact_token_limit_scope: cfg
+                .model_auto_compact_token_limit_scope
+                .unwrap_or_default(),
             model_provider_id,
             model_provider,
             cwd: resolved_cwd,
diff --git a/codex-rs/core/src/guardian/review_session.rs b/codex-rs/core/src/guardian/review_session.rs
index 389373c8deb8..15d6c142caec 100644
--- a/codex-rs/core/src/guardian/review_session.rs
+++ b/codex-rs/core/src/guardian/review_session.rs
@@ -7,6 +7,7 @@ use std::time::Duration;
 use anyhow::anyhow;
 use codex_analytics::GuardianReviewAnalyticsResult;
 use codex_analytics::GuardianReviewSessionKind;
+use codex_protocol::config_types::AutoCompactTokenLimitScope;
 use codex_protocol::config_types::Personality;
 use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig;
 use codex_protocol::models::PermissionProfile;
@@ -138,6 +139,7 @@ struct GuardianReviewSessionReuseKey {
     model_provider: ModelProviderInfo,
     model_context_window: Option<i64>,
     model_auto_compact_token_limit: Option<i64>,
+    model_auto_compact_token_limit_scope: AutoCompactTokenLimitScope,
     model_reasoning_effort: Option<ReasoningEffortConfig>,
     model_reasoning_summary: Option<ReasoningSummaryConfig>,
     permissions: Permissions,
@@ -162,6 +164,7 @@ impl GuardianReviewSessionReuseKey {
             model_provider: spawn_config.model_provider.clone(),
             model_context_window: spawn_config.model_context_window,
             model_auto_compact_token_limit: spawn_config.model_auto_compact_token_limit,
+            model_auto_compact_token_limit_scope: spawn_config.model_auto_compact_token_limit_scope,
             model_reasoning_effort: spawn_config.model_reasoning_effort,
             model_reasoning_summary: spawn_config.model_reasoning_summary,
             permissions: spawn_config.permissions.clone(),
@@ -1155,6 +1158,34 @@ mod tests {
         );
     }
 
+    #[tokio::test]
+    async fn guardian_review_session_compact_scope_change_invalidates_cached_session() {
+        let base_config = crate::config::test_config().await;
+        let base_spawn_config = build_guardian_review_session_config(
+            &base_config,
+            /*live_network_config*/ None,
+            "active-model",
+            /*reasoning_effort*/ None,
+        )
+        .expect("cached guardian config");
+        let base_key = GuardianReviewSessionReuseKey::from_spawn_config(&base_spawn_config);
+
+        // Same parent config, with only the auto-compact scope set to `BodyAfterPrefix`.
+        let mut scoped_config = base_config;
+        scoped_config.model_auto_compact_token_limit_scope =
+            AutoCompactTokenLimitScope::BodyAfterPrefix;
+        let scoped_spawn_config = build_guardian_review_session_config(
+            &scoped_config,
+            /*live_network_config*/ None,
+            "active-model",
+            /*reasoning_effort*/ None,
+        )
+        .expect("next guardian config");
+        let scoped_key = GuardianReviewSessionReuseKey::from_spawn_config(&scoped_spawn_config);
+
+        assert_ne!(base_key, scoped_key);
+    }
+
     #[tokio::test]
     async fn guardian_review_session_config_disables_hooks() {
         let mut parent_config = crate::config::test_config().await;
diff --git a/codex-rs/core/src/session/mod.rs b/codex-rs/core/src/session/mod.rs
index 3f1eab893644..ddab7752f717 100644
--- a/codex-rs/core/src/session/mod.rs
+++ b/codex-rs/core/src/session/mod.rs
@@ -78,6 +78,7 @@ use codex_protocol::approvals::ExecPolicyAmendment;
 use codex_protocol::approvals::NetworkPolicyAmendment;
 use codex_protocol::approvals::NetworkPolicyRuleAction;
 use codex_protocol::config_types::ApprovalsReviewer;
+use codex_protocol::config_types::AutoCompactTokenLimitScope;
 use codex_protocol::config_types::ModeKind;
 use codex_protocol::config_types::Settings;
 use codex_protocol::config_types::WebSearchMode;
@@ -288,6 +289,7 @@ use crate::rollout::map_session_init_error;
 use crate::session_startup_prewarm::SessionStartupPrewarmHandle;
 use crate::shell;
 use crate::shell_snapshot::ShellSnapshot;
+use crate::state::AutoCompactWindowSnapshot;
 use crate::state::PendingRequestPermissions;
 use crate::state::SessionServices;
 use crate::state::SessionState;
@@ -1089,6 +1091,11 @@ impl Session {
         state.get_total_token_usage(state.server_reasoning_included())
     }
 
+    pub(crate) async fn auto_compact_window_snapshot(&self) -> AutoCompactWindowSnapshot {
+        // Take the snapshot while holding the session state lock.
+        self.state.lock().await.auto_compact_window_snapshot()
+    }
+
     pub(crate) async fn get_total_token_usage_breakdown(&self) -> TotalTokenUsageBreakdown {
         let state = self.state.lock().await;
         state.history.get_total_token_usage_breakdown()
@@ -1247,11 +1254,41 @@ impl Session {
             reconstructed_rollout.reference_context_item,
         )
         .await;
+        let prefix_tokens = if matches!(
+            turn_context.config.model_auto_compact_token_limit_scope,
+            AutoCompactTokenLimitScope::BodyAfterPrefix
+        ) {
+            let history = self.clone_history().await;
+            let base_instructions = self.get_base_instructions().await;
+            history.estimate_token_count_with_base_instructions(&base_instructions)
+        } else {
+            None
+        };
+        if let Some(prefix_tokens) = prefix_tokens {
+            self.set_auto_compact_window_estimated_prefill_for_scope(turn_context, prefix_tokens)
+                .await;
+        }
         self.set_previous_turn_settings(previous_turn_settings.clone())
             .await;
         previous_turn_settings
     }
 
+    /// Forwards `tokens` to the session state's `set_auto_compact_window_estimated_prefill`;
+    /// does nothing unless the turn's auto-compact limit scope is `BodyAfterPrefix`.
+    async fn set_auto_compact_window_estimated_prefill_for_scope(
+        &self,
+        turn_context: &TurnContext,
+        tokens: i64,
+    ) {
+        let scope = &turn_context.config.model_auto_compact_token_limit_scope;
+        if matches!(scope, AutoCompactTokenLimitScope::BodyAfterPrefix) {
+            self.state
+                .lock()
+                .await
+                .set_auto_compact_window_estimated_prefill(tokens);
+        }
+    }
+
     fn last_token_info_from_rollout(rollout_items: &[RolloutItem]) -> Option<TokenUsageInfo> {
         rollout_items.iter().rev().find_map(|item| match item {
             RolloutItem::EventMsg(EventMsg::TokenCount(ev)) => ev.info.clone(),
@@ -2568,8 +2605,11 @@ impl Session {
         reference_context_item: Option<TurnContextItem>,
         compacted_item: CompactedItem,
     ) {
-        self.replace_history(items, reference_context_item.clone())
-            .await;
+        {
+            let mut state = self.state.lock().await;
+            state.replace_history(items, reference_context_item.clone());
+            state.start_next_auto_compact_window();
+        }
 
         self.persist_rollout_items(&[RolloutItem::Compacted(compacted_item)])
             .await;
@@ -2927,6 +2967,12 @@ impl Session {
                 let mut state = self.state.lock().await;
                 state
                     .update_token_info_from_usage(token_usage, turn_context.model_context_window());
+                if matches!(
+                    turn_context.config.model_auto_compact_token_limit_scope,
+                    AutoCompactTokenLimitScope::BodyAfterPrefix
+                ) {
+                    state.ensure_auto_compact_window_server_prefill_from_usage(token_usage);
+                }
                 state.token_info()
             };
             if let Some(token_info) = token_info.as_ref() {
@@ -2974,6 +3020,11 @@ impl Session {
 
             state.set_token_info(Some(info));
         }
+        self.set_auto_compact_window_estimated_prefill_for_scope(
+            turn_context,
+            estimated_total_tokens,
+        )
+        .await;
         self.send_token_count_event(turn_context).await;
     }
 
diff --git a/codex-rs/core/src/session/turn.rs b/codex-rs/core/src/session/turn.rs
index 5c4567b9fca5..7b1a59f610bd 100644
--- a/codex-rs/core/src/session/turn.rs
+++ b/codex-rs/core/src/session/turn.rs
@@ -75,6 +75,7 @@ use codex_hooks::HookEvent;
 use codex_hooks::HookEventAfterAgent;
 use codex_hooks::HookPayload;
 use codex_hooks::HookResult;
+use codex_protocol::config_types::AutoCompactTokenLimitScope;
 use codex_protocol::config_types::ModeKind;
 use codex_protocol::config_types::ServiceTier;
 use codex_protocol::error::CodexErr;
@@ -146,26 +147,31 @@ pub(crate) async fn run_turn(
         return None;
     }
 
-    let model_info = turn_context.model_info.clone();
-    let auto_compact_limit = model_info.auto_compact_token_limit().unwrap_or(i64::MAX);
     let mut client_session =
         prewarmed_client_session.unwrap_or_else(|| sess.services.model_client.new_session());
     // TODO(ccunningham): Pre-turn compaction runs before context updates and the
     // new user message are recorded. Estimate pending incoming items (context
     // diffs/full reinjection + user input) and trigger compaction preemptively
     // when they would push the thread over the compaction threshold.
-    if let Err(err) = run_pre_sampling_compact(&sess, &turn_context, &mut client_session).await {
-        if err.to_codex_protocol_error() == CodexErrorInfo::UsageLimitExceeded
-            && let Err(err) = sess
-                .goal_runtime_apply(GoalRuntimeEvent::UsageLimitReached {
-                    turn_context: turn_context.as_ref(),
-                })
-                .await
-        {
-            warn!("failed to usage-limit active goal after usage-limit error: {err}");
-        }
-        error!("Failed to run pre-sampling compact");
-        return None;
+    let pre_sampling_compact =
+        match run_pre_sampling_compact(&sess, &turn_context, &mut client_session).await {
+            Ok(pre_sampling_compact) => pre_sampling_compact,
+            Err(err) => {
+                if err.to_codex_protocol_error() == CodexErrorInfo::UsageLimitExceeded
+                    && let Err(err) = sess
+                        .goal_runtime_apply(GoalRuntimeEvent::UsageLimitReached {
+                            turn_context: turn_context.as_ref(),
+                        })
+                        .await
+                {
+                    warn!("failed to usage-limit active goal after usage-limit error: {err}");
+                }
+                error!("Failed to run pre-sampling compact");
+                return None;
+            }
+        };
+    if pre_sampling_compact.reset_client_session {
+        client_session.reset_websocket_session();
     }
 
     sess.record_context_updates_and_set_reference_context_item(turn_context.as_ref())
@@ -342,17 +348,24 @@ pub(crate) async fn run_turn(
                 can_drain_pending_input = true;
                 let has_pending_input = sess.input_queue.has_pending_input(&sess.active_turn).await;
                 let needs_follow_up = model_needs_follow_up || has_pending_input;
-                let total_usage_tokens = sess.get_total_token_usage().await;
-                let token_limit_reached = total_usage_tokens >= auto_compact_limit;
+                let token_status =
+                    auto_compact_token_status(sess.as_ref(), turn_context.as_ref()).await;
+                let token_limit_reached = token_status.token_limit_reached;
 
                 let estimated_token_count =
                     sess.get_estimated_token_count(turn_context.as_ref()).await;
 
                 trace!(
                     turn_id = %turn_context.sub_id,
-                    total_usage_tokens,
+                    total_usage_tokens = token_status.active_context_tokens,
+                    auto_compact_scope_tokens = token_status.auto_compact_scope_tokens,
                     estimated_token_count = ?estimated_token_count,
-                    auto_compact_limit,
+                    auto_compact_scope_limit = token_status.auto_compact_scope_limit,
+                    auto_compact_limit_scope = ?turn_context.config.model_auto_compact_token_limit_scope,
+                    auto_compact_window_ordinal = ?token_status.auto_compact_window_ordinal,
+                    auto_compact_window_prefill_tokens = ?token_status.auto_compact_window_prefill_tokens,
+                    full_context_window_limit = ?token_status.full_context_window_limit,
+                    full_context_window_limit_reached = token_status.full_context_window_limit_reached,
                     token_limit_reached,
                     model_needs_follow_up,
                     has_pending_input,
@@ -362,7 +375,7 @@ pub(crate) async fn run_turn(
 
                 // as long as compaction works well in getting us way below the token limit, we shouldn't worry about being in an infinite loop.
                 if token_limit_reached && needs_follow_up {
-                    match run_auto_compact(
+                    let reset_client_session = match run_auto_compact(
                         &sess,
                         &turn_context,
                         &mut client_session,
@@ -372,7 +385,7 @@ pub(crate) async fn run_turn(
                     )
                     .await
                     {
-                        Ok(()) => {}
+                        Ok(reset_client_session) => reset_client_session,
                         Err(err) => {
                             if err.to_codex_protocol_error() == CodexErrorInfo::UsageLimitExceeded
                                 && let Err(err) = sess
@@ -388,6 +401,9 @@ pub(crate) async fn run_turn(
                             return None;
                         }
                     };
+                    if reset_client_session {
+                        client_session.reset_websocket_session();
+                    }
                     can_drain_pending_input = !model_needs_follow_up;
                     continue;
                 }
@@ -743,27 +759,88 @@ async fn track_turn_resolved_config_analytics(
         });
 }
 
+struct PreSamplingCompactResult {
+    reset_client_session: bool,
+}
+
+#[derive(Debug)]
+struct AutoCompactTokenStatus {
+    /// Full active context usage, independent of the configured auto-compact scope.
+    active_context_tokens: i64,
+    /// Usage charged against `auto_compact_scope_limit` under the configured scope.
+    auto_compact_scope_tokens: i64,
+    auto_compact_scope_limit: i64,
+    full_context_window_limit: Option<i64>,
+    auto_compact_window_ordinal: Option<u64>,
+    auto_compact_window_prefill_tokens: Option<i64>,
+    full_context_window_limit_reached: bool,
+    token_limit_reached: bool,
+}
+
+/// Evaluates current token usage against the auto-compaction budget for the configured scope.
+///
+/// `Total` charges the whole active context. `BodyAfterPrefix` charges only usage above the
+/// window's prefill baseline (zero until one exists) and also trips at the model context window.
+async fn auto_compact_token_status(
+    sess: &Session,
+    turn_context: &TurnContext,
+) -> AutoCompactTokenStatus {
+    let active_context_tokens = sess.get_total_token_usage().await;
+    let (
+        auto_compact_scope_tokens,
+        auto_compact_scope_limit,
+        full_context_window_limit,
+        auto_compact_window_ordinal,
+        auto_compact_window_prefill_tokens,
+    ) = match turn_context.config.model_auto_compact_token_limit_scope {
+        AutoCompactTokenLimitScope::Total => {
+            let limit = turn_context.model_info.auto_compact_token_limit();
+            (active_context_tokens, limit.unwrap_or(i64::MAX), None, None, None)
+        }
+        AutoCompactTokenLimitScope::BodyAfterPrefix => {
+            let window = sess.auto_compact_window_snapshot().await;
+            let baseline = window.prefill_input_tokens.unwrap_or(active_context_tokens);
+            let limit = turn_context
+                .config
+                .model_auto_compact_token_limit
+                .or_else(|| turn_context.model_info.auto_compact_token_limit());
+            (
+                active_context_tokens.saturating_sub(baseline),
+                limit.unwrap_or(i64::MAX),
+                turn_context.model_context_window(),
+                Some(window.ordinal),
+                window.prefill_input_tokens,
+            )
+        }
+    };
+    let full_context_window_limit_reached =
+        matches!(full_context_window_limit, Some(cap) if active_context_tokens >= cap);
+    let scope_limit_reached = auto_compact_scope_tokens >= auto_compact_scope_limit;
+
+    AutoCompactTokenStatus {
+        active_context_tokens,
+        auto_compact_scope_tokens,
+        auto_compact_scope_limit,
+        full_context_window_limit,
+        auto_compact_window_ordinal,
+        auto_compact_window_prefill_tokens,
+        full_context_window_limit_reached,
+        token_limit_reached: scope_limit_reached || full_context_window_limit_reached,
+    }
+}
+
 async fn run_pre_sampling_compact(
     sess: &Arc<Session>,
     turn_context: &Arc<TurnContext>,
     client_session: &mut ModelClientSession,
-) -> CodexResult<()> {
-    let total_usage_tokens_before_compaction = sess.get_total_token_usage().await;
-    maybe_run_previous_model_inline_compact(
-        sess,
-        turn_context,
-        client_session,
-        total_usage_tokens_before_compaction,
-    )
-    .await?;
-    let total_usage_tokens = sess.get_total_token_usage().await;
-    let auto_compact_limit = turn_context
-        .model_info
-        .auto_compact_token_limit()
-        .unwrap_or(i64::MAX);
-    // Compact if the total usage tokens are greater than the auto compact limit
-    if total_usage_tokens >= auto_compact_limit {
-        run_auto_compact(
+) -> CodexResult<PreSamplingCompactResult> {
+    let mut pre_sampling_compacted =
+        maybe_run_previous_model_inline_compact(sess, turn_context, client_session).await?;
+    let mut reset_client_session = pre_sampling_compacted;
+    let token_status = auto_compact_token_status(sess.as_ref(), turn_context.as_ref()).await;
+    // Compact if the configured auto-compaction budget or usable context window is exhausted.
+    if token_status.token_limit_reached {
+        reset_client_session |= run_auto_compact(
             sess,
             turn_context,
             client_session,
@@ -772,22 +849,26 @@ async fn run_pre_sampling_compact(
             CompactionPhase::PreTurn,
         )
         .await?;
+        pre_sampling_compacted = true;
     }
-    Ok(())
+    Ok(PreSamplingCompactResult {
+        reset_client_session: pre_sampling_compacted && reset_client_session,
+    })
 }
 
 /// Runs pre-sampling compaction against the previous model when switching to a smaller
 /// context-window model.
 ///
-/// Returns `Err(_)` only when compaction was attempted and failed.
+/// Returns `Ok(true)` when compaction ran successfully, `Ok(false)` when compaction was skipped
+/// because the model/context-window preconditions were not met, and `Err(_)` only when compaction
+/// was attempted and failed.
 async fn maybe_run_previous_model_inline_compact(
     sess: &Arc<Session>,
     turn_context: &Arc<TurnContext>,
     client_session: &mut ModelClientSession,
-    total_usage_tokens: i64,
-) -> CodexResult<()> {
+) -> CodexResult<bool> {
     let Some(previous_turn_settings) = sess.previous_turn_settings().await else {
-        return Ok(());
+        return Ok(false);
     };
     let previous_model_turn_context = Arc::new(
         turn_context
@@ -796,20 +877,31 @@ async fn maybe_run_previous_model_inline_compact(
     );
 
     let Some(old_context_window) = previous_model_turn_context.model_context_window() else {
-        return Ok(());
+        return Ok(false);
     };
     let Some(new_context_window) = turn_context.model_context_window() else {
-        return Ok(());
+        return Ok(false);
     };
-    let new_auto_compact_limit = turn_context
-        .model_info
-        .auto_compact_token_limit()
-        .unwrap_or(i64::MAX);
-    let should_run = total_usage_tokens > new_auto_compact_limit
+    let active_context_tokens = sess.get_total_token_usage().await;
+    let previous_model_limit_reached = match turn_context
+        .config
+        .model_auto_compact_token_limit_scope
+    {
+        AutoCompactTokenLimitScope::Total => {
+            let new_auto_compact_limit = turn_context
+                .model_info
+                .auto_compact_token_limit()
+                .unwrap_or(i64::MAX);
+            active_context_tokens > new_auto_compact_limit
+                || active_context_tokens >= new_context_window
+        }
+        AutoCompactTokenLimitScope::BodyAfterPrefix => active_context_tokens >= new_context_window,
+    };
+    let should_run = previous_model_limit_reached
         && previous_model_turn_context.model_info.slug != turn_context.model_info.slug
         && old_context_window > new_context_window;
     if should_run {
-        run_auto_compact(
+        let _ = run_auto_compact(
             sess,
             &previous_model_turn_context,
             client_session,
@@ -818,8 +910,9 @@ async fn maybe_run_previous_model_inline_compact(
             CompactionPhase::PreTurn,
         )
         .await?;
+        return Ok(true);
     }
-    Ok(())
+    Ok(false)
 }
 
 async fn run_auto_compact(
@@ -829,7 +922,7 @@ async fn run_auto_compact(
     initial_context_injection: InitialContextInjection,
     reason: CompactionReason,
     phase: CompactionPhase,
-) -> CodexResult<()> {
+) -> CodexResult<bool> {
     if should_use_remote_compact_task(turn_context.provider.info()) {
         if turn_context.features.enabled(Feature::RemoteCompactionV2) {
             run_inline_remote_auto_compact_task_v2(
@@ -841,7 +934,7 @@ async fn run_auto_compact(
                 phase,
             )
             .await?;
-            return Ok(());
+            return Ok(false);
         }
         run_inline_remote_auto_compact_task(
             Arc::clone(sess),
@@ -861,7 +954,7 @@ async fn run_auto_compact(
         )
         .await?;
     }
-    Ok(())
+    Ok(true)
 }
 
 pub(super) fn collect_explicit_app_ids_from_skill_items(
diff --git a/codex-rs/core/src/state/auto_compact_window.rs b/codex-rs/core/src/state/auto_compact_window.rs
new file mode 100644
index 000000000000..d1deb3c2882b
--- /dev/null
+++ b/codex-rs/core/src/state/auto_compact_window.rs
@@ -0,0 +1,145 @@
+use codex_protocol::protocol::TokenUsage;
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub(crate) struct AutoCompactWindowSnapshot {
+    pub(crate) ordinal: u64,
+    pub(crate) prefill_input_tokens: Option<i64>,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum AutoCompactWindowPrefill {
+    ServerObserved(i64),
+    Estimated(i64),
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub(super) struct AutoCompactWindow {
+    ordinal: u64,
+    /// Absolute input-token baseline for the current compaction window.
+    ///
+    /// `body_after_prefix` subtracts this from later active-context usage. It is
+    /// not the growth itself; server-observed usage replaces estimated
+    /// resume/recompute baselines when available.
+    prefill_input_tokens: Option<AutoCompactWindowPrefill>,
+}
+
+impl AutoCompactWindow {
+    /// Creates the first window (ordinal 1) with no prefill baseline recorded.
+    pub(super) fn new() -> Self {
+        Self {
+            ordinal: 1,
+            prefill_input_tokens: None,
+        }
+    }
+
+    /// Drops the recorded baseline without leaving the current window.
+    pub(super) fn clear_prefill(&mut self) {
+        self.prefill_input_tokens = None;
+    }
+
+    /// Moves to the next window, which starts without a baseline.
+    pub(super) fn start_next(&mut self) {
+        self.prefill_input_tokens = None;
+        self.ordinal = self.ordinal.saturating_add(1);
+    }
+
+    /// Whether a server-observed baseline is stored; both setters leave such a baseline alone.
+    fn has_server_observed_prefill(&self) -> bool {
+        matches!(
+            self.prefill_input_tokens,
+            Some(AutoCompactWindowPrefill::ServerObserved(_))
+        )
+    }
+
+    /// Records the request-input side of the first server usage sample as the baseline. That
+    /// response's sampled output is body growth and stays counted against the scoped budget.
+    pub(super) fn ensure_server_observed_prefill_from_usage(&mut self, usage: &TokenUsage) {
+        if !self.has_server_observed_prefill() {
+            let observed = usage.input_tokens.max(0);
+            self.prefill_input_tokens = Some(AutoCompactWindowPrefill::ServerObserved(observed));
+        }
+    }
+
+    /// Stores an estimated baseline unless a server-observed one is already in place.
+    pub(super) fn set_estimated_prefill(&mut self, tokens: i64) {
+        if !self.has_server_observed_prefill() {
+            let estimate = tokens.max(0);
+            self.prefill_input_tokens = Some(AutoCompactWindowPrefill::Estimated(estimate));
+        }
+    }
+
+    /// Returns the ordinal and baseline token count, without the baseline's provenance.
+    pub(super) fn snapshot(&self) -> AutoCompactWindowSnapshot {
+        AutoCompactWindowSnapshot {
+            ordinal: self.ordinal,
+            prefill_input_tokens: self.prefill_input_tokens.map(|prefill| match prefill {
+                AutoCompactWindowPrefill::ServerObserved(tokens)
+                | AutoCompactWindowPrefill::Estimated(tokens) => tokens,
+            }),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use pretty_assertions::assert_eq;
+
+    #[test]
+    fn tracks_prefill_and_window_boundaries() {
+        let mut window = AutoCompactWindow::new();
+
+        assert_eq!(
+            window.snapshot(),
+            AutoCompactWindowSnapshot {
+                ordinal: 1,
+                prefill_input_tokens: None,
+            }
+        );
+
+        window.set_estimated_prefill(/*tokens*/ 150);
+        assert_eq!(
+            window.snapshot(),
+            AutoCompactWindowSnapshot {
+                ordinal: 1,
+                prefill_input_tokens: Some(150),
+            }
+        );
+
+        window.ensure_server_observed_prefill_from_usage(&TokenUsage {
+            input_tokens: 120,
+            total_tokens: 170,
+            ..Default::default()
+        });
+        assert_eq!(
+            window.snapshot(),
+            AutoCompactWindowSnapshot {
+                ordinal: 1,
+                prefill_input_tokens: Some(120),
+            }
+        );
+
+        window.ensure_server_observed_prefill_from_usage(&TokenUsage {
+            input_tokens: 130,
+            total_tokens: 180,
+            ..Default::default()
+        });
+        window.set_estimated_prefill(/*tokens*/ 90);
+        assert_eq!(
+            window.snapshot(),
+            AutoCompactWindowSnapshot {
+                ordinal: 1,
+                prefill_input_tokens: Some(120),
+            }
+        );
+
+        window.start_next();
+        assert_eq!(
+            window.snapshot(),
+            AutoCompactWindowSnapshot {
+                ordinal: 2,
+                prefill_input_tokens: None,
+            }
+        );
+    }
+}
diff --git a/codex-rs/core/src/state/mod.rs b/codex-rs/core/src/state/mod.rs
index 13f3bf6c86f2..3122ec5f2599 100644
--- a/codex-rs/core/src/state/mod.rs
+++ b/codex-rs/core/src/state/mod.rs
@@ -1,7 +1,9 @@
+mod auto_compact_window;
 mod service;
 mod session;
 mod turn;
 
+pub(crate) use auto_compact_window::AutoCompactWindowSnapshot;
 pub(crate) use service::SessionServices;
 pub(crate) use session::SessionState;
 pub(crate) use turn::ActiveTurn;
diff --git a/codex-rs/core/src/state/session.rs b/codex-rs/core/src/state/session.rs
index 7121a775245c..0903d0d07326 100644
--- a/codex-rs/core/src/state/session.rs
+++ b/codex-rs/core/src/state/session.rs
@@ -5,6 +5,8 @@ use codex_protocol::models::ResponseItem;
 use codex_sandboxing::policy_transforms::merge_permission_profiles;
 use std::collections::HashSet;
 
+use super::auto_compact_window::AutoCompactWindow;
+use super::auto_compact_window::AutoCompactWindowSnapshot;
 use crate::context_manager::ContextManager;
 use crate::session::PreviousTurnSettings;
 use crate::session::session::SessionConfiguration;
@@ -26,6 +28,8 @@ pub(crate) struct SessionState {
     /// model/realtime handling on subsequent regular turns (including full-context
     /// reinjection after resume or `/compact`).
     previous_turn_settings: Option<PreviousTurnSettings>,
+    /// Runtime accounting state for the active auto-compaction window.
+    auto_compact_window: AutoCompactWindow,
     /// Startup prewarmed session prepared during session initialization.
     pub(crate) startup_prewarm: Option<SessionStartupPrewarmHandle>,
     pub(crate) active_connector_selection: HashSet<String>,
@@ -45,6 +49,7 @@ impl SessionState {
             server_reasoning_included: false,
             mcp_dependency_prompted: HashSet::new(),
             previous_turn_settings: None,
+            auto_compact_window: AutoCompactWindow::new(),
             startup_prewarm: None,
             active_connector_selection: HashSet::new(),
             pending_session_start_source: None,
@@ -94,6 +99,7 @@ impl SessionState {
         self.history.replace(items);
         self.history
             .set_reference_context_item(reference_context_item);
+        self.auto_compact_window.clear_prefill();
     }
 
     pub(crate) fn set_token_info(&mut self, info: Option<TokenUsageInfo>) {
@@ -117,6 +123,26 @@ impl SessionState {
         self.history.update_token_info(usage, model_context_window);
     }
 
+    pub(crate) fn ensure_auto_compact_window_server_prefill_from_usage(
+        &mut self,
+        usage: &TokenUsage,
+    ) {
+        self.auto_compact_window
+            .ensure_server_observed_prefill_from_usage(usage);
+    }
+
+    pub(crate) fn set_auto_compact_window_estimated_prefill(&mut self, tokens: i64) {
+        self.auto_compact_window.set_estimated_prefill(tokens);
+    }
+
+    pub(crate) fn start_next_auto_compact_window(&mut self) {
+        self.auto_compact_window.start_next();
+    }
+
+    pub(crate) fn auto_compact_window_snapshot(&self) -> AutoCompactWindowSnapshot {
+        self.auto_compact_window.snapshot()
+    }
+
     pub(crate) fn token_info(&self) -> Option<TokenUsageInfo> {
         self.history.token_info()
     }
diff --git a/codex-rs/core/src/state/session_tests.rs b/codex-rs/core/src/state/session_tests.rs
index 5e90cc881dd2..b6b5a057a019 100644
--- a/codex-rs/core/src/state/session_tests.rs
+++ b/codex-rs/core/src/state/session_tests.rs
@@ -1,5 +1,6 @@
 use super::*;
 use crate::session::tests::make_session_configuration_for_tests;
+use crate::state::AutoCompactWindowSnapshot;
 use codex_protocol::protocol::CreditsSnapshot;
 use codex_protocol::protocol::RateLimitWindow;
 use pretty_assertions::assert_eq;
@@ -61,6 +62,24 @@ async fn set_rate_limits_defaults_limit_id_to_codex_when_missing() {
     );
 }
 
+#[tokio::test]
+async fn replace_history_clears_auto_compact_window_prefill_without_advancing() {
+    let session_configuration = make_session_configuration_for_tests().await;
+    let mut state = SessionState::new(session_configuration);
+
+    state.start_next_auto_compact_window();
+    state.set_auto_compact_window_estimated_prefill(/*tokens*/ 100);
+    state.replace_history(Vec::new(), /*reference_context_item*/ None);
+
+    assert_eq!(
+        state.auto_compact_window_snapshot(),
+        AutoCompactWindowSnapshot {
+            ordinal: 2,
+            prefill_input_tokens: None,
+        }
+    );
+}
+
 #[tokio::test]
 async fn set_rate_limits_defaults_to_codex_when_limit_id_missing_after_other_bucket() {
     let session_configuration = make_session_configuration_for_tests().await;
diff --git a/codex-rs/core/tests/suite/compact.rs b/codex-rs/core/tests/suite/compact.rs
index b07adfc61c3a..140b4ec2b11c 100644
--- a/codex-rs/core/tests/suite/compact.rs
+++ b/codex-rs/core/tests/suite/compact.rs
@@ -7,6 +7,7 @@ use codex_login::CodexAuth;
 use codex_model_provider_info::ModelProviderInfo;
 use codex_model_provider_info::built_in_model_providers;
 use codex_models_manager::bundled_models_response;
+use codex_protocol::config_types::AutoCompactTokenLimitScope;
 use codex_protocol::items::TurnItem;
 use codex_protocol::models::PermissionProfile;
 use codex_protocol::openai_models::ModelInfo;
@@ -126,6 +127,22 @@ fn set_test_compact_prompt(config: &mut Config) {
     config.compact_prompt = Some(SUMMARIZATION_PROMPT.to_string());
 }
 
+fn ev_completed_with_usage(id: &str, input_tokens: i64, output_tokens: i64) -> Value {
+    json!({
+        "type": "response.completed",
+        "response": {
+            "id": id,
+            "usage": {
+                "input_tokens": input_tokens,
+                "input_tokens_details": null,
+                "output_tokens": output_tokens,
+                "output_tokens_details": null,
+                "total_tokens": input_tokens + output_tokens
+            }
+        }
+    })
+}
+
 fn body_contains_text(body: &str, text: &str) -> bool {
     body.contains(&json_fragment(text))
 }
@@ -2074,6 +2091,100 @@ async fn pre_sampling_compact_runs_on_switch_to_smaller_context_model() {
     );
 }
 
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn body_after_prefix_model_switch_budget_compacts_with_next_model() {
+    skip_if_no_network!();
+
+    let server = MockServer::start().await;
+    let previous_model = "gpt-5.3-codex";
+    let next_model = "gpt-5.2";
+
+    let models_mock = mount_models_once(
+        &server,
+        ModelsResponse {
+            models: vec![
+                model_info_with_context_window(previous_model, /*context_window*/ 273_000),
+                model_info_with_context_window(next_model, /*context_window*/ 125_000),
+            ],
+        },
+    )
+    .await;
+
+    let request_log = mount_sse_sequence(
+        &server,
+        vec![
+            sse(vec![
+                ev_assistant_message("m1", "before switch"),
+                ev_completed_with_usage("r1", /*input_tokens*/ 100, /*output_tokens*/ 50),
+            ]),
+            sse(vec![
+                ev_assistant_message("m2", "BODY_BUDGET_SUMMARY"),
+                ev_completed_with_tokens("r2", /*total_tokens*/ 10),
+            ]),
+            sse(vec![
+                ev_assistant_message("m3", "after switch"),
+                ev_completed_with_tokens("r3", /*total_tokens*/ 100),
+            ]),
+        ],
+    )
+    .await;
+
+    let model_provider = non_openai_model_provider(&server);
+    let mut builder = test_codex()
+        .with_auth(CodexAuth::create_dummy_chatgpt_auth_for_testing())
+        .with_model(previous_model)
+        .with_config(move |config| {
+            config.model_provider = model_provider;
+            set_test_compact_prompt(config);
+            let _ = config.features.enable(Feature::RemoteModels);
+            config.model_auto_compact_token_limit = Some(20);
+            config.model_auto_compact_token_limit_scope =
+                AutoCompactTokenLimitScope::BodyAfterPrefix;
+        });
+    let test = builder.build(&server).await.expect("build test codex");
+
+    test.codex
+        .submit(disabled_permission_user_turn(
+            "before switch",
+            test.cwd.path().to_path_buf(),
+            previous_model.to_string(),
+        ))
+        .await
+        .expect("submit first user turn");
+    wait_for_event(&test.codex, |event| {
+        matches!(event, EventMsg::TurnComplete(_))
+    })
+    .await;
+
+    test.codex
+        .submit(disabled_permission_user_turn(
+            "after switch",
+            test.cwd.path().to_path_buf(),
+            next_model.to_string(),
+        ))
+        .await
+        .expect("submit second user turn");
+    assert_compaction_uses_turn_lifecycle_id(&test.codex).await;
+
+    let requests = request_log.requests();
+    assert_eq!(models_mock.requests().len(), 1);
+    assert_eq!(
+        requests.len(),
+        3,
+        "expected user, compact, and follow-up requests"
+    );
+    assert_eq!(
+        requests[0].body_json()["model"].as_str(),
+        Some(previous_model)
+    );
+    assert_eq!(requests[1].body_json()["model"].as_str(), Some(next_model));
+    assert_eq!(requests[2].body_json()["model"].as_str(), Some(next_model));
+    assert!(
+        body_contains_text(&requests[1].body_json().to_string(), SUMMARIZATION_PROMPT),
+        "body-budget compaction request should include summarization prompt"
+    );
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn pre_sampling_compact_runs_after_resume_and_switch_to_smaller_model() {
     skip_if_no_network!();
@@ -3011,6 +3122,237 @@ async fn auto_compact_clamps_config_limit_to_context_window() {
     );
 }
 
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn auto_compact_body_after_prefix_ignores_starting_window_prefix() {
+    skip_if_no_network!();
+
+    let server = start_mock_server().await;
+
+    let first_turn = sse(vec![
+        ev_assistant_message("m1", FIRST_REPLY),
+        ev_completed_with_usage("r1", /*input_tokens*/ 600, /*output_tokens*/ 50),
+    ]);
+    let second_turn = sse(vec![
+        ev_assistant_message("m2", SECOND_LARGE_REPLY),
+        ev_completed_with_usage("r2", /*input_tokens*/ 700, /*output_tokens*/ 50),
+    ]);
+    let auto_compact_turn = sse(vec![
+        ev_assistant_message("m3", AUTO_SUMMARY_TEXT),
+        ev_completed_with_tokens("r3", /*total_tokens*/ 20),
+    ]);
+    let third_turn = sse(vec![
+        ev_assistant_message("m4", FINAL_REPLY),
+        ev_completed_with_usage("r4", /*input_tokens*/ 750, /*output_tokens*/ 20),
+    ]);
+    let request_log = mount_sse_sequence(
+        &server,
+        vec![first_turn, second_turn, auto_compact_turn, third_turn],
+    )
+    .await;
+
+    let model_provider = non_openai_model_provider(&server);
+    let test = test_codex()
+        .with_config(move |config| {
+            config.model_provider = model_provider;
+            set_test_compact_prompt(config);
+            config.model_context_window = Some(1_000);
+            config.model_auto_compact_token_limit = Some(100);
+            config.model_auto_compact_token_limit_scope =
+                AutoCompactTokenLimitScope::BodyAfterPrefix;
+        })
+        .build(&server)
+        .await
+        .expect("build codex");
+
+    for user in ["PREFIX_FREE_ONE", "PREFIX_FREE_TWO"] {
+        test.submit_turn(user).await.expect("submit turn");
+    }
+
+    assert_eq!(
+        request_log.requests().len(),
+        2,
+        "the first two turns should not compact just because the prefix exceeds the body budget"
+    );
+
+    test.submit_turn("PREFIX_FREE_THREE")
+        .await
+        .expect("submit third turn");
+
+    let requests = request_log.requests();
+    assert_eq!(
+        requests.len(),
+        4,
+        "third turn should include pre-turn compaction plus the post-compaction request"
+    );
+    let compact_body = requests[2].body_json().to_string();
+    assert!(
+        body_contains_text(&compact_body, SUMMARIZATION_PROMPT),
+        "body-after-prefix mode should compact once tokens after the first assistant sample exceed the configured budget"
+    );
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn auto_compact_body_after_prefix_counts_growth_after_compaction() {
+    skip_if_no_network!();
+
+    let server = start_mock_server().await;
+
+    let first_turn = sse(vec![
+        ev_assistant_message("m1", FIRST_REPLY),
+        ev_completed_with_usage("r1", /*input_tokens*/ 100, /*output_tokens*/ 50),
+    ]);
+    let first_auto_compact_turn = sse(vec![
+        ev_assistant_message("m2", AUTO_SUMMARY_TEXT),
+        ev_completed_with_tokens("r2", /*total_tokens*/ 20),
+    ]);
+    let second_turn = sse(vec![
+        ev_assistant_message("m3", SECOND_LARGE_REPLY),
+        ev_completed_with_usage(
+            "r3", /*input_tokens*/ 100_000, /*output_tokens*/ 10,
+        ),
+    ]);
+    let third_turn = sse(vec![
+        ev_assistant_message("m4", FINAL_REPLY),
+        ev_completed_with_usage(
+            "r4", /*input_tokens*/ 100_100, /*output_tokens*/ 5,
+        ),
+    ]);
+    let second_auto_compact_turn = sse(vec![
+        ev_assistant_message("m5", AUTO_SUMMARY_TEXT),
+        ev_completed_with_tokens("r5", /*total_tokens*/ 20),
+    ]);
+    let fourth_turn = sse(vec![
+        ev_assistant_message("m6", FINAL_REPLY),
+        ev_completed_with_usage("r6", /*input_tokens*/ 80, /*output_tokens*/ 5),
+    ]);
+    let request_log = mount_sse_sequence(
+        &server,
+        vec![
+            first_turn,
+            first_auto_compact_turn,
+            second_turn,
+            third_turn,
+            second_auto_compact_turn,
+            fourth_turn,
+        ],
+    )
+    .await;
+
+    let model_provider = non_openai_model_provider(&server);
+    let test = test_codex()
+        .with_config(move |config| {
+            config.model_provider = model_provider;
+            set_test_compact_prompt(config);
+            config.model_context_window = Some(200_000);
+            config.model_auto_compact_token_limit = Some(40);
+            config.model_auto_compact_token_limit_scope =
+                AutoCompactTokenLimitScope::BodyAfterPrefix;
+        })
+        .build(&server)
+        .await
+        .expect("build codex");
+
+    test.submit_turn("WINDOW_PREFIX")
+        .await
+        .expect("submit first turn");
+    test.submit_turn("GROWTH_AFTER_COMPACT")
+        .await
+        .expect("submit second turn");
+
+    let requests = request_log.requests();
+    assert_eq!(
+        requests.len(),
+        3,
+        "second turn should compact first and then sample the new growth"
+    );
+
+    test.submit_turn("AFTER_GROWTH")
+        .await
+        .expect("submit third turn");
+
+    let requests = request_log.requests();
+    assert_eq!(
+        requests.len(),
+        4,
+        "the first server-observed input in the new window should become the prefill baseline"
+    );
+
+    test.submit_turn("AFTER_GROWTH_TRIGGER")
+        .await
+        .expect("submit fourth turn");
+
+    let requests = request_log.requests();
+    assert_eq!(
+        requests.len(),
+        6,
+        "fourth turn should compact because later post-compaction growth counted against the body budget"
+    );
+    let compact_body = requests[4].body_json().to_string();
+    assert!(
+        body_contains_text(&compact_body, SUMMARIZATION_PROMPT),
+        "post-compaction growth should trigger a second body-after-prefix compaction"
+    );
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn auto_compact_body_after_prefix_still_caps_at_context_window() {
+    skip_if_no_network!();
+
+    let server = start_mock_server().await;
+
+    let first_turn = sse(vec![
+        ev_assistant_message("m1", FIRST_REPLY),
+        ev_completed_with_usage("r1", /*input_tokens*/ 80, /*output_tokens*/ 5),
+    ]);
+    let second_turn = sse(vec![
+        ev_assistant_message("m2", SECOND_LARGE_REPLY),
+        ev_completed_with_usage("r2", /*input_tokens*/ 98, /*output_tokens*/ 1),
+    ]);
+    let auto_compact_turn = sse(vec![
+        ev_assistant_message("m3", AUTO_SUMMARY_TEXT),
+        ev_completed_with_tokens("r3", /*total_tokens*/ 20),
+    ]);
+    let third_turn = sse(vec![
+        ev_assistant_message("m4", FINAL_REPLY),
+        ev_completed_with_usage("r4", /*input_tokens*/ 80, /*output_tokens*/ 5),
+    ]);
+    let request_log = mount_sse_sequence(
+        &server,
+        vec![first_turn, second_turn, auto_compact_turn, third_turn],
+    )
+    .await;
+
+    let model_provider = non_openai_model_provider(&server);
+    let test = test_codex()
+        .with_config(move |config| {
+            config.model_provider = model_provider;
+            set_test_compact_prompt(config);
+            config.model_context_window = Some(100);
+            config.model_auto_compact_token_limit = Some(200);
+            config.model_auto_compact_token_limit_scope =
+                AutoCompactTokenLimitScope::BodyAfterPrefix;
+        })
+        .build(&server)
+        .await
+        .expect("build codex");
+
+    for user in ["CONTEXT_CAP_ONE", "CONTEXT_CAP_TWO", "CONTEXT_CAP_THREE"] {
+        test.submit_turn(user).await.expect("submit turn");
+    }
+
+    let requests = request_log.requests();
+    assert_eq!(
+        requests.len(),
+        4,
+        "third turn should compact before sampling because total context hit the usable window"
+    );
+    let compact_body = requests[2].body_json().to_string();
+    assert!(
+        body_contains_text(&compact_body, SUMMARIZATION_PROMPT),
+        "body-after-prefix mode should still clamp the total threshold to the usable context window"
+    );
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn auto_compact_counts_encrypted_reasoning_before_last_user() {
     skip_if_no_network!();
diff --git a/codex-rs/protocol/src/config_types.rs b/codex-rs/protocol/src/config_types.rs
index 22c4f5154800..b4b4759c9b71 100644
--- a/codex-rs/protocol/src/config_types.rs
+++ b/codex-rs/protocol/src/config_types.rs
@@ -21,6 +21,21 @@ use wildmatch::WildMatchPattern;
 
 use crate::openai_models::ReasoningEffort;
 
+/// Selects which part of the active context is charged against
+/// `model_auto_compact_token_limit`.
+#[derive(
+    Debug, Serialize, Deserialize, Default, Clone, Copy, PartialEq, Eq, Display, JsonSchema, TS,
+)]
+#[serde(rename_all = "snake_case")]
+#[strum(serialize_all = "snake_case")]
+pub enum AutoCompactTokenLimitScope {
+    /// Count the full active context against the limit.
+    #[default]
+    Total,
+    /// Count sampled output and later growth after the carried window prefix.
+    BodyAfterPrefix,
+}
+
 /// A summary of the reasoning performed by the model. This can be useful for
 /// debugging and understanding the model's reasoning process.
 /// See https://platform.openai.com/docs/guides/reasoning?api-mode=responses#reasoning-summaries
diff --git a/codex-rs/thread-manager-sample/src/main.rs b/codex-rs/thread-manager-sample/src/main.rs
index 3410711e3f35..cf499c7703cd 100644
--- a/codex-rs/thread-manager-sample/src/main.rs
+++ b/codex-rs/thread-manager-sample/src/main.rs
@@ -15,6 +15,7 @@ use codex_core_api::Arg0DispatchPaths;
 use codex_core_api::AskForApproval;
 use codex_core_api::AuthCredentialsStoreMode;
 use codex_core_api::AuthManager;
+use codex_core_api::AutoCompactTokenLimitScope;
 use codex_core_api::CodexThread;
 use codex_core_api::Config;
 use codex_core_api::ConfigLayerStack;
@@ -168,6 +169,7 @@ fn new_config(model: Option<String>, arg0_paths: Arg0DispatchPaths) -> anyhow::R
         review_model: None,
         model_context_window: None,
         model_auto_compact_token_limit: None,
+        model_auto_compact_token_limit_scope: AutoCompactTokenLimitScope::Total,
         model_provider_id,
         model_provider,
         personality: None,