feat: bound compact worker context

2026-05-28 11:57:57 +09:00 · 2026-05-28 11:57:57 +09:00 · c274e4a891
commit c274e4a891
parent 7034d02455
10 changed files with 791 additions and 211 deletions
--- a/crates/llm-worker/src/interceptor.rs
+++ b/crates/llm-worker/src/interceptor.rs
@ -32,10 +32,16 @@ pub enum PromptAction {
 }

 /// Action before an LLM request.
-#[derive(Debug, Clone, PartialEq, Eq)]
+#[derive(Debug, Clone, PartialEq)]
 pub enum PreRequestAction {
    /// Proceed normally.
    Continue,
+    /// Proceed after appending these items to durable worker history.
+    ///
+    /// This is for upper-layer budget/status nudges that the model may react
+    /// to: the items are committed before the request so later turns can see
+    /// why the worker changed course.
+    ContinueWith(Vec<Item>),
    /// Cancel with a reason (treated as an error).
    Cancel(String),
    /// Yield control to the caller for external processing.
@ -149,11 +155,12 @@ pub trait Interceptor: Send + Sync {

    /// Called before each LLM request. The context starts as a clone
    /// of `worker.history` (after `pending_history_appends` and the
-    /// Worker's own prune projection have been applied) and can be
-    /// further modified for that single request only — mutations here
-    /// are **not** persisted back to history. Use
-    /// [`Self::pending_history_appends`] for inputs that need to land
-    /// in history.
+    /// Worker's own prune projection have been applied).
+    ///
+    /// Direct mutations to `context` remain request-local and are not persisted.
+    /// If an interceptor derives a human/model-visible nudge from the current
+    /// request context, return [`PreRequestAction::ContinueWith`] so the Worker
+    /// commits it to history before the request is sent.
    async fn pre_llm_request(&self, _context: &mut Vec<Item>) -> PreRequestAction {
        PreRequestAction::Continue
    }
--- a/crates/llm-worker/src/worker.rs
+++ b/crates/llm-worker/src/worker.rs
@ -1169,6 +1169,10 @@ impl<C: LlmClient, S: WorkerState> Worker<C, S> {
                    self.last_run_interrupted = true;
                    return Ok(WorkerResult::Yielded);
                }
+                PreRequestAction::ContinueWith(items) => {
+                    self.append_history_items(items.clone());
+                    request_context.extend(items);
+                }
                PreRequestAction::Continue => {}
            }

--- a/crates/manifest/src/config.rs
+++ b/crates/manifest/src/config.rs
@ -125,18 +125,34 @@ pub struct CompactionConfigPartial {
    pub prune_protected_tokens: Option<u64>,
    #[serde(default)]
    pub prune_min_savings: Option<u64>,
+    #[serde(default, alias = "compact_threshold")]
+    pub threshold: Option<u64>,
+    #[serde(default, alias = "compact_request_threshold")]
+    pub request_threshold: Option<u64>,
+    #[serde(default, alias = "compact_retained_tokens")]
+    pub retained_tokens: Option<u64>,
    #[serde(default)]
-    pub compact_threshold: Option<u64>,
+    pub overview_target_tokens: Option<u64>,
    #[serde(default)]
-    pub compact_request_threshold: Option<u64>,
+    pub overview_warning_tokens: Option<u64>,
    #[serde(default)]
-    pub compact_retained_tokens: Option<u64>,
+    pub overview_deadline_tokens: Option<u64>,
+    #[serde(default, alias = "compact_worker_max_input_tokens")]
+    pub worker_context_max_tokens: Option<u64>,
    #[serde(default)]
-    pub compact_auto_read_budget: Option<u64>,
+    pub finish_warning_remaining_tokens: Option<u64>,
    #[serde(default)]
-    pub compact_worker_max_input_tokens: Option<u64>,
+    pub final_reserve_tokens: Option<u64>,
+    #[serde(default, alias = "compact_worker_max_turns")]
+    pub worker_max_turns: Option<u32>,
    #[serde(default)]
-    pub compact_worker_max_turns: Option<u32>,
+    pub summary_target_tokens: Option<u64>,
+    #[serde(default)]
+    pub summary_max_tokens: Option<u64>,
+    #[serde(default, alias = "compact_auto_read_budget")]
+    pub auto_read_budget_tokens: Option<u64>,
+    #[serde(default)]
+    pub result_context_max_tokens: Option<u64>,
    #[serde(default)]
    pub model: Option<ModelManifest>,
 }
@ -386,22 +402,32 @@ impl CompactionConfigPartial {
        Self {
            prune_protected_tokens: upper.prune_protected_tokens.or(self.prune_protected_tokens),
            prune_min_savings: upper.prune_min_savings.or(self.prune_min_savings),
-            compact_threshold: upper.compact_threshold.or(self.compact_threshold),
-            compact_request_threshold: upper
-                .compact_request_threshold
-                .or(self.compact_request_threshold),
-            compact_retained_tokens: upper
-                .compact_retained_tokens
-                .or(self.compact_retained_tokens),
-            compact_auto_read_budget: upper
-                .compact_auto_read_budget
-                .or(self.compact_auto_read_budget),
-            compact_worker_max_input_tokens: upper
-                .compact_worker_max_input_tokens
-                .or(self.compact_worker_max_input_tokens),
-            compact_worker_max_turns: upper
-                .compact_worker_max_turns
-                .or(self.compact_worker_max_turns),
+            threshold: upper.threshold.or(self.threshold),
+            request_threshold: upper.request_threshold.or(self.request_threshold),
+            retained_tokens: upper.retained_tokens.or(self.retained_tokens),
+            overview_target_tokens: upper.overview_target_tokens.or(self.overview_target_tokens),
+            overview_warning_tokens: upper
+                .overview_warning_tokens
+                .or(self.overview_warning_tokens),
+            overview_deadline_tokens: upper
+                .overview_deadline_tokens
+                .or(self.overview_deadline_tokens),
+            worker_context_max_tokens: upper
+                .worker_context_max_tokens
+                .or(self.worker_context_max_tokens),
+            finish_warning_remaining_tokens: upper
+                .finish_warning_remaining_tokens
+                .or(self.finish_warning_remaining_tokens),
+            final_reserve_tokens: upper.final_reserve_tokens.or(self.final_reserve_tokens),
+            worker_max_turns: upper.worker_max_turns.or(self.worker_max_turns),
+            summary_target_tokens: upper.summary_target_tokens.or(self.summary_target_tokens),
+            summary_max_tokens: upper.summary_max_tokens.or(self.summary_max_tokens),
+            auto_read_budget_tokens: upper
+                .auto_read_budget_tokens
+                .or(self.auto_read_budget_tokens),
+            result_context_max_tokens: upper
+                .result_context_max_tokens
+                .or(self.result_context_max_tokens),
            model: merge_option(self.model, upper.model, ModelManifest::merge),
        }
    }
@ -544,20 +570,42 @@ impl TryFrom<PodManifestConfig> for PodManifest {
                        .prune_protected_tokens
                        .unwrap_or(defaults::PRUNE_PROTECTED_TOKENS),
                    prune_min_savings: c.prune_min_savings.unwrap_or(defaults::PRUNE_MIN_SAVINGS),
-                    compact_threshold: c.compact_threshold,
-                    compact_request_threshold: c.compact_request_threshold,
-                    compact_retained_tokens: c
-                        .compact_retained_tokens
+                    threshold: c.threshold,
+                    request_threshold: c.request_threshold,
+                    retained_tokens: c
+                        .retained_tokens
                        .unwrap_or(defaults::COMPACT_RETAINED_TOKENS),
-                    compact_auto_read_budget: c
-                        .compact_auto_read_budget
-                        .unwrap_or(defaults::COMPACT_AUTO_READ_BUDGET),
-                    compact_worker_max_input_tokens: c
-                        .compact_worker_max_input_tokens
+                    overview_target_tokens: c
+                        .overview_target_tokens
+                        .unwrap_or(defaults::COMPACT_OVERVIEW_TARGET_TOKENS),
+                    overview_warning_tokens: c
+                        .overview_warning_tokens
+                        .unwrap_or(defaults::COMPACT_OVERVIEW_WARNING_TOKENS),
+                    overview_deadline_tokens: c
+                        .overview_deadline_tokens
+                        .unwrap_or(defaults::COMPACT_OVERVIEW_DEADLINE_TOKENS),
+                    worker_context_max_tokens: c
+                        .worker_context_max_tokens
                        .unwrap_or(defaults::COMPACT_WORKER_MAX_INPUT_TOKENS),
-                    compact_worker_max_turns: c
-                        .compact_worker_max_turns
-                        .or(defaults::COMPACT_WORKER_MAX_TURNS),
+                    finish_warning_remaining_tokens: c
+                        .finish_warning_remaining_tokens
+                        .unwrap_or(defaults::COMPACT_FINISH_WARNING_REMAINING_TOKENS),
+                    final_reserve_tokens: c
+                        .final_reserve_tokens
+                        .unwrap_or(defaults::COMPACT_FINAL_RESERVE_TOKENS),
+                    worker_max_turns: c.worker_max_turns.or(defaults::COMPACT_WORKER_MAX_TURNS),
+                    summary_target_tokens: c
+                        .summary_target_tokens
+                        .unwrap_or(defaults::COMPACT_SUMMARY_TARGET_TOKENS),
+                    summary_max_tokens: c
+                        .summary_max_tokens
+                        .unwrap_or(defaults::COMPACT_SUMMARY_MAX_TOKENS),
+                    auto_read_budget_tokens: c
+                        .auto_read_budget_tokens
+                        .unwrap_or(defaults::COMPACT_AUTO_READ_BUDGET),
+                    result_context_max_tokens: c
+                        .result_context_max_tokens
+                        .unwrap_or(defaults::COMPACT_RESULT_CONTEXT_MAX_TOKENS),
                    model: c.model,
                })
            })
@ -984,7 +1032,7 @@ mod tests {
    fn merge_option_struct_field_wise() {
        let lower = PodManifestConfig {
            compaction: Some(CompactionConfigPartial {
-                compact_threshold: Some(50_000),
+                threshold: Some(50_000),
                prune_protected_tokens: Some(5_000),
                ..Default::default()
            }),
@ -992,14 +1040,14 @@ mod tests {
        };
        let upper = PodManifestConfig {
            compaction: Some(CompactionConfigPartial {
-                compact_threshold: Some(80_000),
+                threshold: Some(80_000),
                ..Default::default()
            }),
            ..Default::default()
        };
        let merged = lower.merge(upper);
        let c = merged.compaction.unwrap();
-        assert_eq!(c.compact_threshold, Some(80_000));
+        assert_eq!(c.threshold, Some(80_000));
        // field from lower retained when upper has None
        assert_eq!(c.prune_protected_tokens, Some(5_000));
    }
@ -1122,27 +1170,27 @@ stop_sequences = ["\n\n", "</stop>"]
    }

    #[test]
-    fn from_toml_accepts_compact_worker_max_turns() {
+    fn from_toml_accepts_worker_max_turns() {
        let cfg = PodManifestConfig::from_toml(
            r#"
 [compaction]
-compact_worker_max_turns = 7
+worker_max_turns = 7
 "#,
        )
        .unwrap();

-        assert_eq!(cfg.compaction.unwrap().compact_worker_max_turns, Some(7));
+        assert_eq!(cfg.compaction.unwrap().worker_max_turns, Some(7));
    }

    #[test]
-    fn try_from_compaction_defaults_compact_worker_max_turns() {
+    fn try_from_compaction_defaults_worker_max_turns() {
        let mut cfg = minimal_valid();
        cfg.compaction = Some(CompactionConfigPartial::default());

        let manifest = PodManifest::try_from(cfg).unwrap();

        assert_eq!(
-            manifest.compaction.unwrap().compact_worker_max_turns,
+            manifest.compaction.unwrap().worker_max_turns,
            defaults::COMPACT_WORKER_MAX_TURNS
        );
    }
--- a/crates/manifest/src/defaults.rs
+++ b/crates/manifest/src/defaults.rs
@ -25,9 +25,23 @@ pub const PRUNE_MIN_SAVINGS: u64 = 4096;
 /// Token budget retained (unchanged) at the tail of the history across
 /// a compact. Items whose cumulative token count fits within this budget
 /// starting from the end are kept verbatim; the rest are summarised.
-/// See [`crate::CompactionConfig::compact_retained_tokens`].
+/// See [`crate::CompactionConfig::retained_tokens`].
 pub const COMPACT_RETAINED_TOKENS: u64 = 8000;

+/// Target size for the deterministic compact overview/index fed to the
+/// compact worker. Exceeding this target is tolerated.
+/// See [`crate::CompactionConfig::overview_target_tokens`].
+pub const COMPACT_OVERVIEW_TARGET_TOKENS: u64 = 8_000;
+
+/// Warning threshold for compact overview/index size. Compaction continues.
+/// See [`crate::CompactionConfig::overview_warning_tokens`].
+pub const COMPACT_OVERVIEW_WARNING_TOKENS: u64 = 16_000;
+
+/// Hard deterministic-overview deadline. When exceeded, overview generation
+/// falls back to a coarser index before the compact worker is started.
+/// See [`crate::CompactionConfig::overview_deadline_tokens`].
+pub const COMPACT_OVERVIEW_DEADLINE_TOKENS: u64 = 40_000;
+
 /// Default instruction asset reference used when `worker.instruction`
 /// is omitted. See the `PromptLoader` prefix addressing scheme for the
 /// `$insomnia/` / `$user/` / `$workspace/` namespaces.
@ -42,19 +56,39 @@ pub const WORKER_LANGUAGE: &str =
 /// session after compaction. Limits how much raw file text the
 /// compact worker can pull into the compacted context via
 /// `mark_read_required`. See
-/// [`crate::CompactionConfig::compact_auto_read_budget`].
+/// [`crate::CompactionConfig::auto_read_budget_tokens`].
 pub const COMPACT_AUTO_READ_BUDGET: u64 = 8000;

 /// Current prompt-occupancy cap for the compact worker's own LLM
 /// calls. Exceeding this aborts the compact run (circuit-breaker
-/// path). See
-/// [`crate::CompactionConfig::compact_worker_max_input_tokens`].
+/// path). See [`crate::CompactionConfig::worker_context_max_tokens`].
 pub const COMPACT_WORKER_MAX_INPUT_TOKENS: u64 = 50_000;

+/// Remaining compact-worker context threshold that triggers an instruction
+/// to stop exploring and call `write_summary`.
+/// See [`crate::CompactionConfig::finish_warning_remaining_tokens`].
+pub const COMPACT_FINISH_WARNING_REMAINING_TOKENS: u64 = 8_000;
+
+/// Context reserve preserved for final summary/tool closing turns.
+/// See [`crate::CompactionConfig::final_reserve_tokens`].
+pub const COMPACT_FINAL_RESERVE_TOKENS: u64 = 4_000;
+
 /// Optional maximum compact-worker tool-loop depth. `None` means unlimited.
-/// See [`crate::CompactionConfig::compact_worker_max_turns`].
+/// See [`crate::CompactionConfig::worker_max_turns`].
 pub const COMPACT_WORKER_MAX_TURNS: Option<u32> = Some(20);

+/// Target size for the `write_summary` text. Used in prompt/nudge text.
+/// See [`crate::CompactionConfig::summary_target_tokens`].
+pub const COMPACT_SUMMARY_TARGET_TOKENS: u64 = 2_000;
+
+/// Hard validation cap for the final `write_summary` text.
+/// See [`crate::CompactionConfig::summary_max_tokens`].
+pub const COMPACT_SUMMARY_MAX_TOKENS: u64 = 4_000;
+
+/// Dry-run cap for the compacted session's initial request context.
+/// See [`crate::CompactionConfig::result_context_max_tokens`].
+pub const COMPACT_RESULT_CONTEXT_MAX_TOKENS: u64 = 60_000;
+
 /// Number of recently-touched files fed to the compact worker as
 /// default references.
 pub const COMPACT_DEFAULT_REFERENCE_COUNT: usize = 5;
--- a/crates/manifest/src/lib.rs
+++ b/crates/manifest/src/lib.rs
@ -363,8 +363,8 @@ pub struct CompactionConfig {
    /// Checked by the Controller after each run. When current occupancy
    /// exceeds this value, compact runs before the next turn. `None`
    /// disables the between-turns check.
-    #[serde(default)]
-    pub compact_threshold: Option<u64>,
+    #[serde(default, alias = "compact_threshold")]
+    pub threshold: Option<u64>,

    /// Safety-net (between-requests) compaction threshold.
    ///
@ -373,32 +373,76 @@ pub struct CompactionConfig {
    /// Controller can compact before the next LLM request. `None`
    /// disables the between-requests check.
    ///
-    /// Expected relation: `compact_threshold < compact_request_threshold`
-    /// (proactive triggers before safety net). A reversed configuration
-    /// is accepted but logged as a warning.
-    #[serde(default)]
-    pub compact_request_threshold: Option<u64>,
+    /// Expected relation: `threshold < request_threshold` (proactive triggers
+    /// before safety net). A reversed configuration is accepted but logged as
+    /// a warning.
+    #[serde(default, alias = "compact_request_threshold")]
+    pub request_threshold: Option<u64>,

    /// Token budget retained verbatim at the tail of the history after
    /// compaction. Measured against the occupancy estimate from
    /// `UsageRecord` history; turn boundaries are ignored.
-    #[serde(default = "default_compact_retained_tokens")]
-    pub compact_retained_tokens: u64,
+    #[serde(default = "default_retained_tokens", alias = "compact_retained_tokens")]
+    pub retained_tokens: u64,

-    /// Aggregate token budget for auto-read file contents injected into
-    /// the compacted session by the compact worker.
-    #[serde(default = "default_compact_auto_read_budget")]
-    pub compact_auto_read_budget: u64,
+    /// Target size for the deterministic overview/index fed to the compact
+    /// worker. Overshooting this target is not an error.
+    #[serde(default = "default_overview_target_tokens")]
+    pub overview_target_tokens: u64,
+
+    /// Warning threshold for deterministic overview/index size.
+    #[serde(default = "default_overview_warning_tokens")]
+    pub overview_warning_tokens: u64,
+
+    /// Deadline threshold for deterministic overview/index generation.
+    /// Oversized overviews fall back to a coarser deterministic index.
+    #[serde(default = "default_overview_deadline_tokens")]
+    pub overview_deadline_tokens: u64,

    /// Current prompt-occupancy cap for the compact worker's own LLM
    /// requests. Exceeding this aborts the compact run.
-    #[serde(default = "default_compact_worker_max_input_tokens")]
-    pub compact_worker_max_input_tokens: u64,
+    #[serde(
+        default = "default_worker_context_max_tokens",
+        alias = "compact_worker_max_input_tokens"
+    )]
+    pub worker_context_max_tokens: u64,
+
+    /// Remaining compact-worker context threshold that triggers a warning and
+    /// an instruction to stop exploring and call `write_summary`.
+    #[serde(default = "default_finish_warning_remaining_tokens")]
+    pub finish_warning_remaining_tokens: u64,
+
+    /// Context reserve preserved for final summary/tool closing turns.
+    #[serde(default = "default_final_reserve_tokens")]
+    pub final_reserve_tokens: u64,

    /// Optional maximum compact-worker tool-loop depth. `None` leaves the
    /// worker unlimited; the default bounds runaway short-context loops.
-    #[serde(default = "default_compact_worker_max_turns")]
-    pub compact_worker_max_turns: Option<u32>,
+    #[serde(
+        default = "default_worker_max_turns",
+        alias = "compact_worker_max_turns"
+    )]
+    pub worker_max_turns: Option<u32>,
+
+    /// Target size for the `write_summary` text. Used in prompt/nudge text.
+    #[serde(default = "default_summary_target_tokens")]
+    pub summary_target_tokens: u64,
+
+    /// Hard validation cap for the final `write_summary` text.
+    #[serde(default = "default_summary_max_tokens")]
+    pub summary_max_tokens: u64,
+
+    /// Aggregate token budget for auto-read file contents injected into
+    /// the compacted session by the compact worker.
+    #[serde(
+        default = "default_auto_read_budget_tokens",
+        alias = "compact_auto_read_budget"
+    )]
+    pub auto_read_budget_tokens: u64,
+
+    /// Dry-run cap for the compacted session's initial request context.
+    #[serde(default = "default_result_context_max_tokens")]
+    pub result_context_max_tokens: u64,

    /// Optional model for the compactor (summary) LLM.
    /// If omitted, the main model is cloned via `clone_boxed()`.
@ -412,30 +456,62 @@ fn default_prune_protected_tokens() -> u64 {
 fn default_prune_min_savings() -> u64 {
    defaults::PRUNE_MIN_SAVINGS
 }
-fn default_compact_retained_tokens() -> u64 {
+fn default_retained_tokens() -> u64 {
    defaults::COMPACT_RETAINED_TOKENS
 }
-fn default_compact_auto_read_budget() -> u64 {
-    defaults::COMPACT_AUTO_READ_BUDGET
+fn default_overview_target_tokens() -> u64 {
+    defaults::COMPACT_OVERVIEW_TARGET_TOKENS
 }
-fn default_compact_worker_max_input_tokens() -> u64 {
+fn default_overview_warning_tokens() -> u64 {
+    defaults::COMPACT_OVERVIEW_WARNING_TOKENS
+}
+fn default_overview_deadline_tokens() -> u64 {
+    defaults::COMPACT_OVERVIEW_DEADLINE_TOKENS
+}
+fn default_worker_context_max_tokens() -> u64 {
    defaults::COMPACT_WORKER_MAX_INPUT_TOKENS
 }
-fn default_compact_worker_max_turns() -> Option<u32> {
+fn default_finish_warning_remaining_tokens() -> u64 {
+    defaults::COMPACT_FINISH_WARNING_REMAINING_TOKENS
+}
+fn default_final_reserve_tokens() -> u64 {
+    defaults::COMPACT_FINAL_RESERVE_TOKENS
+}
+fn default_worker_max_turns() -> Option<u32> {
    defaults::COMPACT_WORKER_MAX_TURNS
 }
+fn default_summary_target_tokens() -> u64 {
+    defaults::COMPACT_SUMMARY_TARGET_TOKENS
+}
+fn default_summary_max_tokens() -> u64 {
+    defaults::COMPACT_SUMMARY_MAX_TOKENS
+}
+fn default_auto_read_budget_tokens() -> u64 {
+    defaults::COMPACT_AUTO_READ_BUDGET
+}
+fn default_result_context_max_tokens() -> u64 {
+    defaults::COMPACT_RESULT_CONTEXT_MAX_TOKENS
+}

 impl Default for CompactionConfig {
    fn default() -> Self {
        Self {
            prune_protected_tokens: default_prune_protected_tokens(),
            prune_min_savings: default_prune_min_savings(),
-            compact_threshold: None,
-            compact_request_threshold: None,
-            compact_retained_tokens: default_compact_retained_tokens(),
-            compact_auto_read_budget: default_compact_auto_read_budget(),
-            compact_worker_max_input_tokens: default_compact_worker_max_input_tokens(),
-            compact_worker_max_turns: default_compact_worker_max_turns(),
+            threshold: None,
+            request_threshold: None,
+            retained_tokens: default_retained_tokens(),
+            overview_target_tokens: default_overview_target_tokens(),
+            overview_warning_tokens: default_overview_warning_tokens(),
+            overview_deadline_tokens: default_overview_deadline_tokens(),
+            worker_context_max_tokens: default_worker_context_max_tokens(),
+            finish_warning_remaining_tokens: default_finish_warning_remaining_tokens(),
+            final_reserve_tokens: default_final_reserve_tokens(),
+            worker_max_turns: default_worker_max_turns(),
+            summary_target_tokens: default_summary_target_tokens(),
+            summary_max_tokens: default_summary_max_tokens(),
+            auto_read_budget_tokens: default_auto_read_budget_tokens(),
+            result_context_max_tokens: default_result_context_max_tokens(),
            model: None,
        }
    }
@ -592,15 +668,15 @@ model_id = "claude-sonnet-4-20250514"

    #[test]
    fn parse_compaction_config() {
-        let toml = format!("{MINIMAL_REQUIRED}\n[compaction]\ncompact_threshold = 80000\n");
+        let toml = format!("{MINIMAL_REQUIRED}\n[compaction]\nthreshold = 80000\n");
        let manifest = PodManifest::from_toml(&toml).unwrap();
        let c = manifest.compaction.unwrap();
        assert_eq!(c.prune_protected_tokens, 8000);
        assert_eq!(c.prune_min_savings, 4096);
-        assert_eq!(c.compact_threshold, Some(80000));
-        assert_eq!(c.compact_request_threshold, None);
-        assert_eq!(c.compact_retained_tokens, 8000);
-        assert_eq!(c.compact_worker_max_turns, Some(20));
+        assert_eq!(c.threshold, Some(80000));
+        assert_eq!(c.request_threshold, None);
+        assert_eq!(c.retained_tokens, 8000);
+        assert_eq!(c.worker_max_turns, Some(20));
    }

    #[test]
@ -618,11 +694,11 @@ model_id = "claude-sonnet-4-20250514"
        let toml = format!(
            "{MINIMAL_REQUIRED}\n\
             [compaction]\n\
-             compact_worker_max_turns = 7\n"
+             worker_max_turns = 7\n"
        );
        let manifest = PodManifest::from_toml(&toml).unwrap();
        let c = manifest.compaction.unwrap();
-        assert_eq!(c.compact_worker_max_turns, Some(7));
+        assert_eq!(c.worker_max_turns, Some(7));
    }

    #[test]
@ -630,13 +706,13 @@ model_id = "claude-sonnet-4-20250514"
        let toml = format!(
            "{MINIMAL_REQUIRED}\n\
             [compaction]\n\
-             compact_threshold = 80000\n\
-             compact_request_threshold = 90000\n"
+             threshold = 80000\n\
+             request_threshold = 90000\n"
        );
        let manifest = PodManifest::from_toml(&toml).unwrap();
        let c = manifest.compaction.unwrap();
-        assert_eq!(c.compact_threshold, Some(80000));
-        assert_eq!(c.compact_request_threshold, Some(90000));
+        assert_eq!(c.threshold, Some(80000));
+        assert_eq!(c.request_threshold, Some(90000));
    }

    #[test]
@ -644,12 +720,12 @@ model_id = "claude-sonnet-4-20250514"
        let toml = format!(
            "{MINIMAL_REQUIRED}\n\
             [compaction]\n\
-             compact_request_threshold = 90000\n"
+             request_threshold = 90000\n"
        );
        let manifest = PodManifest::from_toml(&toml).unwrap();
        let c = manifest.compaction.unwrap();
-        assert_eq!(c.compact_threshold, None);
-        assert_eq!(c.compact_request_threshold, Some(90000));
+        assert_eq!(c.threshold, None);
+        assert_eq!(c.request_threshold, Some(90000));
    }

    #[test]
@ -657,7 +733,7 @@ model_id = "claude-sonnet-4-20250514"
        let toml = format!(
            "{MINIMAL_REQUIRED}\n\
             [compaction]\n\
-             compact_threshold = 80000\n\n\
+             threshold = 80000\n\n\
             [compaction.model]\n\
             scheme = \"gemini\"\n\
             model_id = \"gemini-2.0-flash\"\n"
--- a/crates/manifest/src/paths.rs
+++ b/crates/manifest/src/paths.rs
@ -281,10 +281,7 @@ mod tests {
            ("HOME", Some("/h")),
            ("XDG_RUNTIME_DIR", Some("/run/user/1000")),
        ]);
-        assert_eq!(
-            runtime_dir().unwrap(),
-            PathBuf::from("<runtime-dir>")
-        );
+        assert_eq!(runtime_dir().unwrap(), PathBuf::from("<runtime-dir>"));
    }

    #[test]
--- a/crates/pod/src/compact/worker.rs
+++ b/crates/pod/src/compact/worker.rs
@ -18,12 +18,13 @@
 //! compacted session's opening system messages.

 use std::path::PathBuf;
+use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
 use std::sync::{Arc, Mutex};

 use async_trait::async_trait;
 use llm_worker::Item;
-use llm_worker::interceptor::{Interceptor, PreRequestAction};
-use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta, ToolOutput};
+use llm_worker::interceptor::{Interceptor, PreRequestAction, PreToolAction, ToolCallInfo};
+use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta, ToolOutput, ToolResult};
 use serde::Deserialize;
 use tools::ScopedFs;

@ -246,14 +247,63 @@ pub(crate) fn write_summary_tool(ctx: Arc<Mutex<CompactWorkerContext>>) -> ToolD
    })
 }

-/// Interceptor that aborts the compact worker when its current prompt
-/// occupancy estimate crosses `max_input_tokens`. The estimate uses the same
-/// `UsageRecord` + `llm_worker::token_counter::total_tokens` path as the main
-/// Pod compaction thresholds, so prompt-cache hits are not counted cumulatively
-/// across turns.
+/// Interceptor that monitors compact-worker context occupancy.
+///
+/// `max_input_tokens` remains the hard circuit breaker. Before that point,
+/// the interceptor can persist a system warning into worker history telling
+/// the model to stop broad exploration and call `write_summary`, and can block
+/// additional exploratory tool calls once the final reserve is reached.
 pub(crate) struct CompactWorkerInterceptor {
    pub usage_tracker: Arc<UsageTracker>,
    pub max_input_tokens: u64,
+    pub finish_warning_remaining_tokens: u64,
+    pub final_reserve_tokens: u64,
+    pub on_warning: Option<Arc<dyn Fn(String) + Send + Sync>>,
+    warning_sent: AtomicBool,
+    last_remaining_tokens: AtomicU64,
+}
+
+impl CompactWorkerInterceptor {
+    pub(crate) fn new(
+        usage_tracker: Arc<UsageTracker>,
+        max_input_tokens: u64,
+        finish_warning_remaining_tokens: u64,
+        final_reserve_tokens: u64,
+        on_warning: Option<Arc<dyn Fn(String) + Send + Sync>>,
+    ) -> Self {
+        Self {
+            usage_tracker,
+            max_input_tokens,
+            finish_warning_remaining_tokens,
+            final_reserve_tokens,
+            on_warning,
+            warning_sent: AtomicBool::new(false),
+            last_remaining_tokens: AtomicU64::new(max_input_tokens),
+        }
+    }
+
+    fn maybe_emit_warning(&self, remaining: u64) -> Option<Item> {
+        let warning_threshold = self.finish_warning_remaining_tokens;
+        let reserve_threshold = self.final_reserve_tokens;
+        let should_warn = (warning_threshold > 0 && remaining <= warning_threshold)
+            || (reserve_threshold > 0 && remaining <= reserve_threshold);
+        if !should_warn || self.warning_sent.swap(true, Ordering::AcqRel) {
+            return None;
+        }
+
+        let message = format!(
+            "compact worker context budget is low ({remaining}/{} tokens remaining). \
+             Stop broad exploration now, read only if absolutely necessary, then call \
+             `write_summary` with the final structured summary.",
+            self.max_input_tokens
+        );
+        if let Some(cb) = self.on_warning.as_ref() {
+            cb(message.clone());
+        }
+        Some(Item::system_message(format!(
+            "[Compact worker budget warning]\n\n{message}"
+        )))
+    }
 }

 #[async_trait]
@ -268,9 +318,31 @@ impl Interceptor for CompactWorkerInterceptor {
            ));
        }

+        let remaining = self.max_input_tokens.saturating_sub(estimate.tokens);
+        self.last_remaining_tokens
+            .store(remaining, Ordering::Release);
+        if let Some(item) = self.maybe_emit_warning(remaining) {
+            self.usage_tracker.note_request(context.len() + 1);
+            return PreRequestAction::ContinueWith(vec![item]);
+        }
+
        self.usage_tracker.note_request(context.len());
        PreRequestAction::Continue
    }
+
+    async fn pre_tool_call(&self, info: &mut ToolCallInfo) -> PreToolAction {
+        if self.final_reserve_tokens == 0 || info.call.name == "write_summary" {
+            return PreToolAction::Continue;
+        }
+        let remaining = self.last_remaining_tokens.load(Ordering::Acquire);
+        if remaining > self.final_reserve_tokens {
+            return PreToolAction::Continue;
+        }
+        PreToolAction::SyntheticResult(ToolResult::error(
+            info.call.id.clone(),
+            "compact worker final reserve reached; do not perform more exploratory tool reads. Call `write_summary` now.",
+        ))
+    }
 }

 /// Crude bytes→tokens estimate; good enough for budget accounting.
@ -301,10 +373,7 @@ mod tests {
    #[tokio::test]
    async fn compact_worker_interceptor_uses_occupancy_not_cumulative_usage() {
        let tracker = Arc::new(UsageTracker::new());
-        let interceptor = CompactWorkerInterceptor {
-            usage_tracker: tracker.clone(),
-            max_input_tokens: 150,
-        };
+        let interceptor = CompactWorkerInterceptor::new(tracker.clone(), 150, 0, 0, None);
        let mut context = vec![Item::user_message("hello")];

        assert!(matches!(
@ -327,13 +396,40 @@ mod tests {
        ));
    }

+    #[tokio::test]
+    async fn compact_worker_interceptor_warns_before_hard_cap() {
+        let tracker = Arc::new(UsageTracker::new());
+        let warnings = Arc::new(Mutex::new(Vec::new()));
+        let captured = warnings.clone();
+        let interceptor = CompactWorkerInterceptor::new(
+            tracker.clone(),
+            150,
+            60,
+            20,
+            Some(Arc::new(move |message| {
+                captured.lock().unwrap().push(message);
+            })),
+        );
+        let mut context = vec![Item::user_message("hello")];
+
+        assert!(matches!(
+            interceptor.pre_llm_request(&mut context).await,
+            PreRequestAction::Continue
+        ));
+        tracker.record_usage(&make_usage(100));
+
+        assert!(matches!(
+            interceptor.pre_llm_request(&mut context).await,
+            PreRequestAction::ContinueWith(items)
+                if items.len() == 1 && items[0].as_text().unwrap_or_default().contains("write_summary")
+        ));
+        assert_eq!(warnings.lock().unwrap().len(), 1);
+    }
+
    #[tokio::test]
    async fn compact_worker_interceptor_cancels_when_occupancy_exceeds_cap() {
        let tracker = Arc::new(UsageTracker::new());
-        let interceptor = CompactWorkerInterceptor {
-            usage_tracker: tracker.clone(),
-            max_input_tokens: 99,
-        };
+        let interceptor = CompactWorkerInterceptor::new(tracker.clone(), 99, 0, 0, None);
        let mut context = vec![Item::user_message("hello")];

        assert!(matches!(
--- a/crates/pod/src/pod.rs
+++ b/crates/pod/src/pod.rs
@ -241,7 +241,7 @@ pub struct Pod<C: LlmClient, St: Store> {
    scope: SharedScope,
    hook_builder: HookRegistryBuilder,
    interceptor_installed: bool,
-    /// Shared compaction state (present when compact_threshold is configured).
+    /// Shared compaction state (present when threshold is configured).
    compact_state: Option<Arc<CompactState>>,
    /// Per-LLM-request Usage tracker. Always present after construction.
    /// Captures `(history_len, UsageEvent)` pairs during a run; drained
@ -1121,8 +1121,8 @@ impl<C: LlmClient, St: Store> Pod<C, St> {

    /// Install the hook-based interceptor on the Worker if not already done.
    ///
-    /// When either compaction threshold (`compact_threshold` or
-    /// `compact_request_threshold`) is configured in the manifest, allocates
+    /// When either compaction threshold (`threshold` or
+    /// `request_threshold`) is configured in the manifest, allocates
    /// a shared [`CompactState`] and wires the interceptor to read current
    /// occupancy through the `UsageRecord` timeline.
    fn ensure_interceptor_installed(&mut self) {
@ -1141,13 +1141,7 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
                .manifest
                .compaction
                .as_ref()
-                .map(|c| {
-                    (
-                        c.compact_threshold,
-                        c.compact_request_threshold,
-                        c.compact_retained_tokens,
-                    )
-                })
+                .map(|c| (c.threshold, c.request_threshold, c.retained_tokens))
                .unwrap_or((None, None, manifest::defaults::COMPACT_RETAINED_TOKENS));

            let tracker_for_usage = self.usage_tracker.clone();
@ -1161,7 +1155,7 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
                        warn!(
                            post_run_threshold = post,
                            request_threshold = req,
-                            "compact_threshold > compact_request_threshold; \
+                            "threshold > request_threshold; \
                             proactive check will never fire before the safety net"
                        );
                    }
@ -2124,12 +2118,7 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
        let retained = state
            .as_ref()
            .map(|s| s.retained_tokens())
-            .or_else(|| {
-                self.manifest
-                    .compaction
-                    .as_ref()
-                    .map(|c| c.compact_retained_tokens)
-            })
+            .or_else(|| self.manifest.compaction.as_ref().map(|c| c.retained_tokens))
            .unwrap_or(manifest::defaults::COMPACT_RETAINED_TOKENS);
        let current_tokens = self.total_tokens().tokens;
        let cut = self.split_for_retained(retained);
@ -2324,21 +2313,49 @@ impl<C: LlmClient, St: Store> Pod<C, St> {

        // Compaction-related knobs. Fall through to manifest defaults when
        // `[compaction]` is omitted entirely.
-        let (auto_read_budget, compact_worker_max_input_tokens, compact_worker_max_turns) = self
+        let (
+            auto_read_budget,
+            worker_context_max_tokens,
+            finish_warning_remaining_tokens,
+            final_reserve_tokens,
+            worker_max_turns,
+            overview_target_tokens,
+            overview_warning_tokens,
+            overview_deadline_tokens,
+            summary_target_tokens,
+            summary_max_tokens,
+            result_context_max_tokens,
+        ) = self
            .manifest
            .compaction
            .as_ref()
            .map(|c| {
                (
-                    c.compact_auto_read_budget,
-                    c.compact_worker_max_input_tokens,
-                    c.compact_worker_max_turns,
+                    c.auto_read_budget_tokens,
+                    c.worker_context_max_tokens,
+                    c.finish_warning_remaining_tokens,
+                    c.final_reserve_tokens,
+                    c.worker_max_turns,
+                    c.overview_target_tokens,
+                    c.overview_warning_tokens,
+                    c.overview_deadline_tokens,
+                    c.summary_target_tokens,
+                    c.summary_max_tokens,
+                    c.result_context_max_tokens,
                )
            })
            .unwrap_or((
                manifest::defaults::COMPACT_AUTO_READ_BUDGET,
                manifest::defaults::COMPACT_WORKER_MAX_INPUT_TOKENS,
+                manifest::defaults::COMPACT_FINISH_WARNING_REMAINING_TOKENS,
+                manifest::defaults::COMPACT_FINAL_RESERVE_TOKENS,
                manifest::defaults::COMPACT_WORKER_MAX_TURNS,
+                manifest::defaults::COMPACT_OVERVIEW_TARGET_TOKENS,
+                manifest::defaults::COMPACT_OVERVIEW_WARNING_TOKENS,
+                manifest::defaults::COMPACT_OVERVIEW_DEADLINE_TOKENS,
+                manifest::defaults::COMPACT_SUMMARY_TARGET_TOKENS,
+                manifest::defaults::COMPACT_SUMMARY_MAX_TOKENS,
+                manifest::defaults::COMPACT_RESULT_CONTEXT_MAX_TOKENS,
            ));

        // Default references: the N most-recently-touched files in the
@ -2358,7 +2375,33 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
            &items_to_summarise,
            &default_refs,
            Some(task_snapshot_text.as_str()),
+            SummaryInputOptions {
+                overview_target_tokens,
+                overview_warning_tokens,
+                overview_deadline_tokens,
+                summary_target_tokens,
+            },
        );
+        if summary_input.warning_exceeded {
+            self.alert(
+                AlertLevel::Warn,
+                AlertSource::Compactor,
+                format!(
+                    "compact overview is larger than expected (≈{} tokens; warning threshold {})",
+                    summary_input.overview_tokens, overview_warning_tokens
+                ),
+            );
+        }
+        if summary_input.deadline_fallback_used {
+            self.alert(
+                AlertLevel::Warn,
+                AlertSource::Compactor,
+                format!(
+                    "compact overview exceeded deadline ({} tokens); using coarse fallback",
+                    overview_deadline_tokens
+                ),
+            );
+        }

        // Worker-side state collected by the compact worker's tool calls.
        let ctx = Arc::new(std::sync::Mutex::new(CompactWorkerContext::with_budget(
@ -2390,11 +2433,19 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
                tracker.record_usage(event);
            });
        }
-        summary_worker.set_interceptor(CompactWorkerInterceptor {
-            usage_tracker: summary_usage_tracker,
-            max_input_tokens: compact_worker_max_input_tokens,
+        let compactor_warning_cb = self.alerter.clone().map(|alerter| {
+            Arc::new(move |message: String| {
+                alerter.alert(AlertLevel::Warn, AlertSource::Compactor, message);
+            }) as Arc<dyn Fn(String) + Send + Sync>
        });
-        summary_worker.set_max_turns(compact_worker_max_turns);
+        summary_worker.set_interceptor(CompactWorkerInterceptor::new(
+            summary_usage_tracker,
+            worker_context_max_tokens,
+            finish_warning_remaining_tokens,
+            final_reserve_tokens,
+            compactor_warning_cb,
+        ));
+        summary_worker.set_max_turns(worker_max_turns);

        // Tools: read_file (shared scope, fresh tracker) + the three
        // compact-specific tools that populate `ctx`.
@ -2404,7 +2455,7 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
        summary_worker.register_tool(write_summary_tool(ctx.clone()));

        let out = summary_worker
-            .run(summary_input)
+            .run(summary_input.text)
            .await
            .map_err(PodError::Worker)?;
        let mut locked_worker = out.worker;
@ -2439,11 +2490,32 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
            let _ = locked_worker.run(prompt).await.map_err(PodError::Worker)?;
        }

-        let final_ctx = ctx.lock().expect("compact ctx poisoned").clone();
-        let summary_text = final_ctx
+        let mut final_ctx = ctx.lock().expect("compact ctx poisoned").clone();
+        let mut summary_text = final_ctx
            .summary
            .clone()
            .ok_or(PodError::CompactSummaryMissing)?;
+        let mut summary_tokens = estimate_text_tokens(summary_text.len());
+        if summary_max_tokens > 0 && summary_tokens > summary_max_tokens {
+            let prompt = format!(
+                "Your `write_summary` output is too large (≈{summary_tokens} tokens; max \
+                 {summary_max_tokens}). Rewrite it now with `write_summary`, preserving the \
+                 same five sections but making it concise. Target ≈{summary_target_tokens} tokens."
+            );
+            let _ = locked_worker.run(prompt).await.map_err(PodError::Worker)?;
+            final_ctx = ctx.lock().expect("compact ctx poisoned").clone();
+            summary_text = final_ctx
+                .summary
+                .clone()
+                .ok_or(PodError::CompactSummaryMissing)?;
+            summary_tokens = estimate_text_tokens(summary_text.len());
+            if summary_tokens > summary_max_tokens {
+                return Err(PodError::CompactSummaryTooLarge {
+                    tokens: summary_tokens,
+                    max: summary_max_tokens,
+                });
+            }
+        }

        // Re-read each auto-read target via the Pod FS view. Errors are
        // logged and skipped inside `render_auto_read` rather than
@ -2515,6 +2587,13 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
            tools::task::snapshot_overview(&self.task_store.list()),
            task_snapshot_text.clone(),
        ));
+        let result_estimate = llm_worker::token_counter::total_tokens(&new_history, &[]);
+        if result_context_max_tokens > 0 && result_estimate.tokens > result_context_max_tokens {
+            return Err(PodError::CompactResultContextTooLarge {
+                tokens: result_estimate.tokens,
+                max: result_context_max_tokens,
+            });
+        }

        // Build the SegmentStart entry for the new compacted segment.
        // Inherits the source Segment's session_id so the compacted
@ -4008,19 +4087,56 @@ impl From<WorkerResult> for PodRunResult {
    }
 }

+#[derive(Debug, Clone, Copy)]
+struct SummaryInputOptions {
+    overview_target_tokens: u64,
+    overview_warning_tokens: u64,
+    overview_deadline_tokens: u64,
+    summary_target_tokens: u64,
+}
+
+#[derive(Debug)]
+struct SummaryInputBuild {
+    text: String,
+    overview_tokens: u64,
+    warning_exceeded: bool,
+    deadline_fallback_used: bool,
+}
+
 /// Build the compact worker's input: default-reference instructions,
-/// the list of recently-touched files, and the pruned conversation
-/// produced by [`build_summary_prompt`].
+/// the list of recently-touched files, task snapshot, and a bounded overview
+/// rather than a prefix-wide transcript.
 fn build_summary_input(
    items: &[Item],
    default_refs: &[PathBuf],
    task_snapshot: Option<&str>,
-) -> String {
-    let mut out = String::new();
-    out.push_str(
-        "Summarise the conversation below into a structured summary and nominate \
-         files the next session needs.\n\n",
+    options: SummaryInputOptions,
+) -> SummaryInputBuild {
+    let overview = build_summary_overview(
+        items,
+        options.overview_target_tokens,
+        options.overview_deadline_tokens,
    );
+    let overview_tokens = estimate_text_tokens(overview.len());
+    let warning_exceeded =
+        options.overview_warning_tokens > 0 && overview_tokens > options.overview_warning_tokens;
+    let deadline_fallback_used =
+        options.overview_deadline_tokens > 0 && overview_tokens > options.overview_deadline_tokens;
+    let overview = if deadline_fallback_used {
+        build_coarse_summary_overview(items, options.overview_deadline_tokens)
+    } else {
+        overview
+    };
+    let overview_tokens = estimate_text_tokens(overview.len());
+
+    let mut out = String::new();
+    out.push_str(&format!(
+        "Summarise this session into a structured summary of about {} tokens and \
+         nominate files the next session needs. The conversation below is a \
+         bounded overview/index, not the full transcript. Use tools to inspect \
+         current files when deciding auto-read/reference output.\n\n",
+        options.summary_target_tokens
+    ));
    if !default_refs.is_empty() {
        out.push_str(
            "These files were touched recently in this session. Use `read_file` \
@ -4045,25 +4161,135 @@ fn build_summary_input(
        out.push_str(task_snapshot);
        out.push_str("\n\n");
    }
-    out.push_str("## Conversation\n");
-    out.push_str(&build_summary_prompt(items));
+    out.push_str("## Conversation overview/index\n");
+    out.push_str(&overview);
    out.push_str("\n\nWhen you are done, call `write_summary` with the final 5-section text.");
+
+    SummaryInputBuild {
+        text: out,
+        overview_tokens,
+        warning_exceeded,
+        deadline_fallback_used,
+    }
+}
+
+fn build_summary_overview(items: &[Item], target_tokens: u64, deadline_tokens: u64) -> String {
+    let target_bytes = token_budget_bytes(target_tokens).max(1024);
+    let deadline_bytes = token_budget_bytes(deadline_tokens).max(target_bytes);
+    let mut out = String::new();
+    write_overview_header(items, &mut out);
+    out.push_str("\n## Recent user/assistant/system messages\n");
+
+    let mut selected = Vec::new();
+    let mut omitted_messages = 0usize;
+    for (idx, item) in items.iter().enumerate().rev() {
+        let Some(entry) = message_overview_entry(idx, item, 2_000) else {
+            continue;
+        };
+        let projected = out
+            .len()
+            .saturating_add(selected.iter().map(String::len).sum::<usize>())
+            .saturating_add(entry.len())
+            .saturating_add(2);
+        if projected > target_bytes && !selected.is_empty() {
+            omitted_messages += 1;
+            continue;
+        }
+        selected.push(entry);
+        if projected >= target_bytes {
+            break;
+        }
+    }
+    selected.reverse();
+    for entry in selected {
+        out.push_str(&entry);
+        out.push_str("\n\n");
+    }
+    if omitted_messages > 0 {
+        out.push_str(&format!(
+            "[Overview omitted {omitted_messages} older message(s) to stay near target.]\n\n"
+        ));
+    }
+
+    append_tool_index(items, &mut out, target_bytes, deadline_bytes);
    out
 }

-/// Format conversation items into a text prompt for the summary Worker.
-///
-/// The summary should capture decisions and user intent, not recreate code.
-/// File contents and tool IO belong in auto-read / references, not in the
-/// summary input. So this strips:
-/// - `ToolCall.arguments` (keep only the tool name)
-/// - `ToolResult.content` (keep only the summary line)
-/// - `Reasoning` entirely (intermediate thought, superseded by decisions)
-fn build_summary_prompt(items: &[Item]) -> String {
-    let mut lines = Vec::new();
+fn build_coarse_summary_overview(items: &[Item], deadline_tokens: u64) -> String {
+    let deadline_bytes = token_budget_bytes(deadline_tokens).max(1024);
+    let mut out = String::new();
+    write_overview_header(items, &mut out);
+    out.push_str("\n## Coarse recent message index\n");
+    for (idx, item) in items.iter().enumerate().rev() {
+        let Some(entry) = message_overview_entry(idx, item, 240) else {
+            continue;
+        };
+        if out.len().saturating_add(entry.len()).saturating_add(2) > deadline_bytes {
+            break;
+        }
+        out.push_str(&entry);
+        out.push_str("\n\n");
+    }
+    out
+}
+
+fn write_overview_header(items: &[Item], out: &mut String) {
+    let mut messages = 0usize;
+    let mut tool_calls = 0usize;
+    let mut tool_results = 0usize;
+    let mut reasoning = 0usize;
    for item in items {
        match item {
-            Item::Message { role, content, .. } => {
+            Item::Message { .. } => messages += 1,
+            Item::ToolCall { .. } => tool_calls += 1,
+            Item::ToolResult { .. } => tool_results += 1,
+            Item::Reasoning { .. } => reasoning += 1,
+        }
+    }
+    out.push_str(&format!(
+        "Items summarized: {} total; {messages} message(s), {tool_calls} tool call(s), \
+         {tool_results} tool result(s), {reasoning} reasoning item(s). Tool call \
+         arguments, tool result full content, and reasoning bodies are omitted from \
+         this initial input.\n",
+        items.len()
+    ));
+}
+
+fn append_tool_index(items: &[Item], out: &mut String, target_bytes: usize, deadline_bytes: usize) {
+    let mut entries = Vec::new();
+    for (idx, item) in items.iter().enumerate().rev() {
+        match item {
+            Item::ToolCall { name, .. } => entries.push(format!("[{idx} ToolCall] {name}")),
+            Item::ToolResult { summary, .. } => entries.push(format!(
+                "[{idx} ToolResult] {}",
+                truncate_chars(summary, 240)
+            )),
+            _ => {}
+        }
+        if entries.len() >= 24 {
+            break;
+        }
+    }
+    if entries.is_empty() {
+        return;
+    }
+    entries.reverse();
+    out.push_str("## Recent tool index (content omitted)\n");
+    for entry in entries {
+        let projected = out.len().saturating_add(entry.len()).saturating_add(1);
+        if projected > deadline_bytes || (projected > target_bytes && out.contains("ToolResult")) {
+            out.push_str("[Additional tool index entries omitted.]\n");
+            break;
+        }
+        out.push_str(&entry);
+        out.push('\n');
+    }
+}
+
+fn message_overview_entry(idx: usize, item: &Item, max_chars: usize) -> Option<String> {
+    let Item::Message { role, content, .. } = item else {
+        return None;
+    };
    let role_label = match role {
        llm_worker::Role::User => "User",
        llm_worker::Role::Assistant => "Assistant",
@ -4074,18 +4300,27 @@ fn build_summary_prompt(items: &[Item]) -> String {
        .map(|p| p.as_text())
        .collect::<Vec<_>>()
        .join("");
-                lines.push(format!("[{role_label}] {text}"));
+    Some(format!(
+        "[{idx} {role_label}] {}",
+        truncate_chars(&text, max_chars)
+    ))
 }
-            Item::ToolCall { name, .. } => {
-                lines.push(format!("[ToolCall] {name}"));
+
+fn truncate_chars(text: &str, max_chars: usize) -> String {
+    if text.chars().count() <= max_chars {
+        return text.to_string();
    }
-            Item::ToolResult { summary, .. } => {
-                lines.push(format!("[ToolResult] {summary}"));
+    let mut out = text.chars().take(max_chars).collect::<String>();
+    out.push_str("… [truncated]");
+    out
 }
-            Item::Reasoning { .. } => {}
+
+fn estimate_text_tokens(bytes: usize) -> u64 {
+    (bytes as u64).div_ceil(4)
 }
-    }
-    lines.join("\n\n")
+
+fn token_budget_bytes(tokens: u64) -> usize {
+    tokens.saturating_mul(4).min(usize::MAX as u64) as usize
 }

 /// Pod errors.
@ -4125,6 +4360,12 @@ pub enum PodError {
    #[error("compact worker did not produce a summary (write_summary was never called)")]
    CompactSummaryMissing,

+    #[error("compact summary too large: {tokens} tokens exceeds max {max}")]
+    CompactSummaryTooLarge { tokens: u64, max: u64 },
+
+    #[error("compacted result context too large: {tokens} tokens exceeds max {max}")]
+    CompactResultContextTooLarge { tokens: u64, max: u64 },
+
    #[error("invalid system prompt template: {source}")]
    InvalidSystemPromptTemplate {
        #[source]
@ -4409,6 +4650,21 @@ mod memory_worker_event_tests {
 mod build_summary_prompt_tests {
    use super::*;

+    fn test_summary_input(items: &[Item]) -> String {
+        build_summary_input(
+            items,
+            &[],
+            None,
+            SummaryInputOptions {
+                overview_target_tokens: 512,
+                overview_warning_tokens: 1024,
+                overview_deadline_tokens: 2048,
+                summary_target_tokens: 256,
+            },
+        )
+        .text
+    }
+
    #[test]
    fn strips_tool_call_arguments() {
        let items = vec![Item::tool_call_json(
@ -4416,8 +4672,8 @@ mod build_summary_prompt_tests {
            "read_file",
            serde_json::json!({ "path": "src/main.rs" }),
        )];
-        let prompt = build_summary_prompt(&items);
-        assert_eq!(prompt, "[ToolCall] read_file");
+        let prompt = test_summary_input(&items);
+        assert!(prompt.contains("[0 ToolCall] read_file"));
        assert!(!prompt.contains("src/main.rs"));
    }

@ -4428,8 +4684,8 @@ mod build_summary_prompt_tests {
            "read 3 lines",
            "fn main() { println!(\"hello\"); }",
        )];
-        let prompt = build_summary_prompt(&items);
-        assert_eq!(prompt, "[ToolResult] read 3 lines");
+        let prompt = test_summary_input(&items);
+        assert!(prompt.contains("[0 ToolResult] read 3 lines"));
        assert!(!prompt.contains("println"));
    }

@ -4440,13 +4696,50 @@ mod build_summary_prompt_tests {
            Item::reasoning("internal deliberation"),
            Item::assistant_message("hello"),
        ];
-        let prompt = build_summary_prompt(&items);
-        assert!(prompt.contains("[User] hi"));
-        assert!(prompt.contains("[Assistant] hello"));
+        let prompt = test_summary_input(&items);
+        assert!(prompt.contains("[0 User] hi"));
+        assert!(prompt.contains("[2 Assistant] hello"));
        assert!(!prompt.contains("Reasoning"));
        assert!(!prompt.contains("deliberation"));
    }

+    #[test]
+    fn overview_warning_does_not_drop_input() {
+        let items = vec![Item::user_message("x".repeat(4_000))];
+        let built = build_summary_input(
+            &items,
+            &[],
+            None,
+            SummaryInputOptions {
+                overview_target_tokens: 10,
+                overview_warning_tokens: 100,
+                overview_deadline_tokens: 2_000,
+                summary_target_tokens: 256,
+            },
+        );
+        assert!(built.warning_exceeded);
+        assert!(!built.deadline_fallback_used);
+        assert!(built.text.contains("[0 User]"));
+    }
+
+    #[test]
+    fn overview_deadline_falls_back_to_coarse_index() {
+        let items = vec![Item::user_message("x".repeat(4_000))];
+        let built = build_summary_input(
+            &items,
+            &[],
+            None,
+            SummaryInputOptions {
+                overview_target_tokens: 10,
+                overview_warning_tokens: 10,
+                overview_deadline_tokens: 100,
+                summary_target_tokens: 256,
+            },
+        );
+        assert!(built.deadline_fallback_used);
+        assert!(built.text.contains("## Coarse recent message index"));
+    }
+
    #[test]
    fn worker_manifest_generation_settings_become_request_config() {
        let manifest = WorkerManifest {
@ -4478,8 +4771,9 @@ mod build_summary_prompt_tests {
            Item::user_message("fix the bug"),
            Item::assistant_message("done"),
        ];
-        let prompt = build_summary_prompt(&items);
-        assert_eq!(prompt, "[User] fix the bug\n\n[Assistant] done");
+        let prompt = test_summary_input(&items);
+        assert!(prompt.contains("[0 User] fix the bug"));
+        assert!(prompt.contains("[1 Assistant] done"));
    }

    #[derive(Clone)]
--- a/docs/compaction.md
+++ b/docs/compaction.md
@ -72,11 +72,11 @@ pub struct ToolOutput {

 **ターンの合間が proactive (小さい閾値)**:
 turn が完了した地点はタスクの自然な区切り。ここで先を見越して早めに compact する。
-マニフェストの `compact_threshold` が対応。
+マニフェストの `threshold` が対応。

 **リクエストの合間は safety net (大きい閾値)**:
 turn 内部でリクエストの合間にチェックするのは「暴走的に膨張した場合のみ止める」用途。
-マニフェストの `compact_request_threshold` が対応。通常は発動しない。
+マニフェストの `request_threshold` が対応。通常は発動しない。

 **両閾値は manifest で個別指定する**。過去の設計では 9/8 倍で自動導出していたが、
 比率に根拠がなかったため廃止。両方が `Option<u64>` で、片方だけの設定も可能
@ -137,15 +137,29 @@ compact は fork と同じ構造。旧セッションを保全し、新 SessionI

 ```toml
 [compaction]
-compact_threshold = 80000              # ターンの合間 (proactive)
-compact_request_threshold = 90000      # リクエストの合間 (safety net)
+threshold = 80000                  # ターンの合間 (proactive)
+request_threshold = 90000          # リクエストの合間 (safety net)
 prune_protected_tokens = 8000      # prune から保護する末尾 token budget
-compact_retained_tokens = 8000      # compact 後に生のまま残す末尾 token budget
-compact_auto_read_budget = 8000     # compact worker の mark_read_required 合計上限
-compact_worker_max_input_tokens = 50000 # compact worker 自身の現在占有トークン上限
-compact_worker_max_turns = 20           # compact worker 自身の tool loop 上限
+retained_tokens = 8000             # compact 後に生のまま残す末尾 token budget
+
+overview_target_tokens = 8000      # compact worker 初期 overview の通常目標
+overview_warning_tokens = 16000    # 超えたら警告・trace、compact は続行
+overview_deadline_tokens = 40000   # 超えたら粗い overview へ fallback
+
+worker_context_max_tokens = 50000          # compact worker session 全体の hard limit
+finish_warning_remaining_tokens = 8000     # 残りが少ないため write_summary へ進める勧告
+final_reserve_tokens = 4000                # 最終 summary/closing turn 用 reserve
+worker_max_turns = 20                      # compact worker 自身の tool loop 上限
+
+summary_target_tokens = 1500       # write_summary の目標サイズ
+summary_max_tokens = 3000          # write_summary の hard validation
+auto_read_budget_tokens = 8000     # compact 後に注入する file content 合計上限
+result_context_max_tokens = 24000  # 新 session 初期 context の dry-run validation
 ```

+`compact_*` prefix の旧 key は互換 alias として読み取るが、`[compaction]` 内の新規 key は prefix なしを正とする。
+初期 overview の target/warning は効率のための目安で、通常は hard error にしない。deadline 超過時も、可能なら deterministic に粗い overview へ fallback して compact の完走を優先する。
+
 ### Auto-Read とリファレンス

 2段階のファイル参照:
@ -176,8 +190,9 @@ auto-read も通常の history 内 system message なので、将来の Prune/Co

 ## compact worker

-要約生成とファイル選定を行う使い捨て Worker。ツールなし・1リクエストの現行実装から、
-ツール付きマルチターンに改善する。
+要約生成とファイル選定を行う使い捨て Worker。Pod は compact 対象 prefix を全文投入せず、User / Assistant / System を優先した bounded overview と tool index を初期 input として渡す。Tool call arguments、tool result full content、reasoning body は初期 input には載せない。
+
+初期 overview は `overview_target_tokens` を目標にする。`overview_warning_tokens` を超えた場合は警告・trace を記録して続行し、`overview_deadline_tokens` を超えた場合は粗い deterministic overview へ fallback する。Compact の目的は完走なので、初期 input が少し大きいだけでは hard error にしない。

 ### ツール

@ -192,13 +207,19 @@ write_summary(text)                       — 構造化要約を出力/上書き

 1. Pod が `tools::Tracker::recent_files(5)` で最近触られたファイルを抽出（デフォルトリファレンス）
 2. compact worker にプロンプトとして渡す:
-   - pruned history（summary only、arguments/reasoning 除去）
+   - bounded overview / index（User / Assistant / System 優先）
   - デフォルトリファレンスの一覧
+   - TaskStore snapshot
 3. compact worker が自律的に:
   - read_file で各ファイルを読み、必要性を判断
   - mark_read_required / add_reference で指定
   - write_summary で構造化要約を出力（呼び直し可）
-4. ターン終了時に write_summary 未呼び出し or read_required 空（かつファイル操作履歴がある場合）→ 追加プロンプトで促す
+4. CompactWorkerInterceptor が worker session 全体の context occupancy を監視する:
+   - `finish_warning_remaining_tokens` 到達時に「探索を切り上げて write_summary へ進め」と Worker history に永続化される warning を挿入し、人間向け warning も出す
+   - `final_reserve_tokens` を割った後は `write_summary` 以外の探索 tool call に synthetic error を返し、最終 summary の余白を守る
+   - `worker_context_max_tokens` 超過は最後の hard stop
+5. ターン終了時に write_summary 未呼び出し or read_required 空（かつファイル操作履歴がある場合）→ 追加プロンプトで促す
+6. `summary_max_tokens` と `result_context_max_tokens` で compact 結果を検証してから新 session を作る

 ### 構造化要約の要件

--- a/resources/prompts/internal/compact_system.md
+++ b/resources/prompts/internal/compact_system.md
@ -1,13 +1,16 @@
 You are a context compaction assistant. Your job is to hand the next session a structured summary plus pointers to the files it actually needs — not a narrative transcript of the conversation.

+The conversation input is a bounded overview/index, not the full transcript. Treat tool result bodies and reasoning as intentionally omitted unless a tool exposes more detail. If you receive a compact worker budget warning, stop broad exploration immediately, read only if absolutely necessary, and call `write_summary`.
+
 ## Workflow

-1. Use `read_file` to inspect referenced files before deciding what the next session needs. Prefer skimming over blind inclusion.
-2. For files whose current contents are load-bearing for the active work, call `mark_read_required` to inject them into the next session. These count against the auto-read token budget — spend it deliberately.
-3. For files the next session should know about but can fetch on demand, call `add_reference` to record the path without embedding contents.
-4. Finish with `write_summary` carrying the final text. You may call it multiple times; only the last call is kept.
+1. Read the provided overview/index and current TaskStore snapshot.
+2. Use `read_file` to inspect referenced files before deciding what the next session needs. Prefer skimming over blind inclusion.
+3. For files whose current contents are load-bearing for the active work, call `mark_read_required` to inject them into the next session. These count against the auto-read token budget — spend it deliberately.
+4. For files the next session should know about but can fetch on demand, call `add_reference` to record the path without embedding contents.
+5. Finish with `write_summary` carrying the final text. You may call it multiple times; only the last call is kept.

-Stop nominating and close out with `write_summary` as soon as the auto-read budget is exhausted, or whenever further nominations would not change the next session's next step.
+Stop nominating and close out with `write_summary` as soon as the auto-read budget is exhausted, when a compact worker budget warning arrives, or whenever further exploration would not change the next session's next step.

 ## Summary format

@ -36,4 +39,4 @@ Produce the summary in this exact format:
 ## Constraints

 - Keep code snippets and raw tool output OUT of the summary — that is what auto-read and references are for.
- Target 1000–2000 tokens for the summary text itself.
+- Follow the summary target stated in the run input; if asked to shrink, call `write_summary` again with a shorter version.