feat: bound compact worker context

This commit is contained in:
Keisuke Hirata 2026-05-28 11:57:57 +09:00
parent 7034d02455
commit c274e4a891
10 changed files with 791 additions and 211 deletions

View File

@ -32,10 +32,16 @@ pub enum PromptAction {
} }
/// Action before an LLM request. /// Action before an LLM request.
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq)]
pub enum PreRequestAction { pub enum PreRequestAction {
/// Proceed normally. /// Proceed normally.
Continue, Continue,
/// Proceed after appending these items to durable worker history.
///
/// This is for upper-layer budget/status nudges that the model may react
/// to: the items are committed before the request so later turns can see
/// why the worker changed course.
ContinueWith(Vec<Item>),
/// Cancel with a reason (treated as an error). /// Cancel with a reason (treated as an error).
Cancel(String), Cancel(String),
/// Yield control to the caller for external processing. /// Yield control to the caller for external processing.
@ -149,11 +155,12 @@ pub trait Interceptor: Send + Sync {
/// Called before each LLM request. The context starts as a clone /// Called before each LLM request. The context starts as a clone
/// of `worker.history` (after `pending_history_appends` and the /// of `worker.history` (after `pending_history_appends` and the
/// Worker's own prune projection have been applied) and can be /// Worker's own prune projection have been applied).
/// further modified for that single request only — mutations here ///
/// are **not** persisted back to history. Use /// Direct mutations to `context` remain request-local and are not persisted.
/// [`Self::pending_history_appends`] for inputs that need to land /// If an interceptor derives a human/model-visible nudge from the current
/// in history. /// request context, return [`PreRequestAction::ContinueWith`] so the Worker
/// commits it to history before the request is sent.
async fn pre_llm_request(&self, _context: &mut Vec<Item>) -> PreRequestAction { async fn pre_llm_request(&self, _context: &mut Vec<Item>) -> PreRequestAction {
PreRequestAction::Continue PreRequestAction::Continue
} }

View File

@ -1169,6 +1169,10 @@ impl<C: LlmClient, S: WorkerState> Worker<C, S> {
self.last_run_interrupted = true; self.last_run_interrupted = true;
return Ok(WorkerResult::Yielded); return Ok(WorkerResult::Yielded);
} }
PreRequestAction::ContinueWith(items) => {
self.append_history_items(items.clone());
request_context.extend(items);
}
PreRequestAction::Continue => {} PreRequestAction::Continue => {}
} }

View File

@ -125,18 +125,34 @@ pub struct CompactionConfigPartial {
pub prune_protected_tokens: Option<u64>, pub prune_protected_tokens: Option<u64>,
#[serde(default)] #[serde(default)]
pub prune_min_savings: Option<u64>, pub prune_min_savings: Option<u64>,
#[serde(default, alias = "compact_threshold")]
pub threshold: Option<u64>,
#[serde(default, alias = "compact_request_threshold")]
pub request_threshold: Option<u64>,
#[serde(default, alias = "compact_retained_tokens")]
pub retained_tokens: Option<u64>,
#[serde(default)] #[serde(default)]
pub compact_threshold: Option<u64>, pub overview_target_tokens: Option<u64>,
#[serde(default)] #[serde(default)]
pub compact_request_threshold: Option<u64>, pub overview_warning_tokens: Option<u64>,
#[serde(default)] #[serde(default)]
pub compact_retained_tokens: Option<u64>, pub overview_deadline_tokens: Option<u64>,
#[serde(default, alias = "compact_worker_max_input_tokens")]
pub worker_context_max_tokens: Option<u64>,
#[serde(default)] #[serde(default)]
pub compact_auto_read_budget: Option<u64>, pub finish_warning_remaining_tokens: Option<u64>,
#[serde(default)] #[serde(default)]
pub compact_worker_max_input_tokens: Option<u64>, pub final_reserve_tokens: Option<u64>,
#[serde(default, alias = "compact_worker_max_turns")]
pub worker_max_turns: Option<u32>,
#[serde(default)] #[serde(default)]
pub compact_worker_max_turns: Option<u32>, pub summary_target_tokens: Option<u64>,
#[serde(default)]
pub summary_max_tokens: Option<u64>,
#[serde(default, alias = "compact_auto_read_budget")]
pub auto_read_budget_tokens: Option<u64>,
#[serde(default)]
pub result_context_max_tokens: Option<u64>,
#[serde(default)] #[serde(default)]
pub model: Option<ModelManifest>, pub model: Option<ModelManifest>,
} }
@ -386,22 +402,32 @@ impl CompactionConfigPartial {
Self { Self {
prune_protected_tokens: upper.prune_protected_tokens.or(self.prune_protected_tokens), prune_protected_tokens: upper.prune_protected_tokens.or(self.prune_protected_tokens),
prune_min_savings: upper.prune_min_savings.or(self.prune_min_savings), prune_min_savings: upper.prune_min_savings.or(self.prune_min_savings),
compact_threshold: upper.compact_threshold.or(self.compact_threshold), threshold: upper.threshold.or(self.threshold),
compact_request_threshold: upper request_threshold: upper.request_threshold.or(self.request_threshold),
.compact_request_threshold retained_tokens: upper.retained_tokens.or(self.retained_tokens),
.or(self.compact_request_threshold), overview_target_tokens: upper.overview_target_tokens.or(self.overview_target_tokens),
compact_retained_tokens: upper overview_warning_tokens: upper
.compact_retained_tokens .overview_warning_tokens
.or(self.compact_retained_tokens), .or(self.overview_warning_tokens),
compact_auto_read_budget: upper overview_deadline_tokens: upper
.compact_auto_read_budget .overview_deadline_tokens
.or(self.compact_auto_read_budget), .or(self.overview_deadline_tokens),
compact_worker_max_input_tokens: upper worker_context_max_tokens: upper
.compact_worker_max_input_tokens .worker_context_max_tokens
.or(self.compact_worker_max_input_tokens), .or(self.worker_context_max_tokens),
compact_worker_max_turns: upper finish_warning_remaining_tokens: upper
.compact_worker_max_turns .finish_warning_remaining_tokens
.or(self.compact_worker_max_turns), .or(self.finish_warning_remaining_tokens),
final_reserve_tokens: upper.final_reserve_tokens.or(self.final_reserve_tokens),
worker_max_turns: upper.worker_max_turns.or(self.worker_max_turns),
summary_target_tokens: upper.summary_target_tokens.or(self.summary_target_tokens),
summary_max_tokens: upper.summary_max_tokens.or(self.summary_max_tokens),
auto_read_budget_tokens: upper
.auto_read_budget_tokens
.or(self.auto_read_budget_tokens),
result_context_max_tokens: upper
.result_context_max_tokens
.or(self.result_context_max_tokens),
model: merge_option(self.model, upper.model, ModelManifest::merge), model: merge_option(self.model, upper.model, ModelManifest::merge),
} }
} }
@ -544,20 +570,42 @@ impl TryFrom<PodManifestConfig> for PodManifest {
.prune_protected_tokens .prune_protected_tokens
.unwrap_or(defaults::PRUNE_PROTECTED_TOKENS), .unwrap_or(defaults::PRUNE_PROTECTED_TOKENS),
prune_min_savings: c.prune_min_savings.unwrap_or(defaults::PRUNE_MIN_SAVINGS), prune_min_savings: c.prune_min_savings.unwrap_or(defaults::PRUNE_MIN_SAVINGS),
compact_threshold: c.compact_threshold, threshold: c.threshold,
compact_request_threshold: c.compact_request_threshold, request_threshold: c.request_threshold,
compact_retained_tokens: c retained_tokens: c
.compact_retained_tokens .retained_tokens
.unwrap_or(defaults::COMPACT_RETAINED_TOKENS), .unwrap_or(defaults::COMPACT_RETAINED_TOKENS),
compact_auto_read_budget: c overview_target_tokens: c
.compact_auto_read_budget .overview_target_tokens
.unwrap_or(defaults::COMPACT_AUTO_READ_BUDGET), .unwrap_or(defaults::COMPACT_OVERVIEW_TARGET_TOKENS),
compact_worker_max_input_tokens: c overview_warning_tokens: c
.compact_worker_max_input_tokens .overview_warning_tokens
.unwrap_or(defaults::COMPACT_OVERVIEW_WARNING_TOKENS),
overview_deadline_tokens: c
.overview_deadline_tokens
.unwrap_or(defaults::COMPACT_OVERVIEW_DEADLINE_TOKENS),
worker_context_max_tokens: c
.worker_context_max_tokens
.unwrap_or(defaults::COMPACT_WORKER_MAX_INPUT_TOKENS), .unwrap_or(defaults::COMPACT_WORKER_MAX_INPUT_TOKENS),
compact_worker_max_turns: c finish_warning_remaining_tokens: c
.compact_worker_max_turns .finish_warning_remaining_tokens
.or(defaults::COMPACT_WORKER_MAX_TURNS), .unwrap_or(defaults::COMPACT_FINISH_WARNING_REMAINING_TOKENS),
final_reserve_tokens: c
.final_reserve_tokens
.unwrap_or(defaults::COMPACT_FINAL_RESERVE_TOKENS),
worker_max_turns: c.worker_max_turns.or(defaults::COMPACT_WORKER_MAX_TURNS),
summary_target_tokens: c
.summary_target_tokens
.unwrap_or(defaults::COMPACT_SUMMARY_TARGET_TOKENS),
summary_max_tokens: c
.summary_max_tokens
.unwrap_or(defaults::COMPACT_SUMMARY_MAX_TOKENS),
auto_read_budget_tokens: c
.auto_read_budget_tokens
.unwrap_or(defaults::COMPACT_AUTO_READ_BUDGET),
result_context_max_tokens: c
.result_context_max_tokens
.unwrap_or(defaults::COMPACT_RESULT_CONTEXT_MAX_TOKENS),
model: c.model, model: c.model,
}) })
}) })
@ -984,7 +1032,7 @@ mod tests {
fn merge_option_struct_field_wise() { fn merge_option_struct_field_wise() {
let lower = PodManifestConfig { let lower = PodManifestConfig {
compaction: Some(CompactionConfigPartial { compaction: Some(CompactionConfigPartial {
compact_threshold: Some(50_000), threshold: Some(50_000),
prune_protected_tokens: Some(5_000), prune_protected_tokens: Some(5_000),
..Default::default() ..Default::default()
}), }),
@ -992,14 +1040,14 @@ mod tests {
}; };
let upper = PodManifestConfig { let upper = PodManifestConfig {
compaction: Some(CompactionConfigPartial { compaction: Some(CompactionConfigPartial {
compact_threshold: Some(80_000), threshold: Some(80_000),
..Default::default() ..Default::default()
}), }),
..Default::default() ..Default::default()
}; };
let merged = lower.merge(upper); let merged = lower.merge(upper);
let c = merged.compaction.unwrap(); let c = merged.compaction.unwrap();
assert_eq!(c.compact_threshold, Some(80_000)); assert_eq!(c.threshold, Some(80_000));
// field from lower retained when upper has None // field from lower retained when upper has None
assert_eq!(c.prune_protected_tokens, Some(5_000)); assert_eq!(c.prune_protected_tokens, Some(5_000));
} }
@ -1122,27 +1170,27 @@ stop_sequences = ["\n\n", "</stop>"]
} }
#[test] #[test]
fn from_toml_accepts_compact_worker_max_turns() { fn from_toml_accepts_worker_max_turns() {
let cfg = PodManifestConfig::from_toml( let cfg = PodManifestConfig::from_toml(
r#" r#"
[compaction] [compaction]
compact_worker_max_turns = 7 worker_max_turns = 7
"#, "#,
) )
.unwrap(); .unwrap();
assert_eq!(cfg.compaction.unwrap().compact_worker_max_turns, Some(7)); assert_eq!(cfg.compaction.unwrap().worker_max_turns, Some(7));
} }
#[test] #[test]
fn try_from_compaction_defaults_compact_worker_max_turns() { fn try_from_compaction_defaults_worker_max_turns() {
let mut cfg = minimal_valid(); let mut cfg = minimal_valid();
cfg.compaction = Some(CompactionConfigPartial::default()); cfg.compaction = Some(CompactionConfigPartial::default());
let manifest = PodManifest::try_from(cfg).unwrap(); let manifest = PodManifest::try_from(cfg).unwrap();
assert_eq!( assert_eq!(
manifest.compaction.unwrap().compact_worker_max_turns, manifest.compaction.unwrap().worker_max_turns,
defaults::COMPACT_WORKER_MAX_TURNS defaults::COMPACT_WORKER_MAX_TURNS
); );
} }

View File

@ -25,9 +25,23 @@ pub const PRUNE_MIN_SAVINGS: u64 = 4096;
/// Token budget retained (unchanged) at the tail of the history across /// Token budget retained (unchanged) at the tail of the history across
/// a compact. Items whose cumulative token count fits within this budget /// a compact. Items whose cumulative token count fits within this budget
/// starting from the end are kept verbatim; the rest are summarised. /// starting from the end are kept verbatim; the rest are summarised.
/// See [`crate::CompactionConfig::compact_retained_tokens`]. /// See [`crate::CompactionConfig::retained_tokens`].
pub const COMPACT_RETAINED_TOKENS: u64 = 8000; pub const COMPACT_RETAINED_TOKENS: u64 = 8000;
/// Target size for the deterministic compact overview/index fed to the
/// compact worker. Exceeding this target is tolerated.
/// See [`crate::CompactionConfig::overview_target_tokens`].
pub const COMPACT_OVERVIEW_TARGET_TOKENS: u64 = 8_000;
/// Warning threshold for compact overview/index size. Compaction continues.
/// See [`crate::CompactionConfig::overview_warning_tokens`].
pub const COMPACT_OVERVIEW_WARNING_TOKENS: u64 = 16_000;
/// Hard deterministic-overview deadline. When exceeded, overview generation
/// falls back to a coarser index before the compact worker is started.
/// See [`crate::CompactionConfig::overview_deadline_tokens`].
pub const COMPACT_OVERVIEW_DEADLINE_TOKENS: u64 = 40_000;
/// Default instruction asset reference used when `worker.instruction` /// Default instruction asset reference used when `worker.instruction`
/// is omitted. See the `PromptLoader` prefix addressing scheme for the /// is omitted. See the `PromptLoader` prefix addressing scheme for the
/// `$insomnia/` / `$user/` / `$workspace/` namespaces. /// `$insomnia/` / `$user/` / `$workspace/` namespaces.
@ -42,19 +56,39 @@ pub const WORKER_LANGUAGE: &str =
/// session after compaction. Limits how much raw file text the /// session after compaction. Limits how much raw file text the
/// compact worker can pull into the compacted context via /// compact worker can pull into the compacted context via
/// `mark_read_required`. See /// `mark_read_required`. See
/// [`crate::CompactionConfig::compact_auto_read_budget`]. /// [`crate::CompactionConfig::auto_read_budget_tokens`].
pub const COMPACT_AUTO_READ_BUDGET: u64 = 8000; pub const COMPACT_AUTO_READ_BUDGET: u64 = 8000;
/// Current prompt-occupancy cap for the compact worker's own LLM /// Current prompt-occupancy cap for the compact worker's own LLM
/// calls. Exceeding this aborts the compact run (circuit-breaker /// calls. Exceeding this aborts the compact run (circuit-breaker
/// path). See /// path). See [`crate::CompactionConfig::worker_context_max_tokens`].
/// [`crate::CompactionConfig::compact_worker_max_input_tokens`].
pub const COMPACT_WORKER_MAX_INPUT_TOKENS: u64 = 50_000; pub const COMPACT_WORKER_MAX_INPUT_TOKENS: u64 = 50_000;
/// Remaining compact-worker context threshold that triggers an instruction
/// to stop exploring and call `write_summary`.
/// See [`crate::CompactionConfig::finish_warning_remaining_tokens`].
pub const COMPACT_FINISH_WARNING_REMAINING_TOKENS: u64 = 8_000;
/// Context reserve preserved for final summary/tool closing turns.
/// See [`crate::CompactionConfig::final_reserve_tokens`].
pub const COMPACT_FINAL_RESERVE_TOKENS: u64 = 4_000;
/// Optional maximum compact-worker tool-loop depth. `None` means unlimited. /// Optional maximum compact-worker tool-loop depth. `None` means unlimited.
/// See [`crate::CompactionConfig::compact_worker_max_turns`]. /// See [`crate::CompactionConfig::worker_max_turns`].
pub const COMPACT_WORKER_MAX_TURNS: Option<u32> = Some(20); pub const COMPACT_WORKER_MAX_TURNS: Option<u32> = Some(20);
/// Target size for the `write_summary` text. Used in prompt/nudge text.
/// See [`crate::CompactionConfig::summary_target_tokens`].
pub const COMPACT_SUMMARY_TARGET_TOKENS: u64 = 2_000;
/// Hard validation cap for the final `write_summary` text.
/// See [`crate::CompactionConfig::summary_max_tokens`].
pub const COMPACT_SUMMARY_MAX_TOKENS: u64 = 4_000;
/// Dry-run cap for the compacted session's initial request context.
/// See [`crate::CompactionConfig::result_context_max_tokens`].
pub const COMPACT_RESULT_CONTEXT_MAX_TOKENS: u64 = 60_000;
/// Number of recently-touched files fed to the compact worker as /// Number of recently-touched files fed to the compact worker as
/// default references. /// default references.
pub const COMPACT_DEFAULT_REFERENCE_COUNT: usize = 5; pub const COMPACT_DEFAULT_REFERENCE_COUNT: usize = 5;

View File

@ -363,8 +363,8 @@ pub struct CompactionConfig {
/// Checked by the Controller after each run. When current occupancy /// Checked by the Controller after each run. When current occupancy
/// exceeds this value, compact runs before the next turn. `None` /// exceeds this value, compact runs before the next turn. `None`
/// disables the between-turns check. /// disables the between-turns check.
#[serde(default)] #[serde(default, alias = "compact_threshold")]
pub compact_threshold: Option<u64>, pub threshold: Option<u64>,
/// Safety-net (between-requests) compaction threshold. /// Safety-net (between-requests) compaction threshold.
/// ///
@ -373,32 +373,76 @@ pub struct CompactionConfig {
/// Controller can compact before the next LLM request. `None` /// Controller can compact before the next LLM request. `None`
/// disables the between-requests check. /// disables the between-requests check.
/// ///
/// Expected relation: `compact_threshold < compact_request_threshold` /// Expected relation: `threshold < request_threshold` (proactive triggers
/// (proactive triggers before safety net). A reversed configuration /// before safety net). A reversed configuration is accepted but logged as
/// is accepted but logged as a warning. /// a warning.
#[serde(default)] #[serde(default, alias = "compact_request_threshold")]
pub compact_request_threshold: Option<u64>, pub request_threshold: Option<u64>,
/// Token budget retained verbatim at the tail of the history after /// Token budget retained verbatim at the tail of the history after
/// compaction. Measured against the occupancy estimate from /// compaction. Measured against the occupancy estimate from
/// `UsageRecord` history; turn boundaries are ignored. /// `UsageRecord` history; turn boundaries are ignored.
#[serde(default = "default_compact_retained_tokens")] #[serde(default = "default_retained_tokens", alias = "compact_retained_tokens")]
pub compact_retained_tokens: u64, pub retained_tokens: u64,
/// Aggregate token budget for auto-read file contents injected into /// Target size for the deterministic overview/index fed to the compact
/// the compacted session by the compact worker. /// worker. Overshooting this target is not an error.
#[serde(default = "default_compact_auto_read_budget")] #[serde(default = "default_overview_target_tokens")]
pub compact_auto_read_budget: u64, pub overview_target_tokens: u64,
/// Warning threshold for deterministic overview/index size.
#[serde(default = "default_overview_warning_tokens")]
pub overview_warning_tokens: u64,
/// Deadline threshold for deterministic overview/index generation.
/// Oversized overviews fall back to a coarser deterministic index.
#[serde(default = "default_overview_deadline_tokens")]
pub overview_deadline_tokens: u64,
/// Current prompt-occupancy cap for the compact worker's own LLM /// Current prompt-occupancy cap for the compact worker's own LLM
/// requests. Exceeding this aborts the compact run. /// requests. Exceeding this aborts the compact run.
#[serde(default = "default_compact_worker_max_input_tokens")] #[serde(
pub compact_worker_max_input_tokens: u64, default = "default_worker_context_max_tokens",
alias = "compact_worker_max_input_tokens"
)]
pub worker_context_max_tokens: u64,
/// Remaining compact-worker context threshold that triggers a warning and
/// an instruction to stop exploring and call `write_summary`.
#[serde(default = "default_finish_warning_remaining_tokens")]
pub finish_warning_remaining_tokens: u64,
/// Context reserve preserved for final summary/tool closing turns.
#[serde(default = "default_final_reserve_tokens")]
pub final_reserve_tokens: u64,
/// Optional maximum compact-worker tool-loop depth. `None` leaves the /// Optional maximum compact-worker tool-loop depth. `None` leaves the
/// worker unlimited; the default bounds runaway short-context loops. /// worker unlimited; the default bounds runaway short-context loops.
#[serde(default = "default_compact_worker_max_turns")] #[serde(
pub compact_worker_max_turns: Option<u32>, default = "default_worker_max_turns",
alias = "compact_worker_max_turns"
)]
pub worker_max_turns: Option<u32>,
/// Target size for the `write_summary` text. Used in prompt/nudge text.
#[serde(default = "default_summary_target_tokens")]
pub summary_target_tokens: u64,
/// Hard validation cap for the final `write_summary` text.
#[serde(default = "default_summary_max_tokens")]
pub summary_max_tokens: u64,
/// Aggregate token budget for auto-read file contents injected into
/// the compacted session by the compact worker.
#[serde(
default = "default_auto_read_budget_tokens",
alias = "compact_auto_read_budget"
)]
pub auto_read_budget_tokens: u64,
/// Dry-run cap for the compacted session's initial request context.
#[serde(default = "default_result_context_max_tokens")]
pub result_context_max_tokens: u64,
/// Optional model for the compactor (summary) LLM. /// Optional model for the compactor (summary) LLM.
/// If omitted, the main model is cloned via `clone_boxed()`. /// If omitted, the main model is cloned via `clone_boxed()`.
@ -412,30 +456,62 @@ fn default_prune_protected_tokens() -> u64 {
fn default_prune_min_savings() -> u64 { fn default_prune_min_savings() -> u64 {
defaults::PRUNE_MIN_SAVINGS defaults::PRUNE_MIN_SAVINGS
} }
fn default_compact_retained_tokens() -> u64 { fn default_retained_tokens() -> u64 {
defaults::COMPACT_RETAINED_TOKENS defaults::COMPACT_RETAINED_TOKENS
} }
fn default_compact_auto_read_budget() -> u64 { fn default_overview_target_tokens() -> u64 {
defaults::COMPACT_AUTO_READ_BUDGET defaults::COMPACT_OVERVIEW_TARGET_TOKENS
} }
fn default_compact_worker_max_input_tokens() -> u64 { fn default_overview_warning_tokens() -> u64 {
defaults::COMPACT_OVERVIEW_WARNING_TOKENS
}
/// Serde default for `CompactionConfig::overview_deadline_tokens`; also used by
/// `CompactionConfig::default()`. Returns `defaults::COMPACT_OVERVIEW_DEADLINE_TOKENS`.
fn default_overview_deadline_tokens() -> u64 {
defaults::COMPACT_OVERVIEW_DEADLINE_TOKENS
}
fn default_worker_context_max_tokens() -> u64 {
defaults::COMPACT_WORKER_MAX_INPUT_TOKENS defaults::COMPACT_WORKER_MAX_INPUT_TOKENS
} }
fn default_compact_worker_max_turns() -> Option<u32> { fn default_finish_warning_remaining_tokens() -> u64 {
defaults::COMPACT_FINISH_WARNING_REMAINING_TOKENS
}
/// Serde default for `CompactionConfig::final_reserve_tokens`; also used by
/// `CompactionConfig::default()`. Returns `defaults::COMPACT_FINAL_RESERVE_TOKENS`.
fn default_final_reserve_tokens() -> u64 {
defaults::COMPACT_FINAL_RESERVE_TOKENS
}
fn default_worker_max_turns() -> Option<u32> {
defaults::COMPACT_WORKER_MAX_TURNS defaults::COMPACT_WORKER_MAX_TURNS
} }
/// Serde default for `CompactionConfig::summary_target_tokens`; also used by
/// `CompactionConfig::default()`. Returns `defaults::COMPACT_SUMMARY_TARGET_TOKENS`.
fn default_summary_target_tokens() -> u64 {
defaults::COMPACT_SUMMARY_TARGET_TOKENS
}
/// Serde default for `CompactionConfig::summary_max_tokens`; also used by
/// `CompactionConfig::default()`. Returns `defaults::COMPACT_SUMMARY_MAX_TOKENS`.
fn default_summary_max_tokens() -> u64 {
defaults::COMPACT_SUMMARY_MAX_TOKENS
}
/// Serde default for `CompactionConfig::auto_read_budget_tokens`; also used by
/// `CompactionConfig::default()`. Returns `defaults::COMPACT_AUTO_READ_BUDGET`
/// (the constant keeps its pre-rename name).
fn default_auto_read_budget_tokens() -> u64 {
defaults::COMPACT_AUTO_READ_BUDGET
}
/// Serde default for `CompactionConfig::result_context_max_tokens`; also used by
/// `CompactionConfig::default()`. Returns `defaults::COMPACT_RESULT_CONTEXT_MAX_TOKENS`.
fn default_result_context_max_tokens() -> u64 {
defaults::COMPACT_RESULT_CONTEXT_MAX_TOKENS
}
impl Default for CompactionConfig { impl Default for CompactionConfig {
fn default() -> Self { fn default() -> Self {
Self { Self {
prune_protected_tokens: default_prune_protected_tokens(), prune_protected_tokens: default_prune_protected_tokens(),
prune_min_savings: default_prune_min_savings(), prune_min_savings: default_prune_min_savings(),
compact_threshold: None, threshold: None,
compact_request_threshold: None, request_threshold: None,
compact_retained_tokens: default_compact_retained_tokens(), retained_tokens: default_retained_tokens(),
compact_auto_read_budget: default_compact_auto_read_budget(), overview_target_tokens: default_overview_target_tokens(),
compact_worker_max_input_tokens: default_compact_worker_max_input_tokens(), overview_warning_tokens: default_overview_warning_tokens(),
compact_worker_max_turns: default_compact_worker_max_turns(), overview_deadline_tokens: default_overview_deadline_tokens(),
worker_context_max_tokens: default_worker_context_max_tokens(),
finish_warning_remaining_tokens: default_finish_warning_remaining_tokens(),
final_reserve_tokens: default_final_reserve_tokens(),
worker_max_turns: default_worker_max_turns(),
summary_target_tokens: default_summary_target_tokens(),
summary_max_tokens: default_summary_max_tokens(),
auto_read_budget_tokens: default_auto_read_budget_tokens(),
result_context_max_tokens: default_result_context_max_tokens(),
model: None, model: None,
} }
} }
@ -592,15 +668,15 @@ model_id = "claude-sonnet-4-20250514"
#[test] #[test]
fn parse_compaction_config() { fn parse_compaction_config() {
let toml = format!("{MINIMAL_REQUIRED}\n[compaction]\ncompact_threshold = 80000\n"); let toml = format!("{MINIMAL_REQUIRED}\n[compaction]\nthreshold = 80000\n");
let manifest = PodManifest::from_toml(&toml).unwrap(); let manifest = PodManifest::from_toml(&toml).unwrap();
let c = manifest.compaction.unwrap(); let c = manifest.compaction.unwrap();
assert_eq!(c.prune_protected_tokens, 8000); assert_eq!(c.prune_protected_tokens, 8000);
assert_eq!(c.prune_min_savings, 4096); assert_eq!(c.prune_min_savings, 4096);
assert_eq!(c.compact_threshold, Some(80000)); assert_eq!(c.threshold, Some(80000));
assert_eq!(c.compact_request_threshold, None); assert_eq!(c.request_threshold, None);
assert_eq!(c.compact_retained_tokens, 8000); assert_eq!(c.retained_tokens, 8000);
assert_eq!(c.compact_worker_max_turns, Some(20)); assert_eq!(c.worker_max_turns, Some(20));
} }
#[test] #[test]
@ -618,11 +694,11 @@ model_id = "claude-sonnet-4-20250514"
let toml = format!( let toml = format!(
"{MINIMAL_REQUIRED}\n\ "{MINIMAL_REQUIRED}\n\
[compaction]\n\ [compaction]\n\
compact_worker_max_turns = 7\n" worker_max_turns = 7\n"
); );
let manifest = PodManifest::from_toml(&toml).unwrap(); let manifest = PodManifest::from_toml(&toml).unwrap();
let c = manifest.compaction.unwrap(); let c = manifest.compaction.unwrap();
assert_eq!(c.compact_worker_max_turns, Some(7)); assert_eq!(c.worker_max_turns, Some(7));
} }
#[test] #[test]
@ -630,13 +706,13 @@ model_id = "claude-sonnet-4-20250514"
let toml = format!( let toml = format!(
"{MINIMAL_REQUIRED}\n\ "{MINIMAL_REQUIRED}\n\
[compaction]\n\ [compaction]\n\
compact_threshold = 80000\n\ threshold = 80000\n\
compact_request_threshold = 90000\n" request_threshold = 90000\n"
); );
let manifest = PodManifest::from_toml(&toml).unwrap(); let manifest = PodManifest::from_toml(&toml).unwrap();
let c = manifest.compaction.unwrap(); let c = manifest.compaction.unwrap();
assert_eq!(c.compact_threshold, Some(80000)); assert_eq!(c.threshold, Some(80000));
assert_eq!(c.compact_request_threshold, Some(90000)); assert_eq!(c.request_threshold, Some(90000));
} }
#[test] #[test]
@ -644,12 +720,12 @@ model_id = "claude-sonnet-4-20250514"
let toml = format!( let toml = format!(
"{MINIMAL_REQUIRED}\n\ "{MINIMAL_REQUIRED}\n\
[compaction]\n\ [compaction]\n\
compact_request_threshold = 90000\n" request_threshold = 90000\n"
); );
let manifest = PodManifest::from_toml(&toml).unwrap(); let manifest = PodManifest::from_toml(&toml).unwrap();
let c = manifest.compaction.unwrap(); let c = manifest.compaction.unwrap();
assert_eq!(c.compact_threshold, None); assert_eq!(c.threshold, None);
assert_eq!(c.compact_request_threshold, Some(90000)); assert_eq!(c.request_threshold, Some(90000));
} }
#[test] #[test]
@ -657,7 +733,7 @@ model_id = "claude-sonnet-4-20250514"
let toml = format!( let toml = format!(
"{MINIMAL_REQUIRED}\n\ "{MINIMAL_REQUIRED}\n\
[compaction]\n\ [compaction]\n\
compact_threshold = 80000\n\n\ threshold = 80000\n\n\
[compaction.model]\n\ [compaction.model]\n\
scheme = \"gemini\"\n\ scheme = \"gemini\"\n\
model_id = \"gemini-2.0-flash\"\n" model_id = \"gemini-2.0-flash\"\n"

View File

@ -281,10 +281,7 @@ mod tests {
("HOME", Some("/h")), ("HOME", Some("/h")),
("XDG_RUNTIME_DIR", Some("/run/user/1000")), ("XDG_RUNTIME_DIR", Some("/run/user/1000")),
]); ]);
assert_eq!( assert_eq!(runtime_dir().unwrap(), PathBuf::from("<runtime-dir>"));
runtime_dir().unwrap(),
PathBuf::from("<runtime-dir>")
);
} }
#[test] #[test]

View File

@ -18,12 +18,13 @@
//! compacted session's opening system messages. //! compacted session's opening system messages.
use std::path::PathBuf; use std::path::PathBuf;
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
use std::sync::{Arc, Mutex}; use std::sync::{Arc, Mutex};
use async_trait::async_trait; use async_trait::async_trait;
use llm_worker::Item; use llm_worker::Item;
use llm_worker::interceptor::{Interceptor, PreRequestAction}; use llm_worker::interceptor::{Interceptor, PreRequestAction, PreToolAction, ToolCallInfo};
use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta, ToolOutput}; use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta, ToolOutput, ToolResult};
use serde::Deserialize; use serde::Deserialize;
use tools::ScopedFs; use tools::ScopedFs;
@ -246,14 +247,63 @@ pub(crate) fn write_summary_tool(ctx: Arc<Mutex<CompactWorkerContext>>) -> ToolD
}) })
} }
/// Interceptor that aborts the compact worker when its current prompt /// Interceptor that monitors compact-worker context occupancy.
/// occupancy estimate crosses `max_input_tokens`. The estimate uses the same ///
/// `UsageRecord` + `llm_worker::token_counter::total_tokens` path as the main /// `max_input_tokens` remains the hard circuit breaker. Before that point,
/// Pod compaction thresholds, so prompt-cache hits are not counted cumulatively /// the interceptor can persist a system warning into worker history telling
/// across turns. /// the model to stop broad exploration and call `write_summary`, and can block
/// additional exploratory tool calls once the final reserve is reached.
pub(crate) struct CompactWorkerInterceptor { pub(crate) struct CompactWorkerInterceptor {
pub usage_tracker: Arc<UsageTracker>, pub usage_tracker: Arc<UsageTracker>,
pub max_input_tokens: u64, pub max_input_tokens: u64,
pub finish_warning_remaining_tokens: u64,
pub final_reserve_tokens: u64,
pub on_warning: Option<Arc<dyn Fn(String) + Send + Sync>>,
warning_sent: AtomicBool,
last_remaining_tokens: AtomicU64,
}
impl CompactWorkerInterceptor {
/// Builds an interceptor whose one-shot budget warning has not fired yet.
///
/// * `usage_tracker` - shared tracker the interceptor reports requests to.
/// * `max_input_tokens` - hard occupancy cap for the compact worker.
/// * `finish_warning_remaining_tokens` - remaining-token level at or below
///   which the warning is issued; `0` disables this trigger.
/// * `final_reserve_tokens` - remaining-token level at or below which the
///   warning is issued and non-`write_summary` tool calls are rejected;
///   `0` disables both.
/// * `on_warning` - optional callback that receives the warning text.
pub(crate) fn new(
    usage_tracker: Arc<UsageTracker>,
    max_input_tokens: u64,
    finish_warning_remaining_tokens: u64,
    final_reserve_tokens: u64,
    on_warning: Option<Arc<dyn Fn(String) + Send + Sync>>,
) -> Self {
    // Before any request has been measured the whole budget counts as free.
    let last_remaining_tokens = AtomicU64::new(max_input_tokens);
    let warning_sent = AtomicBool::new(false);

    Self {
        usage_tracker,
        max_input_tokens,
        finish_warning_remaining_tokens,
        final_reserve_tokens,
        on_warning,
        warning_sent,
        last_remaining_tokens,
    }
}
/// Produces the one-time low-budget system message when it is due.
///
/// A warning is due when `remaining` is at or below either threshold
/// (`finish_warning_remaining_tokens` or `final_reserve_tokens`); a
/// threshold of `0` never triggers. Returns `None` when no threshold is
/// hit or when a warning was already produced earlier. When a warning is
/// produced, `on_warning` (if set) receives the bare message text and the
/// returned item carries the same text under a
/// `[Compact worker budget warning]` heading.
fn maybe_emit_warning(&self, remaining: u64) -> Option<Item> {
    let at_or_below = |threshold: u64| threshold > 0 && remaining <= threshold;
    let warning_due = at_or_below(self.finish_warning_remaining_tokens)
        || at_or_below(self.final_reserve_tokens);
    if !warning_due {
        return None;
    }
    // `swap` hands back the previous flag value, so only the first caller
    // to get here proceeds; the flag is left untouched when nothing is due.
    if self.warning_sent.swap(true, Ordering::AcqRel) {
        return None;
    }

    let budget = self.max_input_tokens;
    let message = format!(
        "compact worker context budget is low ({remaining}/{budget} tokens remaining). \
         Stop broad exploration now, read only if absolutely necessary, then call \
         `write_summary` with the final structured summary."
    );
    // Build the history text first so the message itself can be moved into
    // the callback rather than cloned.
    let item_text = format!("[Compact worker budget warning]\n\n{message}");
    if let Some(notify) = &self.on_warning {
        notify(message);
    }
    Some(Item::system_message(item_text))
}
} }
#[async_trait] #[async_trait]
@ -268,9 +318,31 @@ impl Interceptor for CompactWorkerInterceptor {
)); ));
} }
let remaining = self.max_input_tokens.saturating_sub(estimate.tokens);
self.last_remaining_tokens
.store(remaining, Ordering::Release);
if let Some(item) = self.maybe_emit_warning(remaining) {
self.usage_tracker.note_request(context.len() + 1);
return PreRequestAction::ContinueWith(vec![item]);
}
self.usage_tracker.note_request(context.len()); self.usage_tracker.note_request(context.len());
PreRequestAction::Continue PreRequestAction::Continue
} }
async fn pre_tool_call(&self, info: &mut ToolCallInfo) -> PreToolAction {
if self.final_reserve_tokens == 0 || info.call.name == "write_summary" {
return PreToolAction::Continue;
}
let remaining = self.last_remaining_tokens.load(Ordering::Acquire);
if remaining > self.final_reserve_tokens {
return PreToolAction::Continue;
}
PreToolAction::SyntheticResult(ToolResult::error(
info.call.id.clone(),
"compact worker final reserve reached; do not perform more exploratory tool reads. Call `write_summary` now.",
))
}
} }
/// Crude bytes→tokens estimate; good enough for budget accounting. /// Crude bytes→tokens estimate; good enough for budget accounting.
@ -301,10 +373,7 @@ mod tests {
#[tokio::test] #[tokio::test]
async fn compact_worker_interceptor_uses_occupancy_not_cumulative_usage() { async fn compact_worker_interceptor_uses_occupancy_not_cumulative_usage() {
let tracker = Arc::new(UsageTracker::new()); let tracker = Arc::new(UsageTracker::new());
let interceptor = CompactWorkerInterceptor { let interceptor = CompactWorkerInterceptor::new(tracker.clone(), 150, 0, 0, None);
usage_tracker: tracker.clone(),
max_input_tokens: 150,
};
let mut context = vec![Item::user_message("hello")]; let mut context = vec![Item::user_message("hello")];
assert!(matches!( assert!(matches!(
@ -327,13 +396,40 @@ mod tests {
)); ));
} }
/// The first request that lands inside the warning window yields a
/// `ContinueWith` carrying exactly one `write_summary` nudge, and the
/// human-facing callback fires once.
#[tokio::test]
async fn compact_worker_interceptor_warns_before_hard_cap() {
    let tracker = Arc::new(UsageTracker::new());
    let warnings = Arc::new(Mutex::new(Vec::new()));
    let sink = Arc::clone(&warnings);
    let interceptor = CompactWorkerInterceptor::new(
        tracker.clone(),
        150,
        60,
        20,
        Some(Arc::new(move |message| {
            sink.lock().unwrap().push(message);
        })),
    );
    let mut context = vec![Item::user_message("hello")];

    // No usage recorded yet: plenty of budget, so no warning.
    let first = interceptor.pre_llm_request(&mut context).await;
    assert!(matches!(first, PreRequestAction::Continue));

    // After 100 tokens of recorded usage the warning must be attached.
    tracker.record_usage(&make_usage(100));
    let second = interceptor.pre_llm_request(&mut context).await;
    assert!(matches!(
        &second,
        PreRequestAction::ContinueWith(items)
            if items.len() == 1 && items[0].as_text().unwrap_or_default().contains("write_summary")
    ));

    let delivered = warnings.lock().unwrap().len();
    assert_eq!(delivered, 1);
}
#[tokio::test] #[tokio::test]
async fn compact_worker_interceptor_cancels_when_occupancy_exceeds_cap() { async fn compact_worker_interceptor_cancels_when_occupancy_exceeds_cap() {
let tracker = Arc::new(UsageTracker::new()); let tracker = Arc::new(UsageTracker::new());
let interceptor = CompactWorkerInterceptor { let interceptor = CompactWorkerInterceptor::new(tracker.clone(), 99, 0, 0, None);
usage_tracker: tracker.clone(),
max_input_tokens: 99,
};
let mut context = vec![Item::user_message("hello")]; let mut context = vec![Item::user_message("hello")];
assert!(matches!( assert!(matches!(

View File

@ -241,7 +241,7 @@ pub struct Pod<C: LlmClient, St: Store> {
scope: SharedScope, scope: SharedScope,
hook_builder: HookRegistryBuilder, hook_builder: HookRegistryBuilder,
interceptor_installed: bool, interceptor_installed: bool,
/// Shared compaction state (present when compact_threshold is configured). /// Shared compaction state (present when threshold is configured).
compact_state: Option<Arc<CompactState>>, compact_state: Option<Arc<CompactState>>,
/// Per-LLM-request Usage tracker. Always present after construction. /// Per-LLM-request Usage tracker. Always present after construction.
/// Captures `(history_len, UsageEvent)` pairs during a run; drained /// Captures `(history_len, UsageEvent)` pairs during a run; drained
@ -1121,8 +1121,8 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
/// Install the hook-based interceptor on the Worker if not already done. /// Install the hook-based interceptor on the Worker if not already done.
/// ///
/// When either compaction threshold (`compact_threshold` or /// When either compaction threshold (`threshold` or
/// `compact_request_threshold`) is configured in the manifest, allocates /// `request_threshold`) is configured in the manifest, allocates
/// a shared [`CompactState`] and wires the interceptor to read current /// a shared [`CompactState`] and wires the interceptor to read current
/// occupancy through the `UsageRecord` timeline. /// occupancy through the `UsageRecord` timeline.
fn ensure_interceptor_installed(&mut self) { fn ensure_interceptor_installed(&mut self) {
@ -1141,13 +1141,7 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
.manifest .manifest
.compaction .compaction
.as_ref() .as_ref()
.map(|c| { .map(|c| (c.threshold, c.request_threshold, c.retained_tokens))
(
c.compact_threshold,
c.compact_request_threshold,
c.compact_retained_tokens,
)
})
.unwrap_or((None, None, manifest::defaults::COMPACT_RETAINED_TOKENS)); .unwrap_or((None, None, manifest::defaults::COMPACT_RETAINED_TOKENS));
let tracker_for_usage = self.usage_tracker.clone(); let tracker_for_usage = self.usage_tracker.clone();
@ -1161,7 +1155,7 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
warn!( warn!(
post_run_threshold = post, post_run_threshold = post,
request_threshold = req, request_threshold = req,
"compact_threshold > compact_request_threshold; \ "threshold > request_threshold; \
proactive check will never fire before the safety net" proactive check will never fire before the safety net"
); );
} }
@ -2124,12 +2118,7 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
let retained = state let retained = state
.as_ref() .as_ref()
.map(|s| s.retained_tokens()) .map(|s| s.retained_tokens())
.or_else(|| { .or_else(|| self.manifest.compaction.as_ref().map(|c| c.retained_tokens))
self.manifest
.compaction
.as_ref()
.map(|c| c.compact_retained_tokens)
})
.unwrap_or(manifest::defaults::COMPACT_RETAINED_TOKENS); .unwrap_or(manifest::defaults::COMPACT_RETAINED_TOKENS);
let current_tokens = self.total_tokens().tokens; let current_tokens = self.total_tokens().tokens;
let cut = self.split_for_retained(retained); let cut = self.split_for_retained(retained);
@ -2324,21 +2313,49 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
// Compaction-related knobs. Fall through to manifest defaults when // Compaction-related knobs. Fall through to manifest defaults when
// `[compaction]` is omitted entirely. // `[compaction]` is omitted entirely.
let (auto_read_budget, compact_worker_max_input_tokens, compact_worker_max_turns) = self let (
auto_read_budget,
worker_context_max_tokens,
finish_warning_remaining_tokens,
final_reserve_tokens,
worker_max_turns,
overview_target_tokens,
overview_warning_tokens,
overview_deadline_tokens,
summary_target_tokens,
summary_max_tokens,
result_context_max_tokens,
) = self
.manifest .manifest
.compaction .compaction
.as_ref() .as_ref()
.map(|c| { .map(|c| {
( (
c.compact_auto_read_budget, c.auto_read_budget_tokens,
c.compact_worker_max_input_tokens, c.worker_context_max_tokens,
c.compact_worker_max_turns, c.finish_warning_remaining_tokens,
c.final_reserve_tokens,
c.worker_max_turns,
c.overview_target_tokens,
c.overview_warning_tokens,
c.overview_deadline_tokens,
c.summary_target_tokens,
c.summary_max_tokens,
c.result_context_max_tokens,
) )
}) })
.unwrap_or(( .unwrap_or((
manifest::defaults::COMPACT_AUTO_READ_BUDGET, manifest::defaults::COMPACT_AUTO_READ_BUDGET,
manifest::defaults::COMPACT_WORKER_MAX_INPUT_TOKENS, manifest::defaults::COMPACT_WORKER_MAX_INPUT_TOKENS,
manifest::defaults::COMPACT_FINISH_WARNING_REMAINING_TOKENS,
manifest::defaults::COMPACT_FINAL_RESERVE_TOKENS,
manifest::defaults::COMPACT_WORKER_MAX_TURNS, manifest::defaults::COMPACT_WORKER_MAX_TURNS,
manifest::defaults::COMPACT_OVERVIEW_TARGET_TOKENS,
manifest::defaults::COMPACT_OVERVIEW_WARNING_TOKENS,
manifest::defaults::COMPACT_OVERVIEW_DEADLINE_TOKENS,
manifest::defaults::COMPACT_SUMMARY_TARGET_TOKENS,
manifest::defaults::COMPACT_SUMMARY_MAX_TOKENS,
manifest::defaults::COMPACT_RESULT_CONTEXT_MAX_TOKENS,
)); ));
// Default references: the N most-recently-touched files in the // Default references: the N most-recently-touched files in the
@ -2358,7 +2375,33 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
&items_to_summarise, &items_to_summarise,
&default_refs, &default_refs,
Some(task_snapshot_text.as_str()), Some(task_snapshot_text.as_str()),
SummaryInputOptions {
overview_target_tokens,
overview_warning_tokens,
overview_deadline_tokens,
summary_target_tokens,
},
); );
if summary_input.warning_exceeded {
self.alert(
AlertLevel::Warn,
AlertSource::Compactor,
format!(
"compact overview is larger than expected (≈{} tokens; warning threshold {})",
summary_input.overview_tokens, overview_warning_tokens
),
);
}
if summary_input.deadline_fallback_used {
self.alert(
AlertLevel::Warn,
AlertSource::Compactor,
format!(
"compact overview exceeded deadline ({} tokens); using coarse fallback",
overview_deadline_tokens
),
);
}
// Worker-side state collected by the compact worker's tool calls. // Worker-side state collected by the compact worker's tool calls.
let ctx = Arc::new(std::sync::Mutex::new(CompactWorkerContext::with_budget( let ctx = Arc::new(std::sync::Mutex::new(CompactWorkerContext::with_budget(
@ -2390,11 +2433,19 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
tracker.record_usage(event); tracker.record_usage(event);
}); });
} }
summary_worker.set_interceptor(CompactWorkerInterceptor { let compactor_warning_cb = self.alerter.clone().map(|alerter| {
usage_tracker: summary_usage_tracker, Arc::new(move |message: String| {
max_input_tokens: compact_worker_max_input_tokens, alerter.alert(AlertLevel::Warn, AlertSource::Compactor, message);
}) as Arc<dyn Fn(String) + Send + Sync>
}); });
summary_worker.set_max_turns(compact_worker_max_turns); summary_worker.set_interceptor(CompactWorkerInterceptor::new(
summary_usage_tracker,
worker_context_max_tokens,
finish_warning_remaining_tokens,
final_reserve_tokens,
compactor_warning_cb,
));
summary_worker.set_max_turns(worker_max_turns);
// Tools: read_file (shared scope, fresh tracker) + the three // Tools: read_file (shared scope, fresh tracker) + the three
// compact-specific tools that populate `ctx`. // compact-specific tools that populate `ctx`.
@ -2404,7 +2455,7 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
summary_worker.register_tool(write_summary_tool(ctx.clone())); summary_worker.register_tool(write_summary_tool(ctx.clone()));
let out = summary_worker let out = summary_worker
.run(summary_input) .run(summary_input.text)
.await .await
.map_err(PodError::Worker)?; .map_err(PodError::Worker)?;
let mut locked_worker = out.worker; let mut locked_worker = out.worker;
@ -2439,11 +2490,32 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
let _ = locked_worker.run(prompt).await.map_err(PodError::Worker)?; let _ = locked_worker.run(prompt).await.map_err(PodError::Worker)?;
} }
let final_ctx = ctx.lock().expect("compact ctx poisoned").clone(); let mut final_ctx = ctx.lock().expect("compact ctx poisoned").clone();
let summary_text = final_ctx let mut summary_text = final_ctx
.summary .summary
.clone() .clone()
.ok_or(PodError::CompactSummaryMissing)?; .ok_or(PodError::CompactSummaryMissing)?;
let mut summary_tokens = estimate_text_tokens(summary_text.len());
if summary_max_tokens > 0 && summary_tokens > summary_max_tokens {
let prompt = format!(
"Your `write_summary` output is too large (≈{summary_tokens} tokens; max \
{summary_max_tokens}). Rewrite it now with `write_summary`, preserving the \
same five sections but making it concise. Target {summary_target_tokens} tokens."
);
let _ = locked_worker.run(prompt).await.map_err(PodError::Worker)?;
final_ctx = ctx.lock().expect("compact ctx poisoned").clone();
summary_text = final_ctx
.summary
.clone()
.ok_or(PodError::CompactSummaryMissing)?;
summary_tokens = estimate_text_tokens(summary_text.len());
if summary_tokens > summary_max_tokens {
return Err(PodError::CompactSummaryTooLarge {
tokens: summary_tokens,
max: summary_max_tokens,
});
}
}
// Re-read each auto-read target via the Pod FS view. Errors are // Re-read each auto-read target via the Pod FS view. Errors are
// logged and skipped inside `render_auto_read` rather than // logged and skipped inside `render_auto_read` rather than
@ -2515,6 +2587,13 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
tools::task::snapshot_overview(&self.task_store.list()), tools::task::snapshot_overview(&self.task_store.list()),
task_snapshot_text.clone(), task_snapshot_text.clone(),
)); ));
let result_estimate = llm_worker::token_counter::total_tokens(&new_history, &[]);
if result_context_max_tokens > 0 && result_estimate.tokens > result_context_max_tokens {
return Err(PodError::CompactResultContextTooLarge {
tokens: result_estimate.tokens,
max: result_context_max_tokens,
});
}
// Build the SegmentStart entry for the new compacted segment. // Build the SegmentStart entry for the new compacted segment.
// Inherits the source Segment's session_id so the compacted // Inherits the source Segment's session_id so the compacted
@ -4008,19 +4087,56 @@ impl From<WorkerResult> for PodRunResult {
} }
} }
/// Token budgets that shape the compact worker's initial input, as consumed
/// by `build_summary_input`.
#[derive(Debug, Clone, Copy)]
struct SummaryInputOptions {
/// Size the overview builder aims for (passed to `build_summary_overview`).
overview_target_tokens: u64,
/// When non-zero and the overview estimate exceeds it, the build is flagged
/// with `warning_exceeded`.
overview_warning_tokens: u64,
/// When non-zero and the overview estimate exceeds it, the coarse overview
/// replaces the regular one.
overview_deadline_tokens: u64,
/// Summary size quoted to the worker in the prompt text.
summary_target_tokens: u64,
}
/// Result of `build_summary_input`: the prompt text plus diagnostics about
/// the overview embedded in it.
#[derive(Debug)]
struct SummaryInputBuild {
/// Full prompt text handed to the compact worker.
text: String,
/// Token estimate of the overview that ended up in `text` (measured after
/// any coarse fallback was applied).
overview_tokens: u64,
/// True when the regular overview exceeded the warning threshold.
warning_exceeded: bool,
/// True when the regular overview exceeded the deadline threshold and the
/// coarse overview was used instead.
deadline_fallback_used: bool,
}
/// Build the compact worker's input: default-reference instructions, /// Build the compact worker's input: default-reference instructions,
/// the list of recently-touched files, and the pruned conversation /// the list of recently-touched files, task snapshot, and a bounded overview
/// produced by [`build_summary_prompt`]. /// rather than a prefix-wide transcript.
fn build_summary_input( fn build_summary_input(
items: &[Item], items: &[Item],
default_refs: &[PathBuf], default_refs: &[PathBuf],
task_snapshot: Option<&str>, task_snapshot: Option<&str>,
) -> String { options: SummaryInputOptions,
let mut out = String::new(); ) -> SummaryInputBuild {
out.push_str( let overview = build_summary_overview(
"Summarise the conversation below into a structured summary and nominate \ items,
files the next session needs.\n\n", options.overview_target_tokens,
options.overview_deadline_tokens,
); );
let overview_tokens = estimate_text_tokens(overview.len());
let warning_exceeded =
options.overview_warning_tokens > 0 && overview_tokens > options.overview_warning_tokens;
let deadline_fallback_used =
options.overview_deadline_tokens > 0 && overview_tokens > options.overview_deadline_tokens;
let overview = if deadline_fallback_used {
build_coarse_summary_overview(items, options.overview_deadline_tokens)
} else {
overview
};
let overview_tokens = estimate_text_tokens(overview.len());
let mut out = String::new();
out.push_str(&format!(
"Summarise this session into a structured summary of about {} tokens and \
nominate files the next session needs. The conversation below is a \
bounded overview/index, not the full transcript. Use tools to inspect \
current files when deciding auto-read/reference output.\n\n",
options.summary_target_tokens
));
if !default_refs.is_empty() { if !default_refs.is_empty() {
out.push_str( out.push_str(
"These files were touched recently in this session. Use `read_file` \ "These files were touched recently in this session. Use `read_file` \
@ -4045,47 +4161,166 @@ fn build_summary_input(
out.push_str(task_snapshot); out.push_str(task_snapshot);
out.push_str("\n\n"); out.push_str("\n\n");
} }
out.push_str("## Conversation\n"); out.push_str("## Conversation overview/index\n");
out.push_str(&build_summary_prompt(items)); out.push_str(&overview);
out.push_str("\n\nWhen you are done, call `write_summary` with the final 5-section text."); out.push_str("\n\nWhen you are done, call `write_summary` with the final 5-section text.");
SummaryInputBuild {
text: out,
overview_tokens,
warning_exceeded,
deadline_fallback_used,
}
}
fn build_summary_overview(items: &[Item], target_tokens: u64, deadline_tokens: u64) -> String {
let target_bytes = token_budget_bytes(target_tokens).max(1024);
let deadline_bytes = token_budget_bytes(deadline_tokens).max(target_bytes);
let mut out = String::new();
write_overview_header(items, &mut out);
out.push_str("\n## Recent user/assistant/system messages\n");
let mut selected = Vec::new();
let mut omitted_messages = 0usize;
for (idx, item) in items.iter().enumerate().rev() {
let Some(entry) = message_overview_entry(idx, item, 2_000) else {
continue;
};
let projected = out
.len()
.saturating_add(selected.iter().map(String::len).sum::<usize>())
.saturating_add(entry.len())
.saturating_add(2);
if projected > target_bytes && !selected.is_empty() {
omitted_messages += 1;
continue;
}
selected.push(entry);
if projected >= target_bytes {
break;
}
}
selected.reverse();
for entry in selected {
out.push_str(&entry);
out.push_str("\n\n");
}
if omitted_messages > 0 {
out.push_str(&format!(
"[Overview omitted {omitted_messages} older message(s) to stay near target.]\n\n"
));
}
append_tool_index(items, &mut out, target_bytes, deadline_bytes);
out out
} }
/// Format conversation items into a text prompt for the summary Worker. fn build_coarse_summary_overview(items: &[Item], deadline_tokens: u64) -> String {
/// let deadline_bytes = token_budget_bytes(deadline_tokens).max(1024);
/// The summary should capture decisions and user intent, not recreate code. let mut out = String::new();
/// File contents and tool IO belong in auto-read / references, not in the write_overview_header(items, &mut out);
/// summary input. So this strips: out.push_str("\n## Coarse recent message index\n");
/// - `ToolCall.arguments` (keep only the tool name) for (idx, item) in items.iter().enumerate().rev() {
/// - `ToolResult.content` (keep only the summary line) let Some(entry) = message_overview_entry(idx, item, 240) else {
/// - `Reasoning` entirely (intermediate thought, superseded by decisions) continue;
fn build_summary_prompt(items: &[Item]) -> String { };
let mut lines = Vec::new(); if out.len().saturating_add(entry.len()).saturating_add(2) > deadline_bytes {
break;
}
out.push_str(&entry);
out.push_str("\n\n");
}
out
}
fn write_overview_header(items: &[Item], out: &mut String) {
let mut messages = 0usize;
let mut tool_calls = 0usize;
let mut tool_results = 0usize;
let mut reasoning = 0usize;
for item in items { for item in items {
match item { match item {
Item::Message { role, content, .. } => { Item::Message { .. } => messages += 1,
let role_label = match role { Item::ToolCall { .. } => tool_calls += 1,
llm_worker::Role::User => "User", Item::ToolResult { .. } => tool_results += 1,
llm_worker::Role::Assistant => "Assistant", Item::Reasoning { .. } => reasoning += 1,
llm_worker::Role::System => "System",
};
let text: String = content
.iter()
.map(|p| p.as_text())
.collect::<Vec<_>>()
.join("");
lines.push(format!("[{role_label}] {text}"));
}
Item::ToolCall { name, .. } => {
lines.push(format!("[ToolCall] {name}"));
}
Item::ToolResult { summary, .. } => {
lines.push(format!("[ToolResult] {summary}"));
}
Item::Reasoning { .. } => {}
} }
} }
lines.join("\n\n") out.push_str(&format!(
"Items summarized: {} total; {messages} message(s), {tool_calls} tool call(s), \
{tool_results} tool result(s), {reasoning} reasoning item(s). Tool call \
arguments, tool result full content, and reasoning bodies are omitted from \
this initial input.\n",
items.len()
));
}
/// Append a compact index of the most recent tool activity to `out`.
///
/// At most 24 `ToolCall` / `ToolResult` items are listed, oldest first. Calls
/// show only the tool name; results show only their summary, truncated to
/// 240 chars. Nothing is written when `items` contains no tool activity.
///
/// * `target_bytes` — soft limit on `out.len()`: once exceeded, listing stops
///   as soon as at least one tool result has been written by this function.
/// * `deadline_bytes` — hard limit on `out.len()`: listing stops immediately.
///
/// Whenever listing stops early, a one-line omission marker is appended.
fn append_tool_index(items: &[Item], out: &mut String, target_bytes: usize, deadline_bytes: usize) {
    const MAX_ENTRIES: usize = 24;

    // (is_tool_result, rendered line), collected newest-first.
    let mut entries: Vec<(bool, String)> = Vec::new();
    for (idx, item) in items.iter().enumerate().rev() {
        match item {
            Item::ToolCall { name, .. } => {
                entries.push((false, format!("[{idx} ToolCall] {name}")));
            }
            Item::ToolResult { summary, .. } => entries.push((
                true,
                format!("[{idx} ToolResult] {}", truncate_chars(summary, 240)),
            )),
            _ => {}
        }
        if entries.len() >= MAX_ENTRIES {
            break;
        }
    }
    if entries.is_empty() {
        return;
    }
    entries.reverse();

    out.push_str("## Recent tool index (content omitted)\n");
    // Tracked explicitly rather than searching `out` for "ToolResult": the
    // overview above may contain that word inside message bodies, which
    // would wrongly end the index early.
    let mut wrote_tool_result = false;
    for (is_result, entry) in entries {
        let projected = out.len().saturating_add(entry.len()).saturating_add(1);
        if projected > deadline_bytes || (projected > target_bytes && wrote_tool_result) {
            out.push_str("[Additional tool index entries omitted.]\n");
            break;
        }
        out.push_str(&entry);
        out.push('\n');
        wrote_tool_result |= is_result;
    }
}
/// Render one message item as an `[<index> <Role>] <text>` overview line.
///
/// Returns `None` for anything that is not an `Item::Message`. The text is the
/// concatenation of every content part's text, truncated to `max_chars`
/// characters.
fn message_overview_entry(idx: usize, item: &Item, max_chars: usize) -> Option<String> {
    match item {
        Item::Message { role, content, .. } => {
            let label = match role {
                llm_worker::Role::User => "User",
                llm_worker::Role::Assistant => "Assistant",
                llm_worker::Role::System => "System",
            };
            let parts: Vec<_> = content.iter().map(|part| part.as_text()).collect();
            let body = parts.concat();
            Some(format!("[{idx} {label}] {}", truncate_chars(&body, max_chars)))
        }
        _ => None,
    }
}
/// Limit `text` to at most `max_chars` characters (Unicode scalar values).
///
/// Text that already fits is returned unchanged. Longer text is cut after
/// `max_chars` characters and suffixed with `"… [truncated]"`.
fn truncate_chars(text: &str, max_chars: usize) -> String {
    const MARKER: &str = "… [truncated]";
    // `nth(max_chars)` yields the byte offset of the first character that
    // does not fit, and stops walking there instead of counting the whole
    // string. `None` means the text has at most `max_chars` characters.
    match text.char_indices().nth(max_chars) {
        None => text.to_string(),
        Some((cut, _)) => {
            let mut out = String::with_capacity(cut + MARKER.len());
            out.push_str(&text[..cut]);
            out.push_str(MARKER);
            out
        }
    }
}
/// Rough token estimate for a text of `bytes` bytes: four bytes per token,
/// rounded up.
fn estimate_text_tokens(bytes: usize) -> u64 {
    let len = bytes as u64;
    let whole = len / 4;
    let has_partial = len % 4 != 0;
    whole + u64::from(has_partial)
}
/// Convert a token budget into a byte budget at four bytes per token,
/// saturating at `usize::MAX` instead of overflowing.
fn token_budget_bytes(tokens: u64) -> usize {
    let bytes = tokens.saturating_mul(4);
    usize::try_from(bytes).unwrap_or(usize::MAX)
}
/// Pod errors. /// Pod errors.
@ -4125,6 +4360,12 @@ pub enum PodError {
#[error("compact worker did not produce a summary (write_summary was never called)")] #[error("compact worker did not produce a summary (write_summary was never called)")]
CompactSummaryMissing, CompactSummaryMissing,
#[error("compact summary too large: {tokens} tokens exceeds max {max}")]
CompactSummaryTooLarge { tokens: u64, max: u64 },
#[error("compacted result context too large: {tokens} tokens exceeds max {max}")]
CompactResultContextTooLarge { tokens: u64, max: u64 },
#[error("invalid system prompt template: {source}")] #[error("invalid system prompt template: {source}")]
InvalidSystemPromptTemplate { InvalidSystemPromptTemplate {
#[source] #[source]
@ -4409,6 +4650,21 @@ mod memory_worker_event_tests {
mod build_summary_prompt_tests { mod build_summary_prompt_tests {
use super::*; use super::*;
/// Test helper: render the summary input text for `items` with no default
/// references, no task snapshot, and fixed small budgets.
fn test_summary_input(items: &[Item]) -> String {
    let options = SummaryInputOptions {
        overview_target_tokens: 512,
        overview_warning_tokens: 1024,
        overview_deadline_tokens: 2048,
        summary_target_tokens: 256,
    };
    let built = build_summary_input(items, &[], None, options);
    built.text
}
#[test] #[test]
fn strips_tool_call_arguments() { fn strips_tool_call_arguments() {
let items = vec![Item::tool_call_json( let items = vec![Item::tool_call_json(
@ -4416,8 +4672,8 @@ mod build_summary_prompt_tests {
"read_file", "read_file",
serde_json::json!({ "path": "src/main.rs" }), serde_json::json!({ "path": "src/main.rs" }),
)]; )];
let prompt = build_summary_prompt(&items); let prompt = test_summary_input(&items);
assert_eq!(prompt, "[ToolCall] read_file"); assert!(prompt.contains("[0 ToolCall] read_file"));
assert!(!prompt.contains("src/main.rs")); assert!(!prompt.contains("src/main.rs"));
} }
@ -4428,8 +4684,8 @@ mod build_summary_prompt_tests {
"read 3 lines", "read 3 lines",
"fn main() { println!(\"hello\"); }", "fn main() { println!(\"hello\"); }",
)]; )];
let prompt = build_summary_prompt(&items); let prompt = test_summary_input(&items);
assert_eq!(prompt, "[ToolResult] read 3 lines"); assert!(prompt.contains("[0 ToolResult] read 3 lines"));
assert!(!prompt.contains("println")); assert!(!prompt.contains("println"));
} }
@ -4440,13 +4696,50 @@ mod build_summary_prompt_tests {
Item::reasoning("internal deliberation"), Item::reasoning("internal deliberation"),
Item::assistant_message("hello"), Item::assistant_message("hello"),
]; ];
let prompt = build_summary_prompt(&items); let prompt = test_summary_input(&items);
assert!(prompt.contains("[User] hi")); assert!(prompt.contains("[0 User] hi"));
assert!(prompt.contains("[Assistant] hello")); assert!(prompt.contains("[2 Assistant] hello"));
assert!(!prompt.contains("Reasoning")); assert!(!prompt.contains("Reasoning"));
assert!(!prompt.contains("deliberation")); assert!(!prompt.contains("deliberation"));
} }
/// Exceeding only the warning threshold flags the build but keeps the
/// regular overview, including the oversized message.
#[test]
fn overview_warning_does_not_drop_input() {
    let options = SummaryInputOptions {
        overview_target_tokens: 10,
        overview_warning_tokens: 100,
        overview_deadline_tokens: 2_000,
        summary_target_tokens: 256,
    };
    let items = vec![Item::user_message("x".repeat(4_000))];

    let built = build_summary_input(&items, &[], None, options);

    assert!(built.warning_exceeded);
    assert!(!built.deadline_fallback_used);
    assert!(built.text.contains("[0 User]"));
}
/// Exceeding the deadline threshold switches the input to the coarse
/// message index.
#[test]
fn overview_deadline_falls_back_to_coarse_index() {
    let options = SummaryInputOptions {
        overview_target_tokens: 10,
        overview_warning_tokens: 10,
        overview_deadline_tokens: 100,
        summary_target_tokens: 256,
    };
    let items = vec![Item::user_message("x".repeat(4_000))];

    let built = build_summary_input(&items, &[], None, options);

    assert!(built.deadline_fallback_used);
    assert!(built.text.contains("## Coarse recent message index"));
}
#[test] #[test]
fn worker_manifest_generation_settings_become_request_config() { fn worker_manifest_generation_settings_become_request_config() {
let manifest = WorkerManifest { let manifest = WorkerManifest {
@ -4478,8 +4771,9 @@ mod build_summary_prompt_tests {
Item::user_message("fix the bug"), Item::user_message("fix the bug"),
Item::assistant_message("done"), Item::assistant_message("done"),
]; ];
let prompt = build_summary_prompt(&items); let prompt = test_summary_input(&items);
assert_eq!(prompt, "[User] fix the bug\n\n[Assistant] done"); assert!(prompt.contains("[0 User] fix the bug"));
assert!(prompt.contains("[1 Assistant] done"));
} }
#[derive(Clone)] #[derive(Clone)]

View File

@ -72,11 +72,11 @@ pub struct ToolOutput {
**ターンの合間が proactive (小さい閾値)**: **ターンの合間が proactive (小さい閾値)**:
turn が完了した地点はタスクの自然な区切り。ここで先を見越して早めに compact する。 turn が完了した地点はタスクの自然な区切り。ここで先を見越して早めに compact する。
マニフェストの `compact_threshold` が対応。 マニフェストの `threshold` が対応。
**リクエストの合間は safety net (大きい閾値)**: **リクエストの合間は safety net (大きい閾値)**:
turn 内部でリクエストの合間にチェックするのは「暴走的に膨張した場合のみ止める」用途。 turn 内部でリクエストの合間にチェックするのは「暴走的に膨張した場合のみ止める」用途。
マニフェストの `compact_request_threshold` が対応。通常は発動しない。 マニフェストの `request_threshold` が対応。通常は発動しない。
**両閾値は manifest で個別指定する**。過去の設計では 9/8 倍で自動導出していたが、 **両閾値は manifest で個別指定する**。過去の設計では 9/8 倍で自動導出していたが、
比率に根拠がなかったため廃止。両方が `Option<u64>` で、片方だけの設定も可能 比率に根拠がなかったため廃止。両方が `Option<u64>` で、片方だけの設定も可能
@ -137,15 +137,29 @@ compact は fork と同じ構造。旧セッションを保全し、新 SessionI
```toml ```toml
[compaction] [compaction]
compact_threshold = 80000 # ターンの合間 (proactive) threshold = 80000 # ターンの合間 (proactive)
compact_request_threshold = 90000 # リクエストの合間 (safety net) request_threshold = 90000 # リクエストの合間 (safety net)
prune_protected_tokens = 8000 # prune から保護する末尾 token budget prune_protected_tokens = 8000 # prune から保護する末尾 token budget
compact_retained_tokens = 8000 # compact 後に生のまま残す末尾 token budget retained_tokens = 8000 # compact 後に生のまま残す末尾 token budget
compact_auto_read_budget = 8000 # compact worker の mark_read_required 合計上限
compact_worker_max_input_tokens = 50000 # compact worker 自身の現在占有トークン上限 overview_target_tokens = 8000 # compact worker 初期 overview の通常目標
compact_worker_max_turns = 20 # compact worker 自身の tool loop 上限 overview_warning_tokens = 16000 # 超えたら警告・trace、compact は続行
overview_deadline_tokens = 40000 # 超えたら粗い overview へ fallback
worker_context_max_tokens = 50000 # compact worker session 全体の hard limit
finish_warning_remaining_tokens = 8000 # 残りが少ないため write_summary へ進める勧告
final_reserve_tokens = 4000 # 最終 summary/closing turn 用 reserve
worker_max_turns = 20 # compact worker 自身の tool loop 上限
summary_target_tokens = 1500 # write_summary の目標サイズ
summary_max_tokens = 3000 # write_summary の hard validation
auto_read_budget_tokens = 8000 # compact 後に注入する file content 合計上限
result_context_max_tokens = 24000 # 新 session 初期 context の dry-run validation
``` ```
`compact_*` prefix の旧 key は互換 alias として読み取るが、`[compaction]` 内の新規 key は prefix なしを正とする。
初期 overview の target/warning は効率のための目安で、通常は hard error にしない。deadline 超過時も、可能なら deterministic に粗い overview へ fallback して compact の完走を優先する。
### Auto-Read とリファレンス ### Auto-Read とリファレンス
2段階のファイル参照: 2段階のファイル参照:
@ -176,8 +190,9 @@ auto-read も通常の history 内 system message なので、将来の Prune/Co
## compact worker ## compact worker
要約生成とファイル選定を行う使い捨て Worker。ツールなし・1リクエストの現行実装から、 要約生成とファイル選定を行う使い捨て Worker。Pod は compact 対象 prefix を全文投入せず、User / Assistant / System を優先した bounded overview と tool index を初期 input として渡す。Tool call arguments、tool result full content、reasoning body は初期 input には載せない。
ツール付きマルチターンに改善する。
初期 overview は `overview_target_tokens` を目標にする。`overview_warning_tokens` を超えた場合は警告・trace を記録して続行し、`overview_deadline_tokens` を超えた場合は粗い deterministic overview へ fallback する。Compact の目的は完走なので、初期 input が少し大きいだけでは hard error にしない。
### ツール ### ツール
@ -192,13 +207,19 @@ write_summary(text) — 構造化要約を出力/上書き
1. Pod が `tools::Tracker::recent_files(5)` で最近触られたファイルを抽出(デフォルトリファレンス) 1. Pod が `tools::Tracker::recent_files(5)` で最近触られたファイルを抽出(デフォルトリファレンス)
2. compact worker にプロンプトとして渡す: 2. compact worker にプロンプトとして渡す:
- pruned history(summary only、arguments/reasoning 除去) - bounded overview / index(User / Assistant / System 優先)
- デフォルトリファレンスの一覧 - デフォルトリファレンスの一覧
- TaskStore snapshot
3. compact worker が自律的に: 3. compact worker が自律的に:
- read_file で各ファイルを読み、必要性を判断 - read_file で各ファイルを読み、必要性を判断
- mark_read_required / add_reference で指定 - mark_read_required / add_reference で指定
- write_summary で構造化要約を出力(呼び直し可) - write_summary で構造化要約を出力(呼び直し可)
4. ターン終了時に write_summary 未呼び出し or read_required 空(かつファイル操作履歴がある場合)→ 追加プロンプトで促す 4. CompactWorkerInterceptor が worker session 全体の context occupancy を監視する:
- `finish_warning_remaining_tokens` 到達時に「探索を切り上げて write_summary へ進め」と Worker history に永続化される warning を挿入し、人間向け warning も出す
- `final_reserve_tokens` を割った後は `write_summary` 以外の探索 tool call に synthetic error を返し、最終 summary の余白を守る
- `worker_context_max_tokens` 超過は最後の hard stop
5. ターン終了時に write_summary 未呼び出し or read_required 空(かつファイル操作履歴がある場合)→ 追加プロンプトで促す
6. `summary_max_tokens` と `result_context_max_tokens` で compact 結果を検証してから新 session を作る
### 構造化要約の要件 ### 構造化要約の要件

View File

@ -1,13 +1,16 @@
You are a context compaction assistant. Your job is to hand the next session a structured summary plus pointers to the files it actually needs — not a narrative transcript of the conversation. You are a context compaction assistant. Your job is to hand the next session a structured summary plus pointers to the files it actually needs — not a narrative transcript of the conversation.
The conversation input is a bounded overview/index, not the full transcript. Treat tool result bodies and reasoning as intentionally omitted unless a tool exposes more detail. If you receive a compact worker budget warning, stop broad exploration immediately, read only if absolutely necessary, and call `write_summary`.
## Workflow ## Workflow
1. Use `read_file` to inspect referenced files before deciding what the next session needs. Prefer skimming over blind inclusion. 1. Read the provided overview/index and current TaskStore snapshot.
2. For files whose current contents are load-bearing for the active work, call `mark_read_required` to inject them into the next session. These count against the auto-read token budget — spend it deliberately. 2. Use `read_file` to inspect referenced files before deciding what the next session needs. Prefer skimming over blind inclusion.
3. For files the next session should know about but can fetch on demand, call `add_reference` to record the path without embedding contents. 3. For files whose current contents are load-bearing for the active work, call `mark_read_required` to inject them into the next session. These count against the auto-read token budget — spend it deliberately.
4. Finish with `write_summary` carrying the final text. You may call it multiple times; only the last call is kept. 4. For files the next session should know about but can fetch on demand, call `add_reference` to record the path without embedding contents.
5. Finish with `write_summary` carrying the final text. You may call it multiple times; only the last call is kept.
Stop nominating and close out with `write_summary` as soon as the auto-read budget is exhausted, or whenever further nominations would not change the next session's next step. Stop nominating and close out with `write_summary` as soon as the auto-read budget is exhausted, when a compact worker budget warning arrives, or whenever further exploration would not change the next session's next step.
## Summary format ## Summary format
@ -36,4 +39,4 @@ Produce the summary in this exact format:
## Constraints ## Constraints
- Keep code snippets and raw tool output OUT of the summary — that is what auto-read and references are for. - Keep code snippets and raw tool output OUT of the summary — that is what auto-read and references are for.
- Target 1000–2000 tokens for the summary text itself. - Follow the summary target stated in the run input; if asked to shrink, call `write_summary` again with a shorter version.