diff --git a/crates/manifest/src/config.rs b/crates/manifest/src/config.rs index 0d736e9f..ca447e19 100644 --- a/crates/manifest/src/config.rs +++ b/crates/manifest/src/config.rs @@ -86,7 +86,7 @@ pub struct CompactionConfigPartial { #[serde(default)] pub compact_request_threshold: Option, #[serde(default)] - pub compact_retained_turns: Option, + pub compact_retained_tokens: Option, #[serde(default)] pub provider: Option, } @@ -241,9 +241,9 @@ impl CompactionConfigPartial { compact_request_threshold: upper .compact_request_threshold .or(self.compact_request_threshold), - compact_retained_turns: upper - .compact_retained_turns - .or(self.compact_retained_turns), + compact_retained_tokens: upper + .compact_retained_tokens + .or(self.compact_retained_tokens), provider: merge_option(self.provider, upper.provider, ProviderConfigPartial::merge), } } @@ -371,9 +371,9 @@ impl TryFrom for PodManifest { .unwrap_or(defaults::PRUNE_MIN_SAVINGS), compact_threshold: c.compact_threshold, compact_request_threshold: c.compact_request_threshold, - compact_retained_turns: c - .compact_retained_turns - .unwrap_or(defaults::COMPACT_RETAINED_TURNS), + compact_retained_tokens: c + .compact_retained_tokens + .unwrap_or(defaults::COMPACT_RETAINED_TOKENS), provider: comp_provider, }) }) diff --git a/crates/manifest/src/defaults.rs b/crates/manifest/src/defaults.rs index 273b693d..cca50e1f 100644 --- a/crates/manifest/src/defaults.rs +++ b/crates/manifest/src/defaults.rs @@ -18,9 +18,11 @@ pub const PRUNE_PROTECTED_TURNS: usize = 3; /// [`crate::CompactionConfig::prune_min_savings`]. pub const PRUNE_MIN_SAVINGS: u64 = 4096; -/// Number of most-recent turns retained after a compact. See -/// [`crate::CompactionConfig::compact_retained_turns`]. -pub const COMPACT_RETAINED_TURNS: usize = 2; +/// Token budget retained (unchanged) at the tail of the history across +/// a compact. Items whose cumulative token count fits within this budget +/// starting from the end are kept verbatim; the rest are summarised. +/// See [`crate::CompactionConfig::compact_retained_tokens`]. +pub const COMPACT_RETAINED_TOKENS: u64 = 8000; /// Default instruction asset reference used when `worker.instruction` /// is omitted. See the `PromptLoader` prefix addressing scheme for the diff --git a/crates/manifest/src/lib.rs b/crates/manifest/src/lib.rs index 4c07f043..d387b619 100644 --- a/crates/manifest/src/lib.rs +++ b/crates/manifest/src/lib.rs @@ -195,9 +195,11 @@ pub struct CompactionConfig { #[serde(default)] pub compact_request_threshold: Option, - /// Number of recent turns retained after compaction. - #[serde(default = "default_compact_retained_turns")] - pub compact_retained_turns: usize, + /// Token budget retained verbatim at the tail of the history after + /// compaction. Measured against the occupancy estimate from + /// `UsageRecord` history; turn boundaries are ignored. + #[serde(default = "default_compact_retained_tokens")] + pub compact_retained_tokens: u64, /// Optional provider for the compactor (summary) LLM. /// If omitted, the main provider is cloned via `clone_boxed()`. @@ -211,8 +213,8 @@ fn default_prune_protected_turns() -> usize { fn default_prune_min_savings() -> u64 { defaults::PRUNE_MIN_SAVINGS } -fn default_compact_retained_turns() -> usize { - defaults::COMPACT_RETAINED_TURNS +fn default_compact_retained_tokens() -> u64 { + defaults::COMPACT_RETAINED_TOKENS } impl Default for CompactionConfig { @@ -222,7 +224,7 @@ impl Default for CompactionConfig { prune_min_savings: default_prune_min_savings(), compact_threshold: None, compact_request_threshold: None, - compact_retained_turns: default_compact_retained_turns(), + compact_retained_tokens: default_compact_retained_tokens(), provider: None, } } @@ -357,7 +359,7 @@ model = "claude-sonnet-4-20250514" assert_eq!(c.prune_min_savings, 4096); assert_eq!(c.compact_threshold, Some(80000)); assert_eq!(c.compact_request_threshold, None); - assert_eq!(c.compact_retained_turns, 2); + assert_eq!(c.compact_retained_tokens, 8000); } #[test] diff --git a/crates/pod/src/compact_state.rs b/crates/pod/src/compact_state.rs index a01fa5f4..4f6ff106 100644 --- a/crates/pod/src/compact_state.rs +++ b/crates/pod/src/compact_state.rs @@ -25,8 +25,8 @@ pub(crate) struct CompactState { /// Between-requests threshold (safety net). Checked inside a turn /// before each LLM request. `None` disables the request check. request_threshold: Option, - /// Number of recent turns to retain after compaction. - retained_turns: usize, + /// Token budget retained verbatim at the tail after compaction. + retained_tokens: u64, /// Consecutive compact failures. At `MAX_COMPACT_FAILURES`, compaction is disabled. consecutive_failures: AtomicUsize, /// `true` immediately after a successful compact, cleared on next normal completion. @@ -39,12 +39,12 @@ impl CompactState { pub(crate) fn new( post_run_threshold: Option, request_threshold: Option, - retained_turns: usize, + retained_tokens: u64, ) -> Self { Self { post_run_threshold, request_threshold, - retained_turns, + retained_tokens, consecutive_failures: AtomicUsize::new(0), just_compacted: AtomicBool::new(false), disabled: AtomicBool::new(false), @@ -56,9 +56,9 @@ impl CompactState { self.request_threshold } - /// Number of turns to retain after compaction. - pub(crate) fn retained_turns(&self) -> usize { - self.retained_turns + /// Token budget retained verbatim at the tail after compaction. + pub(crate) fn retained_tokens(&self) -> u64 { + self.retained_tokens } /// Whether compaction has been disabled by the circuit breaker. @@ -115,7 +115,7 @@ mod tests { fn both_thresholds_configured() { let state = CompactState::new(Some(80_000), Some(90_000), 2); assert_eq!(state.request_threshold(), Some(90_000)); - assert_eq!(state.retained_turns(), 2); + assert_eq!(state.retained_tokens(), 2); assert!(!state.exceeds_request(70_000)); assert!(!state.exceeds_post_run(70_000)); diff --git a/crates/pod/src/pod.rs b/crates/pod/src/pod.rs index 13006a88..247ece40 100644 --- a/crates/pod/src/pod.rs +++ b/crates/pod/src/pod.rs @@ -415,10 +415,10 @@ impl Pod { ( c.compact_threshold, c.compact_request_threshold, - c.compact_retained_turns, + c.compact_retained_tokens, ) }) - .unwrap_or((None, None, 2)); + .unwrap_or((None, None, manifest::defaults::COMPACT_RETAINED_TOKENS)); let tracker_for_usage = self.usage_tracker.clone(); self.worker_mut().on_usage(move |event| { @@ -648,8 +648,8 @@ impl Pod { let retained = self .compact_state .as_ref() - .map(|s| s.retained_turns()) - .unwrap_or(2); + .map(|s| s.retained_tokens()) + .unwrap_or(manifest::defaults::COMPACT_RETAINED_TOKENS); match self.compact(retained).await { Ok(new_session_id) => { @@ -691,7 +691,7 @@ impl Pod { return Ok(()); } - let retained = state.retained_turns(); + let retained = state.retained_tokens(); match self.compact(retained).await { Ok(new_session_id) => { info!( @@ -791,24 +791,15 @@ impl Pod { /// - a clone of the main LlmClient via `clone_boxed()`. /// /// Returns the new session ID. - pub async fn compact(&mut self, retained_turns: usize) -> Result { + pub async fn compact(&mut self, retained_tokens: u64) -> Result { + // Decide the cut point by projecting the UsageRecord timeline onto + // the current history: keep the tail whose estimated token count is + // within `retained_tokens`. Item-granular, turn boundaries ignored. + let cut = self.split_for_retained(retained_tokens); + let worker = self.worker.as_ref().expect("worker taken during run"); let history = worker.history(); - - // Identify turn boundaries (user message positions). - let turn_starts: Vec = history - .iter() - .enumerate() - .filter(|(_, item)| item.is_user_message()) - .map(|(i, _)| i) - .collect(); - - // Items to retain: everything from `retained_turns` turns ago onward. - let retain_from = if turn_starts.len() > retained_turns { - turn_starts[turn_starts.len() - retained_turns] - } else { - 0 - }; + let retain_from = cut.index.min(history.len()); let retained_items = history[retain_from..].to_vec(); let items_to_summarise = &history[..retain_from]; diff --git a/crates/pod/tests/system_prompt_template_test.rs b/crates/pod/tests/system_prompt_template_test.rs index ab273e90..1fe3d1a1 100644 --- a/crates/pod/tests/system_prompt_template_test.rs +++ b/crates/pod/tests/system_prompt_template_test.rs @@ -250,7 +250,7 @@ async fn agents_md_not_reread_after_compact() { // Mutate the file after the first turn — must not affect the cached // system prompt either on a subsequent turn or across compaction. std::fs::write(&agents_path, "mutated").unwrap(); - pod.compact(1).await.unwrap(); + pod.compact(0).await.unwrap(); let after_compact = pod.worker().get_system_prompt().unwrap().to_string(); assert!(after_compact.contains("original")); assert!(!after_compact.contains("mutated")); @@ -277,7 +277,7 @@ async fn compact_preserves_system_prompt() { let before = pod.worker().get_system_prompt().unwrap().to_string(); pod.run("second").await.unwrap(); - pod.compact(1).await.unwrap(); + pod.compact(0).await.unwrap(); let after = pod.worker().get_system_prompt().unwrap().to_string(); assert_eq!(before, after);