compact: retained_turns を retained_tokens に置換

保護単位をターン数からトークン量に変更。compact 時のカット位置は Pod::split_for_retained() で UsageRecord を逆算ソースとして決定し、ターン境界ではなくアイテム単位で切る。デフォルトは 8000 トークン。
2026-04-19 08:56:16 +09:00 · 2026-04-19 08:56:16 +09:00 · 758ced5e7f
commit 758ced5e7f
parent da16015768
6 changed files with 43 additions and 48 deletions
--- a/crates/manifest/src/config.rs
+++ b/crates/manifest/src/config.rs
@@ -86,7 +86,7 @@ pub struct CompactionConfigPartial {
    #[serde(default)]
    pub compact_request_threshold: Option<u64>,
    #[serde(default)]
-    pub compact_retained_turns: Option<usize>,
+    pub compact_retained_tokens: Option<u64>,
    #[serde(default)]
    pub provider: Option<ProviderConfigPartial>,
 }
@@ -241,9 +241,9 @@ impl CompactionConfigPartial {
            compact_request_threshold: upper
                .compact_request_threshold
                .or(self.compact_request_threshold),
-            compact_retained_turns: upper
-                .compact_retained_turns
-                .or(self.compact_retained_turns),
+            compact_retained_tokens: upper
+                .compact_retained_tokens
+                .or(self.compact_retained_tokens),
            provider: merge_option(self.provider, upper.provider, ProviderConfigPartial::merge),
        }
    }
@@ -371,9 +371,9 @@ impl TryFrom<PodManifestConfig> for PodManifest {
                        .unwrap_or(defaults::PRUNE_MIN_SAVINGS),
                    compact_threshold: c.compact_threshold,
                    compact_request_threshold: c.compact_request_threshold,
-                    compact_retained_turns: c
-                        .compact_retained_turns
-                        .unwrap_or(defaults::COMPACT_RETAINED_TURNS),
+                    compact_retained_tokens: c
+                        .compact_retained_tokens
+                        .unwrap_or(defaults::COMPACT_RETAINED_TOKENS),
                    provider: comp_provider,
                })
            })
--- a/crates/manifest/src/defaults.rs
+++ b/crates/manifest/src/defaults.rs
@@ -18,9 +18,11 @@ pub const PRUNE_PROTECTED_TURNS: usize = 3;
 /// [`crate::CompactionConfig::prune_min_savings`].
 pub const PRUNE_MIN_SAVINGS: u64 = 4096;

-/// Number of most-recent turns retained after a compact. See
-/// [`crate::CompactionConfig::compact_retained_turns`].
-pub const COMPACT_RETAINED_TURNS: usize = 2;
+/// Token budget retained (unchanged) at the tail of the history across
+/// a compact. Items whose cumulative token count fits within this budget
+/// starting from the end are kept verbatim; the rest are summarised.
+/// See [`crate::CompactionConfig::compact_retained_tokens`].
+pub const COMPACT_RETAINED_TOKENS: u64 = 8000;

 /// Default instruction asset reference used when `worker.instruction`
 /// is omitted. See the `PromptLoader` prefix addressing scheme for the
--- a/crates/manifest/src/lib.rs
+++ b/crates/manifest/src/lib.rs
@@ -195,9 +195,11 @@ pub struct CompactionConfig {
    #[serde(default)]
    pub compact_request_threshold: Option<u64>,

-    /// Number of recent turns retained after compaction.
-    #[serde(default = "default_compact_retained_turns")]
-    pub compact_retained_turns: usize,
+    /// Token budget retained verbatim at the tail of the history after
+    /// compaction. Measured against the occupancy estimate from
+    /// `UsageRecord` history; turn boundaries are ignored.
+    #[serde(default = "default_compact_retained_tokens")]
+    pub compact_retained_tokens: u64,

    /// Optional provider for the compactor (summary) LLM.
    /// If omitted, the main provider is cloned via `clone_boxed()`.
@@ -211,8 +213,8 @@ fn default_prune_protected_turns() -> usize {
 fn default_prune_min_savings() -> u64 {
    defaults::PRUNE_MIN_SAVINGS
 }
-fn default_compact_retained_turns() -> usize {
-    defaults::COMPACT_RETAINED_TURNS
+fn default_compact_retained_tokens() -> u64 {
+    defaults::COMPACT_RETAINED_TOKENS
 }

 impl Default for CompactionConfig {
@@ -222,7 +224,7 @@ impl Default for CompactionConfig {
            prune_min_savings: default_prune_min_savings(),
            compact_threshold: None,
            compact_request_threshold: None,
-            compact_retained_turns: default_compact_retained_turns(),
+            compact_retained_tokens: default_compact_retained_tokens(),
            provider: None,
        }
    }
@@ -357,7 +359,7 @@ model = "claude-sonnet-4-20250514"
        assert_eq!(c.prune_min_savings, 4096);
        assert_eq!(c.compact_threshold, Some(80000));
        assert_eq!(c.compact_request_threshold, None);
-        assert_eq!(c.compact_retained_turns, 2);
+        assert_eq!(c.compact_retained_tokens, 8000);
    }

    #[test]
--- a/crates/pod/src/compact_state.rs
+++ b/crates/pod/src/compact_state.rs
@@ -25,8 +25,8 @@ pub(crate) struct CompactState {
    /// Between-requests threshold (safety net). Checked inside a turn
    /// before each LLM request. `None` disables the request check.
    request_threshold: Option<u64>,
-    /// Number of recent turns to retain after compaction.
-    retained_turns: usize,
+    /// Token budget retained verbatim at the tail after compaction.
+    retained_tokens: u64,
    /// Consecutive compact failures. At `MAX_COMPACT_FAILURES`, compaction is disabled.
    consecutive_failures: AtomicUsize,
    /// `true` immediately after a successful compact, cleared on next normal completion.
@@ -39,12 +39,12 @@ impl CompactState {
    pub(crate) fn new(
        post_run_threshold: Option<u64>,
        request_threshold: Option<u64>,
-        retained_turns: usize,
+        retained_tokens: u64,
    ) -> Self {
        Self {
            post_run_threshold,
            request_threshold,
-            retained_turns,
+            retained_tokens,
            consecutive_failures: AtomicUsize::new(0),
            just_compacted: AtomicBool::new(false),
            disabled: AtomicBool::new(false),
@@ -56,9 +56,9 @@ impl CompactState {
        self.request_threshold
    }

-    /// Number of turns to retain after compaction.
-    pub(crate) fn retained_turns(&self) -> usize {
-        self.retained_turns
+    /// Token budget retained verbatim at the tail after compaction.
+    pub(crate) fn retained_tokens(&self) -> u64 {
+        self.retained_tokens
    }

    /// Whether compaction has been disabled by the circuit breaker.
@@ -115,7 +115,7 @@ mod tests {
    fn both_thresholds_configured() {
        let state = CompactState::new(Some(80_000), Some(90_000), 2);
        assert_eq!(state.request_threshold(), Some(90_000));
-        assert_eq!(state.retained_turns(), 2);
+        assert_eq!(state.retained_tokens(), 2);

        assert!(!state.exceeds_request(70_000));
        assert!(!state.exceeds_post_run(70_000));
--- a/crates/pod/src/pod.rs
+++ b/crates/pod/src/pod.rs
@@ -415,10 +415,10 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
                    (
                        c.compact_threshold,
                        c.compact_request_threshold,
-                        c.compact_retained_turns,
+                        c.compact_retained_tokens,
                    )
                })
-                .unwrap_or((None, None, 2));
+                .unwrap_or((None, None, manifest::defaults::COMPACT_RETAINED_TOKENS));

            let tracker_for_usage = self.usage_tracker.clone();
            self.worker_mut().on_usage(move |event| {
@@ -648,8 +648,8 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
            let retained = self
                .compact_state
                .as_ref()
-                .map(|s| s.retained_turns())
-                .unwrap_or(2);
+                .map(|s| s.retained_tokens())
+                .unwrap_or(manifest::defaults::COMPACT_RETAINED_TOKENS);

            match self.compact(retained).await {
                Ok(new_session_id) => {
@@ -691,7 +691,7 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
            return Ok(());
        }

-        let retained = state.retained_turns();
+        let retained = state.retained_tokens();
        match self.compact(retained).await {
            Ok(new_session_id) => {
                info!(
@@ -791,24 +791,15 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
    /// - a clone of the main LlmClient via `clone_boxed()`.
    ///
    /// Returns the new session ID.
-    pub async fn compact(&mut self, retained_turns: usize) -> Result<SessionId, PodError> {
+    pub async fn compact(&mut self, retained_tokens: u64) -> Result<SessionId, PodError> {
+        // Decide the cut point by projecting the UsageRecord timeline onto
+        // the current history: keep the tail whose estimated token count is
+        // within `retained_tokens`. Item-granular, turn boundaries ignored.
+        let cut = self.split_for_retained(retained_tokens);
+
        let worker = self.worker.as_ref().expect("worker taken during run");
        let history = worker.history();
-
-        // Identify turn boundaries (user message positions).
-        let turn_starts: Vec<usize> = history
-            .iter()
-            .enumerate()
-            .filter(|(_, item)| item.is_user_message())
-            .map(|(i, _)| i)
-            .collect();
-
-        // Items to retain: everything from `retained_turns` turns ago onward.
-        let retain_from = if turn_starts.len() > retained_turns {
-            turn_starts[turn_starts.len() - retained_turns]
-        } else {
-            0
-        };
+        let retain_from = cut.index.min(history.len());
        let retained_items = history[retain_from..].to_vec();
        let items_to_summarise = &history[..retain_from];

--- a/crates/pod/tests/system_prompt_template_test.rs
+++ b/crates/pod/tests/system_prompt_template_test.rs
@@ -250,7 +250,7 @@ async fn agents_md_not_reread_after_compact() {
    // Mutate the file after the first turn — must not affect the cached
    // system prompt either on a subsequent turn or across compaction.
    std::fs::write(&agents_path, "mutated").unwrap();
-    pod.compact(1).await.unwrap();
+    pod.compact(0).await.unwrap();
    let after_compact = pod.worker().get_system_prompt().unwrap().to_string();
    assert!(after_compact.contains("original"));
    assert!(!after_compact.contains("mutated"));
@@ -277,7 +277,7 @@ async fn compact_preserves_system_prompt() {
    let before = pod.worker().get_system_prompt().unwrap().to_string();
    pod.run("second").await.unwrap();

-    pod.compact(1).await.unwrap();
+    pod.compact(0).await.unwrap();

    let after = pod.worker().get_system_prompt().unwrap().to_string();
    assert_eq!(before, after);