From 5ccfdea7c8b69ab13e743c224dec292973bab025 Mon Sep 17 00:00:00 2001
From: Hare <kei.hiracchi.0928@gmail.com>
Date: Tue, 26 May 2026 17:52:09 +0900
Subject: [PATCH] fix: compact retained split uses raw tail size

---
 crates/pod/src/compact/token_counter.rs | 106 +++++++++++++++++++-----
 1 file changed, 83 insertions(+), 23 deletions(-)

diff --git a/crates/pod/src/compact/token_counter.rs b/crates/pod/src/compact/token_counter.rs
index 02507d39..459922e9 100644
--- a/crates/pod/src/compact/token_counter.rs
+++ b/crates/pod/src/compact/token_counter.rs
@@ -10,6 +10,10 @@
 //!
 //! - ローカルトークナイザは持たない。実測値があればそれを採用し、
 //!   measurement 間はバイト数で按分、最新 measurement より先は最終 rate で外挿する
+//! - Compact の retained split では、request-time pruning / projection 後の
+//!   `UsageRecord` を persisted history prefix の単調系列として扱わない。
+//!   現在の prompt occupancy 推定を raw serialized bytes に配分し、末尾の
+//!   persisted tail サイズで cut を決める。
 //! - 推定の出どころは [`EstimateSource`] で呼び出し側に明示する。
 //!   課金判断には使えないが、compact / prune の閾値判定には十分な精度
 
@@ -40,26 +44,61 @@ fn split_for_retained_impl(history: &[Item], records: &[UsageRecord], retained:
             source: current.source,
         };
     }
-    let target = current.tokens - retained;
 
-    // `tokens_at` が target 以上になる最小の idx を線形探索。
-    // prefix を使い回すので 1 回の split 呼び出しあたり O(n) で済む
-    // （内部で毎回再計算すると O(n²) になる）。将来ボトルネックになれば
-    // record 境界で二分探索に置き換える。
-    let mut chosen_source = current.source;
-    let mut cut_index = history.len();
-    for idx in 1..=history.len() {
-        let est = tokens_at(history, records, idx, &prefix);
-        if est.tokens >= target {
-            chosen_source = est.source;
-            cut_index = idx;
+    let cut_index = split_index_by_retained_bytes(&prefix, current.tokens, retained);
+    SplitPoint {
+        index: balance_to_pair_boundary(history, cut_index),
+        source: current.source,
+    }
+}
+
+/// Picks the cut index that keeps the smallest item-granular raw suffix still
+/// covering `retained_tokens`, converted to bytes at the `total_tokens` rate.
+///
+/// `prefix` is read as cumulative serialized bytes per item boundary, so it has
+/// one more entry than there are items. Per-`history_len` `UsageRecord`s are not
+/// inspected: request-time usage can move up and down after pruning /
+/// projection, so it is not a valid prefix series. Returns `0` (keep all) when
+/// there is nothing to measure, and the item count when `retained_tokens` is 0.
+fn split_index_by_retained_bytes(prefix: &[u64], total_tokens: u64, retained_tokens: u64) -> usize {
+    debug_assert!(!prefix.is_empty());
+    // `debug_assert!` is compiled out in release; saturate so an empty prefix returns 0.
+    let len = prefix.len().saturating_sub(1);
+    if len == 0 {
+        return 0;
+    }
+    if retained_tokens == 0 {
+        return len;
+    }
+
+    let total_bytes = *prefix.last().unwrap_or(&0);
+    if total_bytes == 0 || total_tokens == 0 {
+        return 0;
+    }
+    // Floor the rate at bytes/4 so a heavily-pruned measurement cannot retain megabytes.
+    let raw_fallback_tokens = ceil_div_u128(total_bytes as u128, 4) as u64;
+    let rate_tokens = total_tokens.max(raw_fallback_tokens);
+    let target_retained_bytes = ceil_div_u128(
+        retained_tokens as u128 * total_bytes as u128,
+        rate_tokens as u128,
+    )
+    .min(total_bytes as u128) as u64;
+    // Suffix bytes only shrink as idx grows, so the last idx that still fits is the cut.
+    let mut cut = 0;
+    for (idx, bytes_before) in prefix.iter().enumerate() {
+        let suffix_bytes = total_bytes.saturating_sub(*bytes_before);
+        if suffix_bytes >= target_retained_bytes {
+            cut = idx;
+        } else {
             break;
         }
     }
-    SplitPoint {
-        index: balance_to_pair_boundary(history, cut_index),
-        source: chosen_source,
-    }
+    cut
+}
+
+fn ceil_div_u128(n: u128, d: u128) -> u128 {
+    debug_assert!(d > 0); // ceiling division helper; callers must pass a non-zero divisor
+    if n == 0 { 0 } else { ((n - 1) / d) + 1 } // ceil(n / d); avoids the `n + d - 1` overflow
 }
 
 /// `history[cut..]` が `ToolCall` / `ToolResult` のペア境界を尊重するよう
@@ -259,23 +298,44 @@ mod tests {
     }
 
     #[test]
-    fn split_at_exact_measurement_boundary() {
-        // 4 items。measurements: len=2 → 100, len=4 → 300。
-        // retained=200 → target_drop = 100 → record[0] にぴったり一致 → index=2。
+    fn split_uses_current_occupancy_as_raw_byte_rate() {
+        // The intermediate record at len=2 is not used as a cut boundary.
+        // The current estimate (len=4 → 300) sets the bytes-per-token rate,
+        // so retained=200 needs 2/3 of the raw bytes: assuming the four items
+        // serialize to similar, small sizes, that is the last three (cut at 1).
         let history = vec![msg("a"), msg("b"), msg("c"), msg("d")];
         let records = vec![record(2, 100), record(4, 300)];
         let cut = split_for_retained_impl(&history, &records, 200);
-        assert_eq!(cut.index, 2);
+        assert_eq!(cut.index, 1);
         assert_eq!(cut.source, EstimateSource::Measured);
     }
 
     #[test]
-    fn split_interpolated_between_measurements() {
+    fn split_does_not_use_non_current_measurements_as_cut_boundaries() {
         let history = vec![msg("aaaaaa"), msg("bbbbbb"), msg("cccccc"), msg("dddddd")];
         let records = vec![record(1, 50), record(4, 400)];
         let cut = split_for_retained_impl(&history, &records, 250);
-        assert!(cut.index > 1 && cut.index <= 4);
-        assert_eq!(cut.source, EstimateSource::Interpolated);
+        assert_eq!(cut.index, 1); // 250/400 of the bytes; presumably 3 of 4 equal-sized items
+        assert_eq!(cut.source, EstimateSource::Measured); // reports the current estimate's source
+    }
+
+    #[test]
+    fn split_ignores_non_monotonic_usage_spike_for_retained_tail() {
+        let history: Vec<Item> = (0..20)
+            .map(|idx| msg(&format!("message-{idx}-{}", "x".repeat(100))))
+            .collect();
+        let records = vec![
+            record(2, 900), // request-time spike after pruning/projection
+            record(20, 1000), // measurement covering all 20 items (current occupancy)
+        ];
+        let cut = split_for_retained_impl(&history, &records, 100);
+
+        // The old prefix-crossing logic picked index 2 because 900 >=
+        // 1000-100, retaining almost the whole persisted history. The compact
+        // split must instead use raw suffix size and keep only the tail needed
+        // for the retained budget.
+        assert!(cut.index > 10, "cut.index = {}", cut.index);
+        assert_eq!(cut.source, EstimateSource::Measured);
     }
 
     #[test]