fix: compact retained split uses raw tail size

2026-05-26 17:52:09 +09:00 · 2026-05-26 17:52:09 +09:00 · 5ccfdea7c8
commit 5ccfdea7c8
parent ded02e4c08
1 changed file with 83 additions and 23 deletions
--- a/crates/pod/src/compact/token_counter.rs
+++ b/crates/pod/src/compact/token_counter.rs
@ -10,6 +10,10 @@
 //!
 //! - ローカルトークナイザは持たない。実測値があればそれを採用し、
 //!   measurement 間はバイト数で按分、最新 measurement より先は最終 rate で外挿する
 //! - Compact の retained split では、request-time pruning / projection 後の
 //!   `UsageRecord` を persisted history prefix の単調系列として扱わない。
 //!   現在の prompt occupancy 推定を raw serialized bytes に配分し、末尾の
 //!   persisted tail サイズで cut を決める。
 //! - 推定の出どころは [`EstimateSource`] で呼び出し側に明示する。
 //!   課金判断には使えないが、compact / prune の閾値判定には十分な精度
@ -40,26 +44,61 @@ fn split_for_retained_impl(history: &[Item], records: &[UsageRecord], retained:
            source: current.source,
        };
    }
    let target = current.tokens - retained;
-    // `tokens_at` が target 以上になる最小の idx を線形探索。
+    let cut_index = split_index_by_retained_bytes(&prefix, current.tokens, retained);
-    // prefix を使い回すので 1 回の split 呼び出しあたり O(n) で済む
+    SplitPoint {
-    // （内部で毎回再計算すると O(n²) になる）。将来ボトルネックになれば
+        index: balance_to_pair_boundary(history, cut_index),
-    // record 境界で二分探索に置き換える。
+        source: current.source,
-    let mut chosen_source = current.source;
+    }
-    let mut cut_index = history.len();
+}
-    for idx in 1..=history.len() {
+
-        let est = tokens_at(history, records, idx, &prefix);
+fn split_index_by_retained_bytes(prefix: &[u64], total_tokens: u64, retained_tokens: u64) -> usize {
-        if est.tokens >= target {
+    debug_assert!(!prefix.is_empty());
-            chosen_source = est.source;
+
-            cut_index = idx;
+    let len = prefix.len() - 1;
    if len == 0 {
        return 0;
    }
    if retained_tokens == 0 {
        return len;
    }
    let total_bytes = *prefix.last().unwrap_or(&0);
    if total_bytes == 0 || total_tokens == 0 {
        return 0;
    }
    let raw_fallback_tokens = ceil_div_u128(total_bytes as u128, 4) as u64;
    let rate_tokens = total_tokens.max(raw_fallback_tokens);
    let target_retained_bytes = ceil_div_u128(
        retained_tokens as u128 * total_bytes as u128,
        rate_tokens as u128,
    )
    .min(total_bytes as u128) as u64;
    // Drop as many complete Items as possible while keeping the raw persisted
    // suffix at or above the retained budget. This is monotonic in serialized
    // history size and intentionally does not inspect per-history_len
    // UsageRecords: request-time usage can move up and down after pruning /
    // projection, so it is not a valid prefix series for retained split. The
    // byte/4 fallback is kept as a lower bound for raw persisted size so a
    // heavily-pruned request measurement cannot justify retaining megabytes of
    // history.
    let mut cut = 0;
    for (idx, bytes_before) in prefix.iter().enumerate().take(len + 1) {
        let suffix_bytes = total_bytes.saturating_sub(*bytes_before);
        if suffix_bytes >= target_retained_bytes {
            cut = idx;
        } else {
            break;
        }
    }
-    SplitPoint {
+    cut
-        index: balance_to_pair_boundary(history, cut_index),
+}
-        source: chosen_source,
+
-    }
+fn ceil_div_u128(n: u128, d: u128) -> u128 {
    debug_assert!(d > 0);
    if n == 0 { 0 } else { ((n - 1) / d) + 1 }
 }
 /// `history[cut..]` が `ToolCall` / `ToolResult` のペア境界を尊重するよう
@ -259,23 +298,44 @@ mod tests {
    }
    #[test]
-    fn split_at_exact_measurement_boundary() {
+    fn split_uses_current_occupancy_as_raw_byte_rate() {
-        // 4 items。measurements: len=2 → 100, len=4 → 300。
+        // Compact retained split does not treat the intermediate record at
-        // retained=200 → target_drop = 100 → record[0] にぴったり一致 → index=2。
+        // len=2 as a raw prefix boundary. It uses the current occupancy
        // estimate (len=4 → 300) as a serialized-byte rate and keeps the
        // smallest item-granular suffix whose raw size covers retained=200.
        let history = vec![msg("a"), msg("b"), msg("c"), msg("d")];
        let records = vec![record(2, 100), record(4, 300)];
        let cut = split_for_retained_impl(&history, &records, 200);
-        assert_eq!(cut.index, 2);
+        assert_eq!(cut.index, 1);
        assert_eq!(cut.source, EstimateSource::Measured);
    }
    #[test]
-    fn split_interpolated_between_measurements() {
+    fn split_does_not_use_non_current_measurements_as_cut_boundaries() {
        let history = vec![msg("aaaaaa"), msg("bbbbbb"), msg("cccccc"), msg("dddddd")];
        let records = vec![record(1, 50), record(4, 400)];
        let cut = split_for_retained_impl(&history, &records, 250);
-        assert!(cut.index > 1 && cut.index <= 4);
+        assert_eq!(cut.index, 1);
-        assert_eq!(cut.source, EstimateSource::Interpolated);
+        assert_eq!(cut.source, EstimateSource::Measured);
    }
    /// Regression test: a large usage measurement recorded early in the
    /// history must not pull the retained-split cut point towards the front.
    #[test]
    fn split_ignores_non_monotonic_usage_spike_for_retained_tail() {
        // Twenty messages that share the same 100-character padding.
        let padding = "x".repeat(100);
        let mut history: Vec<Item> = Vec::with_capacity(20);
        for n in 0..20 {
            history.push(msg(&format!("message-{n}-{padding}")));
        }

        // The record at len=2 is a request-time spike after pruning/projection;
        // the record at len=20 is the measurement for the full history.
        let spike = record(2, 900);
        let latest = record(20, 1000);
        let records = vec![spike, latest];

        let split = split_for_retained_impl(&history, &records, 100);

        // Prefix-crossing logic would stop at index 2, because 900 already
        // reaches 1000 - 100, and would therefore retain almost the whole
        // persisted history. The compact split is expected to size the
        // retained tail from the raw suffix instead, so the cut has to land
        // in the back half of the history.
        assert!(split.index > 10, "cut.index = {}", split.index);
        assert_eq!(split.source, EstimateSource::Measured);
    }
    #[test]