From 5ccfdea7c8b69ab13e743c224dec292973bab025 Mon Sep 17 00:00:00 2001 From: Hare Date: Tue, 26 May 2026 17:52:09 +0900 Subject: [PATCH] fix: compact retained split uses raw tail size --- crates/pod/src/compact/token_counter.rs | 106 +++++++++++++++++++----- 1 file changed, 83 insertions(+), 23 deletions(-) diff --git a/crates/pod/src/compact/token_counter.rs b/crates/pod/src/compact/token_counter.rs index 02507d39..459922e9 100644 --- a/crates/pod/src/compact/token_counter.rs +++ b/crates/pod/src/compact/token_counter.rs @@ -10,6 +10,10 @@ //! //! - ローカルトークナイザは持たない。実測値があればそれを採用し、 //! measurement 間はバイト数で按分、最新 measurement より先は最終 rate で外挿する +//! - Compact の retained split では、request-time pruning / projection 後の +//! `UsageRecord` を persisted history prefix の単調系列として扱わない。 +//! 現在の prompt occupancy 推定を raw serialized bytes に配分し、末尾の +//! persisted tail サイズで cut を決める。 //! - 推定の出どころは [`EstimateSource`] で呼び出し側に明示する。 //! 課金判断には使えないが、compact / prune の閾値判定には十分な精度 @@ -40,26 +44,61 @@ fn split_for_retained_impl(history: &[Item], records: &[UsageRecord], retained: source: current.source, }; } - let target = current.tokens - retained; - // `tokens_at` が target 以上になる最小の idx を線形探索。 - // prefix を使い回すので 1 回の split 呼び出しあたり O(n) で済む - // (内部で毎回再計算すると O(n²) になる)。将来ボトルネックになれば - // record 境界で二分探索に置き換える。 - let mut chosen_source = current.source; - let mut cut_index = history.len(); - for idx in 1..=history.len() { - let est = tokens_at(history, records, idx, &prefix); - if est.tokens >= target { - chosen_source = est.source; - cut_index = idx; + let cut_index = split_index_by_retained_bytes(&prefix, current.tokens, retained); + SplitPoint { + index: balance_to_pair_boundary(history, cut_index), + source: current.source, + } +} + +fn split_index_by_retained_bytes(prefix: &[u64], total_tokens: u64, retained_tokens: u64) -> usize { + debug_assert!(!prefix.is_empty()); + + let len = prefix.len() - 1; + if len == 0 { + return 0; + } + if retained_tokens == 0 { + return len; + } + + let 
total_bytes = *prefix.last().unwrap_or(&0); + if total_bytes == 0 || total_tokens == 0 { + return 0; + } + + let raw_fallback_tokens = ceil_div_u128(total_bytes as u128, 4) as u64; + let rate_tokens = total_tokens.max(raw_fallback_tokens); + let target_retained_bytes = ceil_div_u128( + retained_tokens as u128 * total_bytes as u128, + rate_tokens as u128, + ) + .min(total_bytes as u128) as u64; + + // Drop as many complete Items as possible while keeping the raw persisted + // suffix at or above the retained budget. This is monotonic in serialized + // history size and intentionally does not inspect per-history_len + // UsageRecords: request-time usage can move up and down after pruning / + // projection, so it is not a valid prefix series for retained split. The + // byte/4 fallback is kept as a lower bound for raw persisted size so a + // heavily-pruned request measurement cannot justify retaining megabytes of + // history. + let mut cut = 0; + for (idx, bytes_before) in prefix.iter().enumerate().take(len + 1) { + let suffix_bytes = total_bytes.saturating_sub(*bytes_before); + if suffix_bytes >= target_retained_bytes { + cut = idx; + } else { break; } } - SplitPoint { - index: balance_to_pair_boundary(history, cut_index), - source: chosen_source, - } + cut +} + +fn ceil_div_u128(n: u128, d: u128) -> u128 { + debug_assert!(d > 0); + if n == 0 { 0 } else { ((n - 1) / d) + 1 } } /// `history[cut..]` が `ToolCall` / `ToolResult` のペア境界を尊重するよう @@ -259,23 +298,44 @@ mod tests { } #[test] - fn split_at_exact_measurement_boundary() { - // 4 items。measurements: len=2 → 100, len=4 → 300。 - // retained=200 → target_drop = 100 → record[0] にぴったり一致 → index=2。 + fn split_uses_current_occupancy_as_raw_byte_rate() { + // Compact retained split does not treat the intermediate record at + // len=2 as a raw prefix boundary. 
It uses the current occupancy + // estimate (len=4 → 300) as a serialized-byte rate and keeps the + // smallest item-granular suffix whose raw size covers retained=200. let history = vec![msg("a"), msg("b"), msg("c"), msg("d")]; let records = vec![record(2, 100), record(4, 300)]; let cut = split_for_retained_impl(&history, &records, 200); - assert_eq!(cut.index, 2); + assert_eq!(cut.index, 1); assert_eq!(cut.source, EstimateSource::Measured); } #[test] - fn split_interpolated_between_measurements() { + fn split_does_not_use_non_current_measurements_as_cut_boundaries() { let history = vec![msg("aaaaaa"), msg("bbbbbb"), msg("cccccc"), msg("dddddd")]; let records = vec![record(1, 50), record(4, 400)]; let cut = split_for_retained_impl(&history, &records, 250); - assert!(cut.index > 1 && cut.index <= 4); - assert_eq!(cut.source, EstimateSource::Interpolated); + assert_eq!(cut.index, 1); + assert_eq!(cut.source, EstimateSource::Measured); + } + + #[test] + fn split_ignores_non_monotonic_usage_spike_for_retained_tail() { + let history: Vec<Item> = (0..20) + .map(|idx| msg(&format!("message-{idx}-{}", "x".repeat(100)))) + .collect(); + let records = vec![ + record(2, 900), // request-time spike after pruning/projection + record(20, 1000), + ]; + let cut = split_for_retained_impl(&history, &records, 100); + + // The old prefix-crossing logic picked index 2 because 900 >= + // 1000-100, retaining almost the whole persisted history. The compact + // split must instead use raw suffix size and keep only the tail needed + // for the retained budget. + assert!(cut.index > 10, "cut.index = {}", cut.index); + assert_eq!(cut.source, EstimateSource::Measured); } #[test]