compact: retained_turns を retained_tokens に置換

保護単位をターン数からトークン量に変更。compact 時のカット位置は
Pod::split_for_retained() で UsageRecord を逆算ソースとして決定し、
ターン境界ではなくアイテム単位で切る。デフォルトは 8000 トークン。
This commit is contained in:
Keisuke Hirata 2026-04-19 08:56:16 +09:00
parent da16015768
commit 758ced5e7f
6 changed files with 43 additions and 48 deletions

View File

@ -86,7 +86,7 @@ pub struct CompactionConfigPartial {
#[serde(default)]
pub compact_request_threshold: Option<u64>,
#[serde(default)]
pub compact_retained_turns: Option<usize>,
pub compact_retained_tokens: Option<u64>,
#[serde(default)]
pub provider: Option<ProviderConfigPartial>,
}
@ -241,9 +241,9 @@ impl CompactionConfigPartial {
compact_request_threshold: upper
.compact_request_threshold
.or(self.compact_request_threshold),
compact_retained_turns: upper
.compact_retained_turns
.or(self.compact_retained_turns),
compact_retained_tokens: upper
.compact_retained_tokens
.or(self.compact_retained_tokens),
provider: merge_option(self.provider, upper.provider, ProviderConfigPartial::merge),
}
}
@ -371,9 +371,9 @@ impl TryFrom<PodManifestConfig> for PodManifest {
.unwrap_or(defaults::PRUNE_MIN_SAVINGS),
compact_threshold: c.compact_threshold,
compact_request_threshold: c.compact_request_threshold,
compact_retained_turns: c
.compact_retained_turns
.unwrap_or(defaults::COMPACT_RETAINED_TURNS),
compact_retained_tokens: c
.compact_retained_tokens
.unwrap_or(defaults::COMPACT_RETAINED_TOKENS),
provider: comp_provider,
})
})

View File

@ -18,9 +18,11 @@ pub const PRUNE_PROTECTED_TURNS: usize = 3;
/// [`crate::CompactionConfig::prune_min_savings`].
pub const PRUNE_MIN_SAVINGS: u64 = 4096;
/// Number of most-recent turns retained after a compact. See
/// [`crate::CompactionConfig::compact_retained_turns`].
pub const COMPACT_RETAINED_TURNS: usize = 2;
/// Token budget retained (unchanged) at the tail of the history across
/// a compact. Items whose cumulative token count fits within this budget
/// starting from the end are kept verbatim; the rest are summarised.
/// See [`crate::CompactionConfig::compact_retained_tokens`].
pub const COMPACT_RETAINED_TOKENS: u64 = 8000;
/// Default instruction asset reference used when `worker.instruction`
/// is omitted. See the `PromptLoader` prefix addressing scheme for the

View File

@ -195,9 +195,11 @@ pub struct CompactionConfig {
#[serde(default)]
pub compact_request_threshold: Option<u64>,
/// Number of recent turns retained after compaction.
#[serde(default = "default_compact_retained_turns")]
pub compact_retained_turns: usize,
/// Token budget retained verbatim at the tail of the history after
/// compaction. Measured against the occupancy estimate from
/// `UsageRecord` history; turn boundaries are ignored.
#[serde(default = "default_compact_retained_tokens")]
pub compact_retained_tokens: u64,
/// Optional provider for the compactor (summary) LLM.
/// If omitted, the main provider is cloned via `clone_boxed()`.
@ -211,8 +213,8 @@ fn default_prune_protected_turns() -> usize {
/// Returns [`defaults::PRUNE_MIN_SAVINGS`]; the `Default` impl of
/// `CompactionConfig` uses this to populate `prune_min_savings`.
fn default_prune_min_savings() -> u64 {
defaults::PRUNE_MIN_SAVINGS
}
fn default_compact_retained_turns() -> usize {
defaults::COMPACT_RETAINED_TURNS
fn default_compact_retained_tokens() -> u64 {
defaults::COMPACT_RETAINED_TOKENS
}
impl Default for CompactionConfig {
@ -222,7 +224,7 @@ impl Default for CompactionConfig {
prune_min_savings: default_prune_min_savings(),
compact_threshold: None,
compact_request_threshold: None,
compact_retained_turns: default_compact_retained_turns(),
compact_retained_tokens: default_compact_retained_tokens(),
provider: None,
}
}
@ -357,7 +359,7 @@ model = "claude-sonnet-4-20250514"
assert_eq!(c.prune_min_savings, 4096);
assert_eq!(c.compact_threshold, Some(80000));
assert_eq!(c.compact_request_threshold, None);
assert_eq!(c.compact_retained_turns, 2);
assert_eq!(c.compact_retained_tokens, 8000);
}
#[test]

View File

@ -25,8 +25,8 @@ pub(crate) struct CompactState {
/// Between-requests threshold (safety net). Checked inside a turn
/// before each LLM request. `None` disables the request check.
request_threshold: Option<u64>,
/// Number of recent turns to retain after compaction.
retained_turns: usize,
/// Token budget retained verbatim at the tail after compaction.
retained_tokens: u64,
/// Consecutive compact failures. At `MAX_COMPACT_FAILURES`, compaction is disabled.
consecutive_failures: AtomicUsize,
/// `true` immediately after a successful compact, cleared on next normal completion.
@ -39,12 +39,12 @@ impl CompactState {
pub(crate) fn new(
post_run_threshold: Option<u64>,
request_threshold: Option<u64>,
retained_turns: usize,
retained_tokens: u64,
) -> Self {
Self {
post_run_threshold,
request_threshold,
retained_turns,
retained_tokens,
consecutive_failures: AtomicUsize::new(0),
just_compacted: AtomicBool::new(false),
disabled: AtomicBool::new(false),
@ -56,9 +56,9 @@ impl CompactState {
self.request_threshold
}
/// Number of turns to retain after compaction.
pub(crate) fn retained_turns(&self) -> usize {
self.retained_turns
/// Token budget retained verbatim at the tail after compaction.
pub(crate) fn retained_tokens(&self) -> u64 {
self.retained_tokens
}
/// Whether compaction has been disabled by the circuit breaker.
@ -115,7 +115,7 @@ mod tests {
fn both_thresholds_configured() {
let state = CompactState::new(Some(80_000), Some(90_000), 2);
assert_eq!(state.request_threshold(), Some(90_000));
assert_eq!(state.retained_turns(), 2);
assert_eq!(state.retained_tokens(), 2);
assert!(!state.exceeds_request(70_000));
assert!(!state.exceeds_post_run(70_000));

View File

@ -415,10 +415,10 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
(
c.compact_threshold,
c.compact_request_threshold,
c.compact_retained_turns,
c.compact_retained_tokens,
)
})
.unwrap_or((None, None, 2));
.unwrap_or((None, None, manifest::defaults::COMPACT_RETAINED_TOKENS));
let tracker_for_usage = self.usage_tracker.clone();
self.worker_mut().on_usage(move |event| {
@ -648,8 +648,8 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
let retained = self
.compact_state
.as_ref()
.map(|s| s.retained_turns())
.unwrap_or(2);
.map(|s| s.retained_tokens())
.unwrap_or(manifest::defaults::COMPACT_RETAINED_TOKENS);
match self.compact(retained).await {
Ok(new_session_id) => {
@ -691,7 +691,7 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
return Ok(());
}
let retained = state.retained_turns();
let retained = state.retained_tokens();
match self.compact(retained).await {
Ok(new_session_id) => {
info!(
@ -791,24 +791,15 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
/// - a clone of the main LlmClient via `clone_boxed()`.
///
/// Returns the new session ID.
pub async fn compact(&mut self, retained_turns: usize) -> Result<SessionId, PodError> {
pub async fn compact(&mut self, retained_tokens: u64) -> Result<SessionId, PodError> {
// Decide the cut point by projecting the UsageRecord timeline onto
// the current history: keep the tail whose estimated token count is
// within `retained_tokens`. Item-granular, turn boundaries ignored.
let cut = self.split_for_retained(retained_tokens);
let worker = self.worker.as_ref().expect("worker taken during run");
let history = worker.history();
// Identify turn boundaries (user message positions).
let turn_starts: Vec<usize> = history
.iter()
.enumerate()
.filter(|(_, item)| item.is_user_message())
.map(|(i, _)| i)
.collect();
// Items to retain: everything from `retained_turns` turns ago onward.
let retain_from = if turn_starts.len() > retained_turns {
turn_starts[turn_starts.len() - retained_turns]
} else {
0
};
let retain_from = cut.index.min(history.len());
let retained_items = history[retain_from..].to_vec();
let items_to_summarise = &history[..retain_from];

View File

@ -250,7 +250,7 @@ async fn agents_md_not_reread_after_compact() {
// Mutate the file after the first turn — must not affect the cached
// system prompt either on a subsequent turn or across compaction.
std::fs::write(&agents_path, "mutated").unwrap();
pod.compact(1).await.unwrap();
pod.compact(0).await.unwrap();
let after_compact = pod.worker().get_system_prompt().unwrap().to_string();
assert!(after_compact.contains("original"));
assert!(!after_compact.contains("mutated"));
@ -277,7 +277,7 @@ async fn compact_preserves_system_prompt() {
let before = pod.worker().get_system_prompt().unwrap().to_string();
pod.run("second").await.unwrap();
pod.compact(1).await.unwrap();
pod.compact(0).await.unwrap();
let after = pod.worker().get_system_prompt().unwrap().to_string();
assert_eq!(before, after);