compact: retained_turns を retained_tokens に置換
保護単位をターン数からトークン量に変更。compact 時のカット位置は Pod::split_for_retained() で UsageRecord を逆算ソースとして決定し、ターン境界ではなくアイテム単位で切る。デフォルトは 8000 トークン。
This commit is contained in:
parent
da16015768
commit
758ced5e7f
|
|
@ -86,7 +86,7 @@ pub struct CompactionConfigPartial {
|
|||
#[serde(default)]
|
||||
pub compact_request_threshold: Option<u64>,
|
||||
#[serde(default)]
|
||||
pub compact_retained_turns: Option<usize>,
|
||||
pub compact_retained_tokens: Option<u64>,
|
||||
#[serde(default)]
|
||||
pub provider: Option<ProviderConfigPartial>,
|
||||
}
|
||||
|
|
@ -241,9 +241,9 @@ impl CompactionConfigPartial {
|
|||
compact_request_threshold: upper
|
||||
.compact_request_threshold
|
||||
.or(self.compact_request_threshold),
|
||||
compact_retained_turns: upper
|
||||
.compact_retained_turns
|
||||
.or(self.compact_retained_turns),
|
||||
compact_retained_tokens: upper
|
||||
.compact_retained_tokens
|
||||
.or(self.compact_retained_tokens),
|
||||
provider: merge_option(self.provider, upper.provider, ProviderConfigPartial::merge),
|
||||
}
|
||||
}
|
||||
|
|
@ -371,9 +371,9 @@ impl TryFrom<PodManifestConfig> for PodManifest {
|
|||
.unwrap_or(defaults::PRUNE_MIN_SAVINGS),
|
||||
compact_threshold: c.compact_threshold,
|
||||
compact_request_threshold: c.compact_request_threshold,
|
||||
compact_retained_turns: c
|
||||
.compact_retained_turns
|
||||
.unwrap_or(defaults::COMPACT_RETAINED_TURNS),
|
||||
compact_retained_tokens: c
|
||||
.compact_retained_tokens
|
||||
.unwrap_or(defaults::COMPACT_RETAINED_TOKENS),
|
||||
provider: comp_provider,
|
||||
})
|
||||
})
|
||||
|
|
|
|||
|
|
@ -18,9 +18,11 @@ pub const PRUNE_PROTECTED_TURNS: usize = 3;
|
|||
/// [`crate::CompactionConfig::prune_min_savings`].
|
||||
pub const PRUNE_MIN_SAVINGS: u64 = 4096;
|
||||
|
||||
/// Number of most-recent turns retained after a compact. See
|
||||
/// [`crate::CompactionConfig::compact_retained_turns`].
|
||||
pub const COMPACT_RETAINED_TURNS: usize = 2;
|
||||
/// Token budget retained (unchanged) at the tail of the history across
|
||||
/// a compact. Items whose cumulative token count fits within this budget
|
||||
/// starting from the end are kept verbatim; the rest are summarised.
|
||||
/// See [`crate::CompactionConfig::compact_retained_tokens`].
|
||||
pub const COMPACT_RETAINED_TOKENS: u64 = 8000;
|
||||
|
||||
/// Default instruction asset reference used when `worker.instruction`
|
||||
/// is omitted. See the `PromptLoader` prefix addressing scheme for the
|
||||
|
|
|
|||
|
|
@ -195,9 +195,11 @@ pub struct CompactionConfig {
|
|||
#[serde(default)]
|
||||
pub compact_request_threshold: Option<u64>,
|
||||
|
||||
/// Number of recent turns retained after compaction.
|
||||
#[serde(default = "default_compact_retained_turns")]
|
||||
pub compact_retained_turns: usize,
|
||||
/// Token budget retained verbatim at the tail of the history after
|
||||
/// compaction. Measured against the occupancy estimate from
|
||||
/// `UsageRecord` history; turn boundaries are ignored.
|
||||
#[serde(default = "default_compact_retained_tokens")]
|
||||
pub compact_retained_tokens: u64,
|
||||
|
||||
/// Optional provider for the compactor (summary) LLM.
|
||||
/// If omitted, the main provider is cloned via `clone_boxed()`.
|
||||
|
|
@ -211,8 +213,8 @@ fn default_prune_protected_turns() -> usize {
|
|||
fn default_prune_min_savings() -> u64 {
|
||||
defaults::PRUNE_MIN_SAVINGS
|
||||
}
|
||||
fn default_compact_retained_turns() -> usize {
|
||||
defaults::COMPACT_RETAINED_TURNS
|
||||
fn default_compact_retained_tokens() -> u64 {
|
||||
defaults::COMPACT_RETAINED_TOKENS
|
||||
}
|
||||
|
||||
impl Default for CompactionConfig {
|
||||
|
|
@ -222,7 +224,7 @@ impl Default for CompactionConfig {
|
|||
prune_min_savings: default_prune_min_savings(),
|
||||
compact_threshold: None,
|
||||
compact_request_threshold: None,
|
||||
compact_retained_turns: default_compact_retained_turns(),
|
||||
compact_retained_tokens: default_compact_retained_tokens(),
|
||||
provider: None,
|
||||
}
|
||||
}
|
||||
|
|
@ -357,7 +359,7 @@ model = "claude-sonnet-4-20250514"
|
|||
assert_eq!(c.prune_min_savings, 4096);
|
||||
assert_eq!(c.compact_threshold, Some(80000));
|
||||
assert_eq!(c.compact_request_threshold, None);
|
||||
assert_eq!(c.compact_retained_turns, 2);
|
||||
assert_eq!(c.compact_retained_tokens, 8000);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -25,8 +25,8 @@ pub(crate) struct CompactState {
|
|||
/// Between-requests threshold (safety net). Checked inside a turn
|
||||
/// before each LLM request. `None` disables the request check.
|
||||
request_threshold: Option<u64>,
|
||||
/// Number of recent turns to retain after compaction.
|
||||
retained_turns: usize,
|
||||
/// Token budget retained verbatim at the tail after compaction.
|
||||
retained_tokens: u64,
|
||||
/// Consecutive compact failures. At `MAX_COMPACT_FAILURES`, compaction is disabled.
|
||||
consecutive_failures: AtomicUsize,
|
||||
/// `true` immediately after a successful compact, cleared on next normal completion.
|
||||
|
|
@ -39,12 +39,12 @@ impl CompactState {
|
|||
pub(crate) fn new(
|
||||
post_run_threshold: Option<u64>,
|
||||
request_threshold: Option<u64>,
|
||||
retained_turns: usize,
|
||||
retained_tokens: u64,
|
||||
) -> Self {
|
||||
Self {
|
||||
post_run_threshold,
|
||||
request_threshold,
|
||||
retained_turns,
|
||||
retained_tokens,
|
||||
consecutive_failures: AtomicUsize::new(0),
|
||||
just_compacted: AtomicBool::new(false),
|
||||
disabled: AtomicBool::new(false),
|
||||
|
|
@ -56,9 +56,9 @@ impl CompactState {
|
|||
self.request_threshold
|
||||
}
|
||||
|
||||
/// Number of turns to retain after compaction.
|
||||
pub(crate) fn retained_turns(&self) -> usize {
|
||||
self.retained_turns
|
||||
/// Token budget retained verbatim at the tail after compaction.
|
||||
pub(crate) fn retained_tokens(&self) -> u64 {
|
||||
self.retained_tokens
|
||||
}
|
||||
|
||||
/// Whether compaction has been disabled by the circuit breaker.
|
||||
|
|
@ -115,7 +115,7 @@ mod tests {
|
|||
fn both_thresholds_configured() {
|
||||
let state = CompactState::new(Some(80_000), Some(90_000), 2);
|
||||
assert_eq!(state.request_threshold(), Some(90_000));
|
||||
assert_eq!(state.retained_turns(), 2);
|
||||
assert_eq!(state.retained_tokens(), 2);
|
||||
|
||||
assert!(!state.exceeds_request(70_000));
|
||||
assert!(!state.exceeds_post_run(70_000));
|
||||
|
|
|
|||
|
|
@ -415,10 +415,10 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
|
|||
(
|
||||
c.compact_threshold,
|
||||
c.compact_request_threshold,
|
||||
c.compact_retained_turns,
|
||||
c.compact_retained_tokens,
|
||||
)
|
||||
})
|
||||
.unwrap_or((None, None, 2));
|
||||
.unwrap_or((None, None, manifest::defaults::COMPACT_RETAINED_TOKENS));
|
||||
|
||||
let tracker_for_usage = self.usage_tracker.clone();
|
||||
self.worker_mut().on_usage(move |event| {
|
||||
|
|
@ -648,8 +648,8 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
|
|||
let retained = self
|
||||
.compact_state
|
||||
.as_ref()
|
||||
.map(|s| s.retained_turns())
|
||||
.unwrap_or(2);
|
||||
.map(|s| s.retained_tokens())
|
||||
.unwrap_or(manifest::defaults::COMPACT_RETAINED_TOKENS);
|
||||
|
||||
match self.compact(retained).await {
|
||||
Ok(new_session_id) => {
|
||||
|
|
@ -691,7 +691,7 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
|
|||
return Ok(());
|
||||
}
|
||||
|
||||
let retained = state.retained_turns();
|
||||
let retained = state.retained_tokens();
|
||||
match self.compact(retained).await {
|
||||
Ok(new_session_id) => {
|
||||
info!(
|
||||
|
|
@ -791,24 +791,15 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
|
|||
/// - a clone of the main LlmClient via `clone_boxed()`.
|
||||
///
|
||||
/// Returns the new session ID.
|
||||
pub async fn compact(&mut self, retained_turns: usize) -> Result<SessionId, PodError> {
|
||||
pub async fn compact(&mut self, retained_tokens: u64) -> Result<SessionId, PodError> {
|
||||
// Decide the cut point by projecting the UsageRecord timeline onto
|
||||
// the current history: keep the tail whose estimated token count is
|
||||
// within `retained_tokens`. Item-granular, turn boundaries ignored.
|
||||
let cut = self.split_for_retained(retained_tokens);
|
||||
|
||||
let worker = self.worker.as_ref().expect("worker taken during run");
|
||||
let history = worker.history();
|
||||
|
||||
// Identify turn boundaries (user message positions).
|
||||
let turn_starts: Vec<usize> = history
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(_, item)| item.is_user_message())
|
||||
.map(|(i, _)| i)
|
||||
.collect();
|
||||
|
||||
// Items to retain: everything from `retained_turns` turns ago onward.
|
||||
let retain_from = if turn_starts.len() > retained_turns {
|
||||
turn_starts[turn_starts.len() - retained_turns]
|
||||
} else {
|
||||
0
|
||||
};
|
||||
let retain_from = cut.index.min(history.len());
|
||||
let retained_items = history[retain_from..].to_vec();
|
||||
let items_to_summarise = &history[..retain_from];
|
||||
|
||||
|
|
|
|||
|
|
@ -250,7 +250,7 @@ async fn agents_md_not_reread_after_compact() {
|
|||
// Mutate the file after the first turn — must not affect the cached
|
||||
// system prompt either on a subsequent turn or across compaction.
|
||||
std::fs::write(&agents_path, "mutated").unwrap();
|
||||
pod.compact(1).await.unwrap();
|
||||
pod.compact(0).await.unwrap();
|
||||
let after_compact = pod.worker().get_system_prompt().unwrap().to_string();
|
||||
assert!(after_compact.contains("original"));
|
||||
assert!(!after_compact.contains("mutated"));
|
||||
|
|
@ -277,7 +277,7 @@ async fn compact_preserves_system_prompt() {
|
|||
let before = pod.worker().get_system_prompt().unwrap().to_string();
|
||||
pod.run("second").await.unwrap();
|
||||
|
||||
pod.compact(1).await.unwrap();
|
||||
pod.compact(0).await.unwrap();
|
||||
|
||||
let after = pod.worker().get_system_prompt().unwrap().to_string();
|
||||
assert_eq!(before, after);
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user