compact: retained_turns を retained_tokens に置換
保護単位をターン数からトークン量に変更。compact 時のカット位置は Pod::split_for_retained() で UsageRecord を逆算ソースとして決定し、ターン境界ではなくアイテム単位で切る。デフォルトは 8000 トークン。
This commit is contained in:
parent
83f68e35ad
commit
db2dd8a3c0
|
|
@ -86,7 +86,7 @@ pub struct CompactionConfigPartial {
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub compact_request_threshold: Option<u64>,
|
pub compact_request_threshold: Option<u64>,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub compact_retained_turns: Option<usize>,
|
pub compact_retained_tokens: Option<u64>,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub provider: Option<ProviderConfigPartial>,
|
pub provider: Option<ProviderConfigPartial>,
|
||||||
}
|
}
|
||||||
|
|
@ -241,9 +241,9 @@ impl CompactionConfigPartial {
|
||||||
compact_request_threshold: upper
|
compact_request_threshold: upper
|
||||||
.compact_request_threshold
|
.compact_request_threshold
|
||||||
.or(self.compact_request_threshold),
|
.or(self.compact_request_threshold),
|
||||||
compact_retained_turns: upper
|
compact_retained_tokens: upper
|
||||||
.compact_retained_turns
|
.compact_retained_tokens
|
||||||
.or(self.compact_retained_turns),
|
.or(self.compact_retained_tokens),
|
||||||
provider: merge_option(self.provider, upper.provider, ProviderConfigPartial::merge),
|
provider: merge_option(self.provider, upper.provider, ProviderConfigPartial::merge),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -371,9 +371,9 @@ impl TryFrom<PodManifestConfig> for PodManifest {
|
||||||
.unwrap_or(defaults::PRUNE_MIN_SAVINGS),
|
.unwrap_or(defaults::PRUNE_MIN_SAVINGS),
|
||||||
compact_threshold: c.compact_threshold,
|
compact_threshold: c.compact_threshold,
|
||||||
compact_request_threshold: c.compact_request_threshold,
|
compact_request_threshold: c.compact_request_threshold,
|
||||||
compact_retained_turns: c
|
compact_retained_tokens: c
|
||||||
.compact_retained_turns
|
.compact_retained_tokens
|
||||||
.unwrap_or(defaults::COMPACT_RETAINED_TURNS),
|
.unwrap_or(defaults::COMPACT_RETAINED_TOKENS),
|
||||||
provider: comp_provider,
|
provider: comp_provider,
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
|
||||||
|
|
@ -18,9 +18,11 @@ pub const PRUNE_PROTECTED_TURNS: usize = 3;
|
||||||
/// [`crate::CompactionConfig::prune_min_savings`].
|
/// [`crate::CompactionConfig::prune_min_savings`].
|
||||||
pub const PRUNE_MIN_SAVINGS: u64 = 4096;
|
pub const PRUNE_MIN_SAVINGS: u64 = 4096;
|
||||||
|
|
||||||
/// Number of most-recent turns retained after a compact. See
|
/// Token budget retained (unchanged) at the tail of the history across
|
||||||
/// [`crate::CompactionConfig::compact_retained_turns`].
|
/// a compact. Items whose cumulative token count fits within this budget
|
||||||
pub const COMPACT_RETAINED_TURNS: usize = 2;
|
/// starting from the end are kept verbatim; the rest are summarised.
|
||||||
|
/// See [`crate::CompactionConfig::compact_retained_tokens`].
|
||||||
|
pub const COMPACT_RETAINED_TOKENS: u64 = 8000;
|
||||||
|
|
||||||
/// Default instruction asset reference used when `worker.instruction`
|
/// Default instruction asset reference used when `worker.instruction`
|
||||||
/// is omitted. See the `PromptLoader` prefix addressing scheme for the
|
/// is omitted. See the `PromptLoader` prefix addressing scheme for the
|
||||||
|
|
|
||||||
|
|
@ -195,9 +195,11 @@ pub struct CompactionConfig {
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub compact_request_threshold: Option<u64>,
|
pub compact_request_threshold: Option<u64>,
|
||||||
|
|
||||||
/// Number of recent turns retained after compaction.
|
/// Token budget retained verbatim at the tail of the history after
|
||||||
#[serde(default = "default_compact_retained_turns")]
|
/// compaction. Measured against the occupancy estimate from
|
||||||
pub compact_retained_turns: usize,
|
/// `UsageRecord` history; turn boundaries are ignored.
|
||||||
|
#[serde(default = "default_compact_retained_tokens")]
|
||||||
|
pub compact_retained_tokens: u64,
|
||||||
|
|
||||||
/// Optional provider for the compactor (summary) LLM.
|
/// Optional provider for the compactor (summary) LLM.
|
||||||
/// If omitted, the main provider is cloned via `clone_boxed()`.
|
/// If omitted, the main provider is cloned via `clone_boxed()`.
|
||||||
|
|
@ -211,8 +213,8 @@ fn default_prune_protected_turns() -> usize {
|
||||||
fn default_prune_min_savings() -> u64 {
|
fn default_prune_min_savings() -> u64 {
|
||||||
defaults::PRUNE_MIN_SAVINGS
|
defaults::PRUNE_MIN_SAVINGS
|
||||||
}
|
}
|
||||||
fn default_compact_retained_turns() -> usize {
|
fn default_compact_retained_tokens() -> u64 {
|
||||||
defaults::COMPACT_RETAINED_TURNS
|
defaults::COMPACT_RETAINED_TOKENS
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for CompactionConfig {
|
impl Default for CompactionConfig {
|
||||||
|
|
@ -222,7 +224,7 @@ impl Default for CompactionConfig {
|
||||||
prune_min_savings: default_prune_min_savings(),
|
prune_min_savings: default_prune_min_savings(),
|
||||||
compact_threshold: None,
|
compact_threshold: None,
|
||||||
compact_request_threshold: None,
|
compact_request_threshold: None,
|
||||||
compact_retained_turns: default_compact_retained_turns(),
|
compact_retained_tokens: default_compact_retained_tokens(),
|
||||||
provider: None,
|
provider: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -357,7 +359,7 @@ model = "claude-sonnet-4-20250514"
|
||||||
assert_eq!(c.prune_min_savings, 4096);
|
assert_eq!(c.prune_min_savings, 4096);
|
||||||
assert_eq!(c.compact_threshold, Some(80000));
|
assert_eq!(c.compact_threshold, Some(80000));
|
||||||
assert_eq!(c.compact_request_threshold, None);
|
assert_eq!(c.compact_request_threshold, None);
|
||||||
assert_eq!(c.compact_retained_turns, 2);
|
assert_eq!(c.compact_retained_tokens, 8000);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
|
||||||
|
|
@ -25,8 +25,8 @@ pub(crate) struct CompactState {
|
||||||
/// Between-requests threshold (safety net). Checked inside a turn
|
/// Between-requests threshold (safety net). Checked inside a turn
|
||||||
/// before each LLM request. `None` disables the request check.
|
/// before each LLM request. `None` disables the request check.
|
||||||
request_threshold: Option<u64>,
|
request_threshold: Option<u64>,
|
||||||
/// Number of recent turns to retain after compaction.
|
/// Token budget retained verbatim at the tail after compaction.
|
||||||
retained_turns: usize,
|
retained_tokens: u64,
|
||||||
/// Consecutive compact failures. At `MAX_COMPACT_FAILURES`, compaction is disabled.
|
/// Consecutive compact failures. At `MAX_COMPACT_FAILURES`, compaction is disabled.
|
||||||
consecutive_failures: AtomicUsize,
|
consecutive_failures: AtomicUsize,
|
||||||
/// `true` immediately after a successful compact, cleared on next normal completion.
|
/// `true` immediately after a successful compact, cleared on next normal completion.
|
||||||
|
|
@ -39,12 +39,12 @@ impl CompactState {
|
||||||
pub(crate) fn new(
|
pub(crate) fn new(
|
||||||
post_run_threshold: Option<u64>,
|
post_run_threshold: Option<u64>,
|
||||||
request_threshold: Option<u64>,
|
request_threshold: Option<u64>,
|
||||||
retained_turns: usize,
|
retained_tokens: u64,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
Self {
|
Self {
|
||||||
post_run_threshold,
|
post_run_threshold,
|
||||||
request_threshold,
|
request_threshold,
|
||||||
retained_turns,
|
retained_tokens,
|
||||||
consecutive_failures: AtomicUsize::new(0),
|
consecutive_failures: AtomicUsize::new(0),
|
||||||
just_compacted: AtomicBool::new(false),
|
just_compacted: AtomicBool::new(false),
|
||||||
disabled: AtomicBool::new(false),
|
disabled: AtomicBool::new(false),
|
||||||
|
|
@ -56,9 +56,9 @@ impl CompactState {
|
||||||
self.request_threshold
|
self.request_threshold
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Number of turns to retain after compaction.
|
/// Token budget retained verbatim at the tail after compaction.
|
||||||
pub(crate) fn retained_turns(&self) -> usize {
|
pub(crate) fn retained_tokens(&self) -> u64 {
|
||||||
self.retained_turns
|
self.retained_tokens
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Whether compaction has been disabled by the circuit breaker.
|
/// Whether compaction has been disabled by the circuit breaker.
|
||||||
|
|
@ -115,7 +115,7 @@ mod tests {
|
||||||
fn both_thresholds_configured() {
|
fn both_thresholds_configured() {
|
||||||
let state = CompactState::new(Some(80_000), Some(90_000), 2);
|
let state = CompactState::new(Some(80_000), Some(90_000), 2);
|
||||||
assert_eq!(state.request_threshold(), Some(90_000));
|
assert_eq!(state.request_threshold(), Some(90_000));
|
||||||
assert_eq!(state.retained_turns(), 2);
|
assert_eq!(state.retained_tokens(), 2);
|
||||||
|
|
||||||
assert!(!state.exceeds_request(70_000));
|
assert!(!state.exceeds_request(70_000));
|
||||||
assert!(!state.exceeds_post_run(70_000));
|
assert!(!state.exceeds_post_run(70_000));
|
||||||
|
|
|
||||||
|
|
@ -415,10 +415,10 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
|
||||||
(
|
(
|
||||||
c.compact_threshold,
|
c.compact_threshold,
|
||||||
c.compact_request_threshold,
|
c.compact_request_threshold,
|
||||||
c.compact_retained_turns,
|
c.compact_retained_tokens,
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
.unwrap_or((None, None, 2));
|
.unwrap_or((None, None, manifest::defaults::COMPACT_RETAINED_TOKENS));
|
||||||
|
|
||||||
let tracker_for_usage = self.usage_tracker.clone();
|
let tracker_for_usage = self.usage_tracker.clone();
|
||||||
self.worker_mut().on_usage(move |event| {
|
self.worker_mut().on_usage(move |event| {
|
||||||
|
|
@ -648,8 +648,8 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
|
||||||
let retained = self
|
let retained = self
|
||||||
.compact_state
|
.compact_state
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.map(|s| s.retained_turns())
|
.map(|s| s.retained_tokens())
|
||||||
.unwrap_or(2);
|
.unwrap_or(manifest::defaults::COMPACT_RETAINED_TOKENS);
|
||||||
|
|
||||||
match self.compact(retained).await {
|
match self.compact(retained).await {
|
||||||
Ok(new_session_id) => {
|
Ok(new_session_id) => {
|
||||||
|
|
@ -691,7 +691,7 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
||||||
let retained = state.retained_turns();
|
let retained = state.retained_tokens();
|
||||||
match self.compact(retained).await {
|
match self.compact(retained).await {
|
||||||
Ok(new_session_id) => {
|
Ok(new_session_id) => {
|
||||||
info!(
|
info!(
|
||||||
|
|
@ -791,24 +791,15 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
|
||||||
/// - a clone of the main LlmClient via `clone_boxed()`.
|
/// - a clone of the main LlmClient via `clone_boxed()`.
|
||||||
///
|
///
|
||||||
/// Returns the new session ID.
|
/// Returns the new session ID.
|
||||||
pub async fn compact(&mut self, retained_turns: usize) -> Result<SessionId, PodError> {
|
pub async fn compact(&mut self, retained_tokens: u64) -> Result<SessionId, PodError> {
|
||||||
|
// Decide the cut point by projecting the UsageRecord timeline onto
|
||||||
|
// the current history: keep the tail whose estimated token count is
|
||||||
|
// within `retained_tokens`. Item-granular, turn boundaries ignored.
|
||||||
|
let cut = self.split_for_retained(retained_tokens);
|
||||||
|
|
||||||
let worker = self.worker.as_ref().expect("worker taken during run");
|
let worker = self.worker.as_ref().expect("worker taken during run");
|
||||||
let history = worker.history();
|
let history = worker.history();
|
||||||
|
let retain_from = cut.index.min(history.len());
|
||||||
// Identify turn boundaries (user message positions).
|
|
||||||
let turn_starts: Vec<usize> = history
|
|
||||||
.iter()
|
|
||||||
.enumerate()
|
|
||||||
.filter(|(_, item)| item.is_user_message())
|
|
||||||
.map(|(i, _)| i)
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
// Items to retain: everything from `retained_turns` turns ago onward.
|
|
||||||
let retain_from = if turn_starts.len() > retained_turns {
|
|
||||||
turn_starts[turn_starts.len() - retained_turns]
|
|
||||||
} else {
|
|
||||||
0
|
|
||||||
};
|
|
||||||
let retained_items = history[retain_from..].to_vec();
|
let retained_items = history[retain_from..].to_vec();
|
||||||
let items_to_summarise = &history[..retain_from];
|
let items_to_summarise = &history[..retain_from];
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -250,7 +250,7 @@ async fn agents_md_not_reread_after_compact() {
|
||||||
// Mutate the file after the first turn — must not affect the cached
|
// Mutate the file after the first turn — must not affect the cached
|
||||||
// system prompt either on a subsequent turn or across compaction.
|
// system prompt either on a subsequent turn or across compaction.
|
||||||
std::fs::write(&agents_path, "mutated").unwrap();
|
std::fs::write(&agents_path, "mutated").unwrap();
|
||||||
pod.compact(1).await.unwrap();
|
pod.compact(0).await.unwrap();
|
||||||
let after_compact = pod.worker().get_system_prompt().unwrap().to_string();
|
let after_compact = pod.worker().get_system_prompt().unwrap().to_string();
|
||||||
assert!(after_compact.contains("original"));
|
assert!(after_compact.contains("original"));
|
||||||
assert!(!after_compact.contains("mutated"));
|
assert!(!after_compact.contains("mutated"));
|
||||||
|
|
@ -277,7 +277,7 @@ async fn compact_preserves_system_prompt() {
|
||||||
let before = pod.worker().get_system_prompt().unwrap().to_string();
|
let before = pod.worker().get_system_prompt().unwrap().to_string();
|
||||||
pod.run("second").await.unwrap();
|
pod.run("second").await.unwrap();
|
||||||
|
|
||||||
pod.compact(1).await.unwrap();
|
pod.compact(0).await.unwrap();
|
||||||
|
|
||||||
let after = pod.worker().get_system_prompt().unwrap().to_string();
|
let after = pod.worker().get_system_prompt().unwrap().to_string();
|
||||||
assert_eq!(before, after);
|
assert_eq!(before, after);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user