yoi/crates/pod/src/compact/prune.rs

125 lines
5.9 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! Prune integration — wires the Worker's prune projection to the Pod's
//! usage-history-backed token accounting.
//!
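//! The Worker performs the context projection itself (right after
//! `request_context` is built in `worker.rs`). Because the Worker has no
//! knowledge of the usage history, the savings estimate used for the
//! `min_savings` check is injected from outside as a callback. This module
//! provides the `impl Pod` that builds those callbacks and plugs them into the
//! Worker.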
//!
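//! A `PruneObserver` is installed through the same path; on every evaluation
//! it pushes a `prune.fire` / `prune.skip` metric into the `MetricsTracker`.
//! On `Fired`, the uuid is also stashed in the `UsageTracker` so that a
//! `prune.post_request` metric can be emitted together with the subsequent
//! `LlmUsage`.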
use llm_worker::Item;
use llm_worker::llm_client::client::LlmClient;
use llm_worker::prune::{
PruneConfig, PruneDecision, PruneObserver, SavingsEstimator, TokenEstimator,
};
use session_metrics::Metric;
use session_store::Store;
use crate::Pod;
use crate::compact::token_counter::{
EstimateSource, savings_for_prune_impl, token_estimates_for_prune_impl,
};
impl<C: LlmClient, St: Store> Pod<C, St> {
    /// Turn on prune projection for the Worker owned by this Pod.
    ///
    /// Four things are installed on the Worker, in this order: the
    /// [`PruneConfig`], a [`TokenEstimator`], a [`SavingsEstimator`] and a
    /// [`PruneObserver`].
    ///
    /// # Estimators
    ///
    /// On every call, each estimator clones the persisted records behind
    /// [`Pod::usage_history_handle`], appends the in-flight records reported
    /// by the `UsageTracker`, and evaluates that merged snapshot. Including
    /// the in-flight records is what lets a multi-request tool loop prune
    /// before the surrounding Pod run has finished.
    ///
    /// The savings estimator reports `0` whenever the estimate's source is
    /// [`EstimateSource::NoData`] (before the first LLM call, or right after
    /// a compact), which keeps the projection from firing until usage data
    /// exists.
    ///
    /// # Observer
    ///
    /// The observer pushes one metric into the shared [`MetricsTracker`] per
    /// evaluation:
    ///
    /// * `Fired` — `prune.fire`, valued with the estimated savings and tagged
    ///   with `candidate_count`, plus `protected_start_index` when present.
    ///   A fresh correlation id is attached to the metric and then handed to
    ///   the [`UsageTracker`], so the next `LlmUsage` can be paired with a
    ///   `prune.post_request` metric carrying the same id.
    /// * `SkippedNoCandidates` — `prune.skip` with `reason = no_candidates`.
    /// * `SkippedBelowMinSavings` — `prune.skip` with
    ///   `reason = below_min_savings`, `candidate_count`, the estimated
    ///   savings as value, plus `protected_start_index` when present.
    ///
    /// # Panics
    ///
    /// This method itself does not lock anything, but the installed estimator
    /// closures panic if the usage-history mutex is poisoned.
    pub fn attach_prune(&mut self, config: PruneConfig) {
        // --- token estimator -------------------------------------------------
        let persisted_for_tokens = self.usage_history_handle();
        let in_flight_for_tokens = self.usage_tracker_handle();
        let token_estimator: TokenEstimator = Box::new(move |history: &[Item]| {
            let records = {
                // The guard is a temporary of this statement, so the lock is
                // released before the tracker is queried.
                let mut merged = persisted_for_tokens
                    .lock()
                    .expect("usage_history poisoned")
                    .clone();
                merged.extend(in_flight_for_tokens.records());
                merged
            };
            token_estimates_for_prune_impl(history, &records)
        });

        // --- savings estimator -----------------------------------------------
        let persisted_for_savings = self.usage_history_handle();
        let in_flight_for_savings = self.usage_tracker_handle();
        let estimator: SavingsEstimator = Box::new(move |history: &[Item], indices| {
            let records = {
                let mut merged = persisted_for_savings
                    .lock()
                    .expect("usage_history poisoned")
                    .clone();
                merged.extend(in_flight_for_savings.records());
                merged
            };
            let estimate = savings_for_prune_impl(history, &records, indices);
            // Without any measurement there is nothing to save yet.
            if matches!(estimate.source, EstimateSource::NoData) {
                0
            } else {
                estimate.tokens
            }
        });

        // --- observer --------------------------------------------------------
        let metrics = self.metrics_tracker_handle();
        let usage_tracker = self.usage_tracker_handle();
        let observer: PruneObserver = Box::new(move |eval| {
            match &eval.decision {
                PruneDecision::Fired { .. } => {
                    let correlation_id = uuid::Uuid::now_v7().to_string();
                    let fired = Metric::now("prune.fire")
                        .with_value(eval.estimated_savings as f64)
                        .with_correlation_id(&correlation_id)
                        .with_dimension("candidate_count", eval.candidate_count.to_string());
                    let fired = match eval.protected_start_index {
                        Some(start) => {
                            fired.with_dimension("protected_start_index", start.to_string())
                        }
                        None => fired,
                    };
                    metrics.push(fired);
                    // Remember the id so the following usage record can be correlated.
                    usage_tracker.note_correlation_id(correlation_id);
                }
                PruneDecision::SkippedNoCandidates => {
                    let skipped = Metric::now("prune.skip").with_dimension("reason", "no_candidates");
                    metrics.push(skipped);
                }
                PruneDecision::SkippedBelowMinSavings => {
                    let skipped = Metric::now("prune.skip")
                        .with_dimension("reason", "below_min_savings")
                        .with_dimension("candidate_count", eval.candidate_count.to_string())
                        .with_value(eval.estimated_savings as f64);
                    let skipped = match eval.protected_start_index {
                        Some(start) => {
                            skipped.with_dimension("protected_start_index", start.to_string())
                        }
                        None => skipped,
                    };
                    metrics.push(skipped);
                }
            }
        });

        // --- wire everything into the Worker ---------------------------------
        let worker = self.worker_mut();
        worker.set_prune_config(Some(config));
        worker.set_token_estimator(Some(token_estimator));
        worker.set_savings_estimator(Some(estimator));
        worker.set_prune_observer(Some(observer));
    }

    /// Attach prune projection when the manifest asks for it.
    ///
    /// If the manifest has a `[compaction]` section, a [`PruneConfig`] is
    /// built from its `prune_protected_tokens` and `prune_min_savings` fields
    /// and passed to [`attach_prune`](Self::attach_prune). Without that
    /// section this is a no-op. Called from all Pod constructors so prune is
    /// active whenever the manifest requests it.
    pub(crate) fn apply_prune_from_manifest(&mut self) {
        // Build the config first so the shared borrow of the manifest has
        // ended before `attach_prune` takes `&mut self`.
        let from_manifest = self
            .manifest()
            .compaction
            .as_ref()
            .map(|section| PruneConfig {
                protected_tokens: section.prune_protected_tokens,
                min_savings: section.prune_min_savings,
            });
        if let Some(config) = from_manifest {
            self.attach_prune(config);
        }
    }
}