//! Prune integration — wires the Worker's prune projection to the Pod's //! usage-history-backed token accounting. //! //! Worker 自身がコンテキスト射影を行う(`worker.rs` の `request_context` 構築 //! 直後)。Worker は usage 履歴を知らないので、`min_savings` 判定に使う savings //! の見積もりはコールバックで外部から注入する。このモジュールはそのコールバック //! を組み立てて Worker に差し込むための `impl Pod` を提供する。 //! //! 同じ経路で `PruneObserver` も install し、評価のたびに `prune.fire` / //! `prune.skip` metric を `MetricsTracker` に積む。`Fired` 時は uuid を //! `UsageTracker` にも stash しておき、後続の `LlmUsage` と組で //! `prune.post_request` を吐けるようにする。 use llm_worker::Item; use llm_worker::llm_client::client::LlmClient; use llm_worker::prune::{ PruneConfig, PruneDecision, PruneObserver, SavingsEstimator, TokenEstimator, }; use session_metrics::Metric; use session_store::Store; use crate::Pod; use crate::compact::token_counter::{ EstimateSource, savings_for_prune_impl, token_estimates_for_prune_impl, }; impl Pod { /// Enable prune projection on the underlying Worker. /// /// Registers the config and token/savings-estimator closures on the Worker. /// The estimators combine persisted [`Pod::usage_history_handle`] records /// with in-flight `UsageTracker` records so multi-request tool loops can /// prune before the surrounding Pod run finishes. /// /// Measurement-less estimates (before the first LLM call, or immediately /// after a compact) return `0` from the estimator, which naturally /// prevents the prune projection from firing until usage data exists. /// /// Also installs a [`PruneObserver`] that pushes `prune.fire` / /// `prune.skip` metrics into the shared [`MetricsTracker`]. On `Fired` /// the observer additionally stashes a fresh correlation_id in /// [`UsageTracker`] so the next `LlmUsage` can be paired with a /// `prune.post_request` metric carrying the same id. pub fn attach_prune(&mut self, config: PruneConfig) { let usage_history_for_tokens = self.usage_history_handle(); let usage_tracker_for_tokens = self.usage_tracker_handle(); let token_estimator: TokenEstimator = Box::new(move |history: &[Item]| { let mut snapshot = usage_history_for_tokens .lock() .expect("usage_history poisoned") .clone(); snapshot.extend(usage_tracker_for_tokens.records()); token_estimates_for_prune_impl(history, &snapshot) }); let usage_history_for_savings = self.usage_history_handle(); let usage_tracker_for_savings = self.usage_tracker_handle(); let estimator: SavingsEstimator = Box::new(move |history: &[Item], indices| { let mut snapshot = usage_history_for_savings .lock() .expect("usage_history poisoned") .clone(); snapshot.extend(usage_tracker_for_savings.records()); let est = savings_for_prune_impl(history, &snapshot, indices); match est.source { EstimateSource::NoData => 0, _ => est.tokens, } }); let metrics = self.metrics_tracker_handle(); let usage_tracker = self.usage_tracker_handle(); let observer: PruneObserver = Box::new(move |eval| match &eval.decision { PruneDecision::Fired { .. } => { let correlation_id = uuid::Uuid::now_v7().to_string(); let mut metric = Metric::now("prune.fire") .with_value(eval.estimated_savings as f64) .with_correlation_id(&correlation_id) .with_dimension("candidate_count", eval.candidate_count.to_string()); if let Some(protected_start) = eval.protected_start_index { metric = metric.with_dimension("protected_start_index", protected_start.to_string()); } metrics.push(metric); usage_tracker.note_correlation_id(correlation_id); } PruneDecision::SkippedNoCandidates => { metrics.push(Metric::now("prune.skip").with_dimension("reason", "no_candidates")); } PruneDecision::SkippedBelowMinSavings => { let mut metric = Metric::now("prune.skip") .with_dimension("reason", "below_min_savings") .with_dimension("candidate_count", eval.candidate_count.to_string()) .with_value(eval.estimated_savings as f64); if let Some(protected_start) = eval.protected_start_index { metric = metric.with_dimension("protected_start_index", protected_start.to_string()); } metrics.push(metric); } }); let worker = self.worker_mut(); worker.set_prune_config(Some(config)); worker.set_token_estimator(Some(token_estimator)); worker.set_savings_estimator(Some(estimator)); worker.set_prune_observer(Some(observer)); } /// If the manifest has a `[compaction]` section, build a `PruneConfig` /// from its `prune_*` fields and call [`attach_prune`](Self::attach_prune). /// Otherwise no-op. Called from all Pod constructors so prune is /// active whenever the manifest asks for it. pub(crate) fn apply_prune_from_manifest(&mut self) { let Some(compaction) = self.manifest().compaction.as_ref() else { return; }; let config = PruneConfig { protected_tokens: compaction.prune_protected_tokens, min_savings: compaction.prune_min_savings, }; self.attach_prune(config); } }