From eb670bfba5faaa413772eab154207e8fb16d53e4 Mon Sep 17 00:00:00 2001 From: Hare Date: Sun, 12 Apr 2026 06:02:46 +0900 Subject: [PATCH] =?UTF-8?q?Prune=E3=81=AE=E5=AE=9F=E8=A3=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 1 + TODO.md | 3 + crates/llm-worker/src/lib.rs | 1 + crates/llm-worker/src/prune.rs | 282 +++++++++++++++++++++++++++++++++ crates/pod/Cargo.toml | 1 + crates/pod/src/lib.rs | 3 + crates/pod/src/prune_hook.rs | 38 +++++ tickets/context-compaction.md | 33 ++-- 8 files changed, 353 insertions(+), 9 deletions(-) create mode 100644 crates/llm-worker/src/prune.rs create mode 100644 crates/pod/src/prune_hook.rs diff --git a/Cargo.lock b/Cargo.lock index 24ea6f6d..a54d0d61 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1234,6 +1234,7 @@ dependencies = [ "thiserror", "tokio", "toml", + "tracing", ] [[package]] diff --git a/TODO.md b/TODO.md index e8d14a5a..fefe9e1f 100644 --- a/TODO.md +++ b/TODO.md @@ -15,6 +15,9 @@ - [x] Hook モジュールの llm-worker からの除去 → [tickets/remove-hook-module.md](tickets/remove-hook-module.md) - [x] api_key_file: ファイルパスによるAPIキー解決 → [tickets/api-key-file.md](tickets/api-key-file.md) - [ ] コンテキスト圧縮 (Prune + Compact) → [tickets/context-compaction.md](tickets/context-compaction.md) + - [x] ToolOutput 再設計 + 旧モジュール削除 (Step 1-2) + - [x] prune.rs + PruneHook (Step 3-4) + - [ ] Compact (Step 5-8、session-store-extraction 後) - [x] Protocol: request-response パターン (GetHistory等) → [tickets/request-response-protocol.md](tickets/request-response-protocol.md) - [ ] パーミッション: パターンベースのツール実行制御 → [tickets/permission-extension-point.md](tickets/permission-extension-point.md) - [ ] session-store: persistence クレートの再構成(wrap廃止、リネーム) → [tickets/session-store-extraction.md](tickets/session-store-extraction.md) diff --git a/crates/llm-worker/src/lib.rs b/crates/llm-worker/src/lib.rs index 48f80cf0..3c4f1e2a 100644 --- a/crates/llm-worker/src/lib.rs +++ b/crates/llm-worker/src/lib.rs @@ -45,6 +45,7 
@@ pub(crate) mod callback;
 pub mod event;
 pub mod llm_client;
 pub mod interceptor;
+pub mod prune;
 pub mod state;
 pub mod timeline;
 pub mod tool;
diff --git a/crates/llm-worker/src/prune.rs b/crates/llm-worker/src/prune.rs
new file mode 100644
index 00000000..dd112e83
--- /dev/null
+++ b/crates/llm-worker/src/prune.rs
@@ -0,0 +1,282 @@
+//! Conditional Prune algorithm for context window management.
+//!
+//! Removes `content` from old [`Item::ToolResult`] entries, leaving only
+//! their `summary`. This reclaims tokens while preserving the "what
+//! happened" trail.
+//!
+//! Pruning is **conditional**: it only fires when the estimated token
+//! savings reach [`PruneConfig::min_savings`], avoiding unnecessary
+//! KV-cache invalidation.
+
+use serde::{Deserialize, Serialize};
+
+use crate::llm_client::types::Item;
+
+/// Configuration for the Prune algorithm.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct PruneConfig {
+    /// Number of recent turns to protect from pruning.
+    /// A "turn" starts at each user message.
+    #[serde(default = "default_protected_turns")]
+    pub protected_turns: usize,
+
+    /// Minimum estimated token savings required to actually prune.
+    /// If the prunable content is smaller than this, we skip to
+    /// avoid pointless KV-cache invalidation.
+    #[serde(default = "default_min_savings")]
+    pub min_savings: usize,
+}
+
+fn default_protected_turns() -> usize {
+    3
+}
+fn default_min_savings() -> usize {
+    4096
+}
+
+impl Default for PruneConfig {
+    fn default() -> Self {
+        Self {
+            protected_turns: default_protected_turns(),
+            min_savings: default_min_savings(),
+        }
+    }
+}
+
+/// Result of a prune operation.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct PruneResult {
+    /// Number of items whose `content` was set to `None`.
+    pub pruned_count: usize,
+    /// Estimated tokens reclaimed.
+    pub estimated_savings: usize,
+}
+
+/// Estimate the token count of a string (rough: UTF-8 bytes / 4).
+fn estimate_tokens(s: &str) -> usize {
+    s.len() / 4 // NOTE: `str::len` counts UTF-8 bytes, not chars
+}
+
+/// Find indices where each "turn" begins.
+///
+/// A turn starts at every user message. Returns the indices of those
+/// user messages in ascending order.
+fn find_turn_starts(items: &[Item]) -> Vec<usize> {
+    items
+        .iter()
+        .enumerate()
+        .filter(|(_, item)| item.is_user_message())
+        .map(|(i, _)| i)
+        .collect()
+}
+
+/// Conditionally prune old tool-result content from `items`.
+///
+/// Returns `None` if pruning was skipped (not enough savings or not
+/// enough turns). Returns `Some(PruneResult)` if items were modified.
+///
+/// # Algorithm
+///
+/// 1. Identify turn boundaries (user-message positions).
+/// 2. Compute the protection boundary: items before the last
+///    `protected_turns` turns are candidates (every item when it is 0).
+/// 3. Sum the estimated token savings from prunable `content` fields.
+/// 4. If savings < `min_savings`, skip.
+/// 5. Otherwise, set `content = None` on each candidate.
+pub fn prune(items: &mut [Item], config: &PruneConfig) -> Option<PruneResult> {
+    let turn_starts = find_turn_starts(items);
+
+    // Not enough turns to have anything outside the protected window.
+    if turn_starts.len() <= config.protected_turns {
+        return None;
+    }
+    // Everything before `boundary` is a candidate; `protected_turns == 0` means every item.
+    let first_protected = turn_starts.len() - config.protected_turns;
+    let boundary = turn_starts.get(first_protected).copied().unwrap_or(items.len());
+
+    // Collect prunable indices and total savings.
+    let mut total_savings: usize = 0;
+    let mut prunable: Vec<usize> = Vec::new();
+
+    for (i, item) in items[..boundary].iter().enumerate() {
+        if let Item::ToolResult {
+            content: Some(c), ..
+        } = item
+        {
+            total_savings += estimate_tokens(c);
+            prunable.push(i);
+        }
+    }
+
+    if prunable.is_empty() || total_savings < config.min_savings {
+        return None;
+    }
+
+    // Apply: drop content, keep summary.
+    for &i in &prunable {
+        if let Item::ToolResult { content, .. } = &mut items[i] {
+            *content = None;
+        }
+    }
+
+    Some(PruneResult {
+        pruned_count: prunable.len(),
+        estimated_savings: total_savings,
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Helper: build a history with interleaved user messages and tool results.
+    fn make_history(turns: &[(&str, Vec<(&str, Option<&str>)>)]) -> Vec<Item> {
+        let mut items = Vec::new();
+        for (user_msg, tool_results) in turns {
+            items.push(Item::user_message(*user_msg));
+            items.push(Item::assistant_message("ok"));
+            for (i, (summary, content)) in tool_results.iter().enumerate() {
+                let call_id = format!("call_{}", items.len() + i);
+                items.push(Item::tool_call(&call_id, "some_tool", "{}"));
+                match content {
+                    Some(c) => items.push(Item::tool_result_with_content(&call_id, *summary, *c)),
+                    None => items.push(Item::tool_result(&call_id, *summary)),
+                }
+            }
+        }
+        items
+    }
+
+    #[test]
+    fn no_prune_when_too_few_turns() {
+        let mut items = make_history(&[
+            ("turn1", vec![("summary1", Some("big content here"))]),
+            ("turn2", vec![("summary2", Some("more content"))]),
+        ]);
+        let config = PruneConfig {
+            protected_turns: 3,
+            min_savings: 0,
+        };
+        assert!(prune(&mut items, &config).is_none());
+    }
+
+    #[test]
+    fn no_prune_when_savings_below_threshold() {
+        let mut items = make_history(&[
+            ("turn1", vec![("s", Some("tiny"))]), // ~1 token
+            ("turn2", vec![]),
+            ("turn3", vec![]),
+            ("turn4", vec![]),
+        ]);
+        let config = PruneConfig {
+            protected_turns: 2,
+            min_savings: 9999,
+        };
+        assert!(prune(&mut items, &config).is_none());
+    }
+
+    #[test]
+    fn prune_old_content() {
+        // 4 turns. protected_turns=2 → turns 1-2 are candidates.
+ let big = "x".repeat(4096 * 4); // ~4096 tokens + let mut items = make_history(&[ + ("turn1", vec![("s1", Some(&big))]), + ("turn2", vec![("s2", Some(&big))]), + ("turn3", vec![("s3", Some("keep me"))]), + ("turn4", vec![("s4", Some("keep me too"))]), + ]); + let config = PruneConfig { + protected_turns: 2, + min_savings: 1000, + }; + + let result = prune(&mut items, &config).expect("should prune"); + assert_eq!(result.pruned_count, 2); + assert!(result.estimated_savings >= 8000); + + // Verify: pruned items have content=None, protected items keep content. + for item in &items { + if let Item::ToolResult { + summary, content, .. + } = item + { + if summary == "s1" || summary == "s2" { + assert!(content.is_none(), "old content should be pruned"); + } else { + assert!(content.is_some(), "protected content should remain"); + } + } + } + } + + #[test] + fn idempotent() { + let big = "x".repeat(4096 * 4); + let mut items = make_history(&[ + ("turn1", vec![("s1", Some(&big))]), + ("turn2", vec![]), + ("turn3", vec![]), + ("turn4", vec![]), + ]); + let config = PruneConfig { + protected_turns: 2, + min_savings: 100, + }; + + let first = prune(&mut items, &config).expect("first prune"); + assert_eq!(first.pruned_count, 1); + + // Second call: nothing left to prune. + assert!(prune(&mut items, &config).is_none()); + } + + #[test] + fn already_pruned_items_skipped() { + // Items that already have content=None are not counted as savings. + let mut items = make_history(&[ + ("turn1", vec![("s1", None)]), // already pruned + ("turn2", vec![]), + ("turn3", vec![]), + ("turn4", vec![]), + ]); + let config = PruneConfig { + protected_turns: 2, + min_savings: 0, // Even with threshold 0, no savings means no prune + }; + + assert!(prune(&mut items, &config).is_none()); + } + + #[test] + fn protected_turns_boundary_exact() { + // 3 turns with protected_turns=2: + // Turn 1 content should be pruned, turns 2-3 protected. 
+ let big = "x".repeat(4096 * 4); + let mut items = make_history(&[ + ("turn1", vec![("s1", Some(&big))]), + ("turn2", vec![("s2", Some("protected"))]), + ("turn3", vec![("s3", Some("also protected"))]), + ]); + let config = PruneConfig { + protected_turns: 2, + min_savings: 100, + }; + + let result = prune(&mut items, &config).expect("should prune turn1"); + assert_eq!(result.pruned_count, 1); + + // Verify s1 pruned, s2 and s3 intact. + for item in &items { + if let Item::ToolResult { + summary, content, .. + } = item + { + match summary.as_str() { + "s1" => assert!(content.is_none()), + "s2" | "s3" => assert!(content.is_some()), + _ => {} + } + } + } + } +} diff --git a/crates/pod/Cargo.toml b/crates/pod/Cargo.toml index 15c0e38a..f452b641 100644 --- a/crates/pod/Cargo.toml +++ b/crates/pod/Cargo.toml @@ -17,6 +17,7 @@ serde_json = "1.0.149" thiserror = "2.0" tokio = { version = "1.49", features = ["fs", "io-util", "macros", "net", "rt-multi-thread", "signal", "sync"] } toml = "1.1.2" +tracing = "0.1.44" [dev-dependencies] async-trait = "0.1.89" diff --git a/crates/pod/src/lib.rs b/crates/pod/src/lib.rs index 1da428b8..4e4113fc 100644 --- a/crates/pod/src/lib.rs +++ b/crates/pod/src/lib.rs @@ -4,12 +4,15 @@ pub mod runtime_dir; pub mod shared_state; pub mod socket_server; +pub mod prune_hook; + mod hook_interceptor; mod pod; pub use controller::{PodController, PodHandle}; pub use manifest::{PodManifest, ProviderConfig, ProviderKind, Scope}; pub use hook::{Hook, HookEventKind, HookRegistryBuilder}; +pub use prune_hook::PruneHook; pub use pod::{Pod, PodError, PodRunResult, apply_worker_manifest}; pub use protocol::{ErrorCode, Event, Method, TurnResult}; pub use provider::{ProviderError, build_client}; diff --git a/crates/pod/src/prune_hook.rs b/crates/pod/src/prune_hook.rs new file mode 100644 index 00000000..c653f687 --- /dev/null +++ b/crates/pod/src/prune_hook.rs @@ -0,0 +1,38 @@ +//! PruneHook — applies conditional pruning before each LLM request. +//! +//! 
Wraps [`llm_worker::prune::prune()`] as a [`Hook`] so
+//! that Pod can register it in the hook pipeline.
+
+use async_trait::async_trait;
+use llm_worker::interceptor::PreRequestAction;
+use llm_worker::prune::{PruneConfig, prune};
+use llm_worker::Item;
+use tracing::debug;
+
+use crate::hook::{Hook, PreLlmRequest};
+
+/// Hook that conditionally prunes old tool-result content before each
+/// LLM request, reclaiming context-window tokens.
+pub struct PruneHook {
+    config: PruneConfig,
+}
+
+impl PruneHook {
+    pub fn new(config: PruneConfig) -> Self {
+        Self { config }
+    }
+}
+
+#[async_trait]
+impl Hook<PreLlmRequest> for PruneHook {
+    async fn call(&self, context: &mut Vec<Item>) -> PreRequestAction {
+        if let Some(result) = prune(context, &self.config) {
+            debug!(
+                pruned = result.pruned_count,
+                estimated_savings = result.estimated_savings,
+                "Pruned old tool-result content"
+            );
+        }
+        PreRequestAction::Continue // the request proceeds whether or not anything was pruned
+    }
+}
diff --git a/tickets/context-compaction.md b/tickets/context-compaction.md
index cd03a9e2..1031bda4 100644
--- a/tickets/context-compaction.md
+++ b/tickets/context-compaction.md
@@ -182,20 +182,23 @@ const MAX_COMPACT_FAILURES: usize = 3;
 
 ### Compaction フロー
 
-Compact は fork と同じ構造。旧セッションを保全し、新しい SessionId で圧縮後のセッションを開始する。
+session-store-extraction 後の構造を前提とする。
+Pod が Worker を直接保持し、session-store は save/restore の関数群。
 
 ```
 Run 完了 → input_tokens > threshold
   ↓
-Controller: history を要約プロンプトに変換
+Pod: worker.history() + worker.request_config() を読み出す
   ↓
-Controller: 要約用 Worker 生成(ツールなし、temperature=0)
+Pod: build_client(&manifest.provider) で要約用 Worker を生成(ツールなし、temperature=0)
   ↓
-要約 Worker: 構造化要約を生成
+要約 Worker: history を要約プロンプトとして受け取り、構造化要約を生成
   ↓
-Controller: [要約 Item, 直近 N ターン] で新 history を構築
+Pod: [要約 Item, 直近 N ターン] で新 history を構築
   ↓
-Controller: 新 SessionId で新セッションを作成(SessionStart に compacted_from を記録)
+Pod: worker.set_history(新 history)
+  ↓
+Pod: session_store::save_compacted(store, new_id, compacted_from, ...) 
で新セッション開始 ↓ 旧セッション JSONL はそのまま保全(append-only 原則を維持) ``` @@ -228,7 +231,17 @@ LogEntry::SessionStart { - compact: `compacted_from = Some(...)` - EntryHash で元セッションのどの時点からの操作かを追跡可能 -### 要約フォーマット +### 要約用 Worker + +- `build_client(&manifest.provider, manifest_dir)` で新しい LlmClient を作る + - reqwest::Client は内部 Arc。1回きりのリクエストなので新規プールで問題なし +- Pod が `manifest_dir` を保持する必要がある(現状 `from_manifest` では受け取るが保持していない) + +### 要約プロンプト + +TODO: system prompt の文面、history を文字列化する方法を詰める。 + +出力フォーマット: ``` ## Original Task @@ -312,15 +325,17 @@ pub struct CompactionConfig { 3. **`prune.rs`** — 条件付き Prune アルゴリズム。単体テスト 4. **`PruneHook`** — Pod に Hook 実装 5. **`CompactionConfig`** — manifest にセクション追加 -6. **`LogEntry::Compacted`** — session_log に variant 追加 -7. **`compact()` 関数** — Controller に compaction ロジック + サーキットブレーカー +6. **`LogEntry` に provenance フィールド追加** — SessionStart に `compacted_from` / `forked_from` +7. **`compact()` 関数** — Pod に compaction ロジック + サーキットブレーカー 8. **Protocol** — `CompactionStart` / `CompactionDone` イベント追加 ステップ 1-2 は ToolOutput 移行として独立実行可能。 ステップ 3-4(Prune)と 5-6(Compact 準備)は並行可能。 +ステップ 5-8 は session-store-extraction 完了後に実装。 --- ## 依存チケット - ~~[remove-hook-module.md](remove-hook-module.md)~~ — 完了 +- [session-store-extraction.md](session-store-extraction.md) — ステップ 5-8 の前提