Pruneの実装
This commit is contained in:
parent
c0d283b47d
commit
eb670bfba5
1
Cargo.lock
generated
1
Cargo.lock
generated
|
|
@ -1234,6 +1234,7 @@ dependencies = [
|
|||
"thiserror",
|
||||
"tokio",
|
||||
"toml",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
|
|||
3
TODO.md
3
TODO.md
|
|
@ -15,6 +15,9 @@
|
|||
- [x] Hook モジュールの llm-worker からの除去 → [tickets/remove-hook-module.md](tickets/remove-hook-module.md)
|
||||
- [x] api_key_file: ファイルパスによるAPIキー解決 → [tickets/api-key-file.md](tickets/api-key-file.md)
|
||||
- [ ] コンテキスト圧縮 (Prune + Compact) → [tickets/context-compaction.md](tickets/context-compaction.md)
|
||||
- [x] ToolOutput 再設計 + 旧モジュール削除 (Step 1-2)
|
||||
- [x] prune.rs + PruneHook (Step 3-4)
|
||||
- [ ] Compact (Step 5-8、session-store-extraction 後)
|
||||
- [x] Protocol: request-response パターン (GetHistory等) → [tickets/request-response-protocol.md](tickets/request-response-protocol.md)
|
||||
- [ ] パーミッション: パターンベースのツール実行制御 → [tickets/permission-extension-point.md](tickets/permission-extension-point.md)
|
||||
- [ ] session-store: persistence クレートの再構成(wrap廃止、リネーム) → [tickets/session-store-extraction.md](tickets/session-store-extraction.md)
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@ pub(crate) mod callback;
|
|||
pub mod event;
|
||||
pub mod llm_client;
|
||||
pub mod interceptor;
|
||||
pub mod prune;
|
||||
pub mod state;
|
||||
pub mod timeline;
|
||||
pub mod tool;
|
||||
|
|
|
|||
282
crates/llm-worker/src/prune.rs
Normal file
282
crates/llm-worker/src/prune.rs
Normal file
|
|
@ -0,0 +1,282 @@
|
|||
//! Conditional Prune algorithm for context window management.
|
||||
//!
|
||||
//! Removes `content` from old [`Item::ToolResult`] entries, leaving only
|
||||
//! their `summary`. This reclaims tokens while preserving the "what
|
||||
//! happened" trail.
|
||||
//!
|
||||
//! Pruning is **conditional**: it only fires when the estimated token
|
||||
//! savings exceed [`PruneConfig::min_savings`], avoiding unnecessary
|
||||
//! KV-cache invalidation.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::llm_client::types::Item;
|
||||
|
||||
/// Configuration for the Prune algorithm.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct PruneConfig {
|
||||
/// Number of recent turns to protect from pruning.
|
||||
/// A "turn" starts at each user message.
|
||||
#[serde(default = "default_protected_turns")]
|
||||
pub protected_turns: usize,
|
||||
|
||||
/// Minimum estimated token savings required to actually prune.
|
||||
/// If the prunable content is smaller than this, we skip to
|
||||
/// avoid pointless KV-cache invalidation.
|
||||
#[serde(default = "default_min_savings")]
|
||||
pub min_savings: usize,
|
||||
}
|
||||
|
||||
/// Serde/`Default` fallback for `protected_turns`: the three most recent
/// turns are protected.
fn default_protected_turns() -> usize {
    const PROTECTED_TURNS: usize = 3;
    PROTECTED_TURNS
}
|
||||
/// Serde/`Default` fallback for `min_savings`: 4096 estimated tokens.
fn default_min_savings() -> usize {
    const MIN_SAVINGS: usize = 4096;
    MIN_SAVINGS
}
|
||||
|
||||
impl Default for PruneConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
protected_turns: default_protected_turns(),
|
||||
min_savings: default_min_savings(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Summary of a prune pass that actually modified the history.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PruneResult {
    /// How many tool results had their `content` cleared to `None`.
    pub pruned_count: usize,
    /// Estimated number of tokens freed by clearing that content.
    pub estimated_savings: usize,
}
|
||||
|
||||
/// Estimate the token count of a string.
///
/// Rough heuristic: one token per four UTF-8 **bytes**, rounded down.
/// `str::len` counts bytes rather than characters, so multi-byte text
/// (e.g. CJK) contributes more per character than ASCII does.
fn estimate_tokens(s: &str) -> usize {
    const BYTES_PER_TOKEN: usize = 4;
    s.len() / BYTES_PER_TOKEN
}
|
||||
|
||||
/// Find indices where each "turn" begins.
|
||||
///
|
||||
/// A turn starts at every user message. Returns the indices of those
|
||||
/// user messages in ascending order.
|
||||
fn find_turn_starts(items: &[Item]) -> Vec<usize> {
|
||||
items
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(_, item)| item.is_user_message())
|
||||
.map(|(i, _)| i)
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Conditionally prune old tool-result content from `items`.
|
||||
///
|
||||
/// Returns `None` if pruning was skipped (not enough savings or not
|
||||
/// enough turns). Returns `Some(PruneResult)` if items were modified.
|
||||
///
|
||||
/// # Algorithm
|
||||
///
|
||||
/// 1. Identify turn boundaries (user-message positions).
|
||||
/// 2. Compute the protection boundary: items before the last
|
||||
/// `protected_turns` turns are candidates.
|
||||
/// 3. Sum the estimated token savings from prunable `content` fields.
|
||||
/// 4. If savings < `min_savings`, skip.
|
||||
/// 5. Otherwise, set `content = None` on each candidate.
|
||||
pub fn prune(items: &mut [Item], config: &PruneConfig) -> Option<PruneResult> {
|
||||
let turn_starts = find_turn_starts(items);
|
||||
|
||||
// Not enough turns to have anything outside the protected window.
|
||||
if turn_starts.len() <= config.protected_turns {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Everything before this index is a prune candidate.
|
||||
let boundary = turn_starts[turn_starts.len() - config.protected_turns];
|
||||
|
||||
// Collect prunable indices and total savings.
|
||||
let mut total_savings: usize = 0;
|
||||
let mut prunable: Vec<usize> = Vec::new();
|
||||
|
||||
for (i, item) in items[..boundary].iter().enumerate() {
|
||||
if let Item::ToolResult {
|
||||
content: Some(c), ..
|
||||
} = item
|
||||
{
|
||||
total_savings += estimate_tokens(c);
|
||||
prunable.push(i);
|
||||
}
|
||||
}
|
||||
|
||||
if prunable.is_empty() || total_savings < config.min_savings {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Apply: drop content, keep summary.
|
||||
for &i in &prunable {
|
||||
if let Item::ToolResult { content, .. } = &mut items[i] {
|
||||
*content = None;
|
||||
}
|
||||
}
|
||||
|
||||
Some(PruneResult {
|
||||
pruned_count: prunable.len(),
|
||||
estimated_savings: total_savings,
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a synthetic history. Each turn is a user message, an
    /// assistant reply, and then one tool call / tool result pair per
    /// `(summary, content)` entry; a `None` content yields a tool result
    /// built from its summary alone.
    fn make_history(turns: &[(&str, Vec<(&str, Option<&str>)>)]) -> Vec<Item> {
        let mut history = Vec::new();
        for (prompt, results) in turns {
            history.push(Item::user_message(*prompt));
            history.push(Item::assistant_message("ok"));
            for (offset, (summary, content)) in results.iter().enumerate() {
                let call_id = format!("call_{}", history.len() + offset);
                history.push(Item::tool_call(&call_id, "some_tool", "{}"));
                let result = match content {
                    Some(body) => Item::tool_result_with_content(&call_id, *summary, *body),
                    None => Item::tool_result(&call_id, *summary),
                };
                history.push(result);
            }
        }
        history
    }

    /// Shorthand for an explicit configuration.
    fn config(protected_turns: usize, min_savings: usize) -> PruneConfig {
        PruneConfig {
            protected_turns,
            min_savings,
        }
    }

    /// Payload estimated at roughly 4096 tokens.
    fn big_content() -> String {
        "x".repeat(4096 * 4)
    }

    #[test]
    fn no_prune_when_too_few_turns() {
        let mut items = make_history(&[
            ("turn1", vec![("summary1", Some("big content here"))]),
            ("turn2", vec![("summary2", Some("more content"))]),
        ]);

        let outcome = prune(&mut items, &config(3, 0));
        assert!(outcome.is_none());
    }

    #[test]
    fn no_prune_when_savings_below_threshold() {
        // "tiny" is estimated at about one token, far below the threshold.
        let mut items = make_history(&[
            ("turn1", vec![("s", Some("tiny"))]),
            ("turn2", vec![]),
            ("turn3", vec![]),
            ("turn4", vec![]),
        ]);

        let outcome = prune(&mut items, &config(2, 9999));
        assert!(outcome.is_none());
    }

    #[test]
    fn prune_old_content() {
        // Four turns with two protected: turns 1-2 are the candidates.
        let big = big_content();
        let mut items = make_history(&[
            ("turn1", vec![("s1", Some(big.as_str()))]),
            ("turn2", vec![("s2", Some(big.as_str()))]),
            ("turn3", vec![("s3", Some("keep me"))]),
            ("turn4", vec![("s4", Some("keep me too"))]),
        ]);

        let result = prune(&mut items, &config(2, 1000)).expect("should prune");
        assert_eq!(result.pruned_count, 2);
        assert!(result.estimated_savings >= 8000);

        // Old results lose their content; protected ones keep it.
        for item in &items {
            if let Item::ToolResult {
                summary, content, ..
            } = item
            {
                match summary.as_str() {
                    "s1" | "s2" => assert!(content.is_none(), "old content should be pruned"),
                    _ => assert!(content.is_some(), "protected content should remain"),
                }
            }
        }
    }

    #[test]
    fn idempotent() {
        let big = big_content();
        let mut items = make_history(&[
            ("turn1", vec![("s1", Some(big.as_str()))]),
            ("turn2", vec![]),
            ("turn3", vec![]),
            ("turn4", vec![]),
        ]);
        let cfg = config(2, 100);

        let first = prune(&mut items, &cfg).expect("first prune");
        assert_eq!(first.pruned_count, 1);

        // A second pass finds nothing left to prune.
        assert!(prune(&mut items, &cfg).is_none());
    }

    #[test]
    fn already_pruned_items_skipped() {
        // Tool results without content contribute no savings.
        let mut items = make_history(&[
            ("turn1", vec![("s1", None)]),
            ("turn2", vec![]),
            ("turn3", vec![]),
            ("turn4", vec![]),
        ]);

        // Even with a threshold of 0, no candidates means no prune.
        let outcome = prune(&mut items, &config(2, 0));
        assert!(outcome.is_none());
    }

    #[test]
    fn protected_turns_boundary_exact() {
        // Three turns with two protected: only turn 1 may be pruned.
        let big = big_content();
        let mut items = make_history(&[
            ("turn1", vec![("s1", Some(big.as_str()))]),
            ("turn2", vec![("s2", Some("protected"))]),
            ("turn3", vec![("s3", Some("also protected"))]),
        ]);

        let result = prune(&mut items, &config(2, 100)).expect("should prune turn1");
        assert_eq!(result.pruned_count, 1);

        // s1 is pruned; s2 and s3 are intact.
        for item in &items {
            if let Item::ToolResult {
                summary, content, ..
            } = item
            {
                match summary.as_str() {
                    "s1" => assert!(content.is_none()),
                    "s2" | "s3" => assert!(content.is_some()),
                    _ => {}
                }
            }
        }
    }
}
|
||||
|
|
@ -17,6 +17,7 @@ serde_json = "1.0.149"
|
|||
thiserror = "2.0"
|
||||
tokio = { version = "1.49", features = ["fs", "io-util", "macros", "net", "rt-multi-thread", "signal", "sync"] }
|
||||
toml = "1.1.2"
|
||||
tracing = "0.1.44"
|
||||
|
||||
[dev-dependencies]
|
||||
async-trait = "0.1.89"
|
||||
|
|
|
|||
|
|
@ -4,12 +4,15 @@ pub mod runtime_dir;
|
|||
pub mod shared_state;
|
||||
pub mod socket_server;
|
||||
|
||||
pub mod prune_hook;
|
||||
|
||||
mod hook_interceptor;
|
||||
mod pod;
|
||||
|
||||
pub use controller::{PodController, PodHandle};
|
||||
pub use manifest::{PodManifest, ProviderConfig, ProviderKind, Scope};
|
||||
pub use hook::{Hook, HookEventKind, HookRegistryBuilder};
|
||||
pub use prune_hook::PruneHook;
|
||||
pub use pod::{Pod, PodError, PodRunResult, apply_worker_manifest};
|
||||
pub use protocol::{ErrorCode, Event, Method, TurnResult};
|
||||
pub use provider::{ProviderError, build_client};
|
||||
|
|
|
|||
38
crates/pod/src/prune_hook.rs
Normal file
38
crates/pod/src/prune_hook.rs
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
//! PruneHook — applies conditional pruning before each LLM request.
|
||||
//!
|
||||
//! Wraps [`llm_worker::prune::prune()`] as a [`Hook<PreLlmRequest>`] so
|
||||
//! that Pod can register it in the hook pipeline.
|
||||
|
||||
use async_trait::async_trait;
|
||||
use llm_worker::interceptor::PreRequestAction;
|
||||
use llm_worker::prune::{PruneConfig, prune};
|
||||
use llm_worker::Item;
|
||||
use tracing::debug;
|
||||
|
||||
use crate::hook::{Hook, PreLlmRequest};
|
||||
|
||||
/// Hook that conditionally prunes old tool-result content before each
|
||||
/// LLM request, reclaiming context-window tokens.
|
||||
pub struct PruneHook {
|
||||
config: PruneConfig,
|
||||
}
|
||||
|
||||
impl PruneHook {
|
||||
pub fn new(config: PruneConfig) -> Self {
|
||||
Self { config }
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Hook<PreLlmRequest> for PruneHook {
|
||||
async fn call(&self, context: &mut Vec<Item>) -> PreRequestAction {
|
||||
if let Some(result) = prune(context, &self.config) {
|
||||
debug!(
|
||||
pruned = result.pruned_count,
|
||||
estimated_savings = result.estimated_savings,
|
||||
"Pruned old tool-result content"
|
||||
);
|
||||
}
|
||||
PreRequestAction::Continue
|
||||
}
|
||||
}
|
||||
|
|
@ -182,20 +182,23 @@ const MAX_COMPACT_FAILURES: usize = 3;
|
|||
|
||||
### Compaction フロー
|
||||
|
||||
Compact は fork と同じ構造。旧セッションを保全し、新しい SessionId で圧縮後のセッションを開始する。
|
||||
session-store-extraction 後の構造を前提とする。
|
||||
Pod が Worker を直接保持し、session-store は save/restore の関数群。
|
||||
|
||||
```
|
||||
Run 完了 → input_tokens > threshold
|
||||
↓
|
||||
Controller: history を要約プロンプトに変換
|
||||
Pod: worker.history() + worker.request_config() を読み出す
|
||||
↓
|
||||
Controller: 要約用 Worker 生成(ツールなし、temperature=0)
|
||||
Pod: build_client(&manifest.provider, manifest_dir) で要約用 Worker を生成(ツールなし、temperature=0)
|
||||
↓
|
||||
要約 Worker: 構造化要約を生成
|
||||
要約 Worker: history を要約プロンプトとして受け取り、構造化要約を生成
|
||||
↓
|
||||
Controller: [要約 Item, 直近 N ターン] で新 history を構築
|
||||
Pod: [要約 Item, 直近 N ターン] で新 history を構築
|
||||
↓
|
||||
Controller: 新 SessionId で新セッションを作成(SessionStart に compacted_from を記録)
|
||||
Pod: worker.set_history(新 history)
|
||||
↓
|
||||
Pod: session_store::save_compacted(store, new_id, compacted_from, ...) で新セッション開始
|
||||
↓
|
||||
旧セッション JSONL はそのまま保全(append-only 原則を維持)
|
||||
```
|
||||
|
|
@ -228,7 +231,17 @@ LogEntry::SessionStart {
|
|||
- compact: `compacted_from = Some(...)`
|
||||
- EntryHash で元セッションのどの時点からの操作かを追跡可能
|
||||
|
||||
### 要約フォーマット
|
||||
### 要約用 Worker
|
||||
|
||||
- `build_client(&manifest.provider, manifest_dir)` で新しい LlmClient を作る
|
||||
- reqwest::Client は内部 Arc。1回きりのリクエストなので新規プールで問題なし
|
||||
- Pod が `manifest_dir` を保持する必要がある(現状 `from_manifest` では受け取るが保持していない)
|
||||
|
||||
### 要約プロンプト
|
||||
|
||||
TODO: system prompt の文面、history を文字列化する方法を詰める。
|
||||
|
||||
出力フォーマット:
|
||||
|
||||
```
|
||||
## Original Task
|
||||
|
|
@ -312,15 +325,17 @@ pub struct CompactionConfig {
|
|||
3. **`prune.rs`** — 条件付き Prune アルゴリズム。単体テスト
|
||||
4. **`PruneHook`** — Pod に Hook 実装
|
||||
5. **`CompactionConfig`** — manifest にセクション追加
|
||||
6. **`LogEntry::Compacted`** — session_log に variant 追加
|
||||
7. **`compact()` 関数** — Controller に compaction ロジック + サーキットブレーカー
|
||||
6. **`LogEntry` に provenance フィールド追加** — SessionStart に `compacted_from` / `forked_from`
|
||||
7. **`compact()` 関数** — Pod に compaction ロジック + サーキットブレーカー
|
||||
8. **Protocol** — `CompactionStart` / `CompactionDone` イベント追加
|
||||
|
||||
ステップ 1-2 は ToolOutput 移行として独立実行可能。
|
||||
ステップ 3-4(Prune)と 5-6(Compact 準備)は並行可能。
|
||||
ステップ 5-8 は session-store-extraction 完了後に実装。
|
||||
|
||||
---
|
||||
|
||||
## 依存チケット
|
||||
|
||||
- ~~[remove-hook-module.md](remove-hook-module.md)~~ — 完了
|
||||
- [session-store-extraction.md](session-store-extraction.md) — ステップ 5-8 の前提
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user