pruneで用いるトークン計算の改善

2026-04-14 00:15:09 +09:00 · 2026-04-14 00:15:09 +09:00 · 5a995cf099
commit 5a995cf099
parent 2edc2dc245
4 changed files with 131 additions and 84 deletions
--- a/crates/llm-worker/src/prune.rs
+++ b/crates/llm-worker/src/prune.rs
@ -1,12 +1,18 @@
-//! Conditional Prune algorithm for context window management.
+//! Prune — context projection for old tool-result content.
 //!
-//! Removes `content` from old [`Item::ToolResult`] entries, leaving only
-//! their `summary`. This reclaims tokens while preserving the "what
-//! happened" trail.
+//! LLM 送信時のコンテキストから古い [`Item::ToolResult`] の `content` を
+//! 省略して、コンテキスト窓のトークンを回収する。`summary` は残すので
+//! 「何が起きたか」の痕跡は保たれる。
 //!
-//! このモジュールは pure な「候補抽出」と「適用」だけを提供する。
-//! `min_savings` 判定や savings 推定はこの crate には置かず、上位層
-//! （`pod::prune_hook` など）が usage 履歴ベースのトークン会計と組み合わせて行う。
+//! # 設計方針
+//!
+//! Prune は **コンテキスト射影** であり、history の変換ではない。
+//! この crate が提供するのは pure な候補抽出 [`prunable_indices`] のみで、
+//! 射影の適用は上位層（`pod::prune_hook` 等）が LLM に送る一時コンテキスト
+//! に対してだけ行う。Worker の永続履歴は決して変更されない。
+//!
+//! `min_savings` 判定や savings 推定もこの crate には置かず、上位層が
+//! usage 履歴ベースのトークン会計と組み合わせて行う。

 use serde::{Deserialize, Serialize};

@ -45,13 +51,6 @@ impl Default for PruneConfig {
    }
 }

-/// Result of [`apply_prune`].
-#[derive(Debug, Clone, PartialEq, Eq)]
-pub struct PruneResult {
-    /// Number of items whose `content` was set to `None`.
-    pub pruned_count: usize,
-}
-
 /// Find indices where each "turn" begins.
 ///
 /// A turn starts at every user message. Returns the indices of those
@ -88,24 +87,6 @@ pub fn prunable_indices(items: &[Item], protected_turns: usize) -> Vec<usize> {
        .collect()
 }

-/// Set `content = None` on each item at `indices`. Returns the number
-/// of items that were actually modified (already-pruned items are
-/// counted as 0).
-pub fn apply_prune(items: &mut [Item], indices: &[usize]) -> PruneResult {
-    let mut count = 0;
-    for &i in indices {
-        if let Item::ToolResult { content, .. } = &mut items[i] {
-            if content.is_some() {
-                *content = None;
-                count += 1;
-            }
-        }
-    }
-    PruneResult {
-        pruned_count: count,
-    }
-}
-
 #[cfg(test)]
 mod tests {
    use super::*;
@ -158,51 +139,6 @@ mod tests {
        }
    }

-    #[test]
-    fn apply_drops_content_only() {
-        let big = "x".repeat(64);
-        let mut items = make_history(&[
-            ("turn1", vec![("s1", Some(&big))]),
-            ("turn2", vec![("s2", Some(&big))]),
-            ("turn3", vec![("s3", Some("keep me"))]),
-            ("turn4", vec![("s4", Some("keep me too"))]),
-        ]);
-        let candidates = prunable_indices(&items, 2);
-        let result = apply_prune(&mut items, &candidates);
-        assert_eq!(result.pruned_count, 2);
-
-        for item in &items {
-            if let Item::ToolResult {
-                summary, content, ..
-            } = item
-            {
-                if summary == "s1" || summary == "s2" {
-                    assert!(content.is_none(), "old content should be pruned");
-                } else {
-                    assert!(content.is_some(), "protected content should remain");
-                }
-            }
-        }
-    }
-
-    #[test]
-    fn apply_is_idempotent() {
-        let big = "x".repeat(64);
-        let mut items = make_history(&[
-            ("turn1", vec![("s1", Some(&big))]),
-            ("turn2", vec![]),
-            ("turn3", vec![]),
-            ("turn4", vec![]),
-        ]);
-        let first_indices = prunable_indices(&items, 2);
-        assert_eq!(apply_prune(&mut items, &first_indices).pruned_count, 1);
-
-        // 2 周目: 候補は (まだ) いるかもしれないが、すでに content=None なので
-        // apply_prune は 0 件と数える。
-        let second_indices = prunable_indices(&items, 2);
-        assert!(second_indices.is_empty());
-    }
-
    #[test]
    fn already_pruned_items_excluded_from_candidates() {
        let items = make_history(&[
--- a/crates/pod/src/prune_hook.rs
+++ b/crates/pod/src/prune_hook.rs
@ -1,6 +1,11 @@
-//! PruneHook — applies conditional pruning before each LLM request.
+//! PruneHook — projects the LLM request context before each call.
+//!
+//! Prune は **コンテキスト射影** として実装する。`PreLlmRequest` hook に
+//! 渡される `context: &mut Vec<Item>` は Worker が毎 turn 冒頭で history を
+//! clone した一時配列 (`worker.rs:701`)。ここで ToolResult.content を省いても
+//! Worker の永続履歴には影響しない。`prunable_indices` で候補を抽出し、
+//! `min_savings` を満たせば content を `None` に射影する。
 //!
-//! Wraps the pure `prune` API from `llm-worker` as a [`Hook<PreLlmRequest>`].
 //! `min_savings` の判定は usage 履歴ベースのトークン会計
 //! ([`crate::token_counter::savings_for_drop_impl`]) で行う。

@ -9,7 +14,7 @@ use std::sync::{Arc, Mutex};
 use async_trait::async_trait;
 use llm_worker::Item;
 use llm_worker::interceptor::PreRequestAction;
-use llm_worker::prune::{PruneConfig, apply_prune, prunable_indices};
+use llm_worker::prune::{PruneConfig, prunable_indices};
 use session_store::UsageRecord;
 use tracing::debug;

@ -66,13 +71,23 @@ impl Hook<PreLlmRequest> for PruneHook {
            return PreRequestAction::Continue;
        }

-        let result = apply_prune(context, &candidates);
-        if result.pruned_count > 0 {
+        // 射影: context (= history の clone) 上の対象 ToolResult だけ content を
+        // drop する。Worker の永続履歴は別インスタンスなので影響を受けない。
+        let mut projected = 0usize;
+        for &i in &candidates {
+            if let Item::ToolResult { content, .. } = &mut context[i] {
+                if content.is_some() {
+                    *content = None;
+                    projected += 1;
+                }
+            }
+        }
+        if projected > 0 {
            debug!(
-                pruned = result.pruned_count,
+                pruned = projected,
                estimated_savings_tokens = savings.tokens,
                source = ?savings.source,
-                "Pruned old tool-result content"
+                "Projected old tool-result content out of request context"
            );
        }
        PreRequestAction::Continue
--- a/tickets/prune-projection.md
+++ b/tickets/prune-projection.md
@ -34,6 +34,54 @@ history の変換ではない。
 - PreLlmRequest hook の戻り値: 現在 `PreRequestAction::Continue` で history を
  そのまま使う設計。射影したコンテキストを渡す方法の検討が必要

+## 追加作業: Worker への統合
+
+初回レビューで「pure ロジックが `llm-worker::prune` にあるのに適用は pod 側
+Hook で行う」という責務の不整合が見つかった。現状:
+
+- `llm-worker::prune` が `PruneConfig` と `prunable_indices` を提供
+- `pod::prune_hook::PruneHook` が `PreLlmRequest` hook として射影を実行
+
+Worker 自身が context window 管理の責任を持つべき。`build_request` の直前
+（`worker.rs:701` 付近）で射影すれば `PruneHook` は不要になる。
+
+### 変更内容
+
+- `Worker` が `PruneConfig` を直接保持する
+  （`Worker::set_prune_config(config)` 等）
+- `worker.rs:701` の `request_context = self.history.clone()` の直後に
+  Worker 自身が `prunable_indices` で候補を抽出し、`min_savings` を満たせば
+  content を射影する
+- `min_savings` 判定のためのトークン会計は Worker から外部に問い合わせる。
+  `Worker::set_savings_estimator(Box<dyn Fn(&[Item], Range<usize>) -> u64>)`
+  のようにコールバックを注入する形にする
+  （Worker は usage 履歴を知らないので、計算は pod 側に残す）
+- `pod::prune_hook` モジュールを廃止。Pod は `worker.set_prune_config(...)` と
+  `worker.set_savings_estimator(...)` を呼ぶだけ
+
+### 影響範囲
+
+- `crates/llm-worker/src/worker.rs`: `PruneConfig` / savings estimator の
+  保持、`build_request` 直前での射影適用
+- `crates/pod/src/prune_hook.rs`: **削除**
+- `crates/pod/src/pod.rs`: `PruneHook` 生成を `worker.set_prune_config` +
+  `worker.set_savings_estimator` に置き換え
+- `crates/pod/src/lib.rs`: `prune_hook` モジュールの削除
+
+### 得られるもの
+
+- llm-worker 側に prune 関連コードを集約できる
+- `PreLlmRequest` hook から prune 固有の mutation が消える
+  （現状 hook に渡る `&mut Vec<Item>` はユーザー向け hook が自由に使えるが、
+  Worker 内部の prune と同じ経路を共有するのは混乱の元）
+- PruneHook の単体テスト不足問題（初回レビュー指摘2）も Worker のテストとして
+  自然に書ける
+
+## レビュー状態
+
+Reviewed — [prune-projection.review.md](prune-projection.review.md)
+（追加作業「Worker への統合」は未着手）
+
 ## 依存

 - なし
--- a/tickets/prune-projection.review.md
+++ b/tickets/prune-projection.review.md
@ -0,0 +1,48 @@
+# prune-projection レビュー
+
+## 要件の充足
+
+チケットが定義した3つの設計方針は全て達成されている:
+
+1. **判定結果を index 集合として保持** — `prunable_indices` がそのまま該当
+2. **PreLlmRequest hook でコンテキスト構築時に content を省く** —
+   `PruneHook::call` で `context` に対してのみ書き換え
+3. **Worker の history を一切変更しない** — `worker.rs:701` の
+   `let mut request_context = self.history.clone()` により、hook に渡るのは
+   clone。`build_request` で使われた後に破棄され、`self.history` には戻らない
+
+## アーキテクチャ
+
+責務分離が綺麗に実装されている:
+
+- `llm-worker::prune` には pure な `prunable_indices` と `PruneConfig` のみ。
+  mutation 系 API (`apply_prune`, `PruneResult`) は完全に削除
+- `PruneHook` 内に content ドロップの inline ループ。意図的にインライン化されており、
+  「射影は呼び出し側の責任」という設計が形になっている
+- `PruneHook` の doc comment で `worker.rs:701` への参照により
+  「なぜ mutate しても安全か」を明示
+
+## 指摘と対処
+
+### 1. clone 依存の脆さ（非ブロッカー、未対処）
+
+現在の安全性は `worker.rs:701` の clone に依存している。worker の実装変更で
+clone が消えると設計が即座に壊れる。対策案:
+
+- worker.rs の clone 箇所に「この clone は射影 hook に依存されている」旨の
+  コメントを足す（軽い対処）
+- `PreLlmRequest` hook の型を `RequestContext` のような専用型にして
+  型レベルで分離（本格対処）
+
+将来的な事故防止の観点で気になるが、チケット範疇の外。
+
+### 2. PruneHook の射影ロジックに単体テストが無い（非ブロッカー、未対処）
+
+`llm-worker::prune` の `apply_prune` テストを削除したのは正しいが、
+`PruneHook::call` の射影ロジックには単体テストがない。
+`PruneHook::call` を呼んで、context が変更され、かつ元 history が
+変わらないことを検証する統合テストがあると安心。
+
+## 判定
+
+承認。