From fade875c6f53ddd54330b022020938e3afd1bfff Mon Sep 17 00:00:00 2001
From: Hare <kei.hiracchi.0928@gmail.com>
Date: Wed, 15 Apr 2026 04:08:56 +0900
Subject: [PATCH] =?UTF-8?q?tool=E5=87=BA=E5=8A=9B=E3=81=AE=E5=88=B6?=
 =?UTF-8?q?=E9=99=90?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 crates/llm-worker/src/lib.rs        |   2 +-
 crates/llm-worker/src/tool.rs       | 118 ++++++++++++++++++++++++++++
 crates/llm-worker/src/worker.rs     |  47 ++++++++++-
 crates/manifest/src/lib.rs          |  81 +++++++++++++++++++
 crates/pod/src/pod.rs               |   6 +-
 tickets/tool-output-limit.md        |  60 +++++++++-----
 tickets/tool-output-limit.review.md |  87 ++++++++++++++++++++
 7 files changed, 380 insertions(+), 21 deletions(-)
 create mode 100644 tickets/tool-output-limit.review.md

diff --git a/crates/llm-worker/src/lib.rs b/crates/llm-worker/src/lib.rs
index 29893327..dfa27135 100644
--- a/crates/llm-worker/src/lib.rs
+++ b/crates/llm-worker/src/lib.rs
@@ -55,5 +55,5 @@ pub use callback::{TextBlockScope, ToolUseBlockScope};
 pub use handler::ToolUseBlockStart;
 pub use interceptor::Interceptor;
 pub use message::{ContentPart, Item, Message, Role};
-pub use tool::{ToolCall, ToolResult};
+pub use tool::{ToolCall, ToolOutputLimits, ToolResult};
 pub use worker::{RunOutput, ToolRegistryError, Worker, WorkerConfig, WorkerError, WorkerResult};
diff --git a/crates/llm-worker/src/tool.rs b/crates/llm-worker/src/tool.rs
index e5abd09f..df70d43c 100644
--- a/crates/llm-worker/src/tool.rs
+++ b/crates/llm-worker/src/tool.rs
@@ -3,6 +3,7 @@
 //! Traits for defining tools callable by LLM.
 //! Usually auto-implemented using the `#[tool]` macro.
 
+use std::collections::HashMap;
 use std::sync::Arc;
 
 use async_trait::async_trait;
@@ -32,6 +33,62 @@ pub enum ToolError {
 /// Outputs this small don't benefit from pruning.
 pub const SUMMARY_THRESHOLD: usize = 200;
 
+/// Byte-size caps applied to tool execution `content` at the Worker's
+/// tool-execution boundary, before results enter conversation history.
+///
+/// Exists so a single oversized tool result (e.g. a wide `Glob` scan)
+/// cannot blow past the provider's per-minute input-token rate limit.
+/// Individual tools are not trusted to self-limit — this is the single
+/// chokepoint.
+///
+/// The unit is bytes rather than tokens because accurate pre-send token
+/// estimation is not available. The limits can be migrated to token
+/// units later without changing callers.
+#[derive(Debug, Clone)]
+pub struct ToolOutputLimits {
+    /// Cap applied to any tool not listed in `per_tool`.
+    pub default_max_bytes: usize,
+    /// Per-tool overrides, keyed by tool registration name.
+    pub per_tool: HashMap<String, usize>,
+}
+
+impl ToolOutputLimits {
+    /// Resolve the cap for a given tool name.
+    pub fn limit_for(&self, tool_name: &str) -> usize {
+        self.per_tool
+            .get(tool_name)
+            .copied()
+            .unwrap_or(self.default_max_bytes)
+    }
+}
+
+/// Truncate `content` in-place if it exceeds `limit` bytes, replacing
+/// the dropped tail with a short human- and LLM-readable marker so the
+/// model can self-correct by narrowing its query.
+///
+/// The cut point is walked back to the nearest UTF-8 char boundary so
+/// multibyte characters are never split.
+pub(crate) fn truncate_content(content: &mut String, limit: usize) {
+    let original_len = content.len();
+    if original_len <= limit {
+        return;
+    }
+
+    let suffix_template = "\n\n[truncated: %BYTES% bytes dropped, refine your query]";
+    // Reserve enough headroom for the suffix (upper bound on the byte length
+    // of the number substitution). usize::MAX fits in 20 digits.
+    let reserved = suffix_template.len() + 20 - "%BYTES%".len();
+    let body_budget = limit.saturating_sub(reserved);
+
+    let mut cut = body_budget.min(original_len);
+    while cut > 0 && !content.is_char_boundary(cut) {
+        cut -= 1;
+    }
+    content.truncate(cut);
+    let dropped = original_len - cut;
+    content.push_str(&suffix_template.replace("%BYTES%", &dropped.to_string()));
+}
+
 /// Tool execution result.
 ///
 /// Every output has a mandatory `summary` (1-2 lines) that persists in
@@ -253,3 +310,64 @@ impl ToolResult {
         }
     }
 }
+
+#[cfg(test)]
+mod truncate_tests {
+    use super::*;
+
+    #[test]
+    fn noop_when_within_limit() {
+        let mut s = "hello world".to_string();
+        truncate_content(&mut s, 1024);
+        assert_eq!(s, "hello world");
+    }
+
+    #[test]
+    fn noop_at_exact_limit() {
+        let mut s = "a".repeat(100);
+        truncate_content(&mut s, 100);
+        assert_eq!(s.len(), 100);
+    }
+
+    #[test]
+    fn truncates_oversized_ascii_with_marker() {
+        let mut s = "a".repeat(1000);
+        truncate_content(&mut s, 200);
+        assert!(s.contains("[truncated:"));
+        assert!(s.contains("refine your query"));
+        assert!(s.len() <= 200, "result was {} bytes", s.len());
+        let dropped: usize = s
+            .split("[truncated: ")
+            .nth(1)
+            .unwrap()
+            .split(' ')
+            .next()
+            .unwrap()
+            .parse()
+            .unwrap();
+        let body_len = s.find("\n\n[truncated:").unwrap();
+        assert_eq!(body_len + dropped, 1000);
+    }
+
+    #[test]
+    fn respects_utf8_char_boundaries() {
+        // 100 copies of "あ" (3 bytes each) = 300 bytes.
+        let mut s = "あ".repeat(100);
+        truncate_content(&mut s, 120);
+        // Truncation must not split a multibyte character.
+        assert!(s.is_char_boundary(s.find("\n\n[truncated:").unwrap_or(s.len())));
+        // And the result must still be valid UTF-8 (implicitly true for String).
+        assert!(s.contains("[truncated:"));
+    }
+
+    #[test]
+    fn limits_per_tool_override() {
+        let mut limits = ToolOutputLimits {
+            default_max_bytes: 1024,
+            per_tool: HashMap::new(),
+        };
+        limits.per_tool.insert("Read".to_string(), 4096);
+        assert_eq!(limits.limit_for("Read"), 4096);
+        assert_eq!(limits.limit_for("Grep"), 1024);
+    }
+}
diff --git a/crates/llm-worker/src/worker.rs b/crates/llm-worker/src/worker.rs
index 7191eacd..41e06e8a 100644
--- a/crates/llm-worker/src/worker.rs
+++ b/crates/llm-worker/src/worker.rs
@@ -20,7 +20,10 @@ use crate::{
     state::{Locked, Mutable, WorkerState},
     timeline::event::{ErrorEvent, StatusEvent, UsageEvent},
     timeline::{TextBlockCollector, Timeline, ToolCallCollector},
-    tool::{ToolCall, ToolDefinition as WorkerToolDefinition, ToolError, ToolResult},
+    tool::{
+        ToolCall, ToolDefinition as WorkerToolDefinition, ToolError, ToolOutputLimits, ToolResult,
+        truncate_content,
+    },
     tool_server::{ToolServer, ToolServerHandle},
 };
 
@@ -158,6 +161,9 @@ pub struct Worker<C: LlmClient, S: WorkerState = Mutable> {
     /// Cancel notification channel (for interrupting execution)
     cancel_tx: mpsc::Sender<()>,
     cancel_rx: mpsc::Receiver<()>,
+    /// Byte-size caps applied to tool `content` before it reaches history.
+    /// `None` disables truncation (tests and minimal setups).
+    tool_output_limits: Option<ToolOutputLimits>,
     /// Prune configuration. `None` disables the prune projection.
     prune_config: Option<crate::prune::PruneConfig>,
     /// Callback that estimates token savings for a drop range, injected
@@ -644,6 +650,33 @@ impl<C: LlmClient, S: WorkerState> Worker<C, S> {
             }
         };
 
+        // Cap `content` byte-size before it enters history. This is the
+        // single chokepoint that protects the next LLM request from
+        // blowing past the provider's per-minute input-token limit; no
+        // individual tool is trusted to self-limit.
+        if let Some(limits) = self.tool_output_limits.as_ref() {
+            for tool_result in &mut results {
+                let Some(content) = tool_result.content.as_mut() else {
+                    continue;
+                };
+                let Some((tool_call, _, _)) = call_info_map.get(&tool_result.tool_use_id) else {
+                    continue;
+                };
+                let limit = limits.limit_for(&tool_call.name);
+                let before = content.len();
+                truncate_content(content, limit);
+                if content.len() != before {
+                    warn!(
+                        tool = %tool_call.name,
+                        before_bytes = before,
+                        after_bytes = content.len(),
+                        limit_bytes = limit,
+                        "Tool output exceeded byte limit and was truncated"
+                    );
+                }
+            }
+        }
+
         // Phase 3: Apply post_tool_call interceptor
         for tool_result in &mut results {
             if let Some((tool_call, meta, tool)) = call_info_map.get(&tool_result.tool_use_id) {
@@ -932,6 +965,7 @@ impl<C: LlmClient> Worker<C, Mutable> {
             last_run_interrupted: false,
             cancel_tx,
             cancel_rx,
+            tool_output_limits: None,
             prune_config: None,
             savings_estimator: None,
             _state: PhantomData,
@@ -963,6 +997,15 @@ impl<C: LlmClient> Worker<C, Mutable> {
         self.system_prompt = Some(prompt.into());
     }
 
+    /// Install byte-size caps for tool execution `content`.
+    ///
+    /// Passing `None` (the default) disables truncation. Higher layers
+    /// (e.g. Pod) translate manifest configuration into a concrete
+    /// [`ToolOutputLimits`] and install it here.
+    pub fn set_tool_output_limits(&mut self, limits: Option<ToolOutputLimits>) {
+        self.tool_output_limits = limits;
+    }
+
     /// Set maximum tokens (builder pattern)
     ///
     /// # Examples
@@ -1175,6 +1218,7 @@ impl<C: LlmClient> Worker<C, Mutable> {
 
             cancel_tx: self.cancel_tx,
             cancel_rx: self.cancel_rx,
+            tool_output_limits: self.tool_output_limits,
             prune_config: self.prune_config,
             savings_estimator: self.savings_estimator,
             _state: PhantomData,
@@ -1246,6 +1290,7 @@ impl<C: LlmClient> Worker<C, Locked> {
 
             cancel_tx: self.cancel_tx,
             cancel_rx: self.cancel_rx,
+            tool_output_limits: self.tool_output_limits,
             prune_config: self.prune_config,
             savings_estimator: self.savings_estimator,
             _state: PhantomData,
diff --git a/crates/manifest/src/lib.rs b/crates/manifest/src/lib.rs
index a43712d0..60e61513 100644
--- a/crates/manifest/src/lib.rs
+++ b/crates/manifest/src/lib.rs
@@ -2,6 +2,7 @@ mod scope;
 
 pub use scope::{Scope, ScopeError};
 
+use std::collections::HashMap;
 use std::num::NonZeroU32;
 use std::path::PathBuf;
 
@@ -79,6 +80,48 @@ pub struct WorkerManifest {
     pub max_turns: Option<NonZeroU32>,
     #[serde(default)]
     pub temperature: Option<f32>,
+    #[serde(default)]
+    pub tool_output: Option<ToolOutputLimits>,
+}
+
+/// Byte-size caps applied to tool execution `content` before it enters
+/// conversation history. Guards against a single oversized tool result
+/// blowing past the provider's per-minute input-token rate limit.
+///
+/// Field names are deliberately phrased in bytes (not tokens) because
+/// accurate pre-send token counting is not yet available; the caps can
+/// be migrated to token units later without renaming.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ToolOutputLimits {
+    /// Cap applied to any tool not listed in `per_tool`.
+    #[serde(default = "default_tool_output_max_bytes")]
+    pub default_max_bytes: usize,
+    /// Per-tool overrides, keyed by tool registration name (e.g. "Glob").
+    #[serde(default)]
+    pub per_tool: HashMap<String, usize>,
+}
+
+fn default_tool_output_max_bytes() -> usize {
+    16 * 1024
+}
+
+impl Default for ToolOutputLimits {
+    fn default() -> Self {
+        Self {
+            default_max_bytes: default_tool_output_max_bytes(),
+            per_tool: HashMap::new(),
+        }
+    }
+}
+
+impl ToolOutputLimits {
+    /// Resolve the cap for a given tool name.
+    pub fn limit_for(&self, tool_name: &str) -> usize {
+        self.per_tool
+            .get(tool_name)
+            .copied()
+            .unwrap_or(self.default_max_bytes)
+    }
 }
 
 /// Declarative scope configuration.
@@ -367,6 +410,44 @@ permission = "write"
         assert!(PodManifest::from_toml(&toml).is_err());
     }
 
+    #[test]
+    fn omitted_tool_output_is_none() {
+        let manifest = PodManifest::from_toml(MINIMAL_REQUIRED).unwrap();
+        assert!(manifest.worker.tool_output.is_none());
+    }
+
+    #[test]
+    fn parse_tool_output_limits() {
+        let toml = MINIMAL_REQUIRED.replace(
+            "[worker]\n",
+            "[worker]\n\
+             [worker.tool_output]\n\
+             default_max_bytes = 8192\n\n\
+             [worker.tool_output.per_tool]\n\
+             Read = 32768\n\
+             Grep = 4096\n",
+        );
+        let manifest = PodManifest::from_toml(&toml).unwrap();
+        let limits = manifest.worker.tool_output.unwrap();
+        assert_eq!(limits.default_max_bytes, 8192);
+        assert_eq!(limits.limit_for("Read"), 32768);
+        assert_eq!(limits.limit_for("Grep"), 4096);
+        assert_eq!(limits.limit_for("Unknown"), 8192);
+    }
+
+    #[test]
+    fn tool_output_default_max_bytes_is_16k() {
+        let toml = MINIMAL_REQUIRED.replace(
+            "[worker]\n",
+            "[worker]\n\
+             [worker.tool_output]\n",
+        );
+        let manifest = PodManifest::from_toml(&toml).unwrap();
+        let limits = manifest.worker.tool_output.unwrap();
+        assert_eq!(limits.default_max_bytes, 16 * 1024);
+        assert!(limits.per_tool.is_empty());
+    }
+
     #[test]
     fn default_recursive_true() {
         let rule: ScopeRule = toml::from_str(
diff --git a/crates/pod/src/pod.rs b/crates/pod/src/pod.rs
index 45aac3b9..944ad1cb 100644
--- a/crates/pod/src/pod.rs
+++ b/crates/pod/src/pod.rs
@@ -5,7 +5,7 @@ use llm_worker::Item;
 use llm_worker::llm_client::RequestConfig;
 use llm_worker::llm_client::client::LlmClient;
 use llm_worker::state::Mutable;
-use llm_worker::{Worker, WorkerError, WorkerResult};
+use llm_worker::{ToolOutputLimits, Worker, WorkerError, WorkerResult};
 use session_store::{
     EntryHash, Outcome, SessionId, SessionStartState, Store, StoreError, UsageRecord,
 };
@@ -846,6 +846,10 @@ pub fn apply_worker_manifest<C: LlmClient>(worker: &mut Worker<C>, wm: &WorkerMa
     }
     worker.set_request_config(config);
     worker.set_max_turns(wm.max_turns.map(|n| n.get()));
+    worker.set_tool_output_limits(wm.tool_output.as_ref().map(|limits| ToolOutputLimits {
+        default_max_bytes: limits.default_max_bytes,
+        per_tool: limits.per_tool.clone(),
+    }));
 }
 
 /// Result of a Pod run.
diff --git a/tickets/tool-output-limit.md b/tickets/tool-output-limit.md
index ffd93687..517738c7 100644
--- a/tickets/tool-output-limit.md
+++ b/tickets/tool-output-limit.md
@@ -1,5 +1,10 @@
 # ツール実行結果のサイズ上限
 
+## レビュー状態
+
+初回レビュー実施済み。指摘事項と判断は [tool-output-limit.review.md](tool-output-limit.review.md) を参照。
+指摘1（デフォルト不適用）・指摘2（truncate と interceptor の順序）の修正待ち。
+
 ## 背景
 
 Pod のセッションで、Glob が `pattern:"*"` でプロジェクト全体を走査し、
@@ -13,46 +18,62 @@ Pod のセッションで、Glob が `pattern:"*"` でプロジェクト全体
 ツール実行結果のサイズを LLM に投げる前に強制的にキャップし、LLM に
 「検索範囲を絞れ」と促す必要がある。
 
+## 単位について
+
+理想はトークン単位での上限だが、既存の `pod::token_counter` は provider の
+`UsageRecord` 実測値を基にした按分・外挿専用で、**未送信の単発ツール出力を
+事前にトークン計測する手段は現時点で存在しない**。
+ローカルトークナイザは精度・信頼性の理由で意図的に持たない方針。
+
+そこで本チケットではバイト数ベースで上限をかける。UTF-8 のため
+`str::len()` で O(1) に判定でき、`floor_char_boundary` を使えば文字境界で
+安全に切断できる。将来 provider 実測値ベースのトークン上限に
+置き換える余地は残す（マニフェストのキー名をそれに合わせる）。
+
 ## 要件
 
 - **単一チョークポイントで全ツールに効く**: 個別ツールの実装を信用しない。
-  Tool 実行境界（llm-worker の Tool runner）で `ToolOutput.content` をトークン計測し、
+  Tool 実行境界（`llm-worker::worker::execute_tools` 内、`ToolResult::from_output`
+  直後）で `ToolOutput.content` のバイト数を計測し、
   上限を超えていたら切り詰めてから履歴に積む。
-- **マニフェストで設定可能**: デフォルトは 5000 トークン（30k/分レートリミットに
-  対して余裕を持った値）。プロジェクトごと・ツールごとに上書き可能。
-- **切り詰め後は LLM が検知できる**: `summary` 又は `content` 末尾に
-  `truncated: N tokens dropped, refine your query` 形式の追記を入れ、
+- **マニフェストで設定可能**: デフォルトは 16KB（30k/分レートリミットに対して
+  余裕を持った値）。プロジェクトごと・ツールごとに上書き可能。
+- **切り詰め後は LLM が検知できる**: `content` 末尾に
+  `[truncated: N bytes dropped, refine your query]` 形式の追記を入れ、
   LLM が自発的に絞り込みを試みるヒントにする。`ToolError` にはしない
   （エラーにすると LLM がリトライループに入りやすい）。
-- **計測は既存の `token-counter` クレートを使う**。文字数やバイト数で近似しない。
+- **`summary` には手を入れない**: summary は常に短い 1-2 行で、上限に達しない前提。
+- **`content` が `None` の場合はスキップ**: 計測・切り詰めの対象外。
 
 ## マニフェスト
 
 ```toml
 [worker.tool_output]
-# 全ツール共通の既定上限。省略時 5000。
-default_max_tokens = 5000
+# 全ツール共通の既定上限（バイト）。省略時 16384。
+default_max_bytes = 16384
 
 # ツールごとの上書き。ツール名は登録名（"Glob", "Read", ...）。
 [worker.tool_output.per_tool]
-Read = 8000     # Read は大きいファイルを意図的に返すので少し緩める
-Grep = 3000
+Read = 32768     # Read は大きいファイルを意図的に返すので少し緩める
+Grep = 8192
 ```
 
-- `[worker.tool_output]` セクション自体は省略可能。省略時はデフォルト 5000 が全ツールに適用。
+- `[worker.tool_output]` セクション自体は省略可能。省略時はデフォルト 16KB が全ツールに適用。
 - `per_tool` も省略可能。
 - 未知のツール名がマップに含まれていても manifest エラーにはしない（ログ警告のみ）。
 
 ## 実装方針（実装順序）
 
-1. `token-counter` を llm-worker の依存に追加（まだなら）し、Tool 実行境界の
-   `ToolOutput` を計測する薄いラッパーを導入
-2. 超過時の切り詰めロジック（content を二分探索などでトークン数ぎりぎりまで詰め、
-   末尾に注記を追記）
-3. `manifest::WorkerManifest` に `tool_output: Option<ToolOutputLimits>` を追加、
-   llm-worker の `Worker` 生成時に渡す
+1. `manifest::WorkerManifest` に `tool_output: Option<ToolOutputLimits>` を追加。
+   `ToolOutputLimits { default_max_bytes: usize, per_tool: HashMap<String, usize> }`。
+2. 切り詰め関数を `llm-worker` 側に薄く追加。
+   入力: `content: String`, `limit: usize`, `tool_name: &str`。
+   `content.len() <= limit` ならそのまま返す。超えていれば
+   `str::floor_char_boundary(limit - suffix.len())` で切って末尾に注記を追記。
+3. `Worker` 生成時に `tool_output: Option<ToolOutputLimits>` を渡し、
+   `execute_tools` の結果ループで `ToolResult::content` を in-place に書き換える。
 4. 各ツール単体には本チケットでは手を入れない。上限を踏んだツールに対して
-   後続の改善（Glob が `git_ignore` を尊重する等）は別チケットで扱う
+   後続の改善（Glob が `git_ignore` を尊重する等）は別チケットで扱う。
 
 ## 非ゴール
 
@@ -61,3 +82,6 @@ Grep = 3000
 - **prompt caching の導入**や compaction 側の改善は扱わない。
   本チケットは「1 回のツール結果が履歴に載る前にキャップする」ことだけに集中する。
 - **入力側（ツール引数）のサイズ制限**は扱わない。
+- **トークン単位での上限**は扱わない。将来 provider 実測値ベースの
+  オンライン・トークン推定が利用可能になった段階で、本チケットで入れた
+  バイト上限をトークン上限に置き換えることを検討する。
diff --git a/tickets/tool-output-limit.review.md b/tickets/tool-output-limit.review.md
new file mode 100644
index 00000000..cbb3c188
--- /dev/null
+++ b/tickets/tool-output-limit.review.md
@@ -0,0 +1,87 @@
+# レビュー: ツール実行結果のサイズ上限
+
+対象コミット: 未コミット（作業ツリー）
+対象差分: `crates/manifest/src/lib.rs`, `crates/llm-worker/src/{lib,tool,worker}.rs`, `crates/pod/src/pod.rs`
+
+## 要件達成状況
+
+| 要件 | 状態 |
+|---|---|
+| 単一チョークポイントで全ツールに効く | ✅ `worker::execute_tools` 内に配置 |
+| マニフェストで設定可能（デフォルト 16KB） | 🚨 省略時にデフォルトが効かない（下記参照） |
+| 切り詰めマーカーで LLM が検知できる | ✅ `[truncated: N bytes dropped, refine your query]` |
+| `summary` 非改変 | ✅ `ToolResult::content` のみを書き換え |
+| `content: None` スキップ | ✅ `as_mut()` の else で continue |
+| 未知ツール名を許容 | ✅ `HashMap::get` なので自然にフォールバック |
+| UTF-8 安全な切断 | ✅ `is_char_boundary` ループで実装 |
+| 切り詰め関数の単体テスト | ✅ ASCII / UTF-8 / 境界 / per_tool |
+
+## 指摘
+
+### 1. 🚨 デフォルト 16KB が `[worker.tool_output]` 省略時に効かない
+
+チケット本文:
+> `[worker.tool_output]` セクション自体は省略可能。省略時はデフォルト 16KB が全ツールに適用。
+
+現状の実装:
+
+- `manifest::WorkerManifest.tool_output: Option<ToolOutputLimits>` + `#[serde(default)]`
+  → TOML で省略すると `None`
+- `pod::apply_worker_manifest`:
+  ```rust
+  worker.set_tool_output_limits(wm.tool_output.as_ref().map(|l| ...));
+  ```
+  → `None` がそのまま worker に伝播
+- `Worker::execute_tools`:
+  ```rust
+  if let Some(limits) = self.tool_output_limits.as_ref() { ... }
+  ```
+  → `None` なら truncate ブロック丸ごとスキップ
+
+結果、マニフェストに `[worker.tool_output]` を書き忘れると truncate 自体が無効化される。事故再発防止が目的のチケットで、**ユーザーの明示的なオプトイン**に縮退しており要件違反。
+
+**判断**: 要修正（必須）。
+
+**修正方針**: `manifest::WorkerManifest.tool_output` を非 Option にして `#[serde(default)]` + `impl Default for ToolOutputLimits` を利用する。Pod 側の `as_ref().map(...)` も `Some(...)` を常に渡す形に直す。
+
+---
+
+### 2. 🟡 truncate が post_tool_call interceptor の **前** に挿入されている
+
+現状の順序:
+
+```
+Phase 2: ツール実行
+  → truncate
+Phase 3: post_tool_call interceptor
+  → 履歴に積む
+```
+
+この順だと post_tool_call interceptor が切り詰め済みの content しか見られず、ログ・監査・分類などで full content を使いたいケースで情報ロスが起きる。
+また interceptor が content を差し替えた場合、その差し替え後の巨大データは truncate を経由せず履歴に入る（防御が迂回される）。
+
+**判断**: 修正する。truncate を Phase 3 の直後、履歴コミット直前に移動する。
+
+**修正方針**: `worker::execute_tools` 内の truncate ループを Phase 3 の for ループ後に移動。それ以外の構造は変えない。
+
+---
+
+### 3. 🟢 `ToolOutputLimits` 型が manifest と llm-worker で二重定義
+
+`manifest::ToolOutputLimits`（serde 付き）と `llm_worker::ToolOutputLimits`（serde なし）が同構造で、`pod::apply_worker_manifest` で手変換している。
+
+manifest クレートが llm-worker に依存しない方針は正しく、他の manifest 型（`CompactionConfig` など）も Pod 境界で類似の変換を行っているため、**既存パターンと整合**している。
+
+**判断**: 不問。現行のまま。
+
+---
+
+### 4. 🟢 極小 limit でのオーバーラン
+
+`truncate_content` は limit が suffix のリザーブ分（約 70 bytes）を下回ると、body_budget が 0 になっても suffix 自体が 50〜70 bytes あるため結果サイズが limit を超える。実運用で limit < 128 を設定するケースは無く、実害は無い。
+
+**判断**: 不問。必要なら将来 `debug_assert!(limit >= 128)` を足す程度で十分。
+
+## 結論
+
+**指摘1・2の修正を条件に受け入れ可**。3・4 はそのまま。