From 2c5a0edef3bca8be23a9c613b5576df7ae1f9fbf Mon Sep 17 00:00:00 2001 From: Hare Date: Sun, 12 Apr 2026 05:19:00 +0900 Subject: [PATCH] =?UTF-8?q?Tool=20Output=E3=81=AE=E4=BB=95=E6=A7=98?= =?UTF-8?q?=E7=B0=A1=E7=B4=A0=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- TODO.md | 1 + crates/llm-worker-macros/src/lib.rs | 6 +- .../src/blob_output_processor.rs | 47 -- .../llm-worker-persistence/src/blob_store.rs | 54 -- .../src/fs_blob_store.rs | 83 --- .../src/inspect_tool.rs | 666 ------------------ crates/llm-worker-persistence/src/lib.rs | 8 - .../tests/session_test.rs | 6 +- crates/llm-worker/examples/worker_cli.rs | 4 +- .../llm_client/scheme/anthropic/request.rs | 8 +- .../src/llm_client/scheme/gemini/request.rs | 8 +- .../src/llm_client/scheme/openai/request.rs | 8 +- crates/llm-worker/src/llm_client/types.rs | 28 +- crates/llm-worker/src/tool.rs | 260 ++----- crates/llm-worker/src/tool_server.rs | 39 +- crates/llm-worker/src/worker.rs | 48 +- .../tests/parallel_execution_test.rs | 14 +- crates/llm-worker/tests/tool_macro_test.rs | 12 +- crates/llm-worker/tests/worker_fixtures.rs | 6 +- crates/llm-worker/tests/worker_state_test.rs | 6 +- tickets/context-compaction.md | 2 +- tickets/session-store-extraction.md | 57 ++ .../docs => tickets}/tool-output-design.md | 0 23 files changed, 212 insertions(+), 1159 deletions(-) delete mode 100644 crates/llm-worker-persistence/src/blob_output_processor.rs delete mode 100644 crates/llm-worker-persistence/src/blob_store.rs delete mode 100644 crates/llm-worker-persistence/src/fs_blob_store.rs delete mode 100644 crates/llm-worker-persistence/src/inspect_tool.rs create mode 100644 tickets/session-store-extraction.md rename {crates/llm-worker/docs => tickets}/tool-output-design.md (100%) diff --git a/TODO.md b/TODO.md index d13297a7..0054c847 100644 --- a/TODO.md +++ b/TODO.md @@ -1,6 +1,7 @@ - [x] 永続化データ構造の制定 - [ ] テスト設計 → [tickets/test-design.md](tickets/test-design.md) - [x] ツール出力の遅延読み込み設計 (ToolOutput / BlobStore / auto_summarize) +- [x] ToolOutput 再設計: summary + content 構造化、BlobStore/inspect 削除 → [tickets/tool-output-design.md](tickets/tool-output-design.md) - [ ] ツール設計 - [x] ツールの動的追加/削除 → [tickets/tool-dynamic-registry.md](tickets/tool-dynamic-registry.md) - [x] run() 自動ロックとファクトリ遅延初期化 → [tickets/worker-auto-lock.md](tickets/worker-auto-lock.md) diff --git a/crates/llm-worker-macros/src/lib.rs b/crates/llm-worker-macros/src/lib.rs index d1007db4..9597bec4 100644 --- a/crates/llm-worker-macros/src/lib.rs +++ b/crates/llm-worker-macros/src/lib.rs @@ -192,13 +192,13 @@ fn generate_tool_impl(self_ty: &Type, method: &syn::ImplItemFn) -> proc_macro2:: let result_handling = if is_result_type(&sig.output) { quote! { match result { - Ok(val) => Ok(format!("{:?}", val)), + Ok(val) => Ok(format!("{:?}", val).into()), Err(e) => Err(::llm_worker::tool::ToolError::ExecutionFailed(format!("{}", e))), } } } else { quote! { - Ok(format!("{:?}", result)) + Ok(format!("{:?}", result).into()) } }; @@ -247,7 +247,7 @@ fn generate_tool_impl(self_ty: &Type, method: &syn::ImplItemFn) -> proc_macro2:: #[async_trait::async_trait] impl ::llm_worker::tool::Tool for #tool_struct_name { - async fn execute(&self, input_json: &str) -> Result { + async fn execute(&self, input_json: &str) -> Result<::llm_worker::tool::ToolOutput, ::llm_worker::tool::ToolError> { #execute_body } } diff --git a/crates/llm-worker-persistence/src/blob_output_processor.rs b/crates/llm-worker-persistence/src/blob_output_processor.rs deleted file mode 100644 index c2415579..00000000 --- a/crates/llm-worker-persistence/src/blob_output_processor.rs +++ /dev/null @@ -1,47 +0,0 @@ -//! [`ToolOutputProcessor`] implementation backed by a [`BlobStore`]. -//! -//! Converts large tool output strings into [`ToolOutput::Stored`] and -//! persists the content via a [`BlobStore`], returning a summary with -//! a blob reference for conversation history. - -use crate::blob_store::BlobStore; -use async_trait::async_trait; -use llm_worker::tool::{ToolError, ToolOutput, ToolOutputProcessor}; -use std::sync::Arc; - -/// A [`ToolOutputProcessor`] that stores large outputs in a [`BlobStore`]. -/// -/// Small outputs (≤ `INLINE_THRESHOLD` bytes) pass through unchanged. -/// Large outputs are stored as blobs, and a summary with a `[blob:]` -/// reference replaces the original content in conversation history. -pub struct BlobOutputProcessor { - blob_store: Arc, -} - -impl BlobOutputProcessor { - /// Create a new processor backed by the given blob store. - pub fn new(blob_store: Arc) -> Self { - Self { blob_store } - } -} - -#[async_trait] -impl ToolOutputProcessor for BlobOutputProcessor { - async fn process(&self, output: String) -> Result { - let tool_output = ToolOutput::from(output); - - match tool_output { - ToolOutput::Inline(s) => Ok(s), - ToolOutput::Stored { summary, content } => { - let blob_id = self - .blob_store - .store(&content) - .await - .map_err(|e| ToolError::Internal(format!("blob store error: {e}")))?; - - // Prepend blob reference to the summary - Ok(format!("[blob:{blob_id}] {summary}")) - } - } - } -} diff --git a/crates/llm-worker-persistence/src/blob_store.rs b/crates/llm-worker-persistence/src/blob_store.rs deleted file mode 100644 index 48b26f7c..00000000 --- a/crates/llm-worker-persistence/src/blob_store.rs +++ /dev/null @@ -1,54 +0,0 @@ -//! Blob storage abstraction for large tool outputs. -//! -//! [`BlobStore`] provides async storage and retrieval of [`Content`] blobs, -//! keeping them separate from session logs. Session logs reference blobs -//! by [`BlobId`] in tool result summaries. - -use llm_worker::tool::Content; -use std::future::Future; - -/// Unique blob identifier. UUID v7 (time-ordered). -pub type BlobId = uuid::Uuid; - -/// Generate a new blob ID. -pub fn new_blob_id() -> BlobId { - uuid::Uuid::now_v7() -} - -/// Errors from the blob store. -#[derive(Debug, thiserror::Error)] -pub enum BlobStoreError { - #[error("I/O error: {0}")] - Io(#[from] std::io::Error), - - #[error("serialization error: {0}")] - Serde(#[from] serde_json::Error), - - #[error("blob not found: {0}")] - NotFound(BlobId), -} - -/// Async blob storage backend. -/// -/// Stores and retrieves [`Content`] blobs independently of session logs. -/// All methods take `&self` — implementations should use interior mutability -/// when needed. -pub trait BlobStore: Send + Sync { - /// Store content and return its assigned ID. - fn store( - &self, - content: &Content, - ) -> impl Future> + Send; - - /// Load content by ID. - fn load( - &self, - id: BlobId, - ) -> impl Future> + Send; - - /// Check if a blob exists. - fn exists( - &self, - id: BlobId, - ) -> impl Future> + Send; -} diff --git a/crates/llm-worker-persistence/src/fs_blob_store.rs b/crates/llm-worker-persistence/src/fs_blob_store.rs deleted file mode 100644 index 6d15c93b..00000000 --- a/crates/llm-worker-persistence/src/fs_blob_store.rs +++ /dev/null @@ -1,83 +0,0 @@ -//! Filesystem-backed blob store. -//! -//! Layout: -//! - Text blobs: `{root}/{blob_id}.txt` -//! - Structured blobs: `{root}/{blob_id}.json` - -use crate::blob_store::{new_blob_id, BlobId, BlobStore, BlobStoreError}; -use llm_worker::tool::Content; -use std::path::PathBuf; -use tokio::fs; - -/// Filesystem-backed blob store. -/// -/// Each blob is stored as a single file. Text content uses `.txt`, -/// structured (JSON) content uses `.json`. -#[derive(Clone)] -pub struct FsBlobStore { - root: PathBuf, -} - -impl FsBlobStore { - /// Create a new `FsBlobStore` rooted at the given directory. - /// Creates the directory if it does not exist. - pub async fn new(root: impl Into) -> Result { - let root = root.into(); - fs::create_dir_all(&root).await?; - Ok(Self { root }) - } - - fn text_path(&self, id: BlobId) -> PathBuf { - self.root.join(format!("{id}.txt")) - } - - fn json_path(&self, id: BlobId) -> PathBuf { - self.root.join(format!("{id}.json")) - } - - /// Resolve the actual path for a blob, checking both extensions. - fn resolve_path(&self, id: BlobId) -> Option<(PathBuf, bool)> { - let txt = self.text_path(id); - if txt.exists() { - return Some((txt, true)); - } - let json = self.json_path(id); - if json.exists() { - return Some((json, false)); - } - None - } -} - -impl BlobStore for FsBlobStore { - async fn store(&self, content: &Content) -> Result { - let id = new_blob_id(); - match content { - Content::Text(text) => { - fs::write(self.text_path(id), text.as_bytes()).await?; - } - Content::Structured(value) => { - let json = serde_json::to_string_pretty(value)?; - fs::write(self.json_path(id), json.as_bytes()).await?; - } - } - Ok(id) - } - - async fn load(&self, id: BlobId) -> Result { - let (path, is_text) = self - .resolve_path(id) - .ok_or(BlobStoreError::NotFound(id))?; - let bytes = fs::read_to_string(&path).await?; - if is_text { - Ok(Content::Text(bytes)) - } else { - let value = serde_json::from_str(&bytes)?; - Ok(Content::Structured(value)) - } - } - - async fn exists(&self, id: BlobId) -> Result { - Ok(self.resolve_path(id).is_some()) - } -} diff --git a/crates/llm-worker-persistence/src/inspect_tool.rs b/crates/llm-worker-persistence/src/inspect_tool.rs deleted file mode 100644 index 961d57f5..00000000 --- a/crates/llm-worker-persistence/src/inspect_tool.rs +++ /dev/null @@ -1,666 +0,0 @@ -//! Built-in `inspect` tool for retrieving stored blob content. -//! -//! When large tool outputs are stored in a [`BlobStore`], only a summary -//! with a `[blob:]` reference is placed in conversation history. -//! This tool lets the LLM retrieve details on demand, with optional -//! selectors for partial access. - -use std::sync::Arc; - -use async_trait::async_trait; -use serde::Deserialize; -use serde_json::json; - -use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta}; -use llm_worker::state::Mutable; -use llm_worker::Worker; -use llm_worker::llm_client::LlmClient; - -use crate::blob_store::{BlobId, BlobStore}; - -// ─── Constants ─────────────────────────────────────────────────────────────── - -/// Maximum lines shown in the default text preview. -const DEFAULT_PREVIEW_LINES: usize = 50; -/// Maximum array elements shown in the default preview. -const DEFAULT_PREVIEW_ELEMENTS: usize = 5; -/// Maximum object keys whose values are shown in the default preview. -const DEFAULT_PREVIEW_KEYS: usize = 3; - -// ─── Selector ──────────────────────────────────────────────────────────────── - -/// Parsed selector for partial blob content retrieval. -#[derive(Debug, Clone, PartialEq, Eq)] -enum Selector { - /// Extract a range of lines (1-based, inclusive). - Lines { start: usize, end: usize }, - /// Extract a range of array elements (0-based, exclusive end). - Slice { start: usize, end: usize }, - /// Extract a specific key from a JSON object. - Key(String), -} - -fn parse_selector(s: &str) -> Result { - if let Some(rest) = s.strip_prefix("lines:") { - let (a, b) = rest - .split_once('-') - .ok_or_else(|| ToolError::InvalidArgument(format!( - "invalid lines selector '{s}': expected format lines:N-M" - )))?; - let start: usize = a.parse().map_err(|_| { - ToolError::InvalidArgument(format!("invalid start line number: '{a}'")) - })?; - let end: usize = b.parse().map_err(|_| { - ToolError::InvalidArgument(format!("invalid end line number: '{b}'")) - })?; - if start == 0 { - return Err(ToolError::InvalidArgument( - "line numbers are 1-based, got 0".into(), - )); - } - if start > end { - return Err(ToolError::InvalidArgument(format!( - "start line ({start}) must be <= end line ({end})" - ))); - } - Ok(Selector::Lines { start, end }) - } else if let Some(rest) = s.strip_prefix("slice:") { - let (a, b) = rest - .split_once("..") - .ok_or_else(|| ToolError::InvalidArgument(format!( - "invalid slice selector '{s}': expected format slice:N..M" - )))?; - let start: usize = a.parse().map_err(|_| { - ToolError::InvalidArgument(format!("invalid start index: '{a}'")) - })?; - let end: usize = b.parse().map_err(|_| { - ToolError::InvalidArgument(format!("invalid end index: '{b}'")) - })?; - if start > end { - return Err(ToolError::InvalidArgument(format!( - "start index ({start}) must be <= end index ({end})" - ))); - } - Ok(Selector::Slice { start, end }) - } else if let Some(rest) = s.strip_prefix("key:") { - if rest.is_empty() { - return Err(ToolError::InvalidArgument("key name must not be empty".into())); - } - Ok(Selector::Key(rest.to_string())) - } else { - Err(ToolError::InvalidArgument(format!( - "unrecognized selector format: '{s}'. Expected: lines:N-M, slice:N..M, or key:NAME" - ))) - } -} - -// ─── InspectTool ───────────────────────────────────────────────────────────── - -#[derive(Deserialize)] -struct InspectArgs { - blob_id: String, - selector: Option, -} - -/// Built-in tool that retrieves stored blob content. -pub struct InspectTool { - blob_store: Arc, -} - -impl InspectTool { - pub fn new(blob_store: Arc) -> Self { - Self { blob_store } - } -} - -impl InspectTool { - /// Create a [`ToolDefinition`] factory for this tool. - pub fn tool_definition(blob_store: Arc) -> ToolDefinition { - Arc::new(move || { - let meta = ToolMeta::new("inspect") - .description( - "Retrieve content from a stored blob referenced by [blob:] in conversation history. \ - Supports selectors for partial access: \ - 'lines:N-M' (text line range, 1-based inclusive), \ - 'slice:N..M' (array element range, 0-based exclusive end), \ - 'key:NAME' (object key lookup). \ - Without a selector, returns metadata and a preview.", - ) - .input_schema(json!({ - "type": "object", - "properties": { - "blob_id": { - "type": "string", - "description": "The blob UUID from a [blob:] reference" - }, - "selector": { - "type": "string", - "description": "Optional: 'lines:N-M', 'slice:N..M', or 'key:NAME'" - } - }, - "required": ["blob_id"] - })); - let tool = Arc::new(InspectTool::new(Arc::clone(&blob_store))) as Arc; - (meta, tool) - }) - } -} - -#[async_trait] -impl Tool for InspectTool { - async fn execute(&self, input_json: &str) -> Result { - let args: InspectArgs = serde_json::from_str(input_json) - .map_err(|e| ToolError::InvalidArgument(format!("invalid arguments: {e}")))?; - - let blob_id: BlobId = args - .blob_id - .parse() - .map_err(|_| ToolError::InvalidArgument(format!( - "invalid blob_id: '{}' is not a valid UUID", args.blob_id - )))?; - - let content = self - .blob_store - .load(blob_id) - .await - .map_err(|e| ToolError::ExecutionFailed(format!("{e}")))?; - - match args.selector { - None => Ok(default_view(&content)), - Some(sel) => { - let selector = parse_selector(&sel)?; - apply_selector(&content, &selector) - } - } - } -} - -// ─── Default view ──────────────────────────────────────────────────────────── - -use llm_worker::tool::Content; - -fn default_view(content: &Content) -> String { - match content { - Content::Text(text) => default_view_text(text), - Content::Structured(value) => default_view_structured(value), - } -} - -fn default_view_text(text: &str) -> String { - let lines: Vec<&str> = text.lines().collect(); - let total = lines.len(); - let size = text.len(); - let preview_end = total.min(DEFAULT_PREVIEW_LINES); - - let mut out = format!("type: text\nlines: {total}\nsize: {size} bytes\n\n"); - out.push_str(&format!("── preview (lines 1-{preview_end}) ──\n")); - for line in &lines[..preview_end] { - out.push_str(line); - out.push('\n'); - } - if total > DEFAULT_PREVIEW_LINES { - out.push_str(&format!("... ({} more lines)\n", total - DEFAULT_PREVIEW_LINES)); - } - out -} - -fn default_view_structured(value: &serde_json::Value) -> String { - use serde_json::Value; - match value { - Value::Array(arr) => { - let total = arr.len(); - let preview_end = total.min(DEFAULT_PREVIEW_ELEMENTS); - let mut out = format!("type: json_array\nentries: {total}\n\n"); - out.push_str(&format!("── preview (0..{preview_end}) ──\n")); - for item in &arr[..preview_end] { - if let Ok(json) = serde_json::to_string_pretty(item) { - out.push_str(&json); - out.push('\n'); - } - } - if total > DEFAULT_PREVIEW_ELEMENTS { - out.push_str(&format!("... ({} more entries)\n", total - DEFAULT_PREVIEW_ELEMENTS)); - } - out - } - Value::Object(map) => { - let total = map.len(); - let mut out = format!("type: json_object\nkeys: {total}\n\n── keys ──\n"); - for (key, val) in map.iter() { - out.push_str(&format!("{key}: {}\n", value_type_label(val))); - } - // Preview first N key-value pairs - let preview_keys: Vec<_> = map.iter().take(DEFAULT_PREVIEW_KEYS).collect(); - if !preview_keys.is_empty() { - out.push_str("\n── preview ──\n"); - for (key, val) in preview_keys { - if let Ok(json) = serde_json::to_string_pretty(val) { - out.push_str(&format!("{key}: {json}\n")); - } - } - } - out - } - other => { - // Scalar — just show it - serde_json::to_string_pretty(other).unwrap_or_default() - } - } -} - -fn value_type_label(value: &serde_json::Value) -> &'static str { - match value { - serde_json::Value::Null => "null", - serde_json::Value::Bool(_) => "bool", - serde_json::Value::Number(_) => "number", - serde_json::Value::String(_) => "string", - serde_json::Value::Array(_) => "array", - serde_json::Value::Object(_) => "object", - } -} - -// ─── Selector application ──────────────────────────────────────────────────── - -fn apply_selector(content: &Content, selector: &Selector) -> Result { - match (content, selector) { - (Content::Text(text), Selector::Lines { start, end }) => { - let lines: Vec<&str> = text.lines().collect(); - let total = lines.len(); - // Convert 1-based inclusive to 0-based - let from = (*start - 1).min(total); - let to = (*end).min(total); - if from >= total { - return Ok(format!("(no lines — content has {total} lines)")); - } - Ok(lines[from..to].join("\n")) - } - - (Content::Structured(serde_json::Value::Array(arr)), Selector::Slice { start, end }) => { - let total = arr.len(); - let from = (*start).min(total); - let to = (*end).min(total); - let slice = &arr[from..to]; - serde_json::to_string_pretty(slice) - .map_err(|e| ToolError::Internal(format!("JSON serialization error: {e}"))) - } - - (Content::Structured(serde_json::Value::Object(map)), Selector::Key(key)) => { - match map.get(key.as_str()) { - Some(val) => serde_json::to_string_pretty(val) - .map_err(|e| ToolError::Internal(format!("JSON serialization error: {e}"))), - None => { - let available: Vec<_> = map.keys().collect(); - Err(ToolError::InvalidArgument(format!( - "key '{key}' not found. Available keys: {available:?}" - ))) - } - } - } - - // Type mismatches - (Content::Text(_), Selector::Slice { .. }) => Err(ToolError::InvalidArgument( - "slice selector only applies to JSON arrays, but this blob contains text. Use 'lines:N-M' instead.".into(), - )), - (Content::Text(_), Selector::Key(_)) => Err(ToolError::InvalidArgument( - "key selector only applies to JSON objects, but this blob contains text. Use 'lines:N-M' instead.".into(), - )), - (Content::Structured(_), Selector::Lines { .. }) => Err(ToolError::InvalidArgument( - "lines selector only applies to text content, but this blob contains JSON. Use 'slice:N..M' or 'key:NAME' instead.".into(), - )), - (Content::Structured(serde_json::Value::Object(_)), Selector::Slice { .. }) => Err(ToolError::InvalidArgument( - "slice selector only applies to JSON arrays, but this blob is a JSON object. Use 'key:NAME' instead.".into(), - )), - (Content::Structured(serde_json::Value::Array(_)), Selector::Key(_)) => Err(ToolError::InvalidArgument( - "key selector only applies to JSON objects, but this blob is a JSON array. Use 'slice:N..M' instead.".into(), - )), - (Content::Structured(_), Selector::Slice { .. }) => Err(ToolError::InvalidArgument( - "slice selector only applies to JSON arrays.".into(), - )), - (Content::Structured(_), Selector::Key(_)) => Err(ToolError::InvalidArgument( - "key selector only applies to JSON objects.".into(), - )), - } -} - -// ─── Registration helper ───────────────────────────────────────────────────── - -/// Register the `inspect` tool on a [`Worker`]. -/// -/// Call this alongside [`BlobOutputProcessor`](crate::BlobOutputProcessor) -/// setup so the LLM can retrieve stored blob content. -pub fn register_inspect_tool( - worker: &mut Worker, - blob_store: Arc, -) where - C: LlmClient, - B: BlobStore + 'static, -{ - worker.register_tool(InspectTool::::tool_definition(blob_store)); -} - -// ─── Tests ─────────────────────────────────────────────────────────────────── - -#[cfg(test)] -mod tests { - use super::*; - use crate::blob_store::{new_blob_id, BlobStoreError}; - use llm_worker::tool::Content; - use std::collections::HashMap; - use tokio::sync::Mutex; - - // ── In-memory BlobStore for tests ──────────────────────────────────── - - struct MemBlobStore { - blobs: Mutex>, - } - - impl MemBlobStore { - fn new() -> Self { - Self { - blobs: Mutex::new(HashMap::new()), - } - } - } - - impl BlobStore for MemBlobStore { - async fn store(&self, content: &Content) -> Result { - let id = new_blob_id(); - self.blobs.lock().await.insert(id, content.clone()); - Ok(id) - } - - async fn load(&self, id: BlobId) -> Result { - self.blobs - .lock() - .await - .get(&id) - .cloned() - .ok_or(BlobStoreError::NotFound(id)) - } - - async fn exists(&self, id: BlobId) -> Result { - Ok(self.blobs.lock().await.contains_key(&id)) - } - } - - // ── Selector parsing ───────────────────────────────────────────────── - - #[test] - fn parse_lines_valid() { - assert_eq!( - parse_selector("lines:1-50").unwrap(), - Selector::Lines { start: 1, end: 50 } - ); - assert_eq!( - parse_selector("lines:5-5").unwrap(), - Selector::Lines { start: 5, end: 5 } - ); - } - - #[test] - fn parse_lines_zero_start() { - let err = parse_selector("lines:0-5").unwrap_err(); - assert!(matches!(err, ToolError::InvalidArgument(_))); - } - - #[test] - fn parse_lines_inverted() { - let err = parse_selector("lines:50-20").unwrap_err(); - assert!(matches!(err, ToolError::InvalidArgument(_))); - } - - #[test] - fn parse_lines_missing_dash() { - let err = parse_selector("lines:20").unwrap_err(); - assert!(matches!(err, ToolError::InvalidArgument(_))); - } - - #[test] - fn parse_slice_valid() { - assert_eq!( - parse_selector("slice:0..10").unwrap(), - Selector::Slice { start: 0, end: 10 } - ); - assert_eq!( - parse_selector("slice:3..8").unwrap(), - Selector::Slice { start: 3, end: 8 } - ); - } - - #[test] - fn parse_slice_inverted() { - let err = parse_selector("slice:10..3").unwrap_err(); - assert!(matches!(err, ToolError::InvalidArgument(_))); - } - - #[test] - fn parse_key_valid() { - assert_eq!( - parse_selector("key:results").unwrap(), - Selector::Key("results".into()) - ); - // Key name with colon - assert_eq!( - parse_selector("key:nested:key").unwrap(), - Selector::Key("nested:key".into()) - ); - } - - #[test] - fn parse_key_empty() { - let err = parse_selector("key:").unwrap_err(); - assert!(matches!(err, ToolError::InvalidArgument(_))); - } - - #[test] - fn parse_unknown_prefix() { - let err = parse_selector("unknown:foo").unwrap_err(); - assert!(matches!(err, ToolError::InvalidArgument(_))); - } - - // ── Default view ───────────────────────────────────────────────────── - - #[test] - fn default_view_text_short() { - let text = "line1\nline2\nline3\n"; - let content = Content::Text(text.into()); - let view = default_view(&content); - assert!(view.contains("type: text")); - assert!(view.contains("lines: 3")); - assert!(view.contains("line1")); - assert!(!view.contains("more lines")); - } - - #[test] - fn default_view_text_long() { - let text: String = (1..=100).map(|i| format!("line {i}\n")).collect(); - let content = Content::Text(text); - let view = default_view(&content); - assert!(view.contains("type: text")); - assert!(view.contains("lines: 100")); - assert!(view.contains("line 1")); - assert!(view.contains("line 50")); - assert!(!view.contains("line 51\n")); - assert!(view.contains("50 more lines")); - } - - #[test] - fn default_view_array() { - let arr: Vec = (0..20).map(|i| json!({"id": i})).collect(); - let content = Content::Structured(json!(arr)); - let view = default_view(&content); - assert!(view.contains("type: json_array")); - assert!(view.contains("entries: 20")); - assert!(view.contains("15 more entries")); - } - - #[test] - fn default_view_object() { - let content = Content::Structured(json!({ - "name": "test", - "count": 42, - "items": [1, 2, 3], - "nested": {"a": 1} - })); - let view = default_view(&content); - assert!(view.contains("type: json_object")); - assert!(view.contains("keys: 4")); - assert!(view.contains("── keys ──")); - assert!(view.contains("── preview ──")); - } - - // ── Selector application ───────────────────────────────────────────── - - #[test] - fn apply_lines_on_text() { - let text = "a\nb\nc\nd\ne\nf\n"; - let content = Content::Text(text.into()); - let result = apply_selector(&content, &Selector::Lines { start: 2, end: 4 }).unwrap(); - assert_eq!(result, "b\nc\nd"); - } - - #[test] - fn apply_lines_clamp() { - let text = "a\nb\nc\n"; - let content = Content::Text(text.into()); - let result = apply_selector(&content, &Selector::Lines { start: 2, end: 100 }).unwrap(); - assert_eq!(result, "b\nc"); - } - - #[test] - fn apply_lines_beyond_content() { - let text = "a\nb\n"; - let content = Content::Text(text.into()); - let result = apply_selector(&content, &Selector::Lines { start: 10, end: 20 }).unwrap(); - assert!(result.contains("no lines")); - } - - #[test] - fn apply_slice_on_array() { - let content = Content::Structured(json!([10, 20, 30, 40, 50])); - let result = apply_selector(&content, &Selector::Slice { start: 1, end: 3 }).unwrap(); - let parsed: Vec = serde_json::from_str(&result).unwrap(); - assert_eq!(parsed, vec![20, 30]); - } - - #[test] - fn apply_slice_clamp() { - let content = Content::Structured(json!([10, 20, 30])); - let result = apply_selector(&content, &Selector::Slice { start: 1, end: 100 }).unwrap(); - let parsed: Vec = serde_json::from_str(&result).unwrap(); - assert_eq!(parsed, vec![20, 30]); - } - - #[test] - fn apply_key_on_object() { - let content = Content::Structured(json!({"name": "test", "count": 42})); - let result = apply_selector(&content, &Selector::Key("name".into())).unwrap(); - assert_eq!(result.trim(), "\"test\""); - } - - #[test] - fn apply_key_not_found() { - let content = Content::Structured(json!({"name": "test"})); - let err = apply_selector(&content, &Selector::Key("missing".into())).unwrap_err(); - match err { - ToolError::InvalidArgument(msg) => { - assert!(msg.contains("missing")); - assert!(msg.contains("name")); - } - _ => panic!("expected InvalidArgument"), - } - } - - // ── Type mismatch errors ───────────────────────────────────────────── - - #[test] - fn lines_on_json_error() { - let content = Content::Structured(json!([1, 2, 3])); - let err = apply_selector(&content, &Selector::Lines { start: 1, end: 3 }).unwrap_err(); - assert!(matches!(err, ToolError::InvalidArgument(_))); - } - - #[test] - fn slice_on_text_error() { - let content = Content::Text("hello".into()); - let err = apply_selector(&content, &Selector::Slice { start: 0, end: 3 }).unwrap_err(); - assert!(matches!(err, ToolError::InvalidArgument(_))); - } - - #[test] - fn key_on_text_error() { - let content = Content::Text("hello".into()); - let err = apply_selector(&content, &Selector::Key("foo".into())).unwrap_err(); - assert!(matches!(err, ToolError::InvalidArgument(_))); - } - - #[test] - fn slice_on_object_error() { - let content = Content::Structured(json!({"a": 1})); - let err = apply_selector(&content, &Selector::Slice { start: 0, end: 3 }).unwrap_err(); - assert!(matches!(err, ToolError::InvalidArgument(_))); - } - - #[test] - fn key_on_array_error() { - let content = Content::Structured(json!([1, 2, 3])); - let err = apply_selector(&content, &Selector::Key("foo".into())).unwrap_err(); - assert!(matches!(err, ToolError::InvalidArgument(_))); - } - - // ── Integration via execute() ──────────────────────────────────────── - - #[tokio::test] - async fn execute_default_view() { - let store = Arc::new(MemBlobStore::new()); - let text = (1..=100).map(|i| format!("line {i}")).collect::>().join("\n"); - let blob_id = store.store(&Content::Text(text)).await.unwrap(); - - let tool = InspectTool::new(store); - let result = tool - .execute(&json!({"blob_id": blob_id.to_string()}).to_string()) - .await - .unwrap(); - assert!(result.contains("type: text")); - assert!(result.contains("lines: 100")); - } - - #[tokio::test] - async fn execute_with_selector() { - let store = Arc::new(MemBlobStore::new()); - let blob_id = store - .store(&Content::Structured(json!({"name": "test", "value": 42}))) - .await - .unwrap(); - - let tool = InspectTool::new(store); - let result = tool - .execute(&json!({"blob_id": blob_id.to_string(), "selector": "key:name"}).to_string()) - .await - .unwrap(); - assert_eq!(result.trim(), "\"test\""); - } - - #[tokio::test] - async fn execute_invalid_blob_id() { - let store = Arc::new(MemBlobStore::new()); - let tool = InspectTool::new(store); - let err = tool - .execute(&json!({"blob_id": "not-a-uuid"}).to_string()) - .await - .unwrap_err(); - assert!(matches!(err, ToolError::InvalidArgument(_))); - } - - #[tokio::test] - async fn execute_blob_not_found() { - let store = Arc::new(MemBlobStore::new()); - let tool = InspectTool::new(store); - let fake_id = new_blob_id(); - let err = tool - .execute(&json!({"blob_id": fake_id.to_string()}).to_string()) - .await - .unwrap_err(); - assert!(matches!(err, ToolError::ExecutionFailed(_))); - } -} diff --git a/crates/llm-worker-persistence/src/lib.rs b/crates/llm-worker-persistence/src/lib.rs index 111222d1..d2ff8f9e 100644 --- a/crates/llm-worker-persistence/src/lib.rs +++ b/crates/llm-worker-persistence/src/lib.rs @@ -20,21 +20,13 @@ //! session.run("Hello!").await?; //! ``` -pub mod blob_output_processor; -pub mod blob_store; pub mod event_trace; -pub mod fs_blob_store; pub mod fs_store; -pub mod inspect_tool; pub mod session; pub mod session_log; pub mod store; -pub use blob_output_processor::BlobOutputProcessor; -pub use blob_store::{BlobId, BlobStore, BlobStoreError}; -pub use inspect_tool::{InspectTool, register_inspect_tool}; pub use event_trace::TraceEntry; -pub use fs_blob_store::FsBlobStore; pub use fs_store::FsStore; pub use session::{Session, SessionConfig, SessionError}; pub use session_log::{ diff --git a/crates/llm-worker-persistence/tests/session_test.rs b/crates/llm-worker-persistence/tests/session_test.rs index c77b7cff..fc97c58e 100644 --- a/crates/llm-worker-persistence/tests/session_test.rs +++ b/crates/llm-worker-persistence/tests/session_test.rs @@ -7,7 +7,7 @@ use common::MockLlmClient; use llm_worker::interceptor::{Interceptor, TurnEndAction}; use llm_worker::llm_client::event::{Event, ResponseStatus, StatusEvent}; use llm_worker::llm_client::types::{Item, RequestConfig}; -use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta}; +use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta, ToolOutput}; use llm_worker::Worker; use llm_worker_persistence::{ FsStore, LogEntry, Outcome, Session, SessionConfig, Store, collect_state, @@ -56,8 +56,8 @@ struct MockWeatherTool; #[async_trait] impl Tool for MockWeatherTool { - async fn execute(&self, _input_json: &str) -> Result { - Ok("Sunny, 25C".to_string()) + async fn execute(&self, _input_json: &str) -> Result { + Ok("Sunny, 25C".to_string().into()) } } diff --git a/crates/llm-worker/examples/worker_cli.rs b/crates/llm-worker/examples/worker_cli.rs index fd61f84b..54f302ab 100644 --- a/crates/llm-worker/examples/worker_cli.rs +++ b/crates/llm-worker/examples/worker_cli.rs @@ -292,9 +292,9 @@ impl Interceptor for ToolResultPrinterPolicy { .unwrap_or_else(|| info.result.tool_use_id.clone()); if info.result.is_error { - println!(" Result ({}): ❌ {}", name, info.result.content); + println!(" Result ({}): ❌ {}", name, info.result.summary); } else { - println!(" Result ({}): ✅ {}", name, info.result.content); + println!(" Result ({}): ✅ {}", name, info.result.summary); } PostToolAction::Continue diff --git a/crates/llm-worker/src/llm_client/scheme/anthropic/request.rs b/crates/llm-worker/src/llm_client/scheme/anthropic/request.rs index 30fd4c13..ba5228b8 100644 --- a/crates/llm-worker/src/llm_client/scheme/anthropic/request.rs +++ b/crates/llm-worker/src/llm_client/scheme/anthropic/request.rs @@ -183,7 +183,7 @@ impl AnthropicScheme { } Item::ToolResult { - call_id, output, .. + call_id, summary, content, .. } => { // Flush pending assistant parts first if !pending_assistant_parts.is_empty() { @@ -195,9 +195,13 @@ impl AnthropicScheme { }); } + let text = match content { + Some(c) => format!("{summary}\n{c}"), + None => summary.clone(), + }; pending_user_parts.push(AnthropicContentPart::ToolResult { tool_use_id: call_id.clone(), - content: output.clone(), + content: text, }); } diff --git a/crates/llm-worker/src/llm_client/scheme/gemini/request.rs b/crates/llm-worker/src/llm_client/scheme/gemini/request.rs index 55646da0..4add2841 100644 --- a/crates/llm-worker/src/llm_client/scheme/gemini/request.rs +++ b/crates/llm-worker/src/llm_client/scheme/gemini/request.rs @@ -258,7 +258,7 @@ impl GeminiScheme { } Item::ToolResult { - call_id, output, .. + call_id, summary, content, .. } => { // Flush pending model parts first if !pending_model_parts.is_empty() { @@ -268,12 +268,16 @@ impl GeminiScheme { }); } + let text = match content { + Some(c) => format!("{summary}\n{c}"), + None => summary.clone(), + }; pending_user_parts.push(GeminiPart::FunctionResponse { function_response: GeminiFunctionResponse { name: call_id.clone(), response: GeminiFunctionResponseContent { name: call_id.clone(), - content: Value::String(output.clone()), + content: Value::String(text), }, }, }); diff --git a/crates/llm-worker/src/llm_client/scheme/openai/request.rs b/crates/llm-worker/src/llm_client/scheme/openai/request.rs index f3dea651..9935a3d6 100644 --- a/crates/llm-worker/src/llm_client/scheme/openai/request.rs +++ b/crates/llm-worker/src/llm_client/scheme/openai/request.rs @@ -212,7 +212,7 @@ impl OpenAIScheme { } Item::ToolResult { - call_id, output, .. + call_id, summary, content, .. } => { // Flush pending tool calls before tool result self.flush_pending_assistant( @@ -221,9 +221,13 @@ impl OpenAIScheme { &mut pending_assistant_text, ); + let text = match content { + Some(c) => format!("{summary}\n{c}"), + None => summary.clone(), + }; messages.push(OpenAIMessage { role: "tool".to_string(), - content: Some(OpenAIContent::Text(output.clone())), + content: Some(OpenAIContent::Text(text)), tool_calls: vec![], tool_call_id: Some(call_id.clone()), name: None, diff --git a/crates/llm-worker/src/llm_client/types.rs b/crates/llm-worker/src/llm_client/types.rs index 4fbb0c7f..3450df5e 100644 --- a/crates/llm-worker/src/llm_client/types.rs +++ b/crates/llm-worker/src/llm_client/types.rs @@ -74,8 +74,11 @@ pub enum Item { id: Option, /// Call ID linking to the tool call call_id: CallId, - /// Output content - output: String, + /// Short summary (always kept in history, survives pruning) + summary: String, + /// Detailed output (removed by pruning when old enough) + #[serde(default, skip_serializing_if = "Option::is_none")] + content: Option, }, /// Reasoning/thinking item @@ -164,12 +167,27 @@ impl Item { Self::tool_call(call_id, name, arguments.to_string()) } - /// Create a tool result item - pub fn tool_result(call_id: impl Into, output: impl Into) -> Self { + /// Create a tool result item with summary only (no content). + pub fn tool_result(call_id: impl Into, summary: impl Into) -> Self { Self::ToolResult { id: None, call_id: call_id.into(), - output: output.into(), + summary: summary.into(), + content: None, + } + } + + /// Create a tool result item with summary and content. + pub fn tool_result_with_content( + call_id: impl Into, + summary: impl Into, + content: impl Into, + ) -> Self { + Self::ToolResult { + id: None, + call_id: call_id.into(), + summary: summary.into(), + content: Some(content.into()), } } diff --git a/crates/llm-worker/src/tool.rs b/crates/llm-worker/src/tool.rs index 622470f4..d1737f67 100644 --- a/crates/llm-worker/src/tool.rs +++ b/crates/llm-worker/src/tool.rs @@ -25,199 +25,50 @@ pub enum ToolError { } // ============================================================================= -// ToolOutput - Tool execution result with size-aware storage +// ToolOutput - Tool execution result with summary + content // ============================================================================= -/// Tool output size threshold in bytes. -/// Results larger than this are automatically promoted to `Stored`. -pub const INLINE_THRESHOLD: usize = 800; - -/// Maximum size of auto-generated summaries in bytes. -pub const SUMMARY_MAX_BYTES: usize = 400; - -/// Number of lines to include from the head of text content in summaries. -pub const SUMMARY_HEAD_LINES: usize = 5; - -/// Number of lines to include from the tail of text content in summaries. -pub const SUMMARY_TAIL_LINES: usize = 3; +/// Threshold below which tool output is treated as summary-only (no content). +/// Outputs this small don't benefit from pruning. +pub const SUMMARY_THRESHOLD: usize = 200; /// Tool execution result. /// -/// Small results are kept inline in conversation history. -/// Large results are stored externally via `BlobStore`, with only -/// a summary placed in the history. The LLM can retrieve details -/// using the built-in `inspect` tool. -#[derive(Debug, Clone)] -pub enum ToolOutput { - /// Small result: placed directly into history as-is. - Inline(String), - /// Large result: summary goes into history, full content is stored externally. - Stored { - /// Concise summary shown to the LLM in conversation context. - summary: String, - /// Full content to be persisted in a BlobStore. - content: Content, - }, -} - -impl ToolOutput { - /// Get the string that should be placed into conversation history. - pub fn history_text(&self) -> &str { - match self { - ToolOutput::Inline(s) => s, - ToolOutput::Stored { summary, .. } => summary, - } - } - - /// Whether this output requires external storage. - pub fn is_stored(&self) -> bool { - matches!(self, ToolOutput::Stored { .. }) - } -} - -/// Content to be stored in a BlobStore. +/// Every output has a mandatory `summary` (1-2 lines) that persists in +/// conversation history even after pruning. The optional `content` carries +/// full details and is removed by the Prune mechanism when the context +/// grows too large. #[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(tag = "type", content = "data")] -pub enum Content { - /// Plain text (file contents, search results, logs, etc.) - Text(String), - /// Structured JSON data (API responses, query results, etc.) - Structured(Value), +pub struct ToolOutput { + /// Short summary (1-2 lines). Always remains in history. + pub summary: String, + /// Detailed output. Removed by Prune when old enough. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub content: Option, } impl From for ToolOutput { fn from(s: String) -> Self { - if s.len() <= INLINE_THRESHOLD { - ToolOutput::Inline(s) + if s.len() <= SUMMARY_THRESHOLD { + ToolOutput { + summary: s, + content: None, + } } else { - let summary = auto_summarize_text(&s); - ToolOutput::Stored { - summary, - content: Content::Text(s), - } - } - } -} - -/// Generate a summary for any [`Content`] variant. -/// -/// The blob ID prefix (`[blob:]`) is NOT included here — it is -/// prepended by the Worker after the content is stored and an ID is assigned. -pub fn auto_summarize(content: &Content) -> String { - match content { - Content::Text(text) => auto_summarize_text(text), - Content::Structured(value) => auto_summarize_structured(value), - } -} - -/// Generate a summary for plain text content. -fn auto_summarize_text(text: &str) -> String { - let lines: Vec<&str> = text.lines().collect(); - let total = lines.len(); - - let mut summary = format!("text | {total} lines\n"); - - // Head - summary.push_str("── head ──\n"); - for line in lines.iter().take(SUMMARY_HEAD_LINES) { - summary.push_str(line); - summary.push('\n'); - } - - // Tail (only if there's content beyond head) - if total > SUMMARY_HEAD_LINES + SUMMARY_TAIL_LINES { - summary.push_str("── tail ──\n"); - let tail_start = total.saturating_sub(SUMMARY_TAIL_LINES); - for line in &lines[tail_start..] { - summary.push_str(line); - summary.push('\n'); - } - } - - // Truncate if summary itself is too large - if summary.len() > SUMMARY_MAX_BYTES { - summary.truncate(SUMMARY_MAX_BYTES); - summary.push_str("…\n"); - } - - summary -} - -/// Generate a summary for structured JSON content. -fn auto_summarize_structured(value: &Value) -> String { - let mut summary = match value { - Value::Array(arr) => { - let mut s = format!("json_array | {} entries\n", arr.len()); - // Show schema from first element - if let Some(first) = arr.first() { - s.push_str("── schema ──\n"); - s.push_str(&describe_value_shape(first)); - s.push('\n'); - } - // Show first 2 entries - s.push_str("── head ──\n"); - for item in arr.iter().take(2) { - if let Ok(json) = serde_json::to_string(item) { - s.push_str(&json); - s.push('\n'); - } - } - s - } - Value::Object(map) => { - let mut s = format!("json_object | {} keys\n", map.len()); - s.push_str("── keys ──\n"); - for (key, val) in map.iter() { - s.push_str(&format!("{key}: {}\n", value_type_label(val))); - } - s - } - _ => { - // Scalar or other — just show the JSON - format!( - "json | {}\n", - serde_json::to_string(value).unwrap_or_default() - ) - } - }; - - if summary.len() > SUMMARY_MAX_BYTES { - summary.truncate(SUMMARY_MAX_BYTES); - summary.push_str("…\n"); - } - - summary -} - -/// Describe the shape of a JSON value (for schema preview). -fn describe_value_shape(value: &Value) -> String { - match value { - Value::Object(map) => { - let fields: Vec = map - .iter() - .map(|(k, v)| format!("{k}: {}", value_type_label(v))) + let lines = s.lines().count(); + let first_line: String = s + .lines() + .next() + .unwrap_or("") + .chars() + .take(80) .collect(); - format!("{{ {} }}", fields.join(", ")) - } - _ => value_type_label(value), - } -} - -/// Human-readable type label for a JSON value. -fn value_type_label(value: &Value) -> String { - match value { - Value::Null => "null".to_string(), - Value::Bool(_) => "bool".to_string(), - Value::Number(_) => "number".to_string(), - Value::String(s) => { - if s.len() > 50 { - format!("string({})", s.len()) - } else { - "string".to_string() + let summary = format!("{lines} lines | {first_line}…"); + ToolOutput { + summary, + content: Some(s), } } - Value::Array(arr) => format!("array({})", arr.len()), - Value::Object(map) => format!("object({})", map.len()), } } @@ -341,34 +192,15 @@ pub type ToolDefinition = Arc (ToolMeta, Arc) + Send + Syn /// ``` #[async_trait] pub trait Tool: Send + Sync { - /// Execute the tool + /// Execute the tool. /// /// # Arguments /// * `input_json` - JSON-formatted arguments generated by LLM /// /// # Returns - /// Result string from execution. This content is returned to LLM. - async fn execute(&self, input_json: &str) -> Result; -} - -// ============================================================================= -// ToolOutputProcessor - Output storage abstraction -// ============================================================================= - -/// Processes tool output before it enters conversation history. -/// -/// When a tool produces a large result, the processor can store the -/// full content externally and return a summary string for the history. -/// -/// If no processor is set on Worker, all tool outputs are used as-is (inline). -#[async_trait] -pub trait ToolOutputProcessor: Send + Sync { - /// Process a tool's raw output string. - /// - /// Returns the string that should be placed into conversation history. - /// For small outputs, this may be the original string unchanged. - /// For large outputs, this should be a summary with a blob reference. - async fn process(&self, output: String) -> Result; + /// A [`ToolOutput`] with summary and optional detailed content. + /// For simple cases, use `From`: `Ok("done".to_string().into())` + async fn execute(&self, input_json: &str) -> Result; } // ============================================================================= @@ -390,33 +222,39 @@ pub struct ToolCall { /// Tool execution result /// -/// Represents the result after tool execution. +/// Intermediate representation between tool execution and history. +/// Carries `summary` + optional `content` from [`ToolOutput`]. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ToolResult { /// Corresponding tool call ID pub tool_use_id: String, - /// Result content - pub content: String, + /// Short summary (always kept in history) + pub summary: String, + /// Detailed output (prunable) + #[serde(default, skip_serializing_if = "Option::is_none")] + pub content: Option, /// Whether this is an error #[serde(default)] pub is_error: bool, } impl ToolResult { - /// Create a success result - pub fn success(tool_use_id: impl Into, content: impl Into) -> Self { + /// Create a success result from a [`ToolOutput`]. + pub fn from_output(tool_use_id: impl Into, output: ToolOutput) -> Self { Self { tool_use_id: tool_use_id.into(), - content: content.into(), + summary: output.summary, + content: output.content, is_error: false, } } - /// Create an error result - pub fn error(tool_use_id: impl Into, content: impl Into) -> Self { + /// Create an error result. + pub fn error(tool_use_id: impl Into, message: impl Into) -> Self { Self { tool_use_id: tool_use_id.into(), - content: content.into(), + summary: message.into(), + content: None, is_error: true, } } diff --git a/crates/llm-worker/src/tool_server.rs b/crates/llm-worker/src/tool_server.rs index dfc42cf7..a056290a 100644 --- a/crates/llm-worker/src/tool_server.rs +++ b/crates/llm-worker/src/tool_server.rs @@ -4,7 +4,7 @@ use std::sync::{Arc, Mutex}; use thiserror::Error; use crate::llm_client::ToolDefinition as LlmToolDefinition; -use crate::tool::{Tool, ToolDefinition as WorkerToolDefinition, ToolMeta}; +use crate::tool::{Tool, ToolDefinition as WorkerToolDefinition, ToolMeta, ToolOutput}; type ToolMap = HashMap)>; @@ -110,7 +110,7 @@ impl ToolServerHandle { } /// Execute a tool by name. - pub async fn call_tool(&self, name: &str, input_json: &str) -> Result { + pub async fn call_tool(&self, name: &str, input_json: &str) -> Result { let tool = { let guard = self.tools.lock().unwrap_or_else(|e| e.into_inner()); let (_, tool) = guard @@ -180,8 +180,8 @@ mod tests { #[async_trait] impl Tool for EchoTool { - async fn execute(&self, input_json: &str) -> Result { - Ok(input_json.to_string()) + async fn execute(&self, input_json: &str) -> Result { + Ok(input_json.to_string().into()) } } @@ -230,7 +230,8 @@ mod tests { handle.flush_pending(); let out = handle.call_tool("echo", r#"{"x":1}"#).await.expect("call"); - assert_eq!(out, r#"{"x":1}"#); + assert_eq!(out.summary, r#"{"x":1}"#); + assert!(out.content.is_none()); let err = handle .call_tool("missing", "{}") @@ -290,8 +291,8 @@ mod tests { #[async_trait] impl Tool for FixedTool { - async fn execute(&self, _input_json: &str) -> Result { - Ok("replaced".to_string()) + async fn execute(&self, _input_json: &str) -> Result { + Ok("replaced".to_string().into()) } } @@ -319,8 +320,8 @@ mod tests { #[async_trait] impl Tool for ConstTool { - async fn execute(&self, _input_json: &str) -> Result { - Ok("const".to_string()) + async fn execute(&self, _input_json: &str) -> Result { + Ok("const".to_string().into()) } } @@ -335,7 +336,7 @@ mod tests { handle.replace(replacement).expect("replace"); let out = handle.call_tool("echo", "{}").await.expect("call"); - assert_eq!(out, "const"); + assert_eq!(out.summary, "const"); } #[tokio::test] @@ -352,10 +353,10 @@ mod tests { #[async_trait] impl Tool for GatedTool { - async fn execute(&self, _input_json: &str) -> Result { + async fn execute(&self, _input_json: &str) -> Result { self.started.notify_one(); self.finish.notified().await; - Ok("done".to_string()) + Ok("done".to_string().into()) } } @@ -388,7 +389,7 @@ mod tests { // Let the in-flight call finish. finish.notify_one(); let result = call.await.expect("join"); - assert_eq!(result.expect("call"), "done"); + assert_eq!(result.expect("call").summary, "done"); } #[tokio::test] @@ -405,10 +406,10 @@ mod tests { #[async_trait] impl Tool for OldTool { - async fn execute(&self, _input_json: &str) -> Result { + async fn execute(&self, _input_json: &str) -> Result { self.started.notify_one(); self.finish.notified().await; - Ok("old".to_string()) + Ok("old".to_string().into()) } } @@ -439,8 +440,8 @@ mod tests { #[async_trait] impl Tool for NewTool { - async fn execute(&self, _input_json: &str) -> Result { - Ok("new".to_string()) + async fn execute(&self, _input_json: &str) -> Result { + Ok("new".to_string().into()) } } @@ -458,11 +459,11 @@ mod tests { // Let the old in-flight call finish — it should return "old". finish.notify_one(); let result = call.await.expect("join"); - assert_eq!(result.expect("call"), "old"); + assert_eq!(result.expect("call").summary, "old"); // New calls use the replacement. let out = handle.call_tool("t", "{}").await.expect("call"); - assert_eq!(out, "new"); + assert_eq!(out.summary, "new"); } #[test] diff --git a/crates/llm-worker/src/worker.rs b/crates/llm-worker/src/worker.rs index 54486617..ead81ee8 100644 --- a/crates/llm-worker/src/worker.rs +++ b/crates/llm-worker/src/worker.rs @@ -1,6 +1,5 @@ use std::collections::HashMap; use std::marker::PhantomData; -use std::sync::Arc; use futures::StreamExt; use tokio::sync::mpsc; @@ -21,7 +20,7 @@ use crate::{ handler::{ErrorKind, StatusKind, ToolUseBlockStart, UsageKind}, timeline::{TextBlockCollector, Timeline, ToolCallCollector}, timeline::event::{ErrorEvent, StatusEvent, UsageEvent}, - tool::{ToolCall, ToolDefinition as WorkerToolDefinition, ToolError, ToolOutputProcessor, ToolResult}, + tool::{ToolCall, ToolDefinition as WorkerToolDefinition, ToolError, ToolResult}, tool_server::{ToolServer, ToolServerHandle}, }; @@ -154,8 +153,6 @@ pub struct Worker { request_config: RequestConfig, /// Whether the previous run was interrupted last_run_interrupted: bool, - /// Optional processor for large tool outputs (stores externally, returns summary) - output_processor: Option>, /// Cancel notification channel (for interrupting execution) cancel_tx: mpsc::Sender<()>, cancel_rx: mpsc::Receiver<()>, @@ -610,7 +607,7 @@ impl Worker { async move { let input_json = serde_json::to_string(&tool_call.input).unwrap_or_default(); match tool_server.call_tool(&tool_call.name, &input_json).await { - Ok(content) => ToolResult::success(&tool_call.id, content), + Ok(output) => ToolResult::from_output(&tool_call.id, output), Err(e) => ToolResult::error(&tool_call.id, e.to_string()), } } @@ -630,20 +627,6 @@ impl Worker { } }; - // Phase 2.5: Apply output processor (store large results externally) - if let Some(ref processor) = self.output_processor { - for tool_result in &mut results { - if !tool_result.is_error { - match processor.process(tool_result.content.clone()).await { - Ok(processed) => tool_result.content = processed, - Err(e) => { - warn!(error = %e, "Output processor failed, keeping original content"); - } - } - } - } - } - // Phase 3: Apply post_tool_call interceptor for tool_result in &mut results { if let Some((tool_call, meta, tool)) = call_info_map.get(&tool_result.tool_use_id) { @@ -835,8 +818,18 @@ impl Worker { } Ok(ToolExecutionResult::Completed(results)) => { for result in results { - self.history - .push(Item::tool_result(&result.tool_use_id, &result.content)); + if let Some(ref content) = result.content { + self.history.push(Item::tool_result_with_content( + &result.tool_use_id, + &result.summary, + content, + )); + } else { + self.history.push(Item::tool_result( + &result.tool_use_id, + &result.summary, + )); + } } Ok(None) } @@ -878,7 +871,6 @@ impl Worker { turn_end_cbs: Vec::new(), request_config: RequestConfig::default(), last_run_interrupted: false, - output_processor: None, cancel_tx, cancel_rx, _state: PhantomData, @@ -1063,14 +1055,6 @@ impl Worker { self.last_run_interrupted = interrupted; } - /// Set a tool output processor for handling large tool results. - /// - /// When set, tool execution results are passed through this processor - /// before being placed into conversation history. - pub fn set_output_processor(&mut self, processor: Arc) { - self.output_processor = Some(processor); - } - /// Apply configuration (reserved for future extensions) #[allow(dead_code)] pub fn config(self, _config: WorkerConfig) -> Self { @@ -1134,7 +1118,7 @@ impl Worker { turn_end_cbs: self.turn_end_cbs, request_config: self.request_config, last_run_interrupted: self.last_run_interrupted, - output_processor: self.output_processor, + cancel_tx: self.cancel_tx, cancel_rx: self.cancel_rx, _state: PhantomData, @@ -1204,7 +1188,7 @@ impl Worker { turn_end_cbs: self.turn_end_cbs, request_config: self.request_config, last_run_interrupted: self.last_run_interrupted, - output_processor: self.output_processor, + cancel_tx: self.cancel_tx, cancel_rx: self.cancel_rx, _state: PhantomData, diff --git a/crates/llm-worker/tests/parallel_execution_test.rs b/crates/llm-worker/tests/parallel_execution_test.rs index c63c35ea..b78ad2f7 100644 --- a/crates/llm-worker/tests/parallel_execution_test.rs +++ b/crates/llm-worker/tests/parallel_execution_test.rs @@ -10,7 +10,7 @@ use async_trait::async_trait; use llm_worker::Worker; use llm_worker::llm_client::event::{Event, ResponseStatus, StatusEvent}; use llm_worker::interceptor::{Interceptor, PostToolAction, PreToolAction, ToolCallInfo, ToolResultInfo}; -use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta}; +use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta, ToolOutput}; mod common; use common::MockLlmClient; @@ -57,10 +57,10 @@ impl SlowTool { #[async_trait] impl Tool for SlowTool { - async fn execute(&self, _input_json: &str) -> Result { + async fn execute(&self, _input_json: &str) -> Result { self.call_count.fetch_add(1, Ordering::SeqCst); tokio::time::sleep(Duration::from_millis(self.delay_ms)).await; - Ok(format!("Completed after {}ms", self.delay_ms)) + Ok(format!("Completed after {}ms", self.delay_ms).into()) } } @@ -218,8 +218,8 @@ async fn test_post_tool_call_modification() { #[async_trait] impl Tool for SimpleTool { - async fn execute(&self, _: &str) -> Result { - Ok("Original Result".to_string()) + async fn execute(&self, _: &str) -> Result { + Ok("Original Result".to_string().into()) } } @@ -242,8 +242,8 @@ async fn test_post_tool_call_modification() { #[async_trait] impl Interceptor for ModifyingPolicy { async fn post_tool_call(&self, info: &mut ToolResultInfo) -> PostToolAction { - info.result.content = format!("[Modified] {}", info.result.content); - *self.modified_content.lock().unwrap() = Some(info.result.content.clone()); + info.result.summary = format!("[Modified] {}", info.result.summary); + *self.modified_content.lock().unwrap() = Some(info.result.summary.clone()); PostToolAction::Continue } } diff --git a/crates/llm-worker/tests/tool_macro_test.rs b/crates/llm-worker/tests/tool_macro_test.rs index 873d46cd..86d8bee0 100644 --- a/crates/llm-worker/tests/tool_macro_test.rs +++ b/crates/llm-worker/tests/tool_macro_test.rs @@ -77,8 +77,8 @@ async fn test_basic_tool_generation() { let result = tool.execute(r#"{"message": "World"}"#).await; assert!(result.is_ok(), "Should execute successfully"); let output = result.unwrap(); - assert!(output.contains("Hello"), "Output should contain prefix"); - assert!(output.contains("World"), "Output should contain message"); + assert!(output.summary.contains("Hello"), "Output should contain prefix"); + assert!(output.summary.contains("World"), "Output should contain message"); } #[tokio::test] @@ -94,7 +94,7 @@ async fn test_multiple_arguments() { let result = tool.execute(r#"{"a": 10, "b": 20}"#).await; assert!(result.is_ok()); let output = result.unwrap(); - assert!(output.contains("30"), "Should contain sum: {}", output); + assert!(output.summary.contains("30"), "Should contain sum: {:?}", output); } #[tokio::test] @@ -112,8 +112,8 @@ async fn test_no_arguments() { assert!(result.is_ok()); let output = result.unwrap(); assert!( - output.contains("TestPrefix"), - "Should contain prefix: {}", + output.summary.contains("TestPrefix"), + "Should contain prefix: {:?}", output ); } @@ -168,7 +168,7 @@ async fn test_result_return_type_success() { let result = tool.execute(r#"{"value": 42}"#).await; assert!(result.is_ok(), "Should succeed for positive value"); let output = result.unwrap(); - assert!(output.contains("Valid"), "Should contain Valid: {}", output); + assert!(output.summary.contains("Valid"), "Should contain Valid: {:?}", output); } #[tokio::test] diff --git a/crates/llm-worker/tests/worker_fixtures.rs b/crates/llm-worker/tests/worker_fixtures.rs index 3f999068..cef140c8 100644 --- a/crates/llm-worker/tests/worker_fixtures.rs +++ b/crates/llm-worker/tests/worker_fixtures.rs @@ -12,7 +12,7 @@ use std::sync::atomic::{AtomicUsize, Ordering}; use async_trait::async_trait; use common::MockLlmClient; use llm_worker::Worker; -use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta}; +use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta, ToolOutput}; /// Fixture directory path fn fixtures_dir() -> std::path::PathBuf { @@ -58,7 +58,7 @@ impl MockWeatherTool { #[async_trait] impl Tool for MockWeatherTool { - async fn execute(&self, input_json: &str) -> Result { + async fn execute(&self, input_json: &str) -> Result { self.call_count.fetch_add(1, Ordering::SeqCst); // Parse input @@ -68,7 +68,7 @@ impl Tool for MockWeatherTool { let city = input["city"].as_str().unwrap_or("Unknown"); // Return mock response - Ok(format!("Weather in {}: Sunny, 22°C", city)) + Ok(format!("Weather in {}: Sunny, 22°C", city).into()) } } diff --git a/crates/llm-worker/tests/worker_state_test.rs b/crates/llm-worker/tests/worker_state_test.rs index 14ca1197..73176ae7 100644 --- a/crates/llm-worker/tests/worker_state_test.rs +++ b/crates/llm-worker/tests/worker_state_test.rs @@ -13,7 +13,7 @@ use common::MockLlmClient; use llm_worker::Item; use llm_worker::{Worker, WorkerError}; use llm_worker::llm_client::event::{Event, ResponseStatus, StatusEvent}; -use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta}; +use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta, ToolOutput}; // ============================================================================= // Mutable State Tests @@ -134,9 +134,9 @@ impl CountingTool { #[async_trait] impl Tool for CountingTool { - async fn execute(&self, _input_json: &str) -> Result { + async fn execute(&self, _input_json: &str) -> Result { self.calls.fetch_add(1, Ordering::SeqCst); - Ok(format!("{}-ok", self.name)) + Ok(format!("{}-ok", self.name).into()) } } diff --git a/tickets/context-compaction.md b/tickets/context-compaction.md index b4009b7e..cd03a9e2 100644 --- a/tickets/context-compaction.md +++ b/tickets/context-compaction.md @@ -17,7 +17,7 @@ Insomnia では2層(条件付き Prune + Compact)で対処する。 Prune の設計は ToolOutput の構造に依存する。 現行の Inline/Stored enum を **summary + content** の2フィールド構造に改める。 -詳細: [crates/llm-worker/docs/tool-output-design.md](../crates/llm-worker/docs/tool-output-design.md) +詳細: ~~[tool-output-design.md](tool-output-design.md)~~ — **実装済み** ### 構造 diff --git a/tickets/session-store-extraction.md b/tickets/session-store-extraction.md new file mode 100644 index 00000000..4963ff85 --- /dev/null +++ b/tickets/session-store-extraction.md @@ -0,0 +1,57 @@ +# session-store: persistence クレートの再構成 + +## 背景 + +`llm-worker-persistence` は名前・構造ともに llm-worker のサブクレートに見えるが、 +実態はセッション管理という上位層の関心を持っている。 + +現状の `Session` は Worker を wrap して `run()`/`resume()` をインターセプトするが、 +永続化のためにレイヤーとして呼び出しパスに噛む必要はない。 +Worker からセッション状態を抜き出して保存する/復元するだけで十分。 + +## 方針 + +- クレート名を `llm-worker-persistence` → `session-store` に変更 +- `Session` の Worker wrap を廃止し、save/restore の関数群にする +- Pod が Worker を直接保持し、run 後に session-store の関数を呼ぶ +- `llm-worker` への型依存(`Item`, `RequestConfig`)はそのまま残す(構造的に層にならなければ問題ない) + +## 現状の構造 + +``` +Controller → Pod → Session (wraps Worker) → Worker + ↑ run()/resume() をインターセプト +``` + +`pod.session_mut().worker_mut()` と2段潜る必要がある。 + +## 変更後の構造 + +``` +Controller → Pod → Worker (直接保持) + │ + └─ run 後に session_store::save_delta(store, ...) を呼ぶ + restore 時に session_store::restore(store, id) → state を返す +``` + +## 変更内容 + +### session-store クレート(旧 llm-worker-persistence) + +- `Session` struct を廃止 +- save 系関数を提供: history delta の記録、turn end、outcome 等 +- restore 関数: ログ再生 → `RestoredState` を返す(Worker は作らない) +- `Store` trait, `FsStore`, `LogEntry`, ハッシュチェーンはそのまま維持 +- fork / fork_at も関数として残す + +### pod クレート + +- `Pod` が `Worker` を直接フィールドに持つ +- `Pod::run()` 内で Worker を呼び、その後 session-store の save 関数を呼ぶ +- `Pod::restore()` は session-store から `RestoredState` を受け取り、Worker に適用 +- Controller は `pod.worker()` / `pod.worker_mut()` で直接アクセス + +### 影響範囲 + +- `Session` を使っている箇所: `pod.rs`, `controller.rs`, テスト +- `SessionError` が消えるので、`PodError` から `SessionError` variant を除去し、`StoreError` に置換 diff --git a/crates/llm-worker/docs/tool-output-design.md b/tickets/tool-output-design.md similarity index 100% rename from crates/llm-worker/docs/tool-output-design.md rename to tickets/tool-output-design.md