diff --git a/.insomnia/.gitignore b/.insomnia/.gitignore index 1b7c3366..987a4be5 100644 --- a/.insomnia/.gitignore +++ b/.insomnia/.gitignore @@ -1 +1 @@ -_memory +_staging diff --git a/crates/manifest/src/scope.rs b/crates/manifest/src/scope.rs index 2d7b3a8f..753703d8 100644 --- a/crates/manifest/src/scope.rs +++ b/crates/manifest/src/scope.rs @@ -156,6 +156,23 @@ impl Scope { .collect() } + /// Deny rules with their targets resolved to absolute paths. + /// + /// Counterpart to [`allow_rules`](Self::allow_rules); together they + /// round-trip through [`ScopeConfig`] for callers that need to + /// rebuild a scope after layering extra rules on top of an + /// already-constructed [`Scope`]. + pub fn deny_rules(&self) -> Vec { + self.deny + .iter() + .map(|r| ScopeRule { + target: r.target.clone(), + permission: r.permission, + recursive: r.recursive, + }) + .collect() + } + /// Iterate over absolute paths granted `Write` by an allow rule. /// Subset of [`readable_paths`](Self::readable_paths). pub fn writable_paths(&self) -> impl Iterator { diff --git a/crates/pod/src/controller.rs b/crates/pod/src/controller.rs index 775b8a16..d4685ed7 100644 --- a/crates/pod/src/controller.rs +++ b/crates/pod/src/controller.rs @@ -221,20 +221,49 @@ impl PodController { }); // Register the builtin file-manipulation tools (Read / Write / - // Edit / Glob / Grep). `ScopedFs` carries the pod-lifetime - // scope/pwd; `Tracker` is session-scoped — a fresh instance per - // controller spawn ensures state from a previous process - // lifetime cannot be reused after a resume. The tracker is - // also handed to the Pod itself so Pod-level operations (e.g. + // Edit / Glob / Grep / Bash). `ScopedFs` carries the pod- + // lifetime scope/pwd; `Tracker` is session-scoped — a fresh + // instance per controller spawn ensures state from a previous + // process lifetime cannot be reused after a resume. The tracker + // is also handed to the Pod itself so Pod-level operations (e.g. // context compaction) can ask which files the agent has been // touching. - let fs = tools::ScopedFs::new(scope_for_tools, pwd_for_tools.clone()); + // + // Bash spills long outputs to a per-pod subdir under the + // runtime dir. We layer a recursive `allow(Read)` rule for + // that path on top of the user-facing scope so the agent can + // `Read` the saved files without polluting the workspace. + // Same approach memory takes for its deny rules: round-trip + // through `ScopeConfig` and rebuild via `from_config`. + let bash_output_dir = runtime_dir.path().join("bash-output"); + std::fs::create_dir_all(&bash_output_dir).map_err(|e| { + std::io::Error::other(format!( + "create bash output dir {}: {e}", + bash_output_dir.display() + )) + })?; + let mut scope_config = manifest::ScopeConfig { + allow: scope_for_tools.allow_rules(), + deny: scope_for_tools.deny_rules(), + }; + scope_config.allow.push(manifest::ScopeRule { + target: bash_output_dir.clone(), + permission: manifest::Permission::Read, + recursive: true, + }); + let scope_with_bash = manifest::Scope::from_config(&scope_config) + .map_err(std::io::Error::other)?; + let fs = tools::ScopedFs::new(scope_with_bash, pwd_for_tools.clone()); let tracker = tools::Tracker::new(); // The same ScopedFs also powers the IPC `ListCompletions` // query — keep a clone for the FS view we attach below, // since the tools consume `fs` itself. fs_for_view = fs.clone(); - worker.register_tools(tools::builtin_tools(fs, tracker.clone())); + worker.register_tools(tools::builtin_tools( + fs, + tracker.clone(), + bash_output_dir, + )); // Memory subsystem opt-in. When `[memory]` is present in // the manifest, register the memory-specific Read/Write/Edit diff --git a/crates/tools/Cargo.toml b/crates/tools/Cargo.toml index 336de27b..1113b33d 100644 --- a/crates/tools/Cargo.toml +++ b/crates/tools/Cargo.toml @@ -19,7 +19,7 @@ serde_json = "1.0.149" sha2 = "0.11.0" tempfile = "3.27.0" thiserror = "2.0.18" -tokio = { version = "1.51.1", features = ["rt"] } +tokio = { version = "1.51.1", features = ["process", "rt", "time"] } tracing = "0.1.44" [dev-dependencies] diff --git a/crates/tools/src/bash.rs b/crates/tools/src/bash.rs new file mode 100644 index 00000000..93dfa072 --- /dev/null +++ b/crates/tools/src/bash.rs @@ -0,0 +1,581 @@ +//! `Bash` tool — execute shell commands in a one-shot, stateless way. +//! +//! Each call runs `bash -c ` via [`tokio::process::Command`]. +//! The wrapper redirects all output to a file so we never have to read +//! from a pipe (which would expose us to bg-pipe hangs). There is no +//! shell session: every call starts fresh at `cwd`, so the agent must +//! chain `cd && cmd` when it wants to operate elsewhere. This +//! mirrors Claude Code's own Bash tool — predictable, no hidden state. +//! +//! Output handling: when output is short (≤ 80 lines, ≤ 12 KiB) it is +//! returned inline and the file is cleaned up. When it is longer the +//! full output is left on disk and only the **last 80 lines** are +//! returned, prefixed with the saved file's path. This sidesteps the +//! Worker's blanket `ToolOutputLimits` (default 16 KiB), which would +//! otherwise drop the *tail* of the output — usually the most useful +//! part (errors, exit messages, summary). The saved file lives under +//! a caller-supplied directory that the parent has added to the +//! `ScopedFs` allow set, so the agent can inspect it via either Read +//! or a follow-up Bash call. +//! +//! Filesystem and network access are NOT mediated by `ScopedFs`: the +//! child process can touch any path. Safety is delegated to the +//! Permission layer (deny/allow rules on the command string). + +use std::path::{Path, PathBuf}; +use std::process::Stdio; +use std::sync::Arc; +use std::time::Duration; + +use async_trait::async_trait; +use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta, ToolOutput}; +use serde::Deserialize; +use tokio::process::Command; + +use crate::scoped_fs::ScopedFs; + +const DESCRIPTION: &str = "Execute a shell command via bash. Supports the \ +full shell — pipes, redirects, command substitution, `&&`/`||`. Each call \ +runs in a fresh shell rooted at the workspace; chain `cd && cmd` \ +when you need to operate elsewhere. stdout and stderr are merged. Default \ +timeout 120s, max 600s.\n\n\ +Output handling: when the command produces more than 80 lines (or ~12 KiB), \ +the full output is saved to a file and only the LAST 80 lines are returned, \ +prefixed with the saved path. The path is readable by Read; you can also \ +inspect it from a follow-up Bash call (`grep ... `, etc.).\n\n\ +Prefer dedicated tools when one fits: Read instead of `cat`/`head`/`tail` \ +on workspace files, Edit instead of `sed`/`awk` rewrites, Glob instead of \ +`find `, Grep instead of `grep`/`rg`. Reach for Bash when the task \ +is shell-shaped: building, testing, version control, package management."; + +const DEFAULT_TIMEOUT_SECS: u64 = 120; +const MAX_TIMEOUT_SECS: u64 = 600; + +/// Number of trailing lines returned when output spills to a file. +const TAIL_LINES: usize = 80; + +/// Inline-return budget. Outputs at or below this are returned in full; +/// above it triggers the spill-to-file path. Sized to leave headroom under +/// the Worker's 16 KiB default `ToolOutputLimits` cap so the inline path +/// reliably reaches the model intact. +const INLINE_BYTE_BUDGET: usize = 12 * 1024; + +/// Maximum bytes loaded into memory from the spilled output file. The +/// file itself can be arbitrarily large; we only ever read the tail end +/// since that is what we return. +const TAIL_READ_BUDGET: usize = 256 * 1024; + +#[derive(Debug, Deserialize, schemars::JsonSchema)] +pub(crate) struct BashParams { + /// Shell command to execute. Passed verbatim to `bash -c`. + pub command: String, + /// Timeout in seconds. Defaults to 120, capped at 600. + #[serde(default)] + pub timeout: Option, +} + +pub(crate) struct BashTool { + /// Workspace root that every invocation starts in. Snapshot of + /// `ScopedFs::pwd()` at registration time; never mutated, since we + /// don't track `cd` across calls. + cwd: PathBuf, + /// Directory to spill long outputs into. Caller is expected to have + /// added this path to the readable scope so the agent can Read the + /// saved files. The directory itself is created lazily. + output_dir: PathBuf, + /// Files we left on disk for follow-up inspection. Cleaned up on + /// `Drop` (= session end). `std::sync::Mutex` because access is + /// always synchronous and very brief. + spilled_outputs: std::sync::Mutex>, +} + +impl Drop for BashTool { + fn drop(&mut self) { + if let Ok(mut paths) = self.spilled_outputs.lock() { + for p in paths.drain(..) { + let _ = std::fs::remove_file(&p); + } + } + } +} + +#[async_trait] +impl Tool for BashTool { + async fn execute(&self, input_json: &str) -> Result { + let params: BashParams = serde_json::from_str(input_json) + .map_err(|e| ToolError::InvalidArgument(format!("invalid Bash input: {e}")))?; + let timeout_secs = params + .timeout + .unwrap_or(DEFAULT_TIMEOUT_SECS) + .clamp(1, MAX_TIMEOUT_SECS); + + // Persistent output file in the caller-supplied directory. + // `keep()` opts out of auto-delete so the agent can inspect the + // full output later; cleanup is deferred to `Drop` on this tool. + std::fs::create_dir_all(&self.output_dir).map_err(|e| { + ToolError::Internal(format!( + "create bash output dir {}: {e}", + self.output_dir.display() + )) + })?; + let output_path: PathBuf = tempfile::Builder::new() + .prefix("bash-") + .suffix(".log") + .tempfile_in(&self.output_dir) + .map_err(|e| ToolError::Internal(format!("output tempfile: {e}")))? + .into_temp_path() + .keep() + .map_err(|e| ToolError::Internal(format!("persist output tempfile: {e}")))?; + + let output_path_str = output_path + .to_str() + .ok_or_else(|| ToolError::Internal("output path is not UTF-8".into()))?; + + // Wrapper: + // exec >file 2>&1 redirect stdout/stderr to the output file + // { user_cmd } run in a brace group (no subshell, so any + // `cd` inside still affects $? capture below) + // __exit=$? preserve the user command's exit code… + // wait 2>/dev/null …since `wait` clobbers $?. Reaping bg jobs + // guarantees the output file's writers all + // close before bash itself exits. + // exit $__exit propagate the user's exit + let wrapped = format!( + "exec >{out} 2>&1\n{{ {user_cmd}\n}}\n__insomnia_exit=$?\nwait 2>/dev/null\nexit $__insomnia_exit\n", + out = shell_single_quote(output_path_str), + user_cmd = params.command, + ); + + tracing::debug!(cmd = %params.command, cwd = %self.cwd.display(), timeout_secs, "Bash"); + + let mut child = Command::new("bash") + .arg("-c") + .arg(&wrapped) + .current_dir(&self.cwd) + .stdin(Stdio::null()) + .stdout(Stdio::null()) // bash inherits — but the wrapper redirected via `exec` + .stderr(Stdio::null()) + .kill_on_drop(true) + .spawn() + .map_err(|e| { + let _ = std::fs::remove_file(&output_path); + ToolError::ExecutionFailed(format!("spawn bash: {e}")) + })?; + + let timeout_dur = Duration::from_secs(timeout_secs); + let wait_result = tokio::time::timeout(timeout_dur, child.wait()).await; + let (status, timed_out) = match wait_result { + Ok(Ok(s)) => (Some(s), false), + Ok(Err(e)) => { + let _ = std::fs::remove_file(&output_path); + return Err(ToolError::ExecutionFailed(format!("bash wait: {e}"))); + } + Err(_) => (None, true), + }; + + // Inspect the on-disk output: total size first, tail bytes second. + let total_bytes = std::fs::metadata(&output_path) + .map(|m| m.len() as usize) + .unwrap_or(0); + let tail_bytes = read_tail_bytes(&output_path, TAIL_READ_BUDGET).unwrap_or_default(); + let tail_text = String::from_utf8_lossy(&tail_bytes).into_owned(); + + let cmd_summary = truncate_for_summary(¶ms.command); + + if timed_out { + // Preserve the partial output file — even cut-short logs help + // diagnose hangs. + let content = if total_bytes > 0 { + let last = take_last_n_lines(&tail_text, TAIL_LINES); + self.remember_spilled(&output_path); + Some(format!( + "[partial output before timeout — full at {}]\n{last}", + output_path.display() + )) + } else { + let _ = std::fs::remove_file(&output_path); + None + }; + return Ok(ToolOutput { + summary: format!("$ {cmd_summary} (timed out after {timeout_secs}s)"), + content, + }); + } + + let status = status.expect("status set on the success branch"); + let summary = match status.code() { + Some(0) => format!("$ {cmd_summary}"), + Some(c) => format!("$ {cmd_summary} (exit {c})"), + None => format!("$ {cmd_summary} (terminated by signal)"), + }; + + if total_bytes == 0 { + let _ = std::fs::remove_file(&output_path); + return Ok(ToolOutput { + summary, + content: None, + }); + } + + // Inline if the whole output fits in our tail-read window AND is + // small enough to ride under the Worker's default cap. + let line_count = tail_text.lines().count(); + let fully_loaded = total_bytes <= tail_bytes.len(); + let fits_inline = + fully_loaded && total_bytes <= INLINE_BYTE_BUDGET && line_count <= TAIL_LINES; + + let content = if fits_inline { + let _ = std::fs::remove_file(&output_path); + Some(tail_text) + } else { + let last = take_last_n_lines(&tail_text, TAIL_LINES); + // When `fully_loaded` we know the exact line count; otherwise + // the file is bigger than our read window so we report bytes + // and an "approximate" disclaimer. + let header = if fully_loaded { + format!( + "[showing last {TAIL_LINES} of {line_count} lines — full output ({total_bytes} bytes) at {}]", + output_path.display() + ) + } else { + format!( + "[showing last {TAIL_LINES} lines (tail of {total_bytes}-byte output) — full at {}]", + output_path.display() + ) + }; + self.remember_spilled(&output_path); + Some(format!("{header}\n{last}")) + }; + + Ok(ToolOutput { summary, content }) + } +} + +impl BashTool { + fn remember_spilled(&self, path: &Path) { + if let Ok(mut v) = self.spilled_outputs.lock() { + v.push(path.to_path_buf()); + } + } +} + +/// Read up to `max_bytes` from the end of `path`. If the file is smaller +/// than `max_bytes`, the entire file is returned. +fn read_tail_bytes(path: &Path, max_bytes: usize) -> std::io::Result> { + use std::io::{Read, Seek, SeekFrom}; + let mut f = std::fs::File::open(path)?; + let len = f.seek(SeekFrom::End(0))?; + let start = if len > max_bytes as u64 { + len - max_bytes as u64 + } else { + 0 + }; + f.seek(SeekFrom::Start(start))?; + let mut buf = Vec::with_capacity((len - start) as usize); + f.read_to_end(&mut buf)?; + Ok(buf) +} + +/// Return the last `n` lines of `text`. If `text` has `n` or fewer lines +/// (per [`str::lines`]), the input is returned as-is (no allocation). +fn take_last_n_lines(text: &str, n: usize) -> String { + if text.is_empty() { + return String::new(); + } + let total = text.lines().count(); + if total <= n { + return text.to_owned(); + } + let skip = total - n; + let mut count = 0usize; + for (i, b) in text.bytes().enumerate() { + if b == b'\n' { + count += 1; + if count == skip { + return text[i + 1..].to_owned(); + } + } + } + text.to_owned() +} + +fn truncate_for_summary(command: &str) -> String { + let one_line = command.lines().next().unwrap_or(""); + let mut chars = one_line.chars(); + let head: String = chars.by_ref().take(80).collect(); + if chars.next().is_some() { + let mut shortened = head; + while shortened.chars().count() > 77 { + shortened.pop(); + } + shortened.push_str("..."); + shortened + } else { + head + } +} + +/// Wrap a string in single quotes for safe inclusion in a bash command. +fn shell_single_quote(s: &str) -> String { + let escaped = s.replace('\'', "'\\''"); + format!("'{escaped}'") +} + +/// Factory for the `Bash` tool. +/// +/// `output_dir` is where long outputs spill to; the caller is responsible +/// for arranging that the path is in the agent's readable scope. Every +/// invocation starts at `fs.pwd()` — the tool is intentionally stateless +/// w.r.t. the working directory. +pub fn bash_tool(fs: ScopedFs, output_dir: PathBuf) -> ToolDefinition { + Arc::new(move || { + let schema = schemars::schema_for!(BashParams); + let schema_value = serde_json::to_value(schema).unwrap_or(serde_json::json!({})); + let meta = ToolMeta::new("Bash") + .description(DESCRIPTION) + .input_schema(schema_value); + let tool: Arc = Arc::new(BashTool { + cwd: fs.pwd().to_path_buf(), + output_dir: output_dir.clone(), + spilled_outputs: std::sync::Mutex::new(Vec::new()), + }); + (meta, tool) + }) +} + +#[cfg(test)] +mod tests { + use super::*; + use manifest::Scope; + use tempfile::TempDir; + + /// Test harness: workspace tempdir + a separate spill tempdir kept + /// alive for the test's lifetime. The spill dir is added to the + /// scope as readable so callers exercise the production path. + struct Harness { + _workspace: TempDir, + spill: TempDir, + fs: ScopedFs, + } + + fn setup() -> Harness { + let workspace = TempDir::new().unwrap(); + let spill = TempDir::new().unwrap(); + let base = Scope::writable(workspace.path()).unwrap(); + let mut config = manifest::ScopeConfig { + allow: base.allow_rules(), + deny: base.deny_rules(), + }; + config.allow.push(manifest::ScopeRule { + target: spill.path().to_path_buf(), + permission: manifest::Permission::Read, + recursive: true, + }); + let scope = Scope::from_config(&config).unwrap(); + let fs = ScopedFs::new(scope, workspace.path().to_path_buf()); + Harness { + _workspace: workspace, + spill, + fs, + } + } + + fn make_tool(h: &Harness) -> Arc { + let def = bash_tool(h.fs.clone(), h.spill.path().to_path_buf()); + let (_, tool) = def(); + tool + } + + #[tokio::test] + async fn runs_simple_command() { + let h = setup(); + let def = bash_tool(h.fs.clone(), h.spill.path().to_path_buf()); + let (meta, tool) = def(); + assert_eq!(meta.name, "Bash"); + + let inp = serde_json::json!({ "command": "echo hello" }); + let out = tool.execute(&inp.to_string()).await.unwrap(); + assert_eq!(out.summary, "$ echo hello"); + assert_eq!(out.content.as_deref().map(str::trim), Some("hello")); + } + + #[tokio::test] + async fn merges_stdout_and_stderr() { + let h = setup(); + let tool = make_tool(&h); + + let inp = serde_json::json!({ + "command": "echo out; echo err 1>&2", + }); + let out = tool.execute(&inp.to_string()).await.unwrap(); + let body = out.content.unwrap(); + assert!(body.contains("out")); + assert!(body.contains("err")); + } + + #[tokio::test] + async fn nonzero_exit_is_reported() { + let h = setup(); + let tool = make_tool(&h); + + let inp = serde_json::json!({ "command": "exit 7" }); + let out = tool.execute(&inp.to_string()).await.unwrap(); + assert!(out.summary.contains("exit 7"), "summary: {}", out.summary); + assert!( + out.content.is_none(), + "no output expected, got {:?}", + out.content + ); + } + + #[tokio::test] + async fn cd_does_not_persist_across_calls() { + // Stateless: a `cd` in one call must NOT leak into the next. + let h = setup(); + let sub = h._workspace.path().join("nested"); + std::fs::create_dir(&sub).unwrap(); + let tool = make_tool(&h); + + tool.execute( + &serde_json::json!({ + "command": format!("cd {}", sub.to_str().unwrap()), + }) + .to_string(), + ) + .await + .unwrap(); + + let pwd_out = tool + .execute(&serde_json::json!({ "command": "pwd" }).to_string()) + .await + .unwrap(); + let body = pwd_out.content.unwrap(); + let actual = std::fs::canonicalize(body.trim()).unwrap(); + let workspace = std::fs::canonicalize(h._workspace.path()).unwrap(); + assert_eq!( + actual, workspace, + "second call should start at workspace root, not the previous cd target" + ); + } + + #[tokio::test] + async fn timeout_kills_long_command() { + let h = setup(); + let tool = make_tool(&h); + + let inp = serde_json::json!({ + "command": "sleep 30", + "timeout": 1, + }); + let out = tool.execute(&inp.to_string()).await.unwrap(); + assert!( + out.summary.contains("timed out"), + "summary: {}", + out.summary + ); + } + + #[tokio::test] + async fn invalid_json_is_invalid_argument() { + let h = setup(); + let tool = make_tool(&h); + + let err = tool.execute("not json").await.unwrap_err(); + assert!(matches!(err, ToolError::InvalidArgument(_))); + } + + #[tokio::test] + async fn long_output_spills_and_returns_tail() { + let h = setup(); + let spill_dir = h.spill.path().to_path_buf(); + let tool = make_tool(&h); + + // 200 lines: "line 1" .. "line 200". Tail of 80 keeps lines 121-200. + let inp = serde_json::json!({ + "command": "for i in $(seq 1 200); do echo line $i; done", + }); + let out = tool.execute(&inp.to_string()).await.unwrap(); + let body = out.content.expect("expected content"); + + assert!( + body.contains(&format!("showing last {TAIL_LINES} of 200 lines")), + "tail header missing in: {}", + &body[..body.len().min(300)] + ); + assert!( + body.contains(spill_dir.to_str().unwrap()), + "spill dir path missing: {body}" + ); + // Last 80 lines are 121..200. + assert!(body.contains("\nline 200\n")); + assert!(body.contains("\nline 121\n")); + // line 120 is the last *elided* line. + assert!(!body.contains("\nline 120\n"), "elided line leaked: {body}"); + } + + #[tokio::test] + async fn wide_short_output_still_spills_when_byte_budget_exceeded() { + let h = setup(); + let spill_dir = h.spill.path().to_path_buf(); + let tool = make_tool(&h); + + // One single line of ~20 KiB (over INLINE_BYTE_BUDGET = 12 KiB). + let inp = serde_json::json!({ + "command": "printf 'x%.0s' {1..20480}", + }); + let out = tool.execute(&inp.to_string()).await.unwrap(); + let body = out.content.unwrap(); + assert!( + body.contains(spill_dir.to_str().unwrap()), + "expected spill marker in: {}", + &body[..body.len().min(200)] + ); + } + + #[tokio::test] + async fn background_job_does_not_hang() { + let h = setup(); + let tool = make_tool(&h); + + // The wrapper's `wait` ensures we don't hang on a stray bg pipe. + let inp = serde_json::json!({ + "command": "(sleep 0.05; echo bg) &", + "timeout": 5, + }); + let out = tool.execute(&inp.to_string()).await.unwrap(); + assert!( + !out.summary.contains("timed out"), + "summary: {}", + out.summary + ); + } + + #[tokio::test] + async fn spilled_files_are_cleaned_up_on_drop() { + let h = setup(); + let spill_dir = h.spill.path().to_path_buf(); + let tool = make_tool(&h); + + let inp = serde_json::json!({ + "command": "for i in $(seq 1 200); do echo $i; done", + }); + tool.execute(&inp.to_string()).await.unwrap(); + + // The spill dir should now contain exactly one bash-*.log file. + let files_before: Vec<_> = std::fs::read_dir(&spill_dir) + .unwrap() + .filter_map(Result::ok) + .map(|e| e.path()) + .collect(); + assert_eq!(files_before.len(), 1, "expected one spilled file"); + let path = files_before.into_iter().next().unwrap(); + assert!(path.exists()); + + drop(tool); + // Drop runs synchronously; file should be gone. + assert!( + !path.exists(), + "spilled file should be cleaned up on drop: {path:?}" + ); + } +} diff --git a/crates/tools/src/lib.rs b/crates/tools/src/lib.rs index a5bbfede..28139410 100644 --- a/crates/tools/src/lib.rs +++ b/crates/tools/src/lib.rs @@ -1,8 +1,8 @@ //! Built-in tools for the Insomnia LLM agent. //! -//! Implements Read / Write / Edit / Glob / Grep on top of the `llm-worker` -//! `Tool` infrastructure. Filesystem access is mediated by two orthogonal -//! concerns: +//! Implements Read / Write / Edit / Glob / Grep / Bash on top of the +//! `llm-worker` `Tool` infrastructure. Filesystem access is mediated by +//! two orthogonal concerns: //! //! - [`ScopedFs`] — pod-lifetime, expresses the write-block boundary for //! the current scope. Derived from the manifest and shareable across @@ -13,17 +13,23 @@ //! //! The Pod layer owns both instances and passes them to //! [`builtin_tools`] when registering tools on a `Worker`. +//! +//! `Bash` is the lone exception — its child processes bypass `ScopedFs` +//! entirely. Safety for arbitrary command execution is delegated to the +//! Permission layer (deny/allow rules on the command string). pub mod error; pub mod scoped_fs; pub mod tracker; +mod bash; mod edit; mod glob; mod grep; mod read; mod write; +pub use bash::bash_tool; pub use edit::edit_tool; pub use error::ToolsError; pub use glob::glob_tool; @@ -39,12 +45,22 @@ pub use write::write_tool; /// All returned factories share the same tracker instance so that /// `Read` / `Write` / `Edit` see a consistent history across tool /// invocations within a single session. -pub fn builtin_tools(fs: ScopedFs, tracker: Tracker) -> Vec { +/// +/// `bash_output_dir` is where the Bash tool spills long outputs. The +/// caller is responsible for adding that path to the readable scope +/// (see [`manifest::Scope::with_extra_read`]) so the agent can `Read` +/// the saved files. +pub fn builtin_tools( + fs: ScopedFs, + tracker: Tracker, + bash_output_dir: std::path::PathBuf, +) -> Vec { vec![ read_tool(fs.clone(), tracker.clone()), write_tool(fs.clone(), tracker.clone()), - edit_tool(fs.clone(), tracker.clone()), + edit_tool(fs.clone(), tracker), glob_tool(fs.clone()), - grep_tool(fs), + grep_tool(fs.clone()), + bash_tool(fs, bash_output_dir), ] } diff --git a/crates/tools/src/tracker.rs b/crates/tools/src/tracker.rs index 6e108474..36671ecd 100644 --- a/crates/tools/src/tracker.rs +++ b/crates/tools/src/tracker.rs @@ -31,7 +31,8 @@ //! let scope = Scope::writable("/workspace").unwrap(); //! let fs = ScopedFs::new(scope, PathBuf::from("/workspace")); // pod lifetime //! let tracker = Tracker::new(); // session lifetime -//! let defs = builtin_tools(fs, tracker); +//! let bash_outputs = PathBuf::from("/run/insomnia/bash-output"); +//! let defs = builtin_tools(fs, tracker, bash_outputs); //! ``` use std::collections::{HashMap, VecDeque}; diff --git a/crates/tools/tests/edge_cases.rs b/crates/tools/tests/edge_cases.rs index 686392ad..5f96fef7 100644 --- a/crates/tools/tests/edge_cases.rs +++ b/crates/tools/tests/edge_cases.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use llm_worker::tool::{Tool, ToolDefinition}; -use manifest::Scope; +use manifest::{Permission, Scope, ScopeConfig, ScopeRule}; use serde_json::json; use tempfile::TempDir; use tools::{ScopedFs, Tracker, builtin_tools}; @@ -27,19 +27,29 @@ impl Registry { } } -fn setup() -> (TempDir, Registry) { +fn setup() -> (TempDir, TempDir, Registry) { let dir = TempDir::new().unwrap(); - let fs = ScopedFs::new( - Scope::writable(dir.path()).unwrap(), - dir.path().to_path_buf(), - ); + let spill = TempDir::new().unwrap(); + let base = Scope::writable(dir.path()).unwrap(); + let mut config = ScopeConfig { + allow: base.allow_rules(), + deny: base.deny_rules(), + }; + config.allow.push(ScopeRule { + target: spill.path().to_path_buf(), + permission: Permission::Read, + recursive: true, + }); + let scope = Scope::from_config(&config).unwrap(); + let fs = ScopedFs::new(scope, dir.path().to_path_buf()); let tracker = Tracker::new(); - (dir, Registry::new(builtin_tools(fs, tracker))) + let reg = Registry::new(builtin_tools(fs, tracker, spill.path().to_path_buf())); + (dir, spill, reg) } #[tokio::test] async fn unicode_path_and_content() { - let (dir, reg) = setup(); + let (dir, _spill, reg) = setup(); let file = dir.path().join("日本語ファイル.txt"); let content = "こんにちは 🦀 世界\nabc\n"; @@ -70,7 +80,7 @@ async fn unicode_path_and_content() { async fn symlink_to_outside_scope_is_rejected_for_write() { use std::os::unix::fs::symlink; - let (dir, reg) = setup(); + let (dir, _spill, reg) = setup(); let outside = TempDir::new().unwrap(); let outside_target = outside.path().join("secret.txt"); std::fs::write(&outside_target, "secret").unwrap(); @@ -114,7 +124,7 @@ async fn symlink_to_outside_scope_is_rejected_for_write() { #[tokio::test] async fn empty_file_read_and_edit() { - let (dir, reg) = setup(); + let (dir, _spill, reg) = setup(); let file = dir.path().join("empty.txt"); std::fs::write(&file, "").unwrap(); @@ -144,7 +154,7 @@ async fn empty_file_read_and_edit() { #[tokio::test] async fn very_long_single_line() { - let (dir, reg) = setup(); + let (dir, _spill, reg) = setup(); let file = dir.path().join("long.txt"); let big: String = "x".repeat(1024 * 1024); // 1 MiB, no newlines std::fs::write(&file, &big).unwrap(); @@ -160,7 +170,7 @@ async fn very_long_single_line() { #[tokio::test] async fn relative_path_is_rejected() { - let (_dir, reg) = setup(); + let (_dir, _spill, reg) = setup(); let read = reg.get("Read"); let err = read .execute(&json!({ "file_path": "relative.txt" }).to_string()) @@ -171,7 +181,7 @@ async fn relative_path_is_rejected() { #[tokio::test] async fn directory_target_is_rejected_for_read() { - let (dir, reg) = setup(); + let (dir, _spill, reg) = setup(); let read = reg.get("Read"); let err = read .execute(&json!({ "file_path": dir.path().to_str().unwrap() }).to_string()) @@ -182,7 +192,7 @@ async fn directory_target_is_rejected_for_read() { #[tokio::test] async fn deeply_nested_new_file_is_created() { - let (dir, reg) = setup(); + let (dir, _spill, reg) = setup(); let deep = dir.path().join("a/b/c/d/e/deep.txt"); let write = reg.get("Write"); write @@ -200,7 +210,7 @@ async fn deeply_nested_new_file_is_created() { #[tokio::test] async fn replace_preserves_unicode() { - let (dir, reg) = setup(); + let (dir, _spill, reg) = setup(); let file = dir.path().join("u.txt"); std::fs::write(&file, "🦀 rust 🦀\n").unwrap(); @@ -225,7 +235,7 @@ async fn replace_preserves_unicode() { #[tokio::test] async fn grep_handles_unicode_pattern() { - let (dir, reg) = setup(); + let (dir, _spill, reg) = setup(); let file = dir.path().join("u.txt"); std::fs::write(&file, "English\n日本語\nрусский\n").unwrap(); diff --git a/crates/tools/tests/integration.rs b/crates/tools/tests/integration.rs index f34db06d..328eea18 100644 --- a/crates/tools/tests/integration.rs +++ b/crates/tools/tests/integration.rs @@ -8,11 +8,25 @@ use std::path::Path; use std::sync::Arc; use llm_worker::tool::{Tool, ToolDefinition, ToolMeta}; -use manifest::Scope; +use manifest::{Permission, Scope, ScopeConfig, ScopeRule}; use serde_json::json; use tempfile::TempDir; use tools::{ScopedFs, Tracker, builtin_tools}; +fn scope_with_spill(workspace: &Path, spill: &Path) -> Scope { + let base = Scope::writable(workspace).unwrap(); + let mut config = ScopeConfig { + allow: base.allow_rules(), + deny: base.deny_rules(), + }; + config.allow.push(ScopeRule { + target: spill.to_path_buf(), + permission: Permission::Read, + recursive: true, + }); + Scope::from_config(&config).unwrap() +} + struct Registry { entries: Vec<(ToolMeta, Arc)>, } @@ -36,15 +50,14 @@ impl Registry { } } -fn setup() -> (TempDir, Registry) { +fn setup() -> (TempDir, TempDir, Registry) { let dir = TempDir::new().unwrap(); - let fs = ScopedFs::new( - Scope::writable(dir.path()).unwrap(), - dir.path().to_path_buf(), - ); + let spill = TempDir::new().unwrap(); + let scope = scope_with_spill(dir.path(), spill.path()); + let fs = ScopedFs::new(scope, dir.path().to_path_buf()); let tracker = Tracker::new(); - let reg = Registry::new(builtin_tools(fs, tracker)); - (dir, reg) + let reg = Registry::new(builtin_tools(fs, tracker, spill.path().to_path_buf())); + (dir, spill, reg) } async fn call(tool: &Arc, input: serde_json::Value) -> llm_worker::tool::ToolOutput { @@ -60,16 +73,16 @@ async fn call_err(tool: &Arc, input: serde_json::Value) -> llm_worker: } #[test] -fn builtin_tools_registers_all_five() { - let (_dir, reg) = setup(); +fn builtin_tools_registers_full_set() { + let (_dir, _spill, reg) = setup(); let mut names = reg.names(); names.sort(); - assert_eq!(names, vec!["Edit", "Glob", "Grep", "Read", "Write"]); + assert_eq!(names, vec!["Bash", "Edit", "Glob", "Grep", "Read", "Write"]); } #[test] fn meta_has_description_and_schema() { - let (_dir, reg) = setup(); + let (_dir, _spill, reg) = setup(); for (meta, _) in ®.entries { assert!( !meta.description.is_empty(), @@ -87,7 +100,7 @@ fn meta_has_description_and_schema() { #[tokio::test] async fn read_then_edit_then_read_roundtrip() { - let (dir, reg) = setup(); + let (dir, _spill, reg) = setup(); let file = dir.path().join("a.txt"); std::fs::write(&file, "hello world\n").unwrap(); let p = file.to_str().unwrap(); @@ -119,7 +132,7 @@ async fn read_then_edit_then_read_roundtrip() { #[tokio::test] async fn write_then_grep_finds_content() { - let (dir, reg) = setup(); + let (dir, _spill, reg) = setup(); let write = reg.get("Write"); let grep = reg.get("Grep"); @@ -148,7 +161,7 @@ async fn write_then_grep_finds_content() { #[tokio::test] async fn glob_finds_written_files() { - let (dir, reg) = setup(); + let (dir, _spill, reg) = setup(); let write = reg.get("Write"); let glob = reg.get("Glob"); @@ -172,7 +185,7 @@ async fn glob_finds_written_files() { #[tokio::test] async fn out_of_scope_write_is_rejected() { - let (_dir, reg) = setup(); + let (_dir, _spill, reg) = setup(); let outside = TempDir::new().unwrap(); let write = reg.get("Write"); @@ -191,7 +204,7 @@ async fn out_of_scope_write_is_rejected() { #[tokio::test] async fn write_to_existing_without_read_fails() { - let (dir, reg) = setup(); + let (dir, _spill, reg) = setup(); let file = dir.path().join("exists.txt"); std::fs::write(&file, "preexisting").unwrap(); @@ -212,7 +225,7 @@ async fn write_to_existing_without_read_fails() { async fn shared_scoped_fs_across_tools() { // The key invariant: all builtin tools share the same ScopedFs instance, // so read-history set by Read is visible to Edit and Write. - let (dir, reg) = setup(); + let (dir, _spill, reg) = setup(); let file = dir.path().join("shared.txt"); std::fs::write(&file, "one\n").unwrap(); @@ -235,7 +248,7 @@ async fn shared_scoped_fs_across_tools() { #[tokio::test] async fn edit_requires_read_across_tools() { - let (dir, reg) = setup(); + let (dir, _spill, reg) = setup(); let file = dir.path().join("a.txt"); std::fs::write(&file, "foo\n").unwrap(); @@ -256,17 +269,17 @@ async fn edit_requires_read_across_tools() { #[tokio::test] async fn deterministic_tool_order_is_registration_order() { - let (_dir, reg) = setup(); - // Registration order from builtin_tools(): Read, Write, Edit, Glob, Grep + let (_dir, _spill, reg) = setup(); + // Registration order from builtin_tools(): Read, Write, Edit, Glob, Grep, Bash let names: Vec<&str> = reg.entries.iter().map(|(m, _)| m.name.as_str()).collect(); - assert_eq!(names, vec!["Read", "Write", "Edit", "Glob", "Grep"]); + assert_eq!(names, vec!["Read", "Write", "Edit", "Glob", "Grep", "Bash"]); } // Regression: tool name capitalization matches Claude Code reference #[test] fn tool_names_match_reference_spec() { - let (_dir, reg) = setup(); - for expected in ["Read", "Write", "Edit", "Glob", "Grep"] { + let (_dir, _spill, reg) = setup(); + for expected in ["Read", "Write", "Edit", "Glob", "Grep", "Bash"] { assert!( reg.entries.iter().any(|(m, _)| m.name == expected), "missing tool {expected}" @@ -278,12 +291,11 @@ fn tool_names_match_reference_spec() { async fn tracker_recent_files_tracks_read_write_edit() { // Build a fresh registry that shares a tracker we can query afterwards. let dir = TempDir::new().unwrap(); - let fs = ScopedFs::new( - Scope::writable(dir.path()).unwrap(), - dir.path().to_path_buf(), - ); + let spill = TempDir::new().unwrap(); + let scope = scope_with_spill(dir.path(), spill.path()); + let fs = ScopedFs::new(scope, dir.path().to_path_buf()); let tracker = Tracker::new(); - let reg = Registry::new(builtin_tools(fs, tracker.clone())); + let reg = Registry::new(builtin_tools(fs, tracker.clone(), spill.path().to_path_buf())); let a = dir.path().join("a.txt"); let b = dir.path().join("b.txt"); @@ -324,5 +336,52 @@ async fn tracker_recent_files_tracks_read_write_edit() { ); } +#[tokio::test] +async fn bash_inherits_scoped_fs_pwd() { + // The Bash tool starts at the ScopedFs's pwd. Without any `cd`, its + // `pwd` should canonicalize to the workspace root we set up. + let (dir, _spill, reg) = setup(); + let bash = reg.get("Bash"); + let out = call(&bash, json!({ "command": "pwd" })).await; + let body = out.content.unwrap(); + let actual = std::fs::canonicalize(body.trim()).unwrap(); + let expected = std::fs::canonicalize(dir.path()).unwrap(); + assert_eq!(actual, expected); +} + +#[tokio::test] +async fn bash_spilled_file_is_readable_via_read_tool() { + // Long Bash output spills to a path that the controller has added to + // the readable scope. The agent should be able to Read that path + // exactly like any in-scope file. + let (_dir, spill, reg) = setup(); + let bash = reg.get("Bash"); + let out = call( + &bash, + json!({ "command": "for i in $(seq 1 200); do echo line $i; done" }), + ) + .await; + let body = out.content.unwrap(); + let spill_str = spill.path().to_str().unwrap(); + + // Extract the spilled path from the marker line. + let marker = body.lines().next().unwrap(); + let prefix_pos = marker + .find(spill_str) + .expect("marker should reference the spill dir"); + let path_end_rel = marker[prefix_pos..] + .find(".log") + .expect("marker should end the path with .log"); + let spilled = &marker[prefix_pos..prefix_pos + path_end_rel + 4]; + + // Read the file via the Read tool — must succeed (in scope). + let read_out = call(®.get("Read"), json!({ "file_path": spilled })).await; + let read_body = read_out.content.expect("Read returned content"); + // The full 200 lines should be in the saved file even though Bash + // returned only the tail of 80. + assert!(read_body.contains("line 1\n"), "missing line 1: {read_body}"); + assert!(read_body.contains("line 200"), "missing line 200"); +} + // Sanity: unused Path import guard const _: fn() -> &'static Path = || Path::new("/"); diff --git a/crates/tui/src/tool.rs b/crates/tui/src/tool.rs index 949a7e70..78cbd209 100644 --- a/crates/tui/src/tool.rs +++ b/crates/tui/src/tool.rs @@ -590,22 +590,31 @@ fn render_default(tc: &ToolCallBlock, mode: Mode) -> Vec> { .add_modifier(Modifier::ITALIC), ); - let summary_source: String = match &tc.state { + // Body source: prefer the full output (e.g. Bash's stdout/stderr) so + // Detail mode can expose it. Fall back to the summary when the tool + // didn't emit any content. + let body_source: String = match &tc.state { + ToolCallState::Done { + output: Some(out), .. + } + | ToolCallState::Error { + output: Some(out), .. + } => out.clone(), ToolCallState::Done { summary, .. } | ToolCallState::Error { summary, .. } => { summary.clone() } _ => String::new(), }; - let summary_cap = match mode { + let body_cap = match mode { Mode::Normal => 3, Mode::Detail => usize::MAX, Mode::Overview => unreachable!(), }; - if !summary_source.is_empty() { + if !body_source.is_empty() { emit_capped_lines( &mut lines, - &summary_source, - summary_cap, + &body_source, + body_cap, Style::default().fg(Color::Gray), ); } diff --git a/tickets/bash-tool.md b/tickets/bash-tool.md index 4ac7cbc2..65f9b467 100644 --- a/tickets/bash-tool.md +++ b/tickets/bash-tool.md @@ -25,3 +25,8 @@ Bash の子プロセスは ScopedFs を経由しない。Scope による保護 ## 依存チケット - [permission-extension-point.md](permission-extension-point.md) — deny/allow ルールによる Bash コマンド制御 + +## Review +- 状態: Approve with follow-up +- レビュー詳細: [./bash-tool.review.md](./bash-tool.review.md) +- 日付: 2026-05-01