//! `Bash` tool — execute shell commands in a one-shot, stateless way. //! //! Each call runs `bash -c ` via [`tokio::process::Command`]. //! The wrapper redirects all output to a file so we never have to read //! from a pipe (which would expose us to bg-pipe hangs). There is no //! shell session: every call starts fresh at `cwd`, so the agent must //! chain `cd && cmd` when it wants to operate elsewhere. This //! mirrors Claude Code's own Bash tool — predictable, no hidden state. //! //! Output handling: when output is short (≤ 80 lines, ≤ 12 KiB) it is //! returned inline and the file is cleaned up. When it is longer the //! full output is left on disk and only the **last 80 lines** are //! returned, prefixed with the saved file's path. This sidesteps the //! Worker's blanket `ToolOutputLimits` (default 64 KiB), which would //! otherwise drop the *tail* of the output — usually the most useful //! part (errors, exit messages, summary). The saved file lives under //! a caller-supplied directory that the parent has added to the //! `ScopedFs` allow set, so the agent can inspect it via either Read //! or a follow-up Bash call. //! //! Filesystem and network access are NOT mediated by `ScopedFs`: the //! child process can touch any path. Safety is delegated to the //! Permission layer (deny/allow rules on the command string). use std::path::{Path, PathBuf}; use std::process::Stdio; use std::sync::Arc; use std::time::Duration; use async_trait::async_trait; use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta, ToolOutput}; use serde::Deserialize; use tokio::process::Command; use crate::scoped_fs::ScopedFs; const DESCRIPTION: &str = "Execute a shell command via bash. Supports the \ full shell — pipes, redirects, command substitution, `&&`/`||`. Each call \ runs in a fresh shell rooted at the workspace; chain `cd && cmd` \ when you need to operate elsewhere. stdout and stderr are merged. Default \ timeout 120s, max 600s.\n\n\ Output handling: when the command produces more than 80 lines (or ~12 KiB), \ the full output is saved to a file and only the LAST 80 lines are returned, \ prefixed with the saved path. The path is readable by Read; you can also \ inspect it from a follow-up Bash call (`grep ... `, etc.).\n\n\ Prefer dedicated tools when one fits: Read instead of `cat`/`head`/`tail` \ on workspace files, Edit instead of `sed`/`awk` rewrites, Glob instead of \ `find `, Grep instead of `grep`/`rg`. Reach for Bash when the task \ is shell-shaped: building, testing, version control, package management."; const DEFAULT_TIMEOUT_SECS: u64 = 120; const MAX_TIMEOUT_SECS: u64 = 600; /// Number of trailing lines returned when output spills to a file. const TAIL_LINES: usize = 80; /// Inline-return budget. Outputs at or below this are returned in full; /// above it triggers the spill-to-file path. Sized to leave headroom under /// the Worker's 64 KiB default `ToolOutputLimits` cap so the inline path /// reliably reaches the model intact. const INLINE_BYTE_BUDGET: usize = 12 * 1024; /// Maximum bytes loaded into memory from the spilled output file. The /// file itself can be arbitrarily large; we only ever read the tail end /// since that is what we return. const TAIL_READ_BUDGET: usize = 256 * 1024; #[derive(Debug, Deserialize, schemars::JsonSchema)] pub(crate) struct BashParams { /// Shell command to execute. Passed verbatim to `bash -c`. pub command: String, /// Timeout in seconds. Defaults to 120, capped at 600. #[serde(default)] pub timeout: Option, } pub(crate) struct BashTool { /// Workspace root that every invocation starts in. Snapshot of /// `ScopedFs::pwd()` at registration time; never mutated, since we /// don't track `cd` across calls. cwd: PathBuf, /// Directory to spill long outputs into. Caller is expected to have /// added this path to the readable scope so the agent can Read the /// saved files. The directory itself is created lazily. output_dir: PathBuf, /// Files we left on disk for follow-up inspection. Cleaned up on /// `Drop` (= session end). `std::sync::Mutex` because access is /// always synchronous and very brief. spilled_outputs: std::sync::Mutex>, } impl Drop for BashTool { fn drop(&mut self) { if let Ok(mut paths) = self.spilled_outputs.lock() { for p in paths.drain(..) { let _ = std::fs::remove_file(&p); } } } } #[async_trait] impl Tool for BashTool { async fn execute(&self, input_json: &str) -> Result { let params: BashParams = serde_json::from_str(input_json) .map_err(|e| ToolError::InvalidArgument(format!("invalid Bash input: {e}")))?; let timeout_secs = params .timeout .unwrap_or(DEFAULT_TIMEOUT_SECS) .clamp(1, MAX_TIMEOUT_SECS); // Persistent output file in the caller-supplied directory. // `keep()` opts out of auto-delete so the agent can inspect the // full output later; cleanup is deferred to `Drop` on this tool. std::fs::create_dir_all(&self.output_dir).map_err(|e| { ToolError::Internal(format!( "create bash output dir {}: {e}", self.output_dir.display() )) })?; let output_path: PathBuf = tempfile::Builder::new() .prefix("bash-") .suffix(".log") .tempfile_in(&self.output_dir) .map_err(|e| ToolError::Internal(format!("output tempfile: {e}")))? .into_temp_path() .keep() .map_err(|e| ToolError::Internal(format!("persist output tempfile: {e}")))?; let output_path_str = output_path .to_str() .ok_or_else(|| ToolError::Internal("output path is not UTF-8".into()))?; // Wrapper: // exec >file 2>&1 redirect stdout/stderr to the output file // { user_cmd } run in a brace group (no subshell, so any // `cd` inside still affects $? capture below) // __exit=$? preserve the user command's exit code… // wait 2>/dev/null …since `wait` clobbers $?. Reaping bg jobs // guarantees the output file's writers all // close before bash itself exits. // exit $__exit propagate the user's exit let wrapped = format!( "exec >{out} 2>&1\n{{ {user_cmd}\n}}\n__yoi_exit=$?\nwait 2>/dev/null\nexit $__yoi_exit\n", out = shell_single_quote(output_path_str), user_cmd = params.command, ); tracing::debug!(cmd = %params.command, cwd = %self.cwd.display(), timeout_secs, "Bash"); let mut child = Command::new("bash") .arg("-c") .arg(&wrapped) .current_dir(&self.cwd) .stdin(Stdio::null()) .stdout(Stdio::null()) // bash inherits — but the wrapper redirected via `exec` .stderr(Stdio::null()) .kill_on_drop(true) .spawn() .map_err(|e| { let _ = std::fs::remove_file(&output_path); ToolError::ExecutionFailed(format!("spawn bash: {e}")) })?; let timeout_dur = Duration::from_secs(timeout_secs); let wait_result = tokio::time::timeout(timeout_dur, child.wait()).await; let (status, timed_out) = match wait_result { Ok(Ok(s)) => (Some(s), false), Ok(Err(e)) => { let _ = std::fs::remove_file(&output_path); return Err(ToolError::ExecutionFailed(format!("bash wait: {e}"))); } Err(_) => (None, true), }; // Inspect the on-disk output: total size first, tail bytes second. let total_bytes = std::fs::metadata(&output_path) .map(|m| m.len() as usize) .unwrap_or(0); let tail_bytes = read_tail_bytes(&output_path, TAIL_READ_BUDGET).unwrap_or_default(); let tail_text = String::from_utf8_lossy(&tail_bytes).into_owned(); let cmd_summary = truncate_for_summary(¶ms.command); if timed_out { // Preserve the partial output file — even cut-short logs help // diagnose hangs. let content = if total_bytes > 0 { let last = take_last_n_lines(&tail_text, TAIL_LINES); self.remember_spilled(&output_path); Some(format!( "[partial output before timeout — full at {}]\n{last}", output_path.display() )) } else { let _ = std::fs::remove_file(&output_path); None }; return Ok(ToolOutput { summary: format!("$ {cmd_summary} (timed out after {timeout_secs}s)"), content, }); } let status = status.expect("status set on the success branch"); let summary = match status.code() { Some(0) => format!("$ {cmd_summary}"), Some(c) => format!("$ {cmd_summary} (exit {c})"), None => format!("$ {cmd_summary} (terminated by signal)"), }; if total_bytes == 0 { let _ = std::fs::remove_file(&output_path); return Ok(ToolOutput { summary, content: None, }); } // Inline if the whole output fits in our tail-read window AND is // small enough to ride under the Worker's default cap. let line_count = tail_text.lines().count(); let fully_loaded = total_bytes <= tail_bytes.len(); let fits_inline = fully_loaded && total_bytes <= INLINE_BYTE_BUDGET && line_count <= TAIL_LINES; let content = if fits_inline { let _ = std::fs::remove_file(&output_path); Some(tail_text) } else { let last = take_last_n_lines(&tail_text, TAIL_LINES); // When `fully_loaded` we know the exact line count; otherwise // the file is bigger than our read window so we report bytes // and an "approximate" disclaimer. let header = if fully_loaded { format!( "[showing last {TAIL_LINES} of {line_count} lines — full output ({total_bytes} bytes) at {}]", output_path.display() ) } else { format!( "[showing last {TAIL_LINES} lines (tail of {total_bytes}-byte output) — full at {}]", output_path.display() ) }; self.remember_spilled(&output_path); Some(format!("{header}\n{last}")) }; Ok(ToolOutput { summary, content }) } } impl BashTool { fn remember_spilled(&self, path: &Path) { if let Ok(mut v) = self.spilled_outputs.lock() { v.push(path.to_path_buf()); } } } /// Read up to `max_bytes` from the end of `path`. If the file is smaller /// than `max_bytes`, the entire file is returned. fn read_tail_bytes(path: &Path, max_bytes: usize) -> std::io::Result> { use std::io::{Read, Seek, SeekFrom}; let mut f = std::fs::File::open(path)?; let len = f.seek(SeekFrom::End(0))?; let start = if len > max_bytes as u64 { len - max_bytes as u64 } else { 0 }; f.seek(SeekFrom::Start(start))?; let mut buf = Vec::with_capacity((len - start) as usize); f.read_to_end(&mut buf)?; Ok(buf) } /// Return the last `n` lines of `text`. If `text` has `n` or fewer lines /// (per [`str::lines`]), the input is returned as-is (no allocation). fn take_last_n_lines(text: &str, n: usize) -> String { if text.is_empty() { return String::new(); } let total = text.lines().count(); if total <= n { return text.to_owned(); } let skip = total - n; let mut count = 0usize; for (i, b) in text.bytes().enumerate() { if b == b'\n' { count += 1; if count == skip { return text[i + 1..].to_owned(); } } } text.to_owned() } fn truncate_for_summary(command: &str) -> String { let one_line = command.lines().next().unwrap_or(""); let mut chars = one_line.chars(); let head: String = chars.by_ref().take(80).collect(); if chars.next().is_some() { let mut shortened = head; while shortened.chars().count() > 77 { shortened.pop(); } shortened.push_str("..."); shortened } else { head } } /// Wrap a string in single quotes for safe inclusion in a bash command. fn shell_single_quote(s: &str) -> String { let escaped = s.replace('\'', "'\\''"); format!("'{escaped}'") } /// Factory for the `Bash` tool. /// /// `output_dir` is where long outputs spill to; the caller is responsible /// for arranging that the path is in the agent's readable scope. Every /// invocation starts at `fs.pwd()` — the tool is intentionally stateless /// w.r.t. the working directory. pub fn bash_tool(fs: ScopedFs, output_dir: PathBuf) -> ToolDefinition { Arc::new(move || { let schema = schemars::schema_for!(BashParams); let schema_value = serde_json::to_value(schema).unwrap_or(serde_json::json!({})); let meta = ToolMeta::new("Bash") .description(DESCRIPTION) .input_schema(schema_value); let tool: Arc = Arc::new(BashTool { cwd: fs.pwd().to_path_buf(), output_dir: output_dir.clone(), spilled_outputs: std::sync::Mutex::new(Vec::new()), }); (meta, tool) }) } #[cfg(test)] mod tests { use super::*; use manifest::Scope; use tempfile::TempDir; /// Test harness: workspace tempdir + a separate spill tempdir kept /// alive for the test's lifetime. The spill dir is added to the /// scope as readable so callers exercise the production path. struct Harness { _workspace: TempDir, spill: TempDir, fs: ScopedFs, } fn setup() -> Harness { let workspace = TempDir::new().unwrap(); let spill = TempDir::new().unwrap(); let base = Scope::writable(workspace.path()).unwrap(); let mut config = manifest::ScopeConfig { allow: base.allow_rules(), deny: base.deny_rules(), }; config.allow.push(manifest::ScopeRule { target: spill.path().to_path_buf(), permission: manifest::Permission::Read, recursive: true, }); let scope = Scope::from_config(&config).unwrap(); let fs = ScopedFs::new(scope, workspace.path().to_path_buf()); Harness { _workspace: workspace, spill, fs, } } fn make_tool(h: &Harness) -> Arc { let def = bash_tool(h.fs.clone(), h.spill.path().to_path_buf()); let (_, tool) = def(); tool } #[tokio::test] async fn runs_simple_command() { let h = setup(); let def = bash_tool(h.fs.clone(), h.spill.path().to_path_buf()); let (meta, tool) = def(); assert_eq!(meta.name, "Bash"); let inp = serde_json::json!({ "command": "echo hello" }); let out = tool.execute(&inp.to_string()).await.unwrap(); assert_eq!(out.summary, "$ echo hello"); assert_eq!(out.content.as_deref().map(str::trim), Some("hello")); } #[tokio::test] async fn merges_stdout_and_stderr() { let h = setup(); let tool = make_tool(&h); let inp = serde_json::json!({ "command": "echo out; echo err 1>&2", }); let out = tool.execute(&inp.to_string()).await.unwrap(); let body = out.content.unwrap(); assert!(body.contains("out")); assert!(body.contains("err")); } #[tokio::test] async fn nonzero_exit_is_reported() { let h = setup(); let tool = make_tool(&h); let inp = serde_json::json!({ "command": "exit 7" }); let out = tool.execute(&inp.to_string()).await.unwrap(); assert!(out.summary.contains("exit 7"), "summary: {}", out.summary); assert!( out.content.is_none(), "no output expected, got {:?}", out.content ); } #[tokio::test] async fn cd_does_not_persist_across_calls() { // Stateless: a `cd` in one call must NOT leak into the next. let h = setup(); let sub = h._workspace.path().join("nested"); std::fs::create_dir(&sub).unwrap(); let tool = make_tool(&h); tool.execute( &serde_json::json!({ "command": format!("cd {}", sub.to_str().unwrap()), }) .to_string(), ) .await .unwrap(); let pwd_out = tool .execute(&serde_json::json!({ "command": "pwd" }).to_string()) .await .unwrap(); let body = pwd_out.content.unwrap(); let actual = std::fs::canonicalize(body.trim()).unwrap(); let workspace = std::fs::canonicalize(h._workspace.path()).unwrap(); assert_eq!( actual, workspace, "second call should start at workspace root, not the previous cd target" ); } #[tokio::test] async fn timeout_kills_long_command() { let h = setup(); let tool = make_tool(&h); let inp = serde_json::json!({ "command": "sleep 30", "timeout": 1, }); let out = tool.execute(&inp.to_string()).await.unwrap(); assert!( out.summary.contains("timed out"), "summary: {}", out.summary ); } #[tokio::test] async fn invalid_json_is_invalid_argument() { let h = setup(); let tool = make_tool(&h); let err = tool.execute("not json").await.unwrap_err(); assert!(matches!(err, ToolError::InvalidArgument(_))); } #[tokio::test] async fn long_output_spills_and_returns_tail() { let h = setup(); let spill_dir = h.spill.path().to_path_buf(); let tool = make_tool(&h); // 200 lines: "line 1" .. "line 200". Tail of 80 keeps lines 121-200. let inp = serde_json::json!({ "command": "for i in $(seq 1 200); do echo line $i; done", }); let out = tool.execute(&inp.to_string()).await.unwrap(); let body = out.content.expect("expected content"); assert!( body.contains(&format!("showing last {TAIL_LINES} of 200 lines")), "tail header missing in: {}", &body[..body.len().min(300)] ); assert!( body.contains(spill_dir.to_str().unwrap()), "spill dir path missing: {body}" ); // Last 80 lines are 121..200. assert!(body.contains("\nline 200\n")); assert!(body.contains("\nline 121\n")); // line 120 is the last *elided* line. assert!(!body.contains("\nline 120\n"), "elided line leaked: {body}"); } #[tokio::test] async fn wide_short_output_still_spills_when_byte_budget_exceeded() { let h = setup(); let spill_dir = h.spill.path().to_path_buf(); let tool = make_tool(&h); // One single line of ~20 KiB (over INLINE_BYTE_BUDGET = 12 KiB). let inp = serde_json::json!({ "command": "printf 'x%.0s' {1..20480}", }); let out = tool.execute(&inp.to_string()).await.unwrap(); let body = out.content.unwrap(); assert!( body.contains(spill_dir.to_str().unwrap()), "expected spill marker in: {}", &body[..body.len().min(200)] ); } #[tokio::test] async fn background_job_does_not_hang() { let h = setup(); let tool = make_tool(&h); // The wrapper's `wait` ensures we don't hang on a stray bg pipe. let inp = serde_json::json!({ "command": "(sleep 0.05; echo bg) &", "timeout": 5, }); let out = tool.execute(&inp.to_string()).await.unwrap(); assert!( !out.summary.contains("timed out"), "summary: {}", out.summary ); } #[tokio::test] async fn spilled_files_are_cleaned_up_on_drop() { let h = setup(); let spill_dir = h.spill.path().to_path_buf(); let tool = make_tool(&h); let inp = serde_json::json!({ "command": "for i in $(seq 1 200); do echo $i; done", }); tool.execute(&inp.to_string()).await.unwrap(); // The spill dir should now contain exactly one bash-*.log file. let files_before: Vec<_> = std::fs::read_dir(&spill_dir) .unwrap() .filter_map(Result::ok) .map(|e| e.path()) .collect(); assert_eq!(files_before.len(), 1, "expected one spilled file"); let path = files_before.into_iter().next().unwrap(); assert!(path.exists()); drop(tool); // Drop runs synchronously; file should be gone. assert!( !path.exists(), "spilled file should be cleaned up on drop: {path:?}" ); } }