Compare commits

...

3 Commits

25 changed files with 1364 additions and 102 deletions

View File

@ -1 +1 @@
_memory _staging

View File

View File

@ -15,6 +15,7 @@
- [ ] ユーザーマニフェストのモデル設定 wizard → [tickets/tui-user-model-setup.md](tickets/tui-user-model-setup.md) - [ ] ユーザーマニフェストのモデル設定 wizard → [tickets/tui-user-model-setup.md](tickets/tui-user-model-setup.md)
- [ ] サブミット入力 - [ ] サブミット入力
- [ ] TUI 補完 + 型付き atom 化 → [tickets/submit-tui-completion.md](tickets/submit-tui-completion.md) - [ ] TUI 補完 + 型付き atom 化 → [tickets/submit-tui-completion.md](tickets/submit-tui-completion.md)
- [ ] FileRef リゾルバ → [tickets/submit-file-ref-resolver.md](tickets/submit-file-ref-resolver.md)
- [ ] メモリ機構 - [ ] メモリ機構
- [ ] Phase 2 consolidation → [tickets/memory-phase2-consolidation.md](tickets/memory-phase2-consolidation.md) - [ ] Phase 2 consolidation → [tickets/memory-phase2-consolidation.md](tickets/memory-phase2-consolidation.md)
- [ ] 使用頻度メトリクス + Knowledge 化候補レポート → [tickets/memory-usage-metrics.md](tickets/memory-usage-metrics.md) - [ ] 使用頻度メトリクス + Knowledge 化候補レポート → [tickets/memory-usage-metrics.md](tickets/memory-usage-metrics.md)

View File

@ -17,12 +17,18 @@ use crate::tool::{Tool, ToolCall, ToolMeta, ToolResult};
// ============================================================================= // =============================================================================
/// Action after prompt submission. /// Action after prompt submission.
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq)]
pub enum PromptAction { pub enum PromptAction {
/// Proceed normally. /// Proceed normally.
Continue, Continue,
/// Cancel with a reason. /// Cancel with a reason.
Cancel(String), Cancel(String),
/// Proceed, and append these items to history right after the user
/// message. Mirrors [`TurnEndAction::ContinueWithMessages`] for the
/// submit edge: lets the upper layer attach resolver-produced
/// system messages (e.g. `@<path>` file content) so they sit
/// adjacent to the user message that referenced them.
ContinueWith(Vec<Item>),
} }
/// Action before an LLM request. /// Action before an LLM request.

View File

@ -1338,16 +1338,20 @@ impl<C: LlmClient> Worker<C, Locked> {
self.reset_interruption_state(); self.reset_interruption_state();
// Interceptor: on_prompt_submit // Interceptor: on_prompt_submit
let mut user_item = Item::user_message(user_input); let mut user_item = Item::user_message(user_input);
match self.interceptor.on_prompt_submit(&mut user_item).await { let extras = match self.interceptor.on_prompt_submit(&mut user_item).await {
PromptAction::Cancel(reason) => { PromptAction::Cancel(reason) => {
self.last_run_interrupted = true; self.last_run_interrupted = true;
return self return self
.finalize_interruption(Err(WorkerError::Aborted(reason))) .finalize_interruption(Err(WorkerError::Aborted(reason)))
.await; .await;
} }
PromptAction::Continue => {} PromptAction::Continue => Vec::new(),
} PromptAction::ContinueWith(items) => items,
};
self.history.push(user_item); self.history.push(user_item);
if !extras.is_empty() {
self.history.extend(extras);
}
let result = self.run_turn_loop().await; let result = self.run_turn_loop().await;
self.finalize_interruption(result).await self.finalize_interruption(result).await
} }

View File

@ -156,6 +156,23 @@ impl Scope {
.collect() .collect()
} }
/// Returns an owned copy of every deny rule, with targets resolved to
/// absolute paths.
///
/// This is the deny-side twin of [`allow_rules`](Self::allow_rules).
/// Feeding both results into a [`ScopeConfig`] lets a caller rebuild a
/// scope after layering extra rules on top of an already-constructed
/// [`Scope`].
pub fn deny_rules(&self) -> Vec<ScopeRule> {
    let mut rules = Vec::new();
    for rule in self.deny.iter() {
        // Copy field by field into a fresh, caller-owned `ScopeRule`.
        rules.push(ScopeRule {
            target: rule.target.clone(),
            permission: rule.permission,
            recursive: rule.recursive,
        });
    }
    rules
}
/// Iterate over absolute paths granted `Write` by an allow rule. /// Iterate over absolute paths granted `Write` by an allow rule.
/// Subset of [`readable_paths`](Self::readable_paths). /// Subset of [`readable_paths`](Self::readable_paths).
pub fn writable_paths(&self) -> impl Iterator<Item = &Path> { pub fn writable_paths(&self) -> impl Iterator<Item = &Path> {

View File

@ -221,20 +221,49 @@ impl PodController {
}); });
// Register the builtin file-manipulation tools (Read / Write / // Register the builtin file-manipulation tools (Read / Write /
// Edit / Glob / Grep). `ScopedFs` carries the pod-lifetime // Edit / Glob / Grep / Bash). `ScopedFs` carries the pod-
// scope/pwd; `Tracker` is session-scoped — a fresh instance per // lifetime scope/pwd; `Tracker` is session-scoped — a fresh
// controller spawn ensures state from a previous process // instance per controller spawn ensures state from a previous
// lifetime cannot be reused after a resume. The tracker is // process lifetime cannot be reused after a resume. The tracker
// also handed to the Pod itself so Pod-level operations (e.g. // is also handed to the Pod itself so Pod-level operations (e.g.
// context compaction) can ask which files the agent has been // context compaction) can ask which files the agent has been
// touching. // touching.
let fs = tools::ScopedFs::new(scope_for_tools, pwd_for_tools.clone()); //
// Bash spills long outputs to a per-pod subdir under the
// runtime dir. We layer a recursive `allow(Read)` rule for
// that path on top of the user-facing scope so the agent can
// `Read` the saved files without polluting the workspace.
// Same approach memory takes for its deny rules: round-trip
// through `ScopeConfig` and rebuild via `from_config`.
let bash_output_dir = runtime_dir.path().join("bash-output");
std::fs::create_dir_all(&bash_output_dir).map_err(|e| {
std::io::Error::other(format!(
"create bash output dir {}: {e}",
bash_output_dir.display()
))
})?;
let mut scope_config = manifest::ScopeConfig {
allow: scope_for_tools.allow_rules(),
deny: scope_for_tools.deny_rules(),
};
scope_config.allow.push(manifest::ScopeRule {
target: bash_output_dir.clone(),
permission: manifest::Permission::Read,
recursive: true,
});
let scope_with_bash = manifest::Scope::from_config(&scope_config)
.map_err(std::io::Error::other)?;
let fs = tools::ScopedFs::new(scope_with_bash, pwd_for_tools.clone());
let tracker = tools::Tracker::new(); let tracker = tools::Tracker::new();
// The same ScopedFs also powers the IPC `ListCompletions` // The same ScopedFs also powers the IPC `ListCompletions`
// query — keep a clone for the FS view we attach below, // query — keep a clone for the FS view we attach below,
// since the tools consume `fs` itself. // since the tools consume `fs` itself.
fs_for_view = fs.clone(); fs_for_view = fs.clone();
worker.register_tools(tools::builtin_tools(fs, tracker.clone())); worker.register_tools(tools::builtin_tools(
fs,
tracker.clone(),
bash_output_dir,
));
// Memory subsystem opt-in. When `[memory]` is present in // Memory subsystem opt-in. When `[memory]` is present in
// the manifest, register the memory-specific Read/Write/Edit // the manifest, register the memory-specific Read/Write/Edit

View File

@ -13,7 +13,7 @@
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use llm_worker::Item; use llm_worker::Item;
use tools::ScopedFs; use tools::{ScopedFs, ToolsError};
use tracing::warn; use tracing::warn;
/// 補完候補1件の最大数。`list_file_completions` がこの値を超えたら打ち切り。 /// 補完候補1件の最大数。`list_file_completions` がこの値を超えたら打ち切り。
@ -45,6 +45,29 @@ pub struct FileCandidate {
pub is_dir: bool, pub is_dir: bool,
} }
/// Reasons `resolve_file_ref` can fail. Failures coming from `ScopedFs`
/// and failures detected internally are kept as separate variants so the
/// Pod side can tell them apart when routing the error to an Alert.
#[derive(Debug)]
pub enum ResolveError {
/// Reading the file through `ScopedFs` failed; the underlying
/// `ToolsError` is carried unchanged.
Fs(ToolsError),
/// File contents are not valid UTF-8 (binary / non-text). `path` is the
/// absolute path that was read.
Binary { path: PathBuf },
}
impl std::fmt::Display for ResolveError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
ResolveError::Fs(e) => write!(f, "{e}"),
ResolveError::Binary { path } => {
write!(f, "file is not valid UTF-8 text: {}", path.display())
}
}
}
}
impl std::error::Error for ResolveError {}
impl PodFsView { impl PodFsView {
pub fn new(fs: ScopedFs) -> Self { pub fn new(fs: ScopedFs) -> Self {
Self { fs } Self { fs }
@ -83,6 +106,41 @@ impl PodFsView {
out out
} }
/// Reads `path` through `ScopedFs` and returns a system message of the
/// form `[File: <path>]\n<body>`. This is the route used by the
/// submit-time `Segment::FileRef` resolver.
///
/// - A relative `path` is joined onto the pwd; an absolute `path` is used
///   as given.
/// - A body longer than `max_bytes` is cut to at most `max_bytes` bytes and
///   `[...truncated, <total> bytes total — use read_file for the rest]`
///   is appended. The cut never splits a multi-byte UTF-8 character: when
///   `max_bytes` lands inside one, the partial character is dropped.
///
/// # Errors
///
/// - [`ResolveError::Binary`] when the (possibly truncated) contents are
///   not valid UTF-8.
/// - [`ResolveError::Fs`] when reading through `ScopedFs` fails
///   (out of scope, not found, etc.).
pub fn resolve_file_ref(&self, path: &str, max_bytes: usize) -> Result<Item, ResolveError> {
    let p = Path::new(path);
    let abs = if p.is_absolute() {
        p.to_path_buf()
    } else {
        self.fs.pwd().join(p)
    };
    let bytes = self.fs.read_bytes(&abs).map_err(ResolveError::Fs)?;
    let total = bytes.len();
    let truncated = total > max_bytes;
    let window = if truncated {
        &bytes[..max_bytes]
    } else {
        bytes.as_slice()
    };

    let body = match std::str::from_utf8(window) {
        Ok(s) => s,
        // `error_len() == None` means the input ended in the middle of a
        // character. After truncation that is an artifact of where the cut
        // fell, not evidence of binary data, so back off to the last
        // complete character instead of rejecting a valid text file.
        Err(e) if truncated && e.error_len().is_none() => {
            match std::str::from_utf8(&window[..e.valid_up_to()]) {
                Ok(s) => s,
                Err(_) => return Err(ResolveError::Binary { path: abs }),
            }
        }
        Err(_) => return Err(ResolveError::Binary { path: abs }),
    };

    let mut text = format!("[File: {path}]\n{body}");
    if truncated {
        text.push_str(&format!(
            "\n[...truncated, {total} bytes total — use read_file for the rest]"
        ));
    }
    Ok(Item::system_message(text))
}
/// `prefix` にマッチするファイル / ディレクトリを scope 内で浅く列挙する。 /// `prefix` にマッチするファイル / ディレクトリを scope 内で浅く列挙する。
/// ///
/// - `prefix` が空 or `pwd` 相対のときは pwd 直下を見る /// - `prefix` が空 or `pwd` 相対のときは pwd 直下を見る
@ -227,6 +285,61 @@ mod tests {
assert!(!rendered.contains("alpha")); assert!(!rendered.contains("alpha"));
} }
#[test]
fn resolve_file_ref_emits_system_message_with_path_header() {
    // A small text file, well under the byte cap.
    let tmp = TempDir::new().unwrap();
    let target = tmp.path().join("hello.txt");
    std::fs::write(target, "hello world").unwrap();

    let resolved = PodFsView::new(fs_for(&tmp))
        .resolve_file_ref("hello.txt", 1024)
        .unwrap();

    // The item is inspected through its Debug rendering.
    let rendered = format!("{resolved:?}");
    for needle in ["[File: hello.txt]", "hello world"] {
        assert!(rendered.contains(needle));
    }
    assert!(!rendered.contains("truncated"));
}
#[test]
fn resolve_file_ref_truncates_with_hint_when_over_cap() {
    // 2048 ASCII bytes against a 256-byte cap forces truncation.
    let tmp = TempDir::new().unwrap();
    let contents = "x".repeat(2048);
    std::fs::write(tmp.path().join("big.txt"), &contents).unwrap();

    let resolved = PodFsView::new(fs_for(&tmp))
        .resolve_file_ref("big.txt", 256)
        .unwrap();

    // Header, truncation marker, and the original total size must all show.
    let rendered = format!("{resolved:?}");
    for needle in ["[File: big.txt]", "truncated", "2048 bytes total"] {
        assert!(rendered.contains(needle));
    }
}
#[test]
fn resolve_file_ref_rejects_binary_with_binary_error() {
    let tmp = TempDir::new().unwrap();
    // These bytes do not form valid UTF-8.
    let payload: [u8; 4] = [0xff, 0xfe, 0x00, 0x80];
    std::fs::write(tmp.path().join("blob.bin"), payload).unwrap();

    let failure = PodFsView::new(fs_for(&tmp))
        .resolve_file_ref("blob.bin", 1024)
        .unwrap_err();
    assert!(matches!(failure, ResolveError::Binary { .. }));
}
#[test]
fn resolve_file_ref_returns_fs_error_for_out_of_scope() {
    // Layout: <outer>/secret.txt sits next to <outer>/scoped, and only
    // the `scoped` directory is handed to the scope.
    let outer = TempDir::new().unwrap();
    let scoped_dir = outer.path().join("scoped");
    std::fs::create_dir(&scoped_dir).unwrap();
    let secret = outer.path().join("secret.txt");
    std::fs::write(&secret, "nope").unwrap();

    let scope = Scope::writable(&scoped_dir).unwrap();
    let view = PodFsView::new(ScopedFs::new(scope, scoped_dir.clone()));

    // Absolute path outside of scope.
    let failure = view
        .resolve_file_ref(secret.to_str().unwrap(), 1024)
        .unwrap_err();
    assert!(matches!(failure, ResolveError::Fs(_)));
}
#[test] #[test]
fn render_auto_read_skips_unreadable_targets() { fn render_auto_read_skips_unreadable_targets() {
let dir = TempDir::new().unwrap(); let dir = TempDir::new().unwrap();

View File

@ -21,6 +21,30 @@ use llm_worker::interceptor::{
use llm_worker::tool::ToolOutput; use llm_worker::tool::ToolOutput;
use serde_json::Value; use serde_json::Value;
/// Hook-facing prompt-submit action.
///
/// A strict subset of [`PromptAction`]: Hooks may continue or cancel
/// the submit, but cannot inject items into history. The
/// `ContinueWith(Vec<Item>)` variant is reserved for the internal
/// `Interceptor` so that Hook (the public extension surface) stays
/// read-only by construction (see module-level doc).
///
/// Converts into [`PromptAction`] through its `From` impl, mapping each
/// variant to the variant of the same name.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum HookPromptAction {
/// Proceed normally.
Continue,
/// Cancel with a reason. The string is the reason carried over to
/// `PromptAction::Cancel` on conversion.
Cancel(String),
}
impl From<HookPromptAction> for PromptAction {
fn from(action: HookPromptAction) -> Self {
match action {
HookPromptAction::Continue => PromptAction::Continue,
HookPromptAction::Cancel(reason) => PromptAction::Cancel(reason),
}
}
}
// ============================================================================= // =============================================================================
// Hook input summary types (read-only) // Hook input summary types (read-only)
// ============================================================================= // =============================================================================
@ -121,7 +145,7 @@ pub struct OnAbort;
impl HookEventKind for OnPromptSubmit { impl HookEventKind for OnPromptSubmit {
type Input = PromptSubmitInfo; type Input = PromptSubmitInfo;
type Output = PromptAction; type Output = HookPromptAction;
} }
impl HookEventKind for PreLlmRequest { impl HookEventKind for PreLlmRequest {

View File

@ -22,8 +22,8 @@ use tracing::info;
use crate::compact::state::CompactState; use crate::compact::state::CompactState;
use crate::hook::{ use crate::hook::{
AbortInfo, HookRegistry, PreRequestInfo, PromptSubmitInfo, ToolCallSummary, ToolResultSummary, AbortInfo, HookPromptAction, HookRegistry, PreRequestInfo, PromptSubmitInfo, ToolCallSummary,
TurnEndInfo, ToolResultSummary, TurnEndInfo,
}; };
use crate::ipc::notify_buffer::{NotifyBuffer, format_notify}; use crate::ipc::notify_buffer::{NotifyBuffer, format_notify};
use crate::prompt::catalog::PromptCatalog; use crate::prompt::catalog::PromptCatalog;
@ -43,6 +43,11 @@ pub(crate) struct PodInterceptor {
/// Pending-notification buffer drained into the per-request /// Pending-notification buffer drained into the per-request
/// context at the head of `pre_llm_request`. /// context at the head of `pre_llm_request`.
pending_notifies: NotifyBuffer, pending_notifies: NotifyBuffer,
/// Submit-scoped stash of resolver-produced system messages.
/// Drained inside `on_prompt_submit` and returned via
/// `PromptAction::ContinueWith`. Populated by `Pod::run` immediately
/// before handing off to the worker.
pending_attachments: Arc<Mutex<Vec<Item>>>,
/// Prompt catalog used to render the injected notification wrapper. /// Prompt catalog used to render the injected notification wrapper.
prompts: Arc<PromptCatalog>, prompts: Arc<PromptCatalog>,
/// Next turn index assigned by `on_prompt_submit`. /// Next turn index assigned by `on_prompt_submit`.
@ -57,6 +62,7 @@ impl PodInterceptor {
compact_state: Option<Arc<CompactState>>, compact_state: Option<Arc<CompactState>>,
usage_history: Option<Arc<Mutex<Vec<UsageRecord>>>>, usage_history: Option<Arc<Mutex<Vec<UsageRecord>>>>,
pending_notifies: NotifyBuffer, pending_notifies: NotifyBuffer,
pending_attachments: Arc<Mutex<Vec<Item>>>,
prompts: Arc<PromptCatalog>, prompts: Arc<PromptCatalog>,
) -> Self { ) -> Self {
Self { Self {
@ -64,6 +70,7 @@ impl PodInterceptor {
compact_state, compact_state,
usage_history, usage_history,
pending_notifies, pending_notifies,
pending_attachments,
prompts, prompts,
next_turn_index: AtomicUsize::new(0), next_turn_index: AtomicUsize::new(0),
tool_calls_this_turn: AtomicUsize::new(0), tool_calls_this_turn: AtomicUsize::new(0),
@ -98,11 +105,21 @@ impl Interceptor for PodInterceptor {
}; };
for hook in &self.registry.on_prompt_submit { for hook in &self.registry.on_prompt_submit {
let action = hook.call(&info).await; let action = hook.call(&info).await;
if !matches!(action, PromptAction::Continue) { if !matches!(action, HookPromptAction::Continue) {
return action; return action.into();
} }
} }
PromptAction::Continue let extras = std::mem::take(
&mut *self
.pending_attachments
.lock()
.expect("pending_attachments poisoned"),
);
if extras.is_empty() {
PromptAction::Continue
} else {
PromptAction::ContinueWith(extras)
}
} }
async fn pre_llm_request(&self, context: &mut Vec<Item>) -> PreRequestAction { async fn pre_llm_request(&self, context: &mut Vec<Item>) -> PreRequestAction {
@ -297,6 +314,7 @@ mod tests {
Some(state), Some(state),
Some(history), Some(history),
NotifyBuffer::new(), NotifyBuffer::new(),
Arc::new(Mutex::new(Vec::new())),
PromptCatalog::builtins_only().unwrap(), PromptCatalog::builtins_only().unwrap(),
); );
let mut ctx = ctx_items; let mut ctx = ctx_items;
@ -321,6 +339,7 @@ mod tests {
Some(state), Some(state),
Some(history), Some(history),
NotifyBuffer::new(), NotifyBuffer::new(),
Arc::new(Mutex::new(Vec::new())),
PromptCatalog::builtins_only().unwrap(), PromptCatalog::builtins_only().unwrap(),
); );
let mut ctx = ctx_items; let mut ctx = ctx_items;
@ -346,6 +365,7 @@ mod tests {
Some(state), Some(state),
Some(history), Some(history),
NotifyBuffer::new(), NotifyBuffer::new(),
Arc::new(Mutex::new(Vec::new())),
PromptCatalog::builtins_only().unwrap(), PromptCatalog::builtins_only().unwrap(),
); );
let mut ctx = ctx_items; let mut ctx = ctx_items;
@ -365,6 +385,7 @@ mod tests {
None, None,
None, None,
NotifyBuffer::new(), NotifyBuffer::new(),
Arc::new(Mutex::new(Vec::new())),
PromptCatalog::builtins_only().unwrap(), PromptCatalog::builtins_only().unwrap(),
); );
let mut ctx: Vec<Item> = Vec::new(); let mut ctx: Vec<Item> = Vec::new();
@ -396,6 +417,7 @@ mod tests {
None, None,
None, None,
buffer.clone(), buffer.clone(),
Arc::new(Mutex::new(Vec::new())),
PromptCatalog::builtins_only().unwrap(), PromptCatalog::builtins_only().unwrap(),
); );
let mut ctx: Vec<Item> = vec![Item::user_message("hi")]; let mut ctx: Vec<Item> = vec![Item::user_message("hi")];
@ -431,6 +453,7 @@ mod tests {
Some(state), Some(state),
Some(history), Some(history),
buffer.clone(), buffer.clone(),
Arc::new(Mutex::new(Vec::new())),
PromptCatalog::builtins_only().unwrap(), PromptCatalog::builtins_only().unwrap(),
); );
let mut ctx = ctx_items; let mut ctx = ctx_items;
@ -456,6 +479,7 @@ mod tests {
None, None,
None, None,
NotifyBuffer::new(), NotifyBuffer::new(),
Arc::new(Mutex::new(Vec::new())),
PromptCatalog::builtins_only().unwrap(), PromptCatalog::builtins_only().unwrap(),
); );
let mut ctx: Vec<Item> = Vec::new(); let mut ctx: Vec<Item> = Vec::new();

View File

@ -99,6 +99,12 @@ pub struct Pod<C: LlmClient, St: Store> {
/// injection into the next LLM request. Shared with the /// injection into the next LLM request. Shared with the
/// PodInterceptor installed in `ensure_interceptor_installed`. /// PodInterceptor installed in `ensure_interceptor_installed`.
pending_notifies: NotifyBuffer, pending_notifies: NotifyBuffer,
/// Submit-scoped stash for resolver-produced system messages
/// (currently `@<path>` file content). `Pod::run` fills this
/// before handing off to the worker; `PodInterceptor::on_prompt_submit`
/// drains it and returns `ContinueWith` so the items land in
/// history right after the user message that referenced them.
pending_attachments: Arc<Mutex<Vec<Item>>>,
/// Scope allocation in the machine-wide lock file. `Some` for /// Scope allocation in the machine-wide lock file. `Some` for
/// Pods built via `from_manifest` / `from_manifest_spawned` / /// Pods built via `from_manifest` / `from_manifest_spawned` /
/// `restore_from_manifest` (production paths); `None` for the /// `restore_from_manifest` (production paths); `None` for the
@ -185,6 +191,7 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
alerter: None, alerter: None,
event_tx: None, event_tx: None,
pending_notifies: NotifyBuffer::new(), pending_notifies: NotifyBuffer::new(),
pending_attachments: Arc::new(Mutex::new(Vec::new())),
scope_allocation: None, scope_allocation: None,
callback_socket: None, callback_socket: None,
prompts, prompts,
@ -502,6 +509,7 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
compact_state, compact_state,
usage_history_handle, usage_history_handle,
self.pending_notifies.clone(), self.pending_notifies.clone(),
self.pending_attachments.clone(),
self.prompts.clone(), self.prompts.clone(),
); );
self.worker_mut().set_interceptor(interceptor); self.worker_mut().set_interceptor(interceptor);
@ -614,6 +622,18 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
.await?; .await?;
self.user_segments.push(input.clone()); self.user_segments.push(input.clone());
// Resolve `@<path>` refs to system messages stashed for the
// PodInterceptor to attach right after the user message. Failures
// surface as user-facing Alerts and the placeholder remains in
// the flattened text so the LLM sees the unresolved intent.
let attachments = self.resolve_file_refs(&input);
if !attachments.is_empty() {
*self
.pending_attachments
.lock()
.expect("pending_attachments poisoned") = attachments;
}
let flattened = self.flatten_segments(&input); let flattened = self.flatten_segments(&input);
let history_before = self.worker.as_ref().unwrap().history().len(); let history_before = self.worker.as_ref().unwrap().history().len();
@ -627,26 +647,46 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
self.handle_worker_result(result, history_before).await self.handle_worker_result(result, history_before).await
} }
/// Turns each `Segment::FileRef` in `segments` into a `[File: <path>]`
/// system message by way of `PodFsView`, preserving segment order.
///
/// A ref that fails to resolve (out-of-scope, not-found, binary, I/O) is
/// reported as an `AlertLevel::Warn` Alert and left out of the result; the
/// unresolved placeholder stays in the flattened user message so the LLM
/// still sees the intent. All other segment kinds are ignored here.
fn resolve_file_refs(&self, segments: &[Segment]) -> Vec<Item> {
    let scoped = tools::ScopedFs::new(self.scope.clone(), self.pwd.clone());
    let view = crate::fs_view::PodFsView::new(scoped);

    segments
        .iter()
        // Keep only the file references.
        .filter_map(|seg| match seg {
            Segment::FileRef { path } => Some(path),
            _ => None,
        })
        // Resolve each one; failures become alerts and drop out.
        .filter_map(|path| {
            match view.resolve_file_ref(path, manifest::defaults::TOOL_OUTPUT_MAX_BYTES) {
                Ok(item) => Some(item),
                Err(e) => {
                    self.alert(
                        AlertLevel::Warn,
                        AlertSource::Pod,
                        format!("file ref @{path} could not be resolved: {e}"),
                    );
                    None
                }
            }
        })
        .collect()
}
/// Flatten a typed segment list into the single string the Worker /// Flatten a typed segment list into the single string the Worker
/// receives as the user message, and emit user-facing alerts for /// receives as the user message, and emit user-facing alerts for
/// segments that fall through to placeholder (file/knowledge/workflow /// segments that fall through to placeholder (knowledge / workflow
/// refs without a resolver, or unknown variants from a newer client). /// refs without a resolver, or unknown variants from a newer client).
/// The text reconstruction itself comes from `Segment::flatten_to_text`, /// `FileRef` is handled separately by `resolve_file_refs`. The text
/// reconstruction itself comes from `Segment::flatten_to_text`,
/// shared with replay paths that should not re-alert. /// shared with replay paths that should not re-alert.
fn flatten_segments(&self, segments: &[Segment]) -> String { fn flatten_segments(&self, segments: &[Segment]) -> String {
for seg in segments { for seg in segments {
match seg { match seg {
Segment::Text { .. } | Segment::Paste { .. } => {} Segment::Text { .. } | Segment::Paste { .. } | Segment::FileRef { .. } => {}
Segment::FileRef { path } => {
self.alert(
AlertLevel::Warn,
AlertSource::Pod,
format!(
"file ref @{path} cannot be resolved \
(resolver not yet implemented); passed to LLM as placeholder"
),
);
}
Segment::KnowledgeRef { slug } => { Segment::KnowledgeRef { slug } => {
self.alert( self.alert(
AlertLevel::Warn, AlertLevel::Warn,
@ -1550,6 +1590,7 @@ impl<St: Store> Pod<Box<dyn LlmClient>, St> {
alerter: None, alerter: None,
event_tx: None, event_tx: None,
pending_notifies: NotifyBuffer::new(), pending_notifies: NotifyBuffer::new(),
pending_attachments: Arc::new(Mutex::new(Vec::new())),
scope_allocation: Some(scope_allocation), scope_allocation: Some(scope_allocation),
callback_socket: None, callback_socket: None,
prompts: common.prompts, prompts: common.prompts,
@ -1606,6 +1647,7 @@ impl<St: Store> Pod<Box<dyn LlmClient>, St> {
alerter: None, alerter: None,
event_tx: None, event_tx: None,
pending_notifies: NotifyBuffer::new(), pending_notifies: NotifyBuffer::new(),
pending_attachments: Arc::new(Mutex::new(Vec::new())),
scope_allocation: Some(scope_allocation), scope_allocation: Some(scope_allocation),
callback_socket: Some(callback_socket), callback_socket: Some(callback_socket),
prompts: common.prompts, prompts: common.prompts,
@ -1714,6 +1756,7 @@ impl<St: Store> Pod<Box<dyn LlmClient>, St> {
alerter: None, alerter: None,
event_tx: None, event_tx: None,
pending_notifies: NotifyBuffer::new(), pending_notifies: NotifyBuffer::new(),
pending_attachments: Arc::new(Mutex::new(Vec::new())),
scope_allocation: Some(scope_allocation), scope_allocation: Some(scope_allocation),
callback_socket: None, callback_socket: None,
prompts: common.prompts, prompts: common.prompts,

View File

@ -123,6 +123,10 @@ permission = "write"
"#; "#;
async fn make_pod(client: MockClient) -> Pod<MockClient, FsStore> { async fn make_pod(client: MockClient) -> Pod<MockClient, FsStore> {
make_pod_with_pwd(client).await.0
}
async fn make_pod_with_pwd(client: MockClient) -> (Pod<MockClient, FsStore>, std::path::PathBuf) {
let manifest = PodManifest::from_toml(MANIFEST_TOML).unwrap(); let manifest = PodManifest::from_toml(MANIFEST_TOML).unwrap();
let store_tmp = tempfile::tempdir().unwrap(); let store_tmp = tempfile::tempdir().unwrap();
let store = FsStore::new(store_tmp.path()).await.unwrap(); let store = FsStore::new(store_tmp.path()).await.unwrap();
@ -137,7 +141,10 @@ async fn make_pod(client: MockClient) -> Pod<MockClient, FsStore> {
std::mem::forget(pwd_tmp); std::mem::forget(pwd_tmp);
let worker = Worker::new(client); let worker = Worker::new(client);
Pod::new(manifest, worker, store, pwd, scope).await.unwrap() let pod = Pod::new(manifest, worker, store, pwd.clone(), scope)
.await
.unwrap();
(pod, pwd)
} }
use pod::PodHandle; use pod::PodHandle;
@ -405,6 +412,58 @@ async fn run_with_paste_segment_inlines_content_and_emits_typed_user_message() {
); );
} }
/// End-to-end check that a `FileRef` segment pointing at an existing file
/// results in a `[File: <path>]` item placed immediately after the user
/// message in the first request captured by the mock client.
#[tokio::test]
async fn run_with_resolvable_file_ref_attaches_system_message_after_user() {
let client = MockClient::new(simple_text_events());
// Keep a second handle so captured requests can be inspected after the
// pod has taken ownership of `client`.
let client_for_assert = client.clone();
let (pod, pwd) = make_pod_with_pwd(client).await;
// The referenced file is created under the pod's pwd so the relative
// path `notes.md` resolves.
std::fs::write(pwd.join("notes.md"), "alpha\nbeta\n").unwrap();
let handle = spawn_controller(pod).await;
let segments = vec![
protocol::Segment::text("see "),
protocol::Segment::FileRef {
path: "notes.md".into(),
},
];
handle.send(Method::Run { input: segments }).await.unwrap();
// Wait for the turn to complete.
let mut rx = handle.subscribe();
// Give up waiting after 2 seconds; a timeout falls through to the
// assertions below rather than failing here.
let deadline = tokio::time::Instant::now() + std::time::Duration::from_secs(2);
loop {
tokio::select! {
event = rx.recv() => match event {
Ok(Event::TurnEnd { .. }) => break,
// Any receive error also ends the wait.
Err(_) => break,
_ => {}
},
_ = tokio::time::sleep_until(deadline) => break,
}
}
// NOTE(review): presumably a short grace period so the captured request
// is visible before asserting — confirm.
tokio::time::sleep(std::time::Duration::from_millis(50)).await;
let requests = client_for_assert.captured_requests();
// Indexing panics if no request was captured (e.g. after a timeout).
let items = &requests[0].items;
// The submit produces 2 history items: user message then file content.
let user_idx = items
.iter()
.position(|i| i.is_user_message())
.expect("user message present");
let next = items
.get(user_idx + 1)
.expect("attachment item present after user");
let next_text = next.as_text().unwrap_or_default();
assert!(
next_text.contains("[File: notes.md]"),
"expected file header, got: {next_text:?}"
);
assert!(
next_text.contains("alpha"),
"expected file body, got: {next_text:?}"
);
}
#[tokio::test] #[tokio::test]
async fn run_with_unresolved_segment_emits_alert_and_placeholder() { async fn run_with_unresolved_segment_emits_alert_and_placeholder() {
let client = MockClient::new(simple_text_events()); let client = MockClient::new(simple_text_events());
@ -448,11 +507,12 @@ async fn run_with_unresolved_segment_emits_alert_and_placeholder() {
.iter() .iter()
.find_map(|i| i.as_text().map(|s| s.to_string())) .find_map(|i| i.as_text().map(|s| s.to_string()))
.unwrap_or_default(); .unwrap_or_default();
// LLM context carries a placeholder so the model can ask for the // The user message keeps the literal `@<path>` token (matching what
// missing content rather than silently miss the user's intent. // the user typed). Resolution failure surfaces via the Alert above;
// the LLM still sees the intent as a sigil-prefixed reference.
assert!( assert!(
user_text.contains("[unresolved file ref: src/lib.rs]"), user_text.contains("@src/lib.rs"),
"placeholder missing, got: {user_text:?}" "literal sigil missing, got: {user_text:?}"
); );
} }

View File

@ -150,9 +150,13 @@ impl Segment {
/// to surface user-visible alerts for unresolved refs should do so /// to surface user-visible alerts for unresolved refs should do so
/// alongside this call (Pod does so at submit time). /// alongside this call (Pod does so at submit time).
/// ///
/// Unresolved variants (`FileRef` / `KnowledgeRef` / `WorkflowInvoke`) /// Sigil-prefixed variants (`FileRef` / `KnowledgeRef` / `WorkflowInvoke`)
/// and `Unknown` map to `[unresolved <kind>: <key>]` placeholders so /// flatten back to their literal sigil form (`@<path>`, `#<slug>`,
/// the LLM sees an explicit token rather than silent omission. /// `/<slug>`) — matching what the user originally typed. Resolved
/// content (e.g. file body for `FileRef`) is delivered as separate
/// `Item::system_message`s adjacent to the user message; the
/// resolution itself is the caller's job. `Unknown` falls back to
/// a bracketed placeholder since there is no sigil to render.
pub fn flatten_to_text(segments: &[Segment]) -> String { pub fn flatten_to_text(segments: &[Segment]) -> String {
let mut out = String::new(); let mut out = String::new();
for seg in segments { for seg in segments {
@ -160,13 +164,16 @@ impl Segment {
Segment::Text { content } => out.push_str(content), Segment::Text { content } => out.push_str(content),
Segment::Paste { content, .. } => out.push_str(content), Segment::Paste { content, .. } => out.push_str(content),
Segment::FileRef { path } => { Segment::FileRef { path } => {
out.push_str(&format!("[unresolved file ref: {path}]")); out.push('@');
out.push_str(path);
} }
Segment::KnowledgeRef { slug } => { Segment::KnowledgeRef { slug } => {
out.push_str(&format!("[unresolved knowledge ref: {slug}]")); out.push('#');
out.push_str(slug);
} }
Segment::WorkflowInvoke { slug } => { Segment::WorkflowInvoke { slug } => {
out.push_str(&format!("[unresolved workflow invoke: {slug}]")); out.push('/');
out.push_str(slug);
} }
Segment::Unknown => { Segment::Unknown => {
out.push_str("[unknown input segment]"); out.push_str("[unknown input segment]");

View File

@ -713,7 +713,7 @@ mod tests {
assert_eq!(content.len(), 1); assert_eq!(content.len(), 1);
match &content[0] { match &content[0] {
llm_worker::ContentPart::Text { text } => { llm_worker::ContentPart::Text { text } => {
assert_eq!(text, "see line1\nline2[unresolved file ref: src/main.rs]"); assert_eq!(text, "see line1\nline2@src/main.rs");
} }
other => panic!("unexpected content: {other:?}"), other => panic!("unexpected content: {other:?}"),
} }

View File

@ -19,7 +19,7 @@ serde_json = "1.0.149"
sha2 = "0.11.0" sha2 = "0.11.0"
tempfile = "3.27.0" tempfile = "3.27.0"
thiserror = "2.0.18" thiserror = "2.0.18"
tokio = { version = "1.51.1", features = ["rt"] } tokio = { version = "1.51.1", features = ["process", "rt", "time"] }
tracing = "0.1.44" tracing = "0.1.44"
[dev-dependencies] [dev-dependencies]

581
crates/tools/src/bash.rs Normal file
View File

@ -0,0 +1,581 @@
//! `Bash` tool — execute shell commands in a one-shot, stateless way.
//!
//! Each call runs `bash -c <command>` via [`tokio::process::Command`].
//! The wrapper redirects all output to a file so we never have to read
//! from a pipe (which would expose us to bg-pipe hangs). There is no
//! shell session: every call starts fresh at `cwd`, so the agent must
//! chain `cd <dir> && cmd` when it wants to operate elsewhere. This
//! mirrors Claude Code's own Bash tool — predictable, no hidden state.
//!
//! Output handling: when output is short (≤ 80 lines, ≤ 12 KiB) it is
//! returned inline and the file is cleaned up. When it is longer the
//! full output is left on disk and only the **last 80 lines** are
//! returned, prefixed with the saved file's path. This sidesteps the
//! Worker's blanket `ToolOutputLimits` (default 16 KiB), which would
//! otherwise drop the *tail* of the output — usually the most useful
//! part (errors, exit messages, summary). The saved file lives under
//! a caller-supplied directory that the parent has added to the
//! `ScopedFs` allow set, so the agent can inspect it via either Read
//! or a follow-up Bash call.
//!
//! Filesystem and network access are NOT mediated by `ScopedFs`: the
//! child process can touch any path. Safety is delegated to the
//! Permission layer (deny/allow rules on the command string).
use std::path::{Path, PathBuf};
use std::process::Stdio;
use std::sync::Arc;
use std::time::Duration;
use async_trait::async_trait;
use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta, ToolOutput};
use serde::Deserialize;
use tokio::process::Command;
use crate::scoped_fs::ScopedFs;
/// Model-facing description attached to the `Bash` tool's metadata.
///
/// The numbers quoted here (80 lines, ~12 KiB, 120s default / 600s max
/// timeout) restate the limits enforced by the constants below; keep the
/// two in sync when changing either.
const DESCRIPTION: &str = "Execute a shell command via bash. Supports the \
full shell: pipes, redirects, command substitution, `&&`/`||`. Each call \
runs in a fresh shell rooted at the workspace; chain `cd <subdir> && cmd` \
when you need to operate elsewhere. stdout and stderr are merged. Default \
timeout 120s, max 600s.\n\n\
Output handling: when the command produces more than 80 lines (or ~12 KiB), \
the full output is saved to a file and only the LAST 80 lines are returned, \
prefixed with the saved path. The path is readable by Read; you can also \
inspect it from a follow-up Bash call (`grep ... <path>`, etc.).\n\n\
Prefer dedicated tools when one fits: Read instead of `cat`/`head`/`tail` \
on workspace files, Edit instead of `sed`/`awk` rewrites, Glob instead of \
`find <name>`, Grep instead of `grep`/`rg`. Reach for Bash when the task \
is shell-shaped: building, testing, version control, package management.";
/// Timeout, in seconds, applied when the caller does not supply one.
const DEFAULT_TIMEOUT_SECS: u64 = 120;
/// Upper bound, in seconds, that the effective timeout is clamped to.
const MAX_TIMEOUT_SECS: u64 = 600;
/// Number of trailing lines returned when output spills to a file. Also
/// the line-count ceiling for returning output inline.
const TAIL_LINES: usize = 80;
/// Inline-return budget. Outputs at or below this are returned in full;
/// above it triggers the spill-to-file path. Sized to leave headroom under
/// the Worker's 16 KiB default `ToolOutputLimits` cap so the inline path
/// reliably reaches the model intact.
const INLINE_BYTE_BUDGET: usize = 12 * 1024;
/// Maximum bytes loaded into memory from the spilled output file. The
/// file itself can be arbitrarily large; we only ever read the tail end
/// since that is what we return.
const TAIL_READ_BUDGET: usize = 256 * 1024;
// Input payload of the `Bash` tool, deserialized from the JSON string
// handed to `execute`. Plain `//` is used for this note on purpose: the
// `///` field docs below sit under the `schemars::JsonSchema` derive and
// are presumably surfaced in the generated input schema (TODO confirm),
// so treat their wording as model-facing text.
#[derive(Debug, Deserialize, schemars::JsonSchema)]
pub(crate) struct BashParams {
/// Shell command to execute. Passed verbatim to `bash -c`.
pub command: String,
/// Timeout in seconds. Defaults to 120, capped at 600.
#[serde(default)]
pub timeout: Option<u64>,
}
/// State behind the `Bash` tool: the fixed starting directory, the
/// directory long outputs are spilled into, and the list of spilled
/// files that are removed again when the tool is dropped.
pub(crate) struct BashTool {
/// Workspace root that every invocation starts in. Snapshot of
/// `ScopedFs::pwd()` at registration time; never mutated, since we
/// don't track `cd` across calls.
cwd: PathBuf,
/// Directory to spill long outputs into. Caller is expected to have
/// added this path to the readable scope so the agent can Read the
/// saved files. The directory itself is created lazily.
output_dir: PathBuf,
/// Files we left on disk for follow-up inspection. Cleaned up on
/// `Drop` (= session end). `std::sync::Mutex` because access is
/// always synchronous and very brief.
spilled_outputs: std::sync::Mutex<Vec<PathBuf>>,
}
impl Drop for BashTool {
    /// Best-effort removal of every output file this tool left on disk.
    ///
    /// Deletion failures are ignored, and a poisoned mutex skips the
    /// cleanup entirely rather than panicking inside `drop`.
    fn drop(&mut self) {
        // `&mut self` proves nobody else holds the guard, so the list can
        // be reached without taking the lock.
        let Ok(spilled) = self.spilled_outputs.get_mut() else {
            return;
        };
        spilled.drain(..).for_each(|file| {
            let _ = std::fs::remove_file(&file);
        });
    }
}
#[async_trait]
impl Tool for BashTool {
    /// Run one shell command via `bash -c` and report its merged
    /// stdout/stderr.
    ///
    /// `input_json` must deserialize into [`BashParams`]. The command runs
    /// in `self.cwd` with stdin closed; all of its output is redirected to
    /// a fresh `bash-*.log` file under `self.output_dir`.
    ///
    /// The returned [`ToolOutput`] carries:
    /// - `summary`: `$ <first line of the command>`, suffixed with the
    ///   exit code, a signal note, or a timeout note when applicable.
    /// - `content`: `None` when nothing was written; the full output when
    ///   it fits the inline budget (≤ `TAIL_LINES` lines and
    ///   ≤ `INLINE_BYTE_BUDGET` bytes); otherwise a header naming the
    ///   saved file followed by the last `TAIL_LINES` lines. Saved files
    ///   are remembered and removed when the tool is dropped.
    ///
    /// A timeout or a non-zero exit is reported in the summary, not as an
    /// error.
    ///
    /// # Errors
    /// - [`ToolError::InvalidArgument`] when `input_json` is not a valid
    ///   `BashParams` document.
    /// - [`ToolError::Internal`] when the output directory or file cannot
    ///   be prepared, or the output path is not valid UTF-8.
    /// - [`ToolError::ExecutionFailed`] when bash cannot be spawned or
    ///   waited on.
    async fn execute(&self, input_json: &str) -> Result<ToolOutput, ToolError> {
        let params: BashParams = serde_json::from_str(input_json)
            .map_err(|e| ToolError::InvalidArgument(format!("invalid Bash input: {e}")))?;
        let timeout_secs = params
            .timeout
            .unwrap_or(DEFAULT_TIMEOUT_SECS)
            .clamp(1, MAX_TIMEOUT_SECS);

        // Persistent output file in the caller-supplied directory.
        // `keep()` opts out of auto-delete so the agent can inspect the
        // full output later; cleanup is deferred to `Drop` on this tool.
        std::fs::create_dir_all(&self.output_dir).map_err(|e| {
            ToolError::Internal(format!(
                "create bash output dir {}: {e}",
                self.output_dir.display()
            ))
        })?;
        let output_path: PathBuf = tempfile::Builder::new()
            .prefix("bash-")
            .suffix(".log")
            .tempfile_in(&self.output_dir)
            .map_err(|e| ToolError::Internal(format!("output tempfile: {e}")))?
            .into_temp_path()
            .keep()
            .map_err(|e| ToolError::Internal(format!("persist output tempfile: {e}")))?;
        let Some(output_path_str) = output_path.to_str() else {
            // The file is already persisted at this point; don't leave an
            // empty log behind when we bail out.
            let _ = std::fs::remove_file(&output_path);
            return Err(ToolError::Internal("output path is not UTF-8".into()));
        };

        // Wrapper:
        //   exec >file 2>&1        redirect stdout/stderr to the output file
        //   { user_cmd }           run in a brace group (current shell, no
        //                          subshell), so its status lands in `$?`
        //   __insomnia_exit=$?     preserve the user command's exit code…
        //   wait 2>/dev/null       …since `wait` clobbers `$?`. Reaping bg
        //                          jobs lets their writes to the output
        //                          file finish before bash itself exits.
        //   exit $__insomnia_exit  propagate the user's exit code
        let wrapped = format!(
            "exec >{out} 2>&1\n{{ {user_cmd}\n}}\n__insomnia_exit=$?\nwait 2>/dev/null\nexit $__insomnia_exit\n",
            out = shell_single_quote(output_path_str),
            user_cmd = params.command,
        );
        tracing::debug!(cmd = %params.command, cwd = %self.cwd.display(), timeout_secs, "Bash");

        let mut child = Command::new("bash")
            .arg("-c")
            .arg(&wrapped)
            .current_dir(&self.cwd)
            .stdin(Stdio::null())
            // No pipes on our side: the wrapper's `exec` re-points
            // stdout/stderr at the output file, so these stay unused.
            .stdout(Stdio::null())
            .stderr(Stdio::null())
            .kill_on_drop(true)
            .spawn()
            .map_err(|e| {
                let _ = std::fs::remove_file(&output_path);
                ToolError::ExecutionFailed(format!("spawn bash: {e}"))
            })?;

        let wait_result =
            tokio::time::timeout(Duration::from_secs(timeout_secs), child.wait()).await;
        // `None` means the deadline elapsed before bash exited.
        let status = match wait_result {
            Ok(Ok(exit)) => Some(exit),
            Ok(Err(e)) => {
                let _ = std::fs::remove_file(&output_path);
                return Err(ToolError::ExecutionFailed(format!("bash wait: {e}")));
            }
            Err(_elapsed) => {
                // Kill and reap bash right away instead of leaving it to
                // `kill_on_drop` at the end of this call: the partial
                // output read below should not race a still-running
                // shell, and an awaited kill does not leave a zombie.
                // Failure is ignored; `kill_on_drop` remains the fallback.
                // NOTE(review): only the bash process itself is signalled;
                // processes it already forked are presumably unaffected.
                let _ = child.kill().await;
                None
            }
        };

        // Inspect the on-disk output: total size first, tail bytes second.
        let total_bytes = std::fs::metadata(&output_path)
            .map(|m| m.len() as usize)
            .unwrap_or(0);
        let tail_bytes = read_tail_bytes(&output_path, TAIL_READ_BUDGET).unwrap_or_default();
        let tail_text = String::from_utf8_lossy(&tail_bytes).into_owned();
        let cmd_summary = truncate_for_summary(&params.command);

        let Some(status) = status else {
            // Timed out. Preserve the partial output file — even
            // cut-short logs help diagnose hangs.
            let content = if total_bytes > 0 {
                let last = take_last_n_lines(&tail_text, TAIL_LINES);
                self.remember_spilled(&output_path);
                Some(format!(
                    "[partial output before timeout — full at {}]\n{last}",
                    output_path.display()
                ))
            } else {
                let _ = std::fs::remove_file(&output_path);
                None
            };
            return Ok(ToolOutput {
                summary: format!("$ {cmd_summary} (timed out after {timeout_secs}s)"),
                content,
            });
        };

        let summary = match status.code() {
            Some(0) => format!("$ {cmd_summary}"),
            Some(c) => format!("$ {cmd_summary} (exit {c})"),
            None => format!("$ {cmd_summary} (terminated by signal)"),
        };
        if total_bytes == 0 {
            let _ = std::fs::remove_file(&output_path);
            return Ok(ToolOutput {
                summary,
                content: None,
            });
        }

        // Inline if the whole output fits in our tail-read window AND is
        // small enough to ride under the Worker's default cap.
        let line_count = tail_text.lines().count();
        let fully_loaded = total_bytes <= tail_bytes.len();
        let fits_inline =
            fully_loaded && total_bytes <= INLINE_BYTE_BUDGET && line_count <= TAIL_LINES;
        let content = if fits_inline {
            let _ = std::fs::remove_file(&output_path);
            Some(tail_text)
        } else {
            let last = take_last_n_lines(&tail_text, TAIL_LINES);
            // When `fully_loaded` we know the exact line count; otherwise
            // the file is bigger than our read window, so the header
            // reports the total byte size instead.
            let header = if fully_loaded {
                format!(
                    "[showing last {TAIL_LINES} of {line_count} lines — full output ({total_bytes} bytes) at {}]",
                    output_path.display()
                )
            } else {
                format!(
                    "[showing last {TAIL_LINES} lines (tail of {total_bytes}-byte output) — full at {}]",
                    output_path.display()
                )
            };
            self.remember_spilled(&output_path);
            Some(format!("{header}\n{last}"))
        };
        Ok(ToolOutput { summary, content })
    }
}
impl BashTool {
    /// Record `path` as an output file left on disk, so that `Drop` can
    /// delete it later. Silently does nothing if the mutex is poisoned.
    fn remember_spilled(&self, path: &Path) {
        let Ok(mut spilled) = self.spilled_outputs.lock() else {
            return;
        };
        spilled.push(path.to_owned());
    }
}
/// Load at most the final `max_bytes` bytes of the file at `path`.
///
/// A file no longer than `max_bytes` is returned whole. The cut is made
/// at a raw byte offset, so the first bytes of the result may start in
/// the middle of a line or of a multi-byte character.
///
/// # Errors
/// Propagates any I/O error from opening, seeking, or reading the file.
fn read_tail_bytes(path: &Path, max_bytes: usize) -> std::io::Result<Vec<u8>> {
    use std::io::{Read, Seek, SeekFrom};

    let mut file = std::fs::File::open(path)?;
    // Seeking to the end doubles as a length query.
    let file_len = file.seek(SeekFrom::End(0))?;
    let offset = file_len.saturating_sub(max_bytes as u64);
    file.seek(SeekFrom::Start(offset))?;

    let mut tail = Vec::with_capacity((file_len - offset) as usize);
    file.read_to_end(&mut tail)?;
    Ok(tail)
}
/// Return the last `n` lines of `text` as an owned `String`.
///
/// Lines are delimited by `\n`; a single trailing newline terminates the
/// final line rather than starting an empty one (the same convention as
/// [`str::lines`]). When `text` has `n` or fewer lines, a copy of the
/// whole input is returned. `n == 0` always yields an empty string.
/// Line terminators inside the returned slice are preserved verbatim.
fn take_last_n_lines(text: &str, n: usize) -> String {
    if n == 0 {
        return String::new();
    }
    // Ignore the newline that merely terminates the last line, then walk
    // backwards: the n-th newline from the end sits immediately before
    // the first line we want to keep.
    let body = text.strip_suffix('\n').unwrap_or(text);
    match body.rmatch_indices('\n').nth(n - 1) {
        Some((newline_at, _)) => text[newline_at + 1..].to_owned(),
        // Fewer than `n` separators: the whole text is at most `n` lines.
        None => text.to_owned(),
    }
}
/// Condense `command` into a single-line label for the tool summary.
///
/// Only the first line of the command is considered. Up to 80 characters
/// are passed through untouched; anything longer is cut to its first 77
/// characters followed by `...`. Lengths are counted in `char`s, so
/// multi-byte text is never split mid-character.
fn truncate_for_summary(command: &str) -> String {
    const LIMIT: usize = 80;
    const ELLIPSIS: &str = "...";

    let first_line = command.lines().next().unwrap_or("");
    // `nth(LIMIT)` is the (LIMIT + 1)-th char: absent means the line fits.
    if first_line.chars().nth(LIMIT).is_none() {
        return first_line.to_owned();
    }
    let mut clipped: String = first_line.chars().take(LIMIT - ELLIPSIS.len()).collect();
    clipped.push_str(ELLIPSIS);
    clipped
}
/// Quote `s` so that bash treats it as one literal word.
///
/// The text is enclosed in single quotes; each embedded `'` is written as
/// `'\''` (close the quote, emit an escaped quote, reopen the quote).
fn shell_single_quote(s: &str) -> String {
    let mut quoted = String::with_capacity(s.len() + 2);
    quoted.push('\'');
    for ch in s.chars() {
        if ch == '\'' {
            quoted.push_str("'\\''");
        } else {
            quoted.push(ch);
        }
    }
    quoted.push('\'');
    quoted
}
/// Build the [`ToolDefinition`] for the `Bash` tool.
///
/// Long outputs are spilled into `output_dir`; making that directory
/// readable to the agent is the caller's job. Each tool instance produced
/// by the definition starts every command at `fs.pwd()` as observed when
/// the instance is created — no working-directory state is carried
/// between calls.
pub fn bash_tool(fs: ScopedFs, output_dir: PathBuf) -> ToolDefinition {
    Arc::new(move || {
        let input_schema = serde_json::to_value(schemars::schema_for!(BashParams))
            .unwrap_or_else(|_| serde_json::json!({}));
        let meta = ToolMeta::new("Bash")
            .description(DESCRIPTION)
            .input_schema(input_schema);

        let instance = BashTool {
            cwd: fs.pwd().to_path_buf(),
            output_dir: output_dir.clone(),
            spilled_outputs: std::sync::Mutex::new(Vec::new()),
        };
        let tool: Arc<dyn Tool> = Arc::new(instance);
        (meta, tool)
    })
}
#[cfg(test)]
mod tests {
    use super::*;
    use manifest::Scope;
    use tempfile::TempDir;

    /// Per-test fixture: a writable workspace tempdir plus a separate
    /// spill tempdir, both kept alive for the duration of the test. The
    /// spill dir is granted read access in the scope so the tests follow
    /// the same setup as production callers.
    struct Harness {
        workspace: TempDir,
        spill: TempDir,
        fs: ScopedFs,
    }

    /// Create the two tempdirs and a `ScopedFs` rooted at the workspace.
    fn setup() -> Harness {
        let workspace = TempDir::new().unwrap();
        let spill = TempDir::new().unwrap();

        let writable = Scope::writable(workspace.path()).unwrap();
        let mut allow = writable.allow_rules();
        let deny = writable.deny_rules();
        allow.push(manifest::ScopeRule {
            target: spill.path().to_path_buf(),
            permission: manifest::Permission::Read,
            recursive: true,
        });
        let scope = Scope::from_config(&manifest::ScopeConfig { allow, deny }).unwrap();

        let fs = ScopedFs::new(scope, workspace.path().to_path_buf());
        Harness {
            workspace,
            spill,
            fs,
        }
    }

    /// Instantiate a `Bash` tool wired to the fixture's dirs.
    fn make_tool(h: &Harness) -> Arc<dyn Tool> {
        let definition = bash_tool(h.fs.clone(), h.spill.path().to_path_buf());
        let (_meta, tool) = definition();
        tool
    }

    /// Execute `input` on `tool`, panicking if the tool returns an error.
    async fn run(tool: &Arc<dyn Tool>, input: serde_json::Value) -> ToolOutput {
        tool.execute(&input.to_string()).await.unwrap()
    }

    #[tokio::test]
    async fn runs_simple_command() {
        let h = setup();
        let definition = bash_tool(h.fs.clone(), h.spill.path().to_path_buf());
        let (meta, tool) = definition();
        assert_eq!(meta.name, "Bash");

        let out = run(&tool, serde_json::json!({ "command": "echo hello" })).await;
        assert_eq!(out.summary, "$ echo hello");
        assert_eq!(out.content.as_deref().map(str::trim), Some("hello"));
    }

    #[tokio::test]
    async fn merges_stdout_and_stderr() {
        let h = setup();
        let tool = make_tool(&h);

        let out = run(
            &tool,
            serde_json::json!({ "command": "echo out; echo err 1>&2" }),
        )
        .await;

        let body = out.content.unwrap();
        assert!(body.contains("out"));
        assert!(body.contains("err"));
    }

    #[tokio::test]
    async fn nonzero_exit_is_reported() {
        let h = setup();
        let tool = make_tool(&h);

        let out = run(&tool, serde_json::json!({ "command": "exit 7" })).await;

        assert!(out.summary.contains("exit 7"), "summary: {}", out.summary);
        assert!(
            out.content.is_none(),
            "no output expected, got {:?}",
            out.content
        );
    }

    #[tokio::test]
    async fn cd_does_not_persist_across_calls() {
        // Stateless: a `cd` in one call must NOT leak into the next.
        let h = setup();
        let nested = h.workspace.path().join("nested");
        std::fs::create_dir(&nested).unwrap();
        let tool = make_tool(&h);

        let cd_command = format!("cd {}", nested.to_str().unwrap());
        run(&tool, serde_json::json!({ "command": cd_command })).await;

        let pwd_out = run(&tool, serde_json::json!({ "command": "pwd" })).await;
        let reported = pwd_out.content.unwrap();

        // Canonicalize both sides so symlinked temp roots compare equal.
        let actual = std::fs::canonicalize(reported.trim()).unwrap();
        let workspace = std::fs::canonicalize(h.workspace.path()).unwrap();
        assert_eq!(
            actual, workspace,
            "second call should start at workspace root, not the previous cd target"
        );
    }

    #[tokio::test]
    async fn timeout_kills_long_command() {
        let h = setup();
        let tool = make_tool(&h);

        let out = run(
            &tool,
            serde_json::json!({ "command": "sleep 30", "timeout": 1 }),
        )
        .await;

        assert!(
            out.summary.contains("timed out"),
            "summary: {}",
            out.summary
        );
    }

    #[tokio::test]
    async fn invalid_json_is_invalid_argument() {
        let h = setup();
        let tool = make_tool(&h);

        let err = tool.execute("not json").await.unwrap_err();

        assert!(matches!(err, ToolError::InvalidArgument(_)));
    }

    #[tokio::test]
    async fn long_output_spills_and_returns_tail() {
        let h = setup();
        let spill_dir = h.spill.path().to_path_buf();
        let tool = make_tool(&h);

        // 200 lines: "line 1" .. "line 200". Tail of 80 keeps lines 121-200.
        let out = run(
            &tool,
            serde_json::json!({
                "command": "for i in $(seq 1 200); do echo line $i; done",
            }),
        )
        .await;
        let body = out.content.expect("expected content");

        let expected_header = format!("showing last {TAIL_LINES} of 200 lines");
        assert!(
            body.contains(&expected_header),
            "tail header missing in: {}",
            &body[..body.len().min(300)]
        );
        assert!(
            body.contains(spill_dir.to_str().unwrap()),
            "spill dir path missing: {body}"
        );
        // Last 80 lines are 121..200.
        assert!(body.contains("\nline 200\n"));
        assert!(body.contains("\nline 121\n"));
        // line 120 is the last *elided* line.
        assert!(!body.contains("\nline 120\n"), "elided line leaked: {body}");
    }

    #[tokio::test]
    async fn wide_short_output_still_spills_when_byte_budget_exceeded() {
        let h = setup();
        let spill_dir = h.spill.path().to_path_buf();
        let tool = make_tool(&h);

        // One single line of ~20 KiB (over INLINE_BYTE_BUDGET = 12 KiB).
        let out = run(
            &tool,
            serde_json::json!({ "command": "printf 'x%.0s' {1..20480}" }),
        )
        .await;
        let body = out.content.unwrap();

        assert!(
            body.contains(spill_dir.to_str().unwrap()),
            "expected spill marker in: {}",
            &body[..body.len().min(200)]
        );
    }

    #[tokio::test]
    async fn background_job_does_not_hang() {
        let h = setup();
        let tool = make_tool(&h);

        // A backgrounded job must not push the call into its timeout.
        let out = run(
            &tool,
            serde_json::json!({
                "command": "(sleep 0.05; echo bg) &",
                "timeout": 5,
            }),
        )
        .await;

        assert!(
            !out.summary.contains("timed out"),
            "summary: {}",
            out.summary
        );
    }

    #[tokio::test]
    async fn spilled_files_are_cleaned_up_on_drop() {
        let h = setup();
        let spill_dir = h.spill.path().to_path_buf();
        let tool = make_tool(&h);

        run(
            &tool,
            serde_json::json!({
                "command": "for i in $(seq 1 200); do echo $i; done",
            }),
        )
        .await;

        // The spill dir should now contain exactly one bash-*.log file.
        let mut spilled: Vec<PathBuf> = Vec::new();
        for entry in std::fs::read_dir(&spill_dir).unwrap() {
            if let Ok(entry) = entry {
                spilled.push(entry.path());
            }
        }
        assert_eq!(spilled.len(), 1, "expected one spilled file");
        let path = spilled.remove(0);
        assert!(path.exists());

        drop(tool);

        // Drop runs synchronously; file should be gone.
        assert!(
            !path.exists(),
            "spilled file should be cleaned up on drop: {path:?}"
        );
    }
}

View File

@ -1,8 +1,8 @@
//! Built-in tools for the Insomnia LLM agent. //! Built-in tools for the Insomnia LLM agent.
//! //!
//! Implements Read / Write / Edit / Glob / Grep on top of the `llm-worker` //! Implements Read / Write / Edit / Glob / Grep / Bash on top of the
//! `Tool` infrastructure. Filesystem access is mediated by two orthogonal //! `llm-worker` `Tool` infrastructure. Filesystem access is mediated by
//! concerns: //! two orthogonal concerns:
//! //!
//! - [`ScopedFs`] — pod-lifetime, expresses the write-block boundary for //! - [`ScopedFs`] — pod-lifetime, expresses the write-block boundary for
//! the current scope. Derived from the manifest and shareable across //! the current scope. Derived from the manifest and shareable across
@ -13,17 +13,23 @@
//! //!
//! The Pod layer owns both instances and passes them to //! The Pod layer owns both instances and passes them to
//! [`builtin_tools`] when registering tools on a `Worker`. //! [`builtin_tools`] when registering tools on a `Worker`.
//!
//! `Bash` is the lone exception — its child processes bypass `ScopedFs`
//! entirely. Safety for arbitrary command execution is delegated to the
//! Permission layer (deny/allow rules on the command string).
pub mod error; pub mod error;
pub mod scoped_fs; pub mod scoped_fs;
pub mod tracker; pub mod tracker;
mod bash;
mod edit; mod edit;
mod glob; mod glob;
mod grep; mod grep;
mod read; mod read;
mod write; mod write;
pub use bash::bash_tool;
pub use edit::edit_tool; pub use edit::edit_tool;
pub use error::ToolsError; pub use error::ToolsError;
pub use glob::glob_tool; pub use glob::glob_tool;
@ -39,12 +45,22 @@ pub use write::write_tool;
/// All returned factories share the same tracker instance so that /// All returned factories share the same tracker instance so that
/// `Read` / `Write` / `Edit` see a consistent history across tool /// `Read` / `Write` / `Edit` see a consistent history across tool
/// invocations within a single session. /// invocations within a single session.
pub fn builtin_tools(fs: ScopedFs, tracker: Tracker) -> Vec<llm_worker::tool::ToolDefinition> { ///
/// `bash_output_dir` is where the Bash tool spills long outputs. The
/// caller is responsible for adding that path to the readable scope
/// (see [`manifest::Scope::with_extra_read`]) so the agent can `Read`
/// the saved files.
pub fn builtin_tools(
fs: ScopedFs,
tracker: Tracker,
bash_output_dir: std::path::PathBuf,
) -> Vec<llm_worker::tool::ToolDefinition> {
vec![ vec![
read_tool(fs.clone(), tracker.clone()), read_tool(fs.clone(), tracker.clone()),
write_tool(fs.clone(), tracker.clone()), write_tool(fs.clone(), tracker.clone()),
edit_tool(fs.clone(), tracker.clone()), edit_tool(fs.clone(), tracker),
glob_tool(fs.clone()), glob_tool(fs.clone()),
grep_tool(fs), grep_tool(fs.clone()),
bash_tool(fs, bash_output_dir),
] ]
} }

View File

@ -31,7 +31,8 @@
//! let scope = Scope::writable("/workspace").unwrap(); //! let scope = Scope::writable("/workspace").unwrap();
//! let fs = ScopedFs::new(scope, PathBuf::from("/workspace")); // pod lifetime //! let fs = ScopedFs::new(scope, PathBuf::from("/workspace")); // pod lifetime
//! let tracker = Tracker::new(); // session lifetime //! let tracker = Tracker::new(); // session lifetime
//! let defs = builtin_tools(fs, tracker); //! let bash_outputs = PathBuf::from("/run/insomnia/bash-output");
//! let defs = builtin_tools(fs, tracker, bash_outputs);
//! ``` //! ```
use std::collections::{HashMap, VecDeque}; use std::collections::{HashMap, VecDeque};

View File

@ -3,7 +3,7 @@
use std::sync::Arc; use std::sync::Arc;
use llm_worker::tool::{Tool, ToolDefinition}; use llm_worker::tool::{Tool, ToolDefinition};
use manifest::Scope; use manifest::{Permission, Scope, ScopeConfig, ScopeRule};
use serde_json::json; use serde_json::json;
use tempfile::TempDir; use tempfile::TempDir;
use tools::{ScopedFs, Tracker, builtin_tools}; use tools::{ScopedFs, Tracker, builtin_tools};
@ -27,19 +27,29 @@ impl Registry {
} }
} }
fn setup() -> (TempDir, Registry) { fn setup() -> (TempDir, TempDir, Registry) {
let dir = TempDir::new().unwrap(); let dir = TempDir::new().unwrap();
let fs = ScopedFs::new( let spill = TempDir::new().unwrap();
Scope::writable(dir.path()).unwrap(), let base = Scope::writable(dir.path()).unwrap();
dir.path().to_path_buf(), let mut config = ScopeConfig {
); allow: base.allow_rules(),
deny: base.deny_rules(),
};
config.allow.push(ScopeRule {
target: spill.path().to_path_buf(),
permission: Permission::Read,
recursive: true,
});
let scope = Scope::from_config(&config).unwrap();
let fs = ScopedFs::new(scope, dir.path().to_path_buf());
let tracker = Tracker::new(); let tracker = Tracker::new();
(dir, Registry::new(builtin_tools(fs, tracker))) let reg = Registry::new(builtin_tools(fs, tracker, spill.path().to_path_buf()));
(dir, spill, reg)
} }
#[tokio::test] #[tokio::test]
async fn unicode_path_and_content() { async fn unicode_path_and_content() {
let (dir, reg) = setup(); let (dir, _spill, reg) = setup();
let file = dir.path().join("日本語ファイル.txt"); let file = dir.path().join("日本語ファイル.txt");
let content = "こんにちは 🦀 世界\nabc\n"; let content = "こんにちは 🦀 世界\nabc\n";
@ -70,7 +80,7 @@ async fn unicode_path_and_content() {
async fn symlink_to_outside_scope_is_rejected_for_write() { async fn symlink_to_outside_scope_is_rejected_for_write() {
use std::os::unix::fs::symlink; use std::os::unix::fs::symlink;
let (dir, reg) = setup(); let (dir, _spill, reg) = setup();
let outside = TempDir::new().unwrap(); let outside = TempDir::new().unwrap();
let outside_target = outside.path().join("secret.txt"); let outside_target = outside.path().join("secret.txt");
std::fs::write(&outside_target, "secret").unwrap(); std::fs::write(&outside_target, "secret").unwrap();
@ -114,7 +124,7 @@ async fn symlink_to_outside_scope_is_rejected_for_write() {
#[tokio::test] #[tokio::test]
async fn empty_file_read_and_edit() { async fn empty_file_read_and_edit() {
let (dir, reg) = setup(); let (dir, _spill, reg) = setup();
let file = dir.path().join("empty.txt"); let file = dir.path().join("empty.txt");
std::fs::write(&file, "").unwrap(); std::fs::write(&file, "").unwrap();
@ -144,7 +154,7 @@ async fn empty_file_read_and_edit() {
#[tokio::test] #[tokio::test]
async fn very_long_single_line() { async fn very_long_single_line() {
let (dir, reg) = setup(); let (dir, _spill, reg) = setup();
let file = dir.path().join("long.txt"); let file = dir.path().join("long.txt");
let big: String = "x".repeat(1024 * 1024); // 1 MiB, no newlines let big: String = "x".repeat(1024 * 1024); // 1 MiB, no newlines
std::fs::write(&file, &big).unwrap(); std::fs::write(&file, &big).unwrap();
@ -160,7 +170,7 @@ async fn very_long_single_line() {
#[tokio::test] #[tokio::test]
async fn relative_path_is_rejected() { async fn relative_path_is_rejected() {
let (_dir, reg) = setup(); let (_dir, _spill, reg) = setup();
let read = reg.get("Read"); let read = reg.get("Read");
let err = read let err = read
.execute(&json!({ "file_path": "relative.txt" }).to_string()) .execute(&json!({ "file_path": "relative.txt" }).to_string())
@ -171,7 +181,7 @@ async fn relative_path_is_rejected() {
#[tokio::test] #[tokio::test]
async fn directory_target_is_rejected_for_read() { async fn directory_target_is_rejected_for_read() {
let (dir, reg) = setup(); let (dir, _spill, reg) = setup();
let read = reg.get("Read"); let read = reg.get("Read");
let err = read let err = read
.execute(&json!({ "file_path": dir.path().to_str().unwrap() }).to_string()) .execute(&json!({ "file_path": dir.path().to_str().unwrap() }).to_string())
@ -182,7 +192,7 @@ async fn directory_target_is_rejected_for_read() {
#[tokio::test] #[tokio::test]
async fn deeply_nested_new_file_is_created() { async fn deeply_nested_new_file_is_created() {
let (dir, reg) = setup(); let (dir, _spill, reg) = setup();
let deep = dir.path().join("a/b/c/d/e/deep.txt"); let deep = dir.path().join("a/b/c/d/e/deep.txt");
let write = reg.get("Write"); let write = reg.get("Write");
write write
@ -200,7 +210,7 @@ async fn deeply_nested_new_file_is_created() {
#[tokio::test] #[tokio::test]
async fn replace_preserves_unicode() { async fn replace_preserves_unicode() {
let (dir, reg) = setup(); let (dir, _spill, reg) = setup();
let file = dir.path().join("u.txt"); let file = dir.path().join("u.txt");
std::fs::write(&file, "🦀 rust 🦀\n").unwrap(); std::fs::write(&file, "🦀 rust 🦀\n").unwrap();
@ -225,7 +235,7 @@ async fn replace_preserves_unicode() {
#[tokio::test] #[tokio::test]
async fn grep_handles_unicode_pattern() { async fn grep_handles_unicode_pattern() {
let (dir, reg) = setup(); let (dir, _spill, reg) = setup();
let file = dir.path().join("u.txt"); let file = dir.path().join("u.txt");
std::fs::write(&file, "English\n日本語\nрусский\n").unwrap(); std::fs::write(&file, "English\n日本語\nрусский\n").unwrap();

View File

@ -8,11 +8,25 @@ use std::path::Path;
use std::sync::Arc; use std::sync::Arc;
use llm_worker::tool::{Tool, ToolDefinition, ToolMeta}; use llm_worker::tool::{Tool, ToolDefinition, ToolMeta};
use manifest::Scope; use manifest::{Permission, Scope, ScopeConfig, ScopeRule};
use serde_json::json; use serde_json::json;
use tempfile::TempDir; use tempfile::TempDir;
use tools::{ScopedFs, Tracker, builtin_tools}; use tools::{ScopedFs, Tracker, builtin_tools};
fn scope_with_spill(workspace: &Path, spill: &Path) -> Scope {
let base = Scope::writable(workspace).unwrap();
let mut config = ScopeConfig {
allow: base.allow_rules(),
deny: base.deny_rules(),
};
config.allow.push(ScopeRule {
target: spill.to_path_buf(),
permission: Permission::Read,
recursive: true,
});
Scope::from_config(&config).unwrap()
}
struct Registry { struct Registry {
entries: Vec<(ToolMeta, Arc<dyn Tool>)>, entries: Vec<(ToolMeta, Arc<dyn Tool>)>,
} }
@ -36,15 +50,14 @@ impl Registry {
} }
} }
fn setup() -> (TempDir, Registry) { fn setup() -> (TempDir, TempDir, Registry) {
let dir = TempDir::new().unwrap(); let dir = TempDir::new().unwrap();
let fs = ScopedFs::new( let spill = TempDir::new().unwrap();
Scope::writable(dir.path()).unwrap(), let scope = scope_with_spill(dir.path(), spill.path());
dir.path().to_path_buf(), let fs = ScopedFs::new(scope, dir.path().to_path_buf());
);
let tracker = Tracker::new(); let tracker = Tracker::new();
let reg = Registry::new(builtin_tools(fs, tracker)); let reg = Registry::new(builtin_tools(fs, tracker, spill.path().to_path_buf()));
(dir, reg) (dir, spill, reg)
} }
async fn call(tool: &Arc<dyn Tool>, input: serde_json::Value) -> llm_worker::tool::ToolOutput { async fn call(tool: &Arc<dyn Tool>, input: serde_json::Value) -> llm_worker::tool::ToolOutput {
@ -60,16 +73,16 @@ async fn call_err(tool: &Arc<dyn Tool>, input: serde_json::Value) -> llm_worker:
} }
#[test] #[test]
fn builtin_tools_registers_all_five() { fn builtin_tools_registers_full_set() {
let (_dir, reg) = setup(); let (_dir, _spill, reg) = setup();
let mut names = reg.names(); let mut names = reg.names();
names.sort(); names.sort();
assert_eq!(names, vec!["Edit", "Glob", "Grep", "Read", "Write"]); assert_eq!(names, vec!["Bash", "Edit", "Glob", "Grep", "Read", "Write"]);
} }
#[test] #[test]
fn meta_has_description_and_schema() { fn meta_has_description_and_schema() {
let (_dir, reg) = setup(); let (_dir, _spill, reg) = setup();
for (meta, _) in &reg.entries { for (meta, _) in &reg.entries {
assert!( assert!(
!meta.description.is_empty(), !meta.description.is_empty(),
@ -87,7 +100,7 @@ fn meta_has_description_and_schema() {
#[tokio::test] #[tokio::test]
async fn read_then_edit_then_read_roundtrip() { async fn read_then_edit_then_read_roundtrip() {
let (dir, reg) = setup(); let (dir, _spill, reg) = setup();
let file = dir.path().join("a.txt"); let file = dir.path().join("a.txt");
std::fs::write(&file, "hello world\n").unwrap(); std::fs::write(&file, "hello world\n").unwrap();
let p = file.to_str().unwrap(); let p = file.to_str().unwrap();
@ -119,7 +132,7 @@ async fn read_then_edit_then_read_roundtrip() {
#[tokio::test] #[tokio::test]
async fn write_then_grep_finds_content() { async fn write_then_grep_finds_content() {
let (dir, reg) = setup(); let (dir, _spill, reg) = setup();
let write = reg.get("Write"); let write = reg.get("Write");
let grep = reg.get("Grep"); let grep = reg.get("Grep");
@ -148,7 +161,7 @@ async fn write_then_grep_finds_content() {
#[tokio::test] #[tokio::test]
async fn glob_finds_written_files() { async fn glob_finds_written_files() {
let (dir, reg) = setup(); let (dir, _spill, reg) = setup();
let write = reg.get("Write"); let write = reg.get("Write");
let glob = reg.get("Glob"); let glob = reg.get("Glob");
@ -172,7 +185,7 @@ async fn glob_finds_written_files() {
#[tokio::test] #[tokio::test]
async fn out_of_scope_write_is_rejected() { async fn out_of_scope_write_is_rejected() {
let (_dir, reg) = setup(); let (_dir, _spill, reg) = setup();
let outside = TempDir::new().unwrap(); let outside = TempDir::new().unwrap();
let write = reg.get("Write"); let write = reg.get("Write");
@ -191,7 +204,7 @@ async fn out_of_scope_write_is_rejected() {
#[tokio::test] #[tokio::test]
async fn write_to_existing_without_read_fails() { async fn write_to_existing_without_read_fails() {
let (dir, reg) = setup(); let (dir, _spill, reg) = setup();
let file = dir.path().join("exists.txt"); let file = dir.path().join("exists.txt");
std::fs::write(&file, "preexisting").unwrap(); std::fs::write(&file, "preexisting").unwrap();
@ -212,7 +225,7 @@ async fn write_to_existing_without_read_fails() {
async fn shared_scoped_fs_across_tools() { async fn shared_scoped_fs_across_tools() {
// The key invariant: all builtin tools share the same ScopedFs instance, // The key invariant: all builtin tools share the same ScopedFs instance,
// so read-history set by Read is visible to Edit and Write. // so read-history set by Read is visible to Edit and Write.
let (dir, reg) = setup(); let (dir, _spill, reg) = setup();
let file = dir.path().join("shared.txt"); let file = dir.path().join("shared.txt");
std::fs::write(&file, "one\n").unwrap(); std::fs::write(&file, "one\n").unwrap();
@ -235,7 +248,7 @@ async fn shared_scoped_fs_across_tools() {
#[tokio::test] #[tokio::test]
async fn edit_requires_read_across_tools() { async fn edit_requires_read_across_tools() {
let (dir, reg) = setup(); let (dir, _spill, reg) = setup();
let file = dir.path().join("a.txt"); let file = dir.path().join("a.txt");
std::fs::write(&file, "foo\n").unwrap(); std::fs::write(&file, "foo\n").unwrap();
@ -256,17 +269,17 @@ async fn edit_requires_read_across_tools() {
#[tokio::test] #[tokio::test]
async fn deterministic_tool_order_is_registration_order() { async fn deterministic_tool_order_is_registration_order() {
let (_dir, reg) = setup(); let (_dir, _spill, reg) = setup();
// Registration order from builtin_tools(): Read, Write, Edit, Glob, Grep // Registration order from builtin_tools(): Read, Write, Edit, Glob, Grep, Bash
let names: Vec<&str> = reg.entries.iter().map(|(m, _)| m.name.as_str()).collect(); let names: Vec<&str> = reg.entries.iter().map(|(m, _)| m.name.as_str()).collect();
assert_eq!(names, vec!["Read", "Write", "Edit", "Glob", "Grep"]); assert_eq!(names, vec!["Read", "Write", "Edit", "Glob", "Grep", "Bash"]);
} }
// Regression: tool name capitalization matches Claude Code reference // Regression: tool name capitalization matches Claude Code reference
#[test] #[test]
fn tool_names_match_reference_spec() { fn tool_names_match_reference_spec() {
let (_dir, reg) = setup(); let (_dir, _spill, reg) = setup();
for expected in ["Read", "Write", "Edit", "Glob", "Grep"] { for expected in ["Read", "Write", "Edit", "Glob", "Grep", "Bash"] {
assert!( assert!(
reg.entries.iter().any(|(m, _)| m.name == expected), reg.entries.iter().any(|(m, _)| m.name == expected),
"missing tool {expected}" "missing tool {expected}"
@ -278,12 +291,11 @@ fn tool_names_match_reference_spec() {
async fn tracker_recent_files_tracks_read_write_edit() { async fn tracker_recent_files_tracks_read_write_edit() {
// Build a fresh registry that shares a tracker we can query afterwards. // Build a fresh registry that shares a tracker we can query afterwards.
let dir = TempDir::new().unwrap(); let dir = TempDir::new().unwrap();
let fs = ScopedFs::new( let spill = TempDir::new().unwrap();
Scope::writable(dir.path()).unwrap(), let scope = scope_with_spill(dir.path(), spill.path());
dir.path().to_path_buf(), let fs = ScopedFs::new(scope, dir.path().to_path_buf());
);
let tracker = Tracker::new(); let tracker = Tracker::new();
let reg = Registry::new(builtin_tools(fs, tracker.clone())); let reg = Registry::new(builtin_tools(fs, tracker.clone(), spill.path().to_path_buf()));
let a = dir.path().join("a.txt"); let a = dir.path().join("a.txt");
let b = dir.path().join("b.txt"); let b = dir.path().join("b.txt");
@ -324,5 +336,52 @@ async fn tracker_recent_files_tracks_read_write_edit() {
); );
} }
#[tokio::test]
async fn bash_inherits_scoped_fs_pwd() {
    // No `cd` is ever issued here, so whatever `pwd` prints must resolve to
    // the workspace root handed to `setup()`. Both sides are canonicalized
    // so symlinked temp directories compare equal.
    let (workspace, _spill, registry) = setup();
    let tool = registry.get("Bash");
    let result = call(&tool, json!({ "command": "pwd" })).await;
    let printed = result.content.unwrap();
    assert_eq!(
        std::fs::canonicalize(printed.trim()).unwrap(),
        std::fs::canonicalize(workspace.path()).unwrap(),
    );
}
#[tokio::test]
async fn bash_spilled_file_is_readable_via_read_tool() {
    // When Bash output is long enough to spill, the first line of the
    // returned content is a marker naming the spill file. That file lives in
    // the spill directory, which is part of the readable scope, so the Read
    // tool must be able to open it like any other in-scope path.
    let (_dir, spill, registry) = setup();
    let tool = registry.get("Bash");
    let command = json!({ "command": "for i in $(seq 1 200); do echo line $i; done" });
    let result = call(&tool, command).await;
    let content = result.content.unwrap();
    let spill_dir = spill.path().to_str().unwrap();

    // Carve the spilled path out of the marker line: it begins at the spill
    // directory prefix and runs through the first `.log` after it.
    let marker = content.lines().next().unwrap();
    let start = marker
        .find(spill_dir)
        .expect("marker should reference the spill dir");
    let from_spill = &marker[start..];
    let ext_offset = from_spill
        .find(".log")
        .expect("marker should end the path with .log");
    // `+ 4` keeps the ".log" extension itself in the slice.
    let spilled = &from_spill[..ext_offset + 4];

    // Reading through the Read tool must succeed because the path is in scope.
    let read_result = call(&registry.get("Read"), json!({ "file_path": spilled })).await;
    let read_body = read_result.content.expect("Read returned content");

    // The saved file holds all 200 lines even though Bash itself returned
    // only the tail of the output.
    assert!(read_body.contains("line 1\n"), "missing line 1: {read_body}");
    assert!(read_body.contains("line 200"), "missing line 200");
}
// Sanity: unused Path import guard // Sanity: unused Path import guard
const _: fn() -> &'static Path = || Path::new("/"); const _: fn() -> &'static Path = || Path::new("/");

View File

@ -590,22 +590,31 @@ fn render_default(tc: &ToolCallBlock, mode: Mode) -> Vec<Line<'static>> {
.add_modifier(Modifier::ITALIC), .add_modifier(Modifier::ITALIC),
); );
let summary_source: String = match &tc.state { // Body source: prefer the full output (e.g. Bash's stdout/stderr) so
// Detail mode can expose it. Fall back to the summary when the tool
// didn't emit any content.
let body_source: String = match &tc.state {
ToolCallState::Done {
output: Some(out), ..
}
| ToolCallState::Error {
output: Some(out), ..
} => out.clone(),
ToolCallState::Done { summary, .. } | ToolCallState::Error { summary, .. } => { ToolCallState::Done { summary, .. } | ToolCallState::Error { summary, .. } => {
summary.clone() summary.clone()
} }
_ => String::new(), _ => String::new(),
}; };
let summary_cap = match mode { let body_cap = match mode {
Mode::Normal => 3, Mode::Normal => 3,
Mode::Detail => usize::MAX, Mode::Detail => usize::MAX,
Mode::Overview => unreachable!(), Mode::Overview => unreachable!(),
}; };
if !summary_source.is_empty() { if !body_source.is_empty() {
emit_capped_lines( emit_capped_lines(
&mut lines, &mut lines,
&summary_source, &body_source,
summary_cap, body_cap,
Style::default().fg(Color::Gray), Style::default().fg(Color::Gray),
); );
} }

View File

@ -53,6 +53,22 @@ CronCreate
が返ってくる。「上のツール定義と同じエンコーディング」と明示されており、以降そのツールは通常通り呼べるようになる。 が返ってくる。「上のツール定義と同じエンコーディング」と明示されており、以降そのツールは通常通り呼べるようになる。
### 1.5 パラメータ値のエンコーディング規約
ツール呼び出しの `<parameter>` タグの中身は、値の型に応じて異なるエンコーディングを使う:
- プリミティブ (string / number / boolean): そのままテキスト
- 配列・オブジェクト: JSON 文字列としてシリアライズしてテキストに
system prompt 末尾にも以下のように明記されている:
```
When making function calls using tools that accept array or object parameters
ensure those are structured using JSON.
```
つまり「XML が外側の骨格、中身は型に応じてテキスト/JSON」という二層構造。
--- ---
## 2. パラダイムの推測: prompted tool use ## 2. パラダイムの推測: prompted tool use
@ -180,3 +196,115 @@ deferred tools 方式は **prompted tool use を前提とする限り**、これ
> Claude Code の観察上は、ツール定義や呼び出しが prompt 内テキストとして見えている。ただし、公開されている Anthropic API の tool search は `tools` 配列、`defer_loading`、`tool_reference`、`tool_use` を使う structured tool use として説明されている。したがって、Claude Code 内部が完全な prompted tool use なのか、API の structured tool use を CLI / ハーネス側で別表現にレンダリングしているのか、あるいはそのハイブリッドなのかは未確認。 > Claude Code の観察上は、ツール定義や呼び出しが prompt 内テキストとして見えている。ただし、公開されている Anthropic API の tool search は `tools` 配列、`defer_loading`、`tool_reference`、`tool_use` を使う structured tool use として説明されている。したがって、Claude Code 内部が完全な prompted tool use なのか、API の structured tool use を CLI / ハーネス側で別表現にレンダリングしているのか、あるいはそのハイブリッドなのかは未確認。
Pod / insomnia への示唆としては、deferred tools の設計目的である context 圧縮、tool selection accuracy の維持、prefix cache の安定化は公式情報でも裏付けられる。一方で、Anthropic API の現在の公開設計を参考にするなら、`tool_search` 相当の実装は「単なる schema text の注入」だけでなく、内部 registry 上の tool reference、ロード済み tool の状態管理、検証レイヤを明確に分けて設計する方がよい。 Pod / insomnia への示唆としては、deferred tools の設計目的である context 圧縮、tool selection accuracy の維持、prefix cache の安定化は公式情報でも裏付けられる。一方で、Anthropic API の現在の公開設計を参考にするなら、`tool_search` 相当の実装は「単なる schema text の注入」だけでなく、内部 registry 上の tool reference、ロード済み tool の状態管理、検証レイヤを明確に分けて設計する方がよい。
---
## 9. ツール I/O の実際のフォーマット
(2026-05-01 追記)
deferred tools の本論からはやや脇道だが、ToolSearch がスキーマテキストを context に注入することで「ツールが使える状態」になる仕組みを理解するためには、ツール定義・呼び出しの実フォーマットと、Anthropic API の公開 surface との対応関係を押さえておく必要がある。
### 9.1 ツール定義の入力フォーマット
system prompt 冒頭に置かれる:
```
<functions>
<function>{"description": "...", "name": "Read", "parameters": {...JSONSchema...}}</function>
</functions>
```
外側は XML、`<function>` の中身は単行 JSON。JSON 部分は `name`, `description`, `parameters` の 3 フィールドで、`parameters` は標準的な JSONSchema (`type: "object"`, `properties`, `required`, `additionalProperties` 等)。
### 9.2 ツール呼び出しの出力フォーマット
モデル側の生成は完全に XML タグ列:
```
<function_calls>
<invoke name="Read">
<parameter name="file_path">/foo/bar</parameter>
</invoke>
</function_calls>
```
`<parameter>` の中身は §1.5 のエンコード規則に従う。
### 9.3 標準 Anthropic API との関係
開発者から見える API surface は完全に JSON ベース:
- リクエスト: `tools: [{name, description, input_schema}]`
- レスポンス: `tool_use` content block (JSON)
ところが Claude Code 上の観察では XML+JSON のハイブリッド表現が実際に流れている。両者の整合は次のように理解できる:
| | 標準 API (structured tool use) | Claude Code (prompted tool use) |
|---|---|---|
| 開発者が渡す形式 | JSON (`tools` 配列) | — (ハーネス内製) |
| モデルが受け取る prompt | 非公開 (推測: XML+JSON) | XML+JSON (観察可) |
| モデルが返す表現 | 非公開 (推測: XML タグ) → API が parse | XML タグ (観察可) |
| 開発者が受け取る形式 | `tool_use` block (JSON) | — |
標準 API では JSON ↔ モデル内部表現の変換が API サーバ側で隠蔽されている。Claude Code が観察できるのはその「裸の」表現で、Anthropic がモデル訓練に用いているフォーマットそのものと推測される (同じモデルなので)。
### 9.4 バリデーションとリトライの内製化
この構造を見ると、Tool Call API は実質「フォーマット規約 + schema validation + retry」をプロバイダー側に押し込めた仕様と読める:
1. **フォーマット規約**: XML 骨格と parameter エンコード規則
2. **バリデーション**: schema 違反の検出
3. **リトライ**: malformed なら API 内部で再生成し、開発者には完成品だけ返す
4. **訓練投資**: そのフォーマットで RLHF / SFT 済み
開発者が `tool_use` block を常に正しい JSON として受け取れるのは、(4) のおかげで失敗率が低く、(1)-(3) のおかげで失敗時も隠蔽されているから。Cline 等の prompted tool use 実装が同じことをやろうとしても、(4) が効かないため精度・安定性で見劣りしていたのは、この訓練投資の差で説明できる。
ただし Claude Code のハーネスは **token-level 制約 (grammar-based sampling) を入れていない**ことが、§10 の実演から推測できる。「自由に生成 → パース失敗なら error を tool_result で返して retry」という設計で、token 制約は使っていない。これは inference サーバ側の実装コストを避けつつ、(4) の訓練品質に依存する方針。
### 9.5 ローカル LLM への含意
Pod / insomnia でローカル LLM を使う場合、(4) が効かない。最近のローカル向けエージェントモデルは tool use 用に訓練されているので XML パースのような原始的処理は不要だが、各モデルが訓練された自前のフォーマット (Hermes / Llama / Qwen / Mistral 等で異なる) があり、それに合わせてレンダリングする必要がある。
具体的な責務分担は以下:
- ツール定義のレンダリング: モデル固有のテンプレート (chat template の `tools` 拡張等) に合わせる
- 出力パース: モデルが生成した形式 (タグ / JSON / 独自トークン) をハーネスでパース
- バリデーション: 自前で schema 照合
- リトライ: パース失敗・schema 違反時に error を返してモデル側に修正させる
これは Claude Code が内製化しているもののローカル版そのもの。
---
## 10. 実演: schema 未ロードでの呼び出し
(2026-05-01 実演)
§3 の推測「検証の真実はレジストリであり、context にスキーマテキストが現れたかどうかではない」を確認するため、deferred tool である `TaskList` を ToolSearch せずに直接呼び出した。
### 10.1 結果
受理された (`No tasks found` が返ってきた)。InputValidationError は発生しなかった。
### 10.2 含意
1. **schema 未ロード = 呼び出せない、ではない**。少なくとも引数なしで呼べるツールでは、schema text が context に無くても通る
2. ハーネスのバリデーションは「schema text が context にあるか」ではなく、**実引数が registry の schema に合致するか**で判定している
3. system-reminder の "calling them directly will fail with InputValidationError" は厳密には常に真ではない。引数が schema と矛盾しないケース (特に必須引数のないツールに引数なしで呼ぶ場合) では素通りする
4. context に積まれる schema text は、モデルが正しい引数を生成するための **誘導 / プロンプト材料** であって、validation の入力ではない
§3 の推測がそのまま裏付けられた形になる。
### 10.3 system-reminder の役割の再解釈
警告が「常に fail する」と読めるのは過剰表現で、実際には「**引数 schema が必要なツールは引数指定が必須なので、schema を知らずに呼べば事実上 fail する**」というモデルへの誘導と理解するのが正確。registry 側のバリデーションは引数の中身を見ているだけで、context に schema text があるかは見ていない。
### 10.4 設計上の含意
Pod / insomnia 側で同様の機構を作る場合:
- 「schema text が context にあるか」を validation 条件にする必要はない (むしろしない方が単純)
- registry に常時全 tool を登録しておき、context へのレンダリングだけ deferred にする
- モデルが (誘導を無視して) schema 未ロードのツールを呼んでも、引数が合っていれば実行してよい
- この方が registry の真実性が一本化されて実装が単純になる

View File

@ -25,3 +25,8 @@ Bash の子プロセスは ScopedFs を経由しない。Scope による保護
## 依存チケット ## 依存チケット
- [permission-extension-point.md](permission-extension-point.md) — deny/allow ルールによる Bash コマンド制御 - [permission-extension-point.md](permission-extension-point.md) — deny/allow ルールによる Bash コマンド制御
## Review
- 状態: Approve with follow-up
- レビュー詳細: [./bash-tool.review.md](./bash-tool.review.md)
- 日付: 2026-05-01

View File

@ -0,0 +1,44 @@
# Review: Bash ツール
## 前提・要件の確認
- **コマンド実行 (`tokio::process::Command`)**: 満たされている。`crates/tools/src/bash.rs:94-103` で `bash -c <wrapped>` を起動。`stdin(null)` で stdin ブロックを防止、`kill_on_drop(true)` でタイムアウト時のリーク防止。
- **timeout (default 120s / max 600s)**: 満たされている。`bash.rs:38-39, 64-67` の `clamp(1, 600)`、`bash.rs:130-144` の `tokio::time::timeout`。`timeout_kills_long_command` で動作確認済み。
- **作業ディレクトリの永続**: 満たされている。`cd` のパースに頼らず wrapper script + tempfile で post-command の `pwd` を取得(`bash.rs:74-90`)。`cd_persists_across_calls` テストで `subdir` 移動後の `pwd` が反映されることを確認。`canonicalize` 同士で比較しており macOS の `/private/tmp` ずれにも耐性あり。
- **stdout/stderr 結合**: 満たされている。wrapper 内 `exec 2>&1` で実装、`merges_stdout_and_stderr` テストで両方含まれることを確認。子プロセス側の `stderr(Stdio::null())` も整合。
- **`ToolOutput` summary(コマンド + exit code)+ content(出力)**: 満たされている。`bash.rs:164-175` で exit 0 / 非 0 / シグナルを区別。content が空のときは `None` を返しており、`SUMMARY_THRESHOLD` を意識した良い実装。
## アーキテクチャ・スコープ
- **層分離**: `tools` クレート内に閉じており、`llm-worker` を低レベル基盤に保つ方針と整合(`bash.rs:20` で `Tool` trait のみ依存)。`builtin_tools()` のファクトリ列に追加するだけで、層を跨ぐ侵入はない。
- **クレート命名/構造**: `bash.rs` を独立モジュールに切り出し、`lib.rs` で `pub use bash::bash_tool` のみ公開。`read.rs/write.rs/...` と一貫。
- **依存追加**: `Cargo.toml` の tokio features に `process`/`time`/`io-util`/`sync` を追加(`Cargo.toml:22`)。`tempfile` は既存。`cargo add` 経由前提のフィールド追加で違和感なし。
- **Permission 層との関係**: ticket の前提通り、ScopedFs では保護せず Permission 層に委譲。`lib.rs:18-19` のドキュメントコメントで明示しており、設計意図は読み手に伝わる。
- **設計判断 1(wrapper による pwd 取得)**: `cd` パースの脆さ(サブシェル、変数展開、関数定義内 `cd` 等)を回避できるので妥当。`exec` で bash 自体が置換されると wrapper が走らないが、`bash.rs:149-155` が「ファイル読めなければ pwd 据え置き」とフェイルソフトしておりロバスト。
- **設計判断 2(wrapper の `wait`)**: `(sleep 0.05; echo bg) &` のようなジョブで stdout が EOF せずハングする問題に対する実装上必須の対処。`background_job_does_not_hang` で回帰防止済み。
- **設計判断 3(`tokio::sync::Mutex` で逐次化)**: pwd の共有可変状態と「順序のある shell セッション」の意味論を考えると正解。長時間コマンドの間 lock を握り続けるのは仕様上自然(同一セッションの bash は元々直列)。
- **設計判断 4(256KB cap)**: worker 側 `ToolOutputLimits` の手前で OOM を抑える二重防壁。truncated marker の追記後に `String::from_utf8_lossy` で UTF-8 化しており、マルチバイト切断もロスレスではないが panic はしない。妥当。
- **設計判断 5(summary/content)**: 既存ツールと API 形状が一致。`SUMMARY_THRESHOLD` の境界も意識されている。
- **設計判断 6(description のプロンプト誘導)**: Read/Write/Edit/Glob/Grep を優先させる文言は、Claude Code リファレンスとも整合し、ローカルモデルでも効きやすい簡潔さ。
## 指摘事項
### Non-blocking / Follow-up
- **TUI 側の `render_default` 修正の同梱について** (`crates/tui/src/tool.rs:590-619`)
- 内容としては正しいバグ修正。Bash のような汎用ツールが Detail モードでも summary しか出ない状態を解消している。
- ただし、厳密には Bash チケットの範囲外(既存の任意の "default 経路の" ツールに同じ問題があったはず)。同梱の妥当性: Bash 投入によりバグが顕在化したこと、5 行程度の置き換えで完結すること、Bash 単体だと UX として未完であることを踏まえれば現実的な判断と言える。次回同種の状況では、TUI 表示仕様の修正として別チケットを切るほうがレビュー単位がきれいになる、というレベル。
- フォローアップ提案: `crates/tui/` 配下に `output` を含むレンダリングが Detail/Normal で正しく出ることを確認するスナップショット/ユニットテストを 1 本追加すると、将来の `summary` フォールバック方向への意図しない退行を防げる(現状はロジックレビューのみで担保)。
- **`docs/ref/claude-code-deferred-tools.md` への追記**: Bash 実装と直接関係しない文献参照の追加(Anthropic vs OpenAI 比較への言及)。1 段落で軽微とはいえ、チケットスコープからは外れている。次回はドキュメント更新も別コミット/別チケット推奨。
- **pwd 更新の堅牢性についての観察 (`bash.rs:149-155`)**: ユーザーコマンドが `exec some-program` で bash を置換した場合や、wrapper の `pwd > tempfile` がディスクフル等で失敗した場合に pwd が据え置かれる挙動になっている。仕様上は妥当だが、ユーザー視点では「`cd foo && exec bar` 後に `cd` が消えた」ように見える可能性がある。コメントで現挙動の合理性は説明されているので blocking ではないが、将来 Permission 層導入時にエッジケースとして再考の余地あり。
### Nits
- `BashParams` の `timeout` フィールドが `Option<u64>` に `#[serde(default)]` だが、`Option` は serde が自動的に欠落を `None` にするため `#[serde(default)]` は冗長(害はない)。
- `bash.rs:111-112` の `let mut child = child; let mut stdout = stdout;` は `async move` ブロックで mutable に再束縛しているだけ。慣用的だが `let mut` を引数側で書いてもよい。スタイル差。
## 判断
**Approve with follow-up** — チケット要件は完全に満たされており、設計判断もすべて合理的に説明されている。テストカバレッジ (8 unit + 1 integration) も妥当。同梱されている TUI 修正は実害のあるバグ修正で内容は正しいが、本来は別チケット相当のスコープ越えがあり、回帰テストの追加は次回までのフォローアップとして残しておくとよい。

View File

@ -0,0 +1,81 @@
# サブミット入力: FileRef リゾルバ
## 背景
`tickets/submit-tui-completion.md` で `@<path>` が typed atom として入力され、submit 時に `Segment::FileRef { path }` で Pod へ届く経路が完成した。一方 Pod 側 (`Pod::flatten_segments` in `crates/pod/src/pod.rs`) は今 `FileRef` を見ても resolver を持たず、`Segment::flatten_to_text` の placeholder (`[unresolved file ref: ...]`) を user message に inline するだけで、Warn alert を吐いて終わっている。
ClaudeCode の `@<path>` と同等の挙動 — submit 時にファイル本文を読み、LLM context にそのまま見せる — を入れる。`compact/worker.rs` の `mark_read_required` 経路で完成済の auto-read(`PodFsView::render_auto_read`)と兄弟関係になる、submit 時版のリゾルバ。
## 要件
### Item 配置
履歴に永続化する形は以下の **2 つの Item** にする:
```
[..., user_message, system_message(file1), system_message(file2), ...]
```
user message 自体は今と同じく `Segment::flatten_to_text` 由来のテキスト(`@<path>` トークンが残った placeholder 込み)。直後に `[File: <path>]\n<本文>` 形式の system message を、`FileRef` の出現順に追加する。次ターン以降も LLM が見える状態で残す(compact が走った時点で既存の auto-read 機構が引き継ぐ)。
inline 結合(user 1 メッセージに本文を流し込む)は採らない。
### 本文の取り扱い
- `PodFsView` (`crates/pod/src/fs_view.rs`) 経由で読む。スコープ判定は `ScopedFs` 任せ。
- 上限は通常の Tool Output と同じ `manifest::defaults::TOOL_OUTPUT_MAX_BYTES` (16 KB)。超過分は捨て、末尾に `[...truncated, <total> bytes total — use read_file for the rest]` を付ける。LLM が必要なら自分で `read_file` を呼ぶ前提。
- 非 UTF-8(バイナリ)はリゾルバが拒否する。後述の失敗扱いに倒す。
### 失敗時の扱い
スコープ外 / NotFound / バイナリ拒否は **Alert + placeholder 残置**:
- ユーザー向け Alert を `AlertLevel::Warn` で発火(理由を含めた一文)
- 該当 segment の system message は出さない(user message 中の `[unresolved file ref: <path>]` プレースホルダーがそのまま LLM に届く)
これは「ユーザーの誤入力を早期に可視化する」狙い。silent fallback にしない。
### Worker 側 API 拡張
submit 時に user message と system messages を一つの turn の前置として履歴に積む経路を、既存の `Interceptor` action-return パターンに合わせて足す。`TurnEndAction::ContinueWithMessages(Vec<Item>)` (`crates/llm-worker/src/worker.rs:903`) と同形:
- `Interceptor::on_prompt_submit` の戻り値を拡張し、`Continue` / `Cancel(String)` に加えて `ContinueWith(Vec<Item>)` を返せるようにする
- Worker の `Locked::run` は `ContinueWith` を受けたら user_item の push 直後に extras を `history.extend` する
- Hook (`crates/pod/src/hook.rs`) 側の戻り値(`PromptAction`)はこの拡張に乗せない。Hook は read-only な公開拡張面という設計(hook.rs:8-15 のコメント)を維持するため、Hook と Interceptor で戻り値型を分離する
### Pod 側の resolver 配線
- `PodFsView::resolve_file_ref(&self, path: &str, max_bytes: usize) -> Result<Item, ResolveError>` を新設。`ScopedFs` で読み、UTF-8 検証 + 16 KB 切詰めを行い `Item::system_message` を返す。エラーは `OutOfScope` / `NotFound` / `Binary` / `Io(io::Error)` を区別する
- `PodSharedState` に submit 中だけ使う stash (`Mutex<Vec<Item>>`) を一個追加。`pending_notifies` / `compact_state` と同じ流儀
- `Pod::run` で submit 直前に `Vec<Segment>` を走査して FileRef を resolver に通し、成功分は stash、失敗分は Alert に流す
- `PodInterceptor::on_prompt_submit` で stash を取り出して空でなければ `ContinueWith(items)` を返す
## 範囲外
- Knowledge / Workflow resolver(それぞれ `tickets/memory-phase2-consolidation.md` / `tickets/workflow.md` 側)
- 画像など binary attachment の typed メッセージ化(将来 `ContentPart::Image` 等を入れる別チケット)
- `@<path>:<line>-<line>` のような行範囲指定構文
- compact 後の auto-read との重複排除(compact が user message 由来の FileRef を読み直す可能性は許容)
## 完了条件
- `@<path>` を含む submit が、user message + 解決済み system message の 2 Item として履歴に残る
- 16 KB を超えるファイルは truncate され、その旨が LLM に見える形で示される
- スコープ外 / NotFound / バイナリは Alert として通知され、LLM 側は placeholder を見るのみ
- Hook の戻り値型は据え置き、Interceptor のみ `ContinueWith` を受け付ける
- 既存ビルド・テストを壊さない
## 依存
- `tickets/submit-tui-completion.md`(FileRef segment の wire 接続)
## 参照
- `crates/pod/src/pod.rs`(`flatten_segments`, `Pod::run`)
- `crates/pod/src/fs_view.rs`(`PodFsView` — auto-read の隣に置く)
- `crates/pod/src/ipc/interceptor.rs`(`PodInterceptor::on_prompt_submit`)
- `crates/pod/src/shared_state.rs`(stash 追加先)
- `crates/llm-worker/src/interceptor.rs`(`PromptAction` 拡張)
- `crates/llm-worker/src/worker.rs:903`(`TurnEndAction::ContinueWithMessages` 既存パターン)
- `crates/pod/src/hook.rs:8-15`(Hook と Interceptor の責務分離 doc)
- `crates/manifest/src/defaults.rs`(`TOOL_OUTPUT_MAX_BYTES`)