Compare commits
3 Commits
75c61bd3cb
...
732f959c9d
| Author | SHA1 | Date | |
|---|---|---|---|
| 732f959c9d | |||
| 6e9ef385c8 | |||
| c331936455 |
2
.insomnia/.gitignore
vendored
2
.insomnia/.gitignore
vendored
|
|
@ -1 +1 @@
|
||||||
_memory
|
_staging
|
||||||
|
|
|
||||||
0
.insomnia/knowledge/test.md
Normal file
0
.insomnia/knowledge/test.md
Normal file
1
TODO.md
1
TODO.md
|
|
@ -15,6 +15,7 @@
|
||||||
- [ ] ユーザーマニフェストのモデル設定 wizard → [tickets/tui-user-model-setup.md](tickets/tui-user-model-setup.md)
|
- [ ] ユーザーマニフェストのモデル設定 wizard → [tickets/tui-user-model-setup.md](tickets/tui-user-model-setup.md)
|
||||||
- [ ] サブミット入力
|
- [ ] サブミット入力
|
||||||
- [ ] TUI 補完 + 型付き atom 化 → [tickets/submit-tui-completion.md](tickets/submit-tui-completion.md)
|
- [ ] TUI 補完 + 型付き atom 化 → [tickets/submit-tui-completion.md](tickets/submit-tui-completion.md)
|
||||||
|
- [ ] FileRef リゾルバ → [tickets/submit-file-ref-resolver.md](tickets/submit-file-ref-resolver.md)
|
||||||
- [ ] メモリ機構
|
- [ ] メモリ機構
|
||||||
- [ ] Phase 2 consolidation → [tickets/memory-phase2-consolidation.md](tickets/memory-phase2-consolidation.md)
|
- [ ] Phase 2 consolidation → [tickets/memory-phase2-consolidation.md](tickets/memory-phase2-consolidation.md)
|
||||||
- [ ] 使用頻度メトリクス + Knowledge 化候補レポート → [tickets/memory-usage-metrics.md](tickets/memory-usage-metrics.md)
|
- [ ] 使用頻度メトリクス + Knowledge 化候補レポート → [tickets/memory-usage-metrics.md](tickets/memory-usage-metrics.md)
|
||||||
|
|
|
||||||
|
|
@ -17,12 +17,18 @@ use crate::tool::{Tool, ToolCall, ToolMeta, ToolResult};
|
||||||
// =============================================================================
|
// =============================================================================
|
||||||
|
|
||||||
/// Action after prompt submission.
|
/// Action after prompt submission.
|
||||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
#[derive(Debug, Clone, PartialEq)]
|
||||||
pub enum PromptAction {
|
pub enum PromptAction {
|
||||||
/// Proceed normally.
|
/// Proceed normally.
|
||||||
Continue,
|
Continue,
|
||||||
/// Cancel with a reason.
|
/// Cancel with a reason.
|
||||||
Cancel(String),
|
Cancel(String),
|
||||||
|
/// Proceed, and append these items to history right after the user
|
||||||
|
/// message. Mirrors [`TurnEndAction::ContinueWithMessages`] for the
|
||||||
|
/// submit edge: lets the upper layer attach resolver-produced
|
||||||
|
/// system messages (e.g. `@<path>` file content) so they sit
|
||||||
|
/// adjacent to the user message that referenced them.
|
||||||
|
ContinueWith(Vec<Item>),
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Action before an LLM request.
|
/// Action before an LLM request.
|
||||||
|
|
|
||||||
|
|
@ -1338,16 +1338,20 @@ impl<C: LlmClient> Worker<C, Locked> {
|
||||||
self.reset_interruption_state();
|
self.reset_interruption_state();
|
||||||
// Interceptor: on_prompt_submit
|
// Interceptor: on_prompt_submit
|
||||||
let mut user_item = Item::user_message(user_input);
|
let mut user_item = Item::user_message(user_input);
|
||||||
match self.interceptor.on_prompt_submit(&mut user_item).await {
|
let extras = match self.interceptor.on_prompt_submit(&mut user_item).await {
|
||||||
PromptAction::Cancel(reason) => {
|
PromptAction::Cancel(reason) => {
|
||||||
self.last_run_interrupted = true;
|
self.last_run_interrupted = true;
|
||||||
return self
|
return self
|
||||||
.finalize_interruption(Err(WorkerError::Aborted(reason)))
|
.finalize_interruption(Err(WorkerError::Aborted(reason)))
|
||||||
.await;
|
.await;
|
||||||
}
|
}
|
||||||
PromptAction::Continue => {}
|
PromptAction::Continue => Vec::new(),
|
||||||
}
|
PromptAction::ContinueWith(items) => items,
|
||||||
|
};
|
||||||
self.history.push(user_item);
|
self.history.push(user_item);
|
||||||
|
if !extras.is_empty() {
|
||||||
|
self.history.extend(extras);
|
||||||
|
}
|
||||||
let result = self.run_turn_loop().await;
|
let result = self.run_turn_loop().await;
|
||||||
self.finalize_interruption(result).await
|
self.finalize_interruption(result).await
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -156,6 +156,23 @@ impl Scope {
|
||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Deny rules with their targets resolved to absolute paths.
|
||||||
|
///
|
||||||
|
/// Counterpart to [`allow_rules`](Self::allow_rules); together they
|
||||||
|
/// round-trip through [`ScopeConfig`] for callers that need to
|
||||||
|
/// rebuild a scope after layering extra rules on top of an
|
||||||
|
/// already-constructed [`Scope`].
|
||||||
|
pub fn deny_rules(&self) -> Vec<ScopeRule> {
|
||||||
|
self.deny
|
||||||
|
.iter()
|
||||||
|
.map(|r| ScopeRule {
|
||||||
|
target: r.target.clone(),
|
||||||
|
permission: r.permission,
|
||||||
|
recursive: r.recursive,
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
/// Iterate over absolute paths granted `Write` by an allow rule.
|
/// Iterate over absolute paths granted `Write` by an allow rule.
|
||||||
/// Subset of [`readable_paths`](Self::readable_paths).
|
/// Subset of [`readable_paths`](Self::readable_paths).
|
||||||
pub fn writable_paths(&self) -> impl Iterator<Item = &Path> {
|
pub fn writable_paths(&self) -> impl Iterator<Item = &Path> {
|
||||||
|
|
|
||||||
|
|
@ -221,20 +221,49 @@ impl PodController {
|
||||||
});
|
});
|
||||||
|
|
||||||
// Register the builtin file-manipulation tools (Read / Write /
|
// Register the builtin file-manipulation tools (Read / Write /
|
||||||
// Edit / Glob / Grep). `ScopedFs` carries the pod-lifetime
|
// Edit / Glob / Grep / Bash). `ScopedFs` carries the pod-
|
||||||
// scope/pwd; `Tracker` is session-scoped — a fresh instance per
|
// lifetime scope/pwd; `Tracker` is session-scoped — a fresh
|
||||||
// controller spawn ensures state from a previous process
|
// instance per controller spawn ensures state from a previous
|
||||||
// lifetime cannot be reused after a resume. The tracker is
|
// process lifetime cannot be reused after a resume. The tracker
|
||||||
// also handed to the Pod itself so Pod-level operations (e.g.
|
// is also handed to the Pod itself so Pod-level operations (e.g.
|
||||||
// context compaction) can ask which files the agent has been
|
// context compaction) can ask which files the agent has been
|
||||||
// touching.
|
// touching.
|
||||||
let fs = tools::ScopedFs::new(scope_for_tools, pwd_for_tools.clone());
|
//
|
||||||
|
// Bash spills long outputs to a per-pod subdir under the
|
||||||
|
// runtime dir. We layer a recursive `allow(Read)` rule for
|
||||||
|
// that path on top of the user-facing scope so the agent can
|
||||||
|
// `Read` the saved files without polluting the workspace.
|
||||||
|
// Same approach memory takes for its deny rules: round-trip
|
||||||
|
// through `ScopeConfig` and rebuild via `from_config`.
|
||||||
|
let bash_output_dir = runtime_dir.path().join("bash-output");
|
||||||
|
std::fs::create_dir_all(&bash_output_dir).map_err(|e| {
|
||||||
|
std::io::Error::other(format!(
|
||||||
|
"create bash output dir {}: {e}",
|
||||||
|
bash_output_dir.display()
|
||||||
|
))
|
||||||
|
})?;
|
||||||
|
let mut scope_config = manifest::ScopeConfig {
|
||||||
|
allow: scope_for_tools.allow_rules(),
|
||||||
|
deny: scope_for_tools.deny_rules(),
|
||||||
|
};
|
||||||
|
scope_config.allow.push(manifest::ScopeRule {
|
||||||
|
target: bash_output_dir.clone(),
|
||||||
|
permission: manifest::Permission::Read,
|
||||||
|
recursive: true,
|
||||||
|
});
|
||||||
|
let scope_with_bash = manifest::Scope::from_config(&scope_config)
|
||||||
|
.map_err(std::io::Error::other)?;
|
||||||
|
let fs = tools::ScopedFs::new(scope_with_bash, pwd_for_tools.clone());
|
||||||
let tracker = tools::Tracker::new();
|
let tracker = tools::Tracker::new();
|
||||||
// The same ScopedFs also powers the IPC `ListCompletions`
|
// The same ScopedFs also powers the IPC `ListCompletions`
|
||||||
// query — keep a clone for the FS view we attach below,
|
// query — keep a clone for the FS view we attach below,
|
||||||
// since the tools consume `fs` itself.
|
// since the tools consume `fs` itself.
|
||||||
fs_for_view = fs.clone();
|
fs_for_view = fs.clone();
|
||||||
worker.register_tools(tools::builtin_tools(fs, tracker.clone()));
|
worker.register_tools(tools::builtin_tools(
|
||||||
|
fs,
|
||||||
|
tracker.clone(),
|
||||||
|
bash_output_dir,
|
||||||
|
));
|
||||||
|
|
||||||
// Memory subsystem opt-in. When `[memory]` is present in
|
// Memory subsystem opt-in. When `[memory]` is present in
|
||||||
// the manifest, register the memory-specific Read/Write/Edit
|
// the manifest, register the memory-specific Read/Write/Edit
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,7 @@
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
|
|
||||||
use llm_worker::Item;
|
use llm_worker::Item;
|
||||||
use tools::ScopedFs;
|
use tools::{ScopedFs, ToolsError};
|
||||||
use tracing::warn;
|
use tracing::warn;
|
||||||
|
|
||||||
/// 補完候補1件の最大数。`list_file_completions` がこの値を超えたら打ち切り。
|
/// 補完候補1件の最大数。`list_file_completions` がこの値を超えたら打ち切り。
|
||||||
|
|
@ -45,6 +45,29 @@ pub struct FileCandidate {
|
||||||
pub is_dir: bool,
|
pub is_dir: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// `resolve_file_ref` の失敗理由。Pod 側で Alert に振り分けるために
|
||||||
|
/// ScopedFs / 内部判定の両方を区別できるよう保持する。
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub enum ResolveError {
|
||||||
|
/// Path resolution / scope check failed via `ScopedFs`.
|
||||||
|
Fs(ToolsError),
|
||||||
|
/// File contents are not valid UTF-8 (binary / non-text).
|
||||||
|
Binary { path: PathBuf },
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Display for ResolveError {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
match self {
|
||||||
|
ResolveError::Fs(e) => write!(f, "{e}"),
|
||||||
|
ResolveError::Binary { path } => {
|
||||||
|
write!(f, "file is not valid UTF-8 text: {}", path.display())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::error::Error for ResolveError {}
|
||||||
|
|
||||||
impl PodFsView {
|
impl PodFsView {
|
||||||
pub fn new(fs: ScopedFs) -> Self {
|
pub fn new(fs: ScopedFs) -> Self {
|
||||||
Self { fs }
|
Self { fs }
|
||||||
|
|
@ -83,6 +106,41 @@ impl PodFsView {
|
||||||
out
|
out
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// `path` を ScopedFs 経由で読み、`[File: <path>]\n<body>` 形式の
|
||||||
|
/// system message を返す。submit 時の `Segment::FileRef` リゾルバが
|
||||||
|
/// 使う経路。
|
||||||
|
///
|
||||||
|
/// - `path` は relative なら pwd 相対、absolute なら absolute として解釈
|
||||||
|
/// - `max_bytes` を超える本文は切り詰め、末尾に
|
||||||
|
/// `[...truncated, <total> bytes total — use read_file for the rest]`
|
||||||
|
/// を付与する
|
||||||
|
/// - 非 UTF-8 (バイナリ) は `ResolveError::Binary` で拒否
|
||||||
|
/// - スコープ外 / NotFound 等は `ResolveError::Fs` で返す
|
||||||
|
pub fn resolve_file_ref(&self, path: &str, max_bytes: usize) -> Result<Item, ResolveError> {
|
||||||
|
let p = Path::new(path);
|
||||||
|
let abs = if p.is_absolute() {
|
||||||
|
p.to_path_buf()
|
||||||
|
} else {
|
||||||
|
self.fs.pwd().join(p)
|
||||||
|
};
|
||||||
|
let bytes = self.fs.read_bytes(&abs).map_err(ResolveError::Fs)?;
|
||||||
|
let total = bytes.len();
|
||||||
|
let (body_bytes, truncated) = if total > max_bytes {
|
||||||
|
(&bytes[..max_bytes], true)
|
||||||
|
} else {
|
||||||
|
(bytes.as_slice(), false)
|
||||||
|
};
|
||||||
|
let body = std::str::from_utf8(body_bytes)
|
||||||
|
.map_err(|_| ResolveError::Binary { path: abs.clone() })?;
|
||||||
|
let mut text = format!("[File: {path}]\n{body}");
|
||||||
|
if truncated {
|
||||||
|
text.push_str(&format!(
|
||||||
|
"\n[...truncated, {total} bytes total — use read_file for the rest]"
|
||||||
|
));
|
||||||
|
}
|
||||||
|
Ok(Item::system_message(text))
|
||||||
|
}
|
||||||
|
|
||||||
/// `prefix` にマッチするファイル / ディレクトリを scope 内で浅く列挙する。
|
/// `prefix` にマッチするファイル / ディレクトリを scope 内で浅く列挙する。
|
||||||
///
|
///
|
||||||
/// - `prefix` が空 or `pwd` 相対のときは pwd 直下を見る
|
/// - `prefix` が空 or `pwd` 相対のときは pwd 直下を見る
|
||||||
|
|
@ -227,6 +285,61 @@ mod tests {
|
||||||
assert!(!rendered.contains("alpha"));
|
assert!(!rendered.contains("alpha"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A small in-scope text file resolves to an item whose debug rendering
/// carries the `[File: …]` header and the file body, with no truncation
/// marker.
#[test]
fn resolve_file_ref_emits_system_message_with_path_header() {
    let tmp = TempDir::new().unwrap();
    let target = tmp.path().join("hello.txt");
    std::fs::write(target, "hello world").unwrap();

    let resolved = PodFsView::new(fs_for(&tmp))
        .resolve_file_ref("hello.txt", 1024)
        .unwrap();
    let rendered = format!("{:?}", resolved);

    for expected in ["[File: hello.txt]", "hello world"] {
        assert!(rendered.contains(expected));
    }
    assert!(!rendered.contains("truncated"));
}
|
||||||
|
|
||||||
|
/// A 2048-byte file read with a 256-byte cap still resolves, and the
/// rendered item carries the header, the truncation marker and the
/// original total size.
#[test]
fn resolve_file_ref_truncates_with_hint_when_over_cap() {
    let tmp = TempDir::new().unwrap();
    let payload = "x".repeat(2048);
    std::fs::write(tmp.path().join("big.txt"), &payload).unwrap();

    let resolved = PodFsView::new(fs_for(&tmp))
        .resolve_file_ref("big.txt", 256)
        .unwrap();
    let rendered = format!("{:?}", resolved);

    for expected in ["[File: big.txt]", "truncated", "2048 bytes total"] {
        assert!(rendered.contains(expected));
    }
}
|
||||||
|
|
||||||
|
/// Bytes that are not valid UTF-8 must be reported as
/// `ResolveError::Binary`.
#[test]
fn resolve_file_ref_rejects_binary_with_binary_error() {
    let tmp = TempDir::new().unwrap();
    let blob: [u8; 4] = [0xff, 0xfe, 0x00, 0x80];
    std::fs::write(tmp.path().join("blob.bin"), blob).unwrap();

    let outcome = PodFsView::new(fs_for(&tmp)).resolve_file_ref("blob.bin", 1024);

    let failure = outcome.unwrap_err();
    assert!(matches!(failure, ResolveError::Binary { .. }));
}
|
||||||
|
|
||||||
|
/// An absolute path that lies outside the scope root must come back as
/// `ResolveError::Fs`, even though the file exists on disk.
#[test]
fn resolve_file_ref_returns_fs_error_for_out_of_scope() {
    let outer = TempDir::new().unwrap();
    let scoped_root = outer.path().join("scoped");
    std::fs::create_dir(&scoped_root).unwrap();

    // The secret sits next to the scoped directory, not inside it.
    let secret = outer.path().join("secret.txt");
    std::fs::write(&secret, "nope").unwrap();

    let scope = Scope::writable(&scoped_root).unwrap();
    let view = PodFsView::new(ScopedFs::new(scope, scoped_root));

    let failure = view
        .resolve_file_ref(secret.to_str().unwrap(), 1024)
        .unwrap_err();
    assert!(matches!(failure, ResolveError::Fs(_)));
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn render_auto_read_skips_unreadable_targets() {
|
fn render_auto_read_skips_unreadable_targets() {
|
||||||
let dir = TempDir::new().unwrap();
|
let dir = TempDir::new().unwrap();
|
||||||
|
|
|
||||||
|
|
@ -21,6 +21,30 @@ use llm_worker::interceptor::{
|
||||||
use llm_worker::tool::ToolOutput;
|
use llm_worker::tool::ToolOutput;
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
|
|
||||||
|
/// Hook-facing prompt-submit action.
|
||||||
|
///
|
||||||
|
/// A strict subset of [`PromptAction`]: Hooks may continue or cancel
|
||||||
|
/// the submit, but cannot inject items into history. The
|
||||||
|
/// `ContinueWith(Vec<Item>)` variant is reserved for the internal
|
||||||
|
/// `Interceptor` so that Hook (the public extension surface) stays
|
||||||
|
/// read-only by construction (see module-level doc).
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
|
pub enum HookPromptAction {
|
||||||
|
/// Proceed normally.
|
||||||
|
Continue,
|
||||||
|
/// Cancel with a reason.
|
||||||
|
Cancel(String),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<HookPromptAction> for PromptAction {
|
||||||
|
fn from(action: HookPromptAction) -> Self {
|
||||||
|
match action {
|
||||||
|
HookPromptAction::Continue => PromptAction::Continue,
|
||||||
|
HookPromptAction::Cancel(reason) => PromptAction::Cancel(reason),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// =============================================================================
|
// =============================================================================
|
||||||
// Hook input summary types (read-only)
|
// Hook input summary types (read-only)
|
||||||
// =============================================================================
|
// =============================================================================
|
||||||
|
|
@ -121,7 +145,7 @@ pub struct OnAbort;
|
||||||
|
|
||||||
impl HookEventKind for OnPromptSubmit {
|
impl HookEventKind for OnPromptSubmit {
|
||||||
type Input = PromptSubmitInfo;
|
type Input = PromptSubmitInfo;
|
||||||
type Output = PromptAction;
|
type Output = HookPromptAction;
|
||||||
}
|
}
|
||||||
|
|
||||||
impl HookEventKind for PreLlmRequest {
|
impl HookEventKind for PreLlmRequest {
|
||||||
|
|
|
||||||
|
|
@ -22,8 +22,8 @@ use tracing::info;
|
||||||
|
|
||||||
use crate::compact::state::CompactState;
|
use crate::compact::state::CompactState;
|
||||||
use crate::hook::{
|
use crate::hook::{
|
||||||
AbortInfo, HookRegistry, PreRequestInfo, PromptSubmitInfo, ToolCallSummary, ToolResultSummary,
|
AbortInfo, HookPromptAction, HookRegistry, PreRequestInfo, PromptSubmitInfo, ToolCallSummary,
|
||||||
TurnEndInfo,
|
ToolResultSummary, TurnEndInfo,
|
||||||
};
|
};
|
||||||
use crate::ipc::notify_buffer::{NotifyBuffer, format_notify};
|
use crate::ipc::notify_buffer::{NotifyBuffer, format_notify};
|
||||||
use crate::prompt::catalog::PromptCatalog;
|
use crate::prompt::catalog::PromptCatalog;
|
||||||
|
|
@ -43,6 +43,11 @@ pub(crate) struct PodInterceptor {
|
||||||
/// Pending-notification buffer drained into the per-request
|
/// Pending-notification buffer drained into the per-request
|
||||||
/// context at the head of `pre_llm_request`.
|
/// context at the head of `pre_llm_request`.
|
||||||
pending_notifies: NotifyBuffer,
|
pending_notifies: NotifyBuffer,
|
||||||
|
/// Submit-scoped stash of resolver-produced system messages.
|
||||||
|
/// Drained inside `on_prompt_submit` and returned via
|
||||||
|
/// `PromptAction::ContinueWith`. Populated by `Pod::run` immediately
|
||||||
|
/// before handing off to the worker.
|
||||||
|
pending_attachments: Arc<Mutex<Vec<Item>>>,
|
||||||
/// Prompt catalog used to render the injected notification wrapper.
|
/// Prompt catalog used to render the injected notification wrapper.
|
||||||
prompts: Arc<PromptCatalog>,
|
prompts: Arc<PromptCatalog>,
|
||||||
/// Next turn index assigned by `on_prompt_submit`.
|
/// Next turn index assigned by `on_prompt_submit`.
|
||||||
|
|
@ -57,6 +62,7 @@ impl PodInterceptor {
|
||||||
compact_state: Option<Arc<CompactState>>,
|
compact_state: Option<Arc<CompactState>>,
|
||||||
usage_history: Option<Arc<Mutex<Vec<UsageRecord>>>>,
|
usage_history: Option<Arc<Mutex<Vec<UsageRecord>>>>,
|
||||||
pending_notifies: NotifyBuffer,
|
pending_notifies: NotifyBuffer,
|
||||||
|
pending_attachments: Arc<Mutex<Vec<Item>>>,
|
||||||
prompts: Arc<PromptCatalog>,
|
prompts: Arc<PromptCatalog>,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
Self {
|
Self {
|
||||||
|
|
@ -64,6 +70,7 @@ impl PodInterceptor {
|
||||||
compact_state,
|
compact_state,
|
||||||
usage_history,
|
usage_history,
|
||||||
pending_notifies,
|
pending_notifies,
|
||||||
|
pending_attachments,
|
||||||
prompts,
|
prompts,
|
||||||
next_turn_index: AtomicUsize::new(0),
|
next_turn_index: AtomicUsize::new(0),
|
||||||
tool_calls_this_turn: AtomicUsize::new(0),
|
tool_calls_this_turn: AtomicUsize::new(0),
|
||||||
|
|
@ -98,11 +105,21 @@ impl Interceptor for PodInterceptor {
|
||||||
};
|
};
|
||||||
for hook in &self.registry.on_prompt_submit {
|
for hook in &self.registry.on_prompt_submit {
|
||||||
let action = hook.call(&info).await;
|
let action = hook.call(&info).await;
|
||||||
if !matches!(action, PromptAction::Continue) {
|
if !matches!(action, HookPromptAction::Continue) {
|
||||||
return action;
|
return action.into();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
PromptAction::Continue
|
let extras = std::mem::take(
|
||||||
|
&mut *self
|
||||||
|
.pending_attachments
|
||||||
|
.lock()
|
||||||
|
.expect("pending_attachments poisoned"),
|
||||||
|
);
|
||||||
|
if extras.is_empty() {
|
||||||
|
PromptAction::Continue
|
||||||
|
} else {
|
||||||
|
PromptAction::ContinueWith(extras)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn pre_llm_request(&self, context: &mut Vec<Item>) -> PreRequestAction {
|
async fn pre_llm_request(&self, context: &mut Vec<Item>) -> PreRequestAction {
|
||||||
|
|
@ -297,6 +314,7 @@ mod tests {
|
||||||
Some(state),
|
Some(state),
|
||||||
Some(history),
|
Some(history),
|
||||||
NotifyBuffer::new(),
|
NotifyBuffer::new(),
|
||||||
|
Arc::new(Mutex::new(Vec::new())),
|
||||||
PromptCatalog::builtins_only().unwrap(),
|
PromptCatalog::builtins_only().unwrap(),
|
||||||
);
|
);
|
||||||
let mut ctx = ctx_items;
|
let mut ctx = ctx_items;
|
||||||
|
|
@ -321,6 +339,7 @@ mod tests {
|
||||||
Some(state),
|
Some(state),
|
||||||
Some(history),
|
Some(history),
|
||||||
NotifyBuffer::new(),
|
NotifyBuffer::new(),
|
||||||
|
Arc::new(Mutex::new(Vec::new())),
|
||||||
PromptCatalog::builtins_only().unwrap(),
|
PromptCatalog::builtins_only().unwrap(),
|
||||||
);
|
);
|
||||||
let mut ctx = ctx_items;
|
let mut ctx = ctx_items;
|
||||||
|
|
@ -346,6 +365,7 @@ mod tests {
|
||||||
Some(state),
|
Some(state),
|
||||||
Some(history),
|
Some(history),
|
||||||
NotifyBuffer::new(),
|
NotifyBuffer::new(),
|
||||||
|
Arc::new(Mutex::new(Vec::new())),
|
||||||
PromptCatalog::builtins_only().unwrap(),
|
PromptCatalog::builtins_only().unwrap(),
|
||||||
);
|
);
|
||||||
let mut ctx = ctx_items;
|
let mut ctx = ctx_items;
|
||||||
|
|
@ -365,6 +385,7 @@ mod tests {
|
||||||
None,
|
None,
|
||||||
None,
|
None,
|
||||||
NotifyBuffer::new(),
|
NotifyBuffer::new(),
|
||||||
|
Arc::new(Mutex::new(Vec::new())),
|
||||||
PromptCatalog::builtins_only().unwrap(),
|
PromptCatalog::builtins_only().unwrap(),
|
||||||
);
|
);
|
||||||
let mut ctx: Vec<Item> = Vec::new();
|
let mut ctx: Vec<Item> = Vec::new();
|
||||||
|
|
@ -396,6 +417,7 @@ mod tests {
|
||||||
None,
|
None,
|
||||||
None,
|
None,
|
||||||
buffer.clone(),
|
buffer.clone(),
|
||||||
|
Arc::new(Mutex::new(Vec::new())),
|
||||||
PromptCatalog::builtins_only().unwrap(),
|
PromptCatalog::builtins_only().unwrap(),
|
||||||
);
|
);
|
||||||
let mut ctx: Vec<Item> = vec![Item::user_message("hi")];
|
let mut ctx: Vec<Item> = vec![Item::user_message("hi")];
|
||||||
|
|
@ -431,6 +453,7 @@ mod tests {
|
||||||
Some(state),
|
Some(state),
|
||||||
Some(history),
|
Some(history),
|
||||||
buffer.clone(),
|
buffer.clone(),
|
||||||
|
Arc::new(Mutex::new(Vec::new())),
|
||||||
PromptCatalog::builtins_only().unwrap(),
|
PromptCatalog::builtins_only().unwrap(),
|
||||||
);
|
);
|
||||||
let mut ctx = ctx_items;
|
let mut ctx = ctx_items;
|
||||||
|
|
@ -456,6 +479,7 @@ mod tests {
|
||||||
None,
|
None,
|
||||||
None,
|
None,
|
||||||
NotifyBuffer::new(),
|
NotifyBuffer::new(),
|
||||||
|
Arc::new(Mutex::new(Vec::new())),
|
||||||
PromptCatalog::builtins_only().unwrap(),
|
PromptCatalog::builtins_only().unwrap(),
|
||||||
);
|
);
|
||||||
let mut ctx: Vec<Item> = Vec::new();
|
let mut ctx: Vec<Item> = Vec::new();
|
||||||
|
|
|
||||||
|
|
@ -99,6 +99,12 @@ pub struct Pod<C: LlmClient, St: Store> {
|
||||||
/// injection into the next LLM request. Shared with the
|
/// injection into the next LLM request. Shared with the
|
||||||
/// PodInterceptor installed in `ensure_interceptor_installed`.
|
/// PodInterceptor installed in `ensure_interceptor_installed`.
|
||||||
pending_notifies: NotifyBuffer,
|
pending_notifies: NotifyBuffer,
|
||||||
|
/// Submit-scoped stash for resolver-produced system messages
|
||||||
|
/// (currently `@<path>` file content). `Pod::run` fills this
|
||||||
|
/// before handing off to the worker; `PodInterceptor::on_prompt_submit`
|
||||||
|
/// drains it and returns `ContinueWith` so the items land in
|
||||||
|
/// history right after the user message that referenced them.
|
||||||
|
pending_attachments: Arc<Mutex<Vec<Item>>>,
|
||||||
/// Scope allocation in the machine-wide lock file. `Some` for
|
/// Scope allocation in the machine-wide lock file. `Some` for
|
||||||
/// Pods built via `from_manifest` / `from_manifest_spawned` /
|
/// Pods built via `from_manifest` / `from_manifest_spawned` /
|
||||||
/// `restore_from_manifest` (production paths); `None` for the
|
/// `restore_from_manifest` (production paths); `None` for the
|
||||||
|
|
@ -185,6 +191,7 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
|
||||||
alerter: None,
|
alerter: None,
|
||||||
event_tx: None,
|
event_tx: None,
|
||||||
pending_notifies: NotifyBuffer::new(),
|
pending_notifies: NotifyBuffer::new(),
|
||||||
|
pending_attachments: Arc::new(Mutex::new(Vec::new())),
|
||||||
scope_allocation: None,
|
scope_allocation: None,
|
||||||
callback_socket: None,
|
callback_socket: None,
|
||||||
prompts,
|
prompts,
|
||||||
|
|
@ -502,6 +509,7 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
|
||||||
compact_state,
|
compact_state,
|
||||||
usage_history_handle,
|
usage_history_handle,
|
||||||
self.pending_notifies.clone(),
|
self.pending_notifies.clone(),
|
||||||
|
self.pending_attachments.clone(),
|
||||||
self.prompts.clone(),
|
self.prompts.clone(),
|
||||||
);
|
);
|
||||||
self.worker_mut().set_interceptor(interceptor);
|
self.worker_mut().set_interceptor(interceptor);
|
||||||
|
|
@ -614,6 +622,18 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
|
||||||
.await?;
|
.await?;
|
||||||
self.user_segments.push(input.clone());
|
self.user_segments.push(input.clone());
|
||||||
|
|
||||||
|
// Resolve `@<path>` refs to system messages stashed for the
|
||||||
|
// PodInterceptor to attach right after the user message. Failures
|
||||||
|
// surface as user-facing Alerts and the placeholder remains in
|
||||||
|
// the flattened text so the LLM sees the unresolved intent.
|
||||||
|
let attachments = self.resolve_file_refs(&input);
|
||||||
|
if !attachments.is_empty() {
|
||||||
|
*self
|
||||||
|
.pending_attachments
|
||||||
|
.lock()
|
||||||
|
.expect("pending_attachments poisoned") = attachments;
|
||||||
|
}
|
||||||
|
|
||||||
let flattened = self.flatten_segments(&input);
|
let flattened = self.flatten_segments(&input);
|
||||||
|
|
||||||
let history_before = self.worker.as_ref().unwrap().history().len();
|
let history_before = self.worker.as_ref().unwrap().history().len();
|
||||||
|
|
@ -627,26 +647,46 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
|
||||||
self.handle_worker_result(result, history_before).await
|
self.handle_worker_result(result, history_before).await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Resolve every `Segment::FileRef` in `segments` to a `[File: <path>]`
|
||||||
|
/// system message via `PodFsView`. Resolution failures (out-of-scope,
|
||||||
|
/// not-found, binary, I/O) surface as `AlertLevel::Warn` Alerts and
|
||||||
|
/// are skipped — the unresolved placeholder stays in the flattened
|
||||||
|
/// user message so the LLM still sees the intent.
|
||||||
|
fn resolve_file_refs(&self, segments: &[Segment]) -> Vec<Item> {
|
||||||
|
let view = crate::fs_view::PodFsView::new(tools::ScopedFs::new(
|
||||||
|
self.scope.clone(),
|
||||||
|
self.pwd.clone(),
|
||||||
|
));
|
||||||
|
let mut out = Vec::new();
|
||||||
|
for seg in segments {
|
||||||
|
let Segment::FileRef { path } = seg else {
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
match view.resolve_file_ref(path, manifest::defaults::TOOL_OUTPUT_MAX_BYTES) {
|
||||||
|
Ok(item) => out.push(item),
|
||||||
|
Err(e) => {
|
||||||
|
self.alert(
|
||||||
|
AlertLevel::Warn,
|
||||||
|
AlertSource::Pod,
|
||||||
|
format!("file ref @{path} could not be resolved: {e}"),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
out
|
||||||
|
}
|
||||||
|
|
||||||
/// Flatten a typed segment list into the single string the Worker
|
/// Flatten a typed segment list into the single string the Worker
|
||||||
/// receives as the user message, and emit user-facing alerts for
|
/// receives as the user message, and emit user-facing alerts for
|
||||||
/// segments that fall through to placeholder (file/knowledge/workflow
|
/// segments that fall through to placeholder (knowledge / workflow
|
||||||
/// refs without a resolver, or unknown variants from a newer client).
|
/// refs without a resolver, or unknown variants from a newer client).
|
||||||
/// The text reconstruction itself comes from `Segment::flatten_to_text`,
|
/// `FileRef` is handled separately by `resolve_file_refs`. The text
|
||||||
|
/// reconstruction itself comes from `Segment::flatten_to_text`,
|
||||||
/// shared with replay paths that should not re-alert.
|
/// shared with replay paths that should not re-alert.
|
||||||
fn flatten_segments(&self, segments: &[Segment]) -> String {
|
fn flatten_segments(&self, segments: &[Segment]) -> String {
|
||||||
for seg in segments {
|
for seg in segments {
|
||||||
match seg {
|
match seg {
|
||||||
Segment::Text { .. } | Segment::Paste { .. } => {}
|
Segment::Text { .. } | Segment::Paste { .. } | Segment::FileRef { .. } => {}
|
||||||
Segment::FileRef { path } => {
|
|
||||||
self.alert(
|
|
||||||
AlertLevel::Warn,
|
|
||||||
AlertSource::Pod,
|
|
||||||
format!(
|
|
||||||
"file ref @{path} cannot be resolved \
|
|
||||||
(resolver not yet implemented); passed to LLM as placeholder"
|
|
||||||
),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
Segment::KnowledgeRef { slug } => {
|
Segment::KnowledgeRef { slug } => {
|
||||||
self.alert(
|
self.alert(
|
||||||
AlertLevel::Warn,
|
AlertLevel::Warn,
|
||||||
|
|
@ -1550,6 +1590,7 @@ impl<St: Store> Pod<Box<dyn LlmClient>, St> {
|
||||||
alerter: None,
|
alerter: None,
|
||||||
event_tx: None,
|
event_tx: None,
|
||||||
pending_notifies: NotifyBuffer::new(),
|
pending_notifies: NotifyBuffer::new(),
|
||||||
|
pending_attachments: Arc::new(Mutex::new(Vec::new())),
|
||||||
scope_allocation: Some(scope_allocation),
|
scope_allocation: Some(scope_allocation),
|
||||||
callback_socket: None,
|
callback_socket: None,
|
||||||
prompts: common.prompts,
|
prompts: common.prompts,
|
||||||
|
|
@ -1606,6 +1647,7 @@ impl<St: Store> Pod<Box<dyn LlmClient>, St> {
|
||||||
alerter: None,
|
alerter: None,
|
||||||
event_tx: None,
|
event_tx: None,
|
||||||
pending_notifies: NotifyBuffer::new(),
|
pending_notifies: NotifyBuffer::new(),
|
||||||
|
pending_attachments: Arc::new(Mutex::new(Vec::new())),
|
||||||
scope_allocation: Some(scope_allocation),
|
scope_allocation: Some(scope_allocation),
|
||||||
callback_socket: Some(callback_socket),
|
callback_socket: Some(callback_socket),
|
||||||
prompts: common.prompts,
|
prompts: common.prompts,
|
||||||
|
|
@ -1714,6 +1756,7 @@ impl<St: Store> Pod<Box<dyn LlmClient>, St> {
|
||||||
alerter: None,
|
alerter: None,
|
||||||
event_tx: None,
|
event_tx: None,
|
||||||
pending_notifies: NotifyBuffer::new(),
|
pending_notifies: NotifyBuffer::new(),
|
||||||
|
pending_attachments: Arc::new(Mutex::new(Vec::new())),
|
||||||
scope_allocation: Some(scope_allocation),
|
scope_allocation: Some(scope_allocation),
|
||||||
callback_socket: None,
|
callback_socket: None,
|
||||||
prompts: common.prompts,
|
prompts: common.prompts,
|
||||||
|
|
|
||||||
|
|
@ -123,6 +123,10 @@ permission = "write"
|
||||||
"#;
|
"#;
|
||||||
|
|
||||||
async fn make_pod(client: MockClient) -> Pod<MockClient, FsStore> {
|
async fn make_pod(client: MockClient) -> Pod<MockClient, FsStore> {
|
||||||
|
make_pod_with_pwd(client).await.0
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn make_pod_with_pwd(client: MockClient) -> (Pod<MockClient, FsStore>, std::path::PathBuf) {
|
||||||
let manifest = PodManifest::from_toml(MANIFEST_TOML).unwrap();
|
let manifest = PodManifest::from_toml(MANIFEST_TOML).unwrap();
|
||||||
let store_tmp = tempfile::tempdir().unwrap();
|
let store_tmp = tempfile::tempdir().unwrap();
|
||||||
let store = FsStore::new(store_tmp.path()).await.unwrap();
|
let store = FsStore::new(store_tmp.path()).await.unwrap();
|
||||||
|
|
@ -137,7 +141,10 @@ async fn make_pod(client: MockClient) -> Pod<MockClient, FsStore> {
|
||||||
std::mem::forget(pwd_tmp);
|
std::mem::forget(pwd_tmp);
|
||||||
|
|
||||||
let worker = Worker::new(client);
|
let worker = Worker::new(client);
|
||||||
Pod::new(manifest, worker, store, pwd, scope).await.unwrap()
|
let pod = Pod::new(manifest, worker, store, pwd.clone(), scope)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
(pod, pwd)
|
||||||
}
|
}
|
||||||
|
|
||||||
use pod::PodHandle;
|
use pod::PodHandle;
|
||||||
|
|
@ -405,6 +412,58 @@ async fn run_with_paste_segment_inlines_content_and_emits_typed_user_message() {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn run_with_resolvable_file_ref_attaches_system_message_after_user() {
|
||||||
|
let client = MockClient::new(simple_text_events());
|
||||||
|
let client_for_assert = client.clone();
|
||||||
|
let (pod, pwd) = make_pod_with_pwd(client).await;
|
||||||
|
std::fs::write(pwd.join("notes.md"), "alpha\nbeta\n").unwrap();
|
||||||
|
let handle = spawn_controller(pod).await;
|
||||||
|
|
||||||
|
let segments = vec![
|
||||||
|
protocol::Segment::text("see "),
|
||||||
|
protocol::Segment::FileRef {
|
||||||
|
path: "notes.md".into(),
|
||||||
|
},
|
||||||
|
];
|
||||||
|
handle.send(Method::Run { input: segments }).await.unwrap();
|
||||||
|
|
||||||
|
// Wait for the turn to complete.
|
||||||
|
let mut rx = handle.subscribe();
|
||||||
|
let deadline = tokio::time::Instant::now() + std::time::Duration::from_secs(2);
|
||||||
|
loop {
|
||||||
|
tokio::select! {
|
||||||
|
event = rx.recv() => match event {
|
||||||
|
Ok(Event::TurnEnd { .. }) => break,
|
||||||
|
Err(_) => break,
|
||||||
|
_ => {}
|
||||||
|
},
|
||||||
|
_ = tokio::time::sleep_until(deadline) => break,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
tokio::time::sleep(std::time::Duration::from_millis(50)).await;
|
||||||
|
|
||||||
|
let requests = client_for_assert.captured_requests();
|
||||||
|
let items = &requests[0].items;
|
||||||
|
// The submit produces 2 history items: user message then file content.
|
||||||
|
let user_idx = items
|
||||||
|
.iter()
|
||||||
|
.position(|i| i.is_user_message())
|
||||||
|
.expect("user message present");
|
||||||
|
let next = items
|
||||||
|
.get(user_idx + 1)
|
||||||
|
.expect("attachment item present after user");
|
||||||
|
let next_text = next.as_text().unwrap_or_default();
|
||||||
|
assert!(
|
||||||
|
next_text.contains("[File: notes.md]"),
|
||||||
|
"expected file header, got: {next_text:?}"
|
||||||
|
);
|
||||||
|
assert!(
|
||||||
|
next_text.contains("alpha"),
|
||||||
|
"expected file body, got: {next_text:?}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn run_with_unresolved_segment_emits_alert_and_placeholder() {
|
async fn run_with_unresolved_segment_emits_alert_and_placeholder() {
|
||||||
let client = MockClient::new(simple_text_events());
|
let client = MockClient::new(simple_text_events());
|
||||||
|
|
@ -448,11 +507,12 @@ async fn run_with_unresolved_segment_emits_alert_and_placeholder() {
|
||||||
.iter()
|
.iter()
|
||||||
.find_map(|i| i.as_text().map(|s| s.to_string()))
|
.find_map(|i| i.as_text().map(|s| s.to_string()))
|
||||||
.unwrap_or_default();
|
.unwrap_or_default();
|
||||||
// LLM context carries a placeholder so the model can ask for the
|
// The user message keeps the literal `@<path>` token (matching what
|
||||||
// missing content rather than silently miss the user's intent.
|
// the user typed). Resolution failure surfaces via the Alert above;
|
||||||
|
// the LLM still sees the intent as a sigil-prefixed reference.
|
||||||
assert!(
|
assert!(
|
||||||
user_text.contains("[unresolved file ref: src/lib.rs]"),
|
user_text.contains("@src/lib.rs"),
|
||||||
"placeholder missing, got: {user_text:?}"
|
"literal sigil missing, got: {user_text:?}"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -150,9 +150,13 @@ impl Segment {
|
||||||
/// to surface user-visible alerts for unresolved refs should do so
|
/// to surface user-visible alerts for unresolved refs should do so
|
||||||
/// alongside this call (Pod does so at submit time).
|
/// alongside this call (Pod does so at submit time).
|
||||||
///
|
///
|
||||||
/// Unresolved variants (`FileRef` / `KnowledgeRef` / `WorkflowInvoke`)
|
/// Sigil-prefixed variants (`FileRef` / `KnowledgeRef` / `WorkflowInvoke`)
|
||||||
/// and `Unknown` map to `[unresolved <kind>: <key>]` placeholders so
|
/// flatten back to their literal sigil form (`@<path>`, `#<slug>`,
|
||||||
/// the LLM sees an explicit token rather than silent omission.
|
/// `/<slug>`) — matching what the user originally typed. Resolved
|
||||||
|
/// content (e.g. file body for `FileRef`) is delivered as separate
|
||||||
|
/// `Item::system_message`s adjacent to the user message; the
|
||||||
|
/// resolution itself is the caller's job. `Unknown` falls back to
|
||||||
|
/// a bracketed placeholder since there is no sigil to render.
|
||||||
pub fn flatten_to_text(segments: &[Segment]) -> String {
|
pub fn flatten_to_text(segments: &[Segment]) -> String {
|
||||||
let mut out = String::new();
|
let mut out = String::new();
|
||||||
for seg in segments {
|
for seg in segments {
|
||||||
|
|
@ -160,13 +164,16 @@ impl Segment {
|
||||||
Segment::Text { content } => out.push_str(content),
|
Segment::Text { content } => out.push_str(content),
|
||||||
Segment::Paste { content, .. } => out.push_str(content),
|
Segment::Paste { content, .. } => out.push_str(content),
|
||||||
Segment::FileRef { path } => {
|
Segment::FileRef { path } => {
|
||||||
out.push_str(&format!("[unresolved file ref: {path}]"));
|
out.push('@');
|
||||||
|
out.push_str(path);
|
||||||
}
|
}
|
||||||
Segment::KnowledgeRef { slug } => {
|
Segment::KnowledgeRef { slug } => {
|
||||||
out.push_str(&format!("[unresolved knowledge ref: {slug}]"));
|
out.push('#');
|
||||||
|
out.push_str(slug);
|
||||||
}
|
}
|
||||||
Segment::WorkflowInvoke { slug } => {
|
Segment::WorkflowInvoke { slug } => {
|
||||||
out.push_str(&format!("[unresolved workflow invoke: {slug}]"));
|
out.push('/');
|
||||||
|
out.push_str(slug);
|
||||||
}
|
}
|
||||||
Segment::Unknown => {
|
Segment::Unknown => {
|
||||||
out.push_str("[unknown input segment]");
|
out.push_str("[unknown input segment]");
|
||||||
|
|
|
||||||
|
|
@ -713,7 +713,7 @@ mod tests {
|
||||||
assert_eq!(content.len(), 1);
|
assert_eq!(content.len(), 1);
|
||||||
match &content[0] {
|
match &content[0] {
|
||||||
llm_worker::ContentPart::Text { text } => {
|
llm_worker::ContentPart::Text { text } => {
|
||||||
assert_eq!(text, "see line1\nline2[unresolved file ref: src/main.rs]");
|
assert_eq!(text, "see line1\nline2@src/main.rs");
|
||||||
}
|
}
|
||||||
other => panic!("unexpected content: {other:?}"),
|
other => panic!("unexpected content: {other:?}"),
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -19,7 +19,7 @@ serde_json = "1.0.149"
|
||||||
sha2 = "0.11.0"
|
sha2 = "0.11.0"
|
||||||
tempfile = "3.27.0"
|
tempfile = "3.27.0"
|
||||||
thiserror = "2.0.18"
|
thiserror = "2.0.18"
|
||||||
tokio = { version = "1.51.1", features = ["rt"] }
|
tokio = { version = "1.51.1", features = ["process", "rt", "time"] }
|
||||||
tracing = "0.1.44"
|
tracing = "0.1.44"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
|
|
|
||||||
581
crates/tools/src/bash.rs
Normal file
581
crates/tools/src/bash.rs
Normal file
|
|
@ -0,0 +1,581 @@
|
||||||
|
//! `Bash` tool — execute shell commands in a one-shot, stateless way.
|
||||||
|
//!
|
||||||
|
//! Each call runs `bash -c <command>` via [`tokio::process::Command`].
|
||||||
|
//! The wrapper redirects all output to a file so we never have to read
|
||||||
|
//! from a pipe (which would expose us to bg-pipe hangs). There is no
|
||||||
|
//! shell session: every call starts fresh at `cwd`, so the agent must
|
||||||
|
//! chain `cd <dir> && cmd` when it wants to operate elsewhere. This
|
||||||
|
//! mirrors Claude Code's own Bash tool — predictable, no hidden state.
|
||||||
|
//!
|
||||||
|
//! Output handling: when output is short (≤ 80 lines, ≤ 12 KiB) it is
|
||||||
|
//! returned inline and the file is cleaned up. When it is longer the
|
||||||
|
//! full output is left on disk and only the **last 80 lines** are
|
||||||
|
//! returned, prefixed with the saved file's path. This sidesteps the
|
||||||
|
//! Worker's blanket `ToolOutputLimits` (default 16 KiB), which would
|
||||||
|
//! otherwise drop the *tail* of the output — usually the most useful
|
||||||
|
//! part (errors, exit messages, summary). The saved file lives under
|
||||||
|
//! a caller-supplied directory that the parent has added to the
|
||||||
|
//! `ScopedFs` allow set, so the agent can inspect it via either Read
|
||||||
|
//! or a follow-up Bash call.
|
||||||
|
//!
|
||||||
|
//! Filesystem and network access are NOT mediated by `ScopedFs`: the
|
||||||
|
//! child process can touch any path. Safety is delegated to the
|
||||||
|
//! Permission layer (deny/allow rules on the command string).
|
||||||
|
|
||||||
|
use std::path::{Path, PathBuf};
|
||||||
|
use std::process::Stdio;
|
||||||
|
use std::sync::Arc;
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use async_trait::async_trait;
|
||||||
|
use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta, ToolOutput};
|
||||||
|
use serde::Deserialize;
|
||||||
|
use tokio::process::Command;
|
||||||
|
|
||||||
|
use crate::scoped_fs::ScopedFs;
|
||||||
|
|
||||||
|
const DESCRIPTION: &str = "Execute a shell command via bash. Supports the \
|
||||||
|
full shell — pipes, redirects, command substitution, `&&`/`||`. Each call \
|
||||||
|
runs in a fresh shell rooted at the workspace; chain `cd <subdir> && cmd` \
|
||||||
|
when you need to operate elsewhere. stdout and stderr are merged. Default \
|
||||||
|
timeout 120s, max 600s.\n\n\
|
||||||
|
Output handling: when the command produces more than 80 lines (or ~12 KiB), \
|
||||||
|
the full output is saved to a file and only the LAST 80 lines are returned, \
|
||||||
|
prefixed with the saved path. The path is readable by Read; you can also \
|
||||||
|
inspect it from a follow-up Bash call (`grep ... <path>`, etc.).\n\n\
|
||||||
|
Prefer dedicated tools when one fits: Read instead of `cat`/`head`/`tail` \
|
||||||
|
on workspace files, Edit instead of `sed`/`awk` rewrites, Glob instead of \
|
||||||
|
`find <name>`, Grep instead of `grep`/`rg`. Reach for Bash when the task \
|
||||||
|
is shell-shaped: building, testing, version control, package management.";
|
||||||
|
|
||||||
|
const DEFAULT_TIMEOUT_SECS: u64 = 120;
|
||||||
|
const MAX_TIMEOUT_SECS: u64 = 600;
|
||||||
|
|
||||||
|
/// Number of trailing lines returned when output spills to a file.
|
||||||
|
const TAIL_LINES: usize = 80;
|
||||||
|
|
||||||
|
/// Inline-return budget. Outputs at or below this are returned in full;
|
||||||
|
/// above it triggers the spill-to-file path. Sized to leave headroom under
|
||||||
|
/// the Worker's 16 KiB default `ToolOutputLimits` cap so the inline path
|
||||||
|
/// reliably reaches the model intact.
|
||||||
|
const INLINE_BYTE_BUDGET: usize = 12 * 1024;
|
||||||
|
|
||||||
|
/// Maximum bytes loaded into memory from the spilled output file. The
|
||||||
|
/// file itself can be arbitrarily large; we only ever read the tail end
|
||||||
|
/// since that is what we return.
|
||||||
|
const TAIL_READ_BUDGET: usize = 256 * 1024;
|
||||||
|
|
||||||
|
#[derive(Debug, Deserialize, schemars::JsonSchema)]
|
||||||
|
pub(crate) struct BashParams {
|
||||||
|
/// Shell command to execute. Passed verbatim to `bash -c`.
|
||||||
|
pub command: String,
|
||||||
|
/// Timeout in seconds. Defaults to 120, capped at 600.
|
||||||
|
#[serde(default)]
|
||||||
|
pub timeout: Option<u64>,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) struct BashTool {
|
||||||
|
/// Workspace root that every invocation starts in. Snapshot of
|
||||||
|
/// `ScopedFs::pwd()` at registration time; never mutated, since we
|
||||||
|
/// don't track `cd` across calls.
|
||||||
|
cwd: PathBuf,
|
||||||
|
/// Directory to spill long outputs into. Caller is expected to have
|
||||||
|
/// added this path to the readable scope so the agent can Read the
|
||||||
|
/// saved files. The directory itself is created lazily.
|
||||||
|
output_dir: PathBuf,
|
||||||
|
/// Files we left on disk for follow-up inspection. Cleaned up on
|
||||||
|
/// `Drop` (= session end). `std::sync::Mutex` because access is
|
||||||
|
/// always synchronous and very brief.
|
||||||
|
spilled_outputs: std::sync::Mutex<Vec<PathBuf>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Drop for BashTool {
|
||||||
|
fn drop(&mut self) {
|
||||||
|
if let Ok(mut paths) = self.spilled_outputs.lock() {
|
||||||
|
for p in paths.drain(..) {
|
||||||
|
let _ = std::fs::remove_file(&p);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl Tool for BashTool {
|
||||||
|
async fn execute(&self, input_json: &str) -> Result<ToolOutput, ToolError> {
|
||||||
|
let params: BashParams = serde_json::from_str(input_json)
|
||||||
|
.map_err(|e| ToolError::InvalidArgument(format!("invalid Bash input: {e}")))?;
|
||||||
|
let timeout_secs = params
|
||||||
|
.timeout
|
||||||
|
.unwrap_or(DEFAULT_TIMEOUT_SECS)
|
||||||
|
.clamp(1, MAX_TIMEOUT_SECS);
|
||||||
|
|
||||||
|
// Persistent output file in the caller-supplied directory.
|
||||||
|
// `keep()` opts out of auto-delete so the agent can inspect the
|
||||||
|
// full output later; cleanup is deferred to `Drop` on this tool.
|
||||||
|
std::fs::create_dir_all(&self.output_dir).map_err(|e| {
|
||||||
|
ToolError::Internal(format!(
|
||||||
|
"create bash output dir {}: {e}",
|
||||||
|
self.output_dir.display()
|
||||||
|
))
|
||||||
|
})?;
|
||||||
|
let output_path: PathBuf = tempfile::Builder::new()
|
||||||
|
.prefix("bash-")
|
||||||
|
.suffix(".log")
|
||||||
|
.tempfile_in(&self.output_dir)
|
||||||
|
.map_err(|e| ToolError::Internal(format!("output tempfile: {e}")))?
|
||||||
|
.into_temp_path()
|
||||||
|
.keep()
|
||||||
|
.map_err(|e| ToolError::Internal(format!("persist output tempfile: {e}")))?;
|
||||||
|
|
||||||
|
let output_path_str = output_path
|
||||||
|
.to_str()
|
||||||
|
.ok_or_else(|| ToolError::Internal("output path is not UTF-8".into()))?;
|
||||||
|
|
||||||
|
// Wrapper:
|
||||||
|
// exec >file 2>&1 redirect stdout/stderr to the output file
|
||||||
|
// { user_cmd } run in a brace group (no subshell, so any
|
||||||
|
// `cd` inside still affects $? capture below)
|
||||||
|
// __exit=$? preserve the user command's exit code…
|
||||||
|
// wait 2>/dev/null …since `wait` clobbers $?. Reaping bg jobs
|
||||||
|
// guarantees the output file's writers all
|
||||||
|
// close before bash itself exits.
|
||||||
|
// exit $__exit propagate the user's exit
|
||||||
|
let wrapped = format!(
|
||||||
|
"exec >{out} 2>&1\n{{ {user_cmd}\n}}\n__insomnia_exit=$?\nwait 2>/dev/null\nexit $__insomnia_exit\n",
|
||||||
|
out = shell_single_quote(output_path_str),
|
||||||
|
user_cmd = params.command,
|
||||||
|
);
|
||||||
|
|
||||||
|
tracing::debug!(cmd = %params.command, cwd = %self.cwd.display(), timeout_secs, "Bash");
|
||||||
|
|
||||||
|
let mut child = Command::new("bash")
|
||||||
|
.arg("-c")
|
||||||
|
.arg(&wrapped)
|
||||||
|
.current_dir(&self.cwd)
|
||||||
|
.stdin(Stdio::null())
|
||||||
|
.stdout(Stdio::null()) // bash inherits — but the wrapper redirected via `exec`
|
||||||
|
.stderr(Stdio::null())
|
||||||
|
.kill_on_drop(true)
|
||||||
|
.spawn()
|
||||||
|
.map_err(|e| {
|
||||||
|
let _ = std::fs::remove_file(&output_path);
|
||||||
|
ToolError::ExecutionFailed(format!("spawn bash: {e}"))
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let timeout_dur = Duration::from_secs(timeout_secs);
|
||||||
|
let wait_result = tokio::time::timeout(timeout_dur, child.wait()).await;
|
||||||
|
let (status, timed_out) = match wait_result {
|
||||||
|
Ok(Ok(s)) => (Some(s), false),
|
||||||
|
Ok(Err(e)) => {
|
||||||
|
let _ = std::fs::remove_file(&output_path);
|
||||||
|
return Err(ToolError::ExecutionFailed(format!("bash wait: {e}")));
|
||||||
|
}
|
||||||
|
Err(_) => (None, true),
|
||||||
|
};
|
||||||
|
|
||||||
|
// Inspect the on-disk output: total size first, tail bytes second.
|
||||||
|
let total_bytes = std::fs::metadata(&output_path)
|
||||||
|
.map(|m| m.len() as usize)
|
||||||
|
.unwrap_or(0);
|
||||||
|
let tail_bytes = read_tail_bytes(&output_path, TAIL_READ_BUDGET).unwrap_or_default();
|
||||||
|
let tail_text = String::from_utf8_lossy(&tail_bytes).into_owned();
|
||||||
|
|
||||||
|
let cmd_summary = truncate_for_summary(¶ms.command);
|
||||||
|
|
||||||
|
if timed_out {
|
||||||
|
// Preserve the partial output file — even cut-short logs help
|
||||||
|
// diagnose hangs.
|
||||||
|
let content = if total_bytes > 0 {
|
||||||
|
let last = take_last_n_lines(&tail_text, TAIL_LINES);
|
||||||
|
self.remember_spilled(&output_path);
|
||||||
|
Some(format!(
|
||||||
|
"[partial output before timeout — full at {}]\n{last}",
|
||||||
|
output_path.display()
|
||||||
|
))
|
||||||
|
} else {
|
||||||
|
let _ = std::fs::remove_file(&output_path);
|
||||||
|
None
|
||||||
|
};
|
||||||
|
return Ok(ToolOutput {
|
||||||
|
summary: format!("$ {cmd_summary} (timed out after {timeout_secs}s)"),
|
||||||
|
content,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
let status = status.expect("status set on the success branch");
|
||||||
|
let summary = match status.code() {
|
||||||
|
Some(0) => format!("$ {cmd_summary}"),
|
||||||
|
Some(c) => format!("$ {cmd_summary} (exit {c})"),
|
||||||
|
None => format!("$ {cmd_summary} (terminated by signal)"),
|
||||||
|
};
|
||||||
|
|
||||||
|
if total_bytes == 0 {
|
||||||
|
let _ = std::fs::remove_file(&output_path);
|
||||||
|
return Ok(ToolOutput {
|
||||||
|
summary,
|
||||||
|
content: None,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Inline if the whole output fits in our tail-read window AND is
|
||||||
|
// small enough to ride under the Worker's default cap.
|
||||||
|
let line_count = tail_text.lines().count();
|
||||||
|
let fully_loaded = total_bytes <= tail_bytes.len();
|
||||||
|
let fits_inline =
|
||||||
|
fully_loaded && total_bytes <= INLINE_BYTE_BUDGET && line_count <= TAIL_LINES;
|
||||||
|
|
||||||
|
let content = if fits_inline {
|
||||||
|
let _ = std::fs::remove_file(&output_path);
|
||||||
|
Some(tail_text)
|
||||||
|
} else {
|
||||||
|
let last = take_last_n_lines(&tail_text, TAIL_LINES);
|
||||||
|
// When `fully_loaded` we know the exact line count; otherwise
|
||||||
|
// the file is bigger than our read window so we report bytes
|
||||||
|
// and an "approximate" disclaimer.
|
||||||
|
let header = if fully_loaded {
|
||||||
|
format!(
|
||||||
|
"[showing last {TAIL_LINES} of {line_count} lines — full output ({total_bytes} bytes) at {}]",
|
||||||
|
output_path.display()
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
format!(
|
||||||
|
"[showing last {TAIL_LINES} lines (tail of {total_bytes}-byte output) — full at {}]",
|
||||||
|
output_path.display()
|
||||||
|
)
|
||||||
|
};
|
||||||
|
self.remember_spilled(&output_path);
|
||||||
|
Some(format!("{header}\n{last}"))
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(ToolOutput { summary, content })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl BashTool {
|
||||||
|
fn remember_spilled(&self, path: &Path) {
|
||||||
|
if let Ok(mut v) = self.spilled_outputs.lock() {
|
||||||
|
v.push(path.to_path_buf());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Load at most the final `max_bytes` bytes of the file at `path`.
///
/// A file no longer than `max_bytes` is returned whole. Any I/O failure
/// (open, seek, read) is passed through to the caller.
fn read_tail_bytes(path: &Path, max_bytes: usize) -> std::io::Result<Vec<u8>> {
    use std::io::{Read, Seek, SeekFrom};

    let mut file = std::fs::File::open(path)?;
    // Seeking to the end reports the current file length.
    let file_len = file.seek(SeekFrom::End(0))?;
    // Offset of the first byte that belongs to the tail window.
    let offset = file_len.saturating_sub(max_bytes as u64);
    file.seek(SeekFrom::Start(offset))?;

    let mut tail = Vec::with_capacity((file_len - offset) as usize);
    file.read_to_end(&mut tail)?;
    Ok(tail)
}
|
||||||
|
|
||||||
|
/// Return the last `n` lines of `text` as an owned `String`.
///
/// Lines are counted the way [`str::lines`] counts them, so a trailing
/// newline does not start an extra empty line. When `text` has `n` or fewer
/// lines an unchanged copy is returned; the terminator of the final line
/// (present or absent) is preserved either way. `n == 0` and empty input
/// both yield an empty string.
fn take_last_n_lines(text: &str, n: usize) -> String {
    // `n == 0` needs its own guard: for text without a trailing newline
    // there is no newline to cut after, so the scan below would otherwise
    // fall through and hand back the whole input.
    if n == 0 || text.is_empty() {
        return String::new();
    }
    let total = text.lines().count();
    if total <= n {
        return text.to_owned();
    }
    // The tail starts right after the `skip`-th newline (`skip >= 1` here).
    let skip = total - n;
    match text.match_indices('\n').nth(skip - 1) {
        Some((newline_at, _)) => text[newline_at + 1..].to_owned(),
        // Not expected for `n >= 1` (there are at least `total - 1`
        // newlines); kept as a safe fallback.
        None => text.to_owned(),
    }
}
|
||||||
|
|
||||||
|
/// Build the one-line label used in tool summaries: the first line of
/// `command`, shortened to 77 characters plus `...` when that line is
/// longer than 80 characters (counted in `char`s, not bytes).
fn truncate_for_summary(command: &str) -> String {
    const LIMIT: usize = 80;
    const ELLIPSIS: &str = "...";

    let first_line = command.lines().next().unwrap_or("");
    // An 81st character exists exactly when the line exceeds the limit.
    match first_line.char_indices().nth(LIMIT) {
        None => first_line.to_owned(),
        Some(_) => {
            let kept = LIMIT - ELLIPSIS.len();
            let mut label: String = first_line.chars().take(kept).collect();
            label.push_str(ELLIPSIS);
            label
        }
    }
}
|
||||||
|
|
||||||
|
/// Quote `s` for bash by enclosing it in single quotes. Each embedded `'`
/// is emitted as `'\''` (close the quote, escaped quote, reopen).
fn shell_single_quote(s: &str) -> String {
    let mut quoted = String::with_capacity(s.len() + 2);
    quoted.push('\'');
    for ch in s.chars() {
        if ch == '\'' {
            quoted.push_str("'\\''");
        } else {
            quoted.push(ch);
        }
    }
    quoted.push('\'');
    quoted
}
|
||||||
|
|
||||||
|
/// Factory for the `Bash` tool.
|
||||||
|
///
|
||||||
|
/// `output_dir` is where long outputs spill to; the caller is responsible
|
||||||
|
/// for arranging that the path is in the agent's readable scope. Every
|
||||||
|
/// invocation starts at `fs.pwd()` — the tool is intentionally stateless
|
||||||
|
/// w.r.t. the working directory.
|
||||||
|
pub fn bash_tool(fs: ScopedFs, output_dir: PathBuf) -> ToolDefinition {
|
||||||
|
Arc::new(move || {
|
||||||
|
let schema = schemars::schema_for!(BashParams);
|
||||||
|
let schema_value = serde_json::to_value(schema).unwrap_or(serde_json::json!({}));
|
||||||
|
let meta = ToolMeta::new("Bash")
|
||||||
|
.description(DESCRIPTION)
|
||||||
|
.input_schema(schema_value);
|
||||||
|
let tool: Arc<dyn Tool> = Arc::new(BashTool {
|
||||||
|
cwd: fs.pwd().to_path_buf(),
|
||||||
|
output_dir: output_dir.clone(),
|
||||||
|
spilled_outputs: std::sync::Mutex::new(Vec::new()),
|
||||||
|
});
|
||||||
|
(meta, tool)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
use manifest::Scope;
|
||||||
|
use tempfile::TempDir;
|
||||||
|
|
||||||
|
/// Test harness: workspace tempdir + a separate spill tempdir kept
|
||||||
|
/// alive for the test's lifetime. The spill dir is added to the
|
||||||
|
/// scope as readable so callers exercise the production path.
|
||||||
|
struct Harness {
|
||||||
|
_workspace: TempDir,
|
||||||
|
spill: TempDir,
|
||||||
|
fs: ScopedFs,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn setup() -> Harness {
|
||||||
|
let workspace = TempDir::new().unwrap();
|
||||||
|
let spill = TempDir::new().unwrap();
|
||||||
|
let base = Scope::writable(workspace.path()).unwrap();
|
||||||
|
let mut config = manifest::ScopeConfig {
|
||||||
|
allow: base.allow_rules(),
|
||||||
|
deny: base.deny_rules(),
|
||||||
|
};
|
||||||
|
config.allow.push(manifest::ScopeRule {
|
||||||
|
target: spill.path().to_path_buf(),
|
||||||
|
permission: manifest::Permission::Read,
|
||||||
|
recursive: true,
|
||||||
|
});
|
||||||
|
let scope = Scope::from_config(&config).unwrap();
|
||||||
|
let fs = ScopedFs::new(scope, workspace.path().to_path_buf());
|
||||||
|
Harness {
|
||||||
|
_workspace: workspace,
|
||||||
|
spill,
|
||||||
|
fs,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn make_tool(h: &Harness) -> Arc<dyn Tool> {
|
||||||
|
let def = bash_tool(h.fs.clone(), h.spill.path().to_path_buf());
|
||||||
|
let (_, tool) = def();
|
||||||
|
tool
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn runs_simple_command() {
|
||||||
|
let h = setup();
|
||||||
|
let def = bash_tool(h.fs.clone(), h.spill.path().to_path_buf());
|
||||||
|
let (meta, tool) = def();
|
||||||
|
assert_eq!(meta.name, "Bash");
|
||||||
|
|
||||||
|
let inp = serde_json::json!({ "command": "echo hello" });
|
||||||
|
let out = tool.execute(&inp.to_string()).await.unwrap();
|
||||||
|
assert_eq!(out.summary, "$ echo hello");
|
||||||
|
assert_eq!(out.content.as_deref().map(str::trim), Some("hello"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn merges_stdout_and_stderr() {
|
||||||
|
let h = setup();
|
||||||
|
let tool = make_tool(&h);
|
||||||
|
|
||||||
|
let inp = serde_json::json!({
|
||||||
|
"command": "echo out; echo err 1>&2",
|
||||||
|
});
|
||||||
|
let out = tool.execute(&inp.to_string()).await.unwrap();
|
||||||
|
let body = out.content.unwrap();
|
||||||
|
assert!(body.contains("out"));
|
||||||
|
assert!(body.contains("err"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn nonzero_exit_is_reported() {
|
||||||
|
let h = setup();
|
||||||
|
let tool = make_tool(&h);
|
||||||
|
|
||||||
|
let inp = serde_json::json!({ "command": "exit 7" });
|
||||||
|
let out = tool.execute(&inp.to_string()).await.unwrap();
|
||||||
|
assert!(out.summary.contains("exit 7"), "summary: {}", out.summary);
|
||||||
|
assert!(
|
||||||
|
out.content.is_none(),
|
||||||
|
"no output expected, got {:?}",
|
||||||
|
out.content
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn cd_does_not_persist_across_calls() {
|
||||||
|
// Stateless: a `cd` in one call must NOT leak into the next.
|
||||||
|
let h = setup();
|
||||||
|
let sub = h._workspace.path().join("nested");
|
||||||
|
std::fs::create_dir(&sub).unwrap();
|
||||||
|
let tool = make_tool(&h);
|
||||||
|
|
||||||
|
tool.execute(
|
||||||
|
&serde_json::json!({
|
||||||
|
"command": format!("cd {}", sub.to_str().unwrap()),
|
||||||
|
})
|
||||||
|
.to_string(),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let pwd_out = tool
|
||||||
|
.execute(&serde_json::json!({ "command": "pwd" }).to_string())
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
let body = pwd_out.content.unwrap();
|
||||||
|
let actual = std::fs::canonicalize(body.trim()).unwrap();
|
||||||
|
let workspace = std::fs::canonicalize(h._workspace.path()).unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
actual, workspace,
|
||||||
|
"second call should start at workspace root, not the previous cd target"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn timeout_kills_long_command() {
|
||||||
|
let h = setup();
|
||||||
|
let tool = make_tool(&h);
|
||||||
|
|
||||||
|
let inp = serde_json::json!({
|
||||||
|
"command": "sleep 30",
|
||||||
|
"timeout": 1,
|
||||||
|
});
|
||||||
|
let out = tool.execute(&inp.to_string()).await.unwrap();
|
||||||
|
assert!(
|
||||||
|
out.summary.contains("timed out"),
|
||||||
|
"summary: {}",
|
||||||
|
out.summary
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn invalid_json_is_invalid_argument() {
|
||||||
|
let h = setup();
|
||||||
|
let tool = make_tool(&h);
|
||||||
|
|
||||||
|
let err = tool.execute("not json").await.unwrap_err();
|
||||||
|
assert!(matches!(err, ToolError::InvalidArgument(_)));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn long_output_spills_and_returns_tail() {
|
||||||
|
let h = setup();
|
||||||
|
let spill_dir = h.spill.path().to_path_buf();
|
||||||
|
let tool = make_tool(&h);
|
||||||
|
|
||||||
|
// 200 lines: "line 1" .. "line 200". Tail of 80 keeps lines 121-200.
|
||||||
|
let inp = serde_json::json!({
|
||||||
|
"command": "for i in $(seq 1 200); do echo line $i; done",
|
||||||
|
});
|
||||||
|
let out = tool.execute(&inp.to_string()).await.unwrap();
|
||||||
|
let body = out.content.expect("expected content");
|
||||||
|
|
||||||
|
assert!(
|
||||||
|
body.contains(&format!("showing last {TAIL_LINES} of 200 lines")),
|
||||||
|
"tail header missing in: {}",
|
||||||
|
&body[..body.len().min(300)]
|
||||||
|
);
|
||||||
|
assert!(
|
||||||
|
body.contains(spill_dir.to_str().unwrap()),
|
||||||
|
"spill dir path missing: {body}"
|
||||||
|
);
|
||||||
|
// Last 80 lines are 121..200.
|
||||||
|
assert!(body.contains("\nline 200\n"));
|
||||||
|
assert!(body.contains("\nline 121\n"));
|
||||||
|
// line 120 is the last *elided* line.
|
||||||
|
assert!(!body.contains("\nline 120\n"), "elided line leaked: {body}");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn wide_short_output_still_spills_when_byte_budget_exceeded() {
|
||||||
|
let h = setup();
|
||||||
|
let spill_dir = h.spill.path().to_path_buf();
|
||||||
|
let tool = make_tool(&h);
|
||||||
|
|
||||||
|
// One single line of ~20 KiB (over INLINE_BYTE_BUDGET = 12 KiB).
|
||||||
|
let inp = serde_json::json!({
|
||||||
|
"command": "printf 'x%.0s' {1..20480}",
|
||||||
|
});
|
||||||
|
let out = tool.execute(&inp.to_string()).await.unwrap();
|
||||||
|
let body = out.content.unwrap();
|
||||||
|
assert!(
|
||||||
|
body.contains(spill_dir.to_str().unwrap()),
|
||||||
|
"expected spill marker in: {}",
|
||||||
|
&body[..body.len().min(200)]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn background_job_does_not_hang() {
|
||||||
|
let h = setup();
|
||||||
|
let tool = make_tool(&h);
|
||||||
|
|
||||||
|
// The wrapper's `wait` ensures we don't hang on a stray bg pipe.
|
||||||
|
let inp = serde_json::json!({
|
||||||
|
"command": "(sleep 0.05; echo bg) &",
|
||||||
|
"timeout": 5,
|
||||||
|
});
|
||||||
|
let out = tool.execute(&inp.to_string()).await.unwrap();
|
||||||
|
assert!(
|
||||||
|
!out.summary.contains("timed out"),
|
||||||
|
"summary: {}",
|
||||||
|
out.summary
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn spilled_files_are_cleaned_up_on_drop() {
|
||||||
|
let h = setup();
|
||||||
|
let spill_dir = h.spill.path().to_path_buf();
|
||||||
|
let tool = make_tool(&h);
|
||||||
|
|
||||||
|
let inp = serde_json::json!({
|
||||||
|
"command": "for i in $(seq 1 200); do echo $i; done",
|
||||||
|
});
|
||||||
|
tool.execute(&inp.to_string()).await.unwrap();
|
||||||
|
|
||||||
|
// The spill dir should now contain exactly one bash-*.log file.
|
||||||
|
let files_before: Vec<_> = std::fs::read_dir(&spill_dir)
|
||||||
|
.unwrap()
|
||||||
|
.filter_map(Result::ok)
|
||||||
|
.map(|e| e.path())
|
||||||
|
.collect();
|
||||||
|
assert_eq!(files_before.len(), 1, "expected one spilled file");
|
||||||
|
let path = files_before.into_iter().next().unwrap();
|
||||||
|
assert!(path.exists());
|
||||||
|
|
||||||
|
drop(tool);
|
||||||
|
// Drop runs synchronously; file should be gone.
|
||||||
|
assert!(
|
||||||
|
!path.exists(),
|
||||||
|
"spilled file should be cleaned up on drop: {path:?}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -1,8 +1,8 @@
|
||||||
//! Built-in tools for the Insomnia LLM agent.
|
//! Built-in tools for the Insomnia LLM agent.
|
||||||
//!
|
//!
|
||||||
//! Implements Read / Write / Edit / Glob / Grep on top of the `llm-worker`
|
//! Implements Read / Write / Edit / Glob / Grep / Bash on top of the
|
||||||
//! `Tool` infrastructure. Filesystem access is mediated by two orthogonal
|
//! `llm-worker` `Tool` infrastructure. Filesystem access is mediated by
|
||||||
//! concerns:
|
//! two orthogonal concerns:
|
||||||
//!
|
//!
|
||||||
//! - [`ScopedFs`] — pod-lifetime, expresses the write-block boundary for
|
//! - [`ScopedFs`] — pod-lifetime, expresses the write-block boundary for
|
||||||
//! the current scope. Derived from the manifest and shareable across
|
//! the current scope. Derived from the manifest and shareable across
|
||||||
|
|
@ -13,17 +13,23 @@
|
||||||
//!
|
//!
|
||||||
//! The Pod layer owns both instances and passes them to
|
//! The Pod layer owns both instances and passes them to
|
||||||
//! [`builtin_tools`] when registering tools on a `Worker`.
|
//! [`builtin_tools`] when registering tools on a `Worker`.
|
||||||
|
//!
|
||||||
|
//! `Bash` is the lone exception — its child processes bypass `ScopedFs`
|
||||||
|
//! entirely. Safety for arbitrary command execution is delegated to the
|
||||||
|
//! Permission layer (deny/allow rules on the command string).
|
||||||
|
|
||||||
pub mod error;
|
pub mod error;
|
||||||
pub mod scoped_fs;
|
pub mod scoped_fs;
|
||||||
pub mod tracker;
|
pub mod tracker;
|
||||||
|
|
||||||
|
mod bash;
|
||||||
mod edit;
|
mod edit;
|
||||||
mod glob;
|
mod glob;
|
||||||
mod grep;
|
mod grep;
|
||||||
mod read;
|
mod read;
|
||||||
mod write;
|
mod write;
|
||||||
|
|
||||||
|
pub use bash::bash_tool;
|
||||||
pub use edit::edit_tool;
|
pub use edit::edit_tool;
|
||||||
pub use error::ToolsError;
|
pub use error::ToolsError;
|
||||||
pub use glob::glob_tool;
|
pub use glob::glob_tool;
|
||||||
|
|
@ -39,12 +45,22 @@ pub use write::write_tool;
|
||||||
/// All returned factories share the same tracker instance so that
|
/// All returned factories share the same tracker instance so that
|
||||||
/// `Read` / `Write` / `Edit` see a consistent history across tool
|
/// `Read` / `Write` / `Edit` see a consistent history across tool
|
||||||
/// invocations within a single session.
|
/// invocations within a single session.
|
||||||
pub fn builtin_tools(fs: ScopedFs, tracker: Tracker) -> Vec<llm_worker::tool::ToolDefinition> {
|
///
|
||||||
|
/// `bash_output_dir` is where the Bash tool spills long outputs. The
|
||||||
|
/// caller is responsible for adding that path to the readable scope
|
||||||
|
/// (see [`manifest::Scope::with_extra_read`]) so the agent can `Read`
|
||||||
|
/// the saved files.
|
||||||
|
pub fn builtin_tools(
|
||||||
|
fs: ScopedFs,
|
||||||
|
tracker: Tracker,
|
||||||
|
bash_output_dir: std::path::PathBuf,
|
||||||
|
) -> Vec<llm_worker::tool::ToolDefinition> {
|
||||||
vec![
|
vec![
|
||||||
read_tool(fs.clone(), tracker.clone()),
|
read_tool(fs.clone(), tracker.clone()),
|
||||||
write_tool(fs.clone(), tracker.clone()),
|
write_tool(fs.clone(), tracker.clone()),
|
||||||
edit_tool(fs.clone(), tracker.clone()),
|
edit_tool(fs.clone(), tracker),
|
||||||
glob_tool(fs.clone()),
|
glob_tool(fs.clone()),
|
||||||
grep_tool(fs),
|
grep_tool(fs.clone()),
|
||||||
|
bash_tool(fs, bash_output_dir),
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -31,7 +31,8 @@
|
||||||
//! let scope = Scope::writable("/workspace").unwrap();
|
//! let scope = Scope::writable("/workspace").unwrap();
|
||||||
//! let fs = ScopedFs::new(scope, PathBuf::from("/workspace")); // pod lifetime
|
//! let fs = ScopedFs::new(scope, PathBuf::from("/workspace")); // pod lifetime
|
||||||
//! let tracker = Tracker::new(); // session lifetime
|
//! let tracker = Tracker::new(); // session lifetime
|
||||||
//! let defs = builtin_tools(fs, tracker);
|
//! let bash_outputs = PathBuf::from("/run/insomnia/bash-output");
|
||||||
|
//! let defs = builtin_tools(fs, tracker, bash_outputs);
|
||||||
//! ```
|
//! ```
|
||||||
|
|
||||||
use std::collections::{HashMap, VecDeque};
|
use std::collections::{HashMap, VecDeque};
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use llm_worker::tool::{Tool, ToolDefinition};
|
use llm_worker::tool::{Tool, ToolDefinition};
|
||||||
use manifest::Scope;
|
use manifest::{Permission, Scope, ScopeConfig, ScopeRule};
|
||||||
use serde_json::json;
|
use serde_json::json;
|
||||||
use tempfile::TempDir;
|
use tempfile::TempDir;
|
||||||
use tools::{ScopedFs, Tracker, builtin_tools};
|
use tools::{ScopedFs, Tracker, builtin_tools};
|
||||||
|
|
@ -27,19 +27,29 @@ impl Registry {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn setup() -> (TempDir, Registry) {
|
fn setup() -> (TempDir, TempDir, Registry) {
|
||||||
let dir = TempDir::new().unwrap();
|
let dir = TempDir::new().unwrap();
|
||||||
let fs = ScopedFs::new(
|
let spill = TempDir::new().unwrap();
|
||||||
Scope::writable(dir.path()).unwrap(),
|
let base = Scope::writable(dir.path()).unwrap();
|
||||||
dir.path().to_path_buf(),
|
let mut config = ScopeConfig {
|
||||||
);
|
allow: base.allow_rules(),
|
||||||
|
deny: base.deny_rules(),
|
||||||
|
};
|
||||||
|
config.allow.push(ScopeRule {
|
||||||
|
target: spill.path().to_path_buf(),
|
||||||
|
permission: Permission::Read,
|
||||||
|
recursive: true,
|
||||||
|
});
|
||||||
|
let scope = Scope::from_config(&config).unwrap();
|
||||||
|
let fs = ScopedFs::new(scope, dir.path().to_path_buf());
|
||||||
let tracker = Tracker::new();
|
let tracker = Tracker::new();
|
||||||
(dir, Registry::new(builtin_tools(fs, tracker)))
|
let reg = Registry::new(builtin_tools(fs, tracker, spill.path().to_path_buf()));
|
||||||
|
(dir, spill, reg)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn unicode_path_and_content() {
|
async fn unicode_path_and_content() {
|
||||||
let (dir, reg) = setup();
|
let (dir, _spill, reg) = setup();
|
||||||
let file = dir.path().join("日本語ファイル.txt");
|
let file = dir.path().join("日本語ファイル.txt");
|
||||||
let content = "こんにちは 🦀 世界\nabc\n";
|
let content = "こんにちは 🦀 世界\nabc\n";
|
||||||
|
|
||||||
|
|
@ -70,7 +80,7 @@ async fn unicode_path_and_content() {
|
||||||
async fn symlink_to_outside_scope_is_rejected_for_write() {
|
async fn symlink_to_outside_scope_is_rejected_for_write() {
|
||||||
use std::os::unix::fs::symlink;
|
use std::os::unix::fs::symlink;
|
||||||
|
|
||||||
let (dir, reg) = setup();
|
let (dir, _spill, reg) = setup();
|
||||||
let outside = TempDir::new().unwrap();
|
let outside = TempDir::new().unwrap();
|
||||||
let outside_target = outside.path().join("secret.txt");
|
let outside_target = outside.path().join("secret.txt");
|
||||||
std::fs::write(&outside_target, "secret").unwrap();
|
std::fs::write(&outside_target, "secret").unwrap();
|
||||||
|
|
@ -114,7 +124,7 @@ async fn symlink_to_outside_scope_is_rejected_for_write() {
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn empty_file_read_and_edit() {
|
async fn empty_file_read_and_edit() {
|
||||||
let (dir, reg) = setup();
|
let (dir, _spill, reg) = setup();
|
||||||
let file = dir.path().join("empty.txt");
|
let file = dir.path().join("empty.txt");
|
||||||
std::fs::write(&file, "").unwrap();
|
std::fs::write(&file, "").unwrap();
|
||||||
|
|
||||||
|
|
@ -144,7 +154,7 @@ async fn empty_file_read_and_edit() {
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn very_long_single_line() {
|
async fn very_long_single_line() {
|
||||||
let (dir, reg) = setup();
|
let (dir, _spill, reg) = setup();
|
||||||
let file = dir.path().join("long.txt");
|
let file = dir.path().join("long.txt");
|
||||||
let big: String = "x".repeat(1024 * 1024); // 1 MiB, no newlines
|
let big: String = "x".repeat(1024 * 1024); // 1 MiB, no newlines
|
||||||
std::fs::write(&file, &big).unwrap();
|
std::fs::write(&file, &big).unwrap();
|
||||||
|
|
@ -160,7 +170,7 @@ async fn very_long_single_line() {
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn relative_path_is_rejected() {
|
async fn relative_path_is_rejected() {
|
||||||
let (_dir, reg) = setup();
|
let (_dir, _spill, reg) = setup();
|
||||||
let read = reg.get("Read");
|
let read = reg.get("Read");
|
||||||
let err = read
|
let err = read
|
||||||
.execute(&json!({ "file_path": "relative.txt" }).to_string())
|
.execute(&json!({ "file_path": "relative.txt" }).to_string())
|
||||||
|
|
@ -171,7 +181,7 @@ async fn relative_path_is_rejected() {
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn directory_target_is_rejected_for_read() {
|
async fn directory_target_is_rejected_for_read() {
|
||||||
let (dir, reg) = setup();
|
let (dir, _spill, reg) = setup();
|
||||||
let read = reg.get("Read");
|
let read = reg.get("Read");
|
||||||
let err = read
|
let err = read
|
||||||
.execute(&json!({ "file_path": dir.path().to_str().unwrap() }).to_string())
|
.execute(&json!({ "file_path": dir.path().to_str().unwrap() }).to_string())
|
||||||
|
|
@ -182,7 +192,7 @@ async fn directory_target_is_rejected_for_read() {
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn deeply_nested_new_file_is_created() {
|
async fn deeply_nested_new_file_is_created() {
|
||||||
let (dir, reg) = setup();
|
let (dir, _spill, reg) = setup();
|
||||||
let deep = dir.path().join("a/b/c/d/e/deep.txt");
|
let deep = dir.path().join("a/b/c/d/e/deep.txt");
|
||||||
let write = reg.get("Write");
|
let write = reg.get("Write");
|
||||||
write
|
write
|
||||||
|
|
@ -200,7 +210,7 @@ async fn deeply_nested_new_file_is_created() {
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn replace_preserves_unicode() {
|
async fn replace_preserves_unicode() {
|
||||||
let (dir, reg) = setup();
|
let (dir, _spill, reg) = setup();
|
||||||
let file = dir.path().join("u.txt");
|
let file = dir.path().join("u.txt");
|
||||||
std::fs::write(&file, "🦀 rust 🦀\n").unwrap();
|
std::fs::write(&file, "🦀 rust 🦀\n").unwrap();
|
||||||
|
|
||||||
|
|
@ -225,7 +235,7 @@ async fn replace_preserves_unicode() {
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn grep_handles_unicode_pattern() {
|
async fn grep_handles_unicode_pattern() {
|
||||||
let (dir, reg) = setup();
|
let (dir, _spill, reg) = setup();
|
||||||
let file = dir.path().join("u.txt");
|
let file = dir.path().join("u.txt");
|
||||||
std::fs::write(&file, "English\n日本語\nрусский\n").unwrap();
|
std::fs::write(&file, "English\n日本語\nрусский\n").unwrap();
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -8,11 +8,25 @@ use std::path::Path;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use llm_worker::tool::{Tool, ToolDefinition, ToolMeta};
|
use llm_worker::tool::{Tool, ToolDefinition, ToolMeta};
|
||||||
use manifest::Scope;
|
use manifest::{Permission, Scope, ScopeConfig, ScopeRule};
|
||||||
use serde_json::json;
|
use serde_json::json;
|
||||||
use tempfile::TempDir;
|
use tempfile::TempDir;
|
||||||
use tools::{ScopedFs, Tracker, builtin_tools};
|
use tools::{ScopedFs, Tracker, builtin_tools};
|
||||||
|
|
||||||
|
fn scope_with_spill(workspace: &Path, spill: &Path) -> Scope {
|
||||||
|
let base = Scope::writable(workspace).unwrap();
|
||||||
|
let mut config = ScopeConfig {
|
||||||
|
allow: base.allow_rules(),
|
||||||
|
deny: base.deny_rules(),
|
||||||
|
};
|
||||||
|
config.allow.push(ScopeRule {
|
||||||
|
target: spill.to_path_buf(),
|
||||||
|
permission: Permission::Read,
|
||||||
|
recursive: true,
|
||||||
|
});
|
||||||
|
Scope::from_config(&config).unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
struct Registry {
|
struct Registry {
|
||||||
entries: Vec<(ToolMeta, Arc<dyn Tool>)>,
|
entries: Vec<(ToolMeta, Arc<dyn Tool>)>,
|
||||||
}
|
}
|
||||||
|
|
@ -36,15 +50,14 @@ impl Registry {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn setup() -> (TempDir, Registry) {
|
fn setup() -> (TempDir, TempDir, Registry) {
|
||||||
let dir = TempDir::new().unwrap();
|
let dir = TempDir::new().unwrap();
|
||||||
let fs = ScopedFs::new(
|
let spill = TempDir::new().unwrap();
|
||||||
Scope::writable(dir.path()).unwrap(),
|
let scope = scope_with_spill(dir.path(), spill.path());
|
||||||
dir.path().to_path_buf(),
|
let fs = ScopedFs::new(scope, dir.path().to_path_buf());
|
||||||
);
|
|
||||||
let tracker = Tracker::new();
|
let tracker = Tracker::new();
|
||||||
let reg = Registry::new(builtin_tools(fs, tracker));
|
let reg = Registry::new(builtin_tools(fs, tracker, spill.path().to_path_buf()));
|
||||||
(dir, reg)
|
(dir, spill, reg)
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn call(tool: &Arc<dyn Tool>, input: serde_json::Value) -> llm_worker::tool::ToolOutput {
|
async fn call(tool: &Arc<dyn Tool>, input: serde_json::Value) -> llm_worker::tool::ToolOutput {
|
||||||
|
|
@ -60,16 +73,16 @@ async fn call_err(tool: &Arc<dyn Tool>, input: serde_json::Value) -> llm_worker:
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn builtin_tools_registers_all_five() {
|
fn builtin_tools_registers_full_set() {
|
||||||
let (_dir, reg) = setup();
|
let (_dir, _spill, reg) = setup();
|
||||||
let mut names = reg.names();
|
let mut names = reg.names();
|
||||||
names.sort();
|
names.sort();
|
||||||
assert_eq!(names, vec!["Edit", "Glob", "Grep", "Read", "Write"]);
|
assert_eq!(names, vec!["Bash", "Edit", "Glob", "Grep", "Read", "Write"]);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn meta_has_description_and_schema() {
|
fn meta_has_description_and_schema() {
|
||||||
let (_dir, reg) = setup();
|
let (_dir, _spill, reg) = setup();
|
||||||
for (meta, _) in &reg.entries {
|
for (meta, _) in &reg.entries {
|
||||||
assert!(
|
assert!(
|
||||||
!meta.description.is_empty(),
|
!meta.description.is_empty(),
|
||||||
|
|
@ -87,7 +100,7 @@ fn meta_has_description_and_schema() {
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn read_then_edit_then_read_roundtrip() {
|
async fn read_then_edit_then_read_roundtrip() {
|
||||||
let (dir, reg) = setup();
|
let (dir, _spill, reg) = setup();
|
||||||
let file = dir.path().join("a.txt");
|
let file = dir.path().join("a.txt");
|
||||||
std::fs::write(&file, "hello world\n").unwrap();
|
std::fs::write(&file, "hello world\n").unwrap();
|
||||||
let p = file.to_str().unwrap();
|
let p = file.to_str().unwrap();
|
||||||
|
|
@ -119,7 +132,7 @@ async fn read_then_edit_then_read_roundtrip() {
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn write_then_grep_finds_content() {
|
async fn write_then_grep_finds_content() {
|
||||||
let (dir, reg) = setup();
|
let (dir, _spill, reg) = setup();
|
||||||
let write = reg.get("Write");
|
let write = reg.get("Write");
|
||||||
let grep = reg.get("Grep");
|
let grep = reg.get("Grep");
|
||||||
|
|
||||||
|
|
@ -148,7 +161,7 @@ async fn write_then_grep_finds_content() {
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn glob_finds_written_files() {
|
async fn glob_finds_written_files() {
|
||||||
let (dir, reg) = setup();
|
let (dir, _spill, reg) = setup();
|
||||||
let write = reg.get("Write");
|
let write = reg.get("Write");
|
||||||
let glob = reg.get("Glob");
|
let glob = reg.get("Glob");
|
||||||
|
|
||||||
|
|
@ -172,7 +185,7 @@ async fn glob_finds_written_files() {
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn out_of_scope_write_is_rejected() {
|
async fn out_of_scope_write_is_rejected() {
|
||||||
let (_dir, reg) = setup();
|
let (_dir, _spill, reg) = setup();
|
||||||
let outside = TempDir::new().unwrap();
|
let outside = TempDir::new().unwrap();
|
||||||
let write = reg.get("Write");
|
let write = reg.get("Write");
|
||||||
|
|
||||||
|
|
@ -191,7 +204,7 @@ async fn out_of_scope_write_is_rejected() {
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn write_to_existing_without_read_fails() {
|
async fn write_to_existing_without_read_fails() {
|
||||||
let (dir, reg) = setup();
|
let (dir, _spill, reg) = setup();
|
||||||
let file = dir.path().join("exists.txt");
|
let file = dir.path().join("exists.txt");
|
||||||
std::fs::write(&file, "preexisting").unwrap();
|
std::fs::write(&file, "preexisting").unwrap();
|
||||||
|
|
||||||
|
|
@ -212,7 +225,7 @@ async fn write_to_existing_without_read_fails() {
|
||||||
async fn shared_scoped_fs_across_tools() {
|
async fn shared_scoped_fs_across_tools() {
|
||||||
// The key invariant: all builtin tools share the same ScopedFs instance,
|
// The key invariant: all builtin tools share the same ScopedFs instance,
|
||||||
// so read-history set by Read is visible to Edit and Write.
|
// so read-history set by Read is visible to Edit and Write.
|
||||||
let (dir, reg) = setup();
|
let (dir, _spill, reg) = setup();
|
||||||
let file = dir.path().join("shared.txt");
|
let file = dir.path().join("shared.txt");
|
||||||
std::fs::write(&file, "one\n").unwrap();
|
std::fs::write(&file, "one\n").unwrap();
|
||||||
|
|
||||||
|
|
@ -235,7 +248,7 @@ async fn shared_scoped_fs_across_tools() {
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn edit_requires_read_across_tools() {
|
async fn edit_requires_read_across_tools() {
|
||||||
let (dir, reg) = setup();
|
let (dir, _spill, reg) = setup();
|
||||||
let file = dir.path().join("a.txt");
|
let file = dir.path().join("a.txt");
|
||||||
std::fs::write(&file, "foo\n").unwrap();
|
std::fs::write(&file, "foo\n").unwrap();
|
||||||
|
|
||||||
|
|
@ -256,17 +269,17 @@ async fn edit_requires_read_across_tools() {
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn deterministic_tool_order_is_registration_order() {
|
async fn deterministic_tool_order_is_registration_order() {
|
||||||
let (_dir, reg) = setup();
|
let (_dir, _spill, reg) = setup();
|
||||||
// Registration order from builtin_tools(): Read, Write, Edit, Glob, Grep
|
// Registration order from builtin_tools(): Read, Write, Edit, Glob, Grep, Bash
|
||||||
let names: Vec<&str> = reg.entries.iter().map(|(m, _)| m.name.as_str()).collect();
|
let names: Vec<&str> = reg.entries.iter().map(|(m, _)| m.name.as_str()).collect();
|
||||||
assert_eq!(names, vec!["Read", "Write", "Edit", "Glob", "Grep"]);
|
assert_eq!(names, vec!["Read", "Write", "Edit", "Glob", "Grep", "Bash"]);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Regression: tool name capitalization matches Claude Code reference
|
// Regression: tool name capitalization matches Claude Code reference
|
||||||
#[test]
|
#[test]
|
||||||
fn tool_names_match_reference_spec() {
|
fn tool_names_match_reference_spec() {
|
||||||
let (_dir, reg) = setup();
|
let (_dir, _spill, reg) = setup();
|
||||||
for expected in ["Read", "Write", "Edit", "Glob", "Grep"] {
|
for expected in ["Read", "Write", "Edit", "Glob", "Grep", "Bash"] {
|
||||||
assert!(
|
assert!(
|
||||||
reg.entries.iter().any(|(m, _)| m.name == expected),
|
reg.entries.iter().any(|(m, _)| m.name == expected),
|
||||||
"missing tool {expected}"
|
"missing tool {expected}"
|
||||||
|
|
@ -278,12 +291,11 @@ fn tool_names_match_reference_spec() {
|
||||||
async fn tracker_recent_files_tracks_read_write_edit() {
|
async fn tracker_recent_files_tracks_read_write_edit() {
|
||||||
// Build a fresh registry that shares a tracker we can query afterwards.
|
// Build a fresh registry that shares a tracker we can query afterwards.
|
||||||
let dir = TempDir::new().unwrap();
|
let dir = TempDir::new().unwrap();
|
||||||
let fs = ScopedFs::new(
|
let spill = TempDir::new().unwrap();
|
||||||
Scope::writable(dir.path()).unwrap(),
|
let scope = scope_with_spill(dir.path(), spill.path());
|
||||||
dir.path().to_path_buf(),
|
let fs = ScopedFs::new(scope, dir.path().to_path_buf());
|
||||||
);
|
|
||||||
let tracker = Tracker::new();
|
let tracker = Tracker::new();
|
||||||
let reg = Registry::new(builtin_tools(fs, tracker.clone()));
|
let reg = Registry::new(builtin_tools(fs, tracker.clone(), spill.path().to_path_buf()));
|
||||||
|
|
||||||
let a = dir.path().join("a.txt");
|
let a = dir.path().join("a.txt");
|
||||||
let b = dir.path().join("b.txt");
|
let b = dir.path().join("b.txt");
|
||||||
|
|
@ -324,5 +336,52 @@ async fn tracker_recent_files_tracks_read_write_edit() {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn bash_inherits_scoped_fs_pwd() {
|
||||||
|
// The Bash tool starts at the ScopedFs's pwd. Without any `cd`, its
|
||||||
|
// `pwd` should canonicalize to the workspace root we set up.
|
||||||
|
let (dir, _spill, reg) = setup();
|
||||||
|
let bash = reg.get("Bash");
|
||||||
|
let out = call(&bash, json!({ "command": "pwd" })).await;
|
||||||
|
let body = out.content.unwrap();
|
||||||
|
let actual = std::fs::canonicalize(body.trim()).unwrap();
|
||||||
|
let expected = std::fs::canonicalize(dir.path()).unwrap();
|
||||||
|
assert_eq!(actual, expected);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn bash_spilled_file_is_readable_via_read_tool() {
|
||||||
|
// Long Bash output spills to a path that the controller has added to
|
||||||
|
// the readable scope. The agent should be able to Read that path
|
||||||
|
// exactly like any in-scope file.
|
||||||
|
let (_dir, spill, reg) = setup();
|
||||||
|
let bash = reg.get("Bash");
|
||||||
|
let out = call(
|
||||||
|
&bash,
|
||||||
|
json!({ "command": "for i in $(seq 1 200); do echo line $i; done" }),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
let body = out.content.unwrap();
|
||||||
|
let spill_str = spill.path().to_str().unwrap();
|
||||||
|
|
||||||
|
// Extract the spilled path from the marker line.
|
||||||
|
let marker = body.lines().next().unwrap();
|
||||||
|
let prefix_pos = marker
|
||||||
|
.find(spill_str)
|
||||||
|
.expect("marker should reference the spill dir");
|
||||||
|
let path_end_rel = marker[prefix_pos..]
|
||||||
|
.find(".log")
|
||||||
|
.expect("marker should end the path with .log");
|
||||||
|
let spilled = &marker[prefix_pos..prefix_pos + path_end_rel + 4];
|
||||||
|
|
||||||
|
// Read the file via the Read tool — must succeed (in scope).
|
||||||
|
let read_out = call(&reg.get("Read"), json!({ "file_path": spilled })).await;
|
||||||
|
let read_body = read_out.content.expect("Read returned content");
|
||||||
|
// The full 200 lines should be in the saved file even though Bash
|
||||||
|
// returned only the tail of 80.
|
||||||
|
assert!(read_body.contains("line 1\n"), "missing line 1: {read_body}");
|
||||||
|
assert!(read_body.contains("line 200"), "missing line 200");
|
||||||
|
}
|
||||||
|
|
||||||
// Sanity: unused Path import guard
|
// Sanity: unused Path import guard
|
||||||
const _: fn() -> &'static Path = || Path::new("/");
|
const _: fn() -> &'static Path = || Path::new("/");
|
||||||
|
|
|
||||||
|
|
@ -590,22 +590,31 @@ fn render_default(tc: &ToolCallBlock, mode: Mode) -> Vec<Line<'static>> {
|
||||||
.add_modifier(Modifier::ITALIC),
|
.add_modifier(Modifier::ITALIC),
|
||||||
);
|
);
|
||||||
|
|
||||||
let summary_source: String = match &tc.state {
|
// Body source: prefer the full output (e.g. Bash's stdout/stderr) so
|
||||||
|
// Detail mode can expose it. Fall back to the summary when the tool
|
||||||
|
// didn't emit any content.
|
||||||
|
let body_source: String = match &tc.state {
|
||||||
|
ToolCallState::Done {
|
||||||
|
output: Some(out), ..
|
||||||
|
}
|
||||||
|
| ToolCallState::Error {
|
||||||
|
output: Some(out), ..
|
||||||
|
} => out.clone(),
|
||||||
ToolCallState::Done { summary, .. } | ToolCallState::Error { summary, .. } => {
|
ToolCallState::Done { summary, .. } | ToolCallState::Error { summary, .. } => {
|
||||||
summary.clone()
|
summary.clone()
|
||||||
}
|
}
|
||||||
_ => String::new(),
|
_ => String::new(),
|
||||||
};
|
};
|
||||||
let summary_cap = match mode {
|
let body_cap = match mode {
|
||||||
Mode::Normal => 3,
|
Mode::Normal => 3,
|
||||||
Mode::Detail => usize::MAX,
|
Mode::Detail => usize::MAX,
|
||||||
Mode::Overview => unreachable!(),
|
Mode::Overview => unreachable!(),
|
||||||
};
|
};
|
||||||
if !summary_source.is_empty() {
|
if !body_source.is_empty() {
|
||||||
emit_capped_lines(
|
emit_capped_lines(
|
||||||
&mut lines,
|
&mut lines,
|
||||||
&summary_source,
|
&body_source,
|
||||||
summary_cap,
|
body_cap,
|
||||||
Style::default().fg(Color::Gray),
|
Style::default().fg(Color::Gray),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -53,6 +53,22 @@ CronCreate
|
||||||
|
|
||||||
が返ってくる。「上のツール定義と同じエンコーディング」と明示されており、以降そのツールは通常通り呼べるようになる。
|
が返ってくる。「上のツール定義と同じエンコーディング」と明示されており、以降そのツールは通常通り呼べるようになる。
|
||||||
|
|
||||||
|
### 1.5 パラメータ値のエンコーディング規約
|
||||||
|
|
||||||
|
ツール呼び出しの `<parameter>` タグの中身は、値の型に応じて異なるエンコーディングを使う:
|
||||||
|
|
||||||
|
- プリミティブ (string / number / boolean): そのままテキスト
|
||||||
|
- 配列・オブジェクト: JSON 文字列としてシリアライズしてテキストに
|
||||||
|
|
||||||
|
system prompt 末尾にも以下のように明記されている:
|
||||||
|
|
||||||
|
```
|
||||||
|
When making function calls using tools that accept array or object parameters
|
||||||
|
ensure those are structured using JSON.
|
||||||
|
```
|
||||||
|
|
||||||
|
つまり「XML が外側の骨格、中身は型に応じてテキスト/JSON」という二層構造。
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 2. パラダイムの推測: prompted tool use
|
## 2. パラダイムの推測: prompted tool use
|
||||||
|
|
@ -180,3 +196,115 @@ deferred tools 方式は **prompted tool use を前提とする限り**、これ
|
||||||
> Claude Code の観察上は、ツール定義や呼び出しが prompt 内テキストとして見えている。ただし、公開されている Anthropic API の tool search は `tools` 配列、`defer_loading`、`tool_reference`、`tool_use` を使う structured tool use として説明されている。したがって、Claude Code 内部が完全な prompted tool use なのか、API の structured tool use を CLI / ハーネス側で別表現にレンダリングしているのか、あるいはそのハイブリッドなのかは未確認。
|
> Claude Code の観察上は、ツール定義や呼び出しが prompt 内テキストとして見えている。ただし、公開されている Anthropic API の tool search は `tools` 配列、`defer_loading`、`tool_reference`、`tool_use` を使う structured tool use として説明されている。したがって、Claude Code 内部が完全な prompted tool use なのか、API の structured tool use を CLI / ハーネス側で別表現にレンダリングしているのか、あるいはそのハイブリッドなのかは未確認。
|
||||||
|
|
||||||
Pod / insomnia への示唆としては、deferred tools の設計目的である context 圧縮、tool selection accuracy の維持、prefix cache の安定化は公式情報でも裏付けられる。一方で、Anthropic API の現在の公開設計を参考にするなら、`tool_search` 相当の実装は「単なる schema text の注入」だけでなく、内部 registry 上の tool reference、ロード済み tool の状態管理、検証レイヤを明確に分けて設計する方がよい。
|
Pod / insomnia への示唆としては、deferred tools の設計目的である context 圧縮、tool selection accuracy の維持、prefix cache の安定化は公式情報でも裏付けられる。一方で、Anthropic API の現在の公開設計を参考にするなら、`tool_search` 相当の実装は「単なる schema text の注入」だけでなく、内部 registry 上の tool reference、ロード済み tool の状態管理、検証レイヤを明確に分けて設計する方がよい。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 9. ツール I/O の実際のフォーマット
|
||||||
|
|
||||||
|
(2026-05-01 追記)
|
||||||
|
|
||||||
|
deferred tools の本論からはやや脇道だが、ToolSearch がスキーマテキストを context に注入することで「ツールが使える状態」になる仕組みを理解するためには、ツール定義・呼び出しの実フォーマットと、Anthropic API の公開 surface との対応関係を押さえておく必要がある。
|
||||||
|
|
||||||
|
### 9.1 ツール定義の入力フォーマット
|
||||||
|
|
||||||
|
system prompt 冒頭に置かれる:
|
||||||
|
|
||||||
|
```
|
||||||
|
<functions>
|
||||||
|
<function>{"description": "...", "name": "Read", "parameters": {...JSONSchema...}}</function>
|
||||||
|
</functions>
|
||||||
|
```
|
||||||
|
|
||||||
|
外側は XML、`<function>` の中身は単行 JSON。JSON 部分は `name`, `description`, `parameters` の 3 フィールドで、`parameters` は標準的な JSONSchema (`type: "object"`, `properties`, `required`, `additionalProperties` 等)。
|
||||||
|
|
||||||
|
### 9.2 ツール呼び出しの出力フォーマット
|
||||||
|
|
||||||
|
モデル側の生成は完全に XML タグ列:
|
||||||
|
|
||||||
|
```
|
||||||
|
<function_calls>
|
||||||
|
<invoke name="Read">
|
||||||
|
<parameter name="file_path">/foo/bar</parameter>
|
||||||
|
</invoke>
|
||||||
|
</function_calls>
|
||||||
|
```
|
||||||
|
|
||||||
|
`<parameter>` の中身は §1.5 のエンコード規則に従う。
|
||||||
|
|
||||||
|
### 9.3 標準 Anthropic API との関係
|
||||||
|
|
||||||
|
開発者から見える API surface は完全に JSON ベース:
|
||||||
|
|
||||||
|
- リクエスト: `tools: [{name, description, input_schema}]`
|
||||||
|
- レスポンス: `tool_use` content block (JSON)
|
||||||
|
|
||||||
|
ところが Claude Code 上の観察では XML+JSON のハイブリッド表現が実際に流れている。両者の整合は次のように理解できる:
|
||||||
|
|
||||||
|
| | 標準 API (structured tool use) | Claude Code (prompted tool use) |
|
||||||
|
|---|---|---|
|
||||||
|
| 開発者が渡す形式 | JSON (`tools` 配列) | — (ハーネス内製) |
|
||||||
|
| モデルが受け取る prompt | 非公開 (推測: XML+JSON) | XML+JSON (観察可) |
|
||||||
|
| モデルが返す表現 | 非公開 (推測: XML タグ) → API が parse | XML タグ (観察可) |
|
||||||
|
| 開発者が受け取る形式 | `tool_use` block (JSON) | — |
|
||||||
|
|
||||||
|
標準 API では JSON ↔ モデル内部表現の変換が API サーバ側で隠蔽されている。Claude Code 上で観察できるのはその「裸の」表現で、Anthropic がモデル訓練に用いているフォーマットそのものと推測される (同じモデルなので)。
|
||||||
|
|
||||||
|
### 9.4 バリデーションとリトライの内製化
|
||||||
|
|
||||||
|
この構造を見ると、Tool Call API は実質「フォーマット規約 + schema validation + retry」をプロバイダー側に押し込めた仕様と読める:
|
||||||
|
|
||||||
|
1. **フォーマット規約**: XML 骨格と parameter エンコード規則
|
||||||
|
2. **バリデーション**: schema 違反の検出
|
||||||
|
3. **リトライ**: malformed なら API 内部で再生成し、開発者には完成品だけ返す
|
||||||
|
4. **訓練投資**: そのフォーマットで RLHF / SFT 済み
|
||||||
|
|
||||||
|
開発者が `tool_use` block を常に正しい JSON として受け取れるのは、(4) のおかげで失敗率が低く、(1)-(3) のおかげで失敗時も隠蔽されているから。Cline 等の prompted tool use 実装が同じことをやろうとしても、(4) が効かないため精度・安定性で見劣りしていたのは、この訓練投資の差で説明できる。
|
||||||
|
|
||||||
|
ただし Claude Code のハーネスは **token-level 制約 (grammar-based sampling) を入れていない**ことが、§10 の実演から推測できる。「自由に生成 → パース失敗なら error を tool_result で返して retry」という設計で、token 制約は使っていない。これは inference サーバ側の実装コストを避けつつ、(4) の訓練品質に依存する方針。
|
||||||
|
|
||||||
|
### 9.5 ローカル LLM への含意
|
||||||
|
|
||||||
|
Pod / insomnia でローカル LLM を使う場合、(4) が効かない。最近のローカル向けエージェントモデルは tool use 用に訓練されているので XML パースのような原始的処理は不要だが、各モデルが訓練された自前のフォーマット (Hermes / Llama / Qwen / Mistral 等で異なる) があり、それに合わせてレンダリングする必要がある。
|
||||||
|
|
||||||
|
具体的な責務分担は以下:
|
||||||
|
|
||||||
|
- ツール定義のレンダリング: モデル固有のテンプレート (chat template の `tools` 拡張等) に合わせる
|
||||||
|
- 出力パース: モデルが生成した形式 (タグ / JSON / 独自トークン) をハーネスでパース
|
||||||
|
- バリデーション: 自前で schema 照合
|
||||||
|
- リトライ: パース失敗・schema 違反時に error を返してモデル側に修正させる
|
||||||
|
|
||||||
|
これは Claude Code が内製化しているもののローカル版そのもの。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 10. 実演: schema 未ロードでの呼び出し
|
||||||
|
|
||||||
|
(2026-05-01 実演)
|
||||||
|
|
||||||
|
§3 の推測「検証の真実はレジストリであり、context にスキーマテキストが現れたかどうかではない」を確認するため、deferred tool である `TaskList` を ToolSearch せずに直接呼び出した。
|
||||||
|
|
||||||
|
### 10.1 結果
|
||||||
|
|
||||||
|
受理された (`No tasks found` が返ってきた)。InputValidationError は発生しなかった。
|
||||||
|
|
||||||
|
### 10.2 含意
|
||||||
|
|
||||||
|
1. **schema 未ロード = 呼び出せない、ではない**。少なくとも引数なしで呼べるツールでは、schema text が context に無くても通る
|
||||||
|
2. ハーネスのバリデーションは「schema text が context にあるか」ではなく、**実引数が registry の schema に合致するか**で判定している
|
||||||
|
3. system-reminder の "calling them directly will fail with InputValidationError" は厳密には常に真ではない。引数が schema と矛盾しないケース (特に必須引数のないツールを引数なしで呼ぶ場合) では素通りする
|
||||||
|
4. context に積まれる schema text は、モデルが正しい引数を生成するための **誘導 / プロンプト材料** であって、validation の入力ではない
|
||||||
|
|
||||||
|
§3 の推測がそのまま裏付けられた形になる。
|
||||||
|
|
||||||
|
### 10.3 system-reminder の役割の再解釈
|
||||||
|
|
||||||
|
警告が「常に fail する」と読めるのは過剰表現で、実際には「**引数 schema が必要なツールは引数指定が必須なので、schema を知らずに呼べば事実上 fail する**」というモデルへの誘導と理解するのが正確。registry 側のバリデーションは引数の中身を見ているだけで、context に schema text があるかは見ていない。
|
||||||
|
|
||||||
|
### 10.4 設計上の含意
|
||||||
|
|
||||||
|
Pod / insomnia 側で同様の機構を作る場合:
|
||||||
|
|
||||||
|
- 「schema text が context にあるか」を validation 条件にする必要はない (むしろしない方が単純)
|
||||||
|
- registry に常時全 tool を登録しておき、context へのレンダリングだけ deferred にする
|
||||||
|
- モデルが (誘導を無視して) schema 未ロードのツールを呼んでも、引数が合っていれば実行してよい
|
||||||
|
- この方が registry の真実性が一本化されて実装が単純になる
|
||||||
|
|
|
||||||
|
|
@ -25,3 +25,8 @@ Bash の子プロセスは ScopedFs を経由しない。Scope による保護
|
||||||
## 依存チケット
|
## 依存チケット
|
||||||
|
|
||||||
- [permission-extension-point.md](permission-extension-point.md) — deny/allow ルールによる Bash コマンド制御
|
- [permission-extension-point.md](permission-extension-point.md) — deny/allow ルールによる Bash コマンド制御
|
||||||
|
|
||||||
|
## Review
|
||||||
|
- 状態: Approve with follow-up
|
||||||
|
- レビュー詳細: [./bash-tool.review.md](./bash-tool.review.md)
|
||||||
|
- 日付: 2026-05-01
|
||||||
|
|
|
||||||
44
tickets/bash-tool.review.md
Normal file
44
tickets/bash-tool.review.md
Normal file
|
|
@ -0,0 +1,44 @@
|
||||||
|
# Review: Bash ツール
|
||||||
|
|
||||||
|
## 前提・要件の確認
|
||||||
|
|
||||||
|
- **コマンド実行 (`tokio::process::Command`)**: 満たされている。`crates/tools/src/bash.rs:94-103` で `bash -c <wrapped>` を起動。`stdin(null)` で stdin ブロックを防止、`kill_on_drop(true)` でタイムアウト時のリーク防止。
|
||||||
|
- **timeout (default 120s / max 600s)**: 満たされている。`bash.rs:38-39, 64-67` の `clamp(1, 600)`、`bash.rs:130-144` の `tokio::time::timeout`。`timeout_kills_long_command` で動作確認済み。
|
||||||
|
- **作業ディレクトリの永続**: 満たされている。`cd` のパースに頼らず wrapper script + tempfile で post-command の `pwd` を取得(`bash.rs:74-90`)。`cd_persists_across_calls` テストで `subdir` 移動後の `pwd` が反映されることを確認。`canonicalize` 同士で比較しており macOS の `/private/tmp` ずれにも耐性あり。
|
||||||
|
- **stdout/stderr 結合**: 満たされている。wrapper 内 `exec 2>&1` で実装、`merges_stdout_and_stderr` テストで両方含まれることを確認。子プロセス側の `stderr(Stdio::null())` も整合。
|
||||||
|
- **`ToolOutput` summary(コマンド + exit code)+ content(出力)**: 満たされている。`bash.rs:164-175` で exit 0 / 非 0 / シグナルを区別。content が空のときは `None` を返しており、`SUMMARY_THRESHOLD` を意識した良い実装。
|
||||||
|
|
||||||
|
## アーキテクチャ・スコープ
|
||||||
|
|
||||||
|
- **層分離**: `tools` クレート内に閉じており、`llm-worker` を低レベル基盤に保つ方針と整合(`bash.rs:20` で `Tool` trait のみ依存)。`builtin_tools()` のファクトリ列に追加するだけで、層を跨ぐ侵入はない。
|
||||||
|
- **クレート命名/構造**: `bash.rs` を独立モジュールに切り出し、`lib.rs` で `pub use bash::bash_tool` のみ公開。`read.rs/write.rs/...` と一貫。
|
||||||
|
- **依存追加**: `Cargo.toml` の tokio features に `process`/`time`/`io-util`/`sync` を追加(`Cargo.toml:22`)。`tempfile` は既存。`cargo add` 経由前提のフィールド追加で違和感なし。
|
||||||
|
- **Permission 層との関係**: ticket の前提通り、ScopedFs では保護せず Permission 層に委譲。`lib.rs:18-19` のドキュメントコメントで明示しており、設計意図は読み手に伝わる。
|
||||||
|
- **設計判断 1(wrapper による pwd 取得)**: `cd` パースの脆さ(サブシェル、変数展開、関数定義内 `cd` 等)を回避できるので妥当。`exec` で bash 自体が置換されると wrapper が走らないが、`bash.rs:149-155` が「ファイル読めなければ pwd 据え置き」とフェイルソフトしておりロバスト。
|
||||||
|
- **設計判断 2(wrapper の `wait`)**: `(sleep 0.05; echo bg) &` のようなジョブで stdout が EOF せずハングする問題に対する実装上必須の対処。`background_job_does_not_hang` で回帰防止済み。
|
||||||
|
- **設計判断 3(`tokio::sync::Mutex` で逐次化)**: pwd の共有可変状態と「順序のある shell セッション」の意味論を考えると正解。長時間コマンドの間 lock を握り続けるのは仕様上自然(同一セッションの bash は元々直列)。
|
||||||
|
- **設計判断 4(256KB cap)**: worker 側 `ToolOutputLimits` の手前で OOM を抑える二重防壁。truncated marker の追記後に `String::from_utf8_lossy` で UTF-8 化しており、マルチバイト文字の途中で切断された場合はその部分が U+FFFD に置換される(ロスレスではない)が、panic はしない。妥当。
|
||||||
|
- **設計判断 5(summary/content)**: 既存ツールと API 形状が一致。`SUMMARY_THRESHOLD` の境界も意識されている。
|
||||||
|
- **設計判断 6(description のプロンプト誘導)**: Read/Write/Edit/Glob/Grep を優先させる文言は、Claude Code リファレンスとも整合し、ローカルモデルでも効きやすい簡潔さ。
|
||||||
|
|
||||||
|
## 指摘事項
|
||||||
|
|
||||||
|
### Non-blocking / Follow-up
|
||||||
|
|
||||||
|
- **TUI 側の `render_default` 修正の同梱について** (`crates/tui/src/tool.rs:590-619`)
|
||||||
|
- 内容としては正しいバグ修正。Bash のような汎用ツールが Detail モードでも summary しか出ない状態を解消している。
|
||||||
|
- ただし、厳密には Bash チケットの範囲外(既存の任意の "default 経路の" ツールに同じ問題があったはず)。同梱の妥当性: Bash 投入によりバグが顕在化したこと、5 行程度の置き換えで完結すること、Bash 単体だと UX として未完であることを踏まえれば現実的な判断と言える。次回同種の状況では、TUI 表示仕様の修正として別チケットを切るほうがレビュー単位がきれいになる、というレベル。
|
||||||
|
- フォローアップ提案: `crates/tui/` 配下に `output` を含むレンダリングが Detail/Normal で正しく出ることを確認するスナップショット/ユニットテストを 1 本追加すると、将来の `summary` フォールバック方向への意図しない退行を防げる(現状はロジックレビューのみで担保)。
|
||||||
|
|
||||||
|
- **`docs/ref/claude-code-deferred-tools.md` への追記**: Bash 実装と直接関係しない文献参照の追加(Anthropic vs OpenAI 比較への言及)。1 段落で軽微とはいえ、チケットスコープからは外れている。次回はドキュメント更新も別コミット/別チケット推奨。
|
||||||
|
|
||||||
|
- **pwd 更新の堅牢性についての観察 (`bash.rs:149-155`)**: ユーザーコマンドが `exec some-program` で bash を置換した場合や、wrapper の `pwd > tempfile` がディスクフル等で失敗した場合に pwd が据え置かれる挙動になっている。仕様上は妥当だが、ユーザー視点では「`cd foo && exec bar` 後に `cd` が消えた」ように見える可能性がある。コメントで現挙動の合理性は説明されているので blocking ではないが、将来 Permission 層導入時にエッジケースとして再考の余地あり。
|
||||||
|
|
||||||
|
### Nits
|
||||||
|
|
||||||
|
- `BashParams` の `timeout` フィールドが `Option<u64>` で `#[serde(default)]` だが、`Option` は serde が自動的に欠落を `None` にするため `#[serde(default)]` は冗長(害はない)。
|
||||||
|
- `bash.rs:111-112` の `let mut child = child; let mut stdout = stdout;` は `async move` ブロックで mutable に再束縛しているだけ。慣用的だが `let mut` を引数側で書いてもよい。スタイル差。
|
||||||
|
|
||||||
|
## 判断
|
||||||
|
|
||||||
|
**Approve with follow-up** — チケット要件は完全に満たされており、設計判断もすべて合理的に説明されている。テストカバレッジ (8 unit + 1 integration) も妥当。同梱されている TUI 修正は実害のあるバグ修正で内容は正しいが、本来は別チケット相当のスコープ越えがあり、回帰テストの追加は次回までのフォローアップとして残しておくとよい。
|
||||||
81
tickets/submit-file-ref-resolver.md
Normal file
81
tickets/submit-file-ref-resolver.md
Normal file
|
|
@ -0,0 +1,81 @@
|
||||||
|
# サブミット入力: FileRef リゾルバ
|
||||||
|
|
||||||
|
## 背景
|
||||||
|
|
||||||
|
`tickets/submit-tui-completion.md` で `@<path>` が typed atom として入力され、submit 時に `Segment::FileRef { path }` で Pod へ届く経路が完成した。一方 Pod 側 (`Pod::flatten_segments` in `crates/pod/src/pod.rs`) は今 `FileRef` を見ても resolver を持たず、`Segment::flatten_to_text` の placeholder (`[unresolved file ref: ...]`) を user message に inline するだけで、Warn alert を吐いて終わっている。
|
||||||
|
|
||||||
|
Claude Code の `@<path>` と同等の挙動 — submit 時にファイル本文を読み、LLM context にそのまま見せる — を入れる。`compact/worker.rs` の `mark_read_required` 経路で完成済みの auto-read(`PodFsView::render_auto_read`)と兄弟関係になる、submit 時版のリゾルバ。
|
||||||
|
|
||||||
|
## 要件
|
||||||
|
|
||||||
|
### Item 配置
|
||||||
|
|
||||||
|
履歴に永続化する形は以下の **2 種類の Item**(user message 1 件 + 解決できた `FileRef` ごとに system message 1 件)にする:
|
||||||
|
|
||||||
|
```
|
||||||
|
[..., user_message, system_message(file1), system_message(file2), ...]
|
||||||
|
```
|
||||||
|
|
||||||
|
user message 自体は今と同じく `Segment::flatten_to_text` 由来のテキスト(`@<path>` トークンが残った placeholder 込み)。直後に `[File: <path>]\n<本文>` 形式の system message を、`FileRef` の出現順に追加する。次ターン以降も LLM が見える状態で残す(compact が走った時点で既存の auto-read 機構が引き継ぐ)。
|
||||||
|
|
||||||
|
inline 結合(user 1 メッセージに本文を流し込む)は採らない。
|
||||||
|
|
||||||
|
### 本文の取り扱い
|
||||||
|
|
||||||
|
- `PodFsView` (`crates/pod/src/fs_view.rs`) 経由で読む。スコープ判定は `ScopedFs` 任せ。
|
||||||
|
- 上限は通常の Tool Output と同じ `manifest::defaults::TOOL_OUTPUT_MAX_BYTES` (16 KB)。超過分は捨て、末尾に `[...truncated, <total> bytes total — use read_file for the rest]` を付ける。LLM が必要なら自分で `read_file` を呼ぶ前提。
|
||||||
|
- 非 UTF-8(バイナリ)はリゾルバが拒否する。後述の失敗扱いに倒す。
|
||||||
|
|
||||||
|
### 失敗時の扱い
|
||||||
|
|
||||||
|
スコープ外 / NotFound / バイナリ拒否は **Alert + placeholder 残置**:
|
||||||
|
|
||||||
|
- ユーザー向け Alert を `AlertLevel::Warn` で発火(理由を含めた一文)
|
||||||
|
- 該当 segment の system message は出さない(user message 中の `[unresolved file ref: <path>]` プレースホルダーがそのまま LLM に届く)
|
||||||
|
|
||||||
|
これは「ユーザーの誤入力を早期に可視化する」狙い。silent fallback にしない。
|
||||||
|
|
||||||
|
### Worker 側 API 拡張
|
||||||
|
|
||||||
|
submit 時に user message と system messages を一つの turn の前置として履歴に積む経路を、既存の `Interceptor` action-return パターンに合わせて足す。`TurnEndAction::ContinueWithMessages(Vec<Item>)` (`crates/llm-worker/src/worker.rs:903`) と同形:
|
||||||
|
|
||||||
|
- `Interceptor::on_prompt_submit` の戻り値を拡張し、`Continue` / `Cancel(String)` に加えて `ContinueWith(Vec<Item>)` を返せるようにする
|
||||||
|
- Worker の `Locked::run` は `ContinueWith` を受けたら user_item の push 直後に extras を `history.extend` する
|
||||||
|
- Hook (`crates/pod/src/hook.rs`) 側の戻り値(`PromptAction`)はこの拡張に乗せない。Hook は read-only な公開拡張面という設計(hook.rs:8-15 のコメント)を維持するため、Hook と Interceptor で戻り値型を分離する
|
||||||
|
|
||||||
|
### Pod 側の resolver 配線
|
||||||
|
|
||||||
|
- `PodFsView::resolve_file_ref(&self, path: &str, max_bytes: usize) -> Result<Item, ResolveError>` を新設。`ScopedFs` で読み、UTF-8 検証 + 16 KB 切詰めを行い `Item::system_message` を返す。エラーは `OutOfScope` / `NotFound` / `Binary` / `Io(io::Error)` を区別する
|
||||||
|
- `PodSharedState` に submit 中だけ使う stash (`Mutex<Vec<Item>>`) を一個追加。`pending_notifies` / `compact_state` と同じ流儀
|
||||||
|
- `Pod::run` で submit 直前に `Vec<Segment>` を走査して FileRef を resolver に通し、成功分は stash、失敗分は Alert に流す
|
||||||
|
- `PodInterceptor::on_prompt_submit` で stash を取り出して空でなければ `ContinueWith(items)` を返す
|
||||||
|
|
||||||
|
## 範囲外
|
||||||
|
|
||||||
|
- Knowledge / Workflow resolver(それぞれ `tickets/memory-phase2-consolidation.md` と `tickets/workflow.md` 側)
|
||||||
|
- 画像など binary attachment の typed メッセージ化(将来 `ContentPart::Image` 等を入れる別チケット)
|
||||||
|
- `@<path>:<line>-<line>` のような行範囲指定構文
|
||||||
|
- compact 後の auto-read との重複排除(compact が user message 由来の FileRef を読み直す可能性は許容)
|
||||||
|
|
||||||
|
## 完了条件
|
||||||
|
|
||||||
|
- `@<path>` を含む submit が、user message 1 件 + 解決済み `FileRef` ごとの system message(2 種類の Item)として履歴に残る
|
||||||
|
- 16 KB を超えるファイルは truncate され、その旨が LLM に見える形で示される
|
||||||
|
- スコープ外 / NotFound / バイナリは Alert として通知され、LLM 側は placeholder を見るのみ
|
||||||
|
- Hook の戻り値型は据え置き、Interceptor のみ `ContinueWith` を受け付ける
|
||||||
|
- 既存ビルド・テストを壊さない
|
||||||
|
|
||||||
|
## 依存
|
||||||
|
|
||||||
|
- `tickets/submit-tui-completion.md`(FileRef segment の wire 接続)
|
||||||
|
|
||||||
|
## 参照
|
||||||
|
|
||||||
|
- `crates/pod/src/pod.rs`(`flatten_segments`, `Pod::run`)
|
||||||
|
- `crates/pod/src/fs_view.rs`(`PodFsView` — auto-read の隣に置く)
|
||||||
|
- `crates/pod/src/ipc/interceptor.rs`(`PodInterceptor::on_prompt_submit`)
|
||||||
|
- `crates/pod/src/shared_state.rs`(stash 追加先)
|
||||||
|
- `crates/llm-worker/src/interceptor.rs`(`PromptAction` 拡張)
|
||||||
|
- `crates/llm-worker/src/worker.rs:903`(`TurnEndAction::ContinueWithMessages` 既存パターン)
|
||||||
|
- `crates/pod/src/hook.rs:8-15`(Hook と Interceptor の責務分離 doc)
|
||||||
|
- `crates/manifest/src/defaults.rs`(`TOOL_OUTPUT_MAX_BYTES`)
|
||||||
Loading…
Reference in New Issue
Block a user