merge: file-ref-directory

This commit is contained in:
Keisuke Hirata 2026-05-13 06:30:45 +09:00
commit 418451ebf8
No known key found for this signature in database
7 changed files with 312 additions and 25 deletions

View File

@ -200,6 +200,8 @@ pub struct WorkerManifest {
#[serde(default)]
pub tool_output: ToolOutputLimits,
/// Byte-size cap applied to submit-time FileRef uploads / attachments.
/// For file refs this caps the file body; for normal directory refs this
/// caps the rendered shallow listing body.
/// This is intentionally separate from tool-output truncation because
/// user-requested file attachments can usually tolerate a larger budget.
#[serde(default)]
@ -226,11 +228,13 @@ pub struct ToolOutputLimits {
/// Byte-size cap for submit-time FileRef uploads / attachments.
///
/// This governs the `[File: <path>]` system-message attachment produced
/// when a user explicitly submits a `@<path>` file reference, and the
/// rendered body of a shallow `[Dir: <path>]` listing for a normal directory
/// reference. It does not affect tool result truncation; see
/// [`ToolOutputLimits`] for that path.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileUploadLimits {
/// Cap, in bytes, applied to each resolved FileRef file body or
/// directory-listing body. When the field is missing from the serialized
/// manifest, serde fills it by calling `default_file_upload_max_bytes`.
#[serde(default = "default_file_upload_max_bytes")]
pub max_bytes: usize,
}

View File

@ -13,11 +13,16 @@
use std::path::{Path, PathBuf};
use llm_worker::Item;
use manifest::Scope;
use tools::scoped_fs::first_symlink;
use tools::{ScopedFs, ToolsError};
use tracing::warn;
/// Maximum number of completion candidates. `list_file_completions` stops
/// collecting once this value is exceeded.
const COMPLETION_LIMIT: usize = 100;
/// Maximum number of entries returned by the shallow listing of a submit-time
/// directory FileRef. The same limit as TUI completion is used on purpose so
/// that both features share the same "shallow listing" semantics.
const DIR_FILE_REF_ENTRY_LIMIT: usize = COMPLETION_LIMIT;
/// Compact worker が `mark_read_required` で nominate した「次セッション開始時に
/// 自動で再読すべきファイル」のエントリ。
@ -106,16 +111,16 @@ impl PodFsView {
out
}
/// `path` を ScopedFs 経由で読み、`[File: <path>]\n<body>` 形式の
/// system message を返す。submit 時の `Segment::FileRef` リゾルバが
/// 使う経路。
/// `path` を ScopedFs 経由で解決し、submit 時の `Segment::FileRef`
/// attachment 用 system message を返す。
///
/// - `path` は relative なら pwd 相対、absolute なら absolute として解釈
/// - `max_bytes` を超える本文は切り詰め、末尾に
/// `[...truncated, <total> bytes total — use read_file for the rest]`
/// を付与する
/// - 通常ディレクトリは浅い entry listing として `[Dir: <path>]\n<body>` に展開する
/// - ディレクトリ listing は hidden / gitignore を特別扱いせず、scope 上 readable な
/// 直下 entry だけを最大 `DIR_FILE_REF_ENTRY_LIMIT` 件返す
/// - ファイル本文またはディレクトリ listing 本文が `max_bytes` を超える場合は切り詰める
/// - 非 UTF-8 (バイナリ) は `ResolveError::Binary` で拒否
/// - スコープ外 / NotFound 等は `ResolveError::Fs` で返す
/// - スコープ外 / NotFound / symlink directory 等は `ResolveError::Fs` で返す
pub fn resolve_file_ref(&self, path: &str, max_bytes: usize) -> Result<Item, ResolveError> {
let p = Path::new(path);
let abs = if p.is_absolute() {
@ -123,6 +128,21 @@ impl PodFsView {
} else {
self.fs.pwd().join(p)
};
// 通常ディレクトリだけを FileRef listing として扱う。symlink を含むパスは
// `ScopedFs::read_bytes` に委ね、既存の symlink 診断
// (`SymlinkTargetIsDirectory` / `SymlinkOutOfScope` 等) を保つ。
if first_symlink(&abs).is_none() {
let scope = self.fs.scope();
if !scope.is_readable(&abs) {
return Err(ResolveError::Fs(ToolsError::OutOfScope(abs)));
}
let meta = metadata_for_file_ref(&abs).map_err(ResolveError::Fs)?;
if meta.is_dir() {
return render_dir_file_ref(path, &abs, max_bytes, scope.as_ref());
}
}
let bytes = self.fs.read_bytes(&abs).map_err(ResolveError::Fs)?;
let total = bytes.len();
let (body_bytes, truncated) = if total > max_bytes {
@ -204,6 +224,116 @@ pub fn slice_lines(text: &str, offset: usize, limit: Option<usize>) -> String {
lines[start..end].join("\n")
}
/// One row of the shallow directory listing built by `render_dir_file_ref`.
#[derive(Debug, Clone, PartialEq, Eq)]
struct DirListingEntry {
/// Text rendered for the entry: the lossily-decoded file name, with `/`
/// appended for directories and `@` appended for symlinks.
display: String,
/// Primary sort key of the listing: `0` for directories, `1` for symlinks,
/// `2` for everything else. Ties are broken by `display`.
kind_rank: u8,
}
/// Fetches filesystem metadata for `path`, translating I/O failures into
/// [`ToolsError`].
///
/// # Errors
///
/// - `ToolsError::NotFound` when the underlying error kind is
///   `std::io::ErrorKind::NotFound`.
/// - `ToolsError::io(path, err)` for every other I/O error.
fn metadata_for_file_ref(path: &Path) -> Result<std::fs::Metadata, ToolsError> {
    match std::fs::metadata(path) {
        Ok(meta) => Ok(meta),
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
            Err(ToolsError::NotFound(path.to_path_buf()))
        }
        Err(err) => Err(ToolsError::io(path, err)),
    }
}
/// Renders the directory at `abs` as a shallow `[Dir: <original_path>]`
/// system message.
///
/// Only direct children that `scope` reports as readable are listed. Each
/// entry is shown by file name, with `/` appended for directories and `@` for
/// symlinks. Entries are ordered directories first, then symlinks, then
/// everything else, and alphabetically by displayed name within each group.
///
/// Two independent caps apply: at most `DIR_FILE_REF_ENTRY_LIMIT` entries are
/// rendered, and the rendered body is then cut to `max_bytes` on a UTF-8
/// boundary. When either cap triggers, a truncation hint line is appended.
/// An empty listing renders as `(empty directory)`.
///
/// # Errors
///
/// Returns `ResolveError::Fs` wrapping `ToolsError::io` when the directory
/// cannot be opened, an entry cannot be read, or an entry's file type cannot
/// be determined.
fn render_dir_file_ref(
    original_path: &str,
    abs: &Path,
    max_bytes: usize,
    scope: &Scope,
) -> Result<Item, ResolveError> {
    let dir_iter =
        std::fs::read_dir(abs).map_err(|e| ResolveError::Fs(ToolsError::io(abs, e)))?;

    let mut listing = Vec::new();
    for dirent in dir_iter {
        let dirent = dirent.map_err(|e| ResolveError::Fs(ToolsError::io(abs, e)))?;
        let child = dirent.path();
        // Children outside the readable scope are silently left out.
        if !scope.is_readable(&child) {
            continue;
        }
        let kind = dirent
            .file_type()
            .map_err(|e| ResolveError::Fs(ToolsError::io(&child, e)))?;

        let (suffix, kind_rank) = if kind.is_dir() {
            (Some('/'), 0)
        } else if kind.is_symlink() {
            (Some('@'), 1)
        } else {
            (None, 2)
        };
        let mut display = dirent.file_name().to_string_lossy().into_owned();
        display.extend(suffix);
        listing.push(DirListingEntry { display, kind_rank });
    }

    // Group by kind first, then order by the displayed name.
    listing.sort_by(|lhs, rhs| {
        (lhs.kind_rank, lhs.display.as_str()).cmp(&(rhs.kind_rank, rhs.display.as_str()))
    });

    let total_entries = listing.len();
    let entry_truncated = total_entries > DIR_FILE_REF_ENTRY_LIMIT;
    let full_body = if listing.is_empty() {
        String::from("(empty directory)")
    } else {
        let shown = &listing[..total_entries.min(DIR_FILE_REF_ENTRY_LIMIT)];
        let names: Vec<&str> = shown.iter().map(|row| row.display.as_str()).collect();
        names.join("\n")
    };

    // The byte cap is applied after the entry cap, to the rendered body only.
    let body_total_bytes = full_body.len();
    let (body, byte_truncated) = truncate_utf8_bytes(&full_body, max_bytes);

    let mut text = format!("[Dir: {original_path}]\n{body}");
    if entry_truncated || byte_truncated {
        let hint = dir_listing_truncation_hint(
            entry_truncated,
            byte_truncated,
            total_entries,
            body_total_bytes,
        );
        text.push('\n');
        text.push_str(&hint);
    }
    Ok(Item::system_message(text))
}
/// Cuts `s` down to at most `max_bytes` bytes without splitting a UTF-8
/// character.
///
/// Returns the (possibly shortened) prefix and a flag telling whether any
/// truncation happened. When `max_bytes` falls inside a multi-byte character,
/// the cut moves back to the previous character boundary, so the result may
/// be shorter than `max_bytes`.
fn truncate_utf8_bytes(s: &str, max_bytes: usize) -> (&str, bool) {
    if max_bytes >= s.len() {
        return (s, false);
    }
    // Index 0 is always a char boundary, so the search cannot come up empty.
    let cut = (0..=max_bytes)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);
    (&s[..cut], true)
}
/// Builds the trailing hint line appended to a truncated directory listing.
///
/// - `entry_truncated`: the listing had more entries than were rendered.
/// - `byte_truncated`: the rendered body was cut by the byte cap.
/// - `total_entries`: number of readable entries before the entry cap.
/// - `body_total_bytes`: size of the rendered body before the byte cap.
///
/// Returns an empty string when neither cap was hit.
fn dir_listing_truncation_hint(
    entry_truncated: bool,
    byte_truncated: bool,
    total_entries: usize,
    body_total_bytes: usize,
) -> String {
    if !entry_truncated && !byte_truncated {
        return String::new();
    }
    if !byte_truncated {
        // Only the entry-count cap was hit.
        return format!("[...truncated, {total_entries} readable entries total — use Glob for more]");
    }
    if !entry_truncated {
        // Only the byte cap was hit.
        return format!("[...truncated, {body_total_bytes} bytes total — use Glob or Read for more]");
    }
    // Both caps were hit.
    format!(
        "[...truncated, {total_entries} readable entries total; first {DIR_FILE_REF_ENTRY_LIMIT} entries were {body_total_bytes} bytes before byte cap — use Glob for more]"
    )
}
fn format_range(offset: Option<usize>, limit: Option<usize>) -> String {
match (offset, limit) {
(None, None) => String::new(),
@ -239,6 +369,7 @@ fn split_prefix(prefix: &str, pwd: &Path) -> (PathBuf, String, bool) {
#[cfg(test)]
mod tests {
use super::*;
use llm_worker::ContentPart;
use manifest::{Permission, Scope, ScopeConfig, ScopeRule};
use tempfile::TempDir;
@ -256,6 +387,16 @@ mod tests {
std::fs::write(path, content).unwrap();
}
/// Test helper: returns the text of the first content part of a message item.
///
/// Panics when `item` is not a message or when its first content part is not
/// text.
fn system_text(item: &Item) -> &str {
    match item {
        Item::Message { content, .. } => match content.first() {
            Some(ContentPart::Text { text }) => text,
            _ => panic!("expected text content"),
        },
        _ => panic!("expected message item"),
    }
}
#[test]
fn slice_lines_handles_offset_and_limit() {
let text = "a\nb\nc\nd";
@ -312,6 +453,115 @@ mod tests {
assert!(text.contains("2048 bytes total"));
}
/// A directory FileRef renders a `[Dir: ...]` listing that includes hidden and
/// gitignored files and places directories ahead of files.
#[test]
fn resolve_file_ref_lists_directory_shallow_entries() {
    let tmp = TempDir::new().unwrap();
    let docs = tmp.path().join("docs");
    std::fs::create_dir_all(docs.join("sub")).unwrap();
    touch(&docs.join(".hidden"), "hidden");
    touch(&docs.join(".gitignore"), "ignored.txt\n");
    touch(&docs.join("ignored.txt"), "not ignored for FileRef");

    let view = PodFsView::new(fs_for(&tmp));
    let item = view.resolve_file_ref("docs", 4096).unwrap();
    let text = system_text(&item);

    assert!(text.starts_with("[Dir: docs]\n"));
    for expected in ["sub/", ".hidden", ".gitignore", "ignored.txt"] {
        assert!(text.contains(expected));
    }

    let dir_at = text.find("sub/").unwrap();
    let file_at = text.find(".hidden").unwrap();
    assert!(
        dir_at < file_at,
        "directories should sort before files:\n{text}"
    );
}
/// Entries denied by the scope must not appear in the directory listing.
#[test]
fn resolve_file_ref_directory_listing_filters_unreadable_entries() {
    // Layout: docs/visible.txt is readable; docs/secret/** is read-denied.
    let tmp = TempDir::new().unwrap();
    let root = tmp.path().to_path_buf();
    let docs_dir = root.join("docs");
    let denied_dir = docs_dir.join("secret");
    std::fs::create_dir_all(&denied_dir).unwrap();
    touch(&docs_dir.join("visible.txt"), "ok");
    touch(&denied_dir.join("hidden.txt"), "nope");

    let allow_root = ScopeRule {
        target: root.clone(),
        permission: Permission::Write,
        recursive: true,
    };
    let deny_secret = ScopeRule {
        target: denied_dir,
        permission: Permission::Read,
        recursive: true,
    };
    let cfg = ScopeConfig {
        allow: vec![allow_root],
        deny: vec![deny_secret],
    };
    let fs = ScopedFs::new(Scope::from_config(&cfg).unwrap(), root);
    let view = PodFsView::new(fs);

    let item = view.resolve_file_ref("docs", 4096).unwrap();
    let rendered = system_text(&item);

    assert!(rendered.contains("visible.txt"));
    assert!(!rendered.contains("secret"));
    assert!(!rendered.contains("hidden.txt"));
}
/// The `max_bytes` upload cap truncates the rendered listing body and adds
/// the byte-truncation hint.
#[test]
fn resolve_file_ref_directory_listing_uses_upload_byte_cap() {
    let tmp = TempDir::new().unwrap();
    let docs = tmp.path().join("docs");
    std::fs::create_dir(&docs).unwrap();
    for name in ["very-long-file-name.txt", "another-long-file-name.txt"] {
        touch(&docs.join(name), "");
    }

    let view = PodFsView::new(fs_for(&tmp));
    // A 10-byte cap is smaller than either file name on its own.
    let item = view.resolve_file_ref("docs", 10).unwrap();
    let rendered = system_text(&item);

    assert!(rendered.starts_with("[Dir: docs]\n"));
    for needle in ["truncated", "bytes total", "use Glob or Read for more"] {
        assert!(rendered.contains(needle));
    }
}
/// The listing shows at most `DIR_FILE_REF_ENTRY_LIMIT` entries and reports
/// the full readable-entry count in the truncation hint.
///
/// Every expectation is derived from `DIR_FILE_REF_ENTRY_LIMIT`, so the test
/// keeps checking the entry cap if the constant is retuned.
#[test]
fn resolve_file_ref_directory_listing_uses_completion_entry_limit() {
    let total = DIR_FILE_REF_ENTRY_LIMIT + 5;
    let dir = TempDir::new().unwrap();
    std::fs::create_dir(dir.path().join("docs")).unwrap();
    // Wide zero padding keeps lexicographic order equal to numeric order.
    for i in 0..total {
        touch(&dir.path().join(format!("docs/file-{i:06}.txt")), "");
    }
    let view = PodFsView::new(fs_for(&dir));
    // Disable the byte cap so only the entry cap can truncate the listing.
    let item = view.resolve_file_ref("docs", usize::MAX).unwrap();
    let text = system_text(&item);

    let last_shown = format!("file-{:06}.txt", DIR_FILE_REF_ENTRY_LIMIT - 1);
    let first_hidden = format!("file-{:06}.txt", DIR_FILE_REF_ENTRY_LIMIT);
    assert!(text.contains(format!("{total} readable entries total").as_str()));
    assert!(text.contains(last_shown.as_str()));
    assert!(!text.contains(first_hidden.as_str()));
    assert!(text.contains("use Glob for more"));
}
/// Symlink entries inside a listed directory are rendered with a trailing `@`.
#[cfg(unix)]
#[test]
fn resolve_file_ref_directory_listing_marks_readable_symlink_entries() {
    let tmp = TempDir::new().unwrap();
    let docs = tmp.path().join("docs");
    std::fs::create_dir(&docs).unwrap();
    touch(&docs.join("target.txt"), "target");
    // Relative link target, resolved against the link's own directory.
    std::os::unix::fs::symlink("target.txt", docs.join("link.txt")).unwrap();

    let view = PodFsView::new(fs_for(&tmp));
    let item = view.resolve_file_ref("docs", 4096).unwrap();
    let rendered = system_text(&item);

    assert!(rendered.contains("link.txt@"));
}
#[test]
fn resolve_file_ref_rejects_binary_with_binary_error() {
let dir = TempDir::new().unwrap();

View File

@ -962,10 +962,9 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
///
/// `input` is a typed segment list (see [`protocol::Segment`]). The
/// Pod flattens it into a single user-message string for the
/// underlying Worker, expanding paste content inline and surfacing
/// alerts for any segment kind the current Pod has no resolver for
/// (file refs, knowledge refs, workflow invocations, unknown
/// variants from a newer client).
/// underlying Worker, expanding paste content inline, resolving file refs
/// into adjacent attachments where possible, and surfacing alerts for
/// unresolved refs / unsupported segment kinds.
///
/// If the between-turns compaction threshold is exceeded mid-run,
/// the Worker is aborted, history is compacted, and execution resumes
@ -1018,10 +1017,11 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
}
/// Resolve every `Segment::FileRef` in `segments` to a `[File: <path>]`
/// system message via `PodFsView`. Resolution failures (out-of-scope,
/// not-found, binary, I/O) surface as `AlertLevel::Warn` Alerts and
/// are skipped — the unresolved placeholder stays in the flattened
/// user message so the LLM still sees the intent.
/// or shallow `[Dir: <path>]` system message via `PodFsView`. Resolution
/// failures (out-of-scope, not-found, binary, I/O, unsupported symlink
/// directory) surface as `AlertLevel::Warn` Alerts and are skipped — the
/// unresolved placeholder stays in the flattened user message so the LLM
/// still sees the intent.
fn resolve_file_refs(&self, segments: &[Segment]) -> Vec<Item> {
let view = crate::fs_view::PodFsView::new(tools::ScopedFs::with_shared_scope(
self.scope.clone(),

View File

@ -124,9 +124,10 @@ pub enum Segment {
lines: u32,
content: String,
},
/// `@<path>` file reference. Pod resolves to scope-checked file
/// content when a resolver is registered (resolver implementation
/// out of scope for this ticket).
/// `@<path>` file-system reference. Pod resolves readable files to
/// `[File: <path>]` attachments and readable normal directories to shallow
/// `[Dir: <path>]` listings; the flattened user text keeps the literal
/// `@<path>` placeholder either way.
FileRef { path: String },
/// `#<slug>` Knowledge reference (see `docs/plan/memory.md`).
KnowledgeRef { slug: String },
@ -153,9 +154,9 @@ impl Segment {
/// Sigil-prefixed variants (`FileRef` / `KnowledgeRef` / `WorkflowInvoke`)
/// flatten back to their literal sigil form (`@<path>`, `#<slug>`,
/// `/<slug>`) — matching what the user originally typed. Resolved
/// content (e.g. file body for `FileRef`) is delivered as separate
/// `Item::system_message`s adjacent to the user message; the
/// resolution itself is the caller's job. `Unknown` falls back to
/// content (e.g. file body or shallow directory listing for `FileRef`) is
/// delivered as separate `Item::system_message`s adjacent to the user
/// message; the resolution itself is the caller's job. `Unknown` falls back to
/// a bracketed placeholder since there is no sigil to render.
pub fn flatten_to_text(segments: &[Segment]) -> String {
let mut out = String::new();

View File

@ -32,7 +32,9 @@ impl PasteRef {
}
}
/// `@<path>` chip — confirmed completion of a file reference.
/// `@<path>` chip — confirmed completion of a file-system reference.
/// Directories remain valid chips because Pod resolves normal directory refs
/// to shallow `[Dir: <path>]` listings at submit time.
#[derive(Debug, Clone)]
pub struct FileRefAtom {
pub path: String,

View File

@ -62,3 +62,8 @@ submit 時に `Segment::FileRef` がディレクトリを指している場合
- `crates/tools/src/scoped_fs.rs` `read_bytes`
- `tickets/file-ref-symlink-diagnostics.md`
- `tickets/manifest-output-upload-limits.md`
## Review
- 状態: Approve
- レビュー詳細: [./file-ref-directory.review.md](./file-ref-directory.review.md)
- 日付: 2026-05-12

View File

@ -0,0 +1,25 @@
# Review: Submit 時 FileRef でディレクトリを参照したときの挙動
## 前提・要件の確認
- 通常ディレクトリの FileRef が `IsDirectory` Warn で黙って捨てられないこと: 満たされています。`PodFsView::resolve_file_ref` が symlink を含まない通常ディレクトリを検出し、`[Dir: <path>]` system message に展開する経路へ分岐しています(`crates/pod/src/fs_view.rs:124-143`, `crates/pod/src/fs_view.rs:240-304`)。Pod 側の alert 文脈も `[File]` / `[Dir]` 両対応に更新されています(`crates/pod/src/pod.rs:1019-1044`)。
- 採用する挙動の明確化: 満たされています。通常ディレクトリは浅い entry listing として `[Dir: <path>]` で返す仕様になっており、再帰走査やファイル本文集約には踏み込んでいません(`crates/pod/src/fs_view.rs:114-123`, `crates/pod/src/fs_view.rs:240-304`)。
- listing 上限: 満たされています。entry 件数は TUI completion と同じ `COMPLETION_LIMIT` を `DIR_FILE_REF_ENTRY_LIMIT` として使い、本文 byte 数は既存の `file_upload.max_bytes` を使う設計です(`crates/pod/src/fs_view.rs:21-25`, `crates/pod/src/fs_view.rs:278-291`, `crates/manifest/src/lib.rs:228-239`)。
- 隠しファイル・gitignore・scope 外 entry の扱い: 満たされています。hidden / gitignore は特別扱いせず、scope 上 readable な直下 entry のみを返します(`crates/pod/src/fs_view.rs:117-123`, `crates/pod/src/fs_view.rs:249-269`)。テストでも hidden / gitignore を含め、deny された entry を除外する挙動が確認されています(`crates/pod/src/fs_view.rs:456-514`)。
- symlink entry / symlink directory との整合: 満たされています。解決対象パス自体に symlink が含まれる場合は従来の `ScopedFs::read_bytes` 経路に委ね、通常ディレクトリ listing 内の symlink entry は `@` 付きで表示しています(`crates/pod/src/fs_view.rs:132-146`, `crates/pod/src/fs_view.rs:263-265`, `crates/pod/src/fs_view.rs:549-563`)。
- TUI completion とのギャップ解消: 満たされています。completion がディレクトリ候補を出す前提を維持し、submit 側でも通常ディレクトリを扱う方向に寄せています。TUI 側コメントもその仕様に更新されています(`crates/tui/src/input.rs:35-37`)。
- `Segment::FileRef` のドキュメント / コメント更新: 満たされています。Protocol 上の `Segment::FileRef` と flatten の説明が `[Dir: <path>]` listing に追従しています(`crates/protocol/src/lib.rs:127-130`, `crates/protocol/src/lib.rs:154-159`)。
## アーキテクチャ・スコープ
- FileRef 解決は既存の `PodFsView` に集約されており、Pod 本体には alert 化と attachment 組み立て以上の責務を増やしていません。層の置き方は妥当です。
- TUI completion の挙動変更ではなく、submit 側の意味論を completion に合わせる実装で、チケットの UX ギャップに対して最小限です。
- directory listing は浅い直下列挙に留まり、範囲外の再帰走査・glob 展開・ファイル本文集約には踏み込んでいません。
- `ScopedFs::read_bytes` の symlink 診断経路を温存しつつ、通常ディレクトリだけを新仕様にしているため、`file-ref-symlink-diagnostics` 側の関心と衝突していません。
## 判断
Approve — チケットで求められた通常ディレクトリ FileRef の仕様化・実装・テスト・コメント更新が揃っており、Blocking 指摘はありません。
## 確認
- `cargo fmt --check`
- `cargo test -p pod`
- `cargo test -p pod fs_view::tests::resolve_file_ref -- --nocapture`
- `cargo test -p tui input::tests -- --nocapture`