diff --git a/crates/tools/src/error.rs b/crates/tools/src/error.rs index fb548f7e..a4c4174b 100644 --- a/crates/tools/src/error.rs +++ b/crates/tools/src/error.rs @@ -16,6 +16,47 @@ pub enum ToolsError { #[error("path is outside allowed scope: {}", .0.display())] OutOfScope(PathBuf), + #[error( + "path resolves through a symlink outside allowed {required_permission} scope: {} -> {}; add the symlink target to the Pod {required_permission} scope, copy it into the workspace, or recreate the symlink with the correct target", + .path.display(), + .target.display() + )] + SymlinkOutOfScope { + path: PathBuf, + target: PathBuf, + required_permission: &'static str, + }, + + #[error( + "broken symlink while resolving {}: {} -> {} (target does not exist); recreate the symlink with an absolute target or a correct relative target", + .path.display(), + .link.display(), + .target.display() + )] + BrokenSymlink { + path: PathBuf, + link: PathBuf, + target: PathBuf, + }, + + #[error( + "path resolves through a symlink to a directory, not a file: {} -> {}", + .path.display(), + .target.display() + )] + SymlinkTargetIsDirectory { path: PathBuf, target: PathBuf }, + + #[error( + "{tool} does not follow symlink directories: {} -> {}; use the resolved target path directly, or add the target to read scope and reference it without the symlink", + .path.display(), + .target.display() + )] + SymlinkDirectoryNotTraversed { + tool: &'static str, + path: PathBuf, + target: PathBuf, + }, + #[error("path is read-only in this scope: {}", .0.display())] ReadOnly(PathBuf), @@ -73,6 +114,10 @@ impl From for ToolError { match err { RelativePath(_) | OutOfScope(_) + | SymlinkOutOfScope { .. } + | BrokenSymlink { .. } + | SymlinkTargetIsDirectory { .. } + | SymlinkDirectoryNotTraversed { .. } | ReadOnly(_) | IsDirectory(_) | NotRead(_) diff --git a/crates/tools/src/glob.rs b/crates/tools/src/glob.rs index 514a45f6..97bb75cf 100644 --- a/crates/tools/src/glob.rs +++ b/crates/tools/src/glob.rs @@ -10,7 +10,7 @@ use manifest::Scope; use serde::Deserialize; use crate::error::ToolsError; -use crate::scoped_fs::ScopedFs; +use crate::scoped_fs::{ScopedFs, direct_symlink}; const DESCRIPTION: &str = "Recursively find files matching a glob pattern \ (e.g. \"**/*.rs\"). Results are sorted by modification time, newest first, \ @@ -98,8 +98,52 @@ fn run_glob(base: &Path, pattern: &str, scope: &Scope) -> Result, T if !base.is_absolute() { return Err(ToolsError::RelativePath(base.to_path_buf())); } - if !base.exists() { - return Err(ToolsError::NotFound(base.to_path_buf())); + let symlink = direct_symlink(base); + if !scope.is_readable(base) { + return Err(if let Some(info) = symlink.as_ref() { + let link_parent_readable = info + .link_path + .parent() + .map(|parent| scope.is_readable(parent)) + .unwrap_or(false); + if info.target_exists && link_parent_readable { + ToolsError::SymlinkOutOfScope { + path: base.to_path_buf(), + target: info.resolved_path.clone(), + required_permission: "read", + } + } else { + ToolsError::OutOfScope(base.to_path_buf()) + } + } else { + ToolsError::OutOfScope(base.to_path_buf()) + }); + } + if let Some(info) = symlink.as_ref() { + if !info.target_exists { + return Err(ToolsError::BrokenSymlink { + path: base.to_path_buf(), + link: info.link_path.clone(), + target: info.target_path.clone(), + }); + } + } + let base_meta = std::fs::metadata(base).map_err(|e| match e.kind() { + std::io::ErrorKind::NotFound => ToolsError::NotFound(base.to_path_buf()), + _ => ToolsError::io(base, e), + })?; + if !base_meta.is_dir() { + return Err(ToolsError::InvalidArgument(format!( + "glob search path is not a directory: {}", + base.display() + ))); + } + if let Some(info) = symlink.as_ref() { + return Err(ToolsError::SymlinkDirectoryNotTraversed { + tool: "Glob", + path: base.to_path_buf(), + target: info.resolved_path.clone(), + }); } let glob = globset::Glob::new(pattern) @@ -296,4 +340,34 @@ mod tests { assert!(body.contains(".hidden.rs")); assert!(body.contains("visible.rs")); } + + #[cfg(unix)] + #[tokio::test] + async fn glob_reports_scope_inside_symlink_directory_is_not_traversed() { + use std::os::unix::fs::symlink; + + let (dir, fs) = setup(); + let target = dir.path().join("target-dir"); + touch(&target.join("visible.rs"), ""); + let link = dir.path().join("external-project"); + symlink(&target, &link).unwrap(); + + let def = glob_tool(fs); + let (_, tool) = def(); + let inp = serde_json::json!({ + "path": link.to_str().unwrap(), + "pattern": "**/*.rs", + }); + let err = tool.execute(&inp.to_string()).await.unwrap_err(); + let msg = format!("{err}"); + assert!( + msg.contains("Glob does not follow symlink directories"), + "{msg}" + ); + assert!(msg.contains(&link.display().to_string()), "{msg}"); + assert!( + msg.contains(&target.canonicalize().unwrap().display().to_string()), + "{msg}" + ); + } } diff --git a/crates/tools/src/grep.rs b/crates/tools/src/grep.rs index 65556691..c8f7fbcc 100644 --- a/crates/tools/src/grep.rs +++ b/crates/tools/src/grep.rs @@ -15,7 +15,7 @@ use manifest::Scope; use serde::Deserialize; use crate::error::ToolsError; -use crate::scoped_fs::ScopedFs; +use crate::scoped_fs::{ScopedFs, direct_symlink}; const DESCRIPTION: &str = "Recursive regex search across files, powered by \ ripgrep. Supports file filtering (`glob`, `type`), context lines, multiline \ @@ -255,8 +255,52 @@ fn run_grep(default_base: PathBuf, p: GrepParams, scope: &Scope) -> Result ToolsError::NotFound(base.clone()), + _ => ToolsError::io(&base, e), + })?; + if !base_meta.is_dir() { + return Err(ToolsError::InvalidArgument(format!( + "grep search path is not a directory: {}", + base.display() + ))); + } + if let Some(info) = symlink.as_ref() { + return Err(ToolsError::SymlinkDirectoryNotTraversed { + tool: "Grep", + path: base.clone(), + target: info.resolved_path.clone(), + }); } let mut wb = WalkBuilder::new(&base); diff --git a/crates/tools/src/scoped_fs.rs b/crates/tools/src/scoped_fs.rs index 91e204da..72d9d892 100644 --- a/crates/tools/src/scoped_fs.rs +++ b/crates/tools/src/scoped_fs.rs @@ -42,6 +42,23 @@ pub struct WriteOutcome { pub created: bool, } +/// First symlink encountered while resolving a path. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct SymlinkInfo { + /// The symlink path as it appears in the original path chain. + pub link_path: PathBuf, + /// The symlink target resolved relative to the symlink's parent when the + /// link stores a relative target. + pub target_path: PathBuf, + /// Best-effort resolved form of the full requested path after replacing + /// the symlink component with its target and rejoining any remaining tail. + /// Existing targets are canonicalized; broken targets are left absolute. + pub resolved_path: PathBuf, + /// Whether the symlink target itself exists. A missing target is a broken + /// symlink even when the symlink lives inside an allowed scope. + pub target_exists: bool, +} + impl ScopedFs { /// Create a new [`ScopedFs`] wrapping `scope` and `pwd` in a fresh /// [`SharedScope`]. Use [`ScopedFs::with_shared_scope`] when you @@ -92,15 +109,34 @@ impl ScopedFs { if !path.is_absolute() { return Err(ToolsError::RelativePath(path.to_path_buf())); } - if !self.inner.scope.load().is_readable(path) { - return Err(ToolsError::OutOfScope(path.to_path_buf())); + let symlink = first_symlink(path); + let scope = self.inner.scope.load(); + if !scope.is_readable(path) { + return Err(symlink_out_of_scope_or_plain( + path, + symlink.as_ref(), + "read", + &scope, + )); + } + if let Some(info) = symlink.as_ref() { + if !info.target_exists { + return Err(broken_symlink_error(path, info)); + } } let meta = std::fs::metadata(path).map_err(|e| match e.kind() { std::io::ErrorKind::NotFound => ToolsError::NotFound(path.to_path_buf()), _ => ToolsError::io(path, e), })?; if meta.is_dir() { - return Err(ToolsError::IsDirectory(path.to_path_buf())); + return Err(if let Some(info) = symlink.as_ref() { + ToolsError::SymlinkTargetIsDirectory { + path: path.to_path_buf(), + target: info.resolved_path.clone(), + } + } else { + ToolsError::IsDirectory(path.to_path_buf()) + }); } std::fs::read(path).map_err(|e| ToolsError::io(path, e)) } @@ -125,28 +161,50 @@ impl ScopedFs { if !path.is_absolute() { return Err(ToolsError::RelativePath(path.to_path_buf())); } + let symlink = first_symlink(path); let scope = self.inner.scope.load(); if !scope.is_writable(path) { return Err(if scope.is_readable(path) { ToolsError::ReadOnly(path.to_path_buf()) } else { - ToolsError::OutOfScope(path.to_path_buf()) + symlink_out_of_scope_or_plain(path, symlink.as_ref(), "write", &scope) }); } drop(scope); + if let Some(info) = symlink.as_ref() { + if !info.target_exists { + return Err(broken_symlink_error(path, info)); + } + } + // Reject existing directory targets. match std::fs::metadata(path) { Ok(meta) if meta.is_dir() => { - return Err(ToolsError::IsDirectory(path.to_path_buf())); + return Err(if let Some(info) = symlink.as_ref() { + ToolsError::SymlinkTargetIsDirectory { + path: path.to_path_buf(), + target: info.resolved_path.clone(), + } + } else { + ToolsError::IsDirectory(path.to_path_buf()) + }); } _ => {} } let existed = path.exists(); + let write_target = if existed { + path.canonicalize().unwrap_or_else(|_| path.to_path_buf()) + } else { + path.to_path_buf() + }; - let parent = path.parent().ok_or_else(|| { - ToolsError::InvalidArgument(format!("path has no parent directory: {}", path.display())) + let parent = write_target.parent().ok_or_else(|| { + ToolsError::InvalidArgument(format!( + "path has no parent directory: {}", + write_target.display() + )) })?; if !parent.as_os_str().is_empty() && !parent.exists() { std::fs::create_dir_all(parent).map_err(|e| ToolsError::io(parent, e))?; @@ -160,12 +218,12 @@ impl ScopedFs { let mut tmp = tempfile::NamedTempFile::new_in(tmp_parent) .map_err(|e| ToolsError::io(tmp_parent, e))?; tmp.write_all(content) - .map_err(|e| ToolsError::io(path, e))?; + .map_err(|e| ToolsError::io(&write_target, e))?; tmp.as_file() .sync_all() - .map_err(|e| ToolsError::io(path, e))?; - tmp.persist(path) - .map_err(|e| ToolsError::io(path, e.error))?; + .map_err(|e| ToolsError::io(&write_target, e))?; + tmp.persist(&write_target) + .map_err(|e| ToolsError::io(&write_target, e.error))?; Ok(WriteOutcome { bytes_written: content.len(), @@ -174,6 +232,93 @@ impl ScopedFs { } } +/// Return the first symlink component in `path`, if one exists. +/// +/// The function only inspects existing path components. It intentionally uses +/// `symlink_metadata` so the symlink itself can be diagnosed before any later +/// `metadata` call follows it and collapses the reason into `NotFound` or +/// `OutOfScope`. +pub fn first_symlink(path: &Path) -> Option { + if !path.is_absolute() { + return None; + } + + let mut cur = PathBuf::new(); + let mut components = path.components().peekable(); + while let Some(component) = components.next() { + cur.push(component.as_os_str()); + let meta = std::fs::symlink_metadata(&cur).ok()?; + if !meta.file_type().is_symlink() { + continue; + } + + let raw_target = std::fs::read_link(&cur).ok()?; + let target_path = if raw_target.is_absolute() { + raw_target + } else { + cur.parent() + .unwrap_or_else(|| Path::new("/")) + .join(raw_target) + }; + let target_exists = target_path.exists(); + let mut resolved_path = target_path + .canonicalize() + .unwrap_or_else(|_| target_path.clone()); + for remaining in components { + resolved_path.push(remaining.as_os_str()); + } + + return Some(SymlinkInfo { + link_path: cur, + target_path, + resolved_path, + target_exists, + }); + } + + None +} + +pub fn direct_symlink(path: &Path) -> Option { + let meta = std::fs::symlink_metadata(path).ok()?; + if meta.file_type().is_symlink() { + first_symlink(path) + } else { + None + } +} + +fn symlink_out_of_scope_or_plain( + path: &Path, + symlink: Option<&SymlinkInfo>, + required_permission: &'static str, + scope: &Scope, +) -> ToolsError { + if let Some(info) = symlink { + let link_parent_readable = info + .link_path + .parent() + .map(|parent| scope.is_readable(parent)) + .unwrap_or(false); + if info.target_exists && link_parent_readable { + return ToolsError::SymlinkOutOfScope { + path: path.to_path_buf(), + target: info.resolved_path.clone(), + required_permission, + }; + } + } + ToolsError::OutOfScope(path.to_path_buf()) +} + +fn broken_symlink_error(path: &Path, info: &SymlinkInfo) -> ToolsError { + ToolsError::BrokenSymlink { + path: path.to_path_buf(), + link: info.link_path.clone(), + target: info.target_path.clone(), + } +} + // ============================================================================= // Tests // ============================================================================= @@ -241,6 +386,90 @@ mod tests { assert!(matches!(err, ToolsError::OutOfScope(_))); } + #[cfg(unix)] + #[test] + fn read_bytes_reports_broken_symlink_target() { + use std::os::unix::fs::symlink; + + let dir = TempDir::new().unwrap(); + let fs = make_fs(&dir); + let link = dir.path().join("external-project"); + let target = dir.path().join("missing-target"); + symlink(&target, &link).unwrap(); + + let err = fs.read_bytes(&link).unwrap_err(); + assert!( + matches!( + err, + ToolsError::BrokenSymlink { ref path, link: ref err_link, target: ref err_target } + if path == &link && err_link == &link && err_target == &target + ), + "expected broken symlink diagnostic, got {err:?}" + ); + } + + #[cfg(unix)] + #[test] + fn read_bytes_reports_symlink_target_outside_scope() { + use std::os::unix::fs::symlink; + + let dir = TempDir::new().unwrap(); + let outside = TempDir::new().unwrap(); + let target = outside.path().join("target.txt"); + fs::write(&target, b"secret").unwrap(); + let link = dir.path().join("outside-repo.txt"); + symlink(&target, &link).unwrap(); + + let fs = make_fs(&dir); + let err = fs.read_bytes(&link).unwrap_err(); + assert!( + matches!( + err, + ToolsError::SymlinkOutOfScope { ref path, target: ref err_target, required_permission: "read" } + if path == &link && err_target == &target.canonicalize().unwrap() + ), + "expected symlink out-of-scope diagnostic, got {err:?}" + ); + } + + #[cfg(unix)] + #[test] + fn read_bytes_allows_symlink_file_when_target_is_inside_scope() { + use std::os::unix::fs::symlink; + + let dir = TempDir::new().unwrap(); + let target = dir.path().join("target.txt"); + fs::write(&target, b"visible").unwrap(); + let link = dir.path().join("link.txt"); + symlink(&target, &link).unwrap(); + + let fs = make_fs(&dir); + assert_eq!(fs.read_bytes(&link).unwrap(), b"visible"); + } + + #[cfg(unix)] + #[test] + fn read_bytes_reports_symlink_to_directory_as_wrong_file_type() { + use std::os::unix::fs::symlink; + + let dir = TempDir::new().unwrap(); + let target_dir = dir.path().join("target-dir"); + fs::create_dir(&target_dir).unwrap(); + let link = dir.path().join("dir-link"); + symlink(&target_dir, &link).unwrap(); + + let fs = make_fs(&dir); + let err = fs.read_bytes(&link).unwrap_err(); + assert!( + matches!( + err, + ToolsError::SymlinkTargetIsDirectory { ref path, ref target } + if path == &link && target == &target_dir.canonicalize().unwrap() + ), + "expected symlink directory type diagnostic, got {err:?}" + ); + } + // ------------------------------------------------------------------------- // write // ------------------------------------------------------------------------- @@ -267,6 +496,53 @@ mod tests { assert_eq!(fs::read(&file).unwrap(), b"new"); } + #[cfg(unix)] + #[test] + fn write_existing_symlink_file_updates_in_scope_target() { + use std::os::unix::fs::symlink; + + let dir = TempDir::new().unwrap(); + let fs = make_fs(&dir); + let target = dir.path().join("target.txt"); + fs::write(&target, b"old").unwrap(); + let link = dir.path().join("link.txt"); + symlink(&target, &link).unwrap(); + + let out = fs.write(&link, b"new").unwrap(); + assert!(!out.created); + assert_eq!(fs::read(&target).unwrap(), b"new"); + assert!( + fs::symlink_metadata(&link) + .unwrap() + .file_type() + .is_symlink() + ); + } + + #[cfg(unix)] + #[test] + fn write_reports_symlink_target_outside_scope() { + use std::os::unix::fs::symlink; + + let dir = TempDir::new().unwrap(); + let outside = TempDir::new().unwrap(); + let target = outside.path().join("target.txt"); + fs::write(&target, b"secret").unwrap(); + let link = dir.path().join("outside-repo.txt"); + symlink(&target, &link).unwrap(); + + let fs = make_fs(&dir); + let err = fs.write(&link, b"new").unwrap_err(); + assert!( + matches!( + err, + ToolsError::SymlinkOutOfScope { ref path, target: ref err_target, required_permission: "write" } + if path == &link && err_target == &target.canonicalize().unwrap() + ), + "expected write symlink out-of-scope diagnostic, got {err:?}" + ); + } + #[test] fn write_rejects_out_of_scope() { let dir = TempDir::new().unwrap(); diff --git a/crates/tools/tests/edge_cases.rs b/crates/tools/tests/edge_cases.rs index 0c9c70ac..e9f1e4cc 100644 --- a/crates/tools/tests/edge_cases.rs +++ b/crates/tools/tests/edge_cases.rs @@ -102,9 +102,13 @@ async fn symlink_to_outside_scope_is_rejected_for_write() { .await .unwrap_err(); assert!( - format!("{read_err}").contains("outside allowed scope"), + format!("{read_err}").contains("outside allowed read scope"), "symlink read escape not rejected: {read_err}" ); + assert!( + format!("{read_err}").contains(&outside_target.display().to_string()), + "symlink read diagnostic should include resolved target: {read_err}" + ); // Write through the symlink must be rejected for the same reason. let write = reg.get("Write"); @@ -120,13 +124,39 @@ async fn symlink_to_outside_scope_is_rejected_for_write() { .unwrap_err(); let msg = format!("{err}"); assert!( - msg.contains("outside allowed scope"), + msg.contains("outside allowed read scope") || msg.contains("outside allowed write scope"), "symlink escape not rejected: {msg}" ); + assert!( + msg.contains("add the symlink target"), + "symlink escape diagnostic should include remediation: {msg}" + ); // Outside file must not have been touched. assert_eq!(std::fs::read_to_string(&outside_target).unwrap(), "secret"); } +#[cfg(unix)] +#[tokio::test] +async fn broken_symlink_reports_target_and_repair_hint() { + use std::os::unix::fs::symlink; + + let (dir, _spill, reg) = setup(); + let link = dir.path().join("external-project"); + let target = dir.path().join("missing-target"); + symlink(&target, &link).unwrap(); + + let read = reg.get("Read"); + let err = read + .execute(&json!({ "file_path": link.to_str().unwrap() }).to_string()) + .await + .unwrap_err(); + let msg = format!("{err}"); + assert!(msg.contains("broken symlink"), "{msg}"); + assert!(msg.contains(&link.display().to_string()), "{msg}"); + assert!(msg.contains(&target.display().to_string()), "{msg}"); + assert!(msg.contains("correct relative target"), "{msg}"); +} + #[tokio::test] async fn empty_file_read_and_edit() { let (dir, _spill, reg) = setup(); diff --git a/docs/file-ref-symlinks.md b/docs/file-ref-symlinks.md new file mode 100644 index 00000000..57b710dd --- /dev/null +++ b/docs/file-ref-symlinks.md @@ -0,0 +1,12 @@ +# File references and symlinks + +FileRef resolution and file tools follow symlinks only after the resolved target passes the Pod scope check. A symlink placed inside the workspace does not grant access to the target by itself. + +Recommended external-reference workflow: + +- Prefer adding the real external project path, such as a local `ghq` clone, to the Pod read scope when the Pod is started or spawned. +- If a workspace symlink is used, the symlink target still must be inside readable scope. For writes, the resolved target must be inside writable scope. +- If a relative symlink is broken, recreate it with the correct relative target from the symlink's parent directory, or use an absolute symlink. +- Directory traversal tools such as Glob and Grep do not follow symlink directories. Use the resolved target directory directly when it is in read scope. + +This preserves symlink escape safety: access decisions are made on the canonicalized target whenever the target exists, and broken or out-of-scope symlinks are rejected with diagnostics that include the original path and target where possible.