ToolsのTracker実装

This commit is contained in:
Keisuke Hirata 2026-04-13 04:26:27 +09:00
parent 5bc4a6d6d6
commit 9b9e37cc84
14 changed files with 442 additions and 358 deletions

View File

@ -18,7 +18,7 @@
- [x] api_key_file: ファイルパスによるAPIキー解決 → [tickets/api-key-file.md](tickets/api-key-file.md)
- [x] コンテキスト圧縮 (Prune + Compact) → [tickets/context-compaction.md](tickets/context-compaction.md)
- [ ] LlmClient へ Tokenizer の導入 → [tickets/token-counter.md](tickets/token-counter.md)
- [ ] Tracker: ReadTracker リネーム + recent_files 追加 → [tickets/tracker.md](tickets/tracker.md)
- [x] Tracker: ReadTracker リネーム + recent_files 追加 → [tickets/tracker.md](tickets/tracker.md)
- [ ] Compact の改善(要約品質 + 挙動詳細) → [tickets/compact-improvements.md](tickets/compact-improvements.md)
- [ ] Protocol の設計 → [tickets/protocol-design.md](tickets/protocol-design.md)
- [x] Protocol: request-response パターン (GetHistory等) → [tickets/request-response-protocol.md](tickets/request-response-protocol.md)

View File

@ -164,13 +164,17 @@ impl PodController {
// Edit / Glob / Grep) when the manifest declares a scope.
//
// `ScopedFs` carries the pod-lifetime write boundary (derived
// from the manifest scope). `ReadTracker` is session-scoped —
// from the manifest scope). `Tracker` is session-scoped —
// a fresh instance per controller spawn ensures state from a
// previous process lifetime cannot be reused after a resume.
// The tracker is also handed to the Pod itself so Pod-level
// operations (e.g. context compaction) can ask which files
// the agent has been touching.
if let Some(scope) = scope_for_tools {
let fs = tools::ScopedFs::new(scope);
let tracker = tools::ReadTracker::new();
worker.register_tools(tools::builtin_tools(fs, tracker));
let tracker = tools::Tracker::new();
worker.register_tools(tools::builtin_tools(fs, tracker.clone()));
pod.attach_tracker(tracker);
}
}

View File

@ -53,6 +53,11 @@ pub struct Pod<C: LlmClient, St: Store> {
manifest_dir: Option<PathBuf>,
/// Shared compaction state (present when compact_threshold is configured).
compact_state: Option<Arc<CompactState>>,
/// Session-lifetime file-operation tracker from the builtin `tools`
/// crate. Populated by the Controller when it registers the builtin
/// tools so that Pod-owned operations (e.g. compaction) can consult
/// the recency of touched files.
tracker: Option<tools::Tracker>,
}
impl<C: LlmClient, St: Store> Pod<C, St> {
@ -80,6 +85,7 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
interceptor_installed: false,
manifest_dir: None,
compact_state: None,
tracker: None,
})
}
@ -112,6 +118,7 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
interceptor_installed: false,
manifest_dir: None,
compact_state: None,
tracker: None,
})
}
@ -148,6 +155,19 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
&self.store
}
/// Attach the session-scoped file-operation tracker from the builtin
/// `tools` crate. Called by the Controller immediately after it
/// registers the builtin tools on the Worker. Overwrites any
/// previously attached tracker.
pub fn attach_tracker(&mut self, tracker: tools::Tracker) {
self.tracker = Some(tracker);
}
/// The attached session-scoped file-operation tracker, if any.
pub fn tracker(&self) -> Option<&tools::Tracker> {
self.tracker.as_ref()
}
// --- Hook registration ---
fn assert_hooks_open(&self) {
@ -577,6 +597,7 @@ impl<St: Store> Pod<Box<dyn LlmClient>, St> {
interceptor_installed: false,
manifest_dir,
compact_state: None,
tracker: None,
})
}

View File

@ -8,8 +8,8 @@ use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta, ToolOutput};
use serde::Deserialize;
use crate::error::ToolsError;
use crate::read_tracker::ReadTracker;
use crate::scoped_fs::ScopedFs;
use crate::tracker::Tracker;
const DESCRIPTION: &str = "Replace a substring in an existing file. By default \
`old_string` must be unique in the file; set `replace_all: true` to replace \
@ -31,7 +31,7 @@ pub(crate) struct EditParams {
pub(crate) struct EditTool {
fs: ScopedFs,
tracker: ReadTracker,
tracker: Tracker,
}
#[async_trait]
@ -135,7 +135,7 @@ fn make_preview(text: &str, needle: &str) -> String {
}
/// Factory for the `Edit` tool.
pub fn edit_tool(fs: ScopedFs, tracker: ReadTracker) -> ToolDefinition {
pub fn edit_tool(fs: ScopedFs, tracker: Tracker) -> ToolDefinition {
Arc::new(move || {
let schema = schemars::schema_for!(EditParams);
let schema_value = serde_json::to_value(schema).unwrap_or(serde_json::json!({}));
@ -157,13 +157,13 @@ mod tests {
use manifest::Scope;
use tempfile::TempDir;
fn setup() -> (TempDir, ScopedFs, ReadTracker) {
fn setup() -> (TempDir, ScopedFs, Tracker) {
let dir = TempDir::new().unwrap();
let fs = ScopedFs::new(Scope::new(dir.path()).unwrap());
(dir, fs, ReadTracker::new())
(dir, fs, Tracker::new())
}
async fn read_first(fs: &ScopedFs, tracker: &ReadTracker, file: &std::path::Path) {
async fn read_first(fs: &ScopedFs, tracker: &Tracker, file: &std::path::Path) {
let def = read_tool(fs.clone(), tracker.clone());
let (_, reader) = def();
let inp = serde_json::json!({ "file_path": file.to_str().unwrap() });

View File

@ -7,15 +7,16 @@
//! - [`ScopedFs`] — pod-lifetime, expresses the write-block boundary for
//! the current scope. Derived from the manifest and shareable across
//! sessions.
//! - [`ReadTracker`] — session-lifetime, enforces the "read before edit"
//! policy via content hashes. Recreated fresh per session.
//! - [`Tracker`] — session-lifetime, enforces the "read before edit"
//! policy via content hashes and tracks the recency of touched files.
//! Recreated fresh per session.
//!
//! The Pod layer owns both instances and passes them to
//! [`builtin_tools`] when registering tools on a `Worker`.
pub mod error;
pub mod read_tracker;
pub mod scoped_fs;
pub mod tracker;
mod edit;
mod glob;
@ -28,19 +29,19 @@ pub use error::ToolsError;
pub use glob::glob_tool;
pub use grep::grep_tool;
pub use read::read_tool;
pub use read_tracker::ReadTracker;
pub use scoped_fs::ScopedFs;
pub use tracker::Tracker;
pub use write::write_tool;
/// Register all builtin tools, wiring them to a shared `ScopedFs`
/// (pod-lifetime) and `ReadTracker` (session-lifetime).
/// (pod-lifetime) and `Tracker` (session-lifetime).
///
/// All returned factories share the same tracker instance so that
/// `Read` / `Write` / `Edit` see a consistent history across tool
/// invocations within a single session.
pub fn builtin_tools(
fs: ScopedFs,
tracker: ReadTracker,
tracker: Tracker,
) -> Vec<llm_worker::tool::ToolDefinition> {
vec![
read_tool(fs.clone(), tracker.clone()),

View File

@ -7,8 +7,8 @@ use async_trait::async_trait;
use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta, ToolOutput};
use serde::Deserialize;
use crate::read_tracker::ReadTracker;
use crate::scoped_fs::ScopedFs;
use crate::tracker::Tracker;
const DESCRIPTION: &str = "Read a text file from the local filesystem. \
Supports offset/limit for large files. Returns line-numbered output (1-based). \
@ -31,7 +31,7 @@ pub(crate) struct ReadParams {
pub(crate) struct ReadTool {
fs: ScopedFs,
tracker: ReadTracker,
tracker: Tracker,
}
#[async_trait]
@ -114,7 +114,7 @@ fn render_numbered(text: &str, offset: usize, limit: usize) -> Rendered {
}
/// Factory for the `Read` tool.
pub fn read_tool(fs: ScopedFs, tracker: ReadTracker) -> ToolDefinition {
pub fn read_tool(fs: ScopedFs, tracker: Tracker) -> ToolDefinition {
Arc::new(move || {
let schema = schemars::schema_for!(ReadParams);
let schema_value = serde_json::to_value(schema).unwrap_or(serde_json::json!({}));
@ -135,10 +135,10 @@ mod tests {
use manifest::Scope;
use tempfile::TempDir;
fn setup() -> (TempDir, ScopedFs, ReadTracker) {
fn setup() -> (TempDir, ScopedFs, Tracker) {
let dir = TempDir::new().unwrap();
let fs = ScopedFs::new(Scope::new(dir.path()).unwrap());
(dir, fs, ReadTracker::new())
(dir, fs, Tracker::new())
}
#[tokio::test]

View File

@ -1,213 +0,0 @@
//! Read-before-edit policy tracker for the builtin file-manipulation tools.
//!
//! A `ReadTracker` records a SHA-256 hash of each file's contents at the
//! moment it was observed via the `Read` tool, and lets `Write` / `Edit`
//! later verify that the file has not been externally modified since then.
//!
//! # Lifetime
//!
//! A `ReadTracker` is **session-scoped**: the Pod layer creates a fresh
//! instance at the start of each agent session and discards it when the
//! session ends. The `ScopedFs` write boundary, by contrast, is
//! pod-lifetime (derived from the manifest). The two are orthogonal and
//! the Pod wires them together when registering builtin tools.
//!
//! ```no_run
//! # use manifest::Scope;
//! # use tools::{ScopedFs, ReadTracker, builtin_tools};
//! let scope = Scope::new("/workspace").unwrap();
//! let fs = ScopedFs::new(scope); // pod lifetime
//! let tracker = ReadTracker::new(); // session lifetime
//! let defs = builtin_tools(fs, tracker);
//! ```
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex};
use sha2::{Digest, Sha256};
use crate::error::ToolsError;
/// Fixed-size content hash recorded per file.
type ContentHash = [u8; 32];
fn hash_bytes(bytes: &[u8]) -> ContentHash {
let mut hasher = Sha256::new();
hasher.update(bytes);
hasher.finalize().into()
}
/// Canonical-path keyed record of which files have been observed and at
/// what content hash.
///
/// Cheap to clone: internally an `Arc<Mutex<HashMap<...>>>`, so sharing a
/// `ReadTracker` across every builtin tool in a session is effectively
/// free and keeps their views consistent.
#[derive(Debug, Clone, Default)]
pub struct ReadTracker {
inner: Arc<Mutex<HashMap<PathBuf, ContentHash>>>,
}
impl ReadTracker {
/// Create an empty tracker. Typically called once per session.
pub fn new() -> Self {
Self::default()
}
/// Record that `path` has been observed with the given content bytes.
///
/// Called by the `Read` tool after a successful read, and by the
/// `Write` / `Edit` tools after a successful modification (so that
/// subsequent edits see a clean history).
pub fn record(&self, path: &Path, bytes: &[u8]) {
let key = canonicalize_or_owned(path);
let hash = hash_bytes(bytes);
self.inner
.lock()
.unwrap_or_else(|e| e.into_inner())
.insert(key, hash);
}
/// Verify that `path` was previously recorded and its current bytes
/// match the recorded hash.
///
/// - If the path has no history entry, returns [`ToolsError::NotRead`].
/// - If the current content hashes differ from the recorded value,
/// returns [`ToolsError::ExternallyModified`].
pub fn verify(&self, path: &Path, current_bytes: &[u8]) -> Result<(), ToolsError> {
let key = canonicalize_or_owned(path);
let guard = self.inner.lock().unwrap_or_else(|e| e.into_inner());
let recorded = guard
.get(&key)
.ok_or_else(|| ToolsError::NotRead(path.to_path_buf()))?;
let current = hash_bytes(current_bytes);
if *recorded != current {
return Err(ToolsError::ExternallyModified(path.to_path_buf()));
}
Ok(())
}
/// Returns true if `path` has a history entry. Test-only.
#[cfg(test)]
pub(crate) fn has(&self, path: &Path) -> bool {
let key = canonicalize_or_owned(path);
self.inner
.lock()
.unwrap_or_else(|e| e.into_inner())
.contains_key(&key)
}
/// Number of distinct files in the history. Test-only.
#[cfg(test)]
pub(crate) fn len(&self) -> usize {
self.inner
.lock()
.unwrap_or_else(|e| e.into_inner())
.len()
}
}
fn canonicalize_or_owned(path: &Path) -> PathBuf {
path.canonicalize().unwrap_or_else(|_| path.to_path_buf())
}
#[cfg(test)]
mod tests {
use super::*;
use std::fs;
use tempfile::TempDir;
#[test]
fn record_then_verify_clean_ok() {
let dir = TempDir::new().unwrap();
let file = dir.path().join("a.txt");
fs::write(&file, b"hello").unwrap();
let tracker = ReadTracker::new();
tracker.record(&file, b"hello");
assert!(tracker.has(&file));
assert_eq!(tracker.len(), 1);
tracker.verify(&file, b"hello").unwrap();
}
#[test]
fn verify_without_record_returns_not_read() {
let dir = TempDir::new().unwrap();
let file = dir.path().join("a.txt");
fs::write(&file, b"x").unwrap();
let tracker = ReadTracker::new();
let err = tracker.verify(&file, b"x").unwrap_err();
assert!(matches!(err, ToolsError::NotRead(_)));
}
#[test]
fn verify_mismatch_returns_externally_modified() {
let dir = TempDir::new().unwrap();
let file = dir.path().join("a.txt");
fs::write(&file, b"original").unwrap();
let tracker = ReadTracker::new();
tracker.record(&file, b"original");
let err = tracker.verify(&file, b"tampered").unwrap_err();
assert!(matches!(err, ToolsError::ExternallyModified(_)));
}
#[test]
fn record_overwrites_previous_hash() {
let dir = TempDir::new().unwrap();
let file = dir.path().join("a.txt");
fs::write(&file, b"v1").unwrap();
let tracker = ReadTracker::new();
tracker.record(&file, b"v1");
tracker.record(&file, b"v2");
tracker.verify(&file, b"v2").unwrap();
assert!(tracker.verify(&file, b"v1").is_err());
}
#[test]
fn canonical_keys_collapse_symlink_variants() {
#[cfg(unix)]
{
use std::os::unix::fs::symlink;
let dir = TempDir::new().unwrap();
let real = dir.path().join("real.txt");
fs::write(&real, b"data").unwrap();
let link = dir.path().join("link.txt");
symlink(&real, &link).unwrap();
let tracker = ReadTracker::new();
tracker.record(&real, b"data");
// Looking up via the symlink should hit the same entry.
tracker.verify(&link, b"data").unwrap();
// Exactly one entry.
assert_eq!(tracker.len(), 1);
}
}
#[test]
fn clone_shares_state() {
let dir = TempDir::new().unwrap();
let file = dir.path().join("a.txt");
fs::write(&file, b"x").unwrap();
let t1 = ReadTracker::new();
let t2 = t1.clone();
t1.record(&file, b"x");
t2.verify(&file, b"x").unwrap();
}
#[test]
fn empty_bytes_hash_stable() {
let tracker = ReadTracker::new();
let dir = TempDir::new().unwrap();
let file = dir.path().join("empty.txt");
fs::write(&file, b"").unwrap();
tracker.record(&file, b"");
tracker.verify(&file, b"").unwrap();
assert!(tracker.verify(&file, b"x").is_err());
}
}

View File

@ -4,7 +4,7 @@
//! [`manifest::Scope`] and refuses writes outside of it. It carries no
//! per-session state and is cheap to clone (pod-lifetime, reusable across
//! sessions). The read-before-edit policy lives separately in
//! [`crate::ReadTracker`].
//! [`crate::Tracker`].
//!
//! Reads are unrestricted by design (see `tickets/builtin-tools.md`).
@ -83,7 +83,7 @@ impl ScopedFs {
///
/// This method does **not** consult any read history. Callers that
/// want the "must read before overwrite" policy should verify with a
/// [`ReadTracker`](crate::ReadTracker) beforehand.
/// [`Tracker`](crate::Tracker) beforehand.
pub fn write(&self, path: &Path, content: &[u8]) -> Result<WriteOutcome, ToolsError> {
if !path.is_absolute() {
return Err(ToolsError::RelativePath(path.to_path_buf()));

346
crates/tools/src/tracker.rs Normal file
View File

@ -0,0 +1,346 @@
//! Session-scoped tracker for file operations performed by the builtin
//! file-manipulation tools.
//!
//! A `Tracker` serves two orthogonal purposes:
//!
//! 1. **Read-before-edit policy.** It records a SHA-256 hash of each
//! file's contents at the moment it was observed via `Read` (or
//! mutated via `Write` / `Edit`), and lets `Write` / `Edit` later
//! verify that the file has not been externally modified since then.
//!
//! 2. **Recency of touched files.** It keeps an LRU-ordered list of
//! files that have been touched by any of the tools, so the Pod
//! layer can ask "which files did the agent recently look at?" —
//! used e.g. as a default reference set passed to context compaction.
//!
//! Despite its historic name, the Tracker already watches all three of
//! Read / Write / Edit; the rename away from `ReadTracker` reflects this.
//!
//! # Lifetime
//!
//! A `Tracker` is **session-scoped**: the Pod layer creates a fresh
//! instance at the start of each agent session and discards it when the
//! session ends. The `ScopedFs` write boundary, by contrast, is
//! pod-lifetime (derived from the manifest). The two are orthogonal and
//! the Pod wires them together when registering builtin tools.
//!
//! ```no_run
//! # use manifest::Scope;
//! # use tools::{ScopedFs, Tracker, builtin_tools};
//! let scope = Scope::new("/workspace").unwrap();
//! let fs = ScopedFs::new(scope); // pod lifetime
//! let tracker = Tracker::new(); // session lifetime
//! let defs = builtin_tools(fs, tracker);
//! ```
use std::collections::{HashMap, VecDeque};
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex};
use sha2::{Digest, Sha256};
use crate::error::ToolsError;
/// Fixed-size content hash recorded per file.
type ContentHash = [u8; 32];
/// How many distinct paths the recency list keeps before evicting the
/// least-recently-touched entry.
const RECENCY_CAPACITY: usize = 20;
fn hash_bytes(bytes: &[u8]) -> ContentHash {
let mut hasher = Sha256::new();
hasher.update(bytes);
hasher.finalize().into()
}
#[derive(Debug, Default)]
struct Inner {
/// Hash of each file's last observed contents, keyed by canonical path.
hashes: HashMap<PathBuf, ContentHash>,
/// LRU list of touched files. Front = most recently touched.
recency: VecDeque<PathBuf>,
}
/// Canonical-path keyed tracker of file observations and their recency.
///
/// Cheap to clone: internally an `Arc<Mutex<Inner>>`, so sharing a
/// `Tracker` across every builtin tool in a session is effectively free
/// and keeps their views consistent.
#[derive(Debug, Clone, Default)]
pub struct Tracker {
inner: Arc<Mutex<Inner>>,
}
impl Tracker {
/// Create an empty tracker. Typically called once per session.
pub fn new() -> Self {
Self::default()
}
/// Record that `path` has been observed with the given content bytes.
///
/// Called by the `Read` tool after a successful read, and by the
/// `Write` / `Edit` tools after a successful modification (so that
/// subsequent edits see a clean history).
///
/// Also bumps `path` to the front of the recency list. If the list
/// grows past [`RECENCY_CAPACITY`], the oldest entry is evicted.
pub fn record(&self, path: &Path, bytes: &[u8]) {
let key = canonicalize_or_owned(path);
let hash = hash_bytes(bytes);
let mut inner = self.inner.lock().unwrap_or_else(|e| e.into_inner());
inner.hashes.insert(key.clone(), hash);
// LRU bump: remove an existing entry for this path then push to
// the front. We intentionally compare by the canonical key so
// symlink/real-path pairs collapse into a single slot.
inner.recency.retain(|p| p != &key);
inner.recency.push_front(key);
if inner.recency.len() > RECENCY_CAPACITY {
inner.recency.pop_back();
}
}
/// Verify that `path` was previously recorded and its current bytes
/// match the recorded hash.
///
/// - If the path has no history entry, returns [`ToolsError::NotRead`].
/// - If the current content hashes differ from the recorded value,
/// returns [`ToolsError::ExternallyModified`].
pub fn verify(&self, path: &Path, current_bytes: &[u8]) -> Result<(), ToolsError> {
let key = canonicalize_or_owned(path);
let guard = self.inner.lock().unwrap_or_else(|e| e.into_inner());
let recorded = guard
.hashes
.get(&key)
.ok_or_else(|| ToolsError::NotRead(path.to_path_buf()))?;
let current = hash_bytes(current_bytes);
if *recorded != current {
return Err(ToolsError::ExternallyModified(path.to_path_buf()));
}
Ok(())
}
/// Return up to `n` most recently touched file paths, most-recent first.
///
/// Intended for callers like the Pod's context-compaction path, which
/// wants to know which files the agent has been working with so it
/// can pass them as default references to the compaction worker.
pub fn recent_files(&self, n: usize) -> Vec<PathBuf> {
let inner = self.inner.lock().unwrap_or_else(|e| e.into_inner());
inner.recency.iter().take(n).cloned().collect()
}
/// Returns true if `path` has a history entry. Test-only.
#[cfg(test)]
pub(crate) fn has(&self, path: &Path) -> bool {
let key = canonicalize_or_owned(path);
self.inner
.lock()
.unwrap_or_else(|e| e.into_inner())
.hashes
.contains_key(&key)
}
/// Number of distinct files in the history. Test-only.
#[cfg(test)]
pub(crate) fn len(&self) -> usize {
self.inner
.lock()
.unwrap_or_else(|e| e.into_inner())
.hashes
.len()
}
}
fn canonicalize_or_owned(path: &Path) -> PathBuf {
path.canonicalize().unwrap_or_else(|_| path.to_path_buf())
}
#[cfg(test)]
mod tests {
use super::*;
use std::fs;
use tempfile::TempDir;
#[test]
fn record_then_verify_clean_ok() {
let dir = TempDir::new().unwrap();
let file = dir.path().join("a.txt");
fs::write(&file, b"hello").unwrap();
let tracker = Tracker::new();
tracker.record(&file, b"hello");
assert!(tracker.has(&file));
assert_eq!(tracker.len(), 1);
tracker.verify(&file, b"hello").unwrap();
}
#[test]
fn verify_without_record_returns_not_read() {
let dir = TempDir::new().unwrap();
let file = dir.path().join("a.txt");
fs::write(&file, b"x").unwrap();
let tracker = Tracker::new();
let err = tracker.verify(&file, b"x").unwrap_err();
assert!(matches!(err, ToolsError::NotRead(_)));
}
#[test]
fn verify_mismatch_returns_externally_modified() {
let dir = TempDir::new().unwrap();
let file = dir.path().join("a.txt");
fs::write(&file, b"original").unwrap();
let tracker = Tracker::new();
tracker.record(&file, b"original");
let err = tracker.verify(&file, b"tampered").unwrap_err();
assert!(matches!(err, ToolsError::ExternallyModified(_)));
}
#[test]
fn record_overwrites_previous_hash() {
let dir = TempDir::new().unwrap();
let file = dir.path().join("a.txt");
fs::write(&file, b"v1").unwrap();
let tracker = Tracker::new();
tracker.record(&file, b"v1");
tracker.record(&file, b"v2");
tracker.verify(&file, b"v2").unwrap();
assert!(tracker.verify(&file, b"v1").is_err());
}
#[test]
fn canonical_keys_collapse_symlink_variants() {
#[cfg(unix)]
{
use std::os::unix::fs::symlink;
let dir = TempDir::new().unwrap();
let real = dir.path().join("real.txt");
fs::write(&real, b"data").unwrap();
let link = dir.path().join("link.txt");
symlink(&real, &link).unwrap();
let tracker = Tracker::new();
tracker.record(&real, b"data");
// Looking up via the symlink should hit the same entry.
tracker.verify(&link, b"data").unwrap();
// Exactly one entry.
assert_eq!(tracker.len(), 1);
}
}
#[test]
fn clone_shares_state() {
let dir = TempDir::new().unwrap();
let file = dir.path().join("a.txt");
fs::write(&file, b"x").unwrap();
let t1 = Tracker::new();
let t2 = t1.clone();
t1.record(&file, b"x");
t2.verify(&file, b"x").unwrap();
}
#[test]
fn empty_bytes_hash_stable() {
let tracker = Tracker::new();
let dir = TempDir::new().unwrap();
let file = dir.path().join("empty.txt");
fs::write(&file, b"").unwrap();
tracker.record(&file, b"");
tracker.verify(&file, b"").unwrap();
assert!(tracker.verify(&file, b"x").is_err());
}
// --- recency ---
#[test]
fn recent_files_returns_in_lru_order() {
let dir = TempDir::new().unwrap();
let a = dir.path().join("a.txt");
let b = dir.path().join("b.txt");
let c = dir.path().join("c.txt");
fs::write(&a, b"").unwrap();
fs::write(&b, b"").unwrap();
fs::write(&c, b"").unwrap();
let tracker = Tracker::new();
tracker.record(&a, b"");
tracker.record(&b, b"");
tracker.record(&c, b"");
let recent = tracker.recent_files(10);
// Most recent first.
assert_eq!(recent.len(), 3);
assert!(recent[0].ends_with("c.txt"));
assert!(recent[1].ends_with("b.txt"));
assert!(recent[2].ends_with("a.txt"));
}
#[test]
fn recent_files_respects_n_limit() {
let dir = TempDir::new().unwrap();
let tracker = Tracker::new();
for i in 0..5 {
let p = dir.path().join(format!("f{i}.txt"));
fs::write(&p, b"").unwrap();
tracker.record(&p, b"");
}
assert_eq!(tracker.recent_files(3).len(), 3);
assert_eq!(tracker.recent_files(0).len(), 0);
assert_eq!(tracker.recent_files(100).len(), 5);
}
#[test]
fn re_recording_moves_entry_to_front() {
let dir = TempDir::new().unwrap();
let a = dir.path().join("a.txt");
let b = dir.path().join("b.txt");
let c = dir.path().join("c.txt");
fs::write(&a, b"").unwrap();
fs::write(&b, b"").unwrap();
fs::write(&c, b"").unwrap();
let tracker = Tracker::new();
tracker.record(&a, b"");
tracker.record(&b, b"");
tracker.record(&c, b"");
// Touching `a` again promotes it to the front.
tracker.record(&a, b"");
let recent = tracker.recent_files(10);
assert_eq!(recent.len(), 3);
assert!(recent[0].ends_with("a.txt"));
assert!(recent[1].ends_with("c.txt"));
assert!(recent[2].ends_with("b.txt"));
}
#[test]
fn recency_capacity_evicts_oldest() {
let dir = TempDir::new().unwrap();
let tracker = Tracker::new();
// Record one more than the capacity.
for i in 0..(RECENCY_CAPACITY + 5) {
let p = dir.path().join(format!("f{i:02}.txt"));
fs::write(&p, b"").unwrap();
tracker.record(&p, b"");
}
let recent = tracker.recent_files(RECENCY_CAPACITY + 100);
assert_eq!(recent.len(), RECENCY_CAPACITY);
// Newest-first: f24 down to f05. f00..f04 must be evicted.
assert!(recent[0].ends_with(&format!("f{:02}.txt", RECENCY_CAPACITY + 4)));
let last = recent.last().unwrap();
assert!(last.ends_with("f05.txt"), "oldest surviving: {last:?}");
// The evicted oldest ones must not appear.
for i in 0..5 {
let name = format!("f{i:02}.txt");
assert!(recent.iter().all(|p| !p.ends_with(&name)));
}
}
}

View File

@ -7,8 +7,8 @@ use async_trait::async_trait;
use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta, ToolOutput};
use serde::Deserialize;
use crate::read_tracker::ReadTracker;
use crate::scoped_fs::ScopedFs;
use crate::tracker::Tracker;
const DESCRIPTION: &str = "Create a new file or overwrite an existing one with \
the given content. Missing parent directories within scope are created \
@ -25,7 +25,7 @@ pub(crate) struct WriteParams {
pub(crate) struct WriteTool {
fs: ScopedFs,
tracker: ReadTracker,
tracker: Tracker,
}
#[async_trait]
@ -69,7 +69,7 @@ impl Tool for WriteTool {
}
/// Factory for the `Write` tool.
pub fn write_tool(fs: ScopedFs, tracker: ReadTracker) -> ToolDefinition {
pub fn write_tool(fs: ScopedFs, tracker: Tracker) -> ToolDefinition {
Arc::new(move || {
let schema = schemars::schema_for!(WriteParams);
let schema_value = serde_json::to_value(schema).unwrap_or(serde_json::json!({}));
@ -91,10 +91,10 @@ mod tests {
use manifest::Scope;
use tempfile::TempDir;
fn setup() -> (TempDir, ScopedFs, ReadTracker) {
fn setup() -> (TempDir, ScopedFs, Tracker) {
let dir = TempDir::new().unwrap();
let fs = ScopedFs::new(Scope::new(dir.path()).unwrap());
(dir, fs, ReadTracker::new())
(dir, fs, Tracker::new())
}
#[tokio::test]

View File

@ -6,7 +6,7 @@ use llm_worker::tool::{Tool, ToolDefinition};
use manifest::Scope;
use serde_json::json;
use tempfile::TempDir;
use tools::{ReadTracker, ScopedFs, builtin_tools};
use tools::{Tracker, ScopedFs, builtin_tools};
struct Registry {
entries: Vec<(llm_worker::tool::ToolMeta, Arc<dyn Tool>)>,
@ -30,7 +30,7 @@ impl Registry {
fn setup() -> (TempDir, Registry) {
let dir = TempDir::new().unwrap();
let fs = ScopedFs::new(Scope::new(dir.path()).unwrap());
let tracker = ReadTracker::new();
let tracker = Tracker::new();
(dir, Registry::new(builtin_tools(fs, tracker)))
}

View File

@ -11,7 +11,7 @@ use llm_worker::tool::{Tool, ToolDefinition, ToolMeta};
use manifest::Scope;
use serde_json::json;
use tempfile::TempDir;
use tools::{ReadTracker, ScopedFs, builtin_tools};
use tools::{Tracker, ScopedFs, builtin_tools};
struct Registry {
entries: Vec<(ToolMeta, Arc<dyn Tool>)>,
@ -39,7 +39,7 @@ impl Registry {
fn setup() -> (TempDir, Registry) {
let dir = TempDir::new().unwrap();
let fs = ScopedFs::new(Scope::new(dir.path()).unwrap());
let tracker = ReadTracker::new();
let tracker = Tracker::new();
let reg = Registry::new(builtin_tools(fs, tracker));
(dir, reg)
}
@ -270,5 +270,42 @@ fn tool_names_match_reference_spec() {
}
}
#[tokio::test]
async fn tracker_recent_files_tracks_read_write_edit() {
// Build a fresh registry that shares a tracker we can query afterwards.
let dir = TempDir::new().unwrap();
let fs = ScopedFs::new(Scope::new(dir.path()).unwrap());
let tracker = Tracker::new();
let reg = Registry::new(builtin_tools(fs, tracker.clone()));
let a = dir.path().join("a.txt");
let b = dir.path().join("b.txt");
std::fs::write(&a, "one\n").unwrap();
// Read `a` — should appear in recency.
call(&reg.get("Read"), json!({ "file_path": a.to_str().unwrap() })).await;
// Write `b` (new file) — should appear ahead of `a`.
call(
&reg.get("Write"),
json!({ "file_path": b.to_str().unwrap(), "content": "hello\n" }),
)
.await;
// Edit `a` — should bump it back to the front.
call(
&reg.get("Edit"),
json!({
"file_path": a.to_str().unwrap(),
"old_string": "one",
"new_string": "two",
}),
)
.await;
let recent = tracker.recent_files(10);
assert_eq!(recent.len(), 2);
assert!(recent[0].ends_with("a.txt"), "front should be a.txt: {recent:?}");
assert!(recent[1].ends_with("b.txt"), "second should be b.txt: {recent:?}");
}
// Sanity: unused Path import guard
const _: fn() -> &'static Path = || Path::new("/");

View File

@ -69,7 +69,7 @@ if let Some(scope) = scope_for_tools { ... }
// 後: 常にツール登録scope は必須)
let fs = tools::ScopedFs::new(pod.scope().clone());
let tracker = tools::ReadTracker::new();
let tracker = tools::Tracker::new();
worker.register_tools(tools::builtin_tools(fs, tracker));
```

View File

@ -1,112 +0,0 @@
# Tracker: ReadTracker のリネームと機能追加
## 背景
`tools::ReadTracker` は既に Read/Write/Edit のすべてでファイル操作を記録している
(`record()` が各ツールから呼ばれる)。名前に反して「read 以外も追跡している」状態。
また compact の改善 ([compact-improvements.md](compact-improvements.md)) で
「最近触られたファイル一覧」をデフォルトリファレンスとして compact worker に渡したい
要求があり、既存の Tracker を拡張すれば自然に解決する。
## 方針
1. `ReadTracker``Tracker` にリネーム (crate 全体)
2. 順序付き (recency) の履歴を追加
3. `recent_files(n)` メソッドで最近 N 件を取得できるようにする
4. Pod が Tracker を保持して compact 時に参照
## 実装
### リネーム
- `crates/tools/src/read_tracker.rs``crates/tools/src/tracker.rs`
- `pub struct ReadTracker``pub struct Tracker`
- `crates/tools/src/lib.rs` の pub 再公開 (`pub use read_tracker::ReadTracker` → `pub use tracker::Tracker`)
- crate 内呼び出し側 (`read.rs`, `write.rs`, `edit.rs`, `scoped_fs.rs` など)
- テスト (`tests/integration.rs`, `tests/edge_cases.rs`)
- `crates/pod/src/controller.rs:172``tools::ReadTracker::new()``tools::Tracker::new()`
### 内部構造の変更
```rust
#[derive(Debug, Clone, Default)]
pub struct Tracker {
inner: Arc<Mutex<Inner>>,
}
#[derive(Debug, Default)]
struct Inner {
hashes: HashMap<PathBuf, ContentHash>,
recency: VecDeque<PathBuf>, // 先頭が最新
}
const RECENCY_CAPACITY: usize = 20;
```
### `record()` の挙動追加
既存の hash 記録に加えて:
```rust
pub fn record(&self, path: &Path, bytes: &[u8]) {
let key = canonicalize_or_owned(path);
let hash = hash_bytes(bytes);
let mut inner = self.inner.lock().unwrap_or_else(|e| e.into_inner());
inner.hashes.insert(key.clone(), hash);
// LRU: 既存エントリを除去 → 先頭に push
inner.recency.retain(|p| p != &key);
inner.recency.push_front(key);
if inner.recency.len() > RECENCY_CAPACITY {
inner.recency.pop_back();
}
}
```
### 新メソッド
```rust
/// Return up to `n` most recently recorded file paths.
/// Order: most recent first.
pub fn recent_files(&self, n: usize) -> Vec<PathBuf> {
let inner = self.inner.lock().unwrap_or_else(|e| e.into_inner());
inner.recency.iter().take(n).cloned().collect()
}
```
### Pod への接続
- `Pod``tracker: Option<tools::Tracker>` フィールド追加 (builtin-tools 未登録の場合は None)
- Controller が `Tracker::new()` した時点で Pod にも `attach_tracker(tracker.clone())` で共有
- Compact 実行時 (`Pod::compact` 内) に `self.tracker.as_ref().map(|t| t.recent_files(5))` でデフォルトリファレンスを取得
### ライフサイクルの整合性
既存のドキュメント: 「Tracker は session-scoped。Controller spawn ごとに new」
この方針は維持。compact 後も同じ Controller spawn 内で状態が継続するので、
compact worker が `read_file` で追加で参照したファイルも次回 compact 時に効く。
### テスト追加
- `recent_files` が recency 順で返ること
- `RECENCY_CAPACITY` を超えた場合に古いものが落ちること
- 既存パスを再 record したら先頭に移動すること
- Read/Write/Edit 実行後に recent_files に現れること (integration テスト)
## 影響範囲
- `crates/tools/src/read_tracker.rs` — リネーム + Inner 構造体化 + recency フィールド + recent_files メソッド
- `crates/tools/src/lib.rs` — pub use 修正
- `crates/tools/src/{read,write,edit,scoped_fs}.rs` — 型名追従
- `crates/tools/tests/*` — 型名追従 + recent_files のテスト追加
- `crates/pod/src/pod.rs``tracker` フィールド + `attach_tracker` メソッド
- `crates/pod/src/controller.rs``tracker.clone()` を Pod にも渡す
## 依存
- なし (単独で実装可能)
## ブロックする後続
- [compact-improvements.md](compact-improvements.md) — デフォルトリファレンスの抽出がこれに依存