From 56c6758da5ef50da414a7c739264ed086437d262 Mon Sep 17 00:00:00 2001 From: Hare Date: Mon, 27 Apr 2026 17:24:08 +0900 Subject: [PATCH] =?UTF-8?q?memory=E3=82=B5=E3=83=BC=E3=83=81=E3=83=84?= =?UTF-8?q?=E3=83=BC=E3=83=AB=E3=82=92=E5=AE=9F=E8=A3=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/manifest/src/config.rs | 2 + crates/manifest/src/lib.rs | 8 + crates/memory/src/tool/edit.rs | 133 ++---- crates/memory/src/tool/mod.rs | 83 +++- crates/memory/src/tool/read.rs | 118 +++--- crates/memory/src/tool/search.rs | 560 ++++++++++++++++++++++++++ crates/memory/src/tool/write.rs | 99 +++-- crates/pod/src/controller.rs | 11 +- tickets/memory-search-tools.md | 67 ++- tickets/memory-search-tools.review.md | 63 +++ 10 files changed, 936 insertions(+), 208 deletions(-) create mode 100644 crates/memory/src/tool/search.rs create mode 100644 tickets/memory-search-tools.review.md diff --git a/crates/manifest/src/config.rs b/crates/manifest/src/config.rs index c4caddfd..6b40a11b 100644 --- a/crates/manifest/src/config.rs +++ b/crates/manifest/src/config.rs @@ -205,6 +205,8 @@ impl MemoryConfig { fn merge(self, upper: Self) -> Self { Self { workspace_root: upper.workspace_root.or(self.workspace_root), + search_hit_limit: upper.search_hit_limit.or(self.search_hit_limit), + search_excerpt_lines: upper.search_excerpt_lines.or(self.search_excerpt_lines), } } } diff --git a/crates/manifest/src/lib.rs b/crates/manifest/src/lib.rs index 65043915..7909c286 100644 --- a/crates/manifest/src/lib.rs +++ b/crates/manifest/src/lib.rs @@ -54,6 +54,14 @@ pub struct MemoryConfig { /// absolute path. #[serde(default)] pub workspace_root: Option, + /// Maximum number of hits returned by `MemorySearch` / + /// `KnowledgeSearch`. `None` ⇒ tool default (20). + #[serde(default)] + pub search_hit_limit: Option, + /// Lines of context before and after each match in search excerpts. + /// `None` ⇒ tool default (3). + #[serde(default)] + pub search_excerpt_lines: Option, } /// Pod metadata. diff --git a/crates/memory/src/tool/edit.rs b/crates/memory/src/tool/edit.rs index a5520b63..d9c3e74f 100644 --- a/crates/memory/src/tool/edit.rs +++ b/crates/memory/src/tool/edit.rs @@ -1,11 +1,11 @@ //! `MemoryEdit` tool — partial string replacement on an existing memory record. //! -//! Reads current content, applies the replacement, runs the Linter on -//! the result, writes only on success. The current-then-write window -//! is single-tool-call narrow; an external tracker is intentionally -//! omitted (memory tools are self-contained, no `tools` crate dep). +//! Reads current content by `(kind, slug)`, applies the replacement, +//! runs the Linter on the result, writes only on success. The +//! current-then-write window is single-tool-call narrow; an external +//! tracker is intentionally omitted (memory tools are self-contained, +//! no `tools` crate dep). -use std::path::PathBuf; use std::sync::Arc; use async_trait::async_trait; @@ -13,18 +13,21 @@ use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta, ToolOutput}; use serde::Deserialize; use crate::linter::{LintReport, Linter, WriteMode}; +use crate::tool::MemoryToolKind; use crate::workspace::WorkspaceLayout; const DESCRIPTION: &str = "Replace a substring in an existing memory or knowledge \ -record file. By default `old_string` must be unique in the file; set \ -`replace_all: true` to replace every occurrence. The resulting content is \ -re-validated by the memory linter; failure leaves the file untouched. Path \ -must be absolute and lie inside the workspace's `memory/` or `knowledge/` tree."; +record selected by `kind` + `slug`. By default `old_string` must be unique in the \ +file; set `replace_all: true` to replace every occurrence. The resulting content \ +is re-validated by the memory linter; failure leaves the file untouched."; #[derive(Debug, Deserialize, schemars::JsonSchema)] struct EditParams { - /// Absolute path under the workspace's `memory/` or `knowledge/` tree. - file_path: PathBuf, + /// Record kind: `summary` | `decision` | `request` | `knowledge`. + kind: MemoryToolKind, + /// Slug. Required for everything except `summary`; forbidden for `summary`. + #[serde(default)] + slug: Option, /// String to replace. Must be unique in the file unless `replace_all` is true. old_string: String, /// Replacement string. Must differ from `old_string`. @@ -35,6 +38,7 @@ struct EditParams { } struct EditTool { + layout: WorkspaceLayout, linter: Linter, } @@ -45,12 +49,6 @@ impl Tool for EditTool { ToolError::InvalidArgument(format!("invalid MemoryEdit input: {e}")) })?; - if !params.file_path.is_absolute() { - return Err(ToolError::InvalidArgument(format!( - "file_path must be absolute: {}", - params.file_path.display() - ))); - } if params.old_string.is_empty() { return Err(ToolError::InvalidArgument( "old_string must not be empty".into(), @@ -62,49 +60,30 @@ impl Tool for EditTool { )); } - // Path-shape check; the layout::classify also runs inside the - // linter but we want a crisp error before reading the file. - if self - .linter - .layout() - .classify(¶ms.file_path) - .map_err(|e| ToolError::InvalidArgument(e.to_string()))? - .is_none() - { - return Err(ToolError::InvalidArgument(format!( - "path is not under the memory tree: {}", - params.file_path.display() - ))); - } + let path = params.kind.resolve_path(&self.layout, params.slug.as_deref())?; - let current_bytes = std::fs::read(¶ms.file_path).map_err(|e| match e.kind() { + let current_bytes = std::fs::read(&path).map_err(|e| match e.kind() { std::io::ErrorKind::NotFound => ToolError::ExecutionFailed(format!( - "file not found (use MemoryWrite to create): {}", - params.file_path.display() - )), - _ => ToolError::ExecutionFailed(format!( - "read failed at {}: {e}", - params.file_path.display() + "record not found (use MemoryWrite to create): {}", + path.display() )), + _ => ToolError::ExecutionFailed(format!("read failed at {}: {e}", path.display())), })?; let current_text = std::str::from_utf8(¤t_bytes).map_err(|_| { - ToolError::InvalidArgument(format!( - "file is not valid UTF-8: {}", - params.file_path.display() - )) + ToolError::InvalidArgument(format!("file is not valid UTF-8: {}", path.display())) })?; let count = current_text.matches(¶ms.old_string).count(); if count == 0 { return Err(ToolError::InvalidArgument(format!( "old_string not found in {}", - params.file_path.display() + path.display() ))); } if !params.replace_all && count > 1 { return Err(ToolError::InvalidArgument(format!( "old_string occurs {count} times in {}; pass replace_all: true or narrow the snippet", - params.file_path.display() + path.display() ))); } @@ -115,21 +94,18 @@ impl Tool for EditTool { }; let occurrences = if params.replace_all { count } else { 1 }; - let report = self.linter.lint(¶ms.file_path, &new_text, WriteMode::Update); + let report = self.linter.lint(&path, &new_text, WriteMode::Update); if report.has_errors() { return Err(ToolError::InvalidArgument(format_report(&report))); } - std::fs::write(¶ms.file_path, new_text.as_bytes()).map_err(|e| { - ToolError::ExecutionFailed(format!( - "failed to write {}: {e}", - params.file_path.display() - )) + std::fs::write(&path, new_text.as_bytes()).map_err(|e| { + ToolError::ExecutionFailed(format!("failed to write {}: {e}", path.display())) })?; let summary = format!( "Edited {} ({} replacement{}){}", - params.file_path.display(), + path.display(), occurrences, if occurrences == 1 { "" } else { "s" }, warning_tail(&report), @@ -176,6 +152,7 @@ pub fn edit_tool(layout: WorkspaceLayout) -> ToolDefinition { .description(DESCRIPTION) .input_schema(schema_value); let tool: Arc = Arc::new(EditTool { + layout: layout.clone(), linter: Linter::new(layout.clone()), }); (meta, tool) @@ -186,6 +163,7 @@ pub fn edit_tool(layout: WorkspaceLayout) -> ToolDefinition { mod tests { use super::*; use chrono::Utc; + use std::path::PathBuf; use tempfile::TempDir; fn now() -> String { @@ -212,7 +190,8 @@ mod tests { assert_eq!(meta.name, "MemoryEdit"); let inp = serde_json::json!({ - "file_path": path.to_str().unwrap(), + "kind": "decision", + "slug": "foo", "old_string": "body body", "new_string": "edited", }); @@ -230,7 +209,8 @@ mod tests { // Drop the `status` field by replacing it with nothing. let inp = serde_json::json!({ - "file_path": path.to_str().unwrap(), + "kind": "decision", + "slug": "foo", "old_string": "status: open\n", "new_string": "", }); @@ -244,12 +224,12 @@ mod tests { } #[tokio::test] - async fn edit_missing_file() { - let (dir, layout, _) = setup(); - let other = dir.path().join("memory/decisions/ghost.md"); + async fn edit_missing_record() { + let (_dir, layout, _) = setup(); let (_, tool) = edit_tool(layout)(); let inp = serde_json::json!({ - "file_path": other.to_str().unwrap(), + "kind": "decision", + "slug": "ghost", "old_string": "x", "new_string": "y", }); @@ -258,42 +238,17 @@ mod tests { } #[tokio::test] - async fn edit_outside_memory_tree_rejected() { - let (dir, layout, _) = setup(); - let other = dir.path().join("src/lib.rs"); - std::fs::create_dir_all(other.parent().unwrap()).unwrap(); - std::fs::write(&other, "fn main() {}").unwrap(); + async fn edit_workflow_kind_rejected() { + // Workflow is not exposed via MemoryToolKind, so deserialization fails. + let (_dir, layout, _) = setup(); let (_, tool) = edit_tool(layout)(); let inp = serde_json::json!({ - "file_path": other.to_str().unwrap(), - "old_string": "fn", - "new_string": "pub fn", + "kind": "workflow", + "slug": "wf", + "old_string": "x", + "new_string": "y", }); let err = tool.execute(&inp.to_string()).await.unwrap_err(); assert!(matches!(err, ToolError::InvalidArgument(_))); } - - #[tokio::test] - async fn edit_workflow_path_rejected() { - let (dir, layout, _) = setup(); - let path = dir.path().join("memory/workflow/wf.md"); - std::fs::create_dir_all(path.parent().unwrap()).unwrap(); - let initial = format!( - "---\nupdated_at: {n}\ndescription: x\nauto_invoke: false\nuser_invocable: true\n---\nbody\n", - n = now() - ); - std::fs::write(&path, &initial).unwrap(); - - let (_, tool) = edit_tool(layout)(); - let inp = serde_json::json!({ - "file_path": path.to_str().unwrap(), - "old_string": "body", - "new_string": "edited", - }); - let err = tool.execute(&inp.to_string()).await.unwrap_err(); - let msg = format!("{err}"); - assert!(msg.to_lowercase().contains("workflow"), "{msg}"); - // Original untouched. - assert!(std::fs::read_to_string(&path).unwrap().contains("body")); - } } diff --git a/crates/memory/src/tool/mod.rs b/crates/memory/src/tool/mod.rs index 755bc55d..67809047 100644 --- a/crates/memory/src/tool/mod.rs +++ b/crates/memory/src/tool/mod.rs @@ -1,9 +1,90 @@ -//! Tool implementations stub. Filled in once the linter compiles green. +//! Memory-scoped tools: Read / Write / Edit / Search. +//! +//! All four take `kind` + `slug` (Summary takes only `kind`) and +//! resolve the path through [`WorkspaceLayout`]. The agent never has +//! to know the on-disk layout — Search returns `{slug, kind, ...}` and +//! that pair feeds straight into Read / Edit. mod edit; mod read; +mod search; mod write; +use std::path::PathBuf; + +use llm_worker::tool::ToolError; +use serde::Deserialize; + +use crate::slug::Slug; +use crate::workspace::{RecordKind, WorkspaceLayout}; + pub use edit::edit_tool; pub use read::read_tool; +pub use search::{knowledge_search_tool, memory_search_tool, SearchConfig}; pub use write::write_tool; + +/// Kinds the memory tools accept as input. `Workflow` is intentionally +/// excluded — workflows are sub-Worker context, not agent-editable. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, schemars::JsonSchema)] +#[serde(rename_all = "lowercase")] +pub enum MemoryToolKind { + Summary, + Decision, + Request, + Knowledge, +} + +impl MemoryToolKind { + pub fn as_str(self) -> &'static str { + match self { + Self::Summary => "summary", + Self::Decision => "decision", + Self::Request => "request", + Self::Knowledge => "knowledge", + } + } + + pub fn record_kind(self) -> RecordKind { + match self { + Self::Summary => RecordKind::Summary, + Self::Decision => RecordKind::Decision, + Self::Request => RecordKind::Request, + Self::Knowledge => RecordKind::Knowledge, + } + } + + /// Resolve `(kind, slug)` to an absolute path under the workspace. + /// Summary forbids a slug; the per-record kinds require one. + pub fn resolve_path( + self, + layout: &WorkspaceLayout, + slug: Option<&str>, + ) -> Result { + match self { + Self::Summary => { + if slug.is_some() { + return Err(ToolError::InvalidArgument( + "kind=summary does not accept a slug".to_string(), + )); + } + Ok(layout.summary_path()) + } + other => { + let raw = slug.ok_or_else(|| { + ToolError::InvalidArgument(format!( + "kind={} requires `slug`", + other.as_str() + )) + })?; + let parsed = Slug::parse(raw) + .map_err(|e| ToolError::InvalidArgument(e.to_string()))?; + Ok(match other { + Self::Decision => layout.decision_path(&parsed), + Self::Request => layout.request_path(&parsed), + Self::Knowledge => layout.knowledge_path(&parsed), + Self::Summary => unreachable!(), + }) + } + } + } +} diff --git a/crates/memory/src/tool/read.rs b/crates/memory/src/tool/read.rs index 0560e53b..c2826376 100644 --- a/crates/memory/src/tool/read.rs +++ b/crates/memory/src/tool/read.rs @@ -1,29 +1,33 @@ //! `MemoryRead` tool. //! -//! Constrained to `/memory/` and `/knowledge/` -//! paths. Returns line-numbered content (1-based), like the generic -//! Read tool, but rejects anything outside the memory tree so the -//! agent can't sneak in a non-memory read through this surface. +//! Reads a memory or knowledge record by `(kind, slug)`. Returns +//! line-numbered content (1-based), like the generic Read tool. The +//! agent never names a path — `Search` returns `{kind, slug, ...}` +//! and that pair feeds straight into Read. -use std::path::PathBuf; use std::sync::Arc; use async_trait::async_trait; use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta, ToolOutput}; use serde::Deserialize; +use crate::tool::MemoryToolKind; use crate::workspace::WorkspaceLayout; -const DESCRIPTION: &str = "Read a memory or knowledge record file under the \ -workspace's `memory/` or `knowledge/` tree. Returns line-numbered output \ -(1-based). Paths must be absolute and lie inside the memory tree."; +const DESCRIPTION: &str = "Read a memory or knowledge record by `kind` + `slug`. \ +`kind` is one of: summary, decision, request, knowledge. \ +For `summary` omit `slug`; for the others `slug` is required. \ +Returns line-numbered output (1-based)."; const DEFAULT_LIMIT: usize = 2000; #[derive(Debug, Deserialize, schemars::JsonSchema)] struct ReadParams { - /// Absolute path to a file under the workspace's `memory/` or `knowledge/` tree. - file_path: PathBuf, + /// Record kind: `summary` | `decision` | `request` | `knowledge`. + kind: MemoryToolKind, + /// Slug. Required for everything except `summary`; forbidden for `summary`. + #[serde(default)] + slug: Option, /// 0-based line offset from the start. Defaults to 0. #[serde(default)] offset: Option, @@ -43,33 +47,13 @@ impl Tool for ReadTool { ToolError::InvalidArgument(format!("invalid MemoryRead input: {e}")) })?; - if !params.file_path.is_absolute() { - return Err(ToolError::InvalidArgument(format!( - "file_path must be absolute: {}", - params.file_path.display() - ))); - } - if self - .layout - .classify(¶ms.file_path) - .map_err(|e| ToolError::InvalidArgument(e.to_string()))? - .is_none() - { - return Err(ToolError::InvalidArgument(format!( - "path is not under the memory tree: {}", - params.file_path.display() - ))); - } + let path = params.kind.resolve_path(&self.layout, params.slug.as_deref())?; - let bytes = std::fs::read(¶ms.file_path).map_err(|e| match e.kind() { - std::io::ErrorKind::NotFound => ToolError::ExecutionFailed(format!( - "file not found: {}", - params.file_path.display() - )), - _ => ToolError::ExecutionFailed(format!( - "read failed at {}: {e}", - params.file_path.display() - )), + let bytes = std::fs::read(&path).map_err(|e| match e.kind() { + std::io::ErrorKind::NotFound => { + ToolError::ExecutionFailed(format!("record not found: {}", path.display())) + } + _ => ToolError::ExecutionFailed(format!("read failed at {}: {e}", path.display())), })?; let text = String::from_utf8_lossy(&bytes).into_owned(); @@ -84,13 +68,13 @@ impl Tool for ReadTool { offset + 1, offset + rendered.line_count, rendered.total_lines, - params.file_path.display() + path.display() ) } else { format!( "Read {} line(s) from {}", rendered.line_count, - params.file_path.display() + path.display() ) }; @@ -157,14 +141,14 @@ mod tests { } #[tokio::test] - async fn read_returns_numbered_lines() { + async fn read_decision_by_slug() { let (dir, layout) = setup(); let path = dir.path().join("memory/decisions/foo.md"); std::fs::create_dir_all(path.parent().unwrap()).unwrap(); std::fs::write(&path, "alpha\nbeta\n").unwrap(); let (_meta, tool) = read_tool(layout)(); - let inp = serde_json::json!({ "file_path": path.to_str().unwrap() }); + let inp = serde_json::json!({ "kind": "decision", "slug": "foo" }); let out = tool.execute(&inp.to_string()).await.unwrap(); let body = out.content.unwrap(); assert!(body.contains(" 1\talpha")); @@ -172,24 +156,64 @@ mod tests { } #[tokio::test] - async fn rejects_outside_memory_tree() { + async fn read_summary_without_slug() { let (dir, layout) = setup(); - let other = dir.path().join("src/main.rs"); - std::fs::create_dir_all(other.parent().unwrap()).unwrap(); - std::fs::write(&other, "fn main() {}").unwrap(); + let path = dir.path().join("memory/summary.md"); + std::fs::create_dir_all(path.parent().unwrap()).unwrap(); + std::fs::write(&path, "summary body\n").unwrap(); let (_, tool) = read_tool(layout)(); - let inp = serde_json::json!({ "file_path": other.to_str().unwrap() }); + let inp = serde_json::json!({ "kind": "summary" }); + let out = tool.execute(&inp.to_string()).await.unwrap(); + assert!(out.content.unwrap().contains("summary body")); + } + + #[tokio::test] + async fn summary_with_slug_rejected() { + let (_dir, layout) = setup(); + let (_, tool) = read_tool(layout)(); + let inp = serde_json::json!({ "kind": "summary", "slug": "x" }); let err = tool.execute(&inp.to_string()).await.unwrap_err(); assert!(matches!(err, ToolError::InvalidArgument(_))); } #[tokio::test] - async fn rejects_relative_path() { + async fn decision_without_slug_rejected() { let (_dir, layout) = setup(); let (_, tool) = read_tool(layout)(); - let inp = serde_json::json!({ "file_path": "memory/summary.md" }); + let inp = serde_json::json!({ "kind": "decision" }); let err = tool.execute(&inp.to_string()).await.unwrap_err(); assert!(matches!(err, ToolError::InvalidArgument(_))); } + + #[tokio::test] + async fn invalid_slug_rejected() { + let (_dir, layout) = setup(); + let (_, tool) = read_tool(layout)(); + let inp = serde_json::json!({ "kind": "decision", "slug": "Bad-Slug" }); + let err = tool.execute(&inp.to_string()).await.unwrap_err(); + assert!(matches!(err, ToolError::InvalidArgument(_))); + } + + #[tokio::test] + async fn knowledge_path_resolution() { + let (dir, layout) = setup(); + let path = dir.path().join("knowledge/policy.md"); + std::fs::create_dir_all(path.parent().unwrap()).unwrap(); + std::fs::write(&path, "k\n").unwrap(); + + let (_, tool) = read_tool(layout)(); + let inp = serde_json::json!({ "kind": "knowledge", "slug": "policy" }); + let out = tool.execute(&inp.to_string()).await.unwrap(); + assert!(out.content.unwrap().contains("k")); + } + + #[tokio::test] + async fn missing_file_returns_execution_failed() { + let (_dir, layout) = setup(); + let (_, tool) = read_tool(layout)(); + let inp = serde_json::json!({ "kind": "decision", "slug": "missing" }); + let err = tool.execute(&inp.to_string()).await.unwrap_err(); + assert!(matches!(err, ToolError::ExecutionFailed(_))); + } } diff --git a/crates/memory/src/tool/search.rs b/crates/memory/src/tool/search.rs new file mode 100644 index 00000000..8b2e9338 --- /dev/null +++ b/crates/memory/src/tool/search.rs @@ -0,0 +1,560 @@ +//! `MemorySearch` / `KnowledgeSearch` tools. +//! +//! Both perform a case-insensitive substring scan over markdown record +//! files, returning a list of `{slug, kind, ..., excerpt}` entries. +//! Excerpts are `excerpt_lines` lines before and after the matched +//! line (so 2N+1 lines per excerpt when not clipped). +//! +//! - `MemorySearch` walks `memory/summary.md`, `memory/decisions/`, +//! `memory/requests/`. `memory/workflow/` and `memory/_staging/` +//! are excluded by construction. +//! - `KnowledgeSearch` walks `knowledge/*.md` and supports a `kind` +//! filter against the Knowledge frontmatter's `kind` field. +//! +//! No derived index — the file tree is the source of truth and is +//! re-scanned per call. grep 出現順: within a file by line order, +//! across files by sorted filename. + +use std::path::{Path, PathBuf}; +use std::sync::Arc; + +use async_trait::async_trait; +use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta, ToolOutput}; +use serde::{Deserialize, Serialize}; + +use crate::schema::{KnowledgeFrontmatter, split_frontmatter}; +use crate::workspace::WorkspaceLayout; + +const DEFAULT_HIT_LIMIT: usize = 20; +const DEFAULT_EXCERPT_LINES: usize = 3; + +const MEMORY_SEARCH_DESCRIPTION: &str = "Search memory records (summary / decisions / \ +requests) for a substring. Returns up to `hit_limit` matches as `{slug, kind, excerpt}` \ +entries with line context. Use the returned `slug` + `kind` with MemoryRead to fetch \ +the full record. Workflow and staging directories are not searched."; + +const KNOWLEDGE_SEARCH_DESCRIPTION: &str = "Search knowledge records for a substring. \ +Optional `kind` filters by the Knowledge frontmatter's `kind` field. Returns up to \ +`hit_limit` matches as `{slug, kind, description, model_invokation, excerpt}` entries \ +with line context. Use the returned `slug` with MemoryRead (kind=knowledge) for the \ +full record."; + +/// Tunables passed in from the manifest. +#[derive(Debug, Clone, Copy)] +pub struct SearchConfig { + pub hit_limit: usize, + /// Lines of context before and after each matched line. + pub excerpt_lines: usize, +} + +impl Default for SearchConfig { + fn default() -> Self { + Self { + hit_limit: DEFAULT_HIT_LIMIT, + excerpt_lines: DEFAULT_EXCERPT_LINES, + } + } +} + +#[derive(Debug, Deserialize, schemars::JsonSchema)] +struct MemorySearchParams { + /// Substring to search for. Case-insensitive. + query: String, +} + +#[derive(Debug, Deserialize, schemars::JsonSchema)] +struct KnowledgeSearchParams { + /// Substring to search for. Case-insensitive. + query: String, + /// Optional filter on the Knowledge frontmatter's `kind` field. + #[serde(default)] + kind: Option, +} + +#[derive(Debug, Serialize)] +struct MemoryHit { + slug: String, + kind: &'static str, + excerpt: String, +} + +#[derive(Debug, Serialize)] +struct KnowledgeHit { + slug: String, + kind: Option, + description: Option, + model_invokation: Option, + excerpt: String, +} + +struct MemorySearchTool { + layout: WorkspaceLayout, + config: SearchConfig, +} + +struct KnowledgeSearchTool { + layout: WorkspaceLayout, + config: SearchConfig, +} + +#[async_trait] +impl Tool for MemorySearchTool { + async fn execute(&self, input_json: &str) -> Result { + let params: MemorySearchParams = serde_json::from_str(input_json).map_err(|e| { + ToolError::InvalidArgument(format!("invalid MemorySearch input: {e}")) + })?; + let needle = validate_query(¶ms.query)?; + + let mut hits: Vec = Vec::new(); + let limit = self.config.hit_limit; + let ctx = self.config.excerpt_lines; + + // summary + let summary_path = self.layout.summary_path(); + if summary_path.is_file() { + scan_file(&summary_path, &needle, ctx, limit - hits.len(), |excerpt| { + hits.push(MemoryHit { + slug: "summary".to_string(), + kind: "summary", + excerpt, + }); + }); + } + + // decisions + if hits.len() < limit { + for (path, slug) in list_md_files(&self.layout.decisions_dir()) { + if hits.len() >= limit { + break; + } + scan_file(&path, &needle, ctx, limit - hits.len(), |excerpt| { + hits.push(MemoryHit { + slug: slug.clone(), + kind: "decision", + excerpt, + }); + }); + } + } + + // requests + if hits.len() < limit { + for (path, slug) in list_md_files(&self.layout.requests_dir()) { + if hits.len() >= limit { + break; + } + scan_file(&path, &needle, ctx, limit - hits.len(), |excerpt| { + hits.push(MemoryHit { + slug: slug.clone(), + kind: "request", + excerpt, + }); + }); + } + } + + let body = serde_json::to_string_pretty(&hits) + .map_err(|e| ToolError::ExecutionFailed(format!("serialize hits: {e}")))?; + Ok(ToolOutput { + summary: format!("{} hit(s) for {:?}", hits.len(), params.query), + content: Some(body), + }) + } +} + +#[async_trait] +impl Tool for KnowledgeSearchTool { + async fn execute(&self, input_json: &str) -> Result { + let params: KnowledgeSearchParams = serde_json::from_str(input_json).map_err(|e| { + ToolError::InvalidArgument(format!("invalid KnowledgeSearch input: {e}")) + })?; + let needle = validate_query(¶ms.query)?; + let kind_filter = params.kind.as_deref(); + + let mut hits: Vec = Vec::new(); + let limit = self.config.hit_limit; + let ctx = self.config.excerpt_lines; + + for (path, slug) in list_md_files(&self.layout.knowledge_dir()) { + if hits.len() >= limit { + break; + } + // Try to parse frontmatter for description/model_invokation/kind. + let raw = match std::fs::read_to_string(&path) { + Ok(s) => s, + Err(_) => continue, + }; + let fm = parse_knowledge_frontmatter(&raw); + + // kind filter applies to the frontmatter's kind field. + if let Some(filter) = kind_filter { + let matches = fm + .as_ref() + .map(|f| f.kind.as_str() == filter) + .unwrap_or(false); + if !matches { + continue; + } + } + + let kind = fm.as_ref().map(|f| f.kind.clone()); + let description = fm.as_ref().map(|f| f.description.clone()); + let model_invokation = fm.as_ref().map(|f| f.model_invokation); + + scan_text(&raw, &needle, ctx, limit - hits.len(), |excerpt| { + hits.push(KnowledgeHit { + slug: slug.clone(), + kind: kind.clone(), + description: description.clone(), + model_invokation, + excerpt, + }); + }); + } + + let body = serde_json::to_string_pretty(&hits) + .map_err(|e| ToolError::ExecutionFailed(format!("serialize hits: {e}")))?; + Ok(ToolOutput { + summary: format!("{} hit(s) for {:?}", hits.len(), params.query), + content: Some(body), + }) + } +} + +fn validate_query(query: &str) -> Result { + if query.trim().is_empty() { + return Err(ToolError::InvalidArgument( + "query must not be empty".into(), + )); + } + Ok(query.to_lowercase()) +} + +/// Sorted list of `(path, slug)` for `*.md` files directly under `dir`. +/// Returns empty if the directory doesn't exist. +fn list_md_files(dir: &Path) -> Vec<(PathBuf, String)> { + let mut out: Vec<(PathBuf, String)> = Vec::new(); + let entries = match std::fs::read_dir(dir) { + Ok(it) => it, + Err(_) => return out, + }; + for entry in entries.flatten() { + let path = entry.path(); + if !path.is_file() { + continue; + } + let name = match path.file_name().and_then(|n| n.to_str()) { + Some(n) => n, + None => continue, + }; + let slug = match name.strip_suffix(".md") { + Some(s) => s.to_string(), + None => continue, + }; + out.push((path, slug)); + } + out.sort_by(|a, b| a.1.cmp(&b.1)); + out +} + +fn scan_file( + path: &Path, + needle_lower: &str, + ctx: usize, + remaining: usize, + mut on_match: impl FnMut(String), +) { + if remaining == 0 { + return; + } + let text = match std::fs::read_to_string(path) { + Ok(t) => t, + Err(_) => return, + }; + scan_text(&text, needle_lower, ctx, remaining, |e| on_match(e)); +} + +fn scan_text( + text: &str, + needle_lower: &str, + ctx: usize, + remaining: usize, + mut on_match: impl FnMut(String), +) { + if remaining == 0 { + return; + } + let lines: Vec<&str> = text.lines().collect(); + let mut produced = 0; + for (i, line) in lines.iter().enumerate() { + if produced >= remaining { + break; + } + if line.to_lowercase().contains(needle_lower) { + let start = i.saturating_sub(ctx); + let end = i.saturating_add(ctx + 1).min(lines.len()); + let excerpt = lines[start..end].join("\n"); + on_match(excerpt); + produced += 1; + } + } +} + +/// Best-effort frontmatter parse. Returns `None` if missing/malformed +/// — search still finds matches in the body even when the header is +/// broken. +fn parse_knowledge_frontmatter(raw: &str) -> Option { + let (yaml, _body) = split_frontmatter(raw).ok()?; + serde_yaml::from_str::(yaml).ok() +} + +pub fn memory_search_tool(layout: WorkspaceLayout, config: SearchConfig) -> ToolDefinition { + Arc::new(move || { + let schema = schemars::schema_for!(MemorySearchParams); + let schema_value = serde_json::to_value(schema).unwrap_or(serde_json::json!({})); + let meta = ToolMeta::new("MemorySearch") + .description(MEMORY_SEARCH_DESCRIPTION) + .input_schema(schema_value); + let tool: Arc = Arc::new(MemorySearchTool { + layout: layout.clone(), + config, + }); + (meta, tool) + }) +} + +pub fn knowledge_search_tool(layout: WorkspaceLayout, config: SearchConfig) -> ToolDefinition { + Arc::new(move || { + let schema = schemars::schema_for!(KnowledgeSearchParams); + let schema_value = serde_json::to_value(schema).unwrap_or(serde_json::json!({})); + let meta = ToolMeta::new("KnowledgeSearch") + .description(KNOWLEDGE_SEARCH_DESCRIPTION) + .input_schema(schema_value); + let tool: Arc = Arc::new(KnowledgeSearchTool { + layout: layout.clone(), + config, + }); + (meta, tool) + }) +} + +#[cfg(test)] +mod tests { + use super::*; + use chrono::Utc; + use tempfile::TempDir; + + fn now() -> String { + Utc::now().to_rfc3339() + } + + fn setup() -> (TempDir, WorkspaceLayout) { + let dir = TempDir::new().unwrap(); + let layout = WorkspaceLayout::new(dir.path().to_path_buf()); + std::fs::create_dir_all(dir.path().join("memory/decisions")).unwrap(); + std::fs::create_dir_all(dir.path().join("memory/requests")).unwrap(); + std::fs::create_dir_all(dir.path().join("memory/workflow")).unwrap(); + std::fs::create_dir_all(dir.path().join("memory/_staging")).unwrap(); + std::fs::create_dir_all(dir.path().join("knowledge")).unwrap(); + (dir, layout) + } + + fn write_decision(dir: &Path, slug: &str, body: &str) { + let path = dir.join("memory/decisions").join(format!("{slug}.md")); + let content = format!( + "---\ncreated_at: {n}\nupdated_at: {n}\nsources: []\nstatus: open\n---\n{body}", + n = now() + ); + std::fs::write(path, content).unwrap(); + } + + fn write_knowledge(dir: &Path, slug: &str, kind: &str, description: &str, body: &str) { + let path = dir.join("knowledge").join(format!("{slug}.md")); + let content = format!( + "---\ncreated_at: {n}\nupdated_at: {n}\nkind: {kind}\ndescription: \"{description}\"\nmodel_invokation: false\nuser_invocable: true\nlast_sources: []\n---\n{body}", + n = now() + ); + std::fs::write(path, content).unwrap(); + } + + fn parse_hits serde::Deserialize<'de>>(out: &ToolOutput) -> Vec { + serde_json::from_str(out.content.as_ref().unwrap()).unwrap() + } + + #[derive(Deserialize)] + struct OwnedMemoryHit { + slug: String, + kind: String, + excerpt: String, + } + + #[derive(Deserialize)] + struct OwnedKnowledgeHit { + slug: String, + kind: Option, + description: Option, + model_invokation: Option, + excerpt: String, + } + + #[tokio::test] + async fn memory_search_finds_decision_body() { + let (dir, layout) = setup(); + write_decision(dir.path(), "alpha", "we chose Ollama because it works\n"); + write_decision(dir.path(), "beta", "no match here\n"); + let (_, tool) = memory_search_tool(layout, SearchConfig::default())(); + let inp = serde_json::json!({ "query": "ollama" }); + let out = tool.execute(&inp.to_string()).await.unwrap(); + let hits: Vec = parse_hits(&out); + assert_eq!(hits.len(), 1); + assert_eq!(hits[0].slug, "alpha"); + assert_eq!(hits[0].kind, "decision"); + assert!(hits[0].excerpt.to_lowercase().contains("ollama")); + } + + #[tokio::test] + async fn memory_search_finds_summary() { + let (dir, layout) = setup(); + let summary_path = dir.path().join("memory/summary.md"); + std::fs::write( + &summary_path, + format!("---\nupdated_at: {n}\n---\nthe needle is here\n", n = now()), + ) + .unwrap(); + let (_, tool) = memory_search_tool(layout, SearchConfig::default())(); + let inp = serde_json::json!({ "query": "needle" }); + let out = tool.execute(&inp.to_string()).await.unwrap(); + let hits: Vec = parse_hits(&out); + assert_eq!(hits.len(), 1); + assert_eq!(hits[0].slug, "summary"); + assert_eq!(hits[0].kind, "summary"); + } + + #[tokio::test] + async fn memory_search_excludes_workflow_and_staging() { + let (dir, layout) = setup(); + // Workflow and staging files contain the needle but must be ignored. + let wf = dir.path().join("memory/workflow/wf.md"); + std::fs::write(&wf, "needle in workflow\n").unwrap(); + let stg = dir.path().join("memory/_staging/abc.json"); + std::fs::write(&stg, "needle in staging\n").unwrap(); + + let (_, tool) = memory_search_tool(layout, SearchConfig::default())(); + let inp = serde_json::json!({ "query": "needle" }); + let out = tool.execute(&inp.to_string()).await.unwrap(); + let hits: Vec = parse_hits(&out); + assert!(hits.is_empty(), "got hits: {:?}", out.content); + } + + #[tokio::test] + async fn memory_search_respects_hit_limit() { + let (dir, layout) = setup(); + for i in 0..10 { + write_decision(dir.path(), &format!("rec-{i}"), "needle line\n"); + } + let cfg = SearchConfig { + hit_limit: 3, + excerpt_lines: 1, + }; + let (_, tool) = memory_search_tool(layout, cfg)(); + let inp = serde_json::json!({ "query": "needle" }); + let out = tool.execute(&inp.to_string()).await.unwrap(); + let hits: Vec = parse_hits(&out); + assert_eq!(hits.len(), 3); + } + + #[tokio::test] + async fn memory_search_excerpt_includes_context_lines() { + let (dir, layout) = setup(); + write_decision( + dir.path(), + "ctx", + "line a\nline b\nNEEDLE here\nline d\nline e\n", + ); + let cfg = SearchConfig { + hit_limit: 5, + excerpt_lines: 1, + }; + let (_, tool) = memory_search_tool(layout, cfg)(); + let inp = serde_json::json!({ "query": "needle" }); + let out = tool.execute(&inp.to_string()).await.unwrap(); + let hits: Vec = parse_hits(&out); + assert_eq!(hits.len(), 1); + let e = &hits[0].excerpt; + assert!(e.contains("line b")); + assert!(e.contains("NEEDLE here")); + assert!(e.contains("line d")); + assert!(!e.contains("line a")); + assert!(!e.contains("line e")); + } + + #[tokio::test] + async fn memory_search_empty_query_rejected() { + let (_dir, layout) = setup(); + let (_, tool) = memory_search_tool(layout, SearchConfig::default())(); + let inp = serde_json::json!({ "query": " " }); + let err = tool.execute(&inp.to_string()).await.unwrap_err(); + assert!(matches!(err, ToolError::InvalidArgument(_))); + } + + #[tokio::test] + async fn knowledge_search_returns_frontmatter_fields() { + let (dir, layout) = setup(); + write_knowledge( + dir.path(), + "policy", + "policy", + "the policy doc", + "Ollama first\n", + ); + let (_, tool) = knowledge_search_tool(layout, SearchConfig::default())(); + let inp = serde_json::json!({ "query": "ollama" }); + let out = tool.execute(&inp.to_string()).await.unwrap(); + let hits: Vec = parse_hits(&out); + assert_eq!(hits.len(), 1); + assert_eq!(hits[0].slug, "policy"); + assert_eq!(hits[0].kind.as_deref(), Some("policy")); + assert_eq!(hits[0].description.as_deref(), Some("the policy doc")); + assert_eq!(hits[0].model_invokation, Some(false)); + assert!(hits[0].excerpt.to_lowercase().contains("ollama")); + } + + #[tokio::test] + async fn knowledge_search_kind_filter() { + let (dir, layout) = setup(); + write_knowledge(dir.path(), "p1", "policy", "d1", "needle\n"); + write_knowledge(dir.path(), "h1", "howto", "d2", "needle\n"); + + let (_, tool) = knowledge_search_tool(layout, SearchConfig::default())(); + let inp = serde_json::json!({ "query": "needle", "kind": "howto" }); + let out = tool.execute(&inp.to_string()).await.unwrap(); + let hits: Vec = parse_hits(&out); + assert_eq!(hits.len(), 1); + assert_eq!(hits[0].slug, "h1"); + } + + #[tokio::test] + async fn knowledge_search_searches_frontmatter_too() { + // Spec completion criteria: "frontmatter 含む全文から excerpt 付きでヒットが返る" + let (dir, layout) = setup(); + write_knowledge(dir.path(), "p", "policy", "mentions xyzzy here", "body\n"); + + let (_, tool) = knowledge_search_tool(layout, SearchConfig::default())(); + let inp = serde_json::json!({ "query": "xyzzy" }); + let out = tool.execute(&inp.to_string()).await.unwrap(); + let hits: Vec = parse_hits(&out); + assert_eq!(hits.len(), 1); + assert_eq!(hits[0].slug, "p"); + } + + #[tokio::test] + async fn knowledge_search_no_matches_returns_empty() { + let (dir, layout) = setup(); + write_knowledge(dir.path(), "p", "policy", "d", "no match\n"); + let (_, tool) = knowledge_search_tool(layout, SearchConfig::default())(); + let inp = serde_json::json!({ "query": "absent" }); + let out = tool.execute(&inp.to_string()).await.unwrap(); + let hits: Vec = parse_hits(&out); + assert!(hits.is_empty()); + } +} diff --git a/crates/memory/src/tool/write.rs b/crates/memory/src/tool/write.rs index 11bac8f5..4a159525 100644 --- a/crates/memory/src/tool/write.rs +++ b/crates/memory/src/tool/write.rs @@ -1,12 +1,11 @@ //! `MemoryWrite` tool. //! -//! Creates or overwrites a memory or knowledge record with full content. +//! Creates or overwrites a memory or knowledge record by `(kind, slug)`. //! Pre-write Linter validates frontmatter, slug uniqueness (Create only), //! reference integrity, size limits, and the workflow-write ban. On any //! Linter error the tool returns `ToolError::InvalidArgument` with all //! violations aggregated and the file is **not** written. -use std::path::PathBuf; use std::sync::Arc; use async_trait::async_trait; @@ -14,22 +13,27 @@ use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta, ToolOutput}; use serde::Deserialize; use crate::linter::{LintReport, Linter, WriteMode}; +use crate::tool::MemoryToolKind; use crate::workspace::WorkspaceLayout; -const DESCRIPTION: &str = "Create or overwrite a memory or knowledge record file. \ -Path must be absolute and lie inside the workspace's `memory/` or `knowledge/` \ -tree. Frontmatter is validated before the file is written; on validation \ -failure no write occurs and every violation is returned in the error message."; +const DESCRIPTION: &str = "Create or overwrite a memory or knowledge record by \ +`kind` + `slug`. `kind`: summary | decision | request | knowledge. For `summary` \ +omit `slug`. Frontmatter is validated before write; on validation failure no \ +write occurs and every violation is returned in the error message."; #[derive(Debug, Deserialize, schemars::JsonSchema)] struct WriteParams { - /// Absolute path under the workspace's `memory/` or `knowledge/` tree. - file_path: PathBuf, + /// Record kind: `summary` | `decision` | `request` | `knowledge`. + kind: MemoryToolKind, + /// Slug. Required for everything except `summary`; forbidden for `summary`. + #[serde(default)] + slug: Option, /// Full file contents (frontmatter + body). content: String, } struct WriteTool { + layout: WorkspaceLayout, linter: Linter, } @@ -40,26 +44,21 @@ impl Tool for WriteTool { ToolError::InvalidArgument(format!("invalid MemoryWrite input: {e}")) })?; - if !params.file_path.is_absolute() { - return Err(ToolError::InvalidArgument(format!( - "file_path must be absolute: {}", - params.file_path.display() - ))); - } + let path = params.kind.resolve_path(&self.layout, params.slug.as_deref())?; - let already_exists = params.file_path.exists(); + let already_exists = path.exists(); let mode = if already_exists { WriteMode::Update } else { WriteMode::Create }; - let report = self.linter.lint(¶ms.file_path, ¶ms.content, mode); + let report = self.linter.lint(&path, ¶ms.content, mode); if report.has_errors() { return Err(ToolError::InvalidArgument(format_report(&report))); } - if let Some(parent) = params.file_path.parent() { + if let Some(parent) = path.parent() { std::fs::create_dir_all(parent).map_err(|e| { ToolError::ExecutionFailed(format!( "failed to create directory {}: {e}", @@ -67,17 +66,14 @@ impl Tool for WriteTool { )) })?; } - std::fs::write(¶ms.file_path, params.content.as_bytes()).map_err(|e| { - ToolError::ExecutionFailed(format!( - "failed to write {}: {e}", - params.file_path.display() - )) + std::fs::write(&path, params.content.as_bytes()).map_err(|e| { + ToolError::ExecutionFailed(format!("failed to write {}: {e}", path.display())) })?; let summary = format!( "{} {}{}", if already_exists { "Overwrote" } else { "Created" }, - params.file_path.display(), + path.display(), warning_tail(&report), ); Ok(ToolOutput { @@ -122,6 +118,7 @@ pub fn write_tool(layout: WorkspaceLayout) -> ToolDefinition { .description(DESCRIPTION) .input_schema(schema_value); let tool: Arc = Arc::new(WriteTool { + layout: layout.clone(), linter: Linter::new(layout.clone()), }); (meta, tool) @@ -154,7 +151,7 @@ mod tests { assert_eq!(meta.name, "MemoryWrite"); let inp = serde_json::json!({ - "file_path": path.to_str().unwrap(), + "kind": "summary", "content": content, }); let out = tool.execute(&inp.to_string()).await.unwrap(); @@ -162,30 +159,10 @@ mod tests { assert!(path.exists()); } - #[tokio::test] - async fn write_rejects_workflow() { - let (dir, layout) = setup(); - let path = dir.path().join("memory/workflow/wf.md"); - let content = format!( - "---\nupdated_at: {n}\ndescription: x\nauto_invoke: false\nuser_invocable: true\n---\n", - n = now() - ); - let (_, tool) = write_tool(layout)(); - let inp = serde_json::json!({ - "file_path": path.to_str().unwrap(), - "content": content, - }); - let err = tool.execute(&inp.to_string()).await.unwrap_err(); - let msg = format!("{err}"); - assert!(msg.contains("workflow"), "unexpected error: {msg}"); - assert!(!path.exists(), "workflow file must not be written"); - } - #[tokio::test] async fn write_aggregates_multiple_errors() { - let (dir, layout) = setup(); - let path = dir.path().join("memory/decisions/foo.md"); - // Missing required `status` field AND body too long. + let (_dir, layout) = setup(); + // Missing required `status` field for decisions. let huge = "x".repeat(8001); let content = format!( "---\ncreated_at: {n}\nupdated_at: {n}\nsources: []\n---\n{huge}", @@ -193,7 +170,8 @@ mod tests { ); let (_, tool) = write_tool(layout)(); let inp = serde_json::json!({ - "file_path": path.to_str().unwrap(), + "kind": "decision", + "slug": "foo", "content": content, }); let err = tool.execute(&inp.to_string()).await.unwrap_err(); @@ -202,7 +180,7 @@ mod tests { } #[tokio::test] - async fn write_blocks_create_when_existing() { + async fn write_update_existing() { let (dir, layout) = setup(); let path = dir.path().join("memory/decisions/foo.md"); std::fs::create_dir_all(path.parent().unwrap()).unwrap(); @@ -212,10 +190,10 @@ mod tests { ); std::fs::write(&path, &initial).unwrap(); - // Same content as a re-write should pass (Update mode). let (_, tool) = write_tool(layout.clone())(); let inp = serde_json::json!({ - "file_path": path.to_str().unwrap(), + "kind": "decision", + "slug": "foo", "content": initial, }); let out = tool.execute(&inp.to_string()).await.unwrap(); @@ -223,11 +201,11 @@ mod tests { } #[tokio::test] - async fn write_rejects_non_absolute() { + async fn write_decision_requires_slug() { let (_dir, layout) = setup(); let (_, tool) = write_tool(layout)(); let inp = serde_json::json!({ - "file_path": "memory/summary.md", + "kind": "decision", "content": "ignored", }); let err = tool.execute(&inp.to_string()).await.unwrap_err(); @@ -241,10 +219,25 @@ mod tests { let bad = "no frontmatter at all"; let (_, tool) = write_tool(layout)(); let inp = serde_json::json!({ - "file_path": path.to_str().unwrap(), + "kind": "decision", + "slug": "foo", "content": bad, }); assert!(tool.execute(&inp.to_string()).await.is_err()); assert!(!path.exists()); } + + #[tokio::test] + async fn workflow_kind_not_acceptable() { + // The MemoryToolKind enum doesn't include Workflow, so deserialization fails. + let (_dir, layout) = setup(); + let (_, tool) = write_tool(layout)(); + let inp = serde_json::json!({ + "kind": "workflow", + "slug": "wf", + "content": "---\n---\n", + }); + let err = tool.execute(&inp.to_string()).await.unwrap_err(); + assert!(matches!(err, ToolError::InvalidArgument(_))); + } } diff --git a/crates/pod/src/controller.rs b/crates/pod/src/controller.rs index a70328d5..ce6a38a9 100644 --- a/crates/pod/src/controller.rs +++ b/crates/pod/src/controller.rs @@ -246,9 +246,18 @@ impl PodController { .clone() .unwrap_or_else(|| pwd_for_tools.clone()); let layout = memory::WorkspaceLayout::new(workspace_root); + let mut search_cfg = memory::tool::SearchConfig::default(); + if let Some(n) = mem.search_hit_limit { + search_cfg.hit_limit = n; + } + if let Some(n) = mem.search_excerpt_lines { + search_cfg.excerpt_lines = n; + } worker.register_tool(memory::tool::read_tool(layout.clone())); worker.register_tool(memory::tool::write_tool(layout.clone())); - worker.register_tool(memory::tool::edit_tool(layout)); + worker.register_tool(memory::tool::edit_tool(layout.clone())); + worker.register_tool(memory::tool::memory_search_tool(layout.clone(), search_cfg)); + worker.register_tool(memory::tool::knowledge_search_tool(layout, search_cfg)); } // Pod-orchestration tools (SpawnPod + the four comm tools) diff --git a/tickets/memory-search-tools.md b/tickets/memory-search-tools.md index 3eaf169a..3a2a6f84 100644 --- a/tickets/memory-search-tools.md +++ b/tickets/memory-search-tools.md @@ -2,33 +2,59 @@ ## 背景 -`docs/plan/memory.md` §retrieval 経路 で定義した 2 本の検索ツールを Pod から呼べる LLM ツールとして実装する。memory 検索と Knowledge 検索は対象ディレクトリが違うだけで同型の仕様。grep ベースで始め、FTS / vector は将来検討。 +`docs/plan/memory.md` §retrieval 経路 の 2 本の検索ツールを Pod から呼べる LLM ツールとして実装する。memory 検索と Knowledge 検索は対象ディレクトリが違うだけ。grep ベースで始め、FTS / vector は将来検討。 このツールは Phase 2 Pod の agentic 探索経路としても、通常 Pod の `#` 展開経路としても、使用頻度メトリクスの観測点としても使う(メトリクスの hook 挿入は本チケットの範囲外、経路だけ揃える)。 +slug 完全一致 1 件返しは検索ツールではなく `MemoryRead`(kind+slug 入力)が担当する。検索 → slug 入手 → Read で本文展開、という二段経路に整理する。 + ## 要件 -### ツール仕様(両者共通) +### ツール構成 + +- **MemorySearch**: memory 配下の検索専用 Tool +- **KnowledgeSearch**: knowledge 配下の検索専用 Tool +- 単一ファイル取得(slug 完全一致)は `MemoryRead`(既存、本チケットで `kind+slug` 入力に切り替え) + +### `MemoryRead` の入力変更(memory-file-format からの補正) + +- 旧: `file_path`(絶対パス) +- 新: `kind: summary | decision | request | knowledge` + `slug`(summary は slug なし) +- 同じ補正を `MemoryWrite` / `MemoryEdit` にも適用し、Search 出力をそのまま投げ込める一貫した経路にする + +### MemorySearch ツール仕様 - Input: - - `query: string`(自由文字列、必須) - - `slug: string`(完全一致 1 件返し、`#` 解決に使う) - - `kind: string`(Knowledge のみ、filter) -- Output: `{ slug, kind, description, model_invokation, excerpt }` の配列 - - `excerpt` はマッチ箇所の前後数行 -- ソート: grep 出現順(初期) -- ヒット件数上限と excerpt 行数は設定ファイルで tune -- 対象ファイルは都度スキャン。派生 index は持たない + - `query: string`(自由文字列、必須、case-insensitive 部分一致) +- Output: `{ slug, kind, excerpt }[]` + - `kind` は record kind(`summary` / `decision` / `request`) + - summary は slug を `"summary"` 固定で返す + - `excerpt` はマッチ行の前後 N 行 +- 対象: `memory/summary.md`, `memory/decisions/*.md`, `memory/requests/*.md` +- 除外: `memory/workflow/`, `memory/_staging/`(読みもしない) -### 対象 +### KnowledgeSearch ツール仕様 -- memory 検索: `memory/{summary,decisions,requests}/*.md`(workflow / \_staging は除外) -- Knowledge 検索: `knowledge/*.md` +- Input: + - `query: string`(必須) + - `kind: string`(任意、KnowledgeFrontmatter の `kind` フィールドで filter) +- Output: `{ slug, kind, description, model_invokation, excerpt }[]` + - `kind` / `description` / `model_invokation` は frontmatter から + - frontmatter parse 失敗時は `null`(本文 grep は機能継続) +- 対象: `knowledge/*.md` + +### 共通 + +- ソート: ファイル名昇順 → ファイル内行順(grep 出現順) +- ヒット件数上限と excerpt 行数は manifest `[memory]` で tune(`search_hit_limit` / `search_excerpt_lines`、デフォルト 20 / 前後 3 行) +- 対象ファイルは都度スキャン、派生 index は持たない +- frontmatter も検索対象に含める ### 登録 - 通常 Pod と Phase 2 Pod の両方に渡せる tool 定義 - Phase 2 Pod には Knowledge 検索を必ず渡す(全 Knowledge 本文を prompt に埋めない前提) +- 本チケットでは通常 Pod の controller への登録まで実装。Phase 2 Pod 側の配布は別チケット ## 範囲外 @@ -36,16 +62,23 @@ - slug サジェスト補完 UI(TUI 側、別途) - FTS / vector index - 常駐注入(別チケット) +- Phase 2 Pod 側のツール配布(Phase 2 チケットの責務) ## 完了条件 -- 通常 Pod / Phase 2 Pod 双方から両ツールが呼べる -- `slug` 指定で完全一致の 1 件が返り、`#` の本文展開経路として成立する +- 通常 Pod から MemorySearch / KnowledgeSearch / MemoryRead / MemoryWrite / MemoryEdit すべて呼べる +- Search が返す `slug` + `kind` をそのまま `MemoryRead` に渡して本文取得が成立する(`#` 展開経路) - `query` 指定で frontmatter 含む全文から excerpt 付きでヒットが返る -- Knowledge 検索の `kind` filter が効く +- KnowledgeSearch の `kind` filter(frontmatter.kind 完全一致)が効く - 対象外ディレクトリ(`memory/workflow/`, `memory/_staging/`)はヒットしない +- `search_hit_limit` / `search_excerpt_lines` の tuning が効く ## 参照 - `docs/plan/memory.md` §retrieval 経路 / §Knowledge の採択基準 -- `tickets/memory-file-format.md`(依存: frontmatter スキーマ) +- `tickets/memory-file-format.md`(依存: frontmatter スキーマ、本チケットで Read/Write/Edit 入力 schema を補正) + +## Review +- 状態: Approve +- レビュー詳細: [./memory-search-tools.review.md](./memory-search-tools.review.md) +- 日付: 2026-04-27 diff --git a/tickets/memory-search-tools.review.md b/tickets/memory-search-tools.review.md new file mode 100644 index 00000000..cbff39a3 --- /dev/null +++ b/tickets/memory-search-tools.review.md @@ -0,0 +1,63 @@ +# Review: メモリ機構: memory / Knowledge 検索ツール + +## 前提・要件の確認 + +### ツール構成 +- MemorySearch / KnowledgeSearch を新規 Tool として実装、単一取得は MemoryRead に役割分離 — `crates/memory/src/tool/search.rs`、`tool/mod.rs:21-24` で `read_tool` / `write_tool` / `edit_tool` / `memory_search_tool` / `knowledge_search_tool` を re-export。チケット冒頭の「slug 完全一致は MemoryRead 側」という補正が公開 API・ドキュコメントどちらにも反映されている (`search.rs:31-40`, `read.rs:17-20`)。 + +### Read/Write/Edit signature 補正(memory-file-format からの繰越) +- `MemoryToolKind` を `tool/mod.rs:28-90` に集約し、Read/Write/Edit 全部で `kind` + `slug` 入力に切り替え済み。`Workflow` を意図的に enum から外しているため、`workflow` を投げた瞬間 deserialize で弾ける(`write.rs:230-243`, `edit.rs:240-253` のテストで担保)。Search 出力 `{slug, kind}` をそのまま Read/Edit に渡せる経路は完了条件どおり成立。 +- summary は `slug` 禁止、その他は必須、というルールを `MemoryToolKind::resolve_path` に一箇所で集約しており、3 ツール全部が同じ振る舞いになる。テストも `summary_with_slug_rejected` / `decision_without_slug_rejected` で担保 (`read.rs:172-187`). + +### MemorySearch 仕様 +- 入力 `query: string` のみ、case-insensitive 部分一致 — `validate_query` で空文字拒否 + lower-case 化 (`search.rs:224-231`)、`scan_text` で行ごとに lower-case 化して比較 (`search.rs:289-300`)。 +- 出力 `{slug, kind, excerpt}[]`、summary は slug を `"summary"` 固定で返す — `search.rs:115-122` と `memory_search_finds_summary` テスト。 +- 対象/除外ディレクトリ: `summary_path` / `decisions_dir` / `requests_dir` のみ列挙し、`workflow_dir` / `staging_dir` はそもそも触らない構造。`memory_search_excludes_workflow_and_staging` テストで「needle を含めても 0 件」を担保。 + +### KnowledgeSearch 仕様 +- `query` 必須 + `kind` 任意フィルタ。frontmatter が壊れていても本文 grep を継続、frontmatter フィールドは `None` を返す動作 (`search.rs:182-213`)。`knowledge_search_searches_frontmatter_too` で frontmatter 文字列にもヒットすることを担保。 +- 出力スキーマ差別化(description / model_invokation を持つ)はチケット改訂版にも記述あり、原チケットの「同型」を読み手にも明示している。 + +### 共通・ソート・上限 +- `list_md_files` がスラグ昇順ソート (`search.rs:256`)、ファイル内は `lines().enumerate()` で出現順 — チケットの「ファイル名昇順 → 行順」と一致。 +- `manifest [memory]` から `search_hit_limit` / `search_excerpt_lines` を tune できる経路あり (`crates/manifest/src/lib.rs:54-64`, `config.rs:204-212`, `controller.rs:249-260`)。デフォルト 20 / 3 行で原仕様と一致。 +- `memory_search_respects_hit_limit` / `memory_search_excerpt_includes_context_lines` テストで両方が効くことを担保。 + +### 登録 +- `pod/src/controller.rs:256-260` で memory enabled 時にのみ 5 ツールが並列登録される。Phase 2 Pod 側はチケットでも明示的に範囲外と書かれていて整合。 + +### 完了条件マッピング +- 通常 Pod から 5 ツール呼べる ✅ (`controller.rs`) +- Search → Read 経路成立 ✅(共通 `MemoryToolKind` で型一致) +- frontmatter 含む全文ヒット ✅ (`scan_text` は raw 全体を走査、専用テストあり) +- `kind` filter ✅ (`knowledge_search_kind_filter` テスト) +- 対象外ディレクトリ非ヒット ✅ +- tuning 可 ✅ + +## アーキテクチャ・スコープ + +- `crates/memory/src/tool/` にファイル分割で同居、既存 read/write/edit と完全に同じパターン。`MemoryToolKind` を `mod.rs` に置いて 4 ツール全部が共有しているのは適切。 +- 依存追加なし(`std::fs` / `serde_yaml` / 既存の `schema::*` のみ)。`cargo add` ルールに抵触しない。 +- `ToolDefinition` を `Arc` で返す既存ファクトリ規約を踏襲し、`SearchConfig` を closure に move するだけのミニマルな受け渡し。 +- `manifest::MemoryConfig` への 2 フィールド追加は `Option` + cascade で merge する既存パターンに従っている。 +- スコープ越境なし(llm-worker / pod / scope の責務境界に変更なし)。「memory crate は self-contained」という既存ポリシー (`lib.rs:1-7`) もそのまま守られている。 +- 派生 index を持たず都度スキャンとしている点も、原チケットの「FTS / vector は将来」とブレなし。 + +## 指摘事項 + +### Blocking +なし。 + +### Non-blocking / Follow-up(任意) +- **`search.rs:115-122` の summary 走査が `hit_limit` 上限ガードを通っていない**。現状は `limit - hits.len()` を `scan_file` に渡しているので `limit == 0` でも 0 が伝わって動作上は問題ないが、可読性として decisions / requests と同じ `if hits.len() < limit` ガードを前置きしておく方が一貫し、将来の修正で誤って `usize` underflow を踏み込む隙を消せる。 +- **`SearchConfig` 適用ロジックが controller 側に展開コードとして書かれている** (`controller.rs:249-255`)。`MemoryConfig` → `SearchConfig` の変換を `From` impl で memory crate 側に持たせるとテストしやすく、controller を細くできる。manifest を memory に依存させたくない場合は逆向き (`manifest` 側に `to_search_config`) でも可。今のままでも動くので任意。 +- **frontmatter の YAML パース失敗時、`kind` filter 指定があるとそのファイルが完全に excluded になる** (`search.rs:189-198`)。仕様上「frontmatter parse 失敗時は本文 grep 継続」と書かれているので、kind filter 指定時のみ skip するのは spec 通り(filter で絞り込みたいユーザーが unknown-kind のファイルを取りたいケースは無い)。ただしドキュコメントに「kind filter ありで frontmatter 壊れているファイルは skip」と一行足しておくと混乱しない。 +- **`KnowledgeSearchTool` は frontmatter parse 失敗ファイルも逐次 lower-case 比較するため負荷がやや高い**。本チケットの「都度スキャン」「FTS は将来」前提なので問題視はしないが、`hit_limit` を 0 や巨大値にしたユーザーに対して description などに足切り入れる余地は将来検討。 + +### Nits +- `MEMORY_SEARCH_DESCRIPTION` / `KNOWLEDGE_SEARCH_DESCRIPTION` が `hit_limit` に言及しているが、tool input にはこのフィールドが無い(manifest 側 tunable)。LLM が `hit_limit` を引数に入れたくならないか軽く心配。「(configurable via manifest)」のような括弧書きで明示してもよい。 +- `parse_hits` テストヘルパーが `OwnedMemoryHit` / `OwnedKnowledgeHit` で「Owned」プレフィックスを持つが、テスト内のシリアライズ用 struct を「Owned」と呼ぶ意図が明示されていない。`parsed_*` などでも可。 + +## 判断 + +**Approve(完了可)** — 原チケットの要件と完了条件はすべて実装に対応付けが取れており、補正(Read/Write/Edit signature 切り替え・出力スキーマ差別化・slug 完全一致を Read 側へ移譲)はチケット本文にも明確に反映されている。コードベースの既存パターンを踏襲しており、歪みも gold-plating も無し。指摘は全て任意の follow-up。