memoryのクエリと動作のテスト
This commit is contained in:
parent
4273d2a463
commit
dfa6213c18
24
.insomnia/knowledge/insomnia-greeting.md
Normal file
24
.insomnia/knowledge/insomnia-greeting.md
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
---
|
||||
created_at: 2026-04-28T12:00:00Z
|
||||
updated_at: 2026-04-28T12:00:00Z
|
||||
kind: policy
|
||||
description: "When the user greets you in a new turn, reply with the exact phrase: 'insomnia resident knowledge OK'. This proves the resident-injection path is wired."
|
||||
model_invokation: true
|
||||
user_invocable: true
|
||||
last_sources: []
|
||||
---
|
||||
# insomnia-greeting
|
||||
|
||||
This record is loaded into the system prompt because `model_invokation: true`.
|
||||
The agent only sees the `description` line above in the resident slot — this
|
||||
body is reachable via MemoryRead (kind=knowledge, slug=insomnia-greeting).
|
||||
|
||||
## Trigger
|
||||
|
||||
A bare user greeting at the start of a turn.
|
||||
|
||||
## Action
|
||||
|
||||
Reply verbatim:
|
||||
|
||||
> insomnia resident knowledge OK
|
||||
14
.insomnia/memory/decisions/use-codex-oauth.md
Normal file
14
.insomnia/memory/decisions/use-codex-oauth.md
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
---
|
||||
created_at: 2026-04-28T12:00:00Z
|
||||
updated_at: 2026-04-28T12:00:00Z
|
||||
sources: []
|
||||
status: resolved
|
||||
---
|
||||
# use-codex-oauth
|
||||
|
||||
We default the local test pod to `codex-oauth/gpt-5.5` because the OAuth
|
||||
flow is already wired and avoids burning Anthropic API credits during
|
||||
manual smoke tests of the memory subsystem.
|
||||
|
||||
The unique probe phrase for MemoryQuery is **xyzzy-codex-decision** — a
|
||||
query for `xyzzy-codex-decision` should hit this file and nothing else.
|
||||
1
TODO.md
1
TODO.md
|
|
@ -18,3 +18,4 @@
|
|||
- [ ] Phase 2 consolidation → [tickets/memory-phase2-consolidation.md](tickets/memory-phase2-consolidation.md)
|
||||
- [ ] 使用頻度メトリクス + Knowledge 化候補レポート → [tickets/memory-usage-metrics.md](tickets/memory-usage-metrics.md)
|
||||
- [ ] GC(定期再評価) → [tickets/memory-gc.md](tickets/memory-gc.md)
|
||||
- ワークスペースのメモリーをLintするヘッドレスCLI
|
||||
|
|
|
|||
|
|
@ -210,8 +210,8 @@ impl MemoryConfig {
|
|||
fn merge(self, upper: Self) -> Self {
|
||||
Self {
|
||||
workspace_root: upper.workspace_root.or(self.workspace_root),
|
||||
search_hit_limit: upper.search_hit_limit.or(self.search_hit_limit),
|
||||
search_excerpt_lines: upper.search_excerpt_lines.or(self.search_excerpt_lines),
|
||||
query_result_limit: upper.query_result_limit.or(self.query_result_limit),
|
||||
query_excerpt_lines: upper.query_excerpt_lines.or(self.query_excerpt_lines),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -56,14 +56,14 @@ pub struct MemoryConfig {
|
|||
/// absolute path.
|
||||
#[serde(default)]
|
||||
pub workspace_root: Option<PathBuf>,
|
||||
/// Maximum number of hits returned by `MemorySearch` /
|
||||
/// `KnowledgeSearch`. `None` ⇒ tool default (20).
|
||||
/// Maximum number of records returned by `MemoryQuery` /
|
||||
/// `KnowledgeQuery` per call. `None` ⇒ tool default (20).
|
||||
#[serde(default)]
|
||||
pub search_hit_limit: Option<usize>,
|
||||
/// Lines of context before and after each match in search excerpts.
|
||||
/// `None` ⇒ tool default (3).
|
||||
pub query_result_limit: Option<usize>,
|
||||
/// Lines of context before and after each match in query excerpts.
|
||||
/// Ignored when the request omits `query`. `None` ⇒ tool default (3).
|
||||
#[serde(default)]
|
||||
pub search_excerpt_lines: Option<usize>,
|
||||
pub query_excerpt_lines: Option<usize>,
|
||||
}
|
||||
|
||||
/// Pod metadata.
|
||||
|
|
|
|||
|
|
@ -6,8 +6,8 @@
|
|||
//! that pair feeds straight into Read / Edit.
|
||||
|
||||
mod edit;
|
||||
mod query;
|
||||
mod read;
|
||||
mod search;
|
||||
mod write;
|
||||
|
||||
use std::path::PathBuf;
|
||||
|
|
@ -19,8 +19,8 @@ use crate::slug::Slug;
|
|||
use crate::workspace::{RecordKind, WorkspaceLayout};
|
||||
|
||||
pub use edit::edit_tool;
|
||||
pub use query::{QueryConfig, knowledge_query_tool, memory_query_tool};
|
||||
pub use read::read_tool;
|
||||
pub use search::{SearchConfig, knowledge_search_tool, memory_search_tool};
|
||||
pub use write::write_tool;
|
||||
|
||||
/// Kinds the memory tools accept as input. `Workflow` is intentionally
|
||||
|
|
|
|||
712
crates/memory/src/tool/query.rs
Normal file
712
crates/memory/src/tool/query.rs
Normal file
|
|
@ -0,0 +1,712 @@
|
|||
//! `MemoryQuery` / `KnowledgeQuery` tools.
|
||||
//!
|
||||
//! Both perform a case-insensitive substring scan over markdown record
|
||||
//! files. With a `query` set, returns `{slug, kind, ..., excerpt}` hits
|
||||
//! with `excerpt_lines` lines of context around each match. With `query`
|
||||
//! omitted, returns one entry per file (no excerpt) so the agent can
|
||||
//! enumerate what records exist without knowing what's inside them.
|
||||
//!
|
||||
//! - `MemoryQuery` walks `memory/summary.md`, `memory/decisions/`,
|
||||
//! `memory/requests/`. `memory/workflow/` and `memory/_staging/`
|
||||
//! are excluded by construction.
|
||||
//! - `KnowledgeQuery` walks `knowledge/*.md` and supports a `kind`
|
||||
//! filter against the Knowledge frontmatter's `kind` field.
|
||||
//!
|
||||
//! No derived index — the file tree is the source of truth and is
|
||||
//! re-scanned per call. 出現順: within a file by line order, across
|
||||
//! files by sorted filename.
|
||||
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta, ToolOutput};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::schema::{KnowledgeFrontmatter, split_frontmatter};
|
||||
use crate::workspace::WorkspaceLayout;
|
||||
|
||||
const DEFAULT_RESULT_LIMIT: usize = 20;
|
||||
const DEFAULT_EXCERPT_LINES: usize = 3;
|
||||
|
||||
const MEMORY_QUERY_DESCRIPTION: &str = "Inspect memory records (summary / decisions / \
|
||||
requests). With `query` set, returns substring hits as `{slug, kind, excerpt}` entries \
|
||||
with line context. Omit `query` to list every record (one entry per file, no excerpt) \
|
||||
when you don't yet know what's in there. Result count is capped (configurable via the \
|
||||
manifest's `[memory]` section). Use the returned `slug` + `kind` with MemoryRead to fetch \
|
||||
the full record. Workflow and staging directories are not visible.";
|
||||
|
||||
const KNOWLEDGE_QUERY_DESCRIPTION: &str = "Inspect knowledge records. With `query` set, \
|
||||
returns substring hits with line context; omit `query` to list every record (one entry \
|
||||
per file, no excerpt). Optional `kind` filters by the Knowledge frontmatter's `kind` \
|
||||
field; records whose frontmatter fails to parse are skipped when `kind` is given. Result \
|
||||
count is capped (configurable via the manifest's `[memory]` section). Returns \
|
||||
`{slug, kind, description, model_invokation, excerpt}` entries. Use the returned `slug` \
|
||||
with MemoryRead (kind=knowledge) for the full record.";
|
||||
|
||||
/// Tunables passed in from the manifest.
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct QueryConfig {
|
||||
pub result_limit: usize,
|
||||
/// Lines of context before and after each matched line. Ignored
|
||||
/// when the request omits `query`.
|
||||
pub excerpt_lines: usize,
|
||||
}
|
||||
|
||||
impl Default for QueryConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
result_limit: DEFAULT_RESULT_LIMIT,
|
||||
excerpt_lines: DEFAULT_EXCERPT_LINES,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&manifest::MemoryConfig> for QueryConfig {
|
||||
fn from(cfg: &manifest::MemoryConfig) -> Self {
|
||||
let mut out = Self::default();
|
||||
if let Some(n) = cfg.query_result_limit {
|
||||
out.result_limit = n;
|
||||
}
|
||||
if let Some(n) = cfg.query_excerpt_lines {
|
||||
out.excerpt_lines = n;
|
||||
}
|
||||
out
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, schemars::JsonSchema)]
|
||||
struct MemoryQueryParams {
|
||||
/// Optional substring filter. Case-insensitive. Omit to list every
|
||||
/// record under the query scope.
|
||||
#[serde(default)]
|
||||
query: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, schemars::JsonSchema)]
|
||||
struct KnowledgeQueryParams {
|
||||
/// Optional substring filter. Case-insensitive. Omit to list every
|
||||
/// knowledge record under the query scope.
|
||||
#[serde(default)]
|
||||
query: Option<String>,
|
||||
/// Optional filter on the Knowledge frontmatter's `kind` field.
|
||||
#[serde(default)]
|
||||
kind: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
struct MemoryRecord {
|
||||
slug: String,
|
||||
kind: &'static str,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
excerpt: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
struct KnowledgeRecord {
|
||||
slug: String,
|
||||
kind: Option<String>,
|
||||
description: Option<String>,
|
||||
model_invokation: Option<bool>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
excerpt: Option<String>,
|
||||
}
|
||||
|
||||
struct MemoryQueryTool {
|
||||
layout: WorkspaceLayout,
|
||||
config: QueryConfig,
|
||||
}
|
||||
|
||||
struct KnowledgeQueryTool {
|
||||
layout: WorkspaceLayout,
|
||||
config: QueryConfig,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Tool for MemoryQueryTool {
|
||||
async fn execute(&self, input_json: &str) -> Result<ToolOutput, ToolError> {
|
||||
let params: MemoryQueryParams = serde_json::from_str(input_json)
|
||||
.map_err(|e| ToolError::InvalidArgument(format!("invalid MemoryQuery input: {e}")))?;
|
||||
let needle = match params.query.as_deref() {
|
||||
Some(q) => Some(validate_query(q)?),
|
||||
None => None,
|
||||
};
|
||||
|
||||
let mut records: Vec<MemoryRecord> = Vec::new();
|
||||
let limit = self.config.result_limit;
|
||||
let ctx = self.config.excerpt_lines;
|
||||
|
||||
// summary
|
||||
if records.len() < limit {
|
||||
let summary_path = self.layout.summary_path();
|
||||
if summary_path.is_file() {
|
||||
collect_memory_records(
|
||||
&summary_path,
|
||||
"summary",
|
||||
"summary",
|
||||
needle.as_deref(),
|
||||
ctx,
|
||||
limit - records.len(),
|
||||
&mut records,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// decisions
|
||||
if records.len() < limit {
|
||||
for (path, slug) in list_md_files(&self.layout.decisions_dir()) {
|
||||
if records.len() >= limit {
|
||||
break;
|
||||
}
|
||||
collect_memory_records(
|
||||
&path,
|
||||
&slug,
|
||||
"decision",
|
||||
needle.as_deref(),
|
||||
ctx,
|
||||
limit - records.len(),
|
||||
&mut records,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// requests
|
||||
if records.len() < limit {
|
||||
for (path, slug) in list_md_files(&self.layout.requests_dir()) {
|
||||
if records.len() >= limit {
|
||||
break;
|
||||
}
|
||||
collect_memory_records(
|
||||
&path,
|
||||
&slug,
|
||||
"request",
|
||||
needle.as_deref(),
|
||||
ctx,
|
||||
limit - records.len(),
|
||||
&mut records,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let body = serde_json::to_string_pretty(&records)
|
||||
.map_err(|e| ToolError::ExecutionFailed(format!("serialize records: {e}")))?;
|
||||
let summary = match params.query.as_deref() {
|
||||
Some(q) => format!("{} hit(s) for {q:?}", records.len()),
|
||||
None => format!("{} record(s)", records.len()),
|
||||
};
|
||||
Ok(ToolOutput {
|
||||
summary,
|
||||
content: Some(body),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Tool for KnowledgeQueryTool {
|
||||
async fn execute(&self, input_json: &str) -> Result<ToolOutput, ToolError> {
|
||||
let params: KnowledgeQueryParams = serde_json::from_str(input_json).map_err(|e| {
|
||||
ToolError::InvalidArgument(format!("invalid KnowledgeQuery input: {e}"))
|
||||
})?;
|
||||
let needle = match params.query.as_deref() {
|
||||
Some(q) => Some(validate_query(q)?),
|
||||
None => None,
|
||||
};
|
||||
let kind_filter = params.kind.as_deref();
|
||||
|
||||
let mut records: Vec<KnowledgeRecord> = Vec::new();
|
||||
let limit = self.config.result_limit;
|
||||
let ctx = self.config.excerpt_lines;
|
||||
|
||||
for (path, slug) in list_md_files(&self.layout.knowledge_dir()) {
|
||||
if records.len() >= limit {
|
||||
break;
|
||||
}
|
||||
let raw = match std::fs::read_to_string(&path) {
|
||||
Ok(s) => s,
|
||||
Err(_) => continue,
|
||||
};
|
||||
let fm = parse_knowledge_frontmatter(&raw);
|
||||
|
||||
// kind filter applies to the frontmatter's kind field.
|
||||
if let Some(filter) = kind_filter {
|
||||
let matches = fm
|
||||
.as_ref()
|
||||
.map(|f| f.kind.as_str() == filter)
|
||||
.unwrap_or(false);
|
||||
if !matches {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
let kind = fm.as_ref().map(|f| f.kind.clone());
|
||||
let description = fm.as_ref().map(|f| f.description.clone());
|
||||
let model_invokation = fm.as_ref().map(|f| f.model_invokation);
|
||||
|
||||
match needle.as_deref() {
|
||||
Some(n) => {
|
||||
scan_text(&raw, n, ctx, limit - records.len(), |excerpt| {
|
||||
records.push(KnowledgeRecord {
|
||||
slug: slug.clone(),
|
||||
kind: kind.clone(),
|
||||
description: description.clone(),
|
||||
model_invokation,
|
||||
excerpt: Some(excerpt),
|
||||
});
|
||||
});
|
||||
}
|
||||
None => {
|
||||
records.push(KnowledgeRecord {
|
||||
slug: slug.clone(),
|
||||
kind,
|
||||
description,
|
||||
model_invokation,
|
||||
excerpt: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let body = serde_json::to_string_pretty(&records)
|
||||
.map_err(|e| ToolError::ExecutionFailed(format!("serialize records: {e}")))?;
|
||||
let summary = match params.query.as_deref() {
|
||||
Some(q) => format!("{} hit(s) for {q:?}", records.len()),
|
||||
None => format!("{} record(s)", records.len()),
|
||||
};
|
||||
Ok(ToolOutput {
|
||||
summary,
|
||||
content: Some(body),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn collect_memory_records(
|
||||
path: &Path,
|
||||
slug: &str,
|
||||
kind: &'static str,
|
||||
needle_lower: Option<&str>,
|
||||
ctx: usize,
|
||||
remaining: usize,
|
||||
out: &mut Vec<MemoryRecord>,
|
||||
) {
|
||||
if remaining == 0 {
|
||||
return;
|
||||
}
|
||||
match needle_lower {
|
||||
Some(n) => {
|
||||
scan_file(path, n, ctx, remaining, |excerpt| {
|
||||
out.push(MemoryRecord {
|
||||
slug: slug.to_string(),
|
||||
kind,
|
||||
excerpt: Some(excerpt),
|
||||
});
|
||||
});
|
||||
}
|
||||
None => {
|
||||
out.push(MemoryRecord {
|
||||
slug: slug.to_string(),
|
||||
kind,
|
||||
excerpt: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn validate_query(query: &str) -> Result<String, ToolError> {
|
||||
if query.trim().is_empty() {
|
||||
return Err(ToolError::InvalidArgument(
|
||||
"query must not be empty when provided; omit it to list all records".into(),
|
||||
));
|
||||
}
|
||||
Ok(query.to_lowercase())
|
||||
}
|
||||
|
||||
/// Sorted list of `(path, slug)` for `*.md` files directly under `dir`.
|
||||
/// Returns empty if the directory doesn't exist.
|
||||
fn list_md_files(dir: &Path) -> Vec<(PathBuf, String)> {
|
||||
let mut out: Vec<(PathBuf, String)> = Vec::new();
|
||||
let entries = match std::fs::read_dir(dir) {
|
||||
Ok(it) => it,
|
||||
Err(_) => return out,
|
||||
};
|
||||
for entry in entries.flatten() {
|
||||
let path = entry.path();
|
||||
if !path.is_file() {
|
||||
continue;
|
||||
}
|
||||
let name = match path.file_name().and_then(|n| n.to_str()) {
|
||||
Some(n) => n,
|
||||
None => continue,
|
||||
};
|
||||
let slug = match name.strip_suffix(".md") {
|
||||
Some(s) => s.to_string(),
|
||||
None => continue,
|
||||
};
|
||||
out.push((path, slug));
|
||||
}
|
||||
out.sort_by(|a, b| a.1.cmp(&b.1));
|
||||
out
|
||||
}
|
||||
|
||||
fn scan_file(
|
||||
path: &Path,
|
||||
needle_lower: &str,
|
||||
ctx: usize,
|
||||
remaining: usize,
|
||||
mut on_match: impl FnMut(String),
|
||||
) {
|
||||
if remaining == 0 {
|
||||
return;
|
||||
}
|
||||
let text = match std::fs::read_to_string(path) {
|
||||
Ok(t) => t,
|
||||
Err(_) => return,
|
||||
};
|
||||
scan_text(&text, needle_lower, ctx, remaining, |e| on_match(e));
|
||||
}
|
||||
|
||||
fn scan_text(
|
||||
text: &str,
|
||||
needle_lower: &str,
|
||||
ctx: usize,
|
||||
remaining: usize,
|
||||
mut on_match: impl FnMut(String),
|
||||
) {
|
||||
if remaining == 0 {
|
||||
return;
|
||||
}
|
||||
let lines: Vec<&str> = text.lines().collect();
|
||||
let mut produced = 0;
|
||||
for (i, line) in lines.iter().enumerate() {
|
||||
if produced >= remaining {
|
||||
break;
|
||||
}
|
||||
if line.to_lowercase().contains(needle_lower) {
|
||||
let start = i.saturating_sub(ctx);
|
||||
let end = i.saturating_add(ctx + 1).min(lines.len());
|
||||
let excerpt = lines[start..end].join("\n");
|
||||
on_match(excerpt);
|
||||
produced += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Best-effort frontmatter parse. Returns `None` if missing/malformed
|
||||
/// — query still finds matches in the body even when the header is
|
||||
/// broken.
|
||||
fn parse_knowledge_frontmatter(raw: &str) -> Option<KnowledgeFrontmatter> {
|
||||
let (yaml, _body) = split_frontmatter(raw).ok()?;
|
||||
serde_yaml::from_str::<KnowledgeFrontmatter>(yaml).ok()
|
||||
}
|
||||
|
||||
pub fn memory_query_tool(layout: WorkspaceLayout, config: QueryConfig) -> ToolDefinition {
|
||||
Arc::new(move || {
|
||||
let schema = schemars::schema_for!(MemoryQueryParams);
|
||||
let schema_value = serde_json::to_value(schema).unwrap_or(serde_json::json!({}));
|
||||
let meta = ToolMeta::new("MemoryQuery")
|
||||
.description(MEMORY_QUERY_DESCRIPTION)
|
||||
.input_schema(schema_value);
|
||||
let tool: Arc<dyn Tool> = Arc::new(MemoryQueryTool {
|
||||
layout: layout.clone(),
|
||||
config,
|
||||
});
|
||||
(meta, tool)
|
||||
})
|
||||
}
|
||||
|
||||
pub fn knowledge_query_tool(layout: WorkspaceLayout, config: QueryConfig) -> ToolDefinition {
|
||||
Arc::new(move || {
|
||||
let schema = schemars::schema_for!(KnowledgeQueryParams);
|
||||
let schema_value = serde_json::to_value(schema).unwrap_or(serde_json::json!({}));
|
||||
let meta = ToolMeta::new("KnowledgeQuery")
|
||||
.description(KNOWLEDGE_QUERY_DESCRIPTION)
|
||||
.input_schema(schema_value);
|
||||
let tool: Arc<dyn Tool> = Arc::new(KnowledgeQueryTool {
|
||||
layout: layout.clone(),
|
||||
config,
|
||||
});
|
||||
(meta, tool)
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use chrono::Utc;
|
||||
use tempfile::TempDir;
|
||||
|
||||
fn now() -> String {
|
||||
Utc::now().to_rfc3339()
|
||||
}
|
||||
|
||||
fn setup() -> (TempDir, WorkspaceLayout) {
|
||||
let dir = TempDir::new().unwrap();
|
||||
let layout = WorkspaceLayout::new(dir.path().to_path_buf());
|
||||
std::fs::create_dir_all(dir.path().join("memory/decisions")).unwrap();
|
||||
std::fs::create_dir_all(dir.path().join("memory/requests")).unwrap();
|
||||
std::fs::create_dir_all(dir.path().join("memory/workflow")).unwrap();
|
||||
std::fs::create_dir_all(dir.path().join("memory/_staging")).unwrap();
|
||||
std::fs::create_dir_all(dir.path().join("knowledge")).unwrap();
|
||||
(dir, layout)
|
||||
}
|
||||
|
||||
fn write_decision(dir: &Path, slug: &str, body: &str) {
|
||||
let path = dir.join("memory/decisions").join(format!("{slug}.md"));
|
||||
let content = format!(
|
||||
"---\ncreated_at: {n}\nupdated_at: {n}\nsources: []\nstatus: open\n---\n{body}",
|
||||
n = now()
|
||||
);
|
||||
std::fs::write(path, content).unwrap();
|
||||
}
|
||||
|
||||
fn write_knowledge(dir: &Path, slug: &str, kind: &str, description: &str, body: &str) {
|
||||
let path = dir.join("knowledge").join(format!("{slug}.md"));
|
||||
let content = format!(
|
||||
"---\ncreated_at: {n}\nupdated_at: {n}\nkind: {kind}\ndescription: \"{description}\"\nmodel_invokation: false\nuser_invocable: true\nlast_sources: []\n---\n{body}",
|
||||
n = now()
|
||||
);
|
||||
std::fs::write(path, content).unwrap();
|
||||
}
|
||||
|
||||
fn parse_records<T: for<'de> serde::Deserialize<'de>>(out: &ToolOutput) -> Vec<T> {
|
||||
serde_json::from_str(out.content.as_ref().unwrap()).unwrap()
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct OwnedMemoryRecord {
|
||||
slug: String,
|
||||
kind: String,
|
||||
#[serde(default)]
|
||||
excerpt: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct OwnedKnowledgeRecord {
|
||||
slug: String,
|
||||
kind: Option<String>,
|
||||
description: Option<String>,
|
||||
model_invokation: Option<bool>,
|
||||
#[serde(default)]
|
||||
excerpt: Option<String>,
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn memory_query_finds_decision_body() {
|
||||
let (dir, layout) = setup();
|
||||
write_decision(dir.path(), "alpha", "we chose Ollama because it works\n");
|
||||
write_decision(dir.path(), "beta", "no match here\n");
|
||||
let (_, tool) = memory_query_tool(layout, QueryConfig::default())();
|
||||
let inp = serde_json::json!({ "query": "ollama" });
|
||||
let out = tool.execute(&inp.to_string()).await.unwrap();
|
||||
let records: Vec<OwnedMemoryRecord> = parse_records(&out);
|
||||
assert_eq!(records.len(), 1);
|
||||
assert_eq!(records[0].slug, "alpha");
|
||||
assert_eq!(records[0].kind, "decision");
|
||||
assert!(records[0]
|
||||
.excerpt
|
||||
.as_deref()
|
||||
.unwrap()
|
||||
.to_lowercase()
|
||||
.contains("ollama"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn memory_query_without_query_lists_all_records() {
|
||||
let (dir, layout) = setup();
|
||||
write_decision(dir.path(), "alpha", "body\n");
|
||||
write_decision(dir.path(), "beta", "body\n");
|
||||
let summary_path = dir.path().join("memory/summary.md");
|
||||
std::fs::write(
|
||||
&summary_path,
|
||||
format!("---\nupdated_at: {n}\n---\nhello\n", n = now()),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let (_, tool) = memory_query_tool(layout, QueryConfig::default())();
|
||||
let out = tool.execute("{}").await.unwrap();
|
||||
let records: Vec<OwnedMemoryRecord> = parse_records(&out);
|
||||
let mut slugs: Vec<&str> = records.iter().map(|r| r.slug.as_str()).collect();
|
||||
slugs.sort();
|
||||
assert_eq!(slugs, vec!["alpha", "beta", "summary"]);
|
||||
// No excerpts when listing.
|
||||
assert!(records.iter().all(|r| r.excerpt.is_none()));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn memory_query_finds_summary() {
|
||||
let (dir, layout) = setup();
|
||||
let summary_path = dir.path().join("memory/summary.md");
|
||||
std::fs::write(
|
||||
&summary_path,
|
||||
format!("---\nupdated_at: {n}\n---\nthe needle is here\n", n = now()),
|
||||
)
|
||||
.unwrap();
|
||||
let (_, tool) = memory_query_tool(layout, QueryConfig::default())();
|
||||
let inp = serde_json::json!({ "query": "needle" });
|
||||
let out = tool.execute(&inp.to_string()).await.unwrap();
|
||||
let records: Vec<OwnedMemoryRecord> = parse_records(&out);
|
||||
assert_eq!(records.len(), 1);
|
||||
assert_eq!(records[0].slug, "summary");
|
||||
assert_eq!(records[0].kind, "summary");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn memory_query_excludes_workflow_and_staging() {
|
||||
let (dir, layout) = setup();
|
||||
let wf = dir.path().join("memory/workflow/wf.md");
|
||||
std::fs::write(&wf, "needle in workflow\n").unwrap();
|
||||
let stg = dir.path().join("memory/_staging/abc.json");
|
||||
std::fs::write(&stg, "needle in staging\n").unwrap();
|
||||
|
||||
let (_, tool) = memory_query_tool(layout, QueryConfig::default())();
|
||||
let inp = serde_json::json!({ "query": "needle" });
|
||||
let out = tool.execute(&inp.to_string()).await.unwrap();
|
||||
let records: Vec<OwnedMemoryRecord> = parse_records(&out);
|
||||
assert!(records.is_empty(), "got records: {:?}", out.content);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn memory_query_respects_result_limit() {
|
||||
let (dir, layout) = setup();
|
||||
for i in 0..10 {
|
||||
write_decision(dir.path(), &format!("rec-{i}"), "needle line\n");
|
||||
}
|
||||
let cfg = QueryConfig {
|
||||
result_limit: 3,
|
||||
excerpt_lines: 1,
|
||||
};
|
||||
let (_, tool) = memory_query_tool(layout, cfg)();
|
||||
let inp = serde_json::json!({ "query": "needle" });
|
||||
let out = tool.execute(&inp.to_string()).await.unwrap();
|
||||
let records: Vec<OwnedMemoryRecord> = parse_records(&out);
|
||||
assert_eq!(records.len(), 3);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn memory_query_excerpt_includes_context_lines() {
|
||||
let (dir, layout) = setup();
|
||||
write_decision(
|
||||
dir.path(),
|
||||
"ctx",
|
||||
"line a\nline b\nNEEDLE here\nline d\nline e\n",
|
||||
);
|
||||
let cfg = QueryConfig {
|
||||
result_limit: 5,
|
||||
excerpt_lines: 1,
|
||||
};
|
||||
let (_, tool) = memory_query_tool(layout, cfg)();
|
||||
let inp = serde_json::json!({ "query": "needle" });
|
||||
let out = tool.execute(&inp.to_string()).await.unwrap();
|
||||
let records: Vec<OwnedMemoryRecord> = parse_records(&out);
|
||||
assert_eq!(records.len(), 1);
|
||||
let e = records[0].excerpt.as_deref().unwrap();
|
||||
assert!(e.contains("line b"));
|
||||
assert!(e.contains("NEEDLE here"));
|
||||
assert!(e.contains("line d"));
|
||||
assert!(!e.contains("line a"));
|
||||
assert!(!e.contains("line e"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn memory_query_blank_query_rejected() {
|
||||
let (_dir, layout) = setup();
|
||||
let (_, tool) = memory_query_tool(layout, QueryConfig::default())();
|
||||
let inp = serde_json::json!({ "query": " " });
|
||||
let err = tool.execute(&inp.to_string()).await.unwrap_err();
|
||||
assert!(matches!(err, ToolError::InvalidArgument(_)));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn knowledge_query_returns_frontmatter_fields() {
|
||||
let (dir, layout) = setup();
|
||||
write_knowledge(
|
||||
dir.path(),
|
||||
"policy",
|
||||
"policy",
|
||||
"the policy doc",
|
||||
"Ollama first\n",
|
||||
);
|
||||
let (_, tool) = knowledge_query_tool(layout, QueryConfig::default())();
|
||||
let inp = serde_json::json!({ "query": "ollama" });
|
||||
let out = tool.execute(&inp.to_string()).await.unwrap();
|
||||
let records: Vec<OwnedKnowledgeRecord> = parse_records(&out);
|
||||
assert_eq!(records.len(), 1);
|
||||
assert_eq!(records[0].slug, "policy");
|
||||
assert_eq!(records[0].kind.as_deref(), Some("policy"));
|
||||
assert_eq!(records[0].description.as_deref(), Some("the policy doc"));
|
||||
assert_eq!(records[0].model_invokation, Some(false));
|
||||
assert!(records[0]
|
||||
.excerpt
|
||||
.as_deref()
|
||||
.unwrap()
|
||||
.to_lowercase()
|
||||
.contains("ollama"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn knowledge_query_without_query_lists_all_records() {
|
||||
let (dir, layout) = setup();
|
||||
write_knowledge(dir.path(), "p1", "policy", "d1", "body\n");
|
||||
write_knowledge(dir.path(), "h1", "howto", "d2", "body\n");
|
||||
|
||||
let (_, tool) = knowledge_query_tool(layout, QueryConfig::default())();
|
||||
let out = tool.execute("{}").await.unwrap();
|
||||
let records: Vec<OwnedKnowledgeRecord> = parse_records(&out);
|
||||
let mut slugs: Vec<&str> = records.iter().map(|r| r.slug.as_str()).collect();
|
||||
slugs.sort();
|
||||
assert_eq!(slugs, vec!["h1", "p1"]);
|
||||
assert!(records.iter().all(|r| r.excerpt.is_none()));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn knowledge_query_kind_filter() {
|
||||
let (dir, layout) = setup();
|
||||
write_knowledge(dir.path(), "p1", "policy", "d1", "needle\n");
|
||||
write_knowledge(dir.path(), "h1", "howto", "d2", "needle\n");
|
||||
|
||||
let (_, tool) = knowledge_query_tool(layout, QueryConfig::default())();
|
||||
let inp = serde_json::json!({ "query": "needle", "kind": "howto" });
|
||||
let out = tool.execute(&inp.to_string()).await.unwrap();
|
||||
let records: Vec<OwnedKnowledgeRecord> = parse_records(&out);
|
||||
assert_eq!(records.len(), 1);
|
||||
assert_eq!(records[0].slug, "h1");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn knowledge_query_kind_filter_works_without_query() {
|
||||
let (dir, layout) = setup();
|
||||
write_knowledge(dir.path(), "p1", "policy", "d1", "body\n");
|
||||
write_knowledge(dir.path(), "h1", "howto", "d2", "body\n");
|
||||
|
||||
let (_, tool) = knowledge_query_tool(layout, QueryConfig::default())();
|
||||
let inp = serde_json::json!({ "kind": "howto" });
|
||||
let out = tool.execute(&inp.to_string()).await.unwrap();
|
||||
let records: Vec<OwnedKnowledgeRecord> = parse_records(&out);
|
||||
assert_eq!(records.len(), 1);
|
||||
assert_eq!(records[0].slug, "h1");
|
||||
assert!(records[0].excerpt.is_none());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn knowledge_query_searches_frontmatter_too() {
|
||||
let (dir, layout) = setup();
|
||||
write_knowledge(dir.path(), "p", "policy", "mentions xyzzy here", "body\n");
|
||||
|
||||
let (_, tool) = knowledge_query_tool(layout, QueryConfig::default())();
|
||||
let inp = serde_json::json!({ "query": "xyzzy" });
|
||||
let out = tool.execute(&inp.to_string()).await.unwrap();
|
||||
let records: Vec<OwnedKnowledgeRecord> = parse_records(&out);
|
||||
assert_eq!(records.len(), 1);
|
||||
assert_eq!(records[0].slug, "p");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn knowledge_query_no_matches_returns_empty() {
|
||||
let (dir, layout) = setup();
|
||||
write_knowledge(dir.path(), "p", "policy", "d", "no match\n");
|
||||
let (_, tool) = knowledge_query_tool(layout, QueryConfig::default())();
|
||||
let inp = serde_json::json!({ "query": "absent" });
|
||||
let out = tool.execute(&inp.to_string()).await.unwrap();
|
||||
let records: Vec<OwnedKnowledgeRecord> = parse_records(&out);
|
||||
assert!(records.is_empty());
|
||||
}
|
||||
}
|
||||
|
|
@ -1,575 +0,0 @@
|
|||
//! `MemorySearch` / `KnowledgeSearch` tools.
|
||||
//!
|
||||
//! Both perform a case-insensitive substring scan over markdown record
|
||||
//! files, returning a list of `{slug, kind, ..., excerpt}` entries.
|
||||
//! Excerpts are `excerpt_lines` lines before and after the matched
|
||||
//! line (so 2N+1 lines per excerpt when not clipped).
|
||||
//!
|
||||
//! - `MemorySearch` walks `memory/summary.md`, `memory/decisions/`,
|
||||
//! `memory/requests/`. `memory/workflow/` and `memory/_staging/`
|
||||
//! are excluded by construction.
|
||||
//! - `KnowledgeSearch` walks `knowledge/*.md` and supports a `kind`
|
||||
//! filter against the Knowledge frontmatter's `kind` field.
|
||||
//!
|
||||
//! No derived index — the file tree is the source of truth and is
|
||||
//! re-scanned per call. grep 出現順: within a file by line order,
|
||||
//! across files by sorted filename.
|
||||
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta, ToolOutput};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::schema::{KnowledgeFrontmatter, split_frontmatter};
|
||||
use crate::workspace::WorkspaceLayout;
|
||||
|
||||
const DEFAULT_HIT_LIMIT: usize = 20;
|
||||
const DEFAULT_EXCERPT_LINES: usize = 3;
|
||||
|
||||
const MEMORY_SEARCH_DESCRIPTION: &str = "Search memory records (summary / decisions / \
|
||||
requests) for a substring. Returns up to a hit cap (configurable via the manifest's \
|
||||
`[memory]` section) as `{slug, kind, excerpt}` entries with line context. Use the \
|
||||
returned `slug` + `kind` with MemoryRead to fetch the full record. Workflow and \
|
||||
staging directories are not searched.";
|
||||
|
||||
const KNOWLEDGE_SEARCH_DESCRIPTION: &str = "Search knowledge records for a substring. \
|
||||
Optional `kind` filters by the Knowledge frontmatter's `kind` field; records whose \
|
||||
frontmatter fails to parse are skipped when `kind` is given (the body is still \
|
||||
searched when `kind` is omitted). Returns up to a hit cap (configurable via the \
|
||||
manifest's `[memory]` section) as `{slug, kind, description, model_invokation, \
|
||||
excerpt}` entries with line context. Use the returned `slug` with MemoryRead \
|
||||
(kind=knowledge) for the full record.";
|
||||
|
||||
/// Tunables passed in from the manifest.
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct SearchConfig {
|
||||
pub hit_limit: usize,
|
||||
/// Lines of context before and after each matched line.
|
||||
pub excerpt_lines: usize,
|
||||
}
|
||||
|
||||
impl Default for SearchConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
hit_limit: DEFAULT_HIT_LIMIT,
|
||||
excerpt_lines: DEFAULT_EXCERPT_LINES,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&manifest::MemoryConfig> for SearchConfig {
|
||||
fn from(cfg: &manifest::MemoryConfig) -> Self {
|
||||
let mut out = Self::default();
|
||||
if let Some(n) = cfg.search_hit_limit {
|
||||
out.hit_limit = n;
|
||||
}
|
||||
if let Some(n) = cfg.search_excerpt_lines {
|
||||
out.excerpt_lines = n;
|
||||
}
|
||||
out
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, schemars::JsonSchema)]
|
||||
struct MemorySearchParams {
|
||||
/// Substring to search for. Case-insensitive.
|
||||
query: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, schemars::JsonSchema)]
|
||||
struct KnowledgeSearchParams {
|
||||
/// Substring to search for. Case-insensitive.
|
||||
query: String,
|
||||
/// Optional filter on the Knowledge frontmatter's `kind` field.
|
||||
#[serde(default)]
|
||||
kind: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
struct MemoryHit {
|
||||
slug: String,
|
||||
kind: &'static str,
|
||||
excerpt: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
struct KnowledgeHit {
|
||||
slug: String,
|
||||
kind: Option<String>,
|
||||
description: Option<String>,
|
||||
model_invokation: Option<bool>,
|
||||
excerpt: String,
|
||||
}
|
||||
|
||||
struct MemorySearchTool {
|
||||
layout: WorkspaceLayout,
|
||||
config: SearchConfig,
|
||||
}
|
||||
|
||||
struct KnowledgeSearchTool {
|
||||
layout: WorkspaceLayout,
|
||||
config: SearchConfig,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Tool for MemorySearchTool {
|
||||
async fn execute(&self, input_json: &str) -> Result<ToolOutput, ToolError> {
|
||||
let params: MemorySearchParams = serde_json::from_str(input_json)
|
||||
.map_err(|e| ToolError::InvalidArgument(format!("invalid MemorySearch input: {e}")))?;
|
||||
let needle = validate_query(¶ms.query)?;
|
||||
|
||||
let mut hits: Vec<MemoryHit> = Vec::new();
|
||||
let limit = self.config.hit_limit;
|
||||
let ctx = self.config.excerpt_lines;
|
||||
|
||||
// summary
|
||||
if hits.len() < limit {
|
||||
let summary_path = self.layout.summary_path();
|
||||
if summary_path.is_file() {
|
||||
scan_file(&summary_path, &needle, ctx, limit - hits.len(), |excerpt| {
|
||||
hits.push(MemoryHit {
|
||||
slug: "summary".to_string(),
|
||||
kind: "summary",
|
||||
excerpt,
|
||||
});
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// decisions
|
||||
if hits.len() < limit {
|
||||
for (path, slug) in list_md_files(&self.layout.decisions_dir()) {
|
||||
if hits.len() >= limit {
|
||||
break;
|
||||
}
|
||||
scan_file(&path, &needle, ctx, limit - hits.len(), |excerpt| {
|
||||
hits.push(MemoryHit {
|
||||
slug: slug.clone(),
|
||||
kind: "decision",
|
||||
excerpt,
|
||||
});
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// requests
|
||||
if hits.len() < limit {
|
||||
for (path, slug) in list_md_files(&self.layout.requests_dir()) {
|
||||
if hits.len() >= limit {
|
||||
break;
|
||||
}
|
||||
scan_file(&path, &needle, ctx, limit - hits.len(), |excerpt| {
|
||||
hits.push(MemoryHit {
|
||||
slug: slug.clone(),
|
||||
kind: "request",
|
||||
excerpt,
|
||||
});
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let body = serde_json::to_string_pretty(&hits)
|
||||
.map_err(|e| ToolError::ExecutionFailed(format!("serialize hits: {e}")))?;
|
||||
Ok(ToolOutput {
|
||||
summary: format!("{} hit(s) for {:?}", hits.len(), params.query),
|
||||
content: Some(body),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Tool for KnowledgeSearchTool {
|
||||
async fn execute(&self, input_json: &str) -> Result<ToolOutput, ToolError> {
|
||||
let params: KnowledgeSearchParams = serde_json::from_str(input_json).map_err(|e| {
|
||||
ToolError::InvalidArgument(format!("invalid KnowledgeSearch input: {e}"))
|
||||
})?;
|
||||
let needle = validate_query(¶ms.query)?;
|
||||
let kind_filter = params.kind.as_deref();
|
||||
|
||||
let mut hits: Vec<KnowledgeHit> = Vec::new();
|
||||
let limit = self.config.hit_limit;
|
||||
let ctx = self.config.excerpt_lines;
|
||||
|
||||
for (path, slug) in list_md_files(&self.layout.knowledge_dir()) {
|
||||
if hits.len() >= limit {
|
||||
break;
|
||||
}
|
||||
// Try to parse frontmatter for description/model_invokation/kind.
|
||||
let raw = match std::fs::read_to_string(&path) {
|
||||
Ok(s) => s,
|
||||
Err(_) => continue,
|
||||
};
|
||||
let fm = parse_knowledge_frontmatter(&raw);
|
||||
|
||||
// kind filter applies to the frontmatter's kind field.
|
||||
if let Some(filter) = kind_filter {
|
||||
let matches = fm
|
||||
.as_ref()
|
||||
.map(|f| f.kind.as_str() == filter)
|
||||
.unwrap_or(false);
|
||||
if !matches {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
let kind = fm.as_ref().map(|f| f.kind.clone());
|
||||
let description = fm.as_ref().map(|f| f.description.clone());
|
||||
let model_invokation = fm.as_ref().map(|f| f.model_invokation);
|
||||
|
||||
scan_text(&raw, &needle, ctx, limit - hits.len(), |excerpt| {
|
||||
hits.push(KnowledgeHit {
|
||||
slug: slug.clone(),
|
||||
kind: kind.clone(),
|
||||
description: description.clone(),
|
||||
model_invokation,
|
||||
excerpt,
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
let body = serde_json::to_string_pretty(&hits)
|
||||
.map_err(|e| ToolError::ExecutionFailed(format!("serialize hits: {e}")))?;
|
||||
Ok(ToolOutput {
|
||||
summary: format!("{} hit(s) for {:?}", hits.len(), params.query),
|
||||
content: Some(body),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn validate_query(query: &str) -> Result<String, ToolError> {
|
||||
if query.trim().is_empty() {
|
||||
return Err(ToolError::InvalidArgument("query must not be empty".into()));
|
||||
}
|
||||
Ok(query.to_lowercase())
|
||||
}
|
||||
|
||||
/// Sorted list of `(path, slug)` for `*.md` files directly under `dir`.
|
||||
/// Returns empty if the directory doesn't exist.
|
||||
fn list_md_files(dir: &Path) -> Vec<(PathBuf, String)> {
|
||||
let mut out: Vec<(PathBuf, String)> = Vec::new();
|
||||
let entries = match std::fs::read_dir(dir) {
|
||||
Ok(it) => it,
|
||||
Err(_) => return out,
|
||||
};
|
||||
for entry in entries.flatten() {
|
||||
let path = entry.path();
|
||||
if !path.is_file() {
|
||||
continue;
|
||||
}
|
||||
let name = match path.file_name().and_then(|n| n.to_str()) {
|
||||
Some(n) => n,
|
||||
None => continue,
|
||||
};
|
||||
let slug = match name.strip_suffix(".md") {
|
||||
Some(s) => s.to_string(),
|
||||
None => continue,
|
||||
};
|
||||
out.push((path, slug));
|
||||
}
|
||||
out.sort_by(|a, b| a.1.cmp(&b.1));
|
||||
out
|
||||
}
|
||||
|
||||
fn scan_file(
|
||||
path: &Path,
|
||||
needle_lower: &str,
|
||||
ctx: usize,
|
||||
remaining: usize,
|
||||
mut on_match: impl FnMut(String),
|
||||
) {
|
||||
if remaining == 0 {
|
||||
return;
|
||||
}
|
||||
let text = match std::fs::read_to_string(path) {
|
||||
Ok(t) => t,
|
||||
Err(_) => return,
|
||||
};
|
||||
scan_text(&text, needle_lower, ctx, remaining, |e| on_match(e));
|
||||
}
|
||||
|
||||
fn scan_text(
|
||||
text: &str,
|
||||
needle_lower: &str,
|
||||
ctx: usize,
|
||||
remaining: usize,
|
||||
mut on_match: impl FnMut(String),
|
||||
) {
|
||||
if remaining == 0 {
|
||||
return;
|
||||
}
|
||||
let lines: Vec<&str> = text.lines().collect();
|
||||
let mut produced = 0;
|
||||
for (i, line) in lines.iter().enumerate() {
|
||||
if produced >= remaining {
|
||||
break;
|
||||
}
|
||||
if line.to_lowercase().contains(needle_lower) {
|
||||
let start = i.saturating_sub(ctx);
|
||||
let end = i.saturating_add(ctx + 1).min(lines.len());
|
||||
let excerpt = lines[start..end].join("\n");
|
||||
on_match(excerpt);
|
||||
produced += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Best-effort frontmatter parse. Returns `None` if missing/malformed
|
||||
/// — search still finds matches in the body even when the header is
|
||||
/// broken.
|
||||
fn parse_knowledge_frontmatter(raw: &str) -> Option<KnowledgeFrontmatter> {
|
||||
let (yaml, _body) = split_frontmatter(raw).ok()?;
|
||||
serde_yaml::from_str::<KnowledgeFrontmatter>(yaml).ok()
|
||||
}
|
||||
|
||||
pub fn memory_search_tool(layout: WorkspaceLayout, config: SearchConfig) -> ToolDefinition {
|
||||
Arc::new(move || {
|
||||
let schema = schemars::schema_for!(MemorySearchParams);
|
||||
let schema_value = serde_json::to_value(schema).unwrap_or(serde_json::json!({}));
|
||||
let meta = ToolMeta::new("MemorySearch")
|
||||
.description(MEMORY_SEARCH_DESCRIPTION)
|
||||
.input_schema(schema_value);
|
||||
let tool: Arc<dyn Tool> = Arc::new(MemorySearchTool {
|
||||
layout: layout.clone(),
|
||||
config,
|
||||
});
|
||||
(meta, tool)
|
||||
})
|
||||
}
|
||||
|
||||
pub fn knowledge_search_tool(layout: WorkspaceLayout, config: SearchConfig) -> ToolDefinition {
|
||||
Arc::new(move || {
|
||||
let schema = schemars::schema_for!(KnowledgeSearchParams);
|
||||
let schema_value = serde_json::to_value(schema).unwrap_or(serde_json::json!({}));
|
||||
let meta = ToolMeta::new("KnowledgeSearch")
|
||||
.description(KNOWLEDGE_SEARCH_DESCRIPTION)
|
||||
.input_schema(schema_value);
|
||||
let tool: Arc<dyn Tool> = Arc::new(KnowledgeSearchTool {
|
||||
layout: layout.clone(),
|
||||
config,
|
||||
});
|
||||
(meta, tool)
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use chrono::Utc;
|
||||
use tempfile::TempDir;
|
||||
|
||||
fn now() -> String {
|
||||
Utc::now().to_rfc3339()
|
||||
}
|
||||
|
||||
fn setup() -> (TempDir, WorkspaceLayout) {
|
||||
let dir = TempDir::new().unwrap();
|
||||
let layout = WorkspaceLayout::new(dir.path().to_path_buf());
|
||||
std::fs::create_dir_all(dir.path().join("memory/decisions")).unwrap();
|
||||
std::fs::create_dir_all(dir.path().join("memory/requests")).unwrap();
|
||||
std::fs::create_dir_all(dir.path().join("memory/workflow")).unwrap();
|
||||
std::fs::create_dir_all(dir.path().join("memory/_staging")).unwrap();
|
||||
std::fs::create_dir_all(dir.path().join("knowledge")).unwrap();
|
||||
(dir, layout)
|
||||
}
|
||||
|
||||
fn write_decision(dir: &Path, slug: &str, body: &str) {
|
||||
let path = dir.join("memory/decisions").join(format!("{slug}.md"));
|
||||
let content = format!(
|
||||
"---\ncreated_at: {n}\nupdated_at: {n}\nsources: []\nstatus: open\n---\n{body}",
|
||||
n = now()
|
||||
);
|
||||
std::fs::write(path, content).unwrap();
|
||||
}
|
||||
|
||||
fn write_knowledge(dir: &Path, slug: &str, kind: &str, description: &str, body: &str) {
|
||||
let path = dir.join("knowledge").join(format!("{slug}.md"));
|
||||
let content = format!(
|
||||
"---\ncreated_at: {n}\nupdated_at: {n}\nkind: {kind}\ndescription: \"{description}\"\nmodel_invokation: false\nuser_invocable: true\nlast_sources: []\n---\n{body}",
|
||||
n = now()
|
||||
);
|
||||
std::fs::write(path, content).unwrap();
|
||||
}
|
||||
|
||||
fn parse_hits<T: for<'de> serde::Deserialize<'de>>(out: &ToolOutput) -> Vec<T> {
|
||||
serde_json::from_str(out.content.as_ref().unwrap()).unwrap()
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct OwnedMemoryHit {
|
||||
slug: String,
|
||||
kind: String,
|
||||
excerpt: String,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct OwnedKnowledgeHit {
|
||||
slug: String,
|
||||
kind: Option<String>,
|
||||
description: Option<String>,
|
||||
model_invokation: Option<bool>,
|
||||
excerpt: String,
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn memory_search_finds_decision_body() {
|
||||
let (dir, layout) = setup();
|
||||
write_decision(dir.path(), "alpha", "we chose Ollama because it works\n");
|
||||
write_decision(dir.path(), "beta", "no match here\n");
|
||||
let (_, tool) = memory_search_tool(layout, SearchConfig::default())();
|
||||
let inp = serde_json::json!({ "query": "ollama" });
|
||||
let out = tool.execute(&inp.to_string()).await.unwrap();
|
||||
let hits: Vec<OwnedMemoryHit> = parse_hits(&out);
|
||||
assert_eq!(hits.len(), 1);
|
||||
assert_eq!(hits[0].slug, "alpha");
|
||||
assert_eq!(hits[0].kind, "decision");
|
||||
assert!(hits[0].excerpt.to_lowercase().contains("ollama"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn memory_search_finds_summary() {
|
||||
let (dir, layout) = setup();
|
||||
let summary_path = dir.path().join("memory/summary.md");
|
||||
std::fs::write(
|
||||
&summary_path,
|
||||
format!("---\nupdated_at: {n}\n---\nthe needle is here\n", n = now()),
|
||||
)
|
||||
.unwrap();
|
||||
let (_, tool) = memory_search_tool(layout, SearchConfig::default())();
|
||||
let inp = serde_json::json!({ "query": "needle" });
|
||||
let out = tool.execute(&inp.to_string()).await.unwrap();
|
||||
let hits: Vec<OwnedMemoryHit> = parse_hits(&out);
|
||||
assert_eq!(hits.len(), 1);
|
||||
assert_eq!(hits[0].slug, "summary");
|
||||
assert_eq!(hits[0].kind, "summary");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn memory_search_excludes_workflow_and_staging() {
|
||||
let (dir, layout) = setup();
|
||||
// Workflow and staging files contain the needle but must be ignored.
|
||||
let wf = dir.path().join("memory/workflow/wf.md");
|
||||
std::fs::write(&wf, "needle in workflow\n").unwrap();
|
||||
let stg = dir.path().join("memory/_staging/abc.json");
|
||||
std::fs::write(&stg, "needle in staging\n").unwrap();
|
||||
|
||||
let (_, tool) = memory_search_tool(layout, SearchConfig::default())();
|
||||
let inp = serde_json::json!({ "query": "needle" });
|
||||
let out = tool.execute(&inp.to_string()).await.unwrap();
|
||||
let hits: Vec<OwnedMemoryHit> = parse_hits(&out);
|
||||
assert!(hits.is_empty(), "got hits: {:?}", out.content);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn memory_search_respects_hit_limit() {
|
||||
let (dir, layout) = setup();
|
||||
for i in 0..10 {
|
||||
write_decision(dir.path(), &format!("rec-{i}"), "needle line\n");
|
||||
}
|
||||
let cfg = SearchConfig {
|
||||
hit_limit: 3,
|
||||
excerpt_lines: 1,
|
||||
};
|
||||
let (_, tool) = memory_search_tool(layout, cfg)();
|
||||
let inp = serde_json::json!({ "query": "needle" });
|
||||
let out = tool.execute(&inp.to_string()).await.unwrap();
|
||||
let hits: Vec<OwnedMemoryHit> = parse_hits(&out);
|
||||
assert_eq!(hits.len(), 3);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn memory_search_excerpt_includes_context_lines() {
|
||||
let (dir, layout) = setup();
|
||||
write_decision(
|
||||
dir.path(),
|
||||
"ctx",
|
||||
"line a\nline b\nNEEDLE here\nline d\nline e\n",
|
||||
);
|
||||
let cfg = SearchConfig {
|
||||
hit_limit: 5,
|
||||
excerpt_lines: 1,
|
||||
};
|
||||
let (_, tool) = memory_search_tool(layout, cfg)();
|
||||
let inp = serde_json::json!({ "query": "needle" });
|
||||
let out = tool.execute(&inp.to_string()).await.unwrap();
|
||||
let hits: Vec<OwnedMemoryHit> = parse_hits(&out);
|
||||
assert_eq!(hits.len(), 1);
|
||||
let e = &hits[0].excerpt;
|
||||
assert!(e.contains("line b"));
|
||||
assert!(e.contains("NEEDLE here"));
|
||||
assert!(e.contains("line d"));
|
||||
assert!(!e.contains("line a"));
|
||||
assert!(!e.contains("line e"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn memory_search_empty_query_rejected() {
|
||||
let (_dir, layout) = setup();
|
||||
let (_, tool) = memory_search_tool(layout, SearchConfig::default())();
|
||||
let inp = serde_json::json!({ "query": " " });
|
||||
let err = tool.execute(&inp.to_string()).await.unwrap_err();
|
||||
assert!(matches!(err, ToolError::InvalidArgument(_)));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn knowledge_search_returns_frontmatter_fields() {
|
||||
let (dir, layout) = setup();
|
||||
write_knowledge(
|
||||
dir.path(),
|
||||
"policy",
|
||||
"policy",
|
||||
"the policy doc",
|
||||
"Ollama first\n",
|
||||
);
|
||||
let (_, tool) = knowledge_search_tool(layout, SearchConfig::default())();
|
||||
let inp = serde_json::json!({ "query": "ollama" });
|
||||
let out = tool.execute(&inp.to_string()).await.unwrap();
|
||||
let hits: Vec<OwnedKnowledgeHit> = parse_hits(&out);
|
||||
assert_eq!(hits.len(), 1);
|
||||
assert_eq!(hits[0].slug, "policy");
|
||||
assert_eq!(hits[0].kind.as_deref(), Some("policy"));
|
||||
assert_eq!(hits[0].description.as_deref(), Some("the policy doc"));
|
||||
assert_eq!(hits[0].model_invokation, Some(false));
|
||||
assert!(hits[0].excerpt.to_lowercase().contains("ollama"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn knowledge_search_kind_filter() {
|
||||
let (dir, layout) = setup();
|
||||
write_knowledge(dir.path(), "p1", "policy", "d1", "needle\n");
|
||||
write_knowledge(dir.path(), "h1", "howto", "d2", "needle\n");
|
||||
|
||||
let (_, tool) = knowledge_search_tool(layout, SearchConfig::default())();
|
||||
let inp = serde_json::json!({ "query": "needle", "kind": "howto" });
|
||||
let out = tool.execute(&inp.to_string()).await.unwrap();
|
||||
let hits: Vec<OwnedKnowledgeHit> = parse_hits(&out);
|
||||
assert_eq!(hits.len(), 1);
|
||||
assert_eq!(hits[0].slug, "h1");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn knowledge_search_searches_frontmatter_too() {
|
||||
// Spec completion criteria: "frontmatter 含む全文から excerpt 付きでヒットが返る"
|
||||
let (dir, layout) = setup();
|
||||
write_knowledge(dir.path(), "p", "policy", "mentions xyzzy here", "body\n");
|
||||
|
||||
let (_, tool) = knowledge_search_tool(layout, SearchConfig::default())();
|
||||
let inp = serde_json::json!({ "query": "xyzzy" });
|
||||
let out = tool.execute(&inp.to_string()).await.unwrap();
|
||||
let hits: Vec<OwnedKnowledgeHit> = parse_hits(&out);
|
||||
assert_eq!(hits.len(), 1);
|
||||
assert_eq!(hits[0].slug, "p");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn knowledge_search_no_matches_returns_empty() {
|
||||
let (dir, layout) = setup();
|
||||
write_knowledge(dir.path(), "p", "policy", "d", "no match\n");
|
||||
let (_, tool) = knowledge_search_tool(layout, SearchConfig::default())();
|
||||
let inp = serde_json::json!({ "query": "absent" });
|
||||
let out = tool.execute(&inp.to_string()).await.unwrap();
|
||||
let hits: Vec<OwnedKnowledgeHit> = parse_hits(&out);
|
||||
assert!(hits.is_empty());
|
||||
}
|
||||
}
|
||||
|
|
@ -74,45 +74,15 @@ impl PodController {
|
|||
let (event_tx, _) = broadcast::channel::<Event>(256);
|
||||
let alerter = Alerter::new(event_tx.clone());
|
||||
|
||||
let manifest_toml = toml::to_string_pretty(pod.manifest()).unwrap_or_default();
|
||||
let greeting = build_greeting(&pod);
|
||||
let shared_state = Arc::new(PodSharedState::new(
|
||||
pod.manifest().pod.name.clone(),
|
||||
pod.session_id(),
|
||||
manifest_toml.clone(),
|
||||
greeting,
|
||||
));
|
||||
// Runtime directory is created before tool registration because
|
||||
// the spawn-tool factories need its socket path, and before the
|
||||
// initial status/history writes because those writes consume the
|
||||
// greeting we build after registration is complete.
|
||||
let runtime_dir =
|
||||
Arc::new(RuntimeDir::create(runtime_base, &pod.manifest().pod.name).await?);
|
||||
|
||||
// Create runtime directory and write initial files
|
||||
let runtime_dir = RuntimeDir::create(runtime_base, &pod.manifest().pod.name).await?;
|
||||
runtime_dir.write_manifest(&manifest_toml).await?;
|
||||
runtime_dir.write_status(&shared_state).await?;
|
||||
runtime_dir.write_history(&shared_state).await?;
|
||||
let runtime_dir = Arc::new(runtime_dir);
|
||||
|
||||
let handle = PodHandle {
|
||||
method_tx,
|
||||
event_tx: event_tx.clone(),
|
||||
shared_state: shared_state.clone(),
|
||||
runtime_dir: runtime_dir.clone(),
|
||||
alerter: alerter.clone(),
|
||||
};
|
||||
|
||||
// Hand the alerter to the Pod so internal operations (compaction,
|
||||
// AGENTS.md ingestion during the first turn) can emit user-facing
|
||||
// notifications on the same channel.
|
||||
pod.attach_alerter(alerter.clone());
|
||||
// Also hand the raw broadcast sender so Pod-internal operations
|
||||
// can emit typed lifecycle `Event`s (currently: compact progress).
|
||||
pod.attach_event_tx(event_tx.clone());
|
||||
|
||||
// Start socket server (lives as a background task, cleaned up on drop via RuntimeDir)
|
||||
let _socket_server = SocketServer::start(&handle).await?;
|
||||
// Keep the server alive by moving it into the controller task
|
||||
// (it will be dropped when the task ends)
|
||||
|
||||
// Grab the scope/pwd before the mutable borrow of the worker so we
|
||||
// can build a `ScopedFs` for the builtin tools.
|
||||
// Snapshot pod-immutable values needed for tool factories so the
|
||||
// mutable worker borrow below doesn't conflict with reads on `pod`.
|
||||
let scope_for_tools = pod.scope().clone();
|
||||
let pwd_for_tools = pod.pwd().to_path_buf();
|
||||
let spawner_name = pod.manifest().pod.name.clone();
|
||||
|
|
@ -130,6 +100,14 @@ impl PodController {
|
|||
let spawner_socket = runtime_dir.socket_path();
|
||||
let spawned_registry = SpawnedPodRegistry::new(runtime_dir.clone());
|
||||
|
||||
// Hand the alerter to the Pod so internal operations (compaction,
|
||||
// AGENTS.md ingestion during the first turn) can emit user-facing
|
||||
// notifications on the same channel.
|
||||
pod.attach_alerter(alerter.clone());
|
||||
// Also hand the raw broadcast sender so Pod-internal operations
|
||||
// can emit typed lifecycle `Event`s (currently: compact progress).
|
||||
pod.attach_event_tx(event_tx.clone());
|
||||
|
||||
// Register event bridge callbacks on the worker
|
||||
{
|
||||
let worker = pod.worker_mut();
|
||||
|
|
@ -238,12 +216,12 @@ impl PodController {
|
|||
// already applied during `Pod::from_manifest`.
|
||||
if let Some(mem) = memory_config.as_ref() {
|
||||
let layout = memory::WorkspaceLayout::resolve(mem, &pwd_for_tools);
|
||||
let search_cfg = memory::tool::SearchConfig::from(mem);
|
||||
let query_cfg = memory::tool::QueryConfig::from(mem);
|
||||
worker.register_tool(memory::tool::read_tool(layout.clone()));
|
||||
worker.register_tool(memory::tool::write_tool(layout.clone()));
|
||||
worker.register_tool(memory::tool::edit_tool(layout.clone()));
|
||||
worker.register_tool(memory::tool::memory_search_tool(layout.clone(), search_cfg));
|
||||
worker.register_tool(memory::tool::knowledge_search_tool(layout, search_cfg));
|
||||
worker.register_tool(memory::tool::memory_query_tool(layout.clone(), query_cfg));
|
||||
worker.register_tool(memory::tool::knowledge_query_tool(layout, query_cfg));
|
||||
}
|
||||
|
||||
// Pod-orchestration tools (SpawnPod + the four comm tools)
|
||||
|
|
@ -265,6 +243,36 @@ impl PodController {
|
|||
pod.attach_tracker(tracker);
|
||||
}
|
||||
|
||||
// Materialise pending tool factories so the greeting reflects
|
||||
// the actual registered set instead of a hand-maintained mirror.
|
||||
pod.worker().tool_server_handle().flush_pending();
|
||||
|
||||
// Greeting + initial runtime files now that the tool list is final.
|
||||
let manifest_toml = toml::to_string_pretty(pod.manifest()).unwrap_or_default();
|
||||
let greeting = build_greeting(&pod);
|
||||
let shared_state = Arc::new(PodSharedState::new(
|
||||
pod.manifest().pod.name.clone(),
|
||||
pod.session_id(),
|
||||
manifest_toml.clone(),
|
||||
greeting,
|
||||
));
|
||||
runtime_dir.write_manifest(&manifest_toml).await?;
|
||||
runtime_dir.write_status(&shared_state).await?;
|
||||
runtime_dir.write_history(&shared_state).await?;
|
||||
|
||||
let handle = PodHandle {
|
||||
method_tx,
|
||||
event_tx: event_tx.clone(),
|
||||
shared_state: shared_state.clone(),
|
||||
runtime_dir: runtime_dir.clone(),
|
||||
alerter: alerter.clone(),
|
||||
};
|
||||
|
||||
// Start socket server (lives as a background task, cleaned up on
|
||||
// drop via RuntimeDir). Kept alive by moving it into the
|
||||
// controller task so it drops when that task ends.
|
||||
let _socket_server = SocketServer::start(&handle).await?;
|
||||
|
||||
// Clone cancel sender and notification buffer before moving pod
|
||||
// into the controller task so the main loop can route
|
||||
// `Method::Notify` into the buffer even while `pod` is held by
|
||||
|
|
@ -719,28 +727,16 @@ where
|
|||
.unwrap_or_default(),
|
||||
),
|
||||
};
|
||||
// The tool list mirrors what `spawn()` registers on the Worker:
|
||||
// builtin filesystem tools plus the pod-orchestration tools.
|
||||
// Orchestration tools are appended by name because constructing
|
||||
// their factories here would require Pod-lifetime handles we
|
||||
// haven't built yet (runtime_dir, socket).
|
||||
let fs = tools::ScopedFs::new(pod.scope().clone(), pod.pwd().to_path_buf());
|
||||
let tracker = tools::Tracker::new();
|
||||
let mut tool_names: Vec<String> = tools::builtin_tools(fs, tracker)
|
||||
.iter()
|
||||
.map(|def| def().0.name)
|
||||
// Tool list reflects whatever `spawn()` ended up registering on the
|
||||
// Worker. Caller must have flushed pending factories first; without
|
||||
// a flush the tool table is empty and this returns an empty vec.
|
||||
let tool_names: Vec<String> = pod
|
||||
.worker()
|
||||
.tool_server_handle()
|
||||
.tool_definitions_sorted()
|
||||
.into_iter()
|
||||
.map(|def| def.name)
|
||||
.collect();
|
||||
tool_names.extend(
|
||||
[
|
||||
"SpawnPod",
|
||||
"SendToPod",
|
||||
"ReadPodOutput",
|
||||
"StopPod",
|
||||
"ListPods",
|
||||
]
|
||||
.iter()
|
||||
.map(|s| (*s).into()),
|
||||
);
|
||||
protocol::Greeting {
|
||||
pod_name: manifest.pod.name.clone(),
|
||||
cwd: pod.pwd().display().to_string(),
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@ resident_knowledge_section = """\
|
|||
---
|
||||
## Resident knowledge
|
||||
|
||||
The following knowledge records are advertised resident. Use the KnowledgeSearch / MemoryRead tools to fetch the full body when relevant.
|
||||
The following knowledge records are advertised resident. Use the KnowledgeQuery / MemoryRead tools to fetch the full body when relevant.
|
||||
|
||||
{{ entries }}\
|
||||
"""
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user