メモリーに関するクレート作成・ファイル構造の実装

This commit is contained in:
Keisuke Hirata 2026-04-27 13:33:31 +09:00
parent c7a873bcf9
commit f2e47629d0
33 changed files with 2780 additions and 32 deletions

38
Cargo.lock generated
View File

@ -1751,6 +1751,24 @@ dependencies = [
"autocfg", "autocfg",
] ]
[[package]]
name = "memory"
version = "0.1.0"
dependencies = [
"async-trait",
"chrono",
"llm-worker",
"manifest",
"schemars",
"serde",
"serde_json",
"serde_yaml",
"tempfile",
"thiserror 2.0.18",
"tokio",
"tracing",
]
[[package]] [[package]]
name = "mime" name = "mime"
version = "0.3.17" version = "0.3.17"
@ -2106,6 +2124,7 @@ dependencies = [
"libc", "libc",
"llm-worker", "llm-worker",
"manifest", "manifest",
"memory",
"minijinja", "minijinja",
"protocol", "protocol",
"provider", "provider",
@ -2840,6 +2859,19 @@ dependencies = [
"serde_core", "serde_core",
] ]
[[package]]
name = "serde_yaml"
version = "0.9.34+deprecated"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47"
dependencies = [
"indexmap",
"itoa",
"ryu",
"serde",
"unsafe-libyaml",
]
[[package]] [[package]]
name = "serial_test" name = "serial_test"
version = "3.4.0" version = "3.4.0"
@ -3593,6 +3625,12 @@ version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
[[package]]
name = "unsafe-libyaml"
version = "0.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861"
[[package]] [[package]]
name = "untrusted" name = "untrusted"
version = "0.9.0" version = "0.9.0"

View File

@ -10,7 +10,7 @@ members = [
"crates/protocol", "crates/protocol",
"crates/provider", "crates/provider",
"crates/tools", "crates/tools",
"crates/tui", "crates/tui", "crates/memory",
] ]
[workspace.package] [workspace.package]

View File

@ -14,7 +14,10 @@ use serde::{Deserialize, Serialize};
use crate::defaults; use crate::defaults;
use crate::model::{AuthRef, ModelManifest}; use crate::model::{AuthRef, ModelManifest};
use crate::{CompactionConfig, PodManifest, PodMeta, ScopeConfig, ToolOutputLimits, WorkerManifest}; use crate::{
CompactionConfig, MemoryConfig, PodManifest, PodMeta, ScopeConfig, ToolOutputLimits,
WorkerManifest,
};
/// Partial-form Pod manifest. Every field is optional; one or more /// Partial-form Pod manifest. Every field is optional; one or more
/// instances merge via [`PodManifestConfig::merge`] before being /// instances merge via [`PodManifestConfig::merge`] before being
@ -35,6 +38,9 @@ pub struct PodManifestConfig {
pub scope: ScopeConfig, pub scope: ScopeConfig,
#[serde(default)] #[serde(default)]
pub compaction: Option<CompactionConfigPartial>, pub compaction: Option<CompactionConfigPartial>,
/// Memory subsystem opt-in. See [`MemoryConfig`].
#[serde(default)]
pub memory: Option<MemoryConfig>,
} }
#[derive(Debug, Clone, Default, Serialize, Deserialize)] #[derive(Debug, Clone, Default, Serialize, Deserialize)]
@ -162,6 +168,11 @@ impl PodManifestConfig {
for rule in &mut self.scope.deny { for rule in &mut self.scope.deny {
rule.target = join_if_relative(base, &rule.target); rule.target = join_if_relative(base, &rule.target);
} }
if let Some(ref mut memory) = self.memory
&& let Some(ref mut root) = memory.workspace_root
{
*root = join_if_relative(base, root);
}
if let Some(ref mut compaction) = self.compaction if let Some(ref mut compaction) = self.compaction
&& let Some(ref mut cp) = compaction.model && let Some(ref mut cp) = compaction.model
{ {
@ -185,6 +196,15 @@ impl PodManifestConfig {
upper.compaction, upper.compaction,
CompactionConfigPartial::merge, CompactionConfigPartial::merge,
), ),
memory: merge_option(self.memory, upper.memory, MemoryConfig::merge),
}
}
}
impl MemoryConfig {
fn merge(self, upper: Self) -> Self {
Self {
workspace_root: upper.workspace_root.or(self.workspace_root),
} }
} }
} }
@ -375,6 +395,7 @@ impl TryFrom<PodManifestConfig> for PodManifest {
worker, worker,
scope: cfg.scope, scope: cfg.scope,
compaction, compaction,
memory: cfg.memory,
}) })
} }
} }
@ -417,6 +438,7 @@ mod tests {
deny: Vec::new(), deny: Vec::new(),
}, },
compaction: None, compaction: None,
memory: None,
} }
} }

View File

@ -31,6 +31,25 @@ pub struct PodManifest {
pub scope: ScopeConfig, pub scope: ScopeConfig,
#[serde(default)] #[serde(default)]
pub compaction: Option<CompactionConfig>, pub compaction: Option<CompactionConfig>,
/// Memory subsystem opt-in. Presence of `[memory]` in TOML enables
/// the memory tools (MemoryRead / MemoryWrite / MemoryEdit) and
/// causes Pod to deny generic write access to `<workspace>/memory/`
/// and `<workspace>/knowledge/`. Absent ⇒ legacy behaviour, no
/// memory tools registered.
#[serde(default)]
pub memory: Option<MemoryConfig>,
}
/// Memory subsystem configuration. Presence in the manifest enables
/// memory; the workspace root defaults to the Pod's pwd unless an
/// explicit override is given.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct MemoryConfig {
/// Override for the workspace root. When `None`, the Pod's pwd
/// (resolved at construction time) is used. When set, must be an
/// absolute path.
#[serde(default)]
pub workspace_root: Option<PathBuf>,
} }
/// Pod metadata. /// Pod metadata.
@ -411,6 +430,33 @@ model_id = "claude-sonnet-4-20250514"
assert!(manifest.compaction.is_none()); assert!(manifest.compaction.is_none());
} }
#[test]
fn omitted_memory_is_none() {
let manifest = PodManifest::from_toml(MINIMAL_REQUIRED).unwrap();
assert!(manifest.memory.is_none());
}
#[test]
fn empty_memory_section_enables_with_default_root() {
let toml = format!("{MINIMAL_REQUIRED}\n[memory]\n");
let manifest = PodManifest::from_toml(&toml).unwrap();
let mem = manifest.memory.expect("memory section parsed");
assert!(mem.workspace_root.is_none());
}
#[test]
fn memory_section_with_explicit_root() {
let toml = format!(
"{MINIMAL_REQUIRED}\n[memory]\nworkspace_root = \"/some/where\"\n"
);
let manifest = PodManifest::from_toml(&toml).unwrap();
let mem = manifest.memory.unwrap();
assert_eq!(
mem.workspace_root.unwrap(),
std::path::PathBuf::from("/some/where")
);
}
#[test] #[test]
fn reject_unknown_scheme() { fn reject_unknown_scheme() {
let toml = let toml =

21
crates/memory/Cargo.toml Normal file
View File

@ -0,0 +1,21 @@
[package]
name = "memory"
version = "0.1.0"
edition.workspace = true
license.workspace = true
[dependencies]
async-trait = "0.1.89"
chrono = { version = "0.4.44", features = ["serde"] }
llm-worker = { version = "0.2.1", path = "../llm-worker" }
manifest = { version = "0.1.0", path = "../manifest" }
schemars = "1.2.1"
serde = { version = "1.0.228", features = ["derive"] }
serde_json = "1.0.149"
serde_yaml = "0.9.34"
thiserror = "2.0.18"
tracing = "0.1.44"
[dev-dependencies]
tempfile = "3.27.0"
tokio = { version = "1.52.1", features = ["macros", "rt-multi-thread"] }

127
crates/memory/src/error.rs Normal file
View File

@ -0,0 +1,127 @@
//! Errors raised by the memory subsystem.
use std::path::PathBuf;
use thiserror::Error;
/// Top-level error for memory operations that don't fit the lint flow.
#[derive(Debug, Error)]
pub enum MemoryError {
#[error("path is not under the memory or knowledge tree: {}", .0.display())]
OutsideMemoryTree(PathBuf),
#[error("path is not absolute: {}", .0.display())]
RelativePath(PathBuf),
#[error("io error at {}: {source}", .path.display())]
Io {
path: PathBuf,
#[source]
source: std::io::Error,
},
}
impl MemoryError {
pub fn io(path: impl Into<PathBuf>, source: std::io::Error) -> Self {
Self::Io {
path: path.into(),
source,
}
}
}
/// A single Linter violation. Multiple are aggregated in a [`LintReport`].
///
/// `Display` produces a one-line message used directly in the `ToolError`
/// payload returned to the LLM.
#[derive(Debug, Clone, Error, PartialEq, Eq)]
pub enum LintError {
#[error("path is not a valid memory record location: {}", .0.display())]
InvalidPath(PathBuf),
#[error("path is for a different record kind than expected at this location: {}", .0.display())]
WrongRecordKind(PathBuf),
#[error("invalid slug `{0}`: must match ^[a-z0-9](?:[a-z0-9-]{{0,62}}[a-z0-9])?$")]
InvalidSlug(String),
#[error("malformed frontmatter: {0}")]
MalformedFrontmatter(String),
#[error("frontmatter is missing or document is empty")]
MissingFrontmatter,
#[error("missing required frontmatter field: `{0}`")]
MissingField(&'static str),
#[error("invalid value for `{field}`: {message}")]
InvalidField {
field: &'static str,
message: String,
},
#[error("Decisions `status` must be one of open|resolved|replaced (got `{0}`)")]
InvalidStatus(String),
#[error("Knowledge with model_invokation: true cannot have description longer than {limit} chars (got {actual})")]
DescriptionTooLong { actual: usize, limit: usize },
#[error("body exceeds the size limit for this record kind: {actual} chars > {limit}")]
BodyTooLong { actual: usize, limit: usize },
#[error("write to `memory/workflow/` is forbidden via the memory tool — Workflows are human-edited")]
WorkflowWriteForbidden,
#[error("slug `{0}` already exists; use the edit tool instead of creating a new record")]
SlugAlreadyExists(String),
#[error("`{field}` references unknown {kind} slug `{slug}`")]
UnknownReference {
field: &'static str,
kind: &'static str,
slug: String,
},
#[error("`replaced_by` chain forms a cycle: {chain}")]
ReplacedByCycle { chain: String },
#[error("`replaced_by` must point to a different slug than the record itself")]
ReplacedBySelf,
}
/// A single Linter warning (non-blocking).
///
/// Warnings ride along in the `ToolOutput.summary` so the agent can act
/// on them when convenient; they never abort the write.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LintWarning {
/// Single-source record exceeds the importance/size threshold.
LowImportanceLargeRecord { chars: usize },
/// `sources` array has grown past the soft cap.
SourcesOverflow { count: usize },
/// Multiple slugs in the same kind are within Levenshtein distance 2.
SimilarSlugs(Vec<String>),
}
impl std::fmt::Display for LintWarning {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::LowImportanceLargeRecord { chars } => write!(
f,
"record is large ({chars} chars) but only has 1 source — consider splitting or trimming"
),
Self::SourcesOverflow { count } => write!(
f,
"`sources` has {count} entries — consider keeping only the most recent and relying on git log for the rest"
),
Self::SimilarSlugs(slugs) => {
write!(f, "similar slugs detected (consider merging): ")?;
for (i, s) in slugs.iter().enumerate() {
if i > 0 {
write!(f, ", ")?;
}
write!(f, "{s}")?;
}
Ok(())
}
}
}
}

21
crates/memory/src/lib.rs Normal file
View File

@ -0,0 +1,21 @@
//! Memory subsystem: persistence layer for `memory/*` and `knowledge/*` records.
//!
//! Self-contained: provides its own Tool implementations (read/write/edit)
//! that target `<workspace>/memory/` and `<workspace>/knowledge/` only,
//! with a pre-write Linter built in. Generic CRUD tools (in the `tools`
//! crate) must not touch these directories — Pod is responsible for
//! denying them at the Scope level when memory is enabled.
pub mod error;
pub mod linter;
pub mod schema;
pub mod scope;
pub mod slug;
pub mod tool;
pub mod workspace;
pub use error::{LintError, LintWarning, MemoryError};
pub use linter::{LintReport, Linter};
pub use scope::deny_write_rules;
pub use slug::Slug;
pub use workspace::WorkspaceLayout;

View File

@ -0,0 +1,138 @@
//! Walks `<workspace>/memory/{decisions,requests}/`, `memory/workflow/`,
//! and `<workspace>/knowledge/` to collect the slug set the linter
//! needs for reference-integrity and same-slug-duplication checks.
//!
//! No caching: each lint call walks fresh. Tree size is expected to
//! stay small (hundreds of files, not thousands).
use std::collections::{HashMap, HashSet};
use std::io;
use std::path::Path;
use crate::schema::{
DecisionFrontmatter, KnowledgeFrontmatter, RequestFrontmatter, WorkflowFrontmatter,
split_frontmatter,
};
use crate::slug::Slug;
use crate::workspace::{RecordKind, WorkspaceLayout};
/// Snapshot of every record currently on disk under the workspace.
///
/// Carries enough metadata to answer:
/// - "does slug X of kind K exist?" (same-slug duplication, reference checks)
/// - "what is X's `replaced_by`?" (cycle detection)
/// - "what other slugs of kind K exist?" (similar-slug warning)
#[derive(Debug, Default, Clone)]
pub struct ExistingRecords {
decisions: HashMap<Slug, DecisionMeta>,
requests: HashSet<Slug>,
knowledge: HashSet<Slug>,
workflow: HashSet<Slug>,
}
#[derive(Debug, Clone)]
pub struct DecisionMeta {
pub replaced_by: Option<Slug>,
}
impl ExistingRecords {
pub fn contains(&self, kind: RecordKind, slug: &Slug) -> bool {
match kind {
RecordKind::Decision => self.decisions.contains_key(slug),
RecordKind::Request => self.requests.contains(slug),
RecordKind::Knowledge => self.knowledge.contains(slug),
RecordKind::Workflow => self.workflow.contains(slug),
RecordKind::Summary => false,
}
}
pub fn decision(&self, slug: &Slug) -> Option<&DecisionMeta> {
self.decisions.get(slug)
}
pub fn slugs(&self, kind: RecordKind) -> Vec<&Slug> {
match kind {
RecordKind::Decision => self.decisions.keys().collect(),
RecordKind::Request => self.requests.iter().collect(),
RecordKind::Knowledge => self.knowledge.iter().collect(),
RecordKind::Workflow => self.workflow.iter().collect(),
RecordKind::Summary => Vec::new(),
}
}
}
/// Walk the workspace and collect every record.
pub fn scan_existing(layout: &WorkspaceLayout) -> io::Result<ExistingRecords> {
let mut out = ExistingRecords::default();
scan_dir(&layout.decisions_dir(), |path, slug| {
let meta = read_decision_meta(path);
out.decisions.insert(slug, meta);
})?;
scan_dir(&layout.requests_dir(), |path, slug| {
// Parse to validate but discard contents — only slug existence
// matters for reference checks. Parse failure is silently
// ignored: existing record corruption isn't this write's
// responsibility to fix.
let _ = parse_silent::<RequestFrontmatter>(path);
out.requests.insert(slug);
})?;
scan_dir(&layout.knowledge_dir(), |path, slug| {
let _ = parse_silent::<KnowledgeFrontmatter>(path);
out.knowledge.insert(slug);
})?;
scan_dir(&layout.workflow_dir(), |path, slug| {
let _ = parse_silent::<WorkflowFrontmatter>(path);
out.workflow.insert(slug);
})?;
Ok(out)
}
fn scan_dir<F>(dir: &Path, mut visit: F) -> io::Result<()>
where
F: FnMut(&Path, Slug),
{
let entries = match std::fs::read_dir(dir) {
Ok(e) => e,
Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(()),
Err(err) => return Err(err),
};
for entry in entries {
let entry = entry?;
let path = entry.path();
if !path.is_file() {
continue;
}
let stem = match path.file_stem().and_then(|s| s.to_str()) {
Some(s) => s,
None => continue,
};
let ext = path.extension().and_then(|s| s.to_str()).unwrap_or("");
if ext != "md" {
continue;
}
if let Ok(slug) = Slug::parse(stem) {
visit(&path, slug);
}
}
Ok(())
}
fn read_decision_meta(path: &Path) -> DecisionMeta {
match parse_silent::<DecisionFrontmatter>(path) {
Some(fm) => DecisionMeta {
replaced_by: fm.replaced_by,
},
None => DecisionMeta { replaced_by: None },
}
}
fn parse_silent<F>(path: &Path) -> Option<F>
where
F: serde::de::DeserializeOwned,
{
let content = std::fs::read_to_string(path).ok()?;
let (yaml, _) = split_frontmatter(&content).ok()?;
serde_yaml::from_str::<F>(yaml).ok()
}

View File

@ -0,0 +1,67 @@
//! YAML frontmatter parsing helpers shared by every kind.
use serde::de::DeserializeOwned;
use crate::error::LintError;
/// Strict YAML deserialization that maps serde errors into the linter's
/// `MissingField` / `InvalidField` / `MalformedFrontmatter` taxonomy
/// when possible.
pub fn deserialize_strict<F: DeserializeOwned>(yaml: &str) -> Result<F, LintError> {
serde_yaml::from_str::<F>(yaml).map_err(map_serde_error)
}
fn map_serde_error(err: serde_yaml::Error) -> LintError {
let msg = err.to_string();
// `missing field \`X\`` is the exact pattern serde uses for missing
// required fields. Hoist into the typed variant so the LLM sees a
// crisp message it can act on.
if let Some(field) = parse_missing_field(&msg) {
return LintError::MissingField(field);
}
if let Some((field, message)) = parse_invalid_status(&msg) {
if field == "status" {
return LintError::InvalidStatus(message);
}
return LintError::InvalidField { field, message };
}
LintError::MalformedFrontmatter(msg)
}
fn parse_missing_field(msg: &str) -> Option<&'static str> {
let needle = "missing field `";
let start = msg.find(needle)? + needle.len();
let end = msg[start..].find('`')? + start;
let field_name = &msg[start..end];
static FIELDS: &[&str] = &[
"created_at",
"updated_at",
"sources",
"status",
"kind",
"description",
"model_invokation",
"user_invocable",
"last_sources",
"auto_invoke",
"requires",
];
FIELDS.iter().copied().find(|n| *n == field_name)
}
fn parse_invalid_status(msg: &str) -> Option<(&'static str, String)> {
// serde renders enum failures as: "unknown variant `Foo`, expected one of ..."
// We can't reliably attribute it to a specific field from the message
// alone, so we conservatively label it as `status` only when the
// message mentions one of the DecisionStatus variants in the
// expected set.
if msg.contains("unknown variant") && msg.contains("`open`") {
let needle = "unknown variant `";
let start = msg.find(needle)? + needle.len();
let end = msg[start..].find('`')? + start;
let bad = msg[start..end].to_string();
return Some(("status", bad));
}
None
}

View File

@ -0,0 +1,435 @@
//! Pre-write Linter for the memory subsystem.
//!
//! The linter is pure: given a [`WorkspaceLayout`], a target path, and
//! the proposed file content (raw bytes), it returns a [`LintReport`]
//! aggregating every applicable rule violation. The memory tool calls
//! this *before* committing to fs and surfaces a non-empty `errors`
//! collection back to the LLM as `ToolError::InvalidArgument`.
//!
//! Reference-integrity checks (`replaced_by` / `requires` existence,
//! cycle detection) walk the whole `memory/` and `knowledge/` trees
//! each call. No caching; the trees are expected to be small.
mod existing;
mod frontmatter;
mod references;
mod size;
mod warnings;
use std::path::Path;
use serde::de::DeserializeOwned;
use crate::error::{LintError, LintWarning};
use crate::schema::{
DecisionFrontmatter, KnowledgeFrontmatter, RequestFrontmatter, SummaryFrontmatter,
WorkflowFrontmatter, split_frontmatter,
};
use crate::workspace::{ClassifiedPath, RecordKind, WorkspaceLayout};
pub use existing::{ExistingRecords, scan_existing};
/// Aggregated linter result. `errors` empty ⇒ write proceeds.
#[derive(Debug, Default, Clone)]
pub struct LintReport {
pub errors: Vec<LintError>,
pub warnings: Vec<LintWarning>,
}
impl LintReport {
pub fn has_errors(&self) -> bool {
!self.errors.is_empty()
}
pub fn extend_errors(&mut self, more: impl IntoIterator<Item = LintError>) {
self.errors.extend(more);
}
pub fn push_error(&mut self, err: LintError) {
self.errors.push(err);
}
pub fn push_warning(&mut self, w: LintWarning) {
self.warnings.push(w);
}
}
/// Operation context: is this a brand-new file or an update of an
/// existing one? Affects same-slug duplication check.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WriteMode {
Create,
Update,
}
/// Stateless entry point holding the workspace layout. Cheap to clone.
#[derive(Debug, Clone)]
pub struct Linter {
layout: WorkspaceLayout,
}
impl Linter {
pub fn new(layout: WorkspaceLayout) -> Self {
Self { layout }
}
pub fn layout(&self) -> &WorkspaceLayout {
&self.layout
}
/// Lint a proposed write to `path` with the given UTF-8 `content`.
///
/// `mode` tells the linter whether the path already existed at the
/// moment of write — Create triggers same-slug duplication checks,
/// Update doesn't.
pub fn lint(&self, path: &Path, content: &str, mode: WriteMode) -> LintReport {
let mut report = LintReport::default();
// 1. Path classification.
let classified = match self.layout.classify(path) {
Ok(Some(cp)) => cp,
Ok(None) => {
report.push_error(LintError::InvalidPath(path.to_path_buf()));
return report;
}
Err(e) => {
report.push_error(e);
return report;
}
};
// 2. Workflow paths are sub-Worker-forbidden at the tool layer.
if classified.kind == RecordKind::Workflow {
report.push_error(LintError::WorkflowWriteForbidden);
return report;
}
// 3. Frontmatter parse + kind-specific structural checks +
// size limits. Reference-integrity needs the existing
// record set, fetched once below.
let existing = match existing::scan_existing(&self.layout) {
Ok(e) => e,
Err(e) => {
report.push_error(LintError::MalformedFrontmatter(format!(
"failed to scan existing records: {e}"
)));
return report;
}
};
// Same-slug check on Create.
if mode == WriteMode::Create {
if let Some(slug) = &classified.slug {
if existing.contains(classified.kind, slug) {
report.push_error(LintError::SlugAlreadyExists(slug.to_string()));
}
}
}
// Frontmatter parse dispatch by kind.
match classified.kind {
RecordKind::Decision => {
self.check_decision(content, &classified, &existing, &mut report);
}
RecordKind::Request => {
self.check_kind::<RequestFrontmatter>(content, &classified, &mut report);
}
RecordKind::Knowledge => {
self.check_knowledge(content, &classified, &mut report);
}
RecordKind::Summary => {
self.check_kind::<SummaryFrontmatter>(content, &classified, &mut report);
}
RecordKind::Workflow => unreachable!("guarded above"),
}
report
}
fn check_kind<F>(&self, content: &str, cp: &ClassifiedPath, report: &mut LintReport)
where
F: DeserializeOwned + crate::schema::Frontmatter,
{
let parsed = match parse_frontmatter::<F>(content) {
Ok(p) => p,
Err(e) => {
report.push_error(e);
return;
}
};
let body = parsed.body;
size::check_body::<F>(body, report);
warnings::check_warnings_kindless(cp, body, report);
let _ = parsed.frontmatter; // discarded after structural checks
}
fn check_decision(
&self,
content: &str,
cp: &ClassifiedPath,
existing: &ExistingRecords,
report: &mut LintReport,
) {
let parsed = match parse_frontmatter::<DecisionFrontmatter>(content) {
Ok(p) => p,
Err(e) => {
report.push_error(e);
return;
}
};
let fm = parsed.frontmatter;
size::check_body::<DecisionFrontmatter>(parsed.body, report);
// replaced_by structural rules.
if let Some(target) = &fm.replaced_by {
if let Some(self_slug) = &cp.slug {
if target == self_slug {
report.push_error(LintError::ReplacedBySelf);
}
}
references::check_replaced_by(
cp.slug.as_ref(),
target,
existing,
report,
);
}
warnings::check_warnings_with_sources(parsed.body, fm.sources.len(), report);
}
fn check_knowledge(
&self,
content: &str,
cp: &ClassifiedPath,
report: &mut LintReport,
) {
let parsed = match parse_frontmatter::<KnowledgeFrontmatter>(content) {
Ok(p) => p,
Err(e) => {
report.push_error(e);
return;
}
};
let fm = parsed.frontmatter;
size::check_body::<KnowledgeFrontmatter>(parsed.body, report);
if fm.model_invokation
&& fm.description.chars().count()
> crate::schema::KNOWLEDGE_DESCRIPTION_HARD_CAP
{
report.push_error(LintError::DescriptionTooLong {
actual: fm.description.chars().count(),
limit: crate::schema::KNOWLEDGE_DESCRIPTION_HARD_CAP,
});
}
warnings::check_warnings_with_sources(parsed.body, fm.last_sources.len(), report);
let _ = cp;
}
}
/// Workflow frontmatter validator exposed for human-edit paths
/// (CLI / pre-commit). Not used by the memory tool, which rejects
/// workflow writes outright.
pub fn lint_workflow_frontmatter(content: &str) -> Result<WorkflowFrontmatter, LintError> {
let parsed = parse_frontmatter::<WorkflowFrontmatter>(content)?;
Ok(parsed.frontmatter)
}
struct Parsed<'a, F> {
frontmatter: F,
body: &'a str,
}
fn parse_frontmatter<F: DeserializeOwned>(content: &str) -> Result<Parsed<'_, F>, LintError> {
let (yaml, body) = split_frontmatter(content)?;
let fm = frontmatter::deserialize_strict::<F>(yaml)?;
Ok(Parsed {
frontmatter: fm,
body,
})
}
#[cfg(test)]
mod tests {
use super::*;
use chrono::Utc;
use std::path::PathBuf;
use tempfile::TempDir;
fn write(p: &std::path::Path, content: &str) {
if let Some(parent) = p.parent() {
std::fs::create_dir_all(parent).unwrap();
}
std::fs::write(p, content).unwrap();
}
fn iso_now() -> String {
Utc::now().to_rfc3339()
}
fn workspace() -> (TempDir, Linter) {
let dir = TempDir::new().unwrap();
let layout = WorkspaceLayout::new(dir.path().to_path_buf());
let linter = Linter::new(layout);
(dir, linter)
}
#[test]
fn workflow_write_rejected() {
let (dir, linter) = workspace();
let path = dir.path().join("memory/workflow/wf.md");
let content = format!(
"---\nupdated_at: {now}\ndescription: x\nauto_invoke: false\nuser_invocable: true\n---\nbody",
now = iso_now()
);
let report = linter.lint(&path, &content, WriteMode::Create);
assert!(report.errors.iter().any(|e| matches!(e, LintError::WorkflowWriteForbidden)));
}
#[test]
fn outside_memory_tree_rejected() {
let (dir, linter) = workspace();
let path = dir.path().join("src/main.rs");
let report = linter.lint(&path, "ignored", WriteMode::Create);
assert!(report.errors.iter().any(|e| matches!(e, LintError::InvalidPath(_))));
}
#[test]
fn decision_with_unknown_replaced_by_errors() {
let (dir, linter) = workspace();
let path = dir.path().join("memory/decisions/foo.md");
let content = format!(
"---\ncreated_at: {now}\nupdated_at: {now}\nsources: []\nstatus: replaced\nreplaced_by: ghost\n---\nbody\n",
now = iso_now()
);
let report = linter.lint(&path, &content, WriteMode::Create);
assert!(report.errors.iter().any(|e| matches!(
e,
LintError::UnknownReference { .. }
)));
}
#[test]
fn decision_replaced_by_self_errors() {
let (dir, linter) = workspace();
let path = dir.path().join("memory/decisions/foo.md");
let content = format!(
"---\ncreated_at: {now}\nupdated_at: {now}\nsources: []\nstatus: replaced\nreplaced_by: foo\n---\nbody\n",
now = iso_now()
);
let report = linter.lint(&path, &content, WriteMode::Update);
assert!(report.errors.iter().any(|e| matches!(e, LintError::ReplacedBySelf)));
}
#[test]
fn decision_replaced_by_existing_ok() {
let (dir, linter) = workspace();
// Pre-create the target.
let target = dir.path().join("memory/decisions/bar.md");
write(
&target,
&format!(
"---\ncreated_at: {now}\nupdated_at: {now}\nsources: []\nstatus: open\n---\nbar body\n",
now = iso_now()
),
);
let path = dir.path().join("memory/decisions/foo.md");
let content = format!(
"---\ncreated_at: {now}\nupdated_at: {now}\nsources: []\nstatus: replaced\nreplaced_by: bar\n---\nbody\n",
now = iso_now()
);
let report = linter.lint(&path, &content, WriteMode::Create);
assert!(!report.has_errors(), "got errors: {:?}", report.errors);
}
#[test]
fn missing_required_field_errors() {
let (dir, linter) = workspace();
let path = dir.path().join("memory/decisions/foo.md");
// Missing `status`.
let content = format!(
"---\ncreated_at: {now}\nupdated_at: {now}\nsources: []\n---\nbody\n",
now = iso_now()
);
let report = linter.lint(&path, &content, WriteMode::Create);
assert!(report.errors.iter().any(|e| matches!(
e,
LintError::MissingField(_) | LintError::MalformedFrontmatter(_)
)));
}
#[test]
fn knowledge_long_description_with_model_invokation_errors() {
let (dir, linter) = workspace();
let path = dir.path().join("knowledge/foo.md");
let big_desc = "x".repeat(2000);
let content = format!(
"---\ncreated_at: {now}\nupdated_at: {now}\nkind: rule\ndescription: {big_desc}\nmodel_invokation: true\nuser_invocable: true\nlast_sources: []\n---\nbody\n",
now = iso_now()
);
let report = linter.lint(&path, &content, WriteMode::Create);
assert!(report.errors.iter().any(|e| matches!(e, LintError::DescriptionTooLong { .. })));
}
#[test]
fn knowledge_long_description_without_model_invokation_ok() {
let (dir, linter) = workspace();
let path = dir.path().join("knowledge/foo.md");
let big_desc = "x".repeat(2000);
let content = format!(
"---\ncreated_at: {now}\nupdated_at: {now}\nkind: rule\ndescription: {big_desc}\nmodel_invokation: false\nuser_invocable: true\nlast_sources: []\n---\nbody\n",
now = iso_now()
);
let report = linter.lint(&path, &content, WriteMode::Create);
assert!(!report.has_errors(), "got errors: {:?}", report.errors);
}
#[test]
fn summary_path_accepted() {
let (dir, linter) = workspace();
let path = dir.path().join("memory/summary.md");
let content = format!(
"---\nupdated_at: {now}\n---\nsummary body\n",
now = iso_now()
);
let report = linter.lint(&path, &content, WriteMode::Update);
assert!(!report.has_errors(), "got errors: {:?}", report.errors);
}
#[test]
fn create_when_existing_errors() {
let (dir, linter) = workspace();
let path = dir.path().join("memory/decisions/foo.md");
write(
&path,
&format!(
"---\ncreated_at: {now}\nupdated_at: {now}\nsources: []\nstatus: open\n---\nold\n",
now = iso_now()
),
);
let content = format!(
"---\ncreated_at: {now}\nupdated_at: {now}\nsources: []\nstatus: open\n---\nnew\n",
now = iso_now()
);
let report = linter.lint(&path, &content, WriteMode::Create);
assert!(report.errors.iter().any(|e| matches!(e, LintError::SlugAlreadyExists(_))));
}
#[test]
fn body_size_limit_errors() {
let (dir, linter) = workspace();
let path = dir.path().join("memory/decisions/foo.md");
let big_body = "x".repeat(8001);
let content = format!(
"---\ncreated_at: {now}\nupdated_at: {now}\nsources: []\nstatus: open\n---\n{body}",
now = iso_now(),
body = big_body
);
let report = linter.lint(&path, &content, WriteMode::Create);
assert!(report.errors.iter().any(|e| matches!(e, LintError::BodyTooLong { .. })));
// Sanity: ensure path was treated as PathBuf consistently.
let _ = PathBuf::from(path);
}
}

View File

@ -0,0 +1,73 @@
//! Reference-integrity checks: `replaced_by` existence + cycle detection.
//!
//! `requires` (Workflow) is checked symmetrically when/if the Workflow
//! linter is invoked from a human-edit path; the memory tool itself
//! never writes Workflow records.
use std::collections::HashSet;
use crate::error::LintError;
use crate::linter::ExistingRecords;
use crate::linter::LintReport;
use crate::slug::Slug;
use crate::workspace::RecordKind;
/// Validate a Decision's `replaced_by` against the existing record set.
///
/// `self_slug` is the slug of the record currently being written (None
/// only when the path was malformed and we shouldn't even reach here).
pub fn check_replaced_by(
self_slug: Option<&Slug>,
target: &Slug,
existing: &ExistingRecords,
report: &mut LintReport,
) {
// Existence: target must already be a Decision on disk.
if !existing.contains(RecordKind::Decision, target) {
report.push_error(LintError::UnknownReference {
field: "replaced_by",
kind: "decision",
slug: target.to_string(),
});
return;
}
// Cycle: walk the chain target → target.replaced_by → ... and
// ensure we never revisit `self_slug` or any node twice.
let mut visited = HashSet::new();
if let Some(s) = self_slug {
visited.insert(s.clone());
}
let mut cursor = Some(target.clone());
let mut chain: Vec<String> = Vec::new();
while let Some(node) = cursor {
if !visited.insert(node.clone()) {
chain.push(node.to_string());
report.push_error(LintError::ReplacedByCycle {
chain: chain.join(" -> "),
});
return;
}
chain.push(node.to_string());
cursor = existing
.decision(&node)
.and_then(|m| m.replaced_by.clone());
}
}
#[cfg(test)]
mod tests {
use super::*;
// Smoke test: cycle detection terminates on a 2-node loop where the
// existing tree already contains A↔B and the new write would close
// the loop. A direct unit test against `check_replaced_by` is
// exercised by linter::tests; here we just guard the loop bound.
#[test]
fn empty_chain_terminates() {
let mut report = LintReport::default();
let existing = ExistingRecords::default();
let target = Slug::parse("foo").unwrap();
check_replaced_by(None, &target, &existing, &mut report);
assert_eq!(report.errors.len(), 1);
}
}

View File

@ -0,0 +1,15 @@
//! Body size limit checks.
use crate::error::LintError;
use crate::linter::LintReport;
use crate::schema::Frontmatter;
pub fn check_body<F: Frontmatter>(body: &str, report: &mut LintReport) {
let chars = body.chars().count();
if chars > F::BODY_LIMIT {
report.push_error(LintError::BodyTooLong {
actual: chars,
limit: F::BODY_LIMIT,
});
}
}

View File

@ -0,0 +1,34 @@
//! Soft warnings: low-importance large records, sources accumulation.
//!
//! Similar-slug warnings need the existing record set and are
//! integrated into the main linter pass when implemented; this file
//! covers per-write checks that only need the proposed content.
use crate::error::LintWarning;
use crate::linter::LintReport;
use crate::workspace::ClassifiedPath;
const LARGE_BODY_THRESHOLD: usize = 1500;
const SOURCES_OVERFLOW_THRESHOLD: usize = 10;
/// For kinds that don't carry a `sources` array (Summary), emit only
/// the body-size warning.
pub fn check_warnings_kindless(_cp: &ClassifiedPath, body: &str, _report: &mut LintReport) {
let _ = body;
// Summary intentionally has no warning band — the per-record
// size:importance heuristic doesn't apply to a single rolling file.
}
/// For kinds with `sources` (Decisions / Requests / Knowledge), consult
/// both the body length and the sources count.
pub fn check_warnings_with_sources(body: &str, source_count: usize, report: &mut LintReport) {
let chars = body.chars().count();
if source_count <= 1 && chars >= LARGE_BODY_THRESHOLD {
report.push_warning(LintWarning::LowImportanceLargeRecord { chars });
}
if source_count > SOURCES_OVERFLOW_THRESHOLD {
report.push_warning(LintWarning::SourcesOverflow {
count: source_count,
});
}
}

View File

@ -0,0 +1,92 @@
//! Common frontmatter helpers and shared types.
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use crate::error::LintError;
/// Reference to a session-store entry range. Stored in `sources` /
/// `last_sources` arrays for traceability back to raw session logs.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct SourceRef {
pub session_id: String,
/// `[start_entry, end_entry]` inclusive range of session-store entry indices.
pub range: [u64; 2],
}
/// Trait every kind-specific frontmatter implements so the linter can
/// drive them uniformly.
pub trait Frontmatter: Sized {
/// Hard upper bound on body chars (excluding the frontmatter block).
const BODY_LIMIT: usize;
fn created_at(&self) -> DateTime<Utc>;
fn updated_at(&self) -> DateTime<Utc>;
}
const FRONTMATTER_DELIM: &str = "---";
/// Split a markdown document into `(yaml_frontmatter, body)`.
///
/// Expects the document to start with `---\n` and have a closing
/// `---\n` (or `---` at EOF) somewhere downstream. Trailing newline
/// after the closing delimiter is consumed.
pub fn split_frontmatter(content: &str) -> Result<(&str, &str), LintError> {
// The opening delimiter must be the very first line.
let after_open = content
.strip_prefix(FRONTMATTER_DELIM)
.and_then(|s| s.strip_prefix('\n').or(Some(s)))
.ok_or(LintError::MissingFrontmatter)?;
// Look for the closing `---` on its own line.
let mut yaml_end = None;
let mut byte_offset = 0usize;
for line in after_open.split_inclusive('\n') {
let trimmed = line.trim_end_matches('\n').trim_end_matches('\r');
if trimmed == FRONTMATTER_DELIM {
yaml_end = Some((byte_offset, byte_offset + line.len()));
break;
}
byte_offset += line.len();
}
let (yaml_end_excl, body_start) = yaml_end.ok_or_else(|| {
LintError::MalformedFrontmatter("missing closing `---` line".to_string())
})?;
let yaml = &after_open[..yaml_end_excl];
let body = &after_open[body_start..];
Ok((yaml, body))
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn splits_simple() {
let doc = "---\nfoo: 1\n---\nbody here\n";
let (y, b) = split_frontmatter(doc).unwrap();
assert_eq!(y, "foo: 1\n");
assert_eq!(b, "body here\n");
}
#[test]
fn no_leading_delim_errors() {
let err = split_frontmatter("hello").unwrap_err();
assert!(matches!(err, LintError::MissingFrontmatter));
}
#[test]
fn no_closing_delim_errors() {
let err = split_frontmatter("---\nfoo: 1\nno close\n").unwrap_err();
assert!(matches!(err, LintError::MalformedFrontmatter(_)));
}
#[test]
fn handles_empty_body() {
let doc = "---\nfoo: 1\n---\n";
let (_, b) = split_frontmatter(doc).unwrap();
assert_eq!(b, "");
}
}

View File

@ -0,0 +1,36 @@
//! Decisions frontmatter schema.
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use crate::schema::common::{Frontmatter, SourceRef};
use crate::slug::Slug;
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum DecisionStatus {
Open,
Resolved,
Replaced,
}
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct DecisionFrontmatter {
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
pub sources: Vec<SourceRef>,
pub status: DecisionStatus,
#[serde(default)]
pub replaced_by: Option<Slug>,
}
impl Frontmatter for DecisionFrontmatter {
const BODY_LIMIT: usize = 8000;
fn created_at(&self) -> DateTime<Utc> {
self.created_at
}
fn updated_at(&self) -> DateTime<Utc> {
self.updated_at
}
}

View File

@ -0,0 +1,33 @@
//! Knowledge frontmatter schema.
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use crate::schema::common::{Frontmatter, SourceRef};
/// Hard cap on `description` length when `model_invokation: true`.
/// Mirrors the agent-skills 1024-char rule for description that lives
/// in resident system-prompt budget.
pub const KNOWLEDGE_DESCRIPTION_HARD_CAP: usize = 1024;
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct KnowledgeFrontmatter {
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
pub kind: String,
pub description: String,
pub model_invokation: bool,
pub user_invocable: bool,
pub last_sources: Vec<SourceRef>,
}
impl Frontmatter for KnowledgeFrontmatter {
const BODY_LIMIT: usize = 8000;
fn created_at(&self) -> DateTime<Utc> {
self.created_at
}
fn updated_at(&self) -> DateTime<Utc> {
self.updated_at
}
}

View File

@ -0,0 +1,20 @@
//! Frontmatter schemas for memory records.
//!
//! Each record kind has its own typed `*Frontmatter` struct. The linter
//! deserializes the YAML between the leading `---` markers into the
//! kind-appropriate struct; field-level errors are surfaced as
//! [`LintError::MissingField`] / [`LintError::InvalidField`].
mod common;
mod decision;
mod knowledge;
mod request;
mod summary;
mod workflow;
pub use common::{Frontmatter, SourceRef, split_frontmatter};
pub use decision::{DecisionFrontmatter, DecisionStatus};
pub use knowledge::{KNOWLEDGE_DESCRIPTION_HARD_CAP, KnowledgeFrontmatter};
pub use request::RequestFrontmatter;
pub use summary::SummaryFrontmatter;
pub use workflow::WorkflowFrontmatter;

View File

@ -0,0 +1,24 @@
//! Requests frontmatter schema.
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use crate::schema::common::{Frontmatter, SourceRef};
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct RequestFrontmatter {
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
pub sources: Vec<SourceRef>,
}
impl Frontmatter for RequestFrontmatter {
const BODY_LIMIT: usize = 8000;
fn created_at(&self) -> DateTime<Utc> {
self.created_at
}
fn updated_at(&self) -> DateTime<Utc> {
self.updated_at
}
}

View File

@ -0,0 +1,32 @@
//! Summary frontmatter schema.
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use crate::schema::common::Frontmatter;
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct SummaryFrontmatter {
pub updated_at: DateTime<Utc>,
/// `created_at` is optional for the summary because it's a
/// long-lived single file rewritten in place.
#[serde(default)]
pub created_at: Option<DateTime<Utc>>,
/// Optional pointer to the session-store entry range that drove the
/// most recent rewrite.
#[serde(default)]
pub last_rewritten_from_range: Option<[u64; 2]>,
}
impl Frontmatter for SummaryFrontmatter {
/// Summary holds always-on context, so it gets a larger body budget
/// than per-record kinds (~5k tokens at the upper end).
const BODY_LIMIT: usize = 20000;
fn created_at(&self) -> DateTime<Utc> {
self.created_at.unwrap_or(self.updated_at)
}
fn updated_at(&self) -> DateTime<Utc> {
self.updated_at
}
}

View File

@ -0,0 +1,37 @@
//! Workflow frontmatter schema.
//!
//! NOTE: Workflows are written by humans, not by the memory tool. The
//! linter only validates frontmatter when invoked directly (e.g. by a
//! future CLI / pre-commit hook). The memory write/edit tool rejects
//! `memory/workflow/` paths outright via [`LintError::WorkflowWriteForbidden`].
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use crate::schema::common::Frontmatter;
use crate::slug::Slug;
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct WorkflowFrontmatter {
/// Workflows don't carry sources/created_at requirements in the
/// plan doc; only `updated_at` is required at the schema level.
pub updated_at: DateTime<Utc>,
#[serde(default)]
pub created_at: Option<DateTime<Utc>>,
pub description: String,
pub auto_invoke: bool,
pub user_invocable: bool,
#[serde(default)]
pub requires: Vec<Slug>,
}
impl Frontmatter for WorkflowFrontmatter {
const BODY_LIMIT: usize = 8000;
fn created_at(&self) -> DateTime<Utc> {
self.created_at.unwrap_or(self.updated_at)
}
fn updated_at(&self) -> DateTime<Utc> {
self.updated_at
}
}

View File

@ -0,0 +1,49 @@
//! Helpers for constructing `ScopeRule` entries that exclude the
//! memory tree from the generic CRUD tools' write surface.
//!
//! Pod is expected to call [`deny_write_rules`] when memory is enabled
//! and append the result to the manifest's `scope.deny` list before
//! constructing the [`Scope`] passed to `tools::ScopedFs`. The memory
//! tools themselves bypass `ScopedFs` and write directly under the
//! workspace root, so this deny does not affect their operation.
use std::path::Path;
use manifest::{Permission, ScopeRule};
use crate::workspace::WorkspaceLayout;
/// Build deny rules that strip Write permission from `<workspace>/memory/`
/// and `<workspace>/knowledge/`. Recursive — every descendant is capped
/// at Read for the generic tools.
pub fn deny_write_rules(layout: &WorkspaceLayout) -> Vec<ScopeRule> {
vec![
deny_write(layout.memory_dir().as_path()),
deny_write(layout.knowledge_dir().as_path()),
]
}
fn deny_write(target: &Path) -> ScopeRule {
ScopeRule {
target: target.to_path_buf(),
permission: Permission::Write,
recursive: true,
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::path::PathBuf;
#[test]
fn deny_targets_memory_and_knowledge() {
let layout = WorkspaceLayout::new(PathBuf::from("/ws"));
let rules = deny_write_rules(&layout);
assert_eq!(rules.len(), 2);
assert_eq!(rules[0].target, PathBuf::from("/ws/memory"));
assert_eq!(rules[0].permission, Permission::Write);
assert!(rules[0].recursive);
assert_eq!(rules[1].target, PathBuf::from("/ws/knowledge"));
}
}

155
crates/memory/src/slug.rs Normal file
View File

@ -0,0 +1,155 @@
//! Slug type and validation.
//!
//! Syntax (agent-skills compatible):
//! ^[a-z0-9](?:[a-z0-9-]{0,62}[a-z0-9])?$
//! - 164 chars
//! - lowercase ASCII alphanumerics and `-`
//! - cannot start or end with `-`
//! - no consecutive `--`
use std::fmt;
use std::str::FromStr;
use serde::{Deserialize, Deserializer, Serialize};
use crate::error::LintError;
const MIN_LEN: usize = 1;
const MAX_LEN: usize = 64;
/// Validated slug. Constructible only via [`Slug::parse`].
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
#[serde(transparent)]
pub struct Slug(String);
impl Slug {
/// Parse and validate. Returns [`LintError::InvalidSlug`] on rejection.
pub fn parse(s: impl Into<String>) -> Result<Self, LintError> {
let s = s.into();
if is_valid_slug(&s) {
Ok(Self(s))
} else {
Err(LintError::InvalidSlug(s))
}
}
pub fn as_str(&self) -> &str {
&self.0
}
pub fn into_string(self) -> String {
self.0
}
}
impl fmt::Display for Slug {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(&self.0)
}
}
impl AsRef<str> for Slug {
fn as_ref(&self) -> &str {
&self.0
}
}
impl FromStr for Slug {
type Err = LintError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Self::parse(s)
}
}
impl<'de> Deserialize<'de> for Slug {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let raw = String::deserialize(deserializer)?;
Self::parse(raw).map_err(serde::de::Error::custom)
}
}
/// Pure-fn predicate matching the agent-skills slug regex without
/// pulling in the `regex` crate.
pub fn is_valid_slug(s: &str) -> bool {
let bytes = s.as_bytes();
let len = bytes.len();
if len < MIN_LEN || len > MAX_LEN {
return false;
}
if !is_alnum_lower(bytes[0]) || !is_alnum_lower(bytes[len - 1]) {
return false;
}
let mut prev_dash = false;
for &b in bytes {
if b == b'-' {
if prev_dash {
return false;
}
prev_dash = true;
} else if is_alnum_lower(b) {
prev_dash = false;
} else {
return false;
}
}
true
}
fn is_alnum_lower(b: u8) -> bool {
b.is_ascii_digit() || b.is_ascii_lowercase()
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn accepts_basic_slugs() {
for s in ["a", "ab", "abc-def", "x9", "a-b-c", "123", "a-1"] {
assert!(is_valid_slug(s), "expected `{s}` valid");
assert!(Slug::parse(s).is_ok());
}
}
#[test]
fn rejects_bad_slugs() {
for s in [
"",
"-",
"-foo",
"foo-",
"Foo",
"foo_bar",
"foo bar",
"foo--bar",
"foo.bar",
"ä",
] {
assert!(!is_valid_slug(s), "expected `{s}` invalid");
assert!(Slug::parse(s).is_err());
}
}
#[test]
fn enforces_length_bounds() {
let too_long = "a".repeat(MAX_LEN + 1);
assert!(!is_valid_slug(&too_long));
let max = "a".repeat(MAX_LEN);
assert!(is_valid_slug(&max));
}
#[test]
fn deserializes_via_serde() {
let json = "\"valid-slug\"";
let slug: Slug = serde_json::from_str(json).unwrap();
assert_eq!(slug.as_str(), "valid-slug");
let bad = "\"BAD\"";
let err: Result<Slug, _> = serde_json::from_str(bad);
assert!(err.is_err());
}
}

View File

@ -0,0 +1,299 @@
//! `MemoryEdit` tool — partial string replacement on an existing memory record.
//!
//! Reads current content, applies the replacement, runs the Linter on
//! the result, writes only on success. The current-then-write window
//! is single-tool-call narrow; an external tracker is intentionally
//! omitted (memory tools are self-contained, no `tools` crate dep).
use std::path::PathBuf;
use std::sync::Arc;
use async_trait::async_trait;
use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta, ToolOutput};
use serde::Deserialize;
use crate::linter::{LintReport, Linter, WriteMode};
use crate::workspace::WorkspaceLayout;
const DESCRIPTION: &str = "Replace a substring in an existing memory or knowledge \
record file. By default `old_string` must be unique in the file; set \
`replace_all: true` to replace every occurrence. The resulting content is \
re-validated by the memory linter; failure leaves the file untouched. Path \
must be absolute and lie inside the workspace's `memory/` or `knowledge/` tree.";
#[derive(Debug, Deserialize, schemars::JsonSchema)]
struct EditParams {
/// Absolute path under the workspace's `memory/` or `knowledge/` tree.
file_path: PathBuf,
/// String to replace. Must be unique in the file unless `replace_all` is true.
old_string: String,
/// Replacement string. Must differ from `old_string`.
new_string: String,
/// Replace all occurrences. Defaults to false.
#[serde(default)]
replace_all: bool,
}
struct EditTool {
linter: Linter,
}
#[async_trait]
impl Tool for EditTool {
async fn execute(&self, input_json: &str) -> Result<ToolOutput, ToolError> {
let params: EditParams = serde_json::from_str(input_json).map_err(|e| {
ToolError::InvalidArgument(format!("invalid MemoryEdit input: {e}"))
})?;
if !params.file_path.is_absolute() {
return Err(ToolError::InvalidArgument(format!(
"file_path must be absolute: {}",
params.file_path.display()
)));
}
if params.old_string.is_empty() {
return Err(ToolError::InvalidArgument(
"old_string must not be empty".into(),
));
}
if params.old_string == params.new_string {
return Err(ToolError::InvalidArgument(
"old_string and new_string are identical".into(),
));
}
// Path-shape check; the layout::classify also runs inside the
// linter but we want a crisp error before reading the file.
if self
.linter
.layout()
.classify(&params.file_path)
.map_err(|e| ToolError::InvalidArgument(e.to_string()))?
.is_none()
{
return Err(ToolError::InvalidArgument(format!(
"path is not under the memory tree: {}",
params.file_path.display()
)));
}
let current_bytes = std::fs::read(&params.file_path).map_err(|e| match e.kind() {
std::io::ErrorKind::NotFound => ToolError::ExecutionFailed(format!(
"file not found (use MemoryWrite to create): {}",
params.file_path.display()
)),
_ => ToolError::ExecutionFailed(format!(
"read failed at {}: {e}",
params.file_path.display()
)),
})?;
let current_text = std::str::from_utf8(&current_bytes).map_err(|_| {
ToolError::InvalidArgument(format!(
"file is not valid UTF-8: {}",
params.file_path.display()
))
})?;
let count = current_text.matches(&params.old_string).count();
if count == 0 {
return Err(ToolError::InvalidArgument(format!(
"old_string not found in {}",
params.file_path.display()
)));
}
if !params.replace_all && count > 1 {
return Err(ToolError::InvalidArgument(format!(
"old_string occurs {count} times in {}; pass replace_all: true or narrow the snippet",
params.file_path.display()
)));
}
let new_text = if params.replace_all {
current_text.replace(&params.old_string, &params.new_string)
} else {
current_text.replacen(&params.old_string, &params.new_string, 1)
};
let occurrences = if params.replace_all { count } else { 1 };
let report = self.linter.lint(&params.file_path, &new_text, WriteMode::Update);
if report.has_errors() {
return Err(ToolError::InvalidArgument(format_report(&report)));
}
std::fs::write(&params.file_path, new_text.as_bytes()).map_err(|e| {
ToolError::ExecutionFailed(format!(
"failed to write {}: {e}",
params.file_path.display()
))
})?;
let summary = format!(
"Edited {} ({} replacement{}){}",
params.file_path.display(),
occurrences,
if occurrences == 1 { "" } else { "s" },
warning_tail(&report),
);
Ok(ToolOutput {
summary,
content: None,
})
}
}
fn format_report(report: &LintReport) -> String {
use std::fmt::Write as _;
let mut buf = String::from("memory linter rejected the edit:");
for e in &report.errors {
let _ = write!(&mut buf, "\n - {e}");
}
if !report.warnings.is_empty() {
let _ = write!(&mut buf, "\nwarnings (informational):");
for w in &report.warnings {
let _ = write!(&mut buf, "\n - {w}");
}
}
buf
}
fn warning_tail(report: &LintReport) -> String {
if report.warnings.is_empty() {
return String::new();
}
let mut s = format!(" [{} warning(s)]", report.warnings.len());
for w in &report.warnings {
use std::fmt::Write as _;
let _ = write!(&mut s, " {w};");
}
s
}
pub fn edit_tool(layout: WorkspaceLayout) -> ToolDefinition {
Arc::new(move || {
let schema = schemars::schema_for!(EditParams);
let schema_value = serde_json::to_value(schema).unwrap_or(serde_json::json!({}));
let meta = ToolMeta::new("MemoryEdit")
.description(DESCRIPTION)
.input_schema(schema_value);
let tool: Arc<dyn Tool> = Arc::new(EditTool {
linter: Linter::new(layout.clone()),
});
(meta, tool)
})
}
#[cfg(test)]
mod tests {
use super::*;
use chrono::Utc;
use tempfile::TempDir;
fn now() -> String {
Utc::now().to_rfc3339()
}
fn setup() -> (TempDir, WorkspaceLayout, PathBuf) {
let dir = TempDir::new().unwrap();
let layout = WorkspaceLayout::new(dir.path().to_path_buf());
let path = dir.path().join("memory/decisions/foo.md");
std::fs::create_dir_all(path.parent().unwrap()).unwrap();
let initial = format!(
"---\ncreated_at: {n}\nupdated_at: {n}\nsources: []\nstatus: open\n---\nbody body\n",
n = now()
);
std::fs::write(&path, &initial).unwrap();
(dir, layout, path)
}
#[tokio::test]
async fn edit_simple_replace() {
let (_dir, layout, path) = setup();
let (meta, tool) = edit_tool(layout)();
assert_eq!(meta.name, "MemoryEdit");
let inp = serde_json::json!({
"file_path": path.to_str().unwrap(),
"old_string": "body body",
"new_string": "edited",
});
let out = tool.execute(&inp.to_string()).await.unwrap();
assert!(out.summary.contains("1 replacement"));
let after = std::fs::read_to_string(&path).unwrap();
assert!(after.contains("edited"));
assert!(!after.contains("body body"));
}
#[tokio::test]
async fn edit_resulting_invalid_frontmatter_rolled_back() {
let (_dir, layout, path) = setup();
let (_, tool) = edit_tool(layout)();
// Drop the `status` field by replacing it with nothing.
let inp = serde_json::json!({
"file_path": path.to_str().unwrap(),
"old_string": "status: open\n",
"new_string": "",
});
let err = tool.execute(&inp.to_string()).await.unwrap_err();
let msg = format!("{err}");
assert!(msg.contains("status") || msg.contains("missing"));
// File untouched.
let after = std::fs::read_to_string(&path).unwrap();
assert!(after.contains("status: open"));
}
#[tokio::test]
async fn edit_missing_file() {
let (dir, layout, _) = setup();
let other = dir.path().join("memory/decisions/ghost.md");
let (_, tool) = edit_tool(layout)();
let inp = serde_json::json!({
"file_path": other.to_str().unwrap(),
"old_string": "x",
"new_string": "y",
});
let err = tool.execute(&inp.to_string()).await.unwrap_err();
assert!(matches!(err, ToolError::ExecutionFailed(_)));
}
#[tokio::test]
async fn edit_outside_memory_tree_rejected() {
let (dir, layout, _) = setup();
let other = dir.path().join("src/lib.rs");
std::fs::create_dir_all(other.parent().unwrap()).unwrap();
std::fs::write(&other, "fn main() {}").unwrap();
let (_, tool) = edit_tool(layout)();
let inp = serde_json::json!({
"file_path": other.to_str().unwrap(),
"old_string": "fn",
"new_string": "pub fn",
});
let err = tool.execute(&inp.to_string()).await.unwrap_err();
assert!(matches!(err, ToolError::InvalidArgument(_)));
}
#[tokio::test]
async fn edit_workflow_path_rejected() {
let (dir, layout, _) = setup();
let path = dir.path().join("memory/workflow/wf.md");
std::fs::create_dir_all(path.parent().unwrap()).unwrap();
let initial = format!(
"---\nupdated_at: {n}\ndescription: x\nauto_invoke: false\nuser_invocable: true\n---\nbody\n",
n = now()
);
std::fs::write(&path, &initial).unwrap();
let (_, tool) = edit_tool(layout)();
let inp = serde_json::json!({
"file_path": path.to_str().unwrap(),
"old_string": "body",
"new_string": "edited",
});
let err = tool.execute(&inp.to_string()).await.unwrap_err();
let msg = format!("{err}");
assert!(msg.to_lowercase().contains("workflow"), "{msg}");
// Original untouched.
assert!(std::fs::read_to_string(&path).unwrap().contains("body"));
}
}

View File

@ -0,0 +1,9 @@
//! Tool implementations stub. Filled in once the linter compiles green.
mod edit;
mod read;
mod write;
pub use edit::edit_tool;
pub use read::read_tool;
pub use write::write_tool;

View File

@ -0,0 +1,195 @@
//! `MemoryRead` tool.
//!
//! Constrained to `<workspace>/memory/` and `<workspace>/knowledge/`
//! paths. Returns line-numbered content (1-based), like the generic
//! Read tool, but rejects anything outside the memory tree so the
//! agent can't sneak in a non-memory read through this surface.
use std::path::PathBuf;
use std::sync::Arc;
use async_trait::async_trait;
use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta, ToolOutput};
use serde::Deserialize;
use crate::workspace::WorkspaceLayout;
const DESCRIPTION: &str = "Read a memory or knowledge record file under the \
workspace's `memory/` or `knowledge/` tree. Returns line-numbered output \
(1-based). Paths must be absolute and lie inside the memory tree.";
const DEFAULT_LIMIT: usize = 2000;
#[derive(Debug, Deserialize, schemars::JsonSchema)]
struct ReadParams {
/// Absolute path to a file under the workspace's `memory/` or `knowledge/` tree.
file_path: PathBuf,
/// 0-based line offset from the start. Defaults to 0.
#[serde(default)]
offset: Option<usize>,
/// Maximum number of lines to return. Defaults to 2000.
#[serde(default)]
limit: Option<usize>,
}
struct ReadTool {
layout: WorkspaceLayout,
}
#[async_trait]
impl Tool for ReadTool {
async fn execute(&self, input_json: &str) -> Result<ToolOutput, ToolError> {
let params: ReadParams = serde_json::from_str(input_json).map_err(|e| {
ToolError::InvalidArgument(format!("invalid MemoryRead input: {e}"))
})?;
if !params.file_path.is_absolute() {
return Err(ToolError::InvalidArgument(format!(
"file_path must be absolute: {}",
params.file_path.display()
)));
}
if self
.layout
.classify(&params.file_path)
.map_err(|e| ToolError::InvalidArgument(e.to_string()))?
.is_none()
{
return Err(ToolError::InvalidArgument(format!(
"path is not under the memory tree: {}",
params.file_path.display()
)));
}
let bytes = std::fs::read(&params.file_path).map_err(|e| match e.kind() {
std::io::ErrorKind::NotFound => ToolError::ExecutionFailed(format!(
"file not found: {}",
params.file_path.display()
)),
_ => ToolError::ExecutionFailed(format!(
"read failed at {}: {e}",
params.file_path.display()
)),
})?;
let text = String::from_utf8_lossy(&bytes).into_owned();
let offset = params.offset.unwrap_or(0);
let limit = params.limit.unwrap_or(DEFAULT_LIMIT).max(1);
let rendered = render_numbered(&text, offset, limit);
let summary = if rendered.truncated {
format!(
"Read {} line(s) [{}..{}] of {} from {}",
rendered.line_count,
offset + 1,
offset + rendered.line_count,
rendered.total_lines,
params.file_path.display()
)
} else {
format!(
"Read {} line(s) from {}",
rendered.line_count,
params.file_path.display()
)
};
Ok(ToolOutput {
summary,
content: Some(rendered.body),
})
}
}
struct Rendered {
body: String,
line_count: usize,
total_lines: usize,
truncated: bool,
}
fn render_numbered(text: &str, offset: usize, limit: usize) -> Rendered {
let all_lines: Vec<&str> = text.lines().collect();
let total_lines = all_lines.len();
let start = offset.min(total_lines);
let end = start.saturating_add(limit).min(total_lines);
let slice = &all_lines[start..end];
let line_count = slice.len();
use std::fmt::Write as _;
let mut body = String::with_capacity(text.len().saturating_add(line_count * 8));
for (i, line) in slice.iter().enumerate() {
let lineno = start + i + 1;
let _ = writeln!(&mut body, "{:>6}\t{}", lineno, line);
}
Rendered {
body,
line_count,
total_lines,
truncated: start > 0 || end < total_lines,
}
}
pub fn read_tool(layout: WorkspaceLayout) -> ToolDefinition {
Arc::new(move || {
let schema = schemars::schema_for!(ReadParams);
let schema_value = serde_json::to_value(schema).unwrap_or(serde_json::json!({}));
let meta = ToolMeta::new("MemoryRead")
.description(DESCRIPTION)
.input_schema(schema_value);
let tool: Arc<dyn Tool> = Arc::new(ReadTool {
layout: layout.clone(),
});
(meta, tool)
})
}
#[cfg(test)]
mod tests {
use super::*;
use tempfile::TempDir;
fn setup() -> (TempDir, WorkspaceLayout) {
let dir = TempDir::new().unwrap();
let layout = WorkspaceLayout::new(dir.path().to_path_buf());
(dir, layout)
}
#[tokio::test]
async fn read_returns_numbered_lines() {
let (dir, layout) = setup();
let path = dir.path().join("memory/decisions/foo.md");
std::fs::create_dir_all(path.parent().unwrap()).unwrap();
std::fs::write(&path, "alpha\nbeta\n").unwrap();
let (_meta, tool) = read_tool(layout)();
let inp = serde_json::json!({ "file_path": path.to_str().unwrap() });
let out = tool.execute(&inp.to_string()).await.unwrap();
let body = out.content.unwrap();
assert!(body.contains(" 1\talpha"));
assert!(body.contains(" 2\tbeta"));
}
#[tokio::test]
async fn rejects_outside_memory_tree() {
let (dir, layout) = setup();
let other = dir.path().join("src/main.rs");
std::fs::create_dir_all(other.parent().unwrap()).unwrap();
std::fs::write(&other, "fn main() {}").unwrap();
let (_, tool) = read_tool(layout)();
let inp = serde_json::json!({ "file_path": other.to_str().unwrap() });
let err = tool.execute(&inp.to_string()).await.unwrap_err();
assert!(matches!(err, ToolError::InvalidArgument(_)));
}
#[tokio::test]
async fn rejects_relative_path() {
let (_dir, layout) = setup();
let (_, tool) = read_tool(layout)();
let inp = serde_json::json!({ "file_path": "memory/summary.md" });
let err = tool.execute(&inp.to_string()).await.unwrap_err();
assert!(matches!(err, ToolError::InvalidArgument(_)));
}
}

View File

@ -0,0 +1,250 @@
//! `MemoryWrite` tool.
//!
//! Creates or overwrites a memory or knowledge record with full content.
//! Pre-write Linter validates frontmatter, slug uniqueness (Create only),
//! reference integrity, size limits, and the workflow-write ban. On any
//! Linter error the tool returns `ToolError::InvalidArgument` with all
//! violations aggregated and the file is **not** written.
use std::path::PathBuf;
use std::sync::Arc;
use async_trait::async_trait;
use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta, ToolOutput};
use serde::Deserialize;
use crate::linter::{LintReport, Linter, WriteMode};
use crate::workspace::WorkspaceLayout;
const DESCRIPTION: &str = "Create or overwrite a memory or knowledge record file. \
Path must be absolute and lie inside the workspace's `memory/` or `knowledge/` \
tree. Frontmatter is validated before the file is written; on validation \
failure no write occurs and every violation is returned in the error message.";
#[derive(Debug, Deserialize, schemars::JsonSchema)]
struct WriteParams {
/// Absolute path under the workspace's `memory/` or `knowledge/` tree.
file_path: PathBuf,
/// Full file contents (frontmatter + body).
content: String,
}
struct WriteTool {
linter: Linter,
}
#[async_trait]
impl Tool for WriteTool {
async fn execute(&self, input_json: &str) -> Result<ToolOutput, ToolError> {
let params: WriteParams = serde_json::from_str(input_json).map_err(|e| {
ToolError::InvalidArgument(format!("invalid MemoryWrite input: {e}"))
})?;
if !params.file_path.is_absolute() {
return Err(ToolError::InvalidArgument(format!(
"file_path must be absolute: {}",
params.file_path.display()
)));
}
let already_exists = params.file_path.exists();
let mode = if already_exists {
WriteMode::Update
} else {
WriteMode::Create
};
let report = self.linter.lint(&params.file_path, &params.content, mode);
if report.has_errors() {
return Err(ToolError::InvalidArgument(format_report(&report)));
}
if let Some(parent) = params.file_path.parent() {
std::fs::create_dir_all(parent).map_err(|e| {
ToolError::ExecutionFailed(format!(
"failed to create directory {}: {e}",
parent.display()
))
})?;
}
std::fs::write(&params.file_path, params.content.as_bytes()).map_err(|e| {
ToolError::ExecutionFailed(format!(
"failed to write {}: {e}",
params.file_path.display()
))
})?;
let summary = format!(
"{} {}{}",
if already_exists { "Overwrote" } else { "Created" },
params.file_path.display(),
warning_tail(&report),
);
Ok(ToolOutput {
summary,
content: None,
})
}
}
fn format_report(report: &LintReport) -> String {
use std::fmt::Write as _;
let mut buf = String::from("memory linter rejected the write:");
for e in &report.errors {
let _ = write!(&mut buf, "\n - {e}");
}
if !report.warnings.is_empty() {
let _ = write!(&mut buf, "\nwarnings (informational):");
for w in &report.warnings {
let _ = write!(&mut buf, "\n - {w}");
}
}
buf
}
fn warning_tail(report: &LintReport) -> String {
if report.warnings.is_empty() {
return String::new();
}
let mut s = format!(" [{} warning(s)]", report.warnings.len());
for w in &report.warnings {
use std::fmt::Write as _;
let _ = write!(&mut s, " {w};");
}
s
}
pub fn write_tool(layout: WorkspaceLayout) -> ToolDefinition {
Arc::new(move || {
let schema = schemars::schema_for!(WriteParams);
let schema_value = serde_json::to_value(schema).unwrap_or(serde_json::json!({}));
let meta = ToolMeta::new("MemoryWrite")
.description(DESCRIPTION)
.input_schema(schema_value);
let tool: Arc<dyn Tool> = Arc::new(WriteTool {
linter: Linter::new(layout.clone()),
});
(meta, tool)
})
}
#[cfg(test)]
mod tests {
use super::*;
use chrono::Utc;
use tempfile::TempDir;
fn now() -> String {
Utc::now().to_rfc3339()
}
fn setup() -> (TempDir, WorkspaceLayout) {
let dir = TempDir::new().unwrap();
let layout = WorkspaceLayout::new(dir.path().to_path_buf());
(dir, layout)
}
#[tokio::test]
async fn write_creates_summary() {
let (dir, layout) = setup();
let path = dir.path().join("memory/summary.md");
let content = format!("---\nupdated_at: {n}\n---\nbody\n", n = now());
let (meta, tool) = write_tool(layout)();
assert_eq!(meta.name, "MemoryWrite");
let inp = serde_json::json!({
"file_path": path.to_str().unwrap(),
"content": content,
});
let out = tool.execute(&inp.to_string()).await.unwrap();
assert!(out.summary.contains("Created"));
assert!(path.exists());
}
#[tokio::test]
async fn write_rejects_workflow() {
let (dir, layout) = setup();
let path = dir.path().join("memory/workflow/wf.md");
let content = format!(
"---\nupdated_at: {n}\ndescription: x\nauto_invoke: false\nuser_invocable: true\n---\n",
n = now()
);
let (_, tool) = write_tool(layout)();
let inp = serde_json::json!({
"file_path": path.to_str().unwrap(),
"content": content,
});
let err = tool.execute(&inp.to_string()).await.unwrap_err();
let msg = format!("{err}");
assert!(msg.contains("workflow"), "unexpected error: {msg}");
assert!(!path.exists(), "workflow file must not be written");
}
#[tokio::test]
async fn write_aggregates_multiple_errors() {
let (dir, layout) = setup();
let path = dir.path().join("memory/decisions/foo.md");
// Missing required `status` field AND body too long.
let huge = "x".repeat(8001);
let content = format!(
"---\ncreated_at: {n}\nupdated_at: {n}\nsources: []\n---\n{huge}",
n = now()
);
let (_, tool) = write_tool(layout)();
let inp = serde_json::json!({
"file_path": path.to_str().unwrap(),
"content": content,
});
let err = tool.execute(&inp.to_string()).await.unwrap_err();
let msg = format!("{err}");
assert!(msg.contains("status") || msg.contains("missing"), "{msg}");
}
#[tokio::test]
async fn write_blocks_create_when_existing() {
let (dir, layout) = setup();
let path = dir.path().join("memory/decisions/foo.md");
std::fs::create_dir_all(path.parent().unwrap()).unwrap();
let initial = format!(
"---\ncreated_at: {n}\nupdated_at: {n}\nsources: []\nstatus: open\n---\nold\n",
n = now()
);
std::fs::write(&path, &initial).unwrap();
// Same content as a re-write should pass (Update mode).
let (_, tool) = write_tool(layout.clone())();
let inp = serde_json::json!({
"file_path": path.to_str().unwrap(),
"content": initial,
});
let out = tool.execute(&inp.to_string()).await.unwrap();
assert!(out.summary.contains("Overwrote"));
}
#[tokio::test]
async fn write_rejects_non_absolute() {
let (_dir, layout) = setup();
let (_, tool) = write_tool(layout)();
let inp = serde_json::json!({
"file_path": "memory/summary.md",
"content": "ignored",
});
let err = tool.execute(&inp.to_string()).await.unwrap_err();
assert!(matches!(err, ToolError::InvalidArgument(_)));
}
#[tokio::test]
async fn write_does_not_persist_on_lint_failure() {
let (dir, layout) = setup();
let path = dir.path().join("memory/decisions/foo.md");
let bad = "no frontmatter at all";
let (_, tool) = write_tool(layout)();
let inp = serde_json::json!({
"file_path": path.to_str().unwrap(),
"content": bad,
});
assert!(tool.execute(&inp.to_string()).await.is_err());
assert!(!path.exists());
}
}

View File

@ -0,0 +1,298 @@
//! Workspace-level path layout for the memory subsystem.
//!
//! Resolves a workspace root into the concrete directories the linter
//! and tools operate on:
//!
//! - `<root>/memory/summary.md`
//! - `<root>/memory/decisions/<slug>.md`
//! - `<root>/memory/requests/<slug>.md`
//! - `<root>/memory/workflow/<slug>.md`
//! - `<root>/memory/_staging/<id>.json`
//! - `<root>/knowledge/<slug>.md`
use std::path::{Path, PathBuf};
use crate::error::LintError;
use crate::slug::Slug;
const MEMORY_DIR: &str = "memory";
const KNOWLEDGE_DIR: &str = "knowledge";
const SUMMARY_FILE: &str = "summary.md";
const DECISIONS_DIR: &str = "decisions";
const REQUESTS_DIR: &str = "requests";
const WORKFLOW_DIR: &str = "workflow";
const STAGING_DIR: &str = "_staging";
/// What kind of record a path under the memory tree represents.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RecordKind {
Summary,
Decision,
Request,
Workflow,
Knowledge,
}
impl RecordKind {
pub fn as_str(self) -> &'static str {
match self {
Self::Summary => "summary",
Self::Decision => "decision",
Self::Request => "request",
Self::Workflow => "workflow",
Self::Knowledge => "knowledge",
}
}
}
/// A path classified into a kind and (where applicable) a slug.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ClassifiedPath {
pub kind: RecordKind,
pub slug: Option<Slug>,
}
/// Workspace-rooted layout. Cheap to clone.
#[derive(Debug, Clone)]
pub struct WorkspaceLayout {
root: PathBuf,
}
impl WorkspaceLayout {
pub fn new(root: impl Into<PathBuf>) -> Self {
Self { root: root.into() }
}
pub fn root(&self) -> &Path {
&self.root
}
pub fn memory_dir(&self) -> PathBuf {
self.root.join(MEMORY_DIR)
}
pub fn knowledge_dir(&self) -> PathBuf {
self.root.join(KNOWLEDGE_DIR)
}
pub fn summary_path(&self) -> PathBuf {
self.memory_dir().join(SUMMARY_FILE)
}
pub fn decisions_dir(&self) -> PathBuf {
self.memory_dir().join(DECISIONS_DIR)
}
pub fn requests_dir(&self) -> PathBuf {
self.memory_dir().join(REQUESTS_DIR)
}
pub fn workflow_dir(&self) -> PathBuf {
self.memory_dir().join(WORKFLOW_DIR)
}
pub fn staging_dir(&self) -> PathBuf {
self.memory_dir().join(STAGING_DIR)
}
pub fn decision_path(&self, slug: &Slug) -> PathBuf {
self.decisions_dir().join(format!("{slug}.md"))
}
pub fn request_path(&self, slug: &Slug) -> PathBuf {
self.requests_dir().join(format!("{slug}.md"))
}
pub fn workflow_path(&self, slug: &Slug) -> PathBuf {
self.workflow_dir().join(format!("{slug}.md"))
}
pub fn knowledge_path(&self, slug: &Slug) -> PathBuf {
self.knowledge_dir().join(format!("{slug}.md"))
}
/// Classify a path under the memory tree. Returns `None` if the
/// path is not under `memory/` or `knowledge/` of this workspace,
/// or if it lives in `_staging/` (which is opaque to the linter).
///
/// On a conventional path that's *almost* a record but malformed
/// (e.g. `decisions/Foo.md` with an invalid slug), returns
/// `Err(LintError::InvalidSlug | InvalidPath)` so the caller can
/// surface it as a write violation.
pub fn classify(&self, path: &Path) -> Result<Option<ClassifiedPath>, LintError> {
let memory = self.memory_dir();
let knowledge = self.knowledge_dir();
if let Ok(rel) = path.strip_prefix(&knowledge) {
return Ok(Some(classify_kinded_md(
rel,
RecordKind::Knowledge,
path,
)?));
}
let rel = match path.strip_prefix(&memory) {
Ok(r) => r,
Err(_) => return Ok(None),
};
let mut comps = rel.components();
let first = match comps.next() {
Some(c) => c.as_os_str(),
None => return Err(LintError::InvalidPath(path.to_path_buf())),
};
if first == SUMMARY_FILE {
if comps.next().is_some() {
return Err(LintError::InvalidPath(path.to_path_buf()));
}
return Ok(Some(ClassifiedPath {
kind: RecordKind::Summary,
slug: None,
}));
}
if first == STAGING_DIR {
// Linter opts out of `_staging/`; Phase 1 handles its schema.
return Ok(None);
}
let kind = if first == DECISIONS_DIR {
RecordKind::Decision
} else if first == REQUESTS_DIR {
RecordKind::Request
} else if first == WORKFLOW_DIR {
RecordKind::Workflow
} else {
return Err(LintError::InvalidPath(path.to_path_buf()));
};
let rest: PathBuf = comps.collect();
let cp = classify_kinded_md(&rest, kind, path)?;
Ok(Some(cp))
}
}
fn classify_kinded_md(
rel: &Path,
kind: RecordKind,
full_path: &Path,
) -> Result<ClassifiedPath, LintError> {
let mut comps = rel.components();
let first = match comps.next() {
Some(c) => c,
None => return Err(LintError::InvalidPath(full_path.to_path_buf())),
};
if comps.next().is_some() {
// Subdirectories under the record kind aren't allowed.
return Err(LintError::InvalidPath(full_path.to_path_buf()));
}
let name = first.as_os_str();
let s = name
.to_str()
.ok_or_else(|| LintError::InvalidPath(full_path.to_path_buf()))?;
let stem = s
.strip_suffix(".md")
.ok_or_else(|| LintError::InvalidPath(full_path.to_path_buf()))?;
let slug = Slug::parse(stem)?;
Ok(ClassifiedPath {
kind,
slug: Some(slug),
})
}
#[cfg(test)]
mod tests {
use super::*;
use std::path::PathBuf;
fn layout() -> WorkspaceLayout {
WorkspaceLayout::new(PathBuf::from("/ws"))
}
#[test]
fn classifies_summary() {
let cp = layout()
.classify(&PathBuf::from("/ws/memory/summary.md"))
.unwrap()
.unwrap();
assert_eq!(cp.kind, RecordKind::Summary);
assert!(cp.slug.is_none());
}
#[test]
fn classifies_decision_with_slug() {
let cp = layout()
.classify(&PathBuf::from("/ws/memory/decisions/foo-bar.md"))
.unwrap()
.unwrap();
assert_eq!(cp.kind, RecordKind::Decision);
assert_eq!(cp.slug.unwrap().as_str(), "foo-bar");
}
#[test]
fn classifies_knowledge() {
let cp = layout()
.classify(&PathBuf::from("/ws/knowledge/x.md"))
.unwrap()
.unwrap();
assert_eq!(cp.kind, RecordKind::Knowledge);
}
#[test]
fn classifies_workflow() {
let cp = layout()
.classify(&PathBuf::from("/ws/memory/workflow/wf.md"))
.unwrap()
.unwrap();
assert_eq!(cp.kind, RecordKind::Workflow);
}
#[test]
fn staging_returns_none() {
assert!(
layout()
.classify(&PathBuf::from("/ws/memory/_staging/abc.json"))
.unwrap()
.is_none()
);
}
#[test]
fn outside_returns_none() {
assert!(
layout()
.classify(&PathBuf::from("/elsewhere/file.md"))
.unwrap()
.is_none()
);
assert!(
layout()
.classify(&PathBuf::from("/ws/src/main.rs"))
.unwrap()
.is_none()
);
}
#[test]
fn invalid_slug_rejected() {
let err = layout()
.classify(&PathBuf::from("/ws/memory/decisions/Foo.md"))
.unwrap_err();
assert!(matches!(err, LintError::InvalidSlug(_)));
}
#[test]
fn nested_under_record_dir_rejected() {
let err = layout()
.classify(&PathBuf::from("/ws/memory/decisions/sub/foo.md"))
.unwrap_err();
assert!(matches!(err, LintError::InvalidPath(_)));
}
#[test]
fn unknown_top_level_dir_rejected() {
let err = layout()
.classify(&PathBuf::from("/ws/memory/something/foo.md"))
.unwrap_err();
assert!(matches!(err, LintError::InvalidPath(_)));
}
}

View File

@ -25,6 +25,7 @@ include_dir = "0.7.4"
fs4 = { version = "0.13.1", features = ["sync"] } fs4 = { version = "0.13.1", features = ["sync"] }
libc = "0.2.185" libc = "0.2.185"
schemars = "1.2.1" schemars = "1.2.1"
memory = { version = "0.1.0", path = "../memory" }
[dev-dependencies] [dev-dependencies]
async-trait = "0.1.89" async-trait = "0.1.89"

View File

@ -117,6 +117,7 @@ impl PodController {
let pwd_for_tools = pod.pwd().to_path_buf(); let pwd_for_tools = pod.pwd().to_path_buf();
let spawner_name = pod.manifest().pod.name.clone(); let spawner_name = pod.manifest().pod.name.clone();
let spawner_model = pod.manifest().model.clone(); let spawner_model = pod.manifest().model.clone();
let memory_config = pod.manifest().memory.clone();
// Parent callback socket (this Pod's own parent, used for // Parent callback socket (this Pod's own parent, used for
// `PodEvent` upward reports). `None` for top-level Pods. // `PodEvent` upward reports). `None` for top-level Pods.
@ -233,6 +234,23 @@ impl PodController {
let tracker = tools::Tracker::new(); let tracker = tools::Tracker::new();
worker.register_tools(tools::builtin_tools(fs, tracker.clone())); worker.register_tools(tools::builtin_tools(fs, tracker.clone()));
// Memory subsystem opt-in. When `[memory]` is present in
// the manifest, register the memory-specific Read/Write/Edit
// tools that target `<workspace>/memory/` and
// `<workspace>/knowledge/` with their built-in linter. The
// companion deny rules on the generic CRUD scope were
// already applied during `Pod::from_manifest`.
if let Some(mem) = memory_config.as_ref() {
let workspace_root = mem
.workspace_root
.clone()
.unwrap_or_else(|| pwd_for_tools.clone());
let layout = memory::WorkspaceLayout::new(workspace_root);
worker.register_tool(memory::tool::read_tool(layout.clone()));
worker.register_tool(memory::tool::write_tool(layout.clone()));
worker.register_tool(memory::tool::edit_tool(layout));
}
// Pod-orchestration tools (SpawnPod + the four comm tools) // Pod-orchestration tools (SpawnPod + the four comm tools)
// share the Pod-scoped `SpawnedPodRegistry` hoisted above // share the Pod-scoped `SpawnedPodRegistry` hoisted above
// (also consumed by the main loop's `PodEvent` handler). // (also consumed by the main loop's `PodEvent` handler).

View File

@ -1197,7 +1197,7 @@ impl<St: Store> Pod<Box<dyn LlmClient>, St> {
loader: PromptLoader, loader: PromptLoader,
) -> Result<Self, PodError> { ) -> Result<Self, PodError> {
let pwd = current_pwd()?; let pwd = current_pwd()?;
let scope = Scope::from_config(&manifest.scope).map_err(PodError::Scope)?; let scope = build_scope_with_memory(&manifest, &pwd)?;
if !scope.is_readable(&pwd) { if !scope.is_readable(&pwd) {
return Err(PodError::PwdOutsideScope { pwd }); return Err(PodError::PwdOutsideScope { pwd });
} }
@ -1279,7 +1279,7 @@ impl<St: Store> Pod<Box<dyn LlmClient>, St> {
callback_socket: PathBuf, callback_socket: PathBuf,
) -> Result<Self, PodError> { ) -> Result<Self, PodError> {
let pwd = current_pwd()?; let pwd = current_pwd()?;
let scope = Scope::from_config(&manifest.scope).map_err(PodError::Scope)?; let scope = build_scope_with_memory(&manifest, &pwd)?;
if !scope.is_readable(&pwd) { if !scope.is_readable(&pwd) {
return Err(PodError::PwdOutsideScope { pwd }); return Err(PodError::PwdOutsideScope { pwd });
} }
@ -1507,6 +1507,25 @@ pub enum PodError {
PromptCatalog(#[from] CatalogError), PromptCatalog(#[from] CatalogError),
} }
/// Build the Pod's runtime [`Scope`] from the manifest, layering the
/// memory subsystem's deny-write rules on top when `[memory]` is
/// present. The deny rules cap generic CRUD tools so they cannot
/// touch `<workspace>/memory/` or `<workspace>/knowledge/` while the
/// memory tools (registered separately) bypass `ScopedFs` and write
/// through `std::fs` directly.
fn build_scope_with_memory(manifest: &PodManifest, pwd: &Path) -> Result<Scope, PodError> {
let mut scope_config = manifest.scope.clone();
if let Some(mem) = manifest.memory.as_ref() {
let root = mem
.workspace_root
.clone()
.unwrap_or_else(|| pwd.to_path_buf());
let layout = memory::WorkspaceLayout::new(root);
scope_config.deny.extend(memory::deny_write_rules(&layout));
}
Scope::from_config(&scope_config).map_err(PodError::Scope)
}
/// Snapshot the process's current working directory as the Pod's pwd, /// Snapshot the process's current working directory as the Pod's pwd,
/// canonicalising symlinks and any `.`/`..` components. The Pod keeps /// canonicalising symlinks and any `.`/`..` components. The Pod keeps
/// this value for its lifetime; changes to the process-wide cwd after /// this value for its lifetime; changes to the process-wide cwd after

View File

@ -57,11 +57,11 @@ Knowledge / memory を LLM に渡す経路は以下で固定。採択基準(
- ソートは初期 grep の出現順、FTS / vector 導入時に関連度へ切り替え(将来検討) - ソートは初期 grep の出現順、FTS / vector 導入時に関連度へ切り替え(将来検討)
- ヒット件数上限と excerpt 行数は設定で tune - ヒット件数上限と excerpt 行数は設定で tune
- **memory 検索ツール**: `memory/{summary,decisions,requests}/*.md` 対象。spec は Knowledge 検索ツールと同型。§使用頻度メトリクスの観測経路と同一視する - **memory 検索ツール**: `memory/{summary,decisions,requests}/*.md` 対象。spec は Knowledge 検索ツールと同型。§使用頻度メトリクスの観測経路と同一視する
- **更新は既定の汎用 CRUD + Linter**: Knowledge / memory とも §書き込み経路と Linter の汎用 CRUD tool + post-write Linter Hook で済ませる。専用の create/update ツールは作らない - **更新は memory 専用 Tool + Linter**: Knowledge / memory への write/edit は `memory` クレートが提供する専用 Tool 経由のみ。汎用 CRUD`tools` クレートの Write/Editは memory/knowledge 配下を触らないPod が Scope で deny。Linter は memory tool 内で pre-write 検証として走り、違反は `ToolError::InvalidArgument` で LLM に返る。詳細は §書き込み経路と Linter
- **常駐注入**: メモリを消費する主体は通常 Pod。`model_invokation: ON` な record の description を通常 Pod の system prompt に常駐注入する。Phase 2 prompt には入れない - **常駐注入**: メモリを消費する主体は通常 Pod。`model_invokation: ON` な record の description を通常 Pod の system prompt に常駐注入する。Phase 2 prompt には入れない
- 予算はシステムプロンプト全体の予算に含める(`memory_summary.md` の 5k 枠とは別管理にしない) - 予算はシステムプロンプト全体の予算に含める(`memory_summary.md` の 5k 枠とは別管理にしない)
- 超過時の件数キャップ / 優先順位ルールは、description 1024 chars 上限で通常は収まる前提。ON record 数が増えたら追加する - 超過時の件数キャップ / 優先順位ルールは、description 1024 chars 上限で通常は収まる前提。ON record 数が増えたら追加する
- **Phase 2 の Knowledge アクセス**: 全 Knowledge 本文を prompt に埋めず、Knowledge 検索ツール + 汎用 CRUD を agent に渡して自律探索させる(詳細は §Phase 2 - **Phase 2 の Knowledge アクセス**: 全 Knowledge 本文を prompt に埋めず、Knowledge 検索ツール + memory 専用 Tool を agent に渡して自律探索させる(詳細は §Phase 2
- **`#<slug>` 補完 / 自動呼び出し(大枠のみ、実装は段階的)**: - **`#<slug>` 補完 / 自動呼び出し(大枠のみ、実装は段階的)**:
- `#<slug>` は検索ツールの slug 完全一致経路で本文が展開される - `#<slug>` は検索ツールの slug 完全一致経路で本文が展開される
- 補完 UIslug サジェスト)は TUI 側。`user_invocable: false` は候補除外 - 補完 UIslug サジェスト)は TUI 側。`user_invocable: false` は候補除外
@ -80,13 +80,17 @@ Knowledge は「保存する価値があるか」だけでなく、「あとで
### 書き込み経路と Linter ### 書き込み経路と Linter
人間も consolidation sub-Worker も**同じ CRUD toolfile read / write / edit**で `memory/*` を触る。書き込み時の制約は静的 Linter で検証し、違反時は post-write Hook が turn を戻して sub-Worker に自己修正させるN 回失敗で abort。Linter は frontmatter / slug / 参照整合などの機械的ルールを見る。 `memory/*` / `knowledge/*` への write/edit は **`memory` クレート提供の memory 専用 Toolread / write / edit** に集約する。汎用 CRUD`tools` クレートの Write/Editはこれらのディレクトリに触らないPod が Scope で deny に落とすことで構造的に担保)。
Linter は memory tool 内で **pre-write 検証** として走り、違反は `ToolError::InvalidArgument` で返す。LLM は通常の tool error フローで違反内容を読み、自己修正する。Interceptor 拡張・retry message 注入・違反カウンタは持たないworker 層の max iteration が暴走を止める。Linter は frontmatter / slug / 参照整合などの機械的ルールを見る。
人間編集(エディタ / git commitは tool 層を経由しないため Linter を通らない。`memory::Linter` は pure 関数として export し、CLI / pre-commit hook 経路を後で別チケットで用意する。
意味破壊rewrite で既存の主張・根拠が落ちる、Knowledge の記述主題がズレる等の自動検出は初期範囲に含めない。Phase 2 prompt 側の情報損失最小化指示と git diff レビューで運用し、実使用で顕在化したら監査 LLM 層を後から挟む(将来検討)。 意味破壊rewrite で既存の主張・根拠が落ちる、Knowledge の記述主題がズレる等の自動検出は初期範囲に含めない。Phase 2 prompt 側の情報損失最小化指示と git diff レビューで運用し、実使用で顕在化したら監査 LLM 層を後から挟む(将来検討)。
Linter ルールは 2 系統: Linter ルールは 2 系統:
**静的 error**post-write Hook で turn 戻し、sub-Worker が自己修正): **静的 error**memory tool が `ToolError::InvalidArgument` で返し、LLM が tool error フローで自己修正):
- frontmatter 必須 field - frontmatter 必須 field
- Decisions / Requests: `created_at`, `updated_at`, `sources` - Decisions / Requests: `created_at`, `updated_at`, `sources`
@ -132,7 +136,7 @@ Workflow 保護は専用 tool schema のトリックではなく Linter ルー
- **Trigger**: staging の累積ファイル数 or bytes が閾値超過、または compact 発火時(必ず flush - **Trigger**: staging の累積ファイル数 or bytes が閾値超過、または compact 発火時(必ず flush
- **実行主体**: Phase 1 を終えた pod が consolidation Worker を spawn。並走防止は staging 配下の進行状況ファイル(後述)で担保 - **実行主体**: Phase 1 を終えた pod が consolidation Worker を spawn。並走防止は staging 配下の進行状況ファイル(後述)で担保
- **入力**: 起動時スナップショットで確定した consumed ID list 分の staging エントリ(活動ログ + `source`+ 既存 `memory/*`summary / decisions / requestsの全文 + **Knowledge 化候補レポート**(後述の使用頻度メトリクスから機械集計、閾値超過の source 一覧)。既存 `knowledge/*` は全文を prompt に埋めず、Knowledge 検索ツール経由で agent が必要分を引く - **入力**: 起動時スナップショットで確定した consumed ID list 分の staging エントリ(活動ログ + `source`+ 既存 `memory/*`summary / decisions / requestsの全文 + **Knowledge 化候補レポート**(後述の使用頻度メトリクスから機械集計、閾値超過の source 一覧)。既存 `knowledge/*` は全文を prompt に埋めず、Knowledge 検索ツール経由で agent が必要分を引く
- **処理**: sub-Worker に**汎用 CRUD toolfile read / write / edit+ Knowledge 検索ツール + memory 検索ツール + post-write Linter Hook** を渡し、agentic に以下を自律判断: - **処理**: sub-Worker に **memory 専用 Toolread / write / edit、Linter 内蔵+ Knowledge 検索ツール + memory 検索ツール** を渡し、agentic に以下を自律判断:
- 新規 decisions / requests を 1 件 1 ファイルで追加。`sources` は staging の `source` をコピーLLM 推論ではない) - 新規 decisions / requests を 1 件 1 ファイルで追加。`sources` は staging の `source` をコピーLLM 推論ではない)
- 活動ログから派生する Knowledge用語定義 / 運用方針 / ルール / 事実 / ノウハウ)を新規作成 or 既存 patch。**新規作成は候補レポート掲載の source から派生する場合に限る**。`kind` を frontmatter に持ち、`last_sources` を更新 - 活動ログから派生する Knowledge用語定義 / 運用方針 / ルール / 事実 / ノウハウ)を新規作成 or 既存 patch。**新規作成は候補レポート掲載の source から派生する場合に限る**。`kind` を frontmatter に持ち、`last_sources` を更新
- summary を必要に応じて rewrite - summary を必要に応じて rewrite
@ -245,7 +249,7 @@ GC は record を一律に「stale」とみなさず、少なくとも次の 4
### 将来検討(運用で必要性が見えたら追加) ### 将来検討(運用で必要性が見えたら追加)
- 監査 LLM 層(意味破壊検出)の導入 — 初期は静的 Linter のみで運用し、Phase 2 の rewrite で情報損失・主題ズレが実運用で顕在化したら post-write Hook の 2 層目として追加。入力 / check 項目 / pass-fail 返却形式は導入時に詰める - 監査 LLM 層(意味破壊検出)の導入 — 初期は静的 Linter のみで運用し、Phase 2 の rewrite で情報損失・主題ズレが実運用で顕在化したら memory tool 内の検証パイプラインに 2 層目として追加。入力 / check 項目 / pass-fail 返却形式は導入時に詰める
- Vector index / FTS5 等の検索索引 — 初期は grep で足りる想定。ファイル数増加で検索が重くなったら検討 - Vector index / FTS5 等の検索索引 — 初期は grep で足りる想定。ファイル数増加で検索が重くなったら検討
- `model_invokation` offer の自動判定ロジック — 初期は人間が手動で切り替え - `model_invokation` offer の自動判定ロジック — 初期は人間が手動で切り替え
- 過去 session を cross-session で検索する UI - 過去 session を cross-session で検索する UI

View File

@ -2,10 +2,36 @@
## 背景 ## 背景
`docs/plan/memory.md` で決めたメモリ機構の永続化レイヤの土台。`memory/*` と `knowledge/*` の record を保存・編集する際の静的スキーマと、汎用 CRUD への post-write Hook として挟む Linter を成立させる。Phase 1/2、検索ツール、常駐注入、GC はすべてこの層に乗る。 `docs/plan/memory.md` で決めたメモリ機構の永続化レイヤの土台。`memory/*` と `knowledge/*` の record を保存・編集する際の静的スキーマと、書き込み時の Linter を成立させる。Phase 1/2、検索ツール、常駐注入、GC はすべてこの層に乗る。
Workflow`docs/plan/workflow.md`)も同じ frontmatter / Linter 経路で扱うため、`memory/workflow/<slug>.md` の frontmatter 検証と書き込み制限も本チケットに含める。実行経路(`/<slug>` dispatchは別。 Workflow`docs/plan/workflow.md`)も同じ frontmatter / Linter 経路で扱うため、`memory/workflow/<slug>.md` の frontmatter 検証と書き込み制限も本チケットに含める。実行経路(`/<slug>` dispatchは別。
## 設計方針
### memory クレートに集約
memory 関連は新規 `crates/memory/` に全部閉じ込める。`tools` クレートや `pod` 層に memory 由来のコードを漏らさない。
- schemafrontmatter 型、slug 文法、Linter ルール、Tool 実装、ワークスペース解決を全部 `memory` クレート内に置く
- `tools::write_tool` / `edit_tool` には一切手を入れない
- LLM への違反伝達は memory tool が `ToolError::InvalidArgument` を返すことで自然に成立。Interceptor 拡張・retry message 注入・違反カウンタは持たない
- 「N 回失敗で abort」は worker 層の max iteration に委ね、memory 固有のカウンタは設けない
### memory 専用 Tool汎用 CRUD ではない)
`memory` クレートが `read_tool` / `write_tool` / `edit_tool` の 3 種を提供する。これらは `<workspace>/memory/`、`<workspace>/knowledge/` 配下のみを対象とし、write/edit は **fs 書き込み前** に Linter を通して違反は `ToolError::InvalidArgument` で返す。
`docs/plan/memory.md` の「同じ汎用 CRUD」記述は本チケットで `memory` 専用 Tool 方式に書き換える(汎用 CRUD は memory ディレクトリには触らせない)。
### Pod 側の責務(最小限)
memory を有効化する Pod は、generic tool に渡す Scope から `memory/`、`knowledge/` を deny に落とす。これにより同じ workspace 内で、generic write/edit は memory 配下を触れず、memory tool だけが触れる構造になる。Pod 側でやることはこの Scope deny と memory tool の登録だけ。
### 「sub-Worker / 人間」の二系統
- **sub-Worker**: tool 層を経由するため Linter を必ず通る
- **人間**: エディタ / git commit は tool 層を経由しないので Linter を通らない。`memory::Linter` を import して走らせる CLI / pre-commit hook を後で用意できる構造にしておく(本チケットでは実装しない)
## 要件 ## 要件
### ディレクトリと record 種別 ### ディレクトリと record 種別
@ -14,44 +40,58 @@ Workflow`docs/plan/workflow.md`)も同じ frontmatter / Linter 経路で扱
- `memory/decisions/<slug>.md` — Decisions - `memory/decisions/<slug>.md` — Decisions
- `memory/requests/<slug>.md` — Requests - `memory/requests/<slug>.md` — Requests
- `memory/workflow/<slug>.md` — Workflowfrontmatter 検証のみ対象) - `memory/workflow/<slug>.md` — Workflowfrontmatter 検証のみ対象)
- `memory/_staging/<id>.json` — Phase 1 中間(本チケットはパス予約と Linter 対象外化のみ - `memory/_staging/<id>.json` — Phase 1 中間(パス予約のみ。Linter 対象外
- `knowledge/<slug>.md` — Knowledge`memory/` の兄弟) - `knowledge/<slug>.md` — Knowledge`memory/` の兄弟)
slug は kebab-case小文字英数とハイフン。ファイル名がそのまま識別子で、frontmatter に `id` / `name` は持たない。 slug 文法: `^[a-z0-9](?:[a-z0-9-]{0,62}[a-z0-9])?$`agent-skills 準拠、1-64 chars、先頭末尾 `-` 不可、`--` 連続不可。ファイル名がそのまま識別子で、frontmatter に `id` / `name` は持たない。
### frontmatter スキーマ ### frontmatter スキーマ
種別ごとの必須フィールドは `docs/plan/memory.md` §ファイル形式 / §書き込み経路と Linter の表に従う。具体的な必須項目: - 共通: `created_at`, `updated_at`RFC3339
- 共通: `created_at`, `updated_at`
- Decisions: `sources`, `status: open | resolved | replaced`、置き換え時 `replaced_by: <slug>` - Decisions: `sources`, `status: open | resolved | replaced`、置き換え時 `replaced_by: <slug>`
- Requests: `sources` - Requests: `sources`
- Knowledge: `kind`, `description`, `model_invokation`, `user_invocable`, `last_sources` - Knowledge: `kind`, `description`, `model_invokation`, `user_invocable`, `last_sources`
- Summary: `updated_at`optional: `last_rewritten_from_range` - Summary: `updated_at`optional: `last_rewritten_from_range`
- Workflow: `description`, `auto_invoke`, `user_invocable`, `requires` - Workflow: `description`, `auto_invoke`, `user_invocable`, `requires`
`sources` / `last_sources` の要素形式は `{ session_id: String, range: [u64, u64] }`。`range` は session-store の entry index ペア。
### Linter ルール ### Linter ルール
静的 errorpost-write Hook が turn を戻し、sub-Worker に自己修正させる。N 回失敗で abort: **静的 error**memory tool が `ToolError::InvalidArgument` で返す。複数違反は集約して 1 回で返す:
- frontmatter 必須 field 欠落・型違反 - frontmatter 必須 field 欠落・型違反
- `memory/workflow/` への書き込み禁止sub-Worker のみ。人間編集は対象外) - `memory/workflow/` への書き込み禁止sub-Worker のみ。memory tool が拒否、人間編集は memory tool を通らないので素通り)
- 同 slug での新規作成禁止(既存があれば update に切り替えるサイン) - 同 slug での新規作成禁止(既存があれば update を要求)
- `#<slug>` / `replaced_by: <slug>` / `requires: [..]` が実在 record を指す - `replaced_by: <slug>` / `requires: [..]` が実在 record を指す
- `replaced_by` の循環は error
- Decisions `status` の enum 違反 - Decisions `status` の enum 違反
- slug 文法違反
- `model_invokation: true` な Knowledge の description 1024 chars 上限 - `model_invokation: true` な Knowledge の description 1024 chars 上限
- 種別ごとの char 硬上限(具体値は設定ファイルで tune - 種別ごとの char 硬上限(初期既定値、設定 key は別 PR:
- `summary.md`: 20000 chars
- decisions / requests / knowledge 本文: 各 8000 chars
膨張抑制 Warnerror ではなく改善ヒント): **膨張抑制 Warn**error にせず、warn として返す。memory tool は受け取って summary に追記する程度:
- 低重要度 × char の天秤 - 低重要度 × char の天秤(暫定: 1500 chars 超のレコードに対して `sources` 1 件のみなら warn
- `sources` 配列長の累積 - `sources` 配列長の累積(暫定: 10 件超で warn
- 類似 slug 乱立 - 類似 slug 乱立(暫定: Levenshtein 距離 2 以下の slug が 3 件以上で warn
具体閾値の調整は別 PR設定 key 化)。
### `#<slug>` 本文中検出はスコープ外
本文中の `#<slug>` 参照の検出 / 補完は submit-segment 系チケットの責務。本チケットの Linter は frontmatter 由来の参照(`replaced_by` / `requires`)のみを検証する。
### 参照整合チェックの実行方式
write/edit 毎に `<workspace>/memory/`、`<workspace>/knowledge/` を毎回 walk して slug 集合を構築(キャッシュなし)。ファイル数は少ない想定。
### 適用経路 ### 適用経路
- sub-Worker の汎用 CRUDread/write/editへの post-write Hook として挟む - memory tool の write/edit 内で pre-write 検証
- 人間編集(エディタ / git commitに対しても同一ルールで検証できる CLI または pre-commit hook 経路を用意(詳細は実装で判断、結果として同じルールが一箇所で定義されていること) - 人間編集 / pre-commit hook 経路は本チケットでは作らない。`memory::Linter` を pure 関数として export しておけば後で CLI 化できる
## 範囲外 ## 範囲外
@ -59,16 +99,43 @@ slug は kebab-case小文字英数とハイフン。ファイル名がそ
- 意味破壊rewrite で主張が落ちる等)の検出 — 監査 LLM 層は将来検討 - 意味破壊rewrite で主張が落ちる等)の検出 — 監査 LLM 層は将来検討
- staging JSON の schema — Phase 1 チケット - staging JSON の schema — Phase 1 チケット
- Workflow の `/<slug>` 実行経路 - Workflow の `/<slug>` 実行経路
- 本文中 `#<slug>` 参照の検出 — submit-segment 系
- 設定 key閾値 tune— 別 PR
- 人間編集向け CLI / pre-commit hook — 別チケット
- `Interceptor` / `Hook` 系統への拡張 — 不要tool error で完結)
## 完了条件 ## 完了条件
- 上記パスに手で record を置いて Linter を走らせると、スキーマ違反 / 参照切れ / 同 slug 競合が error として返る - `crates/memory/` が新設され、workspace に登録されている
- sub-Worker が CRUD で書き込んだ際、違反時は turn が戻り自己修正が走る - memory tool 3 種read / write / editが登録できる
- Warn は error を止めず、出力されることが確認できる - write/edit に違反 content を渡すと、複数違反を集約した `ToolError::InvalidArgument` が返り、fs に書き込まれない
- `memory/workflow/` への sub-Worker からの書き込みは error で止まり、人間編集は通る - 正常 content は通常通り書き込まれる
- `memory/workflow/<slug>.md` への write/edit は error で止まる
- 同 slug で新規作成しようとすると error になるexisting → edit に倒すサイン)
- `replaced_by` / `requires` の参照切れと循環が error として検出される
- Pod が memory を有効化すると、generic tool の Scope から memory/knowledge が deny される
- 既存ビルド・テストを壊さない - 既存ビルド・テストを壊さない
## 実装順序
1. `crates/memory/` 新設、workspace 登録、依存追加
2. `schema/`, `slug.rs`, `error.rs`pure 関数 + 型)
3. `linter/`frontmatter / size / 参照存在 / 循環 / workflow 拒否)
4. `tool/`read / write / edit、pre-write で linter 通す)
5. Pod 側の Scope deny 配線
6. 単体テスト
各ステップ終了時点でビルド通過を維持する。
## 参照 ## 参照
- `docs/plan/memory.md` §ファイル形式 / §書き込み経路と Linter / §Knowledge の採択基準 - `docs/plan/memory.md` §ファイル形式 / §書き込み経路と Linter / §Knowledge の採択基準(本チケットで該当箇所を memory 専用 tool 方式に更新)
- `docs/plan/workflow.md` §格納先とファイル形式 / §生成・更新ポリシー - `docs/plan/workflow.md` §格納先とファイル形式 / §生成・更新ポリシー
- `crates/tools/src/{write,edit,read}.rs` — Tool 実装の参考(依存はしない)
- `crates/llm-worker/src/tool.rs``Tool` trait / `ToolError` / `ToolOutput`
## Review
- 状態: Approve with follow-up
- レビュー詳細: [./memory-file-format.review.md](./memory-file-format.review.md)
- 日付: 2026-04-27

View File

@ -0,0 +1,73 @@
# Review: メモリ機構: ファイル形式 + Linter 土台
## 前提・要件の確認
### 完了条件マッピング
| # | 条件 | 結果 | 根拠 |
| - | ---- | ---- | ---- |
| 1 | `crates/memory/` 新設、workspace 登録 | OK | `Cargo.toml:13`, `crates/memory/Cargo.toml`, `crates/memory/src/lib.rs:9-15` |
| 2 | memory tool 3 種read / write / editが登録できる | OK | `crates/pod/src/controller.rs:243-252` |
| 3 | 違反 content で複数違反集約の `InvalidArgument`、fs 不変 | OK | `crates/memory/src/tool/write.rs:185-202`, `:237-249`、`linter::format_report` で全 error を 1 メッセージに集約 |
| 4 | 正常 content は通常書き込み | OK | `tool/write.rs:147-163` |
| 5 | `memory/workflow/` 書き込み拒否 | OK | `linter/mod.rs:101-105`、`tool/write.rs:165-181`, `tool/edit.rs:276-298` |
| 6 | 同 slug 新規作成 error | OK | `linter/mod.rs:120-127`, `tool/write.rs:204-222` |
| 7 | `replaced_by` / `requires` の参照切れと循環が error | 部分 | `replaced_by` 不在: OK / `replaced_by` 循環: アルゴリズム実装済みだが実循環ケースの end-to-end テスト無し / **`requires` 整合: 未実装**`schema/workflow.rs` で型パースのみ) |
| 8 | Pod 有効化で generic tool Scope deny | OK | `pod.rs:1516-1527` (`build_scope_with_memory`)、`scope::deny_write_rules` |
| 9 | 既存ビルド・テスト壊さない | OK | `cargo build --workspace` / `cargo test --workspace` 全 pass、memory 単体 44 テスト pass |
### Linter ルール(要件節)と実装の照合
静的 error:
- frontmatter 必須欠落・型違反: OK (`linter/frontmatter.rs::map_serde_error`、ただし `parse_invalid_status``\`open\`` リテラル含有でラフに status 同定する点はやや fragile — 別フィールドが今後 enum を持つ場合は誤分類リスク)
- workflow 書き込み禁止: OK
- 同 slug 新規禁止: OK
- `replaced_by` 実在: OK
- `replaced_by` 循環: 実装あり / テスト不足
- Decisions `status` enum: OK
- slug 文法: OK (`slug::is_valid_slug` + 全網羅 deserialize 経路)
- `model_invokation: true` の description 1024 上限: OK
- 種別ごとの char 硬上限 (summary 20000 / 他 8000): OK
膨張抑制 Warn:
- 大 record × 単一 sources warn: OK
- sources 配列長 warn: OK
- **類似 slug 乱立 (Levenshtein 距離 2 / 3 件以上) warn: 未実装**`LintWarning::SimilarSlugs` バリアントは定義済みだが、emit 経路無し。`existing.slugs(kind)` は揃っており実装は容易)
## アーキテクチャ・スコープ
### 良い点
- memory 関連は `crates/memory/` に明確に閉じ込められている。`tools/` 側に侵入なし、`pod` 側は `MemoryConfig` 取得 + `build_scope_with_memory` + tool 登録 3 行のみで責務が薄い
- crate 名は `memory``insomnia-` プレフィックス無し(命名ルール準拠)
- 依存追加は `cargo add` 由来とみられる版指定で workspace に整合(`crates/memory/Cargo.toml`
- `schema/`, `linter/`, `tool/` を feature module で分割しており、巨大 `lib.rs` を回避
- LLM への違反伝達は `ToolError::InvalidArgument` のみで完結。Interceptor 拡張・retry message 注入・違反カウンタを持たないという設計方針通り
- 公開 API は `lib.rs` 12 行で抑制的、leaky でない
- `_staging/` は意図通り linter 透過 (`workspace::classify` で `Ok(None)`)
### 懸念
- **plan 残存矛盾**: `docs/plan/memory.md:196` に「Phase 2 と同じ CRUD tool + Linter Hook を使う」が残っている。「Linter Hook」は本チケットで否定された旧設計の語彙。GC 節の更新漏れ4 箇所更新と説明されたが GC 節は対象外だった可能性)
- **deny target = `<workspace_root>/memory`**: `MemoryConfig::workspace_root``None` の時 pwd を採用するが、pwd ≠ workspace root の Pod を spawn したケースで deny 対象パスがズレ、generic write が実際の `memory/` を保護しない可能性。ticket 要件は文字通りには満たしているが、「workspace_root 既定値が pwd」前提が manifest 側で文書化されていない。`PodManifest` 化のドクコメントに「workspace_root 未指定 ⇒ Pod 構築時 pwd」と書く程度は欲しい
## 指摘事項
### Blocking
なし。完了条件 7 は部分的だが、`requires` の検証は memory tool 経由で書き込む経路がticket の禁止により存在しないため、実害が無い。CLI / pre-commit が別チケットに切り出されていることと整合する
### Non-blocking / Follow-up
- **類似 slug 乱立 warn 未実装**`crates/memory/src/error.rs:101` `SimilarSlugs` バリアントは定義済みだが emit されない。ticket 要件節「膨張抑制 Warn」に明記されているので、本ライフサイクル内に追加するか、後続 ticket に明示移管したい
- **`requires` 参照整合チェック未実装** — `lint_workflow_frontmatter` (`linter/mod.rs:235`) は型パースだけで、`requires: Vec<Slug>` の各 slug が実在 Workflow か検査しない。memory tool では到達しないが、ticket 完了条件に文言が残っている。CLI ticket に明示的に移すか、本チケットで `lint_workflow_frontmatter` の引数に `&ExistingRecords` を渡す形で実装するのが筋
- **`replaced_by` 循環の end-to-end テスト追加** — `references.rs::tests::empty_chain_terminates` は smoke test に近い。`A.replaced_by=B`, `B.replaced_by=A``existing` に置いた状態で 2 ノード閉路を実検出するテストを足すと完了条件 7 のカバレッジが揃う
- **`docs/plan/memory.md:196`** — 「Linter Hook」を「Lintermemory tool 内 pre-write 検証」相当に書き換え。GC 節は本チケットの直接対象外でも、用語整合は取りたい
- **`MemoryConfig.workspace_root` 既定値の文書化** — `crates/manifest/src/lib.rs:46-53` のドクコメントに「`None` ⇒ Pod の pwd 採用」「pwd ≠ workspace root の Pod では明示指定が必要」を追記
- **`lint_workflow_frontmatter``lib.rs` 公開** — CLI 経路の入口になる関数なので `memory::Linter` と並んで `pub use linter::lint_workflow_frontmatter;` しておくと後続 CLI ticket が触れやすい
### Nits
- `linter/frontmatter.rs::parse_invalid_status``\`open\`` 文字列マッチで status enum を識別するため、将来別の enum (`status`-like) が増えた時に誤分類するリスク。コメント注記はあるが、`field: "status"` ハードコード分岐に何らかのテストを足したい
- `tool/edit.rs``format_report` / `warning_tail``tool/write.rs` の同名関数とロジックがほぼ重複。`tool/mod.rs` に小さなヘルパとして括り出すと DRY に
- `SourceRef.range: [u64; 2]` は frontmatter での扱いが配列だが、構造体名コメントが「inclusive range」とのみ。ドクコメントに「`[start_entry_index, end_entry_index]` 両端含む」を簡潔に書くと session-store との対応が明示される
- `Slug::PartialOrd / Ord` は派生済みだが、`HashMap<Slug, _>` で十分なので `BTreeMap` 用途が現状無い。問題ないが意図メモがあると親切
## 判断
**Approve with follow-up** — 完了条件はほぼ全て充足。アーキテクチャ的に memory 専用クレートへ綺麗に閉じ込められ、`tools` / `pod` への漏出は最小5 行程度。LLM 違反伝達も `ToolError::InvalidArgument` 一本で完結し、ticket 設計方針に忠実。残課題(類似 slug warn / `requires` 検証 / 循環 e2e テスト / plan 残存表現 / workspace_root ドキュメント)はいずれも非 blocking で、別 PR / 追跡 ticket への計上で十分。ビルド・テストともに workspace 全 pass。