メモリー内容のシステムプロンプトへの埋め込みの実装

This commit is contained in:
Keisuke Hirata 2026-04-27 18:25:47 +09:00
parent 12c1d55127
commit e8559d4bee
9 changed files with 410 additions and 1 deletions

View File

@ -8,6 +8,7 @@
pub mod error;
pub mod linter;
pub mod resident;
pub mod schema;
pub mod scope;
pub mod slug;
@ -16,6 +17,7 @@ pub mod workspace;
pub use error::{LintError, LintWarning, MemoryError};
pub use linter::{LintReport, Linter};
pub use resident::{ResidentKnowledgeEntry, collect_resident_knowledge};
pub use scope::deny_write_rules;
pub use slug::Slug;
pub use workspace::WorkspaceLayout;

View File

@ -0,0 +1,163 @@
//! Collect resident-injection candidates from the workspace.
//!
//! Walks `<workspace>/knowledge/*.md`, returns the records whose
//! frontmatter has `model_invokation: true` as `(slug, description)`
//! pairs sorted by slug. The Pod system-prompt assembler appends them
//! into the trailing section so descriptions sit next to the scope
//! summary and AGENTS.md.
//!
//! Files that fail to read or parse are skipped silently — the Linter
//! enforces shape on write, so a malformed file here means external
//! tampering and we'd rather degrade than panic.
use crate::schema::{KnowledgeFrontmatter, split_frontmatter};
use crate::workspace::WorkspaceLayout;
/// One resident-injection candidate: a knowledge record whose
/// frontmatter has `model_invokation: true`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ResidentKnowledgeEntry {
/// File name of the record under `knowledge/` with the `.md` suffix removed.
pub slug: String,
/// The `description` value taken from the record's frontmatter, unmodified.
pub description: String,
}
/// Gather the resident-knowledge entries of a workspace.
///
/// Every regular file `<workspace>/knowledge/<slug>.md` is read and its
/// frontmatter parsed; records with `model_invokation: true` are kept
/// and returned ordered by slug. Anything that cannot be listed, read,
/// split or deserialised is skipped without error, and a `knowledge/`
/// directory that cannot be opened (e.g. because it is missing) yields
/// an empty vec.
pub fn collect_resident_knowledge(layout: &WorkspaceLayout) -> Vec<ResidentKnowledgeEntry> {
    let knowledge_dir = layout.knowledge_dir();
    let Ok(listing) = std::fs::read_dir(&knowledge_dir) else {
        return Vec::new();
    };

    let mut residents: Vec<ResidentKnowledgeEntry> = listing
        // Directory entries that fail to enumerate are dropped.
        .flatten()
        .filter_map(|dir_entry| {
            let path = dir_entry.path();
            if !path.is_file() {
                return None;
            }
            // The slug is the UTF-8 file name minus its `.md` suffix;
            // files without that suffix are not knowledge records.
            let slug = path.file_name()?.to_str()?.strip_suffix(".md")?.to_string();
            let raw = std::fs::read_to_string(&path).ok()?;
            let (yaml, _body) = split_frontmatter(&raw).ok()?;
            let frontmatter: KnowledgeFrontmatter = serde_yaml::from_str(yaml).ok()?;
            if frontmatter.model_invokation {
                Some(ResidentKnowledgeEntry {
                    slug,
                    description: frontmatter.description,
                })
            } else {
                None
            }
        })
        .collect();

    // `read_dir` gives no ordering guarantee, so order explicitly.
    residents.sort_by(|lhs, rhs| lhs.slug.cmp(&rhs.slug));
    residents
}
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::Utc;
    use std::path::Path;
    use tempfile::TempDir;

    /// Current UTC time as an RFC 3339 string.
    fn now() -> String {
        Utc::now().to_rfc3339()
    }

    /// Write `<dir>/knowledge/<slug>.md` with a complete frontmatter
    /// block followed by `body`. Both timestamps share one `now()` value.
    fn write_knowledge(
        dir: &Path,
        slug: &str,
        description: &str,
        model_invokation: bool,
        body: &str,
    ) {
        let content = format!(
            "---\ncreated_at: {n}\nupdated_at: {n}\nkind: policy\ndescription: \"{description}\"\nmodel_invokation: {flag}\nuser_invocable: true\nlast_sources: []\n---\n{body}",
            n = now(),
            flag = model_invokation,
        );
        let target = dir.join("knowledge").join(format!("{slug}.md"));
        std::fs::write(target, content).unwrap();
    }

    /// Temp workspace that already contains an empty `knowledge/` dir.
    /// The `TempDir` is returned so the directory outlives the test body.
    fn setup() -> (TempDir, WorkspaceLayout) {
        let workspace = TempDir::new().unwrap();
        std::fs::create_dir_all(workspace.path().join("knowledge")).unwrap();
        let layout = WorkspaceLayout::new(workspace.path().to_path_buf());
        (workspace, layout)
    }

    #[test]
    fn missing_knowledge_dir_returns_empty() {
        // The workspace root exists but has no knowledge/ directory.
        let workspace = TempDir::new().unwrap();
        let layout = WorkspaceLayout::new(workspace.path().to_path_buf());
        let collected = collect_resident_knowledge(&layout);
        assert!(collected.is_empty());
    }

    #[test]
    fn picks_only_model_invokation_true() {
        let (workspace, layout) = setup();
        for (slug, description, resident) in [
            ("alpha", "alpha desc", true),
            ("beta", "beta desc", false),
            ("gamma", "gamma desc", true),
        ]
        .iter()
        .copied()
        {
            write_knowledge(workspace.path(), slug, description, resident, "body\n");
        }

        let expected = vec![
            ResidentKnowledgeEntry {
                slug: "alpha".to_string(),
                description: "alpha desc".to_string(),
            },
            ResidentKnowledgeEntry {
                slug: "gamma".to_string(),
                description: "gamma desc".to_string(),
            },
        ];
        assert_eq!(collect_resident_knowledge(&layout), expected);
    }

    #[test]
    fn entries_are_sorted_by_slug() {
        let (workspace, layout) = setup();
        // Deliberately written out of order.
        for (slug, description) in [("zeta", "z"), ("alpha", "a"), ("mu", "m")].iter().copied() {
            write_knowledge(workspace.path(), slug, description, true, "");
        }

        let slugs: Vec<String> = collect_resident_knowledge(&layout)
            .into_iter()
            .map(|entry| entry.slug)
            .collect();
        assert_eq!(slugs, ["alpha", "mu", "zeta"]);
    }

    #[test]
    fn malformed_frontmatter_is_skipped() {
        let (workspace, layout) = setup();
        write_knowledge(workspace.path(), "good", "ok", true, "");
        // A record whose frontmatter is garbage must be dropped, not panic.
        let broken = workspace.path().join("knowledge/bad.md");
        std::fs::write(broken, "---\nthis is not yaml: : :\n---\nbody\n").unwrap();

        let collected = collect_resident_knowledge(&layout);
        assert_eq!(collected.len(), 1);
        assert_eq!(collected[0].slug, "good");
    }

    #[test]
    fn non_md_files_ignored() {
        let (workspace, layout) = setup();
        write_knowledge(workspace.path(), "good", "ok", true, "");
        let stray = workspace.path().join("knowledge/note.txt");
        std::fs::write(stray, "not markdown\n").unwrap();

        let collected = collect_resident_knowledge(&layout);
        assert_eq!(collected.len(), 1);
    }
}

View File

@ -646,6 +646,7 @@ permission = "write"
scope: &scope,
tool_names: Vec::new(),
agents_md: None,
resident_knowledge: None,
prompts: &catalog,
};
let rendered = tmpl.render(&ctx).unwrap();

View File

@ -118,6 +118,12 @@ pub struct Pod<C: LlmClient, St: Store> {
/// [`Self::from_manifest`], or defaults to the builtin pack when a
/// Pod is constructed through lower-level paths that have no loader.
prompts: Arc<PromptCatalog>,
/// When true (default), the system-prompt assembler walks
/// `<workspace>/knowledge/*` and appends a `## Resident knowledge`
/// section listing records with `model_invokation: true`.
/// Phase 2 (consolidation) workers set this to false so the
/// agentic worker pulls knowledge through the search tools instead.
inject_resident_knowledge: bool,
}
impl<C: LlmClient, St: Store> Pod<C, St> {
@ -164,6 +170,7 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
scope_allocation: None,
callback_socket: None,
prompts,
inject_resident_knowledge: true,
};
pod.apply_prune_from_manifest();
Ok(pod)
@ -177,6 +184,20 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
self.system_prompt_template = Some(template);
}
/// Enable or disable the `## Resident knowledge` section of the system
/// prompt.
///
/// The flag defaults to `true`. While it is `true` and memory is enabled
/// in the manifest, the system-prompt assembler walks
/// `<workspace>/knowledge/*` and lists the records with
/// `model_invokation: true`. Phase 2 (consolidation) workers and other
/// agentic memory paths pass `false` so the worker pulls knowledge
/// through the search tools instead of riding on the resident
/// system-prompt budget.
///
/// The call only stores the flag, so repeating it before the first turn
/// is harmless; it is ineffective once the system prompt has been
/// materialised.
///
/// * `enabled` - `true` to inject the section, `false` to suppress it.
pub fn set_resident_knowledge_injection(&mut self, enabled: bool) {
self.inject_resident_knowledge = enabled;
}
/// Restore a Pod from a persisted session.
/// Shared handle to the prompt catalog. Cheap to clone (`Arc`).
pub fn prompts(&self) -> &Arc<PromptCatalog> {
@ -237,6 +258,7 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
scope_allocation: None,
callback_socket: None,
prompts,
inject_resident_knowledge: true,
};
pod.apply_prune_from_manifest();
Ok(pod)
@ -538,12 +560,39 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
);
}
}
// Resident-injection collection: only when memory is enabled in
// the manifest AND this Pod opts in (Phase 2 workers opt out).
// Owned `Vec` lives for the duration of `render` below; the
// context borrows a slice into it.
let resident: Vec<memory::ResidentKnowledgeEntry> = if self.inject_resident_knowledge {
self.manifest
.memory
.as_ref()
.map(|mem| {
let workspace_root = mem
.workspace_root
.clone()
.unwrap_or_else(|| self.pwd.clone());
let layout = memory::WorkspaceLayout::new(workspace_root);
memory::collect_resident_knowledge(&layout)
})
.unwrap_or_default()
} else {
Vec::new()
};
let resident_slice: Option<&[memory::ResidentKnowledgeEntry]> =
if self.inject_resident_knowledge && self.manifest.memory.is_some() {
Some(&resident)
} else {
None
};
let ctx = SystemPromptContext {
now: chrono::Utc::now(),
cwd: &self.pwd,
scope: &self.scope,
tool_names,
agents_md: agents_md_read.body,
resident_knowledge: resident_slice,
prompts: &self.prompts,
};
let rendered = template
@ -1257,6 +1306,7 @@ impl<St: Store> Pod<Box<dyn LlmClient>, St> {
scope_allocation: Some(scope_allocation),
callback_socket: None,
prompts,
inject_resident_knowledge: true,
};
pod.apply_prune_from_manifest();
Ok(pod)
@ -1320,6 +1370,7 @@ impl<St: Store> Pod<Box<dyn LlmClient>, St> {
scope_allocation: Some(scope_allocation),
callback_socket: Some(callback_socket),
prompts,
inject_resident_knowledge: true,
};
pod.apply_prune_from_manifest();
Ok(pod)

View File

@ -75,6 +75,10 @@ pub enum PodPrompt {
/// Trailing `## Project instructions (AGENTS.md)` section, appended
/// after the scope summary when an AGENTS.md is present.
AgentsMdSection,
/// Trailing `## Resident knowledge` section, appended after the
/// AGENTS.md section when memory is enabled and at least one
/// `knowledge/*` record advertises `model_invokation: true`.
ResidentKnowledgeSection,
}
impl PodPrompt {
@ -86,6 +90,7 @@ impl PodPrompt {
Self::InterruptSystemNote => "interrupt_system_note",
Self::WorkingBoundariesSection => "working_boundaries_section",
Self::AgentsMdSection => "agents_md_section",
Self::ResidentKnowledgeSection => "resident_knowledge_section",
}
}
@ -99,6 +104,7 @@ impl PodPrompt {
PodPrompt::InterruptSystemNote,
PodPrompt::WorkingBoundariesSection,
PodPrompt::AgentsMdSection,
PodPrompt::ResidentKnowledgeSection,
];
pub const KEYS: &'static [&'static str] = &[
@ -108,6 +114,7 @@ impl PodPrompt {
"interrupt_system_note",
"working_boundaries_section",
"agents_md_section",
"resident_knowledge_section",
];
}
@ -317,6 +324,15 @@ impl PromptCatalog {
pub fn agents_md_section(&self, agents_md: &str) -> Result<String, CatalogError> {
self.render(PodPrompt::AgentsMdSection, single("agents_md", agents_md))
}
/// Render the `PodPrompt::ResidentKnowledgeSection` template.
///
/// `entries` is bound to the template's `{{ entries }}` variable; the
/// caller supplies it as an already formatted list block.
pub fn resident_knowledge_section(&self, entries: &str) -> Result<String, CatalogError> {
    let template_vars = single("entries", entries);
    self.render(PodPrompt::ResidentKnowledgeSection, template_vars)
}
}
fn single(key: &'static str, value: &str) -> Value {

View File

@ -18,6 +18,7 @@ use std::sync::Arc;
use chrono::{DateTime, SecondsFormat, Utc};
use manifest::Scope;
use memory::ResidentKnowledgeEntry;
use minijinja::value::Value;
use minijinja::{Environment, ErrorKind, UndefinedBehavior};
use thiserror::Error;
@ -117,7 +118,13 @@ impl SystemPromptTemplate {
let body = tmpl
.render(ctx.to_minijinja_value())
.map_err(|e| SystemPromptError::Render(e.to_string()))?;
append_trailing_section(&body, ctx.prompts, ctx.scope, ctx.agents_md.as_deref())
append_trailing_section(
&body,
ctx.prompts,
ctx.scope,
ctx.agents_md.as_deref(),
ctx.resident_knowledge,
)
}
}
@ -143,6 +150,11 @@ pub struct SystemPromptContext<'a> {
/// Not visible from the template; consumed by the trailing-section
/// formatter in [`SystemPromptTemplate::render`].
pub agents_md: Option<String>,
/// Resident-injection candidates from `<workspace>/knowledge/*` whose
/// frontmatter has `model_invokation: true`. `None` disables the
/// section entirely (memory disabled, or a Phase 2 worker that opts
/// out); `Some(&[])` also yields no section.
pub resident_knowledge: Option<&'a [ResidentKnowledgeEntry]>,
/// Catalog used to render the fixed trailing section headers.
/// Passed by reference so callers do not give up ownership across
/// the short-lived render borrow.
@ -190,6 +202,7 @@ pub fn append_trailing_section(
prompts: &PromptCatalog,
scope: &Scope,
agents_md: Option<&str>,
resident_knowledge: Option<&[ResidentKnowledgeEntry]>,
) -> Result<String, SystemPromptError> {
let mut out = String::with_capacity(body.len() + 256);
out.push_str(body);
@ -207,6 +220,15 @@ pub fn append_trailing_section(
out.push_str(section.trim_end_matches(&['\n', ' '][..]));
out.push('\n');
}
if let Some(entries) = resident_knowledge {
if !entries.is_empty() {
out.push('\n');
let formatted = format_resident_knowledge_entries(entries);
let section = prompts.resident_knowledge_section(&formatted)?;
out.push_str(section.trim_end_matches(&['\n', ' '][..]));
out.push('\n');
}
}
// Canonicalise the tail so the emitted prompt has a single form
// regardless of how individual templates chose to end.
while out.ends_with('\n') || out.ends_with(' ') {
@ -215,6 +237,28 @@ pub fn append_trailing_section(
Ok(out)
}
/// Format entries as a bullet list, one `- <slug>: <description>` row
/// per entry, rows separated by `\n` with no trailing newline.
///
/// Each `\n` and `\r` inside a description is replaced by a single
/// space so that an entry never spans more than one row of the
/// rendered prompt. An empty slice produces an empty string.
fn format_resident_knowledge_entries(entries: &[ResidentKnowledgeEntry]) -> String {
    entries
        .iter()
        .map(|entry| {
            let single_row: String = entry
                .description
                .chars()
                .map(|c| if matches!(c, '\n' | '\r') { ' ' } else { c })
                .collect();
            format!("- {}: {}", entry.slug, single_row)
        })
        .collect::<Vec<String>>()
        .join("\n")
}
/// Bridge used by [`Pod::ensure_system_prompt_materialized`] so tests
/// can construct a synthetic context without going through a full Pod.
#[doc(hidden)]
@ -257,6 +301,23 @@ mod tests {
scope,
tool_names: tools,
agents_md,
resident_knowledge: None,
prompts: test_prompts(),
}
}
/// Test helper: build a [`SystemPromptContext`] whose
/// `resident_knowledge` is `Some(resident)`, with no tool names and no
/// AGENTS.md body. `now` and `prompts` come from the `fixed_now()` and
/// `test_prompts()` fixtures.
fn ctx_with_resident<'a>(
cwd: &'a Path,
scope: &'a Scope,
resident: &'a [ResidentKnowledgeEntry],
) -> SystemPromptContext<'a> {
SystemPromptContext {
now: fixed_now(),
cwd,
scope,
tool_names: Vec::new(),
agents_md: None,
resident_knowledge: Some(resident),
prompts: test_prompts(),
}
}
@ -464,4 +525,55 @@ mod tests {
assert!(!rendered.contains("AGENTS.md"));
assert!(!rendered.contains("Project instructions"));
}
/// With `resident_knowledge: None` the rendered prompt carries no
/// resident-knowledge heading.
#[test]
fn trailing_section_omits_resident_knowledge_when_none() {
    let (_loader_dir, loader) = user_loader_with("body.md", "BODY");
    let template = SystemPromptTemplate::parse("$user/body", loader).unwrap();
    let workdir = TempDir::new().unwrap();
    let scope = build_scope(workdir.path());

    let context = ctx(workdir.path(), &scope, vec![], None);
    let prompt = template.render(&context).unwrap();

    assert!(!prompt.contains("Resident knowledge"));
}
/// `Some(&[])` behaves like `None`: no resident-knowledge heading is
/// emitted when there is nothing to list.
#[test]
fn trailing_section_omits_resident_knowledge_when_empty_slice() {
    let (_loader_dir, loader) = user_loader_with("body.md", "BODY");
    let template = SystemPromptTemplate::parse("$user/body", loader).unwrap();
    let workdir = TempDir::new().unwrap();
    let scope = build_scope(workdir.path());

    let no_entries: &[ResidentKnowledgeEntry] = &[];
    let context = ctx_with_resident(workdir.path(), &scope, no_entries);
    let prompt = template.render(&context).unwrap();

    assert!(!prompt.contains("Resident knowledge"));
}
/// Non-empty entries are rendered as a `## Resident knowledge` list,
/// one row per entry, placed after the working-boundaries section.
#[test]
fn trailing_section_renders_resident_knowledge_entries() {
    let (_loader_dir, loader) = user_loader_with("body.md", "BODY");
    let template = SystemPromptTemplate::parse("$user/body", loader).unwrap();
    let workdir = TempDir::new().unwrap();
    let scope = build_scope(workdir.path());

    let residents: Vec<ResidentKnowledgeEntry> = [
        ("alpha", "first record"),
        ("beta", "second record\nwith newline"),
    ]
    .iter()
    .map(|&(slug, description)| ResidentKnowledgeEntry {
        slug: slug.into(),
        description: description.into(),
    })
    .collect();

    let context = ctx_with_resident(workdir.path(), &scope, &residents);
    let prompt = template.render(&context).unwrap();

    assert!(prompt.contains("## Resident knowledge"));
    assert!(prompt.contains("- alpha: first record"));
    // The embedded newline is folded to a space, keeping one entry per row.
    assert!(prompt.contains("- beta: second record with newline"));

    // The resident section must come after the working-boundaries header.
    let boundaries_at = prompt.find("## Working boundaries").unwrap();
    let resident_at = prompt.find("## Resident knowledge").unwrap();
    assert!(boundaries_at < resident_at);
}
}

View File

@ -34,3 +34,12 @@ agents_md_section = """\
{{ agents_md }}\
"""
resident_knowledge_section = """\
---
## Resident knowledge
The following knowledge records are advertised resident. Use the KnowledgeSearch / MemoryRead tools to fetch the full body when relevant.
{{ entries }}\
"""

View File

@ -30,3 +30,8 @@
- `docs/plan/memory.md` §retrieval 経路 / §Knowledge の呼び出し制御
- `tickets/memory-file-format.md`（依存: `model_invokation` frontmatter）
## Review
- 状態: Approve
- レビュー詳細: [./memory-resident-injection.review.md](./memory-resident-injection.review.md)
- 日付: 2026-04-27

View File

@ -0,0 +1,50 @@
# Review: メモリ機構 `model_invokation: ON` の常駐注入
## 前提・要件の確認
- 「Pod 起動時に `knowledge/*` を走査し、`model_invokation: ON` の record の description を system prompt に連結」
- `crates/memory/src/resident.rs:25` の `collect_resident_knowledge` が `<workspace>/knowledge/*.md` を走査し、`KnowledgeFrontmatter` を deserialize して `model_invokation: true` のみ採用、slug 順にソートして返す。`crates/pod/src/pod.rs:567-588` で system prompt 生成時に呼び出され、`crates/pod/src/prompt/system.rs:223-231` で `## Resident knowledge` セクションが trailing 部に追記される。要件充足。
- 「`model_invokation: false` のものは含まれない」
- `resident.rs:58` の `if fm.model_invokation` 判定。`picks_only_model_invokation_true` テストで担保済み。
- 「Phase 2 Pod では注入しない（consolidation は検索ツール経由）」
- `Pod::set_resident_knowledge_injection(false)` の lever が用意され、`ensure_system_prompt_materialized` 内で `inject_resident_knowledge` フラグと `manifest.memory.is_some()` の両方を条件に注入。Phase 2 Pod の実装はまだ存在しないため、現時点では「lever は用意されたが呼び出し側がない」状態(後述 Follow-up 参照)。
- 「既存の system prompt 構成（AGENTS.md / scope summary / skills 等）と共存」
- `append_trailing_section` で Working boundaries → AGENTS.md → Resident knowledge の順で追記。`trailing_section_renders_resident_knowledge_entries` テストで順序検証あり。共存 OK。
- 「予算はシステムプロンプト全体予算に含める。`memory/summary.md` の 5k 枠とは別管理にしない」
- 別バジェット管理は導入されていない。要件通り。
- 「初期は件数キャップ / 優先順位ルール不要」
- 単純に slug 順で全件出力。要件通り。
## アーキテクチャ・スコープ
- レイヤ境界: 走査ロジックは `memory` クレートに置かれ、`pod` 側は `memory::collect_resident_knowledge` を呼び出すだけ。レイヤ責務に整合。
- catalog 拡張: `PodPrompt::ResidentKnowledgeSection` の追加と `internal.toml` の対応エントリは既存パターン(`AgentsMdSection` 等)に揃っている。`ALL` / `KEYS` の同期と build-time 検査がそのまま機能する。
- prompt rendering: 文字列フォーマットは `format_resident_knowledge_entries` として system.rs にローカル化。テンプレートは `entries` を pre-formatted で受け取るので、後で「list 以外の表現にしたい」になっても catalog 側の差し替えで済む(チケットの「フォーマットは初期 simple リスト、後で再検討」と整合)。
- スコープ膨張は感じられない。新規追加は約 200 行で、要件達成のために必要な最小構成に近い。
## 指摘事項
### Blocking
なし。
### Non-blocking / Follow-up
- `manifest.memory` の `workspace_root` 解決ロジック（`mem.workspace_root.clone().unwrap_or_else(|| pwd.clone())` + `WorkspaceLayout::new`）が今回の追加で 3 箇所に増えた:
- `crates/pod/src/controller.rs:244-248`
- `crates/pod/src/pod.rs:567-577`(今回追加)
- `crates/pod/src/pod.rs:1567-1577`（`build_scope_with_memory`）
これ自体は本チケットで生まれた重複ではなく既存パターンの踏襲だが、3 箇所目に達した時点で `MemoryConfig::workspace_layout(pwd: &Path) -> WorkspaceLayout` のような小さなヘルパに寄せておくと健全。本チケット範囲外で OK。
- 統合テスト: 単体では `collect_resident_knowledge` と `append_trailing_section` の挙動が個別に担保されているが、Pod の `ensure_system_prompt_materialized` を通る経路（`knowledge/*.md` を置いた状態で system prompt が組み上がるところまで）の end-to-end 確認はない。回帰防止としては unit 2 種で十分カバー範囲に入っているとも読めるが、`inject_resident_knowledge=false` / `manifest.memory=None` の枝を実際の Pod 経路で踏むテストがあると配線ミスを早期に検知できる。
- Phase 2 Pod 自体が未実装で、`set_resident_knowledge_injection(false)` を呼ぶ箇所がない。`tickets/memory-phase2-consolidation.md` 側で「Phase 2 spawn 時にこの setter を呼ぶ」旨を明記しておかないと、将来「lever はあるが誰も呼ばずに常駐注入されてしまう」事故になりうる。Phase 2 チケット側に注記推奨。
- `internal.toml` の `resident_knowledge_section` 文言 ("Use the KnowledgeSearch / MemoryRead tools to fetch the full body when relevant.") はモデル向けの英語固定。多言語 prompt pack を作る運用になった時点で overlay で差し替える前提なので現状で問題ないが、ツール名の改名が起きたら追従が必要（`KnowledgeSearch` / `MemoryRead` が tool catalog 側の正式名と一致しているか確認推奨）。
- `format_resident_knowledge_entries` の改行畳み込み: linter は `description` の長さ上限（1024 chars）は強制するが「単一行」は強制していないので、defensive な `\n` / `\r` → space 変換は妥当な防御。挙動は `trailing_section_renders_resident_knowledge_entries` でカバー済み。
### Nits
- `resident.rs` のテストヘルパ `write_knowledge` は description を `"{description}"` で raw quote しているため、「description に改行が混ざるケース」は単体テストでは触れていない。改行畳み込みは `prompt::system` 側のテストで担保されているので二重には不要だが、collector 側で意図的に `\n` を含む description を 1 件入れて round-trip 確認すると堅牢。
- `pod.rs:567-588` の `resident` / `resident_slice` の二段構えは `Vec` を所有しつつ `Option<&[..]>` を欲しい、という要件のための定石だが、コメントを 1 行足しておくと後続読者に親切（owned `Vec` がスコープを跨ぐ理由）。すでに `// Owned `Vec` lives for the duration of `render` below` の注釈はある。十分。
## 判断
**Approve** — チケットの要件は実装側で漏れなく満たされており、レイヤ責務 / 既存パターンとも整合。Phase 2 側の lever 呼び出しは Phase 2 チケットに引き継ぐ形で問題なく、本チケットの完了条件は満たしている。