update: memoryシステム周りのプロンプトの整理

2026-05-03 00:27:10 +09:00 · 2026-05-03 00:27:10 +09:00 · 4b9b4f1450
commit 4b9b4f1450
parent 670abdc336
9 changed files with 189 additions and 33 deletions
--- a/crates/memory/src/consolidate/mod.rs
+++ b/crates/memory/src/consolidate/mod.rs
@ -5,20 +5,20 @@
 //! の観点で整理する disposable Worker を、Pod 側が組み立てるための
 //! ヘルパー群を提供する。Pod は次の手順で sub-Worker を構築する:
 //!
 //! - Pod の `PromptCatalog::memory_consolidation_system` の描画結果を sub-Worker の system prompt に
 //! - [`build_consolidate_input`] を sub-Worker の最初の user 入力に
 //! - memory 専用 Tool (read / write / edit) と Knowledge / memory 検索ツールを登録
 //! - [`StagingLock::acquire`] で並走防止 + consumed ID 確定
 //! - sub-Worker run 完了後、[`StagingLock::release_with_cleanup`] で
 //!   consumed ID 分の staging のみ削除し、占有ファイルを解放
 //!
-//! Knowledge 化候補レポートと使用頻度メトリクスは別チケットで供給される
+//! system prompt は Pod の `PromptCatalog`
-//! 想定。本モジュール時点では空入力として扱い、prompt 側の説明だけ
+//! (`PodPrompt::MemoryConsolidationSystem`) で管理される。Knowledge 化候補
-//! 残しておく（`docs/plan/memory.md` §Phase 2 / 整理材料）。
+//! レポートと使用頻度メトリクスは別チケットで供給される想定。本モジュール
 //! 時点では空入力として扱い、prompt 側の説明だけ残しておく
 //! （`docs/plan/memory.md` §Phase 2 / 整理材料）。
 mod input;
 mod lock;
 mod prompt;
 mod staging;
 mod tidy;
@ -27,6 +27,5 @@ pub use input::{
    render_staging_records, render_tidy_hints,
 };
 pub use lock::{LockError, LockRecord, StagingLock};
 pub use prompt::CONSOLIDATION_SYSTEM_PROMPT;
 pub use staging::{StagingEntry, list_staging_entries};
 pub use tidy::{TidyHints, collect_tidy_hints};
--- a/crates/memory/src/extract/mod.rs
+++ b/crates/memory/src/extract/mod.rs
@ -4,13 +4,14 @@
 //! 出力を `<workspace>/.insomnia/memory/_staging/<id>.json` に書き出す
 //! ヘルパーを提供する。Pod 側はこのモジュールから:
 //!
 //! - Pod の `PromptCatalog::memory_extract_system` の描画結果を sub-Worker の system prompt に
 //! - [`build_extract_input`] を sub-Worker の最初の user 入力に
 //! - [`write_extracted_tool`] を唯一のツールとして
 //! - [`write_staging`] で受け取った JSON を staging に書き出し
 //!
-//! の順で組み立てる。pointer 永続化（session-store の
+//! の順で組み立てる。system prompt は Pod の `PromptCatalog`
-//! `LogEntry::Extension`、domain `"memory.extract"`）は Pod 側が責務を持つ。
+//! (`PodPrompt::MemoryExtractSystem`) で管理される。pointer 永続化
 //! （session-store の `LogEntry::Extension`、domain `"memory.extract"`）は
 //! Pod 側が責務を持つ。
 //!
 //! 出力 JSON の wrap は [`write_staging`] が `source: { session_id, range }`
 //! を機械付与する形で担当し、LLM には source を推論させない。
@ -18,7 +19,6 @@
 mod input;
 mod payload;
 mod pointer;
 mod prompt;
 mod staging;
 mod tool;
@ -27,7 +27,6 @@ pub use payload::{
    AttemptEntry, DecisionEntry, DiscussionEntry, ExtractedPayload, RequestEntry, StagingRecord,
 };
 pub use pointer::{ExtractPointerPayload, fold_pointer};
 pub use prompt::EXTRACT_SYSTEM_PROMPT;
 pub use staging::{StagingError, write_staging};
 pub use tool::{ExtractWorkerContext, write_extracted_tool};
--- a/crates/pod/src/pod.rs
+++ b/crates/pod/src/pod.rs
@ -1549,7 +1549,11 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
            .unwrap_or(manifest::defaults::MEMORY_EXTRACT_WORKER_MAX_INPUT_TOKENS);
        let client = self.build_extractor_client(memory_cfg)?;
-        let mut extract_worker = Worker::new(client).system_prompt(extract::EXTRACT_SYSTEM_PROMPT);
+        let extract_system_prompt = self
            .prompts
            .memory_extract_system()
            .map_err(PodError::PromptCatalog)?;
        let mut extract_worker = Worker::new(client).system_prompt(extract_system_prompt);
        extract_worker.set_cache_key(Some(self.session_id.to_string()));
        // Cumulative input-token meter + interceptor (mirror of
@ -1742,8 +1746,14 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
                return Err(e);
            }
        };
-        let mut worker =
+        let consolidation_system_prompt = match self.prompts.memory_consolidation_system() {
-            Worker::new(client).system_prompt(consolidate::CONSOLIDATION_SYSTEM_PROMPT);
+            Ok(p) => p,
            Err(e) => {
                lock.release_only();
                return Err(PodError::PromptCatalog(e));
            }
        };
        let mut worker = Worker::new(client).system_prompt(consolidation_system_prompt);
        worker.set_cache_key(Some(self.session_id.to_string()));
        // Memory tools are self-contained — they bypass ScopedFs and write
--- a/crates/pod/src/prompt/catalog.rs
+++ b/crates/pod/src/prompt/catalog.rs
@ -61,6 +61,10 @@ const INTERNAL_TOML: &str = include_str!("../../../../resources/prompts/internal
 pub enum PodPrompt {
    /// System prompt of the compaction (summary) Worker.
    CompactSystem,
    /// System prompt of the memory Phase 1 (extract) Worker.
    MemoryExtractSystem,
    /// System prompt of the memory Phase 2 (consolidation + tidy) Worker.
    MemoryConsolidationSystem,
    /// Wrapper around an incoming `Method::Notify` message injected into
    /// the next LLM request context as a transient system message.
    NotifyWrapper,
@ -89,6 +93,8 @@ impl PodPrompt {
    pub fn key(self) -> &'static str {
        match self {
            Self::CompactSystem => "compact_system",
            Self::MemoryExtractSystem => "memory_extract_system",
            Self::MemoryConsolidationSystem => "memory_consolidation_system",
            Self::NotifyWrapper => "notify_wrapper",
            Self::InterruptToolResultSummary => "interrupt_tool_result_summary",
            Self::InterruptSystemNote => "interrupt_system_note",
@ -104,6 +110,8 @@ impl PodPrompt {
    /// `INTERNAL_KEYS` (generated by `build.rs`).
    pub const ALL: &'static [PodPrompt] = &[
        PodPrompt::CompactSystem,
        PodPrompt::MemoryExtractSystem,
        PodPrompt::MemoryConsolidationSystem,
        PodPrompt::NotifyWrapper,
        PodPrompt::InterruptToolResultSummary,
        PodPrompt::InterruptSystemNote,
@ -115,6 +123,8 @@ impl PodPrompt {
    pub const KEYS: &'static [&'static str] = &[
        "compact_system",
        "memory_extract_system",
        "memory_consolidation_system",
        "notify_wrapper",
        "interrupt_tool_result_summary",
        "interrupt_system_note",
@ -301,6 +311,16 @@ impl PromptCatalog {
        self.render(PodPrompt::CompactSystem, Value::UNDEFINED)
    }
    /// Render `PodPrompt::MemoryExtractSystem` (no inputs).
    pub fn memory_extract_system(&self) -> Result<String, CatalogError> {
        self.render(PodPrompt::MemoryExtractSystem, Value::UNDEFINED)
    }
    /// Render `PodPrompt::MemoryConsolidationSystem` (no inputs).
    pub fn memory_consolidation_system(&self) -> Result<String, CatalogError> {
        self.render(PodPrompt::MemoryConsolidationSystem, Value::UNDEFINED)
    }
    /// Render `PodPrompt::NotifyWrapper` with `{{ message }}`.
    pub fn notify_wrapper(&self, message: &str) -> Result<String, CatalogError> {
        self.render(PodPrompt::NotifyWrapper, single("message", message))
--- a/crates/pod/tests/controller_test.rs
+++ b/crates/pod/tests/controller_test.rs
@ -570,6 +570,64 @@ async fn notify_while_idle_auto_starts_turn_and_injects_system_message() {
    assert!(last_item_text.contains("not a blocking request"));
 }
 // Sends a `PodEvent::TurnEnded` to a freshly spawned controller (no turn has
 // been started, so the Pod is presumably idle) and checks that:
 //   1. a turn runs to completion (`Event::TurnEnd` is observed within 2 s),
 //   2. the Pod reports `PodStatus::Idle` afterwards,
 //   3. exactly one LLM request was captured by the mock client, and
 //   4. the last item of that request carries the rendered notification text.
 #[tokio::test]
 async fn pod_event_turn_ended_while_idle_auto_starts_turn_and_injects_system_message() {
    let client = MockClient::new(simple_text_events());
    // `client` is moved into `make_pod` below; keep a clone so the requests
    // can be inspected afterwards (assumes the clone shares captured state —
    // TODO confirm against `MockClient`).
    let client_for_assert = client.clone();
    let pod = make_pod(client).await;
    let handle = spawn_controller(pod).await;
    // Subscribe before sending so no event emitted after the send is missed.
    let mut rx = handle.subscribe();
    handle
        .send(Method::PodEvent(protocol::PodEvent::TurnEnded {
            pod_name: "child".into(),
        }))
        .await
        .unwrap();
    let mut saw_turn_end = false;
    // Hard upper bound for the wait loop so a missing `TurnEnd` fails the
    // assertion below instead of hanging the test.
    let deadline = tokio::time::Instant::now() + std::time::Duration::from_secs(2);
    loop {
        tokio::select! {
            event = rx.recv() => {
                match event {
                    Ok(Event::TurnEnd { .. }) => { saw_turn_end = true; break; }
                    // A receive error ends the wait; the assertion below
                    // then reports the missing `TurnEnd`.
                    Err(_) => break,
                    // Any other event is ignored; keep waiting.
                    _ => {}
                }
            }
            _ = tokio::time::sleep_until(deadline) => break,
        }
    }
    assert!(
        saw_turn_end,
        "PodEvent::TurnEnded on idle Pod should auto-start a turn"
    );
    // NOTE(review): short pause before reading the status — presumably gives
    // the controller time to settle back to Idle after `TurnEnd`; confirm.
    tokio::time::sleep(std::time::Duration::from_millis(50)).await;
    assert_eq!(handle.shared_state.get_status(), PodStatus::Idle);
    let requests = client_for_assert.captured_requests();
    assert_eq!(
        requests.len(),
        1,
        "auto-kick should issue exactly one LLM request"
    );
    // Text of the final item in the single request; empty string when the
    // request has no items or the last item has no text.
    let last_item_text = requests[0]
        .items
        .last()
        .and_then(|i| i.as_text())
        .unwrap_or_default()
        .to_string();
    assert!(
        last_item_text.contains("[Notification]"),
        "injected system message missing, got: {last_item_text:?}"
    );
    // The rendered text must name the originating Pod ("child") and describe
    // the event ("finished a turn").
    assert!(
        last_item_text.contains("child") && last_item_text.contains("finished a turn"),
        "rendered TurnEnded text missing, got: {last_item_text:?}"
    );
 }
 #[tokio::test]
 async fn notify_while_running_does_not_emit_already_running_error() {
    let client = MockClient::new(simple_text_events());
@ -669,6 +727,61 @@ async fn socket_run_receives_events() {
    assert!(saw_turn_end, "should see turn_end via socket");
 }
 // Socket variant of the idle auto-start check: writes a
 // `PodEvent::TurnEnded` method over the controller's Unix socket and expects
 // both `Event::TurnStart` and `Event::TurnEnd` to be streamed back on the
 // same connection within 2 s.
 #[tokio::test]
 async fn socket_pod_event_turn_ended_while_idle_auto_starts_turn() {
    use protocol::stream::{JsonLineReader, JsonLineWriter};
    use tokio::net::UnixStream;
    let client = MockClient::new(simple_text_events());
    let pod = make_pod(client).await;
    let handle = spawn_controller(pod).await;
    // NOTE(review): short pause before connecting — presumably lets the
    // controller finish binding its socket; confirm.
    tokio::time::sleep(std::time::Duration::from_millis(50)).await;
    let sock_path = handle.runtime_dir.socket_path();
    let stream = UnixStream::connect(&sock_path).await.unwrap();
    // Split the stream so events can be read while the method is written.
    let (reader, writer) = stream.into_split();
    let mut reader = JsonLineReader::new(reader);
    let mut writer = JsonLineWriter::new(writer);
    writer
        .write(&Method::PodEvent(protocol::PodEvent::TurnEnded {
            pod_name: "child".into(),
        }))
        .await
        .unwrap();
    let mut saw_turn_start = false;
    let mut saw_turn_end = false;
    // Hard upper bound for the read loop so a missing event fails the
    // assertions below instead of hanging the test.
    let deadline = tokio::time::Instant::now() + std::time::Duration::from_secs(2);
    loop {
        tokio::select! {
            event = reader.next::<Event>() => {
                match event {
                    Ok(Some(Event::TurnStart { .. })) => saw_turn_start = true,
                    Ok(Some(Event::TurnEnd { .. })) => {
                        saw_turn_end = true;
                        break;
                    }
                    // `Ok(None)` or a read error ends the wait; the
                    // assertions below then report whatever was not seen.
                    Ok(None) | Err(_) => break,
                    // Any other event is ignored; keep reading.
                    _ => {}
                }
            }
            _ = tokio::time::sleep_until(deadline) => break,
        }
    }
    assert!(
        saw_turn_start,
        "PodEvent::TurnEnded via socket should auto-start a turn"
    );
    assert!(
        saw_turn_end,
        "auto-triggered turn should reach turn_end via socket"
    );
 }
 #[tokio::test]
 async fn socket_invalid_method_returns_error() {
    use protocol::stream::JsonLineReader;
--- a/docs/plan/memory-prompts.md
+++ b/docs/plan/memory-prompts.md
@ -17,6 +17,7 @@ memory 関連 prompt は種別を問わず、最低限以下を共有する:
 - **単純 append を優先しない**。既存 record に統合できるなら update を優先する
 - **session 固有の進行状態を書かない**。長期参照価値のある内容だけを memory に残す
 - **既存 docs と重複保存しない**。`AGENTS.md`、`docs/plan/*`、固定運用文書に既にある内容を再保存しない
 - **git で追える事実を memory に書かない**。ticket file の作成・編集、TODO 更新、branch / worktree 操作、commit / merge / push、「commit X で実装した」「ticket Y を作った」「worker Pod を spawn した」等は git diff / log が真実で、memory に写すと陳腐化する。commit ハッシュ・branch 名・worktree パス・ticket file 名・PR 番号と組み合わせないと意味を成さない記録は採用しない
 - **空出力を許容**する。保存価値が無ければ「何も追加しない」を正当な結果として扱う
 ### Phase 1: 活動抽出 prompt
@ -29,6 +30,13 @@ Phase 1 は「派生物を作る」段階ではなく、「起きたことを抽
 - 出力は schema 準拠の構造化データのみ。自由文の補足説明で schema 外情報を足さない
 - 対象が無ければ空配列を返す
 ノイズ防御として、抽出時点で以下を除外する:
 - `attempts`: git で追える操作 (ticket file / TODO 編集、branch / worktree 作成、commit / merge / push、既知 ticket への worker Pod spawn) は除外。残すのは git からは復元できない情報 (ビルド/テスト結果、外部 API 応答、観測されたバグ再現、後段の判断材料となる設計実験結果) に限る
 - `discussions`: 当日中に陳腐化する一過性 triage (「次に着手するチケットはどれか」「いま review すべきか後でか」など) は除外。session を越えて意味を持つ論点 (アーキテクチャの trade-off、恒常的な制約、再来する問い) のみ残す
 - `decisions`: rationale が「この session で X をした」になるものは除外。設計 / 方針 / 取り組み方の根拠でない記録は decision ではなく作業ログ
 - 本文中に commit ハッシュ・branch 名・worktree パス・ticket file 名・PR 番号など陳腐化する identifier を埋め込まない
 ### Phase 2: 統合 + 整理 prompt
 Phase 2 は既存 `memory/*`、`knowledge/*`、staging を見て、統合 phase と整理 phase を 1 セッション内で続けて回す。両 phase に共通する原則:
@ -44,6 +52,11 @@ Phase 2 は既存 `memory/*`、`knowledge/*`、staging を見て、統合 phase
 統合 phase の追加指示:
 - staging の活動ログを decisions / requests / summary / Knowledge update に落とし込む
 - staging の field ごとに宛先を分ける:
  - `decisions` (staging) → `memory/decisions/`。設計 / 方針 / 取り組み方の判断のみ。「この session で X をした」型は drop
  - `requests` (staging) → `memory/requests/`
  - `attempts` (staging) → 既定は drop。memory に `attempts/` フォルダは設けない。複数 attempts に通底する持続的な傾向だけ `summary.md` に 1 行で圧縮する例外あり
  - `discussions` (staging) → 設計 / 方針に決着していれば `decisions/` に統合、未決着でも問い自体が持続的なら `summary.md` に 1 行、それ以外は drop。`decisions/` に「議論した」だけの未決着メモを作らない
 - Knowledge 新規作成は候補レポート掲載 source 由来に限る（詳細は §Phase 2: Knowledge 書き込み prompt）
 整理 phase の追加指示（統合 phase 完了後、余力で実行）:
--- a/resources/prompts/internal.toml
+++ b/resources/prompts/internal.toml
@ -10,6 +10,10 @@
 [prompt]
 compact_system = "{% include \"$insomnia/internal/compact_system\" %}"
 memory_extract_system = "{% include \"$insomnia/internal/memory_extract_system\" %}"
 memory_consolidation_system = "{% include \"$insomnia/internal/memory_consolidation_system\" %}"
 notify_wrapper = """\
 [Notification]
 {{ message }}
--- a/resources/prompts/internal/memory_consolidation_system.md
+++ b/resources/prompts/internal/memory_consolidation_system.md
@ -1,12 +1,4 @@
-//! Phase 2 sub-Worker の system prompt。
+You are the Phase 2 consolidation worker for an INSOMNIA memory subsystem.
 //!
 //! 内容は `docs/plan/memory-prompts.md` §共通原則 / §Phase 2 統合 + 整理 /
 //! §Phase 2 Knowledge 書き込み を縮約。統合 phase / 整理 phase は同じ
 //! prompt 1 本で順に進める縛り（agent から見ると 1 セッション内のフェーズ
 //! 進行、別 trigger / 別 Worker は持たない、`docs/plan/memory.md` §整理
 //! の扱い）。
 pub const CONSOLIDATION_SYSTEM_PROMPT: &str = r#"You are the Phase 2 consolidation worker for an INSOMNIA memory subsystem.
 Your job is to take Phase 1 activity-log staging entries together with the workspace's current `memory/*` / `knowledge/*` records, then run two phases back-to-back in this single session:
@ -27,6 +19,7 @@ Your initial user message contains the staging entries, the full memory records,
 - **Update over create.** If an existing slug fits, edit it. Only create a new slug when no existing record fits and you can articulate why.
 - **`replaced` over delete.** When a Decision is superseded by a different one, mark the old one `status: replaced` with `replaced_by: <new-slug>`. Do not silently drop it.
 - **Don't duplicate static docs.** Skip content that already lives in `AGENTS.md`, `docs/plan/*`, or other fixed project documents.
 - **git is authoritative.** Do not record facts that git already tracks: ticket-file creation / edit, TODO updates, branch / worktree operations, commit / merge / push events, "implementation landed as commit X", "ticket Y was created", "worker Pod was spawned for Z". Diff and commit log are the truth there; memory shadowing it just rots. If a candidate write only makes sense when paired with a commit hash, branch name, worktree path, or ticket filename, drop it.
 - **Empty output is fine.** If a staging entry doesn't justify a memory write, skip it.
 - **Slug rules.** Slugs are kebab-case, short, recognisable, and must be unique within their kind. Same-slug create is a linter error — use Edit instead.
 - **Linter errors come back as tool errors.** When the memory linter rejects a write, read the error, fix the issue (missing frontmatter field, oversized body, unknown reference, etc.), and try again. Do not work around the rule.
@ -35,7 +28,11 @@ Your initial user message contains the staging entries, the full memory records,
 Walk every staging entry in the input. For each one:
- Add or update `decisions` / `requests` records as appropriate. Copy `sources` verbatim from the staging entry.
+- **Routing by staging field:**
  - `decisions` (staging) → `memory/decisions/<slug>.md`, but only when the entry is a real **design / policy / approach** judgement. "We did X in this session" is not a decision — it's a session log; drop it. The rationale must outlive the session.
  - `requests` (staging) → `memory/requests/<slug>.md`. Copy `sources` verbatim.
  - `attempts` (staging) → default is **drop**. Memory has no `attempts/` folder by design; do not invent one and do not stash attempts under `decisions/`. The only exception is when several attempts together form a durable trend worth a one-line summary in `memory/summary.md` (e.g. "X reliably fails on Y").
  - `discussions` (staging) → if the discussion settled on a design / policy direction during the slice, fold the conclusion into a `decisions/` record. If it stayed unresolved but the question itself is durable, fold a one-line note into `summary.md`. Otherwise drop. Never create a `decisions/` record that just records "we discussed X".
 - Update existing knowledge records when the staging activity refines them. Use `KnowledgeQuery` to find candidates before creating anything new.
 - **Knowledge creation is gated.** Only create a new `knowledge/<slug>.md` when the originating source appears in the supplied "Knowledge candidate report". When the report is empty (the metrics pipeline is still being built), do not create new knowledge — fold the activity into decisions / requests / summary or update existing knowledge instead.
 - Rewrite `memory/summary.md` only when needed. Aim for 1–5k tokens. Preserve the high-level shape (current focus, recent decisions, stable facts) while pruning stale items.
@ -66,4 +63,3 @@ When both phases are done, write a short final assistant message stating:
 - anything you intentionally left alone and why.
 Then end the turn. Do not ask questions — there is no human in the loop for this run.
 "#;
--- a/resources/prompts/internal/memory_extract_system.md
+++ b/resources/prompts/internal/memory_extract_system.md
@ -1,10 +1,4 @@
-//! Phase 1 sub-Worker の system prompt。
+You are the Phase 1 activity extractor for an INSOMNIA memory subsystem.
 //!
 //! 内容は `docs/plan/memory-prompts.md` §共通原則 / §Phase 1 を縮約。
 //! 「派生物を作らず、起きたことを抽出する」段階に縛り、JSON schema
 //! 準拠以外の自由文を許さない。
 pub const EXTRACT_SYSTEM_PROMPT: &str = r#"You are the Phase 1 activity extractor for an INSOMNIA memory subsystem.
 Your single job: read the supplied conversation slice and emit a structured JSON record of "what happened" via the `write_extracted` tool. You are not consolidating, summarising, or generating knowledge — that is a later phase's job.
@ -28,5 +22,13 @@ Your single job: read the supplied conversation slice and emit a structured JSON
 - Do not duplicate content already captured by static project docs (AGENTS.md, plan documents) — those are not "what happened in this slice".
 - Prefer concise, fact-shaped strings. Do not pad rationale or summary fields.
 # Anti-noise rules
 git is the source of truth for what happened to files, branches, commits, tickets, and worktrees. Memory must NOT shadow it.
 - `attempts`: skip any action whose substance is a git-trackable operation — creating / editing a ticket file, adding a TODO entry, opening a branch / worktree, running `commit` / `merge` / `push`, spawning a worker Pod for a known ticket. The corresponding diff / commit log already records it. Keep `attempts` for things that are NOT recoverable from git: build / test outcomes, external API responses, observed bug reproductions, design experiments whose results inform later judgement.
 - `discussions`: skip transient triage that goes stale within the day — "which ticket to start next", "should we review now or later", checklist-style status reads. Keep discussions whose points outlive the session (architectural trade-offs, durable constraints, recurring questions).
 - `decisions`: the rationale must be a design / policy / approach reason, not "we did X in this session". Recording "a ticket was created for Y" or "implementation landed as commit Z" is NOT a decision — those belong to git, not memory.
 - Do not embed identifiers that age out of relevance: commit hashes, branch names, worktree paths, ticket file names, PR numbers. If a record is only meaningful with such an identifier, the record itself is probably session-local and should be skipped.
 When you have produced the JSON, call `write_extracted` and end the turn. No follow-up text.
 "#;