From 261c682e5e805fc0e1a77405a6ed8a27517981c5 Mon Sep 17 00:00:00 2001 From: Hare Date: Sun, 3 May 2026 00:27:10 +0900 Subject: [PATCH] =?UTF-8?q?update:=20memory=E3=82=B7=E3=82=B9=E3=83=86?= =?UTF-8?q?=E3=83=A0=E5=91=A8=E3=82=8A=E3=81=AE=E3=83=97=E3=83=AD=E3=83=B3?= =?UTF-8?q?=E3=83=97=E3=83=88=E3=81=AE=E6=95=B4=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/memory/src/consolidate/mod.rs | 11 +- crates/memory/src/extract/mod.rs | 9 +- crates/pod/src/pod.rs | 16 ++- crates/pod/src/prompt/catalog.rs | 20 ++++ crates/pod/tests/controller_test.rs | 113 ++++++++++++++++++ docs/plan/memory-prompts.md | 13 ++ resources/prompts/internal.toml | 4 + .../internal/memory_consolidation_system.md | 18 ++- .../prompts/internal/memory_extract_system.md | 18 +-- 9 files changed, 189 insertions(+), 33 deletions(-) rename crates/memory/src/consolidate/prompt.rs => resources/prompts/internal/memory_consolidation_system.md (75%) rename crates/memory/src/extract/prompt.rs => resources/prompts/internal/memory_extract_system.md (56%) diff --git a/crates/memory/src/consolidate/mod.rs b/crates/memory/src/consolidate/mod.rs index a103aad4..34e76dac 100644 --- a/crates/memory/src/consolidate/mod.rs +++ b/crates/memory/src/consolidate/mod.rs @@ -5,20 +5,20 @@ //! の観点で整理する disposable Worker を、Pod 側が組み立てるための //! ヘルパー群を提供する。Pod は次の手順で sub-Worker を構築する: //! -//! - [`CONSOLIDATION_SYSTEM_PROMPT`] を sub-Worker の system prompt に //! - [`build_consolidate_input`] を sub-Worker の最初の user 入力に //! - memory 専用 Tool (read / write / edit) と Knowledge / memory 検索ツールを登録 //! - [`StagingLock::acquire`] で並走防止 + consumed ID 確定 //! - sub-Worker run 完了後、[`StagingLock::release_with_cleanup`] で //! consumed ID 分の staging のみ削除し、占有ファイルを解放 //! -//! Knowledge 化候補レポートと使用頻度メトリクスは別チケットで供給される -//! 想定。本モジュール時点では空入力として扱い、prompt 側の説明だけ -//! 残しておく(`docs/plan/memory.md` §Phase 2 / 整理材料)。 +//! system prompt は Pod の `PromptCatalog` +//! 
(`PodPrompt::MemoryConsolidationSystem`) で管理される。Knowledge 化候補 +//! レポートと使用頻度メトリクスは別チケットで供給される想定。本モジュール +//! 時点では空入力として扱い、prompt 側の説明だけ残しておく +//! (`docs/plan/memory.md` §Phase 2 / 整理材料)。 mod input; mod lock; -mod prompt; mod staging; mod tidy; @@ -27,6 +27,5 @@ pub use input::{ render_staging_records, render_tidy_hints, }; pub use lock::{LockError, LockRecord, StagingLock}; -pub use prompt::CONSOLIDATION_SYSTEM_PROMPT; pub use staging::{StagingEntry, list_staging_entries}; pub use tidy::{TidyHints, collect_tidy_hints}; diff --git a/crates/memory/src/extract/mod.rs b/crates/memory/src/extract/mod.rs index 31517874..8f9c9775 100644 --- a/crates/memory/src/extract/mod.rs +++ b/crates/memory/src/extract/mod.rs @@ -4,13 +4,14 @@ //! 出力を `/.insomnia/memory/_staging/.json` に書き出す //! ヘルパーを提供する。Pod 側はこのモジュールから: //! -//! - [`EXTRACT_SYSTEM_PROMPT`] を sub-Worker の system prompt に //! - [`build_extract_input`] を sub-Worker の最初の user 入力に //! - [`write_extracted_tool`] を唯一のツールとして //! - [`write_staging`] で受け取った JSON を staging に書き出し //! -//! の順で組み立てる。pointer 永続化(session-store の -//! `LogEntry::Extension`、domain `"memory.extract"`)は Pod 側が責務を持つ。 +//! の順で組み立てる。system prompt は Pod の `PromptCatalog` +//! (`PodPrompt::MemoryExtractSystem`) で管理される。pointer 永続化 +//! (session-store の `LogEntry::Extension`、domain `"memory.extract"`)は +//! Pod 側が責務を持つ。 //! //! 出力 JSON の wrap は [`write_staging`] が `source: { session_id, range }` //! 
を機械付与する形で担当し、LLM には source を推論させない。 @@ -18,7 +19,6 @@ mod input; mod payload; mod pointer; -mod prompt; mod staging; mod tool; @@ -27,7 +27,6 @@ pub use payload::{ AttemptEntry, DecisionEntry, DiscussionEntry, ExtractedPayload, RequestEntry, StagingRecord, }; pub use pointer::{ExtractPointerPayload, fold_pointer}; -pub use prompt::EXTRACT_SYSTEM_PROMPT; pub use staging::{StagingError, write_staging}; pub use tool::{ExtractWorkerContext, write_extracted_tool}; diff --git a/crates/pod/src/pod.rs b/crates/pod/src/pod.rs index 5c1b0b81..cdc4fd13 100644 --- a/crates/pod/src/pod.rs +++ b/crates/pod/src/pod.rs @@ -1549,7 +1549,11 @@ impl Pod { .unwrap_or(manifest::defaults::MEMORY_EXTRACT_WORKER_MAX_INPUT_TOKENS); let client = self.build_extractor_client(memory_cfg)?; - let mut extract_worker = Worker::new(client).system_prompt(extract::EXTRACT_SYSTEM_PROMPT); + let extract_system_prompt = self + .prompts + .memory_extract_system() + .map_err(PodError::PromptCatalog)?; + let mut extract_worker = Worker::new(client).system_prompt(extract_system_prompt); extract_worker.set_cache_key(Some(self.session_id.to_string())); // Cumulative input-token meter + interceptor (mirror of @@ -1742,8 +1746,14 @@ impl Pod { return Err(e); } }; - let mut worker = - Worker::new(client).system_prompt(consolidate::CONSOLIDATION_SYSTEM_PROMPT); + let consolidation_system_prompt = match self.prompts.memory_consolidation_system() { + Ok(p) => p, + Err(e) => { + lock.release_only(); + return Err(PodError::PromptCatalog(e)); + } + }; + let mut worker = Worker::new(client).system_prompt(consolidation_system_prompt); worker.set_cache_key(Some(self.session_id.to_string())); // Memory tools are self-contained — they bypass ScopedFs and write diff --git a/crates/pod/src/prompt/catalog.rs b/crates/pod/src/prompt/catalog.rs index 28e2632d..a8944f91 100644 --- a/crates/pod/src/prompt/catalog.rs +++ b/crates/pod/src/prompt/catalog.rs @@ -61,6 +61,10 @@ const INTERNAL_TOML: &str = 
include_str!("../../../../resources/prompts/internal pub enum PodPrompt { /// System prompt of the compaction (summary) Worker. CompactSystem, + /// System prompt of the memory Phase 1 (extract) Worker. + MemoryExtractSystem, + /// System prompt of the memory Phase 2 (consolidation + tidy) Worker. + MemoryConsolidationSystem, /// Wrapper around an incoming `Method::Notify` message injected into /// the next LLM request context as a transient system message. NotifyWrapper, @@ -89,6 +93,8 @@ impl PodPrompt { pub fn key(self) -> &'static str { match self { Self::CompactSystem => "compact_system", + Self::MemoryExtractSystem => "memory_extract_system", + Self::MemoryConsolidationSystem => "memory_consolidation_system", Self::NotifyWrapper => "notify_wrapper", Self::InterruptToolResultSummary => "interrupt_tool_result_summary", Self::InterruptSystemNote => "interrupt_system_note", @@ -104,6 +110,8 @@ impl PodPrompt { /// `INTERNAL_KEYS` (generated by `build.rs`). pub const ALL: &'static [PodPrompt] = &[ PodPrompt::CompactSystem, + PodPrompt::MemoryExtractSystem, + PodPrompt::MemoryConsolidationSystem, PodPrompt::NotifyWrapper, PodPrompt::InterruptToolResultSummary, PodPrompt::InterruptSystemNote, @@ -115,6 +123,8 @@ impl PodPrompt { pub const KEYS: &'static [&'static str] = &[ "compact_system", + "memory_extract_system", + "memory_consolidation_system", "notify_wrapper", "interrupt_tool_result_summary", "interrupt_system_note", @@ -301,6 +311,16 @@ impl PromptCatalog { self.render(PodPrompt::CompactSystem, Value::UNDEFINED) } + /// Render `PodPrompt::MemoryExtractSystem` (no inputs). + pub fn memory_extract_system(&self) -> Result { + self.render(PodPrompt::MemoryExtractSystem, Value::UNDEFINED) + } + + /// Render `PodPrompt::MemoryConsolidationSystem` (no inputs). + pub fn memory_consolidation_system(&self) -> Result { + self.render(PodPrompt::MemoryConsolidationSystem, Value::UNDEFINED) + } + /// Render `PodPrompt::NotifyWrapper` with `{{ message }}`. 
pub fn notify_wrapper(&self, message: &str) -> Result { self.render(PodPrompt::NotifyWrapper, single("message", message)) diff --git a/crates/pod/tests/controller_test.rs b/crates/pod/tests/controller_test.rs index 39792920..8ea34269 100644 --- a/crates/pod/tests/controller_test.rs +++ b/crates/pod/tests/controller_test.rs @@ -570,6 +570,64 @@ async fn notify_while_idle_auto_starts_turn_and_injects_system_message() { assert!(last_item_text.contains("not a blocking request")); } +#[tokio::test] +async fn pod_event_turn_ended_while_idle_auto_starts_turn_and_injects_system_message() { + let client = MockClient::new(simple_text_events()); + let client_for_assert = client.clone(); + let pod = make_pod(client).await; + let handle = spawn_controller(pod).await; + let mut rx = handle.subscribe(); + + handle + .send(Method::PodEvent(protocol::PodEvent::TurnEnded { + pod_name: "child".into(), + })) + .await + .unwrap(); + + let mut saw_turn_end = false; + let deadline = tokio::time::Instant::now() + std::time::Duration::from_secs(2); + loop { + tokio::select! { + event = rx.recv() => { + match event { + Ok(Event::TurnEnd { .. 
}) => { saw_turn_end = true; break; } + Err(_) => break, + _ => {} + } + } + _ = tokio::time::sleep_until(deadline) => break, + } + } + assert!( + saw_turn_end, + "PodEvent::TurnEnded on idle Pod should auto-start a turn" + ); + tokio::time::sleep(std::time::Duration::from_millis(50)).await; + assert_eq!(handle.shared_state.get_status(), PodStatus::Idle); + + let requests = client_for_assert.captured_requests(); + assert_eq!( + requests.len(), + 1, + "auto-kick should issue exactly one LLM request" + ); + let last_item_text = requests[0] + .items + .last() + .and_then(|i| i.as_text()) + .unwrap_or_default() + .to_string(); + assert!( + last_item_text.contains("[Notification]"), + "injected system message missing, got: {last_item_text:?}" + ); + assert!( + last_item_text.contains("child") && last_item_text.contains("finished a turn"), + "rendered TurnEnded text missing, got: {last_item_text:?}" + ); +} + #[tokio::test] async fn notify_while_running_does_not_emit_already_running_error() { let client = MockClient::new(simple_text_events()); @@ -669,6 +727,61 @@ async fn socket_run_receives_events() { assert!(saw_turn_end, "should see turn_end via socket"); } +#[tokio::test] +async fn socket_pod_event_turn_ended_while_idle_auto_starts_turn() { + use protocol::stream::{JsonLineReader, JsonLineWriter}; + use tokio::net::UnixStream; + + let client = MockClient::new(simple_text_events()); + let pod = make_pod(client).await; + let handle = spawn_controller(pod).await; + + tokio::time::sleep(std::time::Duration::from_millis(50)).await; + + let sock_path = handle.runtime_dir.socket_path(); + let stream = UnixStream::connect(&sock_path).await.unwrap(); + let (reader, writer) = stream.into_split(); + let mut reader = JsonLineReader::new(reader); + let mut writer = JsonLineWriter::new(writer); + + writer + .write(&Method::PodEvent(protocol::PodEvent::TurnEnded { + pod_name: "child".into(), + })) + .await + .unwrap(); + + let mut saw_turn_start = false; + let mut saw_turn_end = 
false; + + let deadline = tokio::time::Instant::now() + std::time::Duration::from_secs(2); + loop { + tokio::select! { + event = reader.next::() => { + match event { + Ok(Some(Event::TurnStart { .. })) => saw_turn_start = true, + Ok(Some(Event::TurnEnd { .. })) => { + saw_turn_end = true; + break; + } + Ok(None) | Err(_) => break, + _ => {} + } + } + _ = tokio::time::sleep_until(deadline) => break, + } + } + + assert!( + saw_turn_start, + "PodEvent::TurnEnded via socket should auto-start a turn" + ); + assert!( + saw_turn_end, + "auto-triggered turn should reach turn_end via socket" + ); +} + #[tokio::test] async fn socket_invalid_method_returns_error() { use protocol::stream::JsonLineReader; diff --git a/docs/plan/memory-prompts.md b/docs/plan/memory-prompts.md index 147067f0..1a6b50ff 100644 --- a/docs/plan/memory-prompts.md +++ b/docs/plan/memory-prompts.md @@ -17,6 +17,7 @@ memory 関連 prompt は種別を問わず、最低限以下を共有する: - **単純 append を優先しない**。既存 record に統合できるなら update を優先する - **session 固有の進行状態を書かない**。長期参照価値のある内容だけを memory に残す - **既存 docs と重複保存しない**。`AGENTS.md`、`docs/plan/*`、固定運用文書に既にある内容を再保存しない +- **git で追える事実を memory に書かない**。ticket file の作成・編集、TODO 更新、branch / worktree 操作、commit / merge / push、「commit X で実装した」「ticket Y を作った」「worker Pod を spawn した」等は git diff / log が真実で、memory に写すと陳腐化する。commit ハッシュ・branch 名・worktree パス・ticket file 名・PR 番号と組み合わせないと意味を成さない記録は採用しない - **空出力を許容**する。保存価値が無ければ「何も追加しない」を正当な結果として扱う ### Phase 1: 活動抽出 prompt @@ -29,6 +30,13 @@ Phase 1 は「派生物を作る」段階ではなく、「起きたことを抽 - 出力は schema 準拠の構造化データのみ。自由文の補足説明で schema 外情報を足さない - 対象が無ければ空配列を返す +ノイズ防御として、抽出時点で以下を除外する: + +- `attempts`: git で追える操作 (ticket file / TODO 編集、branch / worktree 作成、commit / merge / push、既知 ticket への worker Pod spawn) は除外。残すのは git からは復元できない情報 (ビルド/テスト結果、外部 API 応答、観測されたバグ再現、後段の判断材料となる設計実験結果) に限る +- `discussions`: 当日中に陳腐化する一過性 triage (「次に着手するチケットはどれか」「いま review すべきか後でか」など) は除外。session を越えて意味を持つ論点 (アーキテクチャの trade-off、恒常的な制約、再来する問い) のみ残す +- `decisions`: rationale が「この session で X をした」になるものは除外。設計 / 
方針 / 取り組み方の根拠でない記録は decision ではなく作業ログ +- 本文中に commit ハッシュ・branch 名・worktree パス・ticket file 名・PR 番号など陳腐化する identifier を埋め込まない + ### Phase 2: 統合 + 整理 prompt Phase 2 は既存 `memory/*`、`knowledge/*`、staging を見て、統合 phase と整理 phase を 1 セッション内で続けて回す。両 phase に共通する原則: @@ -44,6 +52,11 @@ Phase 2 は既存 `memory/*`、`knowledge/*`、staging を見て、統合 phase 統合 phase の追加指示: - staging の活動ログを decisions / requests / summary / Knowledge update に落とし込む +- staging の field ごとに宛先を分ける: + - `decisions` (staging) → `memory/decisions/`。設計 / 方針 / 取り組み方の判断のみ。「この session で X をした」型は drop + - `requests` (staging) → `memory/requests/` + - `attempts` (staging) → 既定は drop。memory に `attempts/` フォルダは設けない。複数 attempts に通底する持続的な傾向だけ `summary.md` に 1 行で圧縮する例外あり + - `discussions` (staging) → 設計 / 方針に決着していれば `decisions/` に統合、未決着でも問い自体が持続的なら `summary.md` に 1 行、それ以外は drop。`decisions/` に「議論した」だけの未決着メモを作らない - Knowledge 新規作成は候補レポート掲載 source 由来に限る(詳細は §Phase 2: Knowledge 書き込み prompt) 整理 phase の追加指示(統合 phase 完了後、余力で実行): diff --git a/resources/prompts/internal.toml b/resources/prompts/internal.toml index a6038326..e4dff27c 100644 --- a/resources/prompts/internal.toml +++ b/resources/prompts/internal.toml @@ -10,6 +10,10 @@ [prompt] compact_system = "{% include \"$insomnia/internal/compact_system\" %}" +memory_extract_system = "{% include \"$insomnia/internal/memory_extract_system\" %}" + +memory_consolidation_system = "{% include \"$insomnia/internal/memory_consolidation_system\" %}" + notify_wrapper = """\ [Notification] {{ message }} diff --git a/crates/memory/src/consolidate/prompt.rs b/resources/prompts/internal/memory_consolidation_system.md similarity index 75% rename from crates/memory/src/consolidate/prompt.rs rename to resources/prompts/internal/memory_consolidation_system.md index 55c52a80..02392dfe 100644 --- a/crates/memory/src/consolidate/prompt.rs +++ b/resources/prompts/internal/memory_consolidation_system.md @@ -1,12 +1,4 @@ -//! Phase 2 sub-Worker の system prompt。 -//! -//! 
内容は `docs/plan/memory-prompts.md` §共通原則 / §Phase 2 統合 + 整理 / -//! §Phase 2 Knowledge 書き込み を縮約。統合 phase / 整理 phase は同じ -//! prompt 1 本で順に進める縛り(agent から見ると 1 セッション内のフェーズ -//! 進行、別 trigger / 別 Worker は持たない、`docs/plan/memory.md` §整理 -//! の扱い)。 - -pub const CONSOLIDATION_SYSTEM_PROMPT: &str = r#"You are the Phase 2 consolidation worker for an INSOMNIA memory subsystem. +You are the Phase 2 consolidation worker for an INSOMNIA memory subsystem. Your job is to take Phase 1 activity-log staging entries together with the workspace's current `memory/*` / `knowledge/*` records, then run two phases back-to-back in this single session: @@ -27,6 +19,7 @@ Your initial user message contains the staging entries, the full memory records, - **Update over create.** If an existing slug fits, edit it. Only create a new slug when no existing record fits and you can articulate why. - **`replaced` over delete.** When a Decision is superseded by a different one, mark the old one `status: replaced` with `replaced_by: `. Do not silently drop it. - **Don't duplicate static docs.** Skip content that already lives in `AGENTS.md`, `docs/plan/*`, or other fixed project documents. +- **git is authoritative.** Do not record facts that git already tracks: ticket-file creation / edit, TODO updates, branch / worktree operations, commit / merge / push events, "implementation landed as commit X", "ticket Y was created", "worker Pod was spawned for Z". Diff and commit log are the truth there; memory shadowing it just rots. If a candidate write only makes sense when paired with a commit hash, branch name, worktree path, or ticket filename, drop it. - **Empty output is fine.** If a staging entry doesn't justify a memory write, skip it. - **Slug rules.** Slugs are kebab-case, short, recognisable, and must be unique within their kind. Same-slug create is a linter error — use Edit instead. 
- **Linter errors come back as tool errors.** When the memory linter rejects a write, read the error, fix the issue (missing frontmatter field, oversized body, unknown reference, etc.), and try again. Do not work around the rule. @@ -35,7 +28,11 @@ Your initial user message contains the staging entries, the full memory records, Walk every staging entry in the input. For each one: -- Add or update `decisions` / `requests` records as appropriate. Copy `sources` verbatim from the staging entry. +- **Routing by staging field:** + - `decisions` (staging) → `memory/decisions/<slug>.md`, but only when the entry is a real **design / policy / approach** judgement. "We did X in this session" is not a decision — it's a session log; drop it. The rationale must outlive the session. + - `requests` (staging) → `memory/requests/<slug>.md`. Copy `sources` verbatim. + - `attempts` (staging) → default is **drop**. Memory has no `attempts/` folder by design; do not invent one and do not stash attempts under `decisions/`. The only exception is when several attempts together form a durable trend worth a one-line summary in `memory/summary.md` (e.g. "X reliably fails on Y"). + - `discussions` (staging) → if the discussion settled on a design / policy direction during the slice, fold the conclusion into a `decisions/` record. If it stayed unresolved but the question itself is durable, fold a one-line note into `summary.md`. Otherwise drop. Never create a `decisions/` record that just records "we discussed X". - Update existing knowledge records when the staging activity refines them. Use `KnowledgeQuery` to find candidates before creating anything new. - **Knowledge creation is gated.** Only create a new `knowledge/<slug>.md` when the originating source appears in the supplied "Knowledge candidate report". When the report is empty (the metrics pipeline is still being built), do not create new knowledge — fold the activity into decisions / requests / summary or update existing knowledge instead.
- Rewrite `memory/summary.md` only when needed. Aim for 1–5k tokens. Preserve the high-level shape (current focus, recent decisions, stable facts) while pruning stale items. @@ -66,4 +63,3 @@ When both phases are done, write a short final assistant message stating: - anything you intentionally left alone and why. Then end the turn. Do not ask questions — there is no human in the loop for this run. -"#; diff --git a/crates/memory/src/extract/prompt.rs b/resources/prompts/internal/memory_extract_system.md similarity index 56% rename from crates/memory/src/extract/prompt.rs rename to resources/prompts/internal/memory_extract_system.md index 767bdd11..adf3b4ec 100644 --- a/crates/memory/src/extract/prompt.rs +++ b/resources/prompts/internal/memory_extract_system.md @@ -1,10 +1,4 @@ -//! Phase 1 sub-Worker の system prompt。 -//! -//! 内容は `docs/plan/memory-prompts.md` §共通原則 / §Phase 1 を縮約。 -//! 「派生物を作らず、起きたことを抽出する」段階に縛り、JSON schema -//! 準拠以外の自由文を許さない。 - -pub const EXTRACT_SYSTEM_PROMPT: &str = r#"You are the Phase 1 activity extractor for an INSOMNIA memory subsystem. +You are the Phase 1 activity extractor for an INSOMNIA memory subsystem. Your single job: read the supplied conversation slice and emit a structured JSON record of "what happened" via the `write_extracted` tool. You are not consolidating, summarising, or generating knowledge — that is a later phase's job. @@ -28,5 +22,13 @@ Your single job: read the supplied conversation slice and emit a structured JSON - Do not duplicate content already captured by static project docs (AGENTS.md, plan documents) — those are not "what happened in this slice". - Prefer concise, fact-shaped strings. Do not pad rationale or summary fields. +# Anti-noise rules + +git is the source of truth for what happened to files, branches, commits, tickets, and worktrees. Memory must NOT shadow it. 
+ +- `attempts`: skip any action whose substance is a git-trackable operation — creating / editing a ticket file, adding a TODO entry, opening a branch / worktree, running `commit` / `merge` / `push`, spawning a worker Pod for a known ticket. The corresponding diff / commit log already records it. Keep `attempts` for things that are NOT recoverable from git: build / test outcomes, external API responses, observed bug reproductions, design experiments whose results inform later judgement. +- `discussions`: skip transient triage that goes stale within the day — "which ticket to start next", "should we review now or later", checklist-style status reads. Keep discussions whose points outlive the session (architectural trade-offs, durable constraints, recurring questions). +- `decisions`: the rationale must be a design / policy / approach reason, not "we did X in this session". Recording "a ticket was created for Y" or "implementation landed as commit Z" is NOT a decision — those belong to git, not memory. +- Do not embed identifiers that age out of relevance: commit hashes, branch names, worktree paths, ticket file names, PR numbers. If a record is only meaningful with such an identifier, the record itself is probably session-local and should be skipped. + When you have produced the JSON, call `write_extracted` and end the turn. No follow-up text. -"#;