update: memoryシステム周りのプロンプトの整理

docs(tickets): memory-consolidation-drop-input-cap完了
update: Consolidationの不要なToken上限の削除
2026-05-03 00:27:10 +09:00 · 2026-05-02 23:57:36 +09:00 · 2026-05-02 23:48:33 +09:00 · 2026-05-02 23:48:01 +09:00
15 changed files with 303 additions and 87 deletions
--- a/TODO.md
+++ b/TODO.md
@ -16,6 +16,10 @@
  - [ ] ユーザーマニフェストのモデル設定 wizard → [tickets/tui-user-model-setup.md](tickets/tui-user-model-setup.md)
 - [ ] サブミット入力
  - [ ] FileRef リゾルバ → [tickets/submit-file-ref-resolver.md](tickets/submit-file-ref-resolver.md)
 - [ ] Manifest: Tool Output / File Upload 上限の分離とデフォルト緩和 → [tickets/manifest-output-upload-limits.md](tickets/manifest-output-upload-limits.md)
 - [ ] メモリ機構
  - [ ] 使用頻度メトリクス + Knowledge 化候補レポート → [tickets/memory-usage-metrics.md](tickets/memory-usage-metrics.md)
  - [ ] Phase 2 累積入力トークン上限の撤去 → [tickets/memory-consolidation-drop-input-cap.md](tickets/memory-consolidation-drop-input-cap.md)
 - [ ] セッション内 TODO ツール（注意機構付き） → [tickets/session-todo.md](tickets/session-todo.md)
 - ワークスペースのメモリーをLintするヘッドレスCLI
 - system-reminder 注入機構の汎用化（2件目の利用者が出た時に検討。タグ形式と「履歴を汚さない」原則は session-todo で先行確立）
--- a/crates/manifest/src/config.rs
+++ b/crates/manifest/src/config.rs
@ -218,9 +218,6 @@ impl MemoryConfig {
                .extract_worker_max_input_tokens
                .or(self.extract_worker_max_input_tokens),
            consolidation_model: upper.consolidation_model.or(self.consolidation_model),
            consolidation_worker_max_input_tokens: upper
                .consolidation_worker_max_input_tokens
                .or(self.consolidation_worker_max_input_tokens),
            consolidation_threshold_files: upper
                .consolidation_threshold_files
                .or(self.consolidation_threshold_files),
--- a/crates/manifest/src/defaults.rs
+++ b/crates/manifest/src/defaults.rs
@ -50,8 +50,3 @@ pub const COMPACT_DEFAULT_REFERENCE_COUNT: usize = 5;
 /// own LLM calls. Exceeding this aborts the extract run.
 /// See [`crate::MemoryConfig::extract_worker_max_input_tokens`].
 pub const MEMORY_EXTRACT_WORKER_MAX_INPUT_TOKENS: u64 = 30_000;
 /// Cumulative input-token cap for the memory Phase 2 (consolidation)
 /// worker's own LLM calls. Exceeding this aborts the consolidation run.
 /// See [`crate::MemoryConfig::consolidation_worker_max_input_tokens`].
 pub const MEMORY_CONSOLIDATION_WORKER_MAX_INPUT_TOKENS: u64 = 80_000;
--- a/crates/manifest/src/lib.rs
+++ b/crates/manifest/src/lib.rs
@ -90,11 +90,6 @@ pub struct MemoryConfig {
    /// Reasoning-class models are recommended.
    #[serde(default)]
    pub consolidation_model: Option<ModelManifest>,
    /// Cumulative input-token cap for the consolidation worker's own
    /// LLM calls. Exceeding this aborts the consolidation run. `None` ⇒
    /// [`defaults::MEMORY_CONSOLIDATION_WORKER_MAX_INPUT_TOKENS`].
    #[serde(default)]
    pub consolidation_worker_max_input_tokens: Option<u64>,
    /// Phase 2 trigger: file-count threshold of `_staging/`. Phase 2
    /// fires when the staging directory has at least this many entries.
    /// Either threshold reaching its limit fires Phase 2 (logical OR).
--- a/crates/memory/src/consolidate/mod.rs
+++ b/crates/memory/src/consolidate/mod.rs
@ -5,20 +5,20 @@
 //! の観点で整理する disposable Worker を、Pod 側が組み立てるための
 //! ヘルパー群を提供する。Pod は次の手順で sub-Worker を構築する:
 //!
 //! - [`CONSOLIDATION_SYSTEM_PROMPT`] を sub-Worker の system prompt に
 //! - [`build_consolidate_input`] を sub-Worker の最初の user 入力に
 //! - memory 専用 Tool (read / write / edit) と Knowledge / memory 検索ツールを登録
 //! - [`StagingLock::acquire`] で並走防止 + consumed ID 確定
 //! - sub-Worker run 完了後、[`StagingLock::release_with_cleanup`] で
 //!   consumed ID 分の staging のみ削除し、占有ファイルを解放
 //!
-//! Knowledge 化候補レポートと使用頻度メトリクスは別チケットで供給される
+//! system prompt は Pod の `PromptCatalog`
-//! 想定。本モジュール時点では空入力として扱い、prompt 側の説明だけ
+//! (`PodPrompt::MemoryConsolidationSystem`) で管理される。Knowledge 化候補
-//! 残しておく（`docs/plan/memory.md` §Phase 2 / 整理材料）。
+//! レポートと使用頻度メトリクスは別チケットで供給される想定。本モジュール
 //! 時点では空入力として扱い、prompt 側の説明だけ残しておく
 //! （`docs/plan/memory.md` §Phase 2 / 整理材料）。
 mod input;
 mod lock;
 mod prompt;
 mod staging;
 mod tidy;
@ -27,6 +27,5 @@ pub use input::{
    render_staging_records, render_tidy_hints,
 };
 pub use lock::{LockError, LockRecord, StagingLock};
 pub use prompt::CONSOLIDATION_SYSTEM_PROMPT;
 pub use staging::{StagingEntry, list_staging_entries};
 pub use tidy::{TidyHints, collect_tidy_hints};
--- a/crates/memory/src/extract/mod.rs
+++ b/crates/memory/src/extract/mod.rs
@ -4,13 +4,14 @@
 //! 出力を `<workspace>/.insomnia/memory/_staging/<id>.json` に書き出す
 //! ヘルパーを提供する。Pod 側はこのモジュールから:
 //!
 //! - [`EXTRACT_SYSTEM_PROMPT`] を sub-Worker の system prompt に
 //! - [`build_extract_input`] を sub-Worker の最初の user 入力に
 //! - [`write_extracted_tool`] を唯一のツールとして
 //! - [`write_staging`] で受け取った JSON を staging に書き出し
 //!
-//! の順で組み立てる。pointer 永続化（session-store の
+//! の順で組み立てる。system prompt は Pod の `PromptCatalog`
-//! `LogEntry::Extension`、domain `"memory.extract"`）は Pod 側が責務を持つ。
+//! (`PodPrompt::MemoryExtractSystem`) で管理される。pointer 永続化
 //! （session-store の `LogEntry::Extension`、domain `"memory.extract"`）は
 //! Pod 側が責務を持つ。
 //!
 //! 出力 JSON の wrap は [`write_staging`] が `source: { session_id, range }`
 //! を機械付与する形で担当し、LLM には source を推論させない。
@ -18,7 +19,6 @@
 mod input;
 mod payload;
 mod pointer;
 mod prompt;
 mod staging;
 mod tool;
@ -27,7 +27,6 @@ pub use payload::{
    AttemptEntry, DecisionEntry, DiscussionEntry, ExtractedPayload, RequestEntry, StagingRecord,
 };
 pub use pointer::{ExtractPointerPayload, fold_pointer};
 pub use prompt::EXTRACT_SYSTEM_PROMPT;
 pub use staging::{StagingError, write_staging};
 pub use tool::{ExtractWorkerContext, write_extracted_tool};
--- a/crates/pod/src/pod.rs
+++ b/crates/pod/src/pod.rs
@ -1549,7 +1549,11 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
            .unwrap_or(manifest::defaults::MEMORY_EXTRACT_WORKER_MAX_INPUT_TOKENS);
        let client = self.build_extractor_client(memory_cfg)?;
-        let mut extract_worker = Worker::new(client).system_prompt(extract::EXTRACT_SYSTEM_PROMPT);
+        let extract_system_prompt = self
            .prompts
            .memory_extract_system()
            .map_err(PodError::PromptCatalog)?;
        let mut extract_worker = Worker::new(client).system_prompt(extract_system_prompt);
        extract_worker.set_cache_key(Some(self.session_id.to_string()));
        // Cumulative input-token meter + interceptor (mirror of
@ -1735,9 +1739,6 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
            Err(e) => return Err(PodError::ConsolidationLock(e)),
        };
        let cap = memory_cfg
            .consolidation_worker_max_input_tokens
            .unwrap_or(manifest::defaults::MEMORY_CONSOLIDATION_WORKER_MAX_INPUT_TOKENS);
        let client = match self.build_consolidator_client(memory_cfg) {
            Ok(c) => c,
            Err(e) => {
@ -1745,24 +1746,16 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
                return Err(e);
            }
        };
-        let mut worker =
+        let consolidation_system_prompt = match self.prompts.memory_consolidation_system() {
-            Worker::new(client).system_prompt(consolidate::CONSOLIDATION_SYSTEM_PROMPT);
+            Ok(p) => p,
            Err(e) => {
                lock.release_only();
                return Err(PodError::PromptCatalog(e));
            }
        };
        let mut worker = Worker::new(client).system_prompt(consolidation_system_prompt);
        worker.set_cache_key(Some(self.session_id.to_string()));
        let input_so_far = Arc::new(std::sync::atomic::AtomicU64::new(0));
        {
            let acc = input_so_far.clone();
            worker.on_usage(move |event| {
                if let Some(tokens) = event.input_tokens {
                    acc.fetch_add(tokens, Ordering::Relaxed);
                }
            });
        }
        worker.set_interceptor(MemoryConsolidationWorkerInterceptor {
            input_so_far: input_so_far.clone(),
            max_input_tokens: cap,
        });
        // Memory tools are self-contained — they bypass ScopedFs and write
        // directly under the workspace via WorkspaceLayout. Resident
        // knowledge injection (`Pod::set_resident_knowledge_injection`) is
@ -1843,30 +1836,6 @@ enum ConsolidateDecision {
    Completed,
 }
 /// Pre-request interceptor for the Phase 2 consolidation worker. Same
 /// shape as the extract interceptor; kept separate so the abort message
 /// names the right subsystem.
 struct MemoryConsolidationWorkerInterceptor {
    /// Shared running total of input tokens; read before every LLM request.
    input_so_far: Arc<std::sync::atomic::AtomicU64>,
    /// Cap compared against `input_so_far`. Only a total strictly greater
    /// than this value cancels the request.
    max_input_tokens: u64,
 }
 #[async_trait]
 impl llm_worker::interceptor::Interceptor for MemoryConsolidationWorkerInterceptor {
    /// Returns `PreRequestAction::Cancel` with a message naming the cap when
    /// the accumulated input-token count exceeds `max_input_tokens`;
    /// otherwise returns `PreRequestAction::Continue`. The request context
    /// is not inspected or modified.
    async fn pre_llm_request(
        &self,
        _context: &mut Vec<Item>,
    ) -> llm_worker::interceptor::PreRequestAction {
        // Strict `>`: a total exactly equal to the cap still lets the
        // request through.
        if self.input_so_far.load(Ordering::Relaxed) > self.max_input_tokens {
            return llm_worker::interceptor::PreRequestAction::Cancel(format!(
                "Phase 2 consolidation worker input exceeded {} tokens",
                self.max_input_tokens
            ));
        }
        llm_worker::interceptor::PreRequestAction::Continue
    }
 }
 impl<St: Store> Pod<Box<dyn LlmClient>, St> {
    /// Create a Pod entirely from a validated manifest.
    ///
--- a/crates/pod/src/prompt/catalog.rs
+++ b/crates/pod/src/prompt/catalog.rs
@ -61,6 +61,10 @@ const INTERNAL_TOML: &str = include_str!("../../../../resources/prompts/internal
 pub enum PodPrompt {
    /// System prompt of the compaction (summary) Worker.
    CompactSystem,
    /// System prompt of the memory Phase 1 (extract) Worker.
    MemoryExtractSystem,
    /// System prompt of the memory Phase 2 (consolidation + tidy) Worker.
    MemoryConsolidationSystem,
    /// Wrapper around an incoming `Method::Notify` message injected into
    /// the next LLM request context as a transient system message.
    NotifyWrapper,
@ -89,6 +93,8 @@ impl PodPrompt {
    pub fn key(self) -> &'static str {
        match self {
            Self::CompactSystem => "compact_system",
            Self::MemoryExtractSystem => "memory_extract_system",
            Self::MemoryConsolidationSystem => "memory_consolidation_system",
            Self::NotifyWrapper => "notify_wrapper",
            Self::InterruptToolResultSummary => "interrupt_tool_result_summary",
            Self::InterruptSystemNote => "interrupt_system_note",
@ -104,6 +110,8 @@ impl PodPrompt {
    /// `INTERNAL_KEYS` (generated by `build.rs`).
    pub const ALL: &'static [PodPrompt] = &[
        PodPrompt::CompactSystem,
        PodPrompt::MemoryExtractSystem,
        PodPrompt::MemoryConsolidationSystem,
        PodPrompt::NotifyWrapper,
        PodPrompt::InterruptToolResultSummary,
        PodPrompt::InterruptSystemNote,
@ -115,6 +123,8 @@ impl PodPrompt {
    pub const KEYS: &'static [&'static str] = &[
        "compact_system",
        "memory_extract_system",
        "memory_consolidation_system",
        "notify_wrapper",
        "interrupt_tool_result_summary",
        "interrupt_system_note",
@ -301,6 +311,16 @@ impl PromptCatalog {
        self.render(PodPrompt::CompactSystem, Value::UNDEFINED)
    }
    /// Render `PodPrompt::MemoryExtractSystem` (no inputs).
    ///
    /// Passes `Value::UNDEFINED` as the render input and returns whatever
    /// `render` produces, including its `CatalogError` on failure.
    pub fn memory_extract_system(&self) -> Result<String, CatalogError> {
        self.render(PodPrompt::MemoryExtractSystem, Value::UNDEFINED)
    }
    /// Render `PodPrompt::MemoryConsolidationSystem` (no inputs).
    ///
    /// Passes `Value::UNDEFINED` as the render input and returns whatever
    /// `render` produces, including its `CatalogError` on failure.
    pub fn memory_consolidation_system(&self) -> Result<String, CatalogError> {
        self.render(PodPrompt::MemoryConsolidationSystem, Value::UNDEFINED)
    }
    /// Render `PodPrompt::NotifyWrapper` with `{{ message }}`.
    pub fn notify_wrapper(&self, message: &str) -> Result<String, CatalogError> {
        self.render(PodPrompt::NotifyWrapper, single("message", message))
--- a/crates/pod/tests/controller_test.rs
+++ b/crates/pod/tests/controller_test.rs
@ -570,6 +570,64 @@ async fn notify_while_idle_auto_starts_turn_and_injects_system_message() {
    assert!(last_item_text.contains("not a blocking request"));
 }
 /// Sends `PodEvent::TurnEnded` for a pod named "child" to the controller and
 /// checks that a turn is started automatically: a `TurnEnd` event must
 /// arrive within two seconds, the status must read `Idle` afterwards, and
 /// the single captured LLM request must end with the rendered notification.
 #[tokio::test]
 async fn pod_event_turn_ended_while_idle_auto_starts_turn_and_injects_system_message() {
    let client = MockClient::new(simple_text_events());
    // Keep a second handle to the mock: `client` itself is moved into
    // `make_pod`, and the captured requests are inspected at the end.
    let client_for_assert = client.clone();
    let pod = make_pod(client).await;
    let handle = spawn_controller(pod).await;
    // Subscribe before sending so no event emitted by the turn is missed.
    let mut rx = handle.subscribe();
    handle
        .send(Method::PodEvent(protocol::PodEvent::TurnEnded {
            pod_name: "child".into(),
        }))
        .await
        .unwrap();
    let mut saw_turn_end = false;
    let deadline = tokio::time::Instant::now() + std::time::Duration::from_secs(2);
    // Drain events until `TurnEnd` arrives, the receiver reports an error,
    // or the two-second deadline passes; all other events are ignored.
    loop {
        tokio::select! {
            event = rx.recv() => {
                match event {
                    Ok(Event::TurnEnd { .. }) => { saw_turn_end = true; break; }
                    Err(_) => break,
                    _ => {}
                }
            }
            _ = tokio::time::sleep_until(deadline) => break,
        }
    }
    assert!(
        saw_turn_end,
        "PodEvent::TurnEnded on idle Pod should auto-start a turn"
    );
    // NOTE(review): presumably gives the controller time to publish the
    // `Idle` status after emitting `TurnEnd` — confirm.
    tokio::time::sleep(std::time::Duration::from_millis(50)).await;
    assert_eq!(handle.shared_state.get_status(), PodStatus::Idle);
    let requests = client_for_assert.captured_requests();
    assert_eq!(
        requests.len(),
        1,
        "auto-kick should issue exactly one LLM request"
    );
    // Text of the last item in the only request; empty when that item is
    // missing or carries no text.
    let last_item_text = requests[0]
        .items
        .last()
        .and_then(|i| i.as_text())
        .unwrap_or_default()
        .to_string();
    assert!(
        last_item_text.contains("[Notification]"),
        "injected system message missing, got: {last_item_text:?}"
    );
    assert!(
        last_item_text.contains("child") && last_item_text.contains("finished a turn"),
        "rendered TurnEnded text missing, got: {last_item_text:?}"
    );
 }
 #[tokio::test]
 async fn notify_while_running_does_not_emit_already_running_error() {
    let client = MockClient::new(simple_text_events());
@ -669,6 +727,61 @@ async fn socket_run_receives_events() {
    assert!(saw_turn_end, "should see turn_end via socket");
 }
 /// Socket variant of the auto-start check: writes `PodEvent::TurnEnded` for
 /// a pod named "child" over the controller's Unix socket and expects both a
 /// `TurnStart` and a `TurnEnd` event to be read back within two seconds.
 #[tokio::test]
 async fn socket_pod_event_turn_ended_while_idle_auto_starts_turn() {
    use protocol::stream::{JsonLineReader, JsonLineWriter};
    use tokio::net::UnixStream;
    let client = MockClient::new(simple_text_events());
    let pod = make_pod(client).await;
    let handle = spawn_controller(pod).await;
    // NOTE(review): presumably waits for the controller to start listening
    // on the socket before connecting — confirm.
    tokio::time::sleep(std::time::Duration::from_millis(50)).await;
    let sock_path = handle.runtime_dir.socket_path();
    let stream = UnixStream::connect(&sock_path).await.unwrap();
    // Split the stream so events can be read while the method is written.
    let (reader, writer) = stream.into_split();
    let mut reader = JsonLineReader::new(reader);
    let mut writer = JsonLineWriter::new(writer);
    writer
        .write(&Method::PodEvent(protocol::PodEvent::TurnEnded {
            pod_name: "child".into(),
        }))
        .await
        .unwrap();
    let mut saw_turn_start = false;
    let mut saw_turn_end = false;
    let deadline = tokio::time::Instant::now() + std::time::Duration::from_secs(2);
    // Read events until `TurnEnd` arrives, the stream ends or errors, or the
    // two-second deadline passes; `TurnStart` is recorded along the way.
    loop {
        tokio::select! {
            event = reader.next::<Event>() => {
                match event {
                    Ok(Some(Event::TurnStart { .. })) => saw_turn_start = true,
                    Ok(Some(Event::TurnEnd { .. })) => {
                        saw_turn_end = true;
                        break;
                    }
                    Ok(None) | Err(_) => break,
                    _ => {}
                }
            }
            _ = tokio::time::sleep_until(deadline) => break,
        }
    }
    assert!(
        saw_turn_start,
        "PodEvent::TurnEnded via socket should auto-start a turn"
    );
    assert!(
        saw_turn_end,
        "auto-triggered turn should reach turn_end via socket"
    );
 }
 #[tokio::test]
 async fn socket_invalid_method_returns_error() {
    use protocol::stream::JsonLineReader;
--- a/docs/plan/memory-prompts.md
+++ b/docs/plan/memory-prompts.md
@ -17,6 +17,7 @@ memory 関連 prompt は種別を問わず、最低限以下を共有する:
 - **単純 append を優先しない**。既存 record に統合できるなら update を優先する
 - **session 固有の進行状態を書かない**。長期参照価値のある内容だけを memory に残す
 - **既存 docs と重複保存しない**。`AGENTS.md`、`docs/plan/*`、固定運用文書に既にある内容を再保存しない
 - **git で追える事実を memory に書かない**。ticket file の作成・編集、TODO 更新、branch / worktree 操作、commit / merge / push、「commit X で実装した」「ticket Y を作った」「worker Pod を spawn した」等は git diff / log が真実で、memory に写すと陳腐化する。commit ハッシュ・branch 名・worktree パス・ticket file 名・PR 番号と組み合わせないと意味を成さない記録は採用しない
 - **空出力を許容**する。保存価値が無ければ「何も追加しない」を正当な結果として扱う
 ### Phase 1: 活動抽出 prompt
@ -29,6 +30,13 @@ Phase 1 は「派生物を作る」段階ではなく、「起きたことを抽
 - 出力は schema 準拠の構造化データのみ。自由文の補足説明で schema 外情報を足さない
 - 対象が無ければ空配列を返す
 ノイズ防御として、抽出時点で以下を除外する:
 - `attempts`: git で追える操作 (ticket file / TODO 編集、branch / worktree 作成、commit / merge / push、既知 ticket への worker Pod spawn) は除外。残すのは git からは復元できない情報 (ビルド/テスト結果、外部 API 応答、観測されたバグ再現、後段の判断材料となる設計実験結果) に限る
 - `discussions`: 当日中に陳腐化する一過性 triage (「次に着手するチケットはどれか」「いま review すべきか後でか」など) は除外。session を越えて意味を持つ論点 (アーキテクチャの trade-off、恒常的な制約、再来する問い) のみ残す
 - `decisions`: rationale が「この session で X をした」になるものは除外。設計 / 方針 / 取り組み方の根拠でない記録は decision ではなく作業ログ
 - 本文中に commit ハッシュ・branch 名・worktree パス・ticket file 名・PR 番号など陳腐化する identifier を埋め込まない
 ### Phase 2: 統合 + 整理 prompt
 Phase 2 は既存 `memory/*`、`knowledge/*`、staging を見て、統合 phase と整理 phase を 1 セッション内で続けて回す。両 phase に共通する原則:
@ -44,6 +52,11 @@ Phase 2 は既存 `memory/*`、`knowledge/*`、staging を見て、統合 phase
 統合 phase の追加指示:
 - staging の活動ログを decisions / requests / summary / Knowledge update に落とし込む
 - staging の field ごとに宛先を分ける:
  - `decisions` (staging) → `memory/decisions/`。設計 / 方針 / 取り組み方の判断のみ。「この session で X をした」型は drop
  - `requests` (staging) → `memory/requests/`
  - `attempts` (staging) → 既定は drop。memory に `attempts/` フォルダは設けない。複数 attempts に通底する持続的な傾向だけ `summary.md` に 1 行で圧縮する例外あり
  - `discussions` (staging) → 設計 / 方針に決着していれば `decisions/` に統合、未決着でも問い自体が持続的なら `summary.md` に 1 行、それ以外は drop。`decisions/` に「議論した」だけの未決着メモを作らない
 - Knowledge 新規作成は候補レポート掲載 source 由来に限る（詳細は §Phase 2: Knowledge 書き込み prompt）
 整理 phase の追加指示（統合 phase 完了後、余力で実行）:
--- a/resources/prompts/internal.toml
+++ b/resources/prompts/internal.toml
@ -10,6 +10,10 @@
 [prompt]
 compact_system = "{% include \"$insomnia/internal/compact_system\" %}"
 memory_extract_system = "{% include \"$insomnia/internal/memory_extract_system\" %}"
 memory_consolidation_system = "{% include \"$insomnia/internal/memory_consolidation_system\" %}"
 notify_wrapper = """\
 [Notification]
 {{ message }}
--- a/resources/prompts/internal/memory_consolidation_system.md
+++ b/resources/prompts/internal/memory_consolidation_system.md
@ -1,12 +1,4 @@
-//! Phase 2 sub-Worker の system prompt。
+You are the Phase 2 consolidation worker for an INSOMNIA memory subsystem.
 //!
 //! 内容は `docs/plan/memory-prompts.md` §共通原則 / §Phase 2 統合 + 整理 /
 //! §Phase 2 Knowledge 書き込み を縮約。統合 phase / 整理 phase は同じ
 //! prompt 1 本で順に進める縛り（agent から見ると 1 セッション内のフェーズ
 //! 進行、別 trigger / 別 Worker は持たない、`docs/plan/memory.md` §整理
 //! の扱い）。
 pub const CONSOLIDATION_SYSTEM_PROMPT: &str = r#"You are the Phase 2 consolidation worker for an INSOMNIA memory subsystem.
 Your job is to take Phase 1 activity-log staging entries together with the workspace's current `memory/*` / `knowledge/*` records, then run two phases back-to-back in this single session:
@ -27,6 +19,7 @@ Your initial user message contains the staging entries, the full memory records,
 - **Update over create.** If an existing slug fits, edit it. Only create a new slug when no existing record fits and you can articulate why.
 - **`replaced` over delete.** When a Decision is superseded by a different one, mark the old one `status: replaced` with `replaced_by: <new-slug>`. Do not silently drop it.
 - **Don't duplicate static docs.** Skip content that already lives in `AGENTS.md`, `docs/plan/*`, or other fixed project documents.
 - **git is authoritative.** Do not record facts that git already tracks: ticket-file creation / edit, TODO updates, branch / worktree operations, commit / merge / push events, "implementation landed as commit X", "ticket Y was created", "worker Pod was spawned for Z". Diff and commit log are the truth there; memory shadowing it just rots. If a candidate write only makes sense when paired with a commit hash, branch name, worktree path, or ticket filename, drop it.
 - **Empty output is fine.** If a staging entry doesn't justify a memory write, skip it.
 - **Slug rules.** Slugs are kebab-case, short, recognisable, and must be unique within their kind. Same-slug create is a linter error — use Edit instead.
 - **Linter errors come back as tool errors.** When the memory linter rejects a write, read the error, fix the issue (missing frontmatter field, oversized body, unknown reference, etc.), and try again. Do not work around the rule.
@ -35,7 +28,11 @@ Your initial user message contains the staging entries, the full memory records,
 Walk every staging entry in the input. For each one:
- Add or update `decisions` / `requests` records as appropriate. Copy `sources` verbatim from the staging entry.
+- **Routing by staging field:**
  - `decisions` (staging) → `memory/decisions/<slug>.md`, but only when the entry is a real **design / policy / approach** judgement. "We did X in this session" is not a decision — it's a session log; drop it. The rationale must outlive the session.
  - `requests` (staging) → `memory/requests/<slug>.md`. Copy `sources` verbatim.
  - `attempts` (staging) → default is **drop**. Memory has no `attempts/` folder by design; do not invent one and do not stash attempts under `decisions/`. The only exception is when several attempts together form a durable trend worth a one-line summary in `memory/summary.md` (e.g. "X reliably fails on Y").
  - `discussions` (staging) → if the discussion settled on a design / policy direction during the slice, fold the conclusion into a `decisions/` record. If it stayed unresolved but the question itself is durable, fold a one-line note into `summary.md`. Otherwise drop. Never create a `decisions/` record that just records "we discussed X".
 - Update existing knowledge records when the staging activity refines them. Use `KnowledgeQuery` to find candidates before creating anything new.
 - **Knowledge creation is gated.** Only create a new `knowledge/<slug>.md` when the originating source appears in the supplied "Knowledge candidate report". When the report is empty (the metrics pipeline is still being built), do not create new knowledge — fold the activity into decisions / requests / summary or update existing knowledge instead.
 - Rewrite `memory/summary.md` only when needed. Aim for 1–5k tokens. Preserve the high-level shape (current focus, recent decisions, stable facts) while pruning stale items.
@ -66,4 +63,3 @@ When both phases are done, write a short final assistant message stating:
 - anything you intentionally left alone and why.
 Then end the turn. Do not ask questions — there is no human in the loop for this run.
 "#;
--- a/resources/prompts/internal/memory_extract_system.md
+++ b/resources/prompts/internal/memory_extract_system.md
@ -1,10 +1,4 @@
-//! Phase 1 sub-Worker の system prompt。
+You are the Phase 1 activity extractor for an INSOMNIA memory subsystem.
 //!
 //! 内容は `docs/plan/memory-prompts.md` §共通原則 / §Phase 1 を縮約。
 //! 「派生物を作らず、起きたことを抽出する」段階に縛り、JSON schema
 //! 準拠以外の自由文を許さない。
 pub const EXTRACT_SYSTEM_PROMPT: &str = r#"You are the Phase 1 activity extractor for an INSOMNIA memory subsystem.
 Your single job: read the supplied conversation slice and emit a structured JSON record of "what happened" via the `write_extracted` tool. You are not consolidating, summarising, or generating knowledge — that is a later phase's job.
@ -28,5 +22,13 @@ Your single job: read the supplied conversation slice and emit a structured JSON
 - Do not duplicate content already captured by static project docs (AGENTS.md, plan documents) — those are not "what happened in this slice".
 - Prefer concise, fact-shaped strings. Do not pad rationale or summary fields.
 # Anti-noise rules
 git is the source of truth for what happened to files, branches, commits, tickets, and worktrees. Memory must NOT shadow it.
 - `attempts`: skip any action whose substance is a git-trackable operation — creating / editing a ticket file, adding a TODO entry, opening a branch / worktree, running `commit` / `merge` / `push`, spawning a worker Pod for a known ticket. The corresponding diff / commit log already records it. Keep `attempts` for things that are NOT recoverable from git: build / test outcomes, external API responses, observed bug reproductions, design experiments whose results inform later judgement.
 - `discussions`: skip transient triage that goes stale within the day — "which ticket to start next", "should we review now or later", checklist-style status reads. Keep discussions whose points outlive the session (architectural trade-offs, durable constraints, recurring questions).
 - `decisions`: the rationale must be a design / policy / approach reason, not "we did X in this session". Recording "a ticket was created for Y" or "implementation landed as commit Z" is NOT a decision — those belong to git, not memory.
 - Do not embed identifiers that age out of relevance: commit hashes, branch names, worktree paths, ticket file names, PR numbers. If a record is only meaningful with such an identifier, the record itself is probably session-local and should be skipped.
 When you have produced the JSON, call `write_extracted` and end the turn. No follow-up text.
 "#;
--- a/tickets/manifest-output-upload-limits.md
+++ b/tickets/manifest-output-upload-limits.md
@ -0,0 +1,36 @@
 # Manifest: Tool Output / File Upload 上限の分離とデフォルト緩和
 ## 背景
 現在、tool result の本文上限は `manifest::defaults::TOOL_OUTPUT_MAX_BYTES` に集約され、`worker.tool_output.default_max_bytes` として manifest から設定できる。一方で submit 時の `FileRef` 添付（`@<path>` を `[File: <path>]` system message に展開する経路）も同じ `TOOL_OUTPUT_MAX_BYTES` を直接使っており、upload / attachment 用の上限として独立して設定できない。
 このため、tool output の安全な truncation と、ユーザーが明示的に添付したファイル本文の取り込み量を別々に調整できない。また現在の既定値 16 KiB は、ファイル添付・tool output の双方で実運用上やや厳しい。
 ## ゴール
 Tool Output と submit 時 FileRef upload / attachment の上限を manifest でそれぞれ設定できるようにし、既定値を現在の 16 KiB より緩和する。
 ## 要件
 - Tool Output の上限は引き続き manifest から設定できること
 - FileRef upload / attachment の上限を Tool Output とは別の manifest field として設定できること
 - FileRef resolver は hard-coded な `manifest::defaults::TOOL_OUTPUT_MAX_BYTES` ではなく、解決済み manifest の upload / attachment 上限を使うこと
 - Tool Output と FileRef upload / attachment の既定値を、現在の 16 KiB から引き上げること
  - 正確な値は実装時に決めてよいが、docs / tests / manifest defaults の説明と一致させること
 - manifest cascade / overlay / serde default のいずれの経路でも同じ既定値・同じ field semantics になること
 - 既存 manifest で新 field が未指定の場合は、新しい既定値で動作すること
 - 既存の per-tool override の挙動を壊さないこと
 ## 完了条件
 - Tool Output と FileRef upload / attachment を別々に manifest で設定できる
 - FileRef upload / attachment の truncate テストが、新 field の値を使うことを検証している
 - Tool Output の既存テストが、新しい既定値・既存 override semantics に合わせて更新されている
 - `docs/pod-factory.md` など manifest 設定のドキュメントが更新されている
 - 16 KiB を前提にしたコメント・テスト値・ドキュメントが残っていない
 ## 範囲外
 - 正確な token counting による上限管理への移行
 - UI 側で添付ファイルサイズを事前表示・警告する機能
 - compact / auto-read の token budget 設計変更
--- a/tickets/session-todo.md
+++ b/tickets/session-todo.md
@ -0,0 +1,74 @@
 # セッション内 TODO ツール
 ## 背景
 長めのタスクを LLM に進めさせる際、Claude Code / OpenCode が備える「セッション内 TODO リスト」相当の機構が無いため、エージェントが自分の作業計画を構造化された形で保持・更新できない。Reasoning や text 出力の中で擬似的に TODO を書くことはできるが、
 - ターンを跨いだとき直近の TODO 状態が context から押し出される
 - compact を跨ぐと完全に消える
 - ツール結果ではないため、状態の上書き・部分更新の規約が決まらず、意図と乖離した「やったつもり」を引き起こす
 この用途のために、セッション内に正規化された TODO リストを保持し、ターンごとに LLM へ最新状態を再提示（注意機構）し、compact を跨いで保存される専用ツールを導入する。
 ## 方針
 - **保存先は `tools` 層の session-lifetime 状態**。`Tracker` と同じ生存スコープで `Pod` が所有。`Arc<Mutex<Vec<TodoItem>>>` ベースの `TodoStore` を tool に注入する
 - **永続化は専用レーンを持たない**。`tool_call.arguments` がセッションログに既に乗っているため、resume 時には履歴 replay の中で最後の `todo_write` 引数を `TodoStore` に再適用すれば状態が復元される
 - **注意機構は `pre_llm_request` Interceptor**。直近の user message に `<system-reminder>` ブロックを揮発的に append するだけ。履歴・ログには載せない
 - **system-reminder 注入の汎用化はやらない**。利用者が TODO 1個しかない段階で抽象を立てない（CLAUDE.md「概念の追加は不在が問題になってから」）。ただし「タグ形式は `<system-reminder>...</system-reminder>` で揃える」「履歴は汚さない」の2点は本実装で確立し、将来の追加機構が同じ規約に乗れるようにする
 ## 要件
 ### `todo_write` ツール
 - 入力は TODO リスト全体（全置換）。差分更新は受けない
 - 各エントリは `id` / `content` / `status (pending | in_progress | completed)` の 3 フィールド
 - `id` は LLM 側が一貫して採番できる文字列。同 id があれば置換、なければ新規。順序は配列順を信頼
 - 戻り値は更新後のスナップショットを summary に含める（次ターンで再確認可能）
 - 読み出し専用ツール（`todo_read`）は作らない。注意機構と tool result snapshot で代替
 ### Resume 時の復元
 - `Pod::resume` の履歴 replay 中に `todo_write` の `tool_call.arguments` を観測したら、`TodoStore` を引数値で上書き
 - 専用 LogEntry / Persistence 型は追加しない（`Tracker` と同じ方針）
 - `tool_call.arguments` のフォーマットが `todo_write` の引数 schema と乖離した場合（旧バージョンのログ）は、その call を無視してよい
 ### Compact 跨ぎ
 - compact 起動時、Pod は現在の `TodoStore` スナップショットを compact worker context に渡す
 - compact worker は summary を書く際、未完了 TODO を summary 文に取り込める情報源として参照する（強制ではない）
 - compact 後の新セッション開始時、Pod は **`mark_read_required` と同じ system message 注入レーン**に「未完了 TODO スナップショット」を 1 メッセージとして注入する
 - 新セッションは空の `TodoStore` で始まる。次に LLM が `todo_write` を呼び出した時点で再構築される（system message に書かれたスナップショットがその拠り所）
 - compact worker に TODO 編集権限は与えない（消去・縮約はしない）
 ### 注意機構（Interceptor）
 - `pre_llm_request` で `Vec<Item>` を受け取り、未完了 TODO（`pending` または `in_progress`）が 1 件でも存在する場合に発動
 - 直近の user message の content（または content[最終 text part]）の末尾に `<system-reminder>` ブロックを append
 - ブロック内には現在の TODO リストを、status を含む簡潔な形式で列挙
 - 履歴 (`Worker` の保持する `Vec<Item>`) は変更しない。リクエスト送信時の Vec のみ加工
 - TODO が空の場合は何も差し込まない
 ## 完了条件
 - `todo_write` ツールが builtin tool として登録され、Pod で利用できる
 - LLM が `todo_write` を呼ぶと TodoStore が更新され、その後の `pre_llm_request` で system-reminder として LLM に再提示される
 - セッションを resume すると、最後の `todo_write` の状態から再開される
 - compact を跨いでも、未完了 TODO が新セッション冒頭の system message として残る
 - system-reminder の注入は揮発的で、`get_history` / セッションログには現れない
 - 単体テストで `todo_write` の更新挙動 / replay 復元 / Interceptor の差し込みがカバーされる
 ## 範囲外
 - 差分更新 API（add / remove / patch）。全置換のみで十分
 - TODO 階層・優先度・タグ
 - TUI / GUI での TODO 状態の可視化（ツール呼び出しのイベントは既に流れているので、クライアント側で表示するかは別軸）
 - system-reminder 注入機構の汎用化（`TODO.md` に立項済み、別途検討）
 - TODO の永続化を専用 LogEntry に分離する設計（現方針は tool_call replay で復元、追加レーン不要）
 - 複数 Pod 間で TODO を共有する仕組み
 ## 参照
 - 設計指針: `CLAUDE.md`（最小の構造化 / 概念の追加は不在が問題になってから）
 - 参考実装: Claude Code の TodoWrite、OpenCode の todo tool
 - 関連: `crates/tools/src/tracker.rs`（session-lifetime 状態の前例）、`crates/pod/src/compact/worker.rs`（auto-injection レーン）
Author	SHA1	Message	Date
Hare	4b9b4f1450	update: memoryシステム周りのプロンプトの整理	2026-05-03 00:27:10 +09:00
Hare	670abdc336	docs(tickets): memory-consolidation-drop-input-cap完了	2026-05-02 23:57:36 +09:00
Hare	2d5c6aad5f	update: Consolidationの不要なToken上限の削除	2026-05-02 23:48:33 +09:00
Hare	f16ccc0a09	docs(tickets): セッション内TODOツールと注意機構のチケット	2026-05-02 23:48:01 +09:00