update: fmt + memoryに用いる言語の構成

2026-05-13 01:57:04 +09:00 · 2026-05-13 01:57:04 +09:00 · 3c4a34b13b
commit 3c4a34b13b
parent 076cf9af18
16 changed files with 95 additions and 47 deletions
--- a/crates/client/src/spawn.rs
+++ b/crates/client/src/spawn.rs
@ -47,7 +47,9 @@ pub enum SpawnError {
    /// runtime ディレクトリが解決できなかった (環境変数未設定等)。
    RuntimeDirUnavailable,
    PodLaunchFailed(io::Error),
-    PodExitedEarly { stderr_tail: String },
+    PodExitedEarly {
+        stderr_tail: String,
+    },
    Timeout,
 }

@ -88,10 +90,7 @@ impl From<io::Error> for SpawnError {
 ///
 /// `progress` は ready 行を見つけるまでに観測した stderr の各行で呼ばれる
 /// (ready 行自体は除外される)。UI の表示更新や E2E ログ取得に使う。
-pub async fn spawn_pod<F>(
-    config: SpawnConfig,
-    mut progress: F,
-) -> Result<SpawnReady, SpawnError>
+pub async fn spawn_pod<F>(config: SpawnConfig, mut progress: F) -> Result<SpawnReady, SpawnError>
 where
    F: FnMut(&str),
 {
--- a/crates/llm-worker/src/llm_client/scheme/anthropic/events.rs
+++ b/crates/llm-worker/src/llm_client/scheme/anthropic/events.rs
@ -316,7 +316,8 @@ impl AnthropicScheme {
                });
                match &raw.content_block {
                    ContentBlock::Thinking {
-                        thinking, signature,
+                        thinking,
+                        signature,
                    } => {
                        state.pending_thinking = Some(PendingThinking {
                            text: thinking.clone(),
@ -372,10 +373,7 @@ impl AnthropicScheme {
            }
            AnthropicEventType::ContentBlockStop => {
                let raw: ContentBlockStopEvent = serde_json::from_str(data)?;
-                let block_type = state
-                    .current_block_type
-                    .take()
-                    .unwrap_or(BlockType::Text);
+                let block_type = state.current_block_type.take().unwrap_or(BlockType::Text);
                emitted.push(Event::BlockStop(BlockStop {
                    index: raw.index,
                    block_type,
--- a/crates/llm-worker/src/llm_client/scheme/openai_responses/events.rs
+++ b/crates/llm-worker/src/llm_client/scheme/openai_responses/events.rs
@ -458,7 +458,10 @@ pub(crate) fn parse_sse(
        "response.reasoning_text.delta" => {
            let ev: ReasoningTextDelta = from_json(data)?;
            // round-trip 用に蓄積
-            state.ensure_reasoning(ev.output_index).text.push_str(&ev.delta);
+            state
+                .ensure_reasoning(ev.output_index)
+                .text
+                .push_str(&ev.delta);
            Ok(ensure_and_delta(
                state,
                SlotKey::ContentPart {
--- a/crates/llm-worker/tests/reasoning_round_trip_test.rs
+++ b/crates/llm-worker/tests/reasoning_round_trip_test.rs
@ -16,9 +16,7 @@ mod common;
 use common::MockLlmClient;
 use llm_worker::Item;
 use llm_worker::Worker;
-use llm_worker::llm_client::event::{
-    Event, ReasoningItemEvent, ResponseStatus, StatusEvent,
-};
+use llm_worker::llm_client::event::{Event, ReasoningItemEvent, ResponseStatus, StatusEvent};

 /// Anthropic 風: thinking ブロック → text → 終了 のシーケンス。
 /// Worker history に Reasoning(signature 付き) → assistant_message が並ぶ。
--- a/crates/manifest/src/config.rs
+++ b/crates/manifest/src/config.rs
@ -258,6 +258,7 @@ impl MemoryConfig {
            workspace_root: upper.workspace_root.or(self.workspace_root),
            query_result_limit: upper.query_result_limit.or(self.query_result_limit),
            query_excerpt_lines: upper.query_excerpt_lines.or(self.query_excerpt_lines),
+            language: upper.language.or(self.language),
            extract_model: upper.extract_model.or(self.extract_model),
            extract_threshold: upper.extract_threshold.or(self.extract_threshold),
            extract_worker_max_input_tokens: upper
--- a/crates/manifest/src/defaults.rs
+++ b/crates/manifest/src/defaults.rs
@ -62,3 +62,7 @@ pub const MEMORY_EXTRACT_WORKER_MAX_INPUT_TOKENS: u64 = 30_000;
 /// Optional maximum extract-worker tool-loop depth. `None` means unlimited.
 /// See [`crate::MemoryConfig::extract_worker_max_turns`].
 pub const MEMORY_EXTRACT_WORKER_MAX_TURNS: Option<u32> = Some(8);
+
+/// Default language used by memory extraction / consolidation workers for
+/// durable memory and knowledge text. See [`crate::MemoryConfig::language`].
+pub const MEMORY_LANGUAGE: &str = "English";
--- a/crates/manifest/src/lib.rs
+++ b/crates/manifest/src/lib.rs
@ -96,6 +96,12 @@ pub struct MemoryConfig {
    /// Ignored when the request omits `query`. `None` ⇒ tool default (3).
    #[serde(default)]
    pub query_excerpt_lines: Option<usize>,
+    /// Language used by memory extraction / consolidation workers for durable
+    /// memory and knowledge text. Free-form so workspaces can use names like
+    /// `English`, `Japanese`, or locale tags. `None` or a blank value ⇒
+    /// [`defaults::MEMORY_LANGUAGE`].
+    #[serde(default)]
+    pub language: Option<String>,
    /// Optional model for the extract worker. When `None`,
    /// the main pod model is cloned via `clone_boxed()`. Lightweight
    /// reasoning-capable models (Haiku / 4o-mini / Flash class) are
@ -656,6 +662,14 @@ model_id = "claude-sonnet-4-20250514"
        );
    }

+    #[test]
+    fn memory_section_with_language() {
+        let toml = format!("{MINIMAL_REQUIRED}\n[memory]\nlanguage = \"Japanese\"\n");
+        let manifest = PodManifest::from_toml(&toml).unwrap();
+        let mem = manifest.memory.unwrap();
+        assert_eq!(mem.language.as_deref(), Some("Japanese"));
+    }
+
    #[test]
    fn reject_unknown_scheme() {
        let toml =
--- a/crates/pod/src/pod.rs
+++ b/crates/pod/src/pod.rs
@ -2075,9 +2075,10 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
            .or(manifest::defaults::MEMORY_EXTRACT_WORKER_MAX_TURNS);

        let client = self.build_extractor_client(memory_cfg)?;
+        let memory_language = memory_language(memory_cfg);
        let extract_system_prompt = self
            .prompts
-            .memory_extract_system()
+            .memory_extract_system(memory_language)
            .map_err(PodError::PromptCatalog)?;
        let mut extract_worker = Worker::new(client).system_prompt(extract_system_prompt);
        extract_worker.set_cache_key(Some(self.session_id.to_string()));
@ -2276,13 +2277,15 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
                return Err(e);
            }
        };
-        let consolidation_system_prompt = match self.prompts.memory_consolidation_system() {
-            Ok(p) => p,
-            Err(e) => {
-                lock.release_only();
-                return Err(PodError::PromptCatalog(e));
-            }
-        };
+        let memory_language = memory_language(memory_cfg);
+        let consolidation_system_prompt =
+            match self.prompts.memory_consolidation_system(memory_language) {
+                Ok(p) => p,
+                Err(e) => {
+                    lock.release_only();
+                    return Err(PodError::PromptCatalog(e));
+                }
+            };
        let mut worker = Worker::new(client).system_prompt(consolidation_system_prompt);
        worker.set_cache_key(Some(self.session_id.to_string()));

@ -2331,6 +2334,14 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
    }
 }

+fn memory_language(cfg: &manifest::MemoryConfig) -> &str {
+    cfg.language
+        .as_deref()
+        .map(str::trim)
+        .filter(|language| !language.is_empty())
+        .unwrap_or(manifest::defaults::MEMORY_LANGUAGE)
+}
+
 /// Outcome of a single extract iteration. Internal to
 /// `try_post_run_extract` / `run_extract_once`.
 enum ExtractDecision {
--- a/crates/pod/src/prompt/catalog.rs
+++ b/crates/pod/src/prompt/catalog.rs
@ -311,14 +311,17 @@ impl PromptCatalog {
        self.render(PodPrompt::CompactSystem, Value::UNDEFINED)
    }

-    /// Render `PodPrompt::MemoryExtractSystem` (no inputs).
-    pub fn memory_extract_system(&self) -> Result<String, CatalogError> {
-        self.render(PodPrompt::MemoryExtractSystem, Value::UNDEFINED)
+    /// Render `PodPrompt::MemoryExtractSystem` with `{{ language }}`.
+    pub fn memory_extract_system(&self, language: &str) -> Result<String, CatalogError> {
+        self.render(PodPrompt::MemoryExtractSystem, single("language", language))
    }

-    /// Render `PodPrompt::MemoryConsolidationSystem` (no inputs).
-    pub fn memory_consolidation_system(&self) -> Result<String, CatalogError> {
-        self.render(PodPrompt::MemoryConsolidationSystem, Value::UNDEFINED)
+    /// Render `PodPrompt::MemoryConsolidationSystem` with `{{ language }}`.
+    pub fn memory_consolidation_system(&self, language: &str) -> Result<String, CatalogError> {
+        self.render(
+            PodPrompt::MemoryConsolidationSystem,
+            single("language", language),
+        )
    }

    /// Render `PodPrompt::NotifyWrapper` with `{{ message }}`.
@ -488,6 +491,15 @@ mod tests {
        assert!(rendered.contains("mark_read_required"));
    }

+    #[test]
+    fn memory_worker_prompts_include_language() {
+        let cat = PromptCatalog::builtins_only().unwrap();
+        let extract = cat.memory_extract_system("Japanese").unwrap();
+        let consolidate = cat.memory_consolidation_system("Japanese").unwrap();
+        assert!(extract.contains("`language`: `Japanese`"));
+        assert!(consolidate.contains("`language`: `Japanese`"));
+    }
+
    #[test]
    fn notify_wrapper_interpolates_message() {
        let cat = PromptCatalog::builtins_only().unwrap();
--- a/crates/pod/src/shared_state.rs
+++ b/crates/pod/src/shared_state.rs
@ -280,10 +280,7 @@ mod tests {
        assert_eq!(all.len(), 3);
        let alpha = state.list_knowledge_completions("alpha");
        assert_eq!(
-            alpha
-                .iter()
-                .map(|c| c.slug.as_str())
-                .collect::<Vec<_>>(),
+            alpha.iter().map(|c| c.slug.as_str()).collect::<Vec<_>>(),
            vec!["alpha", "alphabet"]
        );
        assert!(state.list_knowledge_completions("zzz").is_empty());
--- a/crates/session-store/src/logged_item.rs
+++ b/crates/session-store/src/logged_item.rs
@ -325,7 +325,8 @@ mod tests {
    fn legacy_reasoning_without_signature_field_deserializes() {
        // signature フィールドが無い旧形式の history.json を読み込んでも
        // None としてロードできる（後方互換性）。
-        let legacy_json = r#"{"kind":"reasoning","text":"old","summary":[],"encrypted_content":null}"#;
+        let legacy_json =
+            r#"{"kind":"reasoning","text":"old","summary":[],"encrypted_content":null}"#;
        let parsed: LoggedItem = serde_json::from_str(legacy_json).unwrap();
        match Item::from(parsed) {
            Item::Reasoning {
--- a/crates/tui/src/markdown.rs
+++ b/crates/tui/src/markdown.rs
@ -69,7 +69,9 @@ impl Renderer {

    fn span_style(&self) -> Style {
        if self.in_inline_code > 0 {
-            return Style::default().fg(Color::Yellow).bg(Color::Rgb(40, 40, 40));
+            return Style::default()
+                .fg(Color::Yellow)
+                .bg(Color::Rgb(40, 40, 40));
        }
        if self.in_code_block {
            return Style::default().fg(Color::Cyan);
@ -211,10 +213,8 @@ impl Renderer {
            }
            Tag::BlockQuote(_) => {
                self.emit_blank(out);
-                self.line_prefix.push(Span::styled(
-                    "│ ",
-                    Style::default().fg(Color::DarkGray),
-                ));
+                self.line_prefix
+                    .push(Span::styled("│ ", Style::default().fg(Color::DarkGray)));
            }
            Tag::Strong => self.bold += 1,
            Tag::Emphasis => self.italic += 1,
--- a/crates/tui/src/task.rs
+++ b/crates/tui/src/task.rs
@ -309,10 +309,7 @@ mod tests {
        s.apply_system_message_text(&text);
        let t = &s.tasks()[0];
        assert_eq!(t.subject, "subject with\nembedded newline");
-        assert_eq!(
-            t.description,
-            "desc:\n  status: not-actually-a-field"
-        );
+        assert_eq!(t.description, "desc:\n  status: not-actually-a-field");
    }
 }

--- a/crates/tui/src/ui.rs
+++ b/crates/tui/src/ui.rs
@ -165,10 +165,7 @@ fn mini_view_summary_line(counts: TaskCounts, width: u16) -> Line<'static> {
        counts.deleted,
    );
    let shown = truncate_with_ellipsis(&text, width as usize);
-    Line::from(Span::styled(
-        shown,
-        Style::default().fg(Color::DarkGray),
-    ))
+    Line::from(Span::styled(shown, Style::default().fg(Color::DarkGray)))
 }

 /// Two-character status marker + the style to render it with. Mirrors
@ -591,7 +588,10 @@ fn render_block_into(lines: &mut Vec<Line<'static>>, block: &Block, width: u16,
        }
        Block::AssistantText { text } => match mode {
            Mode::Overview => push_overview_line(lines, text, width, MessageKind::Assistant, ""),
-            _ => lines.extend(crate::markdown::render(text, kind_style(MessageKind::Assistant))),
+            _ => lines.extend(crate::markdown::render(
+                text,
+                kind_style(MessageKind::Assistant),
+            )),
        },
        Block::Thinking(t) => render_thinking(lines, t, width, mode),
        // ToolCall is dispatched in `compute_history` via `tool::render_tool`
--- a/resources/prompts/internal/memory_consolidation_system.md
+++ b/resources/prompts/internal/memory_consolidation_system.md
@ -12,6 +12,13 @@ You have:

 Your initial user message contains the staging entries, the full memory records, the knowledge candidate report, and the tidy hints. Existing knowledge bodies are NOT in the prompt; pull them through `KnowledgeQuery` + `MemoryRead` when relevant.

+# Memory language
+
+- `language`: `{{ language }}`.
+- Write durable memory and knowledge prose in this language, including frontmatter descriptions and record bodies.
+- Existing records in another language may be rewritten into this language when you touch them for integration or tidy work; do not rewrite untouched records only for language normalization.
+- Preserve code identifiers, paths, command names, quoted user text, logs, and external proper nouns when translation would reduce fidelity.
+
 # Common rules (both steps)

 - **Do not invent provenance.** Decisions / Requests `sources` arrays MUST be copied from the staging `source` field for the originating activity log entries. Do not synthesise `session_id` or entry ranges. Do not fabricate `last_sources` for Knowledge.
--- a/resources/prompts/internal/memory_extract_system.md
+++ b/resources/prompts/internal/memory_extract_system.md
@ -2,6 +2,12 @@ You are the activity extractor for an INSOMNIA memory subsystem.

 Your single job: read the supplied conversation slice and emit a structured JSON record of "what happened" via the `write_extracted` tool. You are not consolidating, summarising, or generating knowledge — that is the consolidation worker's job.

+# Memory language
+
+- `language`: `{{ language }}`.
+- Write extracted fact strings (`rationale`, `topic`, `points`, `action`, `result`, `intent`, `summary`, etc.) in this language.
+- Preserve code identifiers, paths, command names, quoted user text, logs, and external proper nouns when translation would reduce fidelity.
+
 # Hard rules

 - Call `write_extracted` exactly once. Do not narrate, ask questions, or send any other tool output.