From f8948be43deb3e99fcb16f4c7540da03123c5519 Mon Sep 17 00:00:00 2001 From: Hare Date: Mon, 27 Apr 2026 22:25:27 +0900 Subject: [PATCH] =?UTF-8?q?model-reasoning-control=E5=AE=9F=E8=A3=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../llm-worker/src/llm_client/capability.rs | 93 ++++++++++++--- .../llm_client/scheme/anthropic/request.rs | 109 +++++++++++++----- .../src/llm_client/scheme/gemini/request.rs | 48 +++++++- .../llm_client/scheme/openai_chat/request.rs | 50 ++++++-- .../scheme/openai_responses/request.rs | 46 ++++---- crates/manifest/src/config.rs | 83 ++++++++++--- crates/manifest/src/lib.rs | 27 ++++- crates/manifest/src/model.rs | 2 +- crates/pod/src/pod.rs | 1 + 9 files changed, 353 insertions(+), 106 deletions(-) diff --git a/crates/llm-worker/src/llm_client/capability.rs b/crates/llm-worker/src/llm_client/capability.rs index 8bb709fc..a2fde57a 100644 --- a/crates/llm-worker/src/llm_client/capability.rs +++ b/crates/llm-worker/src/llm_client/capability.rs @@ -8,7 +8,7 @@ //! 1. scheme 実装側の `model_id → ModelCapability` 静的テーブル(既知モデル) //! 2. `ModelConfig::capability` での明示 override(未知モデル、または上書き) -use serde::{Deserialize, Serialize}; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; /// モデル能力メタデータ #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] @@ -80,23 +80,90 @@ pub enum CacheStrategy { Auto, } -/// Reasoning 制御(共通型、scheme 側で各社形式に投影) +/// Reasoning 制御(共通型、scheme 側で各社形式に投影)。 /// -/// `effort` / `budget_tokens` はユーザー設定から任意で渡される。Scheme -/// 側は自身の `ReasoningSupport` に応じて片方だけ使う。両方が宣言 -/// されている場合の優先順位は scheme 実装が決める。 -#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)] -pub struct ReasoningControl { - #[serde(default)] - pub effort: Option, - #[serde(default)] - pub budget_tokens: Option, +/// 文字列は provider-native な effort label、数値は provider-native な +/// thinking budget token として扱う。どちらか一方だけを型で表現する。 +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(untagged)] +pub enum ReasoningControl { + Effort(ReasoningEffort), + BudgetTokens(i32), } -#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] -#[serde(rename_all = "lowercase")] +#[derive(Debug, Clone, PartialEq, Eq)] pub enum ReasoningEffort { + Minimal, Low, Medium, High, + XHigh, + Other(String), +} + +impl ReasoningEffort { + pub fn as_str(&self) -> &str { + match self { + Self::Minimal => "minimal", + Self::Low => "low", + Self::Medium => "medium", + Self::High => "high", + Self::XHigh => "xhigh", + Self::Other(label) => label.as_str(), + } + } +} + +impl From for ReasoningEffort { + fn from(value: String) -> Self { + match value.as_str() { + "minimal" => Self::Minimal, + "low" => Self::Low, + "medium" => Self::Medium, + "high" => Self::High, + "xhigh" => Self::XHigh, + _ => Self::Other(value), + } + } +} + +impl Serialize for ReasoningEffort { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_str(self.as_str()) + } +} + +impl<'de> Deserialize<'de> for ReasoningEffort { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + String::deserialize(deserializer).map(Self::from) + } +} + +#[cfg(test)] +mod tests { + use super::{ReasoningControl, ReasoningEffort}; + + #[test] + fn reasoning_control_deserializes_effort_labels() { + let known: ReasoningControl = serde_json::from_str(r#""xhigh""#).unwrap(); + assert_eq!(known, ReasoningControl::Effort(ReasoningEffort::XHigh)); + + let unknown: ReasoningControl = serde_json::from_str(r#""provider-native""#).unwrap(); + assert_eq!( + unknown, + ReasoningControl::Effort(ReasoningEffort::Other("provider-native".into())) + ); + } + + #[test] + fn reasoning_control_deserializes_signed_budget() { + let dynamic: ReasoningControl = serde_json::from_str("-1").unwrap(); + assert_eq!(dynamic, ReasoningControl::BudgetTokens(-1)); + } } diff --git a/crates/llm-worker/src/llm_client/scheme/anthropic/request.rs b/crates/llm-worker/src/llm_client/scheme/anthropic/request.rs index b1c4d1c1..322d6043 100644 --- a/crates/llm-worker/src/llm_client/scheme/anthropic/request.rs +++ b/crates/llm-worker/src/llm_client/scheme/anthropic/request.rs @@ -7,9 +7,9 @@ use std::collections::BTreeSet; use serde::Serialize; use crate::llm_client::{ + capability::{CacheStrategy, ModelCapability, ReasoningControl, ReasoningSupport}, + types::{parse_tool_arguments, ContentPart, Item, Role, ToolDefinition}, Request, - capability::{CacheStrategy, ModelCapability, ReasoningSupport}, - types::{ContentPart, Item, Role, ToolDefinition, parse_tool_arguments}, }; use super::AnthropicScheme; @@ -41,7 +41,7 @@ pub(crate) struct AnthropicRequest { #[derive(Debug, Serialize)] #[serde(tag = "type", rename_all = "snake_case")] pub(crate) enum AnthropicThinking { - Enabled { budget_tokens: u32 }, + Enabled { budget_tokens: i32 }, } /// Anthropic message @@ -170,9 +170,13 @@ impl AnthropicScheme { .config .reasoning .as_ref() - .and_then(|rc| rc.budget_tokens) .filter(|_| supports_budget_tokens) - .map(|budget_tokens| AnthropicThinking::Enabled { budget_tokens }); + .and_then(|rc| match rc { + ReasoningControl::BudgetTokens(budget_tokens) => Some(AnthropicThinking::Enabled { + budget_tokens: *budget_tokens, + }), + ReasoningControl::Effort(_) => None, + }); AnthropicRequest { model: model.to_string(), @@ -218,7 +222,12 @@ impl AnthropicScheme { for (i, item) in items.iter().enumerate() { match item { Item::Message { role, content, .. } => { - flush_pending(&mut messages, &mut pending_assistant, "assistant", &mut locations); + flush_pending( + &mut messages, + &mut pending_assistant, + "assistant", + &mut locations, + ); flush_pending(&mut messages, &mut pending_user, "user", &mut locations); let anthropic_role = match role { @@ -229,9 +238,7 @@ impl AnthropicScheme { let parts: Vec = content .iter() .map(|p| match p { - ContentPart::Text { text } => { - AnthropicContentPart::text(text.clone()) - } + ContentPart::Text { text } => AnthropicContentPart::text(text.clone()), ContentPart::Refusal { refusal } => { AnthropicContentPart::text(refusal.clone()) } @@ -284,15 +291,18 @@ impl AnthropicScheme { content, .. } => { - flush_pending(&mut messages, &mut pending_assistant, "assistant", &mut locations); + flush_pending( + &mut messages, + &mut pending_assistant, + "assistant", + &mut locations, + ); let text = match content { Some(c) => format!("{summary}\n{c}"), None => summary.clone(), }; - pending_user.push(( - i, - AnthropicContentPart::tool_result(call_id.clone(), text), - )); + pending_user + .push((i, AnthropicContentPart::tool_result(call_id.clone(), text))); } Item::Reasoning { text, .. } => { @@ -304,7 +314,12 @@ impl AnthropicScheme { } } - flush_pending(&mut messages, &mut pending_assistant, "assistant", &mut locations); + flush_pending( + &mut messages, + &mut pending_assistant, + "assistant", + &mut locations, + ); flush_pending(&mut messages, &mut pending_user, "user", &mut locations); // Apply cache_control markers at each breakpoint item's last part. @@ -400,7 +415,7 @@ fn compute_breakpoints(items: &[Item], cache_anchor: Option) -> BTreeSet< mod tests { use super::*; use crate::llm_client::capability::{ - CacheStrategy, StructuredOutput, ToolCallingSupport, + CacheStrategy, ReasoningEffort, StructuredOutput, ToolCallingSupport, }; /// cache_control が有効になる既定の capability。 @@ -422,6 +437,13 @@ mod tests { } } + fn cap_budget_reasoning() -> ModelCapability { + ModelCapability { + reasoning: Some(ReasoningSupport::BudgetTokens), + ..cap_explicit() + } + } + #[test] fn test_build_simple_request() { let scheme = AnthropicScheme::new(); @@ -429,7 +451,8 @@ mod tests { .system("You are a helpful assistant.") .user("Hello!"); - let anthropic_req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit()); + let anthropic_req = + scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit()); assert_eq!(anthropic_req.model, "claude-sonnet-4-20250514"); assert_eq!( @@ -455,12 +478,45 @@ mod tests { })), ); - let anthropic_req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit()); + let anthropic_req = + scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit()); assert_eq!(anthropic_req.tools.len(), 1); assert_eq!(anthropic_req.tools[0].name, "get_weather"); } + #[test] + fn thinking_budget_projected_when_supported() { + let scheme = AnthropicScheme::new(); + let mut request = Request::new().user("think"); + request.config.reasoning = Some(ReasoningControl::BudgetTokens(4096)); + + let req = scheme.build_request( + "claude-sonnet-4-20250514", + &request, + &cap_budget_reasoning(), + ); + let json = serde_json::to_value(&req).unwrap(); + + assert_eq!(json["thinking"]["type"], "enabled"); + assert_eq!(json["thinking"]["budget_tokens"], 4096); + } + + #[test] + fn effort_reasoning_not_projected_to_anthropic() { + let scheme = AnthropicScheme::new(); + let mut request = Request::new().user("think"); + request.config.reasoning = Some(ReasoningControl::Effort(ReasoningEffort::High)); + + let req = scheme.build_request( + "claude-sonnet-4-20250514", + &request, + &cap_budget_reasoning(), + ); + + assert!(req.thinking.is_none()); + } + #[test] fn test_tool_call_and_result() { let scheme = AnthropicScheme::new(); @@ -473,7 +529,8 @@ mod tests { )) .item(Item::tool_result("call_123", "Sunny, 25°C")); - let anthropic_req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit()); + let anthropic_req = + scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit()); assert_eq!(anthropic_req.messages.len(), 3); assert_eq!(anthropic_req.messages[0].role, "user"); @@ -543,7 +600,7 @@ mod tests { let scheme = AnthropicScheme::new(); let mut items = completed_turn(); items.push(Item::user_message("next turn")); // index 5 = latest user - // cache_anchor=None, turn_end=4, head=5. + // cache_anchor=None, turn_end=4, head=5. let request = Request::new().items(items); let req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit()); @@ -607,9 +664,7 @@ mod tests { // so we don't bloat requests with wrapper arrays. Here the Head // lands on items[1], leaving items[0] without a marker. let scheme = AnthropicScheme::new(); - let request = Request::new() - .user("hello") - .assistant("hi there"); + let request = Request::new().user("hello").assistant("hi there"); let req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit()); assert!( matches!(req.messages[0].content, AnthropicContent::Text(_)), @@ -628,10 +683,7 @@ mod tests { match &req.messages[0].content { AnthropicContent::Parts(parts) => { assert_eq!(parts.len(), 1); - assert_eq!( - part_cache_control(&parts[0]), - Some(CacheControl::Ephemeral) - ); + assert_eq!(part_cache_control(&parts[0]), Some(CacheControl::Ephemeral)); } AnthropicContent::Text(_) => panic!("breakpoint item should use Parts form"), } @@ -668,7 +720,8 @@ mod tests { #[test] fn empty_items_produce_no_breakpoints() { let scheme = AnthropicScheme::new(); - let req = scheme.build_request("claude-sonnet-4-20250514", &Request::new(), &cap_explicit()); + let req = + scheme.build_request("claude-sonnet-4-20250514", &Request::new(), &cap_explicit()); assert!(req.messages.is_empty()); assert!(breakpoint_positions(&req).is_empty()); } diff --git a/crates/llm-worker/src/llm_client/scheme/gemini/request.rs b/crates/llm-worker/src/llm_client/scheme/gemini/request.rs index 6d9b0f07..e871e4f4 100644 --- a/crates/llm-worker/src/llm_client/scheme/gemini/request.rs +++ b/crates/llm-worker/src/llm_client/scheme/gemini/request.rs @@ -6,9 +6,9 @@ use serde::Serialize; use serde_json::Value; use crate::llm_client::{ + capability::{ModelCapability, ReasoningControl, ReasoningSupport}, + types::{parse_tool_arguments, Item, Role, ToolDefinition}, Request, - capability::{ModelCapability, ReasoningSupport}, - types::{Item, Role, ToolDefinition, parse_tool_arguments}, }; use super::GeminiScheme; @@ -203,10 +203,12 @@ impl GeminiScheme { .config .reasoning .as_ref() - .and_then(|rc| rc.budget_tokens) .filter(|_| supports_budget) - .map(|budget| GeminiThinkingConfig { - thinking_budget: budget as i32, + .and_then(|rc| match rc { + ReasoningControl::BudgetTokens(budget) => Some(GeminiThinkingConfig { + thinking_budget: *budget, + }), + ReasoningControl::Effort(_) => None, }); // Generation config @@ -374,7 +376,9 @@ impl GeminiScheme { #[cfg(test)] mod tests { use super::*; - use crate::llm_client::capability::{CacheStrategy, StructuredOutput, ToolCallingSupport}; + use crate::llm_client::capability::{ + CacheStrategy, ReasoningEffort, StructuredOutput, ToolCallingSupport, + }; fn cap() -> ModelCapability { ModelCapability { @@ -386,6 +390,13 @@ mod tests { } } + fn cap_budget_reasoning() -> ModelCapability { + ModelCapability { + reasoning: Some(ReasoningSupport::BudgetTokens), + ..cap() + } + } + #[test] fn test_build_simple_request() { let scheme = GeminiScheme::new(); @@ -457,4 +468,29 @@ mod tests { assert_eq!(gemini_req.contents[1].role, "model"); assert_eq!(gemini_req.contents[2].role, "user"); } + + #[test] + fn thinking_budget_projected_when_supported() { + let scheme = GeminiScheme::new(); + let mut request = Request::new().user("think"); + request.config.reasoning = Some(ReasoningControl::BudgetTokens(-1)); + + let gemini_req = scheme.build_request(&request, &cap_budget_reasoning()); + let config = gemini_req.generation_config.expect("generation config"); + let thinking = config.thinking_config.expect("thinking config"); + + assert_eq!(thinking.thinking_budget, -1); + } + + #[test] + fn effort_reasoning_not_projected_to_gemini() { + let scheme = GeminiScheme::new(); + let mut request = Request::new().user("think"); + request.config.reasoning = Some(ReasoningControl::Effort(ReasoningEffort::Medium)); + + let gemini_req = scheme.build_request(&request, &cap_budget_reasoning()); + let config = gemini_req.generation_config.expect("generation config"); + + assert!(config.thinking_config.is_none()); + } } diff --git a/crates/llm-worker/src/llm_client/scheme/openai_chat/request.rs b/crates/llm-worker/src/llm_client/scheme/openai_chat/request.rs index 09f006b5..6b033a9a 100644 --- a/crates/llm-worker/src/llm_client/scheme/openai_chat/request.rs +++ b/crates/llm-worker/src/llm_client/scheme/openai_chat/request.rs @@ -6,9 +6,9 @@ use serde::Serialize; use serde_json::Value; use crate::llm_client::{ + capability::{ModelCapability, ReasoningControl, ReasoningSupport}, + types::{parse_tool_arguments, Item, Role, ToolDefinition}, Request, - capability::{ModelCapability, ReasoningEffort, ReasoningSupport}, - types::{Item, Role, ToolDefinition, parse_tool_arguments}, }; use super::OpenAIScheme; @@ -37,7 +37,7 @@ pub(crate) struct OpenAIRequest { pub tool_choice: Option, /// Reasoning effort(o1 / o3 / o4 / gpt-5 系で有効)。 #[serde(skip_serializing_if = "Option::is_none")] - pub reasoning_effort: Option<&'static str>, + pub reasoning_effort: Option, } #[derive(Debug, Serialize)] @@ -154,12 +154,10 @@ impl OpenAIScheme { .config .reasoning .as_ref() - .and_then(|rc| rc.effort) .filter(|_| supports_effort) - .map(|effort| match effort { - ReasoningEffort::Low => "low", - ReasoningEffort::Medium => "medium", - ReasoningEffort::High => "high", + .and_then(|rc| match rc { + ReasoningControl::Effort(effort) => Some(effort.as_str().to_string()), + ReasoningControl::BudgetTokens(_) => None, }); OpenAIRequest { @@ -322,7 +320,9 @@ impl OpenAIScheme { #[cfg(test)] mod tests { use super::*; - use crate::llm_client::capability::{CacheStrategy, StructuredOutput, ToolCallingSupport}; + use crate::llm_client::capability::{ + CacheStrategy, ReasoningEffort, StructuredOutput, ToolCallingSupport, + }; fn cap() -> ModelCapability { ModelCapability { @@ -387,6 +387,38 @@ mod tests { assert!(body.max_tokens.is_none()); } + #[test] + fn reasoning_effort_projected_when_supported() { + let scheme = OpenAIScheme::new(); + let mut request = Request::new().user("Hello"); + request.config.reasoning = Some(ReasoningControl::Effort(ReasoningEffort::Other( + "provider-native".into(), + ))); + let capability = ModelCapability { + reasoning: Some(ReasoningSupport::Effort), + ..cap() + }; + + let body = scheme.build_request("gpt-5", &request, &capability); + + assert_eq!(body.reasoning_effort.as_deref(), Some("provider-native")); + } + + #[test] + fn budget_reasoning_not_projected_to_openai_chat() { + let scheme = OpenAIScheme::new(); + let mut request = Request::new().user("Hello"); + request.config.reasoning = Some(ReasoningControl::BudgetTokens(4096)); + let capability = ModelCapability { + reasoning: Some(ReasoningSupport::Both), + ..cap() + }; + + let body = scheme.build_request("gpt-5", &request, &capability); + + assert!(body.reasoning_effort.is_none()); + } + #[test] fn test_tool_call_and_result() { let scheme = OpenAIScheme::new(); diff --git a/crates/llm-worker/src/llm_client/scheme/openai_responses/request.rs b/crates/llm-worker/src/llm_client/scheme/openai_responses/request.rs index 08ae557b..a7dce5f9 100644 --- a/crates/llm-worker/src/llm_client/scheme/openai_responses/request.rs +++ b/crates/llm-worker/src/llm_client/scheme/openai_responses/request.rs @@ -9,7 +9,7 @@ use serde_json::Value; use crate::llm_client::{ Request, - capability::{ModelCapability, ReasoningEffort, ReasoningSupport}, + capability::{ModelCapability, ReasoningControl, ReasoningSupport}, types::{ContentPart, Item, Role, ToolDefinition, parse_tool_arguments}, }; @@ -50,7 +50,7 @@ pub(crate) struct ResponsesRequest { #[derive(Debug, Serialize)] pub(crate) struct ReasoningConfig { #[serde(skip_serializing_if = "Option::is_none")] - pub effort: Option<&'static str>, + pub effort: Option, /// summary の出力制御。`"auto"` 固定で summary_text を受け取る。 pub summary: &'static str, } @@ -168,16 +168,15 @@ impl OpenAIResponsesScheme { .config .reasoning .as_ref() - .and_then(|rc| rc.effort) .filter(|_| supports_effort) .map(|effort| ReasoningConfig { - effort: Some(match effort { - ReasoningEffort::Low => "low", - ReasoningEffort::Medium => "medium", - ReasoningEffort::High => "high", - }), + effort: match effort { + ReasoningControl::Effort(effort) => Some(effort.as_str().to_string()), + ReasoningControl::BudgetTokens(_) => None, + }, summary: "auto", - }); + }) + .filter(|reasoning| reasoning.effort.is_some()); let include: Vec<&'static str> = if self.include_encrypted_content { vec!["reasoning.encrypted_content"] @@ -209,12 +208,12 @@ fn convert_items_to_input(items: &[Item]) -> Vec { for item in items { match item { Item::Message { role, content, .. } => { - let (role_str, text_variant): (&'static str, fn(String) -> InputContent) = match role - { - Role::User => ("user", |t| InputContent::InputText { text: t }), - Role::Assistant => ("assistant", |t| InputContent::OutputText { text: t }), - Role::System => ("system", |t| InputContent::InputText { text: t }), - }; + let (role_str, text_variant): (&'static str, fn(String) -> InputContent) = + match role { + Role::User => ("user", |t| InputContent::InputText { text: t }), + Role::Assistant => ("assistant", |t| InputContent::OutputText { text: t }), + Role::System => ("system", |t| InputContent::InputText { text: t }), + }; let parts: Vec = content .iter() .map(|p| match p { @@ -395,7 +394,10 @@ mod tests { .item(Item::tool_result("c1", "ok")); let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning()); assert!(matches!(body.input[1], InputItem::FunctionCall { .. })); - assert!(matches!(body.input[2], InputItem::FunctionCallOutput { .. })); + assert!(matches!( + body.input[2], + InputItem::FunctionCallOutput { .. } + )); } #[test] @@ -425,13 +427,10 @@ mod tests { fn reasoning_effort_projected_when_supported() { let scheme = OpenAIResponsesScheme::new(); let mut req = Request::new().user("hi"); - req.config.reasoning = Some(ReasoningControl { - effort: Some(ReasoningEffort::High), - budget_tokens: None, - }); + req.config.reasoning = Some(ReasoningControl::Effort(ReasoningEffort::High)); let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning()); let reasoning = body.reasoning.expect("reasoning should be set"); - assert_eq!(reasoning.effort, Some("high")); + assert_eq!(reasoning.effort.as_deref(), Some("high")); assert_eq!(reasoning.summary, "auto"); } @@ -439,10 +438,7 @@ mod tests { fn reasoning_omitted_when_unsupported() { let scheme = OpenAIResponsesScheme::new(); let mut req = Request::new().user("hi"); - req.config.reasoning = Some(ReasoningControl { - effort: Some(ReasoningEffort::High), - budget_tokens: None, - }); + req.config.reasoning = Some(ReasoningControl::Effort(ReasoningEffort::High)); let body = scheme.build_request("gpt-4o", &req, &cap_no_reasoning()); assert!(body.reasoning.is_none()); } diff --git a/crates/manifest/src/config.rs b/crates/manifest/src/config.rs index 6b40a11b..f47c2935 100644 --- a/crates/manifest/src/config.rs +++ b/crates/manifest/src/config.rs @@ -13,7 +13,7 @@ use std::path::{Path, PathBuf}; use serde::{Deserialize, Serialize}; use crate::defaults; -use crate::model::{AuthRef, ModelManifest}; +use crate::model::{AuthRef, ModelManifest, ReasoningControl}; use crate::{ CompactionConfig, MemoryConfig, PodManifest, PodMeta, ScopeConfig, ToolOutputLimits, WorkerManifest, @@ -65,6 +65,8 @@ pub struct WorkerManifestConfig { #[serde(default)] pub temperature: Option, #[serde(default)] + pub reasoning: Option, + #[serde(default)] pub tool_output: ToolOutputLimitsPartial, } @@ -103,10 +105,7 @@ pub enum ResolveError { #[error("missing required field: {0}")] MissingField(&'static str), #[error("path must be absolute ({field}): {}", .path.display())] - RelativePath { - field: &'static str, - path: PathBuf, - }, + RelativePath { field: &'static str, path: PathBuf }, } impl PodManifestConfig { @@ -227,6 +226,7 @@ impl WorkerManifestConfig { max_tokens: upper.max_tokens.or(self.max_tokens), max_turns: upper.max_turns.or(self.max_turns), temperature: upper.temperature.or(self.temperature), + reasoning: upper.reasoning.or(self.reasoning), tool_output: self.tool_output.merge(upper.tool_output), } } @@ -323,10 +323,7 @@ impl TryFrom for PodManifest { type Error = ResolveError; fn try_from(cfg: PodManifestConfig) -> Result { - let name = cfg - .pod - .name - .ok_or(ResolveError::MissingField("pod.name"))?; + let name = cfg.pod.name.ok_or(ResolveError::MissingField("pod.name"))?; let prompt_pack = cfg.pod.prompt_pack; if let Some(ref p) = prompt_pack { ensure_absolute("pod.prompt_pack", p)?; @@ -342,6 +339,7 @@ impl TryFrom for PodManifest { max_tokens: cfg.worker.max_tokens, max_turns: cfg.worker.max_turns, temperature: cfg.worker.temperature, + reasoning: cfg.worker.reasoning, tool_output: ToolOutputLimits { default_max_bytes: cfg .worker @@ -372,9 +370,7 @@ impl TryFrom for PodManifest { prune_protected_turns: c .prune_protected_turns .unwrap_or(defaults::PRUNE_PROTECTED_TURNS), - prune_min_savings: c - .prune_min_savings - .unwrap_or(defaults::PRUNE_MIN_SAVINGS), + prune_min_savings: c.prune_min_savings.unwrap_or(defaults::PRUNE_MIN_SAVINGS), compact_threshold: c.compact_threshold, compact_request_threshold: c.compact_request_threshold, compact_retained_tokens: c @@ -406,7 +402,7 @@ impl TryFrom for PodManifest { mod tests { use super::*; use crate::model::SchemeKind; - use crate::{Permission, ScopeRule}; + use crate::{Permission, ReasoningEffort, ScopeRule}; fn abs(path: &str) -> PathBuf { PathBuf::from(format!("/tmp/insomnia-test{path}")) @@ -565,6 +561,31 @@ mod tests { assert_eq!(merged.model.model_id.as_deref(), Some("lower-model")); } + #[test] + fn merge_worker_reasoning_upper_wins() { + let lower = PodManifestConfig { + worker: WorkerManifestConfig { + reasoning: Some(ReasoningControl::Effort(ReasoningEffort::Low)), + ..Default::default() + }, + ..Default::default() + }; + let upper = PodManifestConfig { + worker: WorkerManifestConfig { + reasoning: Some(ReasoningControl::BudgetTokens(4096)), + ..Default::default() + }, + ..Default::default() + }; + + let merged = lower.merge(upper); + + assert_eq!( + merged.worker.reasoning, + Some(ReasoningControl::BudgetTokens(4096)) + ); + } + #[test] fn merge_scope_accumulates_allow_and_deny() { let lower = PodManifestConfig { @@ -614,12 +635,9 @@ mod tests { worker: WorkerManifestConfig { tool_output: ToolOutputLimitsPartial { default_max_bytes: None, - per_tool: [ - ("Read".to_string(), 2048), - ("Grep".to_string(), 512), - ] - .into_iter() - .collect(), + per_tool: [("Read".to_string(), 2048), ("Grep".to_string(), 512)] + .into_iter() + .collect(), }, ..Default::default() }, @@ -687,6 +705,33 @@ unknown_future_field = "tolerated" assert_eq!(cfg.worker.max_tokens, Some(1000)); } + #[test] + fn from_toml_accepts_worker_reasoning_string_or_integer() { + let effort = PodManifestConfig::from_toml( + r#" +[worker] +reasoning = "xhigh" +"#, + ) + .unwrap(); + assert_eq!( + effort.worker.reasoning, + Some(ReasoningControl::Effort(ReasoningEffort::XHigh)) + ); + + let budget = PodManifestConfig::from_toml( + r#" +[worker] +reasoning = -1 +"#, + ) + .unwrap(); + assert_eq!( + budget.worker.reasoning, + Some(ReasoningControl::BudgetTokens(-1)) + ); + } + #[test] fn from_toml_partial_layer_succeeds() { // A project-layer manifest with only scope set must parse fine. diff --git a/crates/manifest/src/lib.rs b/crates/manifest/src/lib.rs index 4f4a5e6d..4a668ddd 100644 --- a/crates/manifest/src/lib.rs +++ b/crates/manifest/src/lib.rs @@ -6,12 +6,14 @@ pub mod paths; mod scope; pub use cascade::{LayerLoadError, find_project_manifest_from, load_layer}; -pub use paths::user_manifest_path; pub use config::{ CompactionConfigPartial, PodManifestConfig, PodMetaConfig, ResolveError, ToolOutputLimitsPartial, WorkerManifestConfig, }; -pub use model::{AuthRef, ModelCapability, ModelManifest, SchemeKind}; +pub use model::{ + AuthRef, ModelCapability, ModelManifest, ReasoningControl, ReasoningEffort, SchemeKind, +}; +pub use paths::user_manifest_path; pub use protocol::{Permission, ScopeRule}; pub use scope::{Scope, ScopeError}; @@ -99,6 +101,8 @@ pub struct WorkerManifest { pub max_turns: Option, #[serde(default)] pub temperature: Option, + #[serde(default)] + pub reasoning: Option, /// Byte-size caps applied to tool `content` before it reaches the /// conversation history. The section is optional in TOML — when /// omitted, `ToolOutputLimits::default()` (16KB default cap, no @@ -312,6 +316,7 @@ auth = { kind = "api_key", file = "/abs/keys/anthropic" } instruction = "$user/reviewer" max_tokens = 4096 temperature = 0.3 +reasoning = "medium" [[scope.allow]] target = "/abs/project" @@ -336,6 +341,10 @@ permission = "write" assert_eq!(manifest.worker.instruction, "$user/reviewer"); assert_eq!(manifest.worker.max_tokens, Some(4096)); assert_eq!(manifest.worker.temperature, Some(0.3)); + assert_eq!( + manifest.worker.reasoning, + Some(ReasoningControl::Effort(ReasoningEffort::Medium)) + ); let allow = &manifest.scope.allow; assert_eq!(allow.len(), 2); assert_eq!(allow[0].permission, Permission::Write); @@ -368,6 +377,16 @@ model_id = "claude-sonnet-4-20250514" assert_eq!(manifest.worker.max_turns.unwrap().get(), 50); } + #[test] + fn parse_reasoning_budget() { + let toml = MINIMAL_REQUIRED.replace("[worker]\n", "[worker]\nreasoning = -1\n"); + let manifest = PodManifest::from_toml(&toml).unwrap(); + assert_eq!( + manifest.worker.reasoning, + Some(ReasoningControl::BudgetTokens(-1)) + ); + } + #[test] fn omitted_max_turns_is_none() { let manifest = PodManifest::from_toml(MINIMAL_REQUIRED).unwrap(); @@ -458,9 +477,7 @@ model_id = "claude-sonnet-4-20250514" #[test] fn memory_section_with_explicit_root() { - let toml = format!( - "{MINIMAL_REQUIRED}\n[memory]\nworkspace_root = \"/some/where\"\n" - ); + let toml = format!("{MINIMAL_REQUIRED}\n[memory]\nworkspace_root = \"/some/where\"\n"); let manifest = PodManifest::from_toml(&toml).unwrap(); let mem = manifest.memory.unwrap(); assert_eq!( diff --git a/crates/manifest/src/model.rs b/crates/manifest/src/model.rs index c37f01f3..2b06a128 100644 --- a/crates/manifest/src/model.rs +++ b/crates/manifest/src/model.rs @@ -16,7 +16,7 @@ use serde::{Deserialize, Serialize}; // `ModelCapability` は `llm-worker` 側に定義される runtime 構造だが、 // マニフェストで任意に override できるよう型だけ再エクスポートする。 -pub use llm_worker::llm_client::capability::ModelCapability; +pub use llm_worker::llm_client::capability::{ModelCapability, ReasoningControl, ReasoningEffort}; /// Pod マニフェストの `[model]` セクション。 /// diff --git a/crates/pod/src/pod.rs b/crates/pod/src/pod.rs index 757a980a..8ff1ad28 100644 --- a/crates/pod/src/pod.rs +++ b/crates/pod/src/pod.rs @@ -1398,6 +1398,7 @@ pub fn apply_worker_manifest(worker: &mut Worker, wm: &WorkerMa if let Some(temperature) = wm.temperature { config.temperature = Some(temperature); } + config.reasoning = wm.reasoning.clone(); worker.set_request_config(config); worker.set_max_turns(wm.max_turns.map(|n| n.get())); worker.set_tool_output_limits(Some(ToolOutputLimits {