From b1e45728233279f88578ba1d47509f154d8db720 Mon Sep 17 00:00:00 2001 From: Hare Date: Sun, 19 Apr 2026 23:32:14 +0900 Subject: [PATCH] =?UTF-8?q?llm-model-config=E3=81=AE=E5=AE=9F=E8=A3=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../examples/record_test_fixtures/main.rs | 55 +++- .../llm-worker/examples/worker_cancel_demo.rs | 18 +- crates/llm-worker/examples/worker_cli.rs | 68 +++- crates/llm-worker/src/llm_client/auth.rs | 23 ++ .../llm-worker/src/llm_client/capability.rs | 102 ++++++ crates/llm-worker/src/llm_client/mod.rs | 6 +- .../src/llm_client/providers/anthropic.rs | 206 ------------ .../src/llm_client/providers/gemini.rs | 190 ----------- .../src/llm_client/providers/mod.rs | 8 - .../src/llm_client/providers/ollama.rs | 67 ---- .../src/llm_client/providers/openai.rs | 217 ------------- .../llm_client/scheme/anthropic/capability.rs | 26 ++ .../src/llm_client/scheme/anthropic/mod.rs | 4 + .../llm_client/scheme/anthropic/request.rs | 106 +++++- .../scheme/anthropic/scheme_impl.rs | 99 ++++++ .../llm_client/scheme/gemini/capability.rs | 26 ++ .../src/llm_client/scheme/gemini/mod.rs | 2 + .../src/llm_client/scheme/gemini/request.rs | 54 +++- .../llm_client/scheme/gemini/scheme_impl.rs | 53 +++ .../llm-worker/src/llm_client/scheme/mod.rs | 75 ++++- .../scheme/openai_chat/capability.rs | 47 +++ .../scheme/{openai => openai_chat}/events.rs | 0 .../scheme/{openai => openai_chat}/mod.rs | 2 + .../scheme/{openai => openai_chat}/request.rs | 51 ++- .../scheme/openai_chat/scheme_impl.rs | 57 ++++ crates/llm-worker/src/llm_client/transport.rs | 226 +++++++++++++ crates/llm-worker/src/llm_client/types.rs | 6 + .../tests/ui/locked_register_tool.rs | 21 +- .../tests/ui/locked_register_tool.stderr | 8 +- .../ui/tool_server_handle_register_tool.rs | 21 +- .../tool_server_handle_register_tool.stderr | 4 +- crates/llm-worker/tests/validation_test.rs | 39 --- crates/manifest/README.md | 7 +- crates/manifest/src/config.rs | 178 +++++----- crates/manifest/src/lib.rs | 115 +++---- crates/manifest/src/model.rs | 74 +++++ crates/pod/examples/pod_cli.rs | 6 +- crates/pod/examples/pod_protocol.rs | 6 +- crates/pod/src/controller.rs | 16 +- crates/pod/src/factory.rs | 68 ++-- crates/pod/src/lib.rs | 2 +- crates/pod/src/pod.rs | 12 +- crates/pod/src/spawn_pod.rs | 63 ++-- crates/pod/tests/controller_test.rs | 6 +- crates/pod/tests/spawn_pod_test.rs | 22 +- .../pod/tests/system_prompt_template_test.rs | 6 +- crates/provider/README.md | 11 +- crates/provider/src/lib.rs | 305 +++++++++++------- tickets/llm-model-config.md | 4 + tickets/llm-model-config.review.md | 105 ++++++ 50 files changed, 1718 insertions(+), 1175 deletions(-) create mode 100644 crates/llm-worker/src/llm_client/auth.rs create mode 100644 crates/llm-worker/src/llm_client/capability.rs delete mode 100644 crates/llm-worker/src/llm_client/providers/anthropic.rs delete mode 100644 crates/llm-worker/src/llm_client/providers/gemini.rs delete mode 100644 crates/llm-worker/src/llm_client/providers/mod.rs delete mode 100644 crates/llm-worker/src/llm_client/providers/ollama.rs delete mode 100644 crates/llm-worker/src/llm_client/providers/openai.rs create mode 100644 crates/llm-worker/src/llm_client/scheme/anthropic/capability.rs create mode 100644 crates/llm-worker/src/llm_client/scheme/anthropic/scheme_impl.rs create mode 100644 crates/llm-worker/src/llm_client/scheme/gemini/capability.rs create mode 100644 crates/llm-worker/src/llm_client/scheme/gemini/scheme_impl.rs create mode 100644 crates/llm-worker/src/llm_client/scheme/openai_chat/capability.rs rename crates/llm-worker/src/llm_client/scheme/{openai => openai_chat}/events.rs (100%) rename crates/llm-worker/src/llm_client/scheme/{openai => openai_chat}/mod.rs (96%) rename crates/llm-worker/src/llm_client/scheme/{openai => openai_chat}/request.rs (86%) create mode 100644 crates/llm-worker/src/llm_client/scheme/openai_chat/scheme_impl.rs create mode 100644 crates/llm-worker/src/llm_client/transport.rs delete mode 100644 crates/llm-worker/tests/validation_test.rs create mode 100644 crates/manifest/src/model.rs create mode 100644 tickets/llm-model-config.review.md diff --git a/crates/llm-worker/examples/record_test_fixtures/main.rs b/crates/llm-worker/examples/record_test_fixtures/main.rs index 1578c63a..e9d0faef 100644 --- a/crates/llm-worker/examples/record_test_fixtures/main.rs +++ b/crates/llm-worker/examples/record_test_fixtures/main.rs @@ -20,9 +20,35 @@ mod recorder; mod scenarios; use clap::{Parser, ValueEnum}; -use llm_worker::llm_client::providers::anthropic::AnthropicClient; -use llm_worker::llm_client::providers::gemini::GeminiClient; -use llm_worker::llm_client::providers::openai::OpenAIClient; +use llm_worker::llm_client::capability::{ + CacheStrategy, ModelCapability, StructuredOutput, ToolCallingSupport, +}; +use llm_worker::llm_client::scheme::{ + Scheme, anthropic::AnthropicScheme, gemini::GeminiScheme, openai_chat::OpenAIScheme, +}; +use llm_worker::llm_client::transport::{HttpTransport, ResolvedAuth}; + +/// 既定の capability: fixture 記録には cache_control を付けない +/// (既知モデルの静的テーブルを経由すると scheme 毎に自動設定される)。 +fn fallback_capability() -> ModelCapability { + ModelCapability { + tool_calling: ToolCallingSupport::Parallel, + structured_output: StructuredOutput::JsonSchema, + reasoning: None, + vision: false, + prompt_caching: CacheStrategy::Auto, + } +} + +fn make_transport( + scheme: S, + model: &str, + auth: ResolvedAuth, +) -> HttpTransport { + let cap = scheme.capability_for(model).unwrap_or_else(fallback_capability); + let base_url = scheme.default_base_url().to_string(); + HttpTransport::new(scheme, model.to_string(), base_url, auth, cap) +} #[derive(Parser, Debug)] #[command(author, version, about, long_about = None)] @@ -60,7 +86,11 @@ async fn run_scenario_with_anthropic( let api_key = std::env::var("ANTHROPIC_API_KEY") .expect("ANTHROPIC_API_KEY environment variable must be set"); let model = model.as_deref().unwrap_or("claude-sonnet-4-20250514"); - let client = AnthropicClient::new(&api_key, model); + let client = make_transport( + AnthropicScheme::new(), + model, + ResolvedAuth::ApiKey(api_key), + ); recorder::record_request( &client, @@ -82,7 +112,7 @@ async fn run_scenario_with_openai( let api_key = std::env::var("OPENAI_API_KEY").expect("OPENAI_API_KEY environment variable must be set"); let model = model.as_deref().unwrap_or("gpt-4o"); - let client = OpenAIClient::new(&api_key, model); + let client = make_transport(OpenAIScheme::new(), model, ResolvedAuth::ApiKey(api_key)); recorder::record_request( &client, @@ -101,10 +131,15 @@ async fn run_scenario_with_ollama( subdir: &str, model: Option, ) -> Result<(), Box> { - use llm_worker::llm_client::providers::ollama::OllamaClient; - // Ollama typically runs local, no key needed or placeholder - let model = model.as_deref().unwrap_or("llama3"); // default example - let client = OllamaClient::new(model); // base_url placeholder, handled by client default + // Ollama = Anthropic scheme + base_url 差し替え + 認証なし + let model = model.as_deref().unwrap_or("llama3"); + let client = HttpTransport::new( + AnthropicScheme::new(), + model.to_string(), + "http://localhost:11434".to_string(), + ResolvedAuth::None, + fallback_capability(), + ); recorder::record_request( &client, @@ -126,7 +161,7 @@ async fn run_scenario_with_gemini( let api_key = std::env::var("GEMINI_API_KEY").expect("GEMINI_API_KEY environment variable must be set"); let model = model.as_deref().unwrap_or("gemini-2.0-flash"); - let client = GeminiClient::new(&api_key, model); + let client = make_transport(GeminiScheme::new(), model, ResolvedAuth::ApiKey(api_key)); recorder::record_request( &client, diff --git a/crates/llm-worker/examples/worker_cancel_demo.rs b/crates/llm-worker/examples/worker_cancel_demo.rs index 56c69e13..feea838d 100644 --- a/crates/llm-worker/examples/worker_cancel_demo.rs +++ b/crates/llm-worker/examples/worker_cancel_demo.rs @@ -2,7 +2,11 @@ //! //! Example of cancelling from another thread during streaming -use llm_worker::llm_client::providers::anthropic::AnthropicClient; +use llm_worker::llm_client::capability::{ + CacheStrategy, ModelCapability, StructuredOutput, ToolCallingSupport, +}; +use llm_worker::llm_client::scheme::{Scheme, anthropic::AnthropicScheme}; +use llm_worker::llm_client::transport::{HttpTransport, ResolvedAuth}; use llm_worker::{Worker, WorkerResult}; use std::time::Duration; @@ -22,7 +26,17 @@ async fn main() -> Result<(), Box> { let api_key = std::env::var("ANTHROPIC_API_KEY").expect("ANTHROPIC_API_KEY environment variable not set"); - let client = AnthropicClient::new(&api_key, "claude-sonnet-4-20250514"); + let scheme = AnthropicScheme::new(); + let model = "claude-sonnet-4-20250514".to_string(); + let cap = scheme.capability_for(&model).unwrap_or(ModelCapability { + tool_calling: ToolCallingSupport::Parallel, + structured_output: StructuredOutput::JsonSchema, + reasoning: None, + vision: false, + prompt_caching: CacheStrategy::Auto, + }); + let base_url = scheme.default_base_url().to_string(); + let client = HttpTransport::new(scheme, model, base_url, ResolvedAuth::ApiKey(api_key), cap); let worker = Worker::new(client); println!("🚀 Starting Worker..."); diff --git a/crates/llm-worker/examples/worker_cli.rs b/crates/llm-worker/examples/worker_cli.rs index a63acec2..2c7c25bd 100644 --- a/crates/llm-worker/examples/worker_cli.rs +++ b/crates/llm-worker/examples/worker_cli.rs @@ -44,10 +44,11 @@ use llm_worker::{ interceptor::{Interceptor, PostToolAction, ToolResultInfo}, llm_client::{ LlmClient, - providers::{ - anthropic::AnthropicClient, gemini::GeminiClient, ollama::OllamaClient, - openai::OpenAIClient, + capability::{CacheStrategy, ModelCapability, StructuredOutput, ToolCallingSupport}, + scheme::{ + Scheme, anthropic::AnthropicScheme, gemini::GeminiScheme, openai_chat::OpenAIScheme, }, + transport::{HttpTransport, ResolvedAuth}, }, timeline::{Handler, TextBlockEvent, TextBlockKind, ToolUseBlockEvent, ToolUseBlockKind}, }; @@ -327,6 +328,28 @@ fn get_api_key(args: &Args) -> Result { } /// Create client based on provider +fn default_capability() -> ModelCapability { + ModelCapability { + tool_calling: ToolCallingSupport::Parallel, + structured_output: StructuredOutput::JsonSchema, + reasoning: None, + vision: false, + prompt_caching: CacheStrategy::Auto, + } +} + +fn build_transport( + scheme: S, + model: String, + auth: ResolvedAuth, +) -> Box { + let cap = scheme + .capability_for(&model) + .unwrap_or_else(default_capability); + let base_url = scheme.default_base_url().to_string(); + Box::new(HttpTransport::new(scheme, model, base_url, auth, cap)) +} + fn create_client(args: &Args) -> Result, String> { let model = args .model @@ -336,21 +359,32 @@ fn create_client(args: &Args) -> Result, String> { let api_key = get_api_key(args)?; match args.provider { - Provider::Anthropic => { - let client = AnthropicClient::new(&api_key, &model); - Ok(Box::new(client)) - } - Provider::Gemini => { - let client = GeminiClient::new(&api_key, &model); - Ok(Box::new(client)) - } - Provider::Openai => { - let client = OpenAIClient::new(&api_key, &model); - Ok(Box::new(client)) - } + Provider::Anthropic => Ok(build_transport( + AnthropicScheme::new(), + model, + ResolvedAuth::ApiKey(api_key), + )), + Provider::Gemini => Ok(build_transport( + GeminiScheme::new(), + model, + ResolvedAuth::ApiKey(api_key), + )), + Provider::Openai => Ok(build_transport( + OpenAIScheme::new(), + model, + ResolvedAuth::ApiKey(api_key), + )), Provider::Ollama => { - let client = OllamaClient::new(&model); - Ok(Box::new(client)) + // Ollama = Anthropic scheme + base_url 差し替え + 認証なし + let scheme = AnthropicScheme::new(); + let cap = default_capability(); + Ok(Box::new(HttpTransport::new( + scheme, + model, + "http://localhost:11434".to_string(), + ResolvedAuth::None, + cap, + ))) } } } diff --git a/crates/llm-worker/src/llm_client/auth.rs b/crates/llm-worker/src/llm_client/auth.rs new file mode 100644 index 00000000..7d0283e6 --- /dev/null +++ b/crates/llm-worker/src/llm_client/auth.rs @@ -0,0 +1,23 @@ +//! `Scheme` 実装と通信層が要求する認証要件。 +//! +//! マニフェスト側の型(`ModelConfig` / `SchemeKind` / `AuthRef`)は +//! `crates/manifest` に置き、llm-worker はそれを知らずに済む。 +//! `AuthRequirement` は scheme が宣言する「この scheme はどんな認証を +//! 期待するか」のランタイム記述で、manifest 側の `AuthRef` との +//! 照合(`AuthRef → ResolvedAuth` 変換の適否)は `crates/provider` +//! で行う。 + +/// `Scheme::required_auth()` が返す認証要件。 +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum AuthRequirement { + /// 認証を行わない(Ollama など) + None, + /// `Authorization: Bearer ` ヘッダ(token は API key 相当) + Bearer, + /// `x-api-key: ` ヘッダ(Anthropic 形式) + XApiKey, + /// クエリパラメータ `?=`(Gemini 形式) + QueryParam { name: &'static str }, + /// 複合ヘッダ(Codex OAuth 等、`crates/provider` 側で解決) + Custom, +} diff --git a/crates/llm-worker/src/llm_client/capability.rs b/crates/llm-worker/src/llm_client/capability.rs new file mode 100644 index 00000000..8bb709fc --- /dev/null +++ b/crates/llm-worker/src/llm_client/capability.rs @@ -0,0 +1,102 @@ +//! モデル能力メタデータ +//! +//! `ModelCapability` はモデルが持つ機能差を表現する。scheme は同じでも +//! モデルごとに reasoning 可否や prompt caching 方式が違うため、scheme +//! から分離して保持する。 +//! +//! 値の供給経路は 2 通り: +//! 1. scheme 実装側の `model_id → ModelCapability` 静的テーブル(既知モデル) +//! 2. `ModelConfig::capability` での明示 override(未知モデル、または上書き) + +use serde::{Deserialize, Serialize}; + +/// モデル能力メタデータ +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct ModelCapability { + pub tool_calling: ToolCallingSupport, + pub structured_output: StructuredOutput, + #[serde(default)] + pub reasoning: Option, + #[serde(default)] + pub vision: bool, + pub prompt_caching: CacheStrategy, +} + +impl ModelCapability { + /// 何もサポートしない安全側デフォルト。未知モデルのフォールバック用。 + pub const fn minimal() -> Self { + Self { + tool_calling: ToolCallingSupport::None, + structured_output: StructuredOutput::None, + reasoning: None, + vision: false, + prompt_caching: CacheStrategy::Auto, + } + } +} + +/// ツール呼び出しサポート +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum ToolCallingSupport { + /// 非サポート + None, + /// 1 回のレスポンスで 1 ツールのみ + Sequential, + /// 1 回のレスポンスで複数ツール並行 + Parallel, +} + +/// Structured output サポート +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum StructuredOutput { + None, + /// `json_object` モード(スキーマなし JSON 強制) + JsonObject, + /// JSON Schema 指定で構造化出力 + JsonSchema, +} + +/// Reasoning(extended thinking)サポート +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum ReasoningSupport { + /// OpenAI 形式: `reasoning.effort` (low/medium/high) + Effort, + /// Anthropic 形式: `thinking.budget_tokens` + BudgetTokens, + /// 両対応(内部では共通 `ReasoningControl` として扱い、各 scheme で投影) + Both, +} + +/// Prompt caching 戦略 +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] +#[serde(tag = "kind", rename_all = "snake_case")] +pub enum CacheStrategy { + /// Anthropic: `cache_control` マーカーを明示挿入 + Explicit { max_breakpoints: u8 }, + /// それ以外: サーバ側自動 prefix、または未サポート + Auto, +} + +/// Reasoning 制御(共通型、scheme 側で各社形式に投影) +/// +/// `effort` / `budget_tokens` はユーザー設定から任意で渡される。Scheme +/// 側は自身の `ReasoningSupport` に応じて片方だけ使う。両方が宣言 +/// されている場合の優先順位は scheme 実装が決める。 +#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)] +pub struct ReasoningControl { + #[serde(default)] + pub effort: Option, + #[serde(default)] + pub budget_tokens: Option, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "lowercase")] +pub enum ReasoningEffort { + Low, + Medium, + High, +} diff --git a/crates/llm-worker/src/llm_client/mod.rs b/crates/llm-worker/src/llm_client/mod.rs index 1a74527c..c707f94f 100644 --- a/crates/llm-worker/src/llm_client/mod.rs +++ b/crates/llm-worker/src/llm_client/mod.rs @@ -16,14 +16,18 @@ //! - `providers`: プロバイダ固有のクライアント実装 //! - `scheme`: APIスキーマ(リクエスト/レスポンス変換) +pub mod auth; +pub mod capability; pub mod client; pub mod error; pub mod event; pub mod types; -pub mod providers; pub mod scheme; +pub mod transport; +pub use auth::*; +pub use capability::*; pub use client::*; pub use error::*; pub use event::*; diff --git a/crates/llm-worker/src/llm_client/providers/anthropic.rs b/crates/llm-worker/src/llm_client/providers/anthropic.rs deleted file mode 100644 index 153cf4da..00000000 --- a/crates/llm-worker/src/llm_client/providers/anthropic.rs +++ /dev/null @@ -1,206 +0,0 @@ -//! Anthropic プロバイダ実装 -//! -//! Anthropic Messages APIと通信し、Eventストリームを出力 - -use std::pin::Pin; - -use crate::llm_client::{ - ClientError, LlmClient, Request, event::Event, scheme::anthropic::AnthropicScheme, -}; -use async_trait::async_trait; -use eventsource_stream::Eventsource; -use futures::{Stream, StreamExt, TryStreamExt, future::ready}; -use reqwest::header::{CONTENT_TYPE, HeaderMap, HeaderValue}; - -/// Anthropic クライアント -#[derive(Clone)] -pub struct AnthropicClient { - /// HTTPクライアント - http_client: reqwest::Client, - /// APIキー - api_key: String, - /// モデル名 - model: String, - /// スキーマ - scheme: AnthropicScheme, - /// ベースURL - base_url: String, -} - -impl AnthropicClient { - /// 新しいAnthropicクライアントを作成 - pub fn new(api_key: impl Into, model: impl Into) -> Self { - Self { - http_client: reqwest::Client::new(), - api_key: api_key.into(), - model: model.into(), - scheme: AnthropicScheme::default(), - base_url: "https://api.anthropic.com".to_string(), - } - } - - /// カスタムHTTPクライアントを設定 - pub fn with_http_client(mut self, client: reqwest::Client) -> Self { - self.http_client = client; - self - } - - /// スキーマを設定 - pub fn with_scheme(mut self, scheme: AnthropicScheme) -> Self { - self.scheme = scheme; - self - } - - /// ベースURLを設定 - pub fn with_base_url(mut self, url: impl Into) -> Self { - self.base_url = url.into(); - self - } - - /// リクエストヘッダーを構築 - fn build_headers(&self) -> Result { - let mut headers = HeaderMap::new(); - - headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json")); - headers.insert( - "x-api-key", - HeaderValue::from_str(&self.api_key) - .map_err(|e| ClientError::Config(format!("Invalid API key: {}", e)))?, - ); - headers.insert( - "anthropic-version", - HeaderValue::from_str(&self.scheme.api_version) - .map_err(|e| ClientError::Config(format!("Invalid API version: {}", e)))?, - ); - - // 細粒度ツールストリーミングを有効にする場合 - if self.scheme.fine_grained_tool_streaming { - headers.insert( - "anthropic-beta", - HeaderValue::from_static("fine-grained-tool-streaming-2025-05-14"), - ); - } - - Ok(headers) - } -} - -#[async_trait] -impl LlmClient for AnthropicClient { - fn clone_boxed(&self) -> Box { - Box::new(self.clone()) - } - - async fn stream( - &self, - request: Request, - ) -> Result> + Send>>, ClientError> { - let url = format!("{}/v1/messages", self.base_url); - let headers = self.build_headers()?; - let body = self.scheme.build_request(&self.model, &request); - - let response = self - .http_client - .post(&url) - .headers(headers) - .json(&body) - .send() - .await?; - - // エラーレスポンスをチェック - if !response.status().is_success() { - let status = response.status().as_u16(); - let text = response.text().await.unwrap_or_default(); - - // JSONでエラーをパースしてみる - if let Ok(json) = serde_json::from_str::(&text) { - let error = json.get("error").unwrap_or(&json); - let code = error.get("type").and_then(|v| v.as_str()).map(String::from); - let message = error - .get("message") - .and_then(|v| v.as_str()) - .unwrap_or(&text) - .to_string(); - return Err(ClientError::Api { - status: Some(status), - code, - message, - }); - } - - return Err(ClientError::Api { - status: Some(status), - code: None, - message: text, - }); - } - - // SSEストリームを構築 - let scheme = self.scheme.clone(); - let byte_stream = response - .bytes_stream() - .map_err(|e| std::io::Error::other(e)); - let event_stream = byte_stream.eventsource(); - - // AnthropicはBlockStopイベントに正しいblock_typeを含まないため、 - // クライアント側で状態を追跡して補完する - let mut current_block_type = None; - - let stream = event_stream.filter_map(move |result| { - ready(match result { - Ok(event) => { - // SSEイベントをパース - match scheme.parse_event(&event.event, &event.data) { - Ok(Some(mut evt)) => { - // ブロックタイプの追跡と修正 - match &evt { - Event::BlockStart(start) => { - current_block_type = Some(start.block_type); - } - Event::BlockStop(stop) => { - if let Some(block_type) = current_block_type.take() { - // 正しいブロックタイプで上書き - // (Event::BlockStopの中身を置換) - evt = - Event::BlockStop(crate::llm_client::event::BlockStop { - block_type, - ..stop.clone() - }); - } - } - _ => {} - } - Some(Ok(evt)) - } - Ok(None) => None, - Err(e) => Some(Err(e)), - } - } - Err(e) => Some(Err(ClientError::Sse(e.to_string()))), - }) - }); - - Ok(Box::pin(stream)) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_client_creation() { - let client = AnthropicClient::new("test-key", "claude-sonnet-4-20250514"); - assert_eq!(client.model, "claude-sonnet-4-20250514"); - } - - #[test] - fn test_build_headers() { - let client = AnthropicClient::new("test-key", "claude-sonnet-4-20250514"); - let headers = client.build_headers().unwrap(); - - assert!(headers.contains_key("x-api-key")); - assert!(headers.contains_key("anthropic-version")); - assert!(headers.contains_key("anthropic-beta")); - } -} diff --git a/crates/llm-worker/src/llm_client/providers/gemini.rs b/crates/llm-worker/src/llm_client/providers/gemini.rs deleted file mode 100644 index b7378921..00000000 --- a/crates/llm-worker/src/llm_client/providers/gemini.rs +++ /dev/null @@ -1,190 +0,0 @@ -//! Gemini プロバイダ実装 -//! -//! Google Gemini APIと通信し、Eventストリームを出力 - -use std::pin::Pin; - -use crate::llm_client::{ - ClientError, LlmClient, Request, event::Event, scheme::gemini::GeminiScheme, -}; -use async_trait::async_trait; -use eventsource_stream::Eventsource; -use futures::{Stream, StreamExt, TryStreamExt}; -use reqwest::header::{CONTENT_TYPE, HeaderMap, HeaderValue}; - -/// Gemini クライアント -#[derive(Clone)] -pub struct GeminiClient { - /// HTTPクライアント - http_client: reqwest::Client, - /// APIキー - api_key: String, - /// モデル名 - model: String, - /// スキーマ - scheme: GeminiScheme, - /// ベースURL - base_url: String, -} - -impl GeminiClient { - /// 新しいGeminiクライアントを作成 - pub fn new(api_key: impl Into, model: impl Into) -> Self { - Self { - http_client: reqwest::Client::new(), - api_key: api_key.into(), - model: model.into(), - scheme: GeminiScheme::default(), - base_url: "https://generativelanguage.googleapis.com".to_string(), - } - } - - /// カスタムHTTPクライアントを設定 - pub fn with_http_client(mut self, client: reqwest::Client) -> Self { - self.http_client = client; - self - } - - /// スキーマを設定 - pub fn with_scheme(mut self, scheme: GeminiScheme) -> Self { - self.scheme = scheme; - self - } - - /// ベースURLを設定 - pub fn with_base_url(mut self, url: impl Into) -> Self { - self.base_url = url.into(); - self - } - - /// リクエストヘッダーを構築 - fn build_headers(&self) -> Result { - let mut headers = HeaderMap::new(); - - headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json")); - - Ok(headers) - } -} - -#[async_trait] -impl LlmClient for GeminiClient { - fn clone_boxed(&self) -> Box { - Box::new(self.clone()) - } - - async fn stream( - &self, - request: Request, - ) -> Result> + Send>>, ClientError> { - // URL構築: base_url/v1beta/models/{model}:streamGenerateContent?alt=sse&key={api_key} - let url = format!( - "{}/v1beta/models/{}:streamGenerateContent?alt=sse&key={}", - self.base_url, self.model, self.api_key - ); - - let headers = self.build_headers()?; - let body = self.scheme.build_request(&request); - - let response = self - .http_client - .post(&url) - .headers(headers) - .json(&body) - .send() - .await?; - - // エラーレスポンスをチェック - if !response.status().is_success() { - let status = response.status().as_u16(); - let text = response.text().await.unwrap_or_default(); - - // JSONでエラーをパースしてみる - if let Ok(json) = serde_json::from_str::(&text) { - // Gemini error format: { "error": { "code": xxx, "message": "...", "status": "..." } } - let error = json.get("error").unwrap_or(&json); - let code = error - .get("status") - .and_then(|v| v.as_str()) - .map(String::from); - let message = error - .get("message") - .and_then(|v| v.as_str()) - .unwrap_or(&text) - .to_string(); - return Err(ClientError::Api { - status: Some(status), - code, - message, - }); - } - - return Err(ClientError::Api { - status: Some(status), - code: None, - message: text, - }); - } - - // SSEストリームを構築 - let scheme = self.scheme.clone(); - let byte_stream = response - .bytes_stream() - .map_err(|e| std::io::Error::other(e)); - let event_stream = byte_stream.eventsource(); - - let stream = event_stream - .map(move |result| { - match result { - Ok(event) => { - // SSEイベントをパース - // Geminiは "data: {...}" 形式で送る - match scheme.parse_event(&event.data) { - Ok(Some(events)) => Ok(Some(events)), - Ok(None) => Ok(None), - Err(e) => Err(e), - } - } - Err(e) => Err(ClientError::Sse(e.to_string())), - } - }) - // flatten Option> stream to Stream - .map(|res| { - let s: Pin> + Send>> = match res { - Ok(Some(events)) => Box::pin(futures::stream::iter(events.into_iter().map(Ok))), - Ok(None) => Box::pin(futures::stream::empty()), - Err(e) => Box::pin(futures::stream::once(async move { Err(e) })), - }; - s - }) - .flatten(); - - Ok(Box::pin(stream)) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_client_creation() { - let client = GeminiClient::new("test-key", "gemini-2.0-flash"); - assert_eq!(client.model, "gemini-2.0-flash"); - } - - #[test] - fn test_build_headers() { - let client = GeminiClient::new("test-key", "gemini-2.0-flash"); - let headers = client.build_headers().unwrap(); - - assert!(headers.contains_key("content-type")); - } - - #[test] - fn test_custom_base_url() { - let client = GeminiClient::new("test-key", "gemini-2.0-flash") - .with_base_url("https://custom.api.example.com"); - assert_eq!(client.base_url, "https://custom.api.example.com"); - } -} diff --git a/crates/llm-worker/src/llm_client/providers/mod.rs b/crates/llm-worker/src/llm_client/providers/mod.rs deleted file mode 100644 index 51ec0ae0..00000000 --- a/crates/llm-worker/src/llm_client/providers/mod.rs +++ /dev/null @@ -1,8 +0,0 @@ -//! プロバイダ実装 -//! -//! 各プロバイダ固有のHTTPクライアント実装 - -pub mod anthropic; -pub mod gemini; -pub mod ollama; -pub mod openai; diff --git a/crates/llm-worker/src/llm_client/providers/ollama.rs b/crates/llm-worker/src/llm_client/providers/ollama.rs deleted file mode 100644 index 8148bf5f..00000000 --- a/crates/llm-worker/src/llm_client/providers/ollama.rs +++ /dev/null @@ -1,67 +0,0 @@ -//! Ollama プロバイダ実装 -//! -//! OllamaはOpenAI互換APIを提供するため、OpenAIクライアントと互換性がある。 -//! デフォルトのベースURLと認証設定が異なる。 - -use std::pin::Pin; - -use crate::llm_client::{ - ClientError, LlmClient, Request, event::Event, providers::openai::OpenAIClient, - scheme::openai::OpenAIScheme, -}; -use async_trait::async_trait; -use futures::Stream; - -/// Ollama クライアント -/// -/// 内部的にOpenAIClientを使用するラッパー、もしくはOpenAIClientと同様の実装を持つ。 -/// ここではOpenAIClient構成をカスタマイズして提供する。 -#[derive(Clone)] -pub struct OllamaClient { - inner: OpenAIClient, -} - -impl OllamaClient { - /// 新しいOllamaクライアントを作成 - pub fn new(model: impl Into) -> Self { - // Ollama usually runs on localhost:11434/v1 - // API key is "ollama" or ignored - let base_url = "http://localhost:11434"; - - let scheme = OpenAIScheme::new().with_legacy_max_tokens(true); - - let client = OpenAIClient::new("ollama", model) - .with_base_url(base_url) - .with_scheme(scheme); - // Currently OpenAIScheme sets include_usage: true. Ollama supports checks? - // Assuming Ollama modern versions support usage. - - Self { inner: client } - } - - /// ベースURLを設定 - pub fn with_base_url(mut self, url: impl Into) -> Self { - self.inner = self.inner.with_base_url(url); - self - } - - /// カスタムHTTPクライアントを設定 - pub fn with_http_client(mut self, client: reqwest::Client) -> Self { - self.inner = self.inner.with_http_client(client); - self - } -} - -#[async_trait] -impl LlmClient for OllamaClient { - fn clone_boxed(&self) -> Box { - Box::new(self.clone()) - } - - async fn stream( - &self, - request: Request, - ) -> Result> + Send>>, ClientError> { - self.inner.stream(request).await - } -} diff --git a/crates/llm-worker/src/llm_client/providers/openai.rs b/crates/llm-worker/src/llm_client/providers/openai.rs deleted file mode 100644 index eb0a12dc..00000000 --- a/crates/llm-worker/src/llm_client/providers/openai.rs +++ /dev/null @@ -1,217 +0,0 @@ -//! OpenAI プロバイダ実装 -//! -//! OpenAI Chat Completions APIと通信し、Eventストリームを出力 - -use std::pin::Pin; - -use crate::llm_client::{ - ClientError, ConfigWarning, LlmClient, Request, RequestConfig, event::Event, - scheme::openai::OpenAIScheme, -}; -use async_trait::async_trait; -use eventsource_stream::Eventsource; -use futures::{Stream, StreamExt, TryStreamExt}; -use reqwest::header::{CONTENT_TYPE, HeaderMap, HeaderValue}; - -/// OpenAI クライアント -#[derive(Clone)] -pub struct OpenAIClient { - /// HTTPクライアント - http_client: reqwest::Client, - /// APIキー - api_key: String, - /// モデル名 - model: String, - /// スキーマ - scheme: OpenAIScheme, - /// ベースURL - base_url: String, -} - -impl OpenAIClient { - /// 新しいOpenAIクライアントを作成 - pub fn new(api_key: impl Into, model: impl Into) -> Self { - Self { - http_client: reqwest::Client::new(), - api_key: api_key.into(), - model: model.into(), - scheme: OpenAIScheme::default(), - base_url: "https://api.openai.com".to_string(), - } - } - - /// カスタムHTTPクライアントを設定 - pub fn with_http_client(mut self, client: reqwest::Client) -> Self { - self.http_client = client; - self - } - - /// スキーマを設定 - pub fn with_scheme(mut self, scheme: OpenAIScheme) -> Self { - self.scheme = scheme; - self - } - - /// ベースURLを設定 - pub fn with_base_url(mut self, url: impl Into) -> Self { - self.base_url = url.into(); - self - } - - /// リクエストヘッダーを構築 - fn build_headers(&self) -> Result { - let mut headers = HeaderMap::new(); - - headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json")); - - let api_key_val = if self.api_key.is_empty() { - // For providers like Ollama, API key might be empty/dummy. - // But typical OpenAI requires it. - // We'll allow empty if user intends it, but usually it's checked. - HeaderValue::from_static("") - } else { - let mut val = HeaderValue::from_str(&format!("Bearer {}", self.api_key)) - .map_err(|e| ClientError::Config(format!("Invalid API key: {}", e)))?; - val.set_sensitive(true); - val - }; - - if !api_key_val.is_empty() { - headers.insert("Authorization", api_key_val); - } - - Ok(headers) - } -} - -#[async_trait] -impl LlmClient for OpenAIClient { - fn clone_boxed(&self) -> Box { - Box::new(self.clone()) - } - - async fn stream( - &self, - request: Request, - ) -> Result> + Send>>, ClientError> { - // Construct the URL: base_url usually ends without slash, path starts with slash or vice versa. - // Standard OpenAI base is "https://api.openai.com". Endpoint is "/v1/chat/completions". - // If external base_url includes /v1, we should be careful. - // Let's assume defaults. If user provides "http://localhost:11434/v1", we append "/chat/completions". - // Or cleaner: user provides full base up to version? - // Anthropic client uses "{}/v1/messages". - // Let's stick to appending "/v1/chat/completions" if base is just host, - // OR assume base includes /v1 if user overrides it? - // Let's use robust joining or simple assumption matching Anthropic pattern: - // Default: https://api.openai.com -> https://api.openai.com/v1/chat/completions - - // However, Ollama default is http://localhost:11434/v1/chat/completions if using OpenAI compact. - // If we configure base_url via `with_base_url`, it's flexible. - // Let's try to detect if /v1 is present or just append consistently. - // Ideally `base_url` should be the root passed to `new`. - - let url = if self.base_url.ends_with("/v1") { - format!("{}/chat/completions", self.base_url) - } else if self.base_url.ends_with("/") { - format!("{}v1/chat/completions", self.base_url) - } else { - format!("{}/v1/chat/completions", self.base_url) - }; - - let headers = self.build_headers()?; - let body = self.scheme.build_request(&self.model, &request); - - let response = self - .http_client - .post(&url) - .headers(headers) - .json(&body) - .send() - .await?; - - // エラーレスポンスをチェック - if !response.status().is_success() { - let status = response.status().as_u16(); - let text = response.text().await.unwrap_or_default(); - - // JSONでエラーをパースしてみる - if let Ok(json) = serde_json::from_str::(&text) { - // OpenAI error format: { "error": { "message": "...", "type": "...", ... } } - let error = json.get("error").unwrap_or(&json); - let code = error.get("type").and_then(|v| v.as_str()).map(String::from); - let message = error - .get("message") - .and_then(|v| v.as_str()) - .unwrap_or(&text) - .to_string(); - return Err(ClientError::Api { - status: Some(status), - code, - message, - }); - } - - return Err(ClientError::Api { - status: Some(status), - code: None, - message: text, - }); - } - - // SSEストリームを構築 - let scheme = self.scheme.clone(); - let byte_stream = response - .bytes_stream() - .map_err(|e| std::io::Error::other(e)); - let event_stream = byte_stream.eventsource(); - - let stream = event_stream - .map(move |result| { - match result { - Ok(event) => { - // SSEイベントをパース - // OpenAI stream events are "data: {...}" - // event.event is usually "message" (default) or empty. - // parse_event takes data string. - - if event.data == "[DONE]" { - // End of stream handled inside parse_event usually returning None - Ok(None) - } else { - match scheme.parse_event(&event.data) { - Ok(Some(events)) => Ok(Some(events)), - Ok(None) => Ok(None), - Err(e) => Err(e), - } - } - } - Err(e) => Err(ClientError::Sse(e.to_string())), - } - }) - // flatten Option> stream to Stream - // map returns Result>, Error> - // We want Stream> - .map(|res| { - let s: Pin> + Send>> = match res { - Ok(Some(events)) => Box::pin(futures::stream::iter(events.into_iter().map(Ok))), - Ok(None) => Box::pin(futures::stream::empty()), - Err(e) => Box::pin(futures::stream::once(async move { Err(e) })), - }; - s - }) - .flatten(); - - Ok(Box::pin(stream)) - } - - fn validate_config(&self, config: &RequestConfig) -> Vec { - let mut warnings = Vec::new(); - - // OpenAI does not support top_k - if config.top_k.is_some() { - warnings.push(ConfigWarning::unsupported("top_k", "OpenAI")); - } - - warnings - } -} diff --git a/crates/llm-worker/src/llm_client/scheme/anthropic/capability.rs b/crates/llm-worker/src/llm_client/scheme/anthropic/capability.rs new file mode 100644 index 00000000..b4ad126a --- /dev/null +++ b/crates/llm-worker/src/llm_client/scheme/anthropic/capability.rs @@ -0,0 +1,26 @@ +//! `model_id → ModelCapability` 静的テーブル。 +//! +//! 既知モデルのみ網羅する。未知モデルは `None` を返し、呼び出し側 +//! (`HttpTransport` 構築時)に scheme 既定へフォールバックさせる。 + +use crate::llm_client::capability::{ + CacheStrategy, ModelCapability, ReasoningSupport, StructuredOutput, ToolCallingSupport, +}; + +/// Anthropic 公式モデルの既定 capability。 +/// +/// `claude-sonnet-*` / `claude-opus-*` / `claude-haiku-*` に対応する。 +/// `cache_control` は公式のみ有効で、最大 4 breakpoint(公式仕様)。 +pub(crate) fn lookup(model_id: &str) -> Option { + if !model_id.starts_with("claude-") { + return None; + } + Some(ModelCapability { + tool_calling: ToolCallingSupport::Parallel, + structured_output: StructuredOutput::JsonSchema, + reasoning: Some(ReasoningSupport::BudgetTokens), + vision: true, + prompt_caching: CacheStrategy::Explicit { max_breakpoints: 4 }, + }) +} + diff --git a/crates/llm-worker/src/llm_client/scheme/anthropic/mod.rs b/crates/llm-worker/src/llm_client/scheme/anthropic/mod.rs index 944a3509..7b6a5805 100644 --- a/crates/llm-worker/src/llm_client/scheme/anthropic/mod.rs +++ b/crates/llm-worker/src/llm_client/scheme/anthropic/mod.rs @@ -3,8 +3,12 @@ //! - リクエストJSON生成 //! - SSEイベントパース → Event変換 +mod capability; mod events; mod request; +mod scheme_impl; + +pub use scheme_impl::AnthropicState; /// Anthropicスキーマ /// diff --git a/crates/llm-worker/src/llm_client/scheme/anthropic/request.rs b/crates/llm-worker/src/llm_client/scheme/anthropic/request.rs index 7a398b65..b1c4d1c1 100644 --- a/crates/llm-worker/src/llm_client/scheme/anthropic/request.rs +++ b/crates/llm-worker/src/llm_client/scheme/anthropic/request.rs @@ -8,6 +8,7 @@ use serde::Serialize; use crate::llm_client::{ Request, + capability::{CacheStrategy, ModelCapability, ReasoningSupport}, types::{ContentPart, Item, Role, ToolDefinition, parse_tool_arguments}, }; @@ -32,6 +33,15 @@ pub(crate) struct AnthropicRequest { #[serde(skip_serializing_if = "Vec::is_empty")] pub stop_sequences: Vec, pub stream: bool, + #[serde(skip_serializing_if = "Option::is_none")] + pub thinking: Option, +} + +/// Anthropic extended thinking 指示。 +#[derive(Debug, Serialize)] +#[serde(tag = "type", rename_all = "snake_case")] +pub(crate) enum AnthropicThinking { + Enabled { budget_tokens: u32 }, } /// Anthropic message @@ -130,12 +140,40 @@ pub(crate) struct AnthropicTool { } impl AnthropicScheme { - /// Build Anthropic request from Request - pub(crate) fn build_request(&self, model: &str, request: &Request) -> AnthropicRequest { - let breakpoints = compute_breakpoints(&request.items, request.cache_anchor); + /// Build Anthropic request from Request. + /// + /// `capability.prompt_caching` が [`CacheStrategy::Auto`] のときは + /// `cache_control` マーカーを一切挿入しない(Ollama の `/v1/messages` + /// 流用時など、サーバ側が `cache_control` を受け付けないケース)。 + pub(crate) fn build_request( + &self, + model: &str, + request: &Request, + capability: &ModelCapability, + ) -> AnthropicRequest { + let breakpoints = if matches!(capability.prompt_caching, CacheStrategy::Explicit { .. }) { + compute_breakpoints(&request.items, request.cache_anchor) + } else { + BTreeSet::new() + }; let messages = self.convert_items_to_messages(&request.items, &breakpoints); let tools = request.tools.iter().map(|t| self.convert_tool(t)).collect(); + // Reasoning の投影: capability が BudgetTokens / Both をサポート + // していて、request 側で budget_tokens が指定されているときだけ + // thinking フィールドを付ける。 + let supports_budget_tokens = matches!( + capability.reasoning, + Some(ReasoningSupport::BudgetTokens | ReasoningSupport::Both), + ); + let thinking = request + .config + .reasoning + .as_ref() + .and_then(|rc| rc.budget_tokens) + .filter(|_| supports_budget_tokens) + .map(|budget_tokens| AnthropicThinking::Enabled { budget_tokens }); + AnthropicRequest { model: model.to_string(), max_tokens: request.config.max_tokens.unwrap_or(4096), @@ -147,6 +185,7 @@ impl AnthropicScheme { top_k: request.config.top_k, stop_sequences: request.config.stop_sequences.clone(), stream: true, + thinking, } } @@ -360,6 +399,28 @@ fn compute_breakpoints(items: &[Item], cache_anchor: Option) -> BTreeSet< #[cfg(test)] mod tests { use super::*; + use crate::llm_client::capability::{ + CacheStrategy, StructuredOutput, ToolCallingSupport, + }; + + /// cache_control が有効になる既定の capability。 + fn cap_explicit() -> ModelCapability { + ModelCapability { + tool_calling: ToolCallingSupport::Parallel, + structured_output: StructuredOutput::JsonSchema, + reasoning: None, + vision: false, + prompt_caching: CacheStrategy::Explicit { max_breakpoints: 4 }, + } + } + + /// cache_control を送らない capability(Ollama 等)。 + fn cap_auto() -> ModelCapability { + ModelCapability { + prompt_caching: CacheStrategy::Auto, + ..cap_explicit() + } + } #[test] fn test_build_simple_request() { @@ -368,7 +429,7 @@ mod tests { .system("You are a helpful assistant.") .user("Hello!"); - let anthropic_req = scheme.build_request("claude-sonnet-4-20250514", &request); + let anthropic_req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit()); assert_eq!(anthropic_req.model, "claude-sonnet-4-20250514"); assert_eq!( @@ -394,7 +455,7 @@ mod tests { })), ); - let anthropic_req = scheme.build_request("claude-sonnet-4-20250514", &request); + let anthropic_req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit()); assert_eq!(anthropic_req.tools.len(), 1); assert_eq!(anthropic_req.tools[0].name, "get_weather"); @@ -412,7 +473,7 @@ mod tests { )) .item(Item::tool_result("call_123", "Sunny, 25°C")); - let anthropic_req = scheme.build_request("claude-sonnet-4-20250514", &request); + let anthropic_req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit()); assert_eq!(anthropic_req.messages.len(), 3); assert_eq!(anthropic_req.messages[0].role, "user"); @@ -469,7 +530,7 @@ mod tests { let mut request = Request::new().items(items); request.cache_anchor = Some(0); - let req = scheme.build_request("claude-sonnet-4-20250514", &request); + let req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit()); let bps = breakpoint_positions(&req); assert_eq!(bps.len(), 3, "expected 3 breakpoints, got {:?}", bps); for (_, _, cc) in bps { @@ -485,7 +546,7 @@ mod tests { // cache_anchor=None, turn_end=4, head=5. let request = Request::new().items(items); - let req = scheme.build_request("claude-sonnet-4-20250514", &request); + let req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit()); let bps = breakpoint_positions(&req); assert_eq!(bps.len(), 2, "expected 2 breakpoints, got {:?}", bps); } @@ -495,7 +556,7 @@ mod tests { let scheme = AnthropicScheme::new(); let request = Request::new().user("first ever turn"); // latest user at 0 → no turn_end; head=0; no anchor. Collapse → 1. - let req = scheme.build_request("claude-sonnet-4-20250514", &request); + let req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit()); let bps = breakpoint_positions(&req); assert_eq!(bps.len(), 1, "expected 1 breakpoint, got {:?}", bps); } @@ -511,7 +572,7 @@ mod tests { ]); request.cache_anchor = Some(0); - let req = scheme.build_request("claude-sonnet-4-20250514", &request); + let req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit()); let bps = breakpoint_positions(&req); assert_eq!(bps.len(), 2, "expected collapse to 2, got {:?}", bps); } @@ -525,7 +586,7 @@ mod tests { .user("run it") .item(Item::tool_call("c1", "t", "{}")) .item(Item::tool_result("c1", "result")); - let req = scheme.build_request("claude-sonnet-4-20250514", &request); + let req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit()); let bps = breakpoint_positions(&req); assert_eq!(bps.len(), 1); let (mi, pi, _) = bps[0]; @@ -549,7 +610,7 @@ mod tests { let request = Request::new() .user("hello") .assistant("hi there"); - let req = scheme.build_request("claude-sonnet-4-20250514", &request); + let req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit()); assert!( matches!(req.messages[0].content, AnthropicContent::Text(_)), "non-breakpoint single-text message should use text shorthand", @@ -563,7 +624,7 @@ mod tests { let scheme = AnthropicScheme::new(); let mut request = Request::new().user("hello"); request.cache_anchor = Some(0); - let req = scheme.build_request("claude-sonnet-4-20250514", &request); + let req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit()); match &req.messages[0].content { AnthropicContent::Parts(parts) => { assert_eq!(parts.len(), 1); @@ -583,7 +644,7 @@ mod tests { let scheme = AnthropicScheme::new(); let mut request = Request::new().user("hello"); request.cache_anchor = Some(0); - let req = scheme.build_request("claude-sonnet-4-20250514", &request); + let req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit()); let json = serde_json::to_value(&req).unwrap(); let part = &json["messages"][0]["content"][0]; assert_eq!(part["type"], "text"); @@ -598,7 +659,7 @@ mod tests { let scheme = AnthropicScheme::new(); let mut request = Request::new().user("one"); request.cache_anchor = Some(99); - let req = scheme.build_request("claude-sonnet-4-20250514", &request); + let req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit()); // Only the Head breakpoint survives. let bps = breakpoint_positions(&req); assert_eq!(bps.len(), 1); @@ -607,11 +668,22 @@ mod tests { #[test] fn empty_items_produce_no_breakpoints() { let scheme = AnthropicScheme::new(); - let req = scheme.build_request("claude-sonnet-4-20250514", &Request::new()); + let req = scheme.build_request("claude-sonnet-4-20250514", &Request::new(), &cap_explicit()); assert!(req.messages.is_empty()); assert!(breakpoint_positions(&req).is_empty()); } + #[test] + fn cache_auto_does_not_add_cache_control() { + // Ollama のように `CacheStrategy::Auto` のときは cache_control + // マーカーを一切付けない。breakpoint 計算も走らないこと。 + let scheme = AnthropicScheme::new(); + let mut request = Request::new().user("hello"); + request.cache_anchor = Some(0); + let req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_auto()); + assert!(breakpoint_positions(&req).is_empty()); + } + #[test] fn tool_definitions_carry_no_cache_control() { // Tool JSON schema must serialise unchanged — no sneak-in of @@ -623,7 +695,7 @@ mod tests { "type": "object", "properties": {} }))); - let req = scheme.build_request("claude-sonnet-4-20250514", &request); + let req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit()); let json = serde_json::to_value(&req).unwrap(); let tool = &json["tools"][0]; assert!(tool.get("cache_control").is_none()); diff --git a/crates/llm-worker/src/llm_client/scheme/anthropic/scheme_impl.rs b/crates/llm-worker/src/llm_client/scheme/anthropic/scheme_impl.rs new file mode 100644 index 00000000..6e54a30f --- /dev/null +++ b/crates/llm-worker/src/llm_client/scheme/anthropic/scheme_impl.rs @@ -0,0 +1,99 @@ +//! `impl Scheme for AnthropicScheme` +//! +//! Anthropic Messages API の wire 表現に必要な URL・ヘッダ・SSE パース・ +//! リクエスト body 生成を共通 `Scheme` trait にぶら下げる。 + +use serde_json::Value; + +use crate::llm_client::{ + ClientError, + capability::ModelCapability, + event::{BlockStop, BlockType, Event}, + auth::AuthRequirement, + scheme::Scheme, + types::Request, +}; + +use super::AnthropicScheme; + +/// Anthropic の SSE パースで必要な状態。 +/// +/// `content_block_stop` イベントは `block_type` を持たない仕様なので、 +/// 直前の `content_block_start` で観測した `block_type` を保持して +/// `BlockStop` に書き戻す。 +#[derive(Debug, Default)] +pub struct AnthropicState { + current_block_type: Option, +} + +impl Scheme for AnthropicScheme { + type State = AnthropicState; + + fn default_base_url(&self) -> &'static str { + "https://api.anthropic.com" + } + + fn path(&self, _model_id: &str) -> String { + "/v1/messages".to_string() + } + + fn required_auth(&self) -> AuthRequirement { + // Ollama の `/v1/messages` 互換では認証が要らないが、それは + // `AuthRef::None` + `build_headers` 側の「ResolvedAuth::None + // なら何もしない」分岐で吸収する(`accepts` 判定で弾かれない + // よう、現状は XApiKey を要求しつつ、None 側でもパスするよう + // にする戦略)。 + AuthRequirement::XApiKey + } + + fn additional_headers(&self) -> Vec<(&'static str, String)> { + let mut headers = vec![("anthropic-version", self.api_version.clone())]; + if self.fine_grained_tool_streaming { + headers.push(( + "anthropic-beta", + "fine-grained-tool-streaming-2025-05-14".to_string(), + )); + } + headers + } + + fn build_request_body( + &self, + model_id: &str, + request: &Request, + capability: &ModelCapability, + ) -> Value { + let req = self.build_request(model_id, request, capability); + serde_json::to_value(&req).expect("AnthropicRequest is always serialisable") + } + + fn parse_sse( + &self, + event_type: &str, + data: &str, + state: &mut Self::State, + ) -> Result, ClientError> { + let Some(mut event) = self.parse_event(event_type, data)? else { + return Ok(Vec::new()); + }; + match &event { + Event::BlockStart(start) => { + state.current_block_type = Some(start.block_type); + } + Event::BlockStop(stop) => { + if let Some(block_type) = state.current_block_type.take() { + event = Event::BlockStop(BlockStop { + block_type, + ..stop.clone() + }); + } + } + _ => {} + } + Ok(vec![event]) + } + + fn capability_for(&self, model_id: &str) -> Option { + super::capability::lookup(model_id) + } +} diff --git a/crates/llm-worker/src/llm_client/scheme/gemini/capability.rs b/crates/llm-worker/src/llm_client/scheme/gemini/capability.rs new file mode 100644 index 00000000..b3993d9d --- /dev/null +++ b/crates/llm-worker/src/llm_client/scheme/gemini/capability.rs @@ -0,0 +1,26 @@ +//! `model_id → ModelCapability` 静的テーブル(Google Gemini)。 + +use crate::llm_client::capability::{ + CacheStrategy, ModelCapability, ReasoningSupport, StructuredOutput, ToolCallingSupport, +}; + +pub(crate) fn lookup(model_id: &str) -> Option { + if !model_id.starts_with("gemini-") { + return None; + } + // 2.5 系以降は thinking / reasoning を持つ + let reasoning = if model_id.starts_with("gemini-2.5") + || model_id.starts_with("gemini-3") + { + Some(ReasoningSupport::BudgetTokens) + } else { + None + }; + Some(ModelCapability { + tool_calling: ToolCallingSupport::Parallel, + structured_output: StructuredOutput::JsonSchema, + reasoning, + vision: true, + prompt_caching: CacheStrategy::Auto, + }) +} diff --git a/crates/llm-worker/src/llm_client/scheme/gemini/mod.rs b/crates/llm-worker/src/llm_client/scheme/gemini/mod.rs index 79cf4b57..de453156 100644 --- a/crates/llm-worker/src/llm_client/scheme/gemini/mod.rs +++ b/crates/llm-worker/src/llm_client/scheme/gemini/mod.rs @@ -3,8 +3,10 @@ //! - リクエストJSON生成 //! - SSEイベントパース → Event変換 +mod capability; mod events; mod request; +mod scheme_impl; /// Geminiスキーマ /// diff --git a/crates/llm-worker/src/llm_client/scheme/gemini/request.rs b/crates/llm-worker/src/llm_client/scheme/gemini/request.rs index d0f3b5fe..6d9b0f07 100644 --- a/crates/llm-worker/src/llm_client/scheme/gemini/request.rs +++ b/crates/llm-worker/src/llm_client/scheme/gemini/request.rs @@ -7,6 +7,7 @@ use serde_json::Value; use crate::llm_client::{ Request, + capability::{ModelCapability, ReasoningSupport}, types::{Item, Role, ToolDefinition, parse_tool_arguments}, }; @@ -139,11 +140,26 @@ pub(crate) struct GeminiGenerationConfig { /// Stop sequences #[serde(skip_serializing_if = "Vec::is_empty")] pub stop_sequences: Vec, + /// Thinking / reasoning 設定(Gemini 2.5 以降)。 + #[serde(skip_serializing_if = "Option::is_none")] + pub thinking_config: Option, +} + +/// Gemini thinking config (gemini-2.5 以降) +#[derive(Debug, Serialize)] +#[serde(rename_all = "camelCase")] +pub(crate) struct GeminiThinkingConfig { + /// Token budget for thinking. `-1` means dynamic. + pub thinking_budget: i32, } impl GeminiScheme { /// Build Gemini request from Request - pub(crate) fn build_request(&self, request: &Request) -> GeminiRequest { + pub(crate) fn build_request( + &self, + request: &Request, + capability: &ModelCapability, + ) -> GeminiRequest { let contents = self.convert_items_to_contents(&request.items); // System prompt @@ -177,6 +193,22 @@ impl GeminiScheme { None }; + // Reasoning の投影: capability が BudgetTokens / Both をサポートし、 + // request 側で budget_tokens が指定されているときだけ thinking_config を付ける。 + let supports_budget = matches!( + capability.reasoning, + Some(ReasoningSupport::BudgetTokens | ReasoningSupport::Both), + ); + let thinking_config = request + .config + .reasoning + .as_ref() + .and_then(|rc| rc.budget_tokens) + .filter(|_| supports_budget) + .map(|budget| GeminiThinkingConfig { + thinking_budget: budget as i32, + }); + // Generation config let generation_config = Some(GeminiGenerationConfig { max_output_tokens: request.config.max_tokens, @@ -184,6 +216,7 @@ impl GeminiScheme { top_p: request.config.top_p, top_k: request.config.top_k, stop_sequences: request.config.stop_sequences.clone(), + thinking_config, }); GeminiRequest { @@ -341,6 +374,17 @@ impl GeminiScheme { #[cfg(test)] mod tests { use super::*; + use crate::llm_client::capability::{CacheStrategy, StructuredOutput, ToolCallingSupport}; + + fn cap() -> ModelCapability { + ModelCapability { + tool_calling: ToolCallingSupport::Parallel, + structured_output: StructuredOutput::JsonSchema, + reasoning: None, + vision: true, + prompt_caching: CacheStrategy::Auto, + } + } #[test] fn test_build_simple_request() { @@ -349,7 +393,7 @@ mod tests { .system("You are a helpful assistant.") .user("Hello!"); - let gemini_req = scheme.build_request(&request); + let gemini_req = scheme.build_request(&request, &cap()); assert!(gemini_req.system_instruction.is_some()); assert_eq!(gemini_req.contents.len(), 1); @@ -371,7 +415,7 @@ mod tests { })), ); - let gemini_req = scheme.build_request(&request); + let gemini_req = scheme.build_request(&request, &cap()); assert_eq!(gemini_req.tools.len(), 1); assert_eq!(gemini_req.tools[0].function_declarations.len(), 1); @@ -387,7 +431,7 @@ mod tests { let scheme = GeminiScheme::new(); let request = Request::new().user("Hello").assistant("Hi there!"); - let gemini_req = scheme.build_request(&request); + let gemini_req = scheme.build_request(&request, &cap()); assert_eq!(gemini_req.contents.len(), 2); assert_eq!(gemini_req.contents[0].role, "user"); @@ -406,7 +450,7 @@ mod tests { )) .item(Item::tool_result("call_123", "Sunny, 25°C")); - let gemini_req = scheme.build_request(&request); + let gemini_req = scheme.build_request(&request, &cap()); assert_eq!(gemini_req.contents.len(), 3); assert_eq!(gemini_req.contents[0].role, "user"); diff --git a/crates/llm-worker/src/llm_client/scheme/gemini/scheme_impl.rs b/crates/llm-worker/src/llm_client/scheme/gemini/scheme_impl.rs new file mode 100644 index 00000000..bcacd4d4 --- /dev/null +++ b/crates/llm-worker/src/llm_client/scheme/gemini/scheme_impl.rs @@ -0,0 +1,53 @@ +//! `impl Scheme for GeminiScheme` + +use serde_json::Value; + +use crate::llm_client::{ + ClientError, + capability::ModelCapability, + event::Event, + auth::AuthRequirement, + scheme::Scheme, + types::Request, +}; + +use super::GeminiScheme; + +impl Scheme for GeminiScheme { + type State = (); + + fn default_base_url(&self) -> &'static str { + "https://generativelanguage.googleapis.com" + } + + fn path(&self, model_id: &str) -> String { + format!("/v1beta/models/{model_id}:streamGenerateContent?alt=sse") + } + + fn required_auth(&self) -> AuthRequirement { + AuthRequirement::QueryParam { name: "key" } + } + + fn build_request_body( + &self, + _model_id: &str, + request: &Request, + capability: &ModelCapability, + ) -> Value { + let req = self.build_request(request, capability); + serde_json::to_value(&req).expect("GeminiRequest is always serialisable") + } + + fn parse_sse( + &self, + _event_type: &str, + data: &str, + _state: &mut Self::State, + ) -> Result, ClientError> { + Ok(self.parse_event(data)?.unwrap_or_default()) + } + + fn capability_for(&self, model_id: &str) -> Option { + super::capability::lookup(model_id) + } +} diff --git a/crates/llm-worker/src/llm_client/scheme/mod.rs b/crates/llm-worker/src/llm_client/scheme/mod.rs index dda327ec..839c17bf 100644 --- a/crates/llm-worker/src/llm_client/scheme/mod.rs +++ b/crates/llm-worker/src/llm_client/scheme/mod.rs @@ -3,7 +3,80 @@ //! 各APIスキーマごとの変換ロジック //! - リクエスト変換: Request → プロバイダ固有JSON //! - レスポンス変換: SSEイベント → Event +//! +//! [`Scheme`] trait により `HttpTransport` から scheme 固有の差分 +//! (パス、ヘッダ、認証要件、body 生成、SSE パース)をすべて委譲する。 pub mod anthropic; pub mod gemini; -pub mod openai; +pub mod openai_chat; + +use serde_json::Value; + +use super::auth::AuthRequirement; +use super::capability::ModelCapability; +use super::error::ClientError; +use super::event::Event; +use super::types::Request; + +/// wire scheme の抽象。各プロバイダの API 仕様ごとに 1 つ実装する。 +/// +/// `HttpTransport` が URL 組立・認証ヘッダ挿入・SSE パース +/// のループを担い、`Scheme` 実装は各仕様固有の差分のみ提供する。 +/// +/// # 状態 +/// +/// SSE パースでフレーム間に状態を保つ必要がある scheme(Anthropic の +/// `BlockStop` に `block_type` が載らない仕様の補完など)は +/// [`Scheme::State`] に中間状態を表す型を置く。 +/// 状態を持たない scheme は `type State = ()` とする。 +pub trait Scheme: Clone + Send + Sync + 'static { + /// SSE パースのフレーム間で共有する状態。`HttpTransport` が + /// ストリーム開始時に `Default::default()` を一度だけ作り、 + /// フレームごとに `&mut` で渡す。 + type State: Default + Send + 'static; + + /// scheme のベース URL(`ModelConfig::base_url` 未指定時のデフォルト) + fn default_base_url(&self) -> &'static str; + + /// リクエスト先の相対パス。Gemini のようにモデル名をパスに埋め込む + /// プロバイダもあるため、モデル ID を受け取る。 + fn path(&self, model_id: &str) -> String; + + /// この scheme が要求する認証形式。`build_client` 時に + /// [`AuthRef`](../../../manifest/enum.AuthRef.html) と照合する。 + fn required_auth(&self) -> AuthRequirement; + + /// `Content-Type` 以外の追加ヘッダ。`anthropic-version` / `anthropic-beta` 等。 + fn additional_headers(&self) -> Vec<(&'static str, String)> { + Vec::new() + } + + /// リクエスト body を生成する。`capability` は `CacheStrategy` や + /// `ReasoningSupport` を参照して scheme 側の挙動を分岐させるため + /// に渡される。 + fn build_request_body( + &self, + model_id: &str, + request: &Request, + capability: &ModelCapability, + ) -> Value; + + /// SSE イベント 1 件を 0 個以上の [`Event`] に変換する。 + /// + /// `event_type` は SSE フレームの `event:` フィールド、`data` は + /// `data:` フィールド。`[DONE]` 等の終端マーカーは実装側で判定する。 + /// `state` はストリーム単位で共有される可変状態。 + fn parse_sse( + &self, + event_type: &str, + data: &str, + state: &mut Self::State, + ) -> Result, ClientError>; + + /// 既知モデル ID の能力テーブル引き。未知なら `None` を返す + /// ので、呼び出し側は scheme ごとの安全側デフォルト + /// ([`ModelCapability::minimal`])にフォールバックする。 + fn capability_for(&self, model_id: &str) -> Option; +} + diff --git a/crates/llm-worker/src/llm_client/scheme/openai_chat/capability.rs b/crates/llm-worker/src/llm_client/scheme/openai_chat/capability.rs new file mode 100644 index 00000000..ed47c973 --- /dev/null +++ b/crates/llm-worker/src/llm_client/scheme/openai_chat/capability.rs @@ -0,0 +1,47 @@ +//! `model_id → ModelCapability` 静的テーブル(OpenAI Chat Completions)。 +//! +//! OpenAI 本家の主要モデルのみ網羅する。OpenRouter / xAI / Groq 等は +//! モデル ID が各社独自なので、マニフェスト側で明示 override する +//! 前提。 + +use crate::llm_client::capability::{ + CacheStrategy, ModelCapability, ReasoningSupport, StructuredOutput, ToolCallingSupport, +}; + +pub(crate) fn lookup(model_id: &str) -> Option { + // GPT-5 / o1 / o3 / o4 reasoning 系 + if model_id.starts_with("gpt-5") + || model_id.starts_with("o1") + || model_id.starts_with("o3") + || model_id.starts_with("o4") + { + return Some(ModelCapability { + tool_calling: ToolCallingSupport::Parallel, + structured_output: StructuredOutput::JsonSchema, + reasoning: Some(ReasoningSupport::Effort), + vision: true, + prompt_caching: CacheStrategy::Auto, + }); + } + // GPT-4o / GPT-4 系 + if model_id.starts_with("gpt-4") { + return Some(ModelCapability { + tool_calling: ToolCallingSupport::Parallel, + structured_output: StructuredOutput::JsonSchema, + reasoning: None, + vision: true, + prompt_caching: CacheStrategy::Auto, + }); + } + // GPT-3.5 系(旧式・structured output 限定) + if model_id.starts_with("gpt-3.5") { + return Some(ModelCapability { + tool_calling: ToolCallingSupport::Parallel, + structured_output: StructuredOutput::JsonObject, + reasoning: None, + vision: false, + prompt_caching: CacheStrategy::Auto, + }); + } + None +} diff --git a/crates/llm-worker/src/llm_client/scheme/openai/events.rs b/crates/llm-worker/src/llm_client/scheme/openai_chat/events.rs similarity index 100% rename from crates/llm-worker/src/llm_client/scheme/openai/events.rs rename to crates/llm-worker/src/llm_client/scheme/openai_chat/events.rs diff --git a/crates/llm-worker/src/llm_client/scheme/openai/mod.rs b/crates/llm-worker/src/llm_client/scheme/openai_chat/mod.rs similarity index 96% rename from crates/llm-worker/src/llm_client/scheme/openai/mod.rs rename to crates/llm-worker/src/llm_client/scheme/openai_chat/mod.rs index 46845607..60ca62f7 100644 --- a/crates/llm-worker/src/llm_client/scheme/openai/mod.rs +++ b/crates/llm-worker/src/llm_client/scheme/openai_chat/mod.rs @@ -3,8 +3,10 @@ //! - リクエストJSON生成 //! - SSEイベントパース → Event変換 +mod capability; mod events; mod request; +mod scheme_impl; /// OpenAIスキーマ /// diff --git a/crates/llm-worker/src/llm_client/scheme/openai/request.rs b/crates/llm-worker/src/llm_client/scheme/openai_chat/request.rs similarity index 86% rename from crates/llm-worker/src/llm_client/scheme/openai/request.rs rename to crates/llm-worker/src/llm_client/scheme/openai_chat/request.rs index 2aa71c21..09f006b5 100644 --- a/crates/llm-worker/src/llm_client/scheme/openai/request.rs +++ b/crates/llm-worker/src/llm_client/scheme/openai_chat/request.rs @@ -7,6 +7,7 @@ use serde_json::Value; use crate::llm_client::{ Request, + capability::{ModelCapability, ReasoningEffort, ReasoningSupport}, types::{Item, Role, ToolDefinition, parse_tool_arguments}, }; @@ -34,6 +35,9 @@ pub(crate) struct OpenAIRequest { pub tools: Vec, #[serde(skip_serializing_if = "Option::is_none")] pub tool_choice: Option, + /// Reasoning effort(o1 / o3 / o4 / gpt-5 系で有効)。 + #[serde(skip_serializing_if = "Option::is_none")] + pub reasoning_effort: Option<&'static str>, } #[derive(Debug, Serialize)] @@ -110,7 +114,12 @@ pub(crate) struct OpenAIToolCallFunction { impl OpenAIScheme { /// Build OpenAI request from Request - pub(crate) fn build_request(&self, model: &str, request: &Request) -> OpenAIRequest { + pub(crate) fn build_request( + &self, + model: &str, + request: &Request, + capability: &ModelCapability, + ) -> OpenAIRequest { let mut messages = Vec::new(); // Add system message if present @@ -135,6 +144,24 @@ impl OpenAIScheme { (None, request.config.max_tokens) }; + // Reasoning の投影: capability が Effort / Both をサポートし、 + // request 側で effort が指定されているときだけ reasoning_effort を付ける。 + let supports_effort = matches!( + capability.reasoning, + Some(ReasoningSupport::Effort | ReasoningSupport::Both), + ); + let reasoning_effort = request + .config + .reasoning + .as_ref() + .and_then(|rc| rc.effort) + .filter(|_| supports_effort) + .map(|effort| match effort { + ReasoningEffort::Low => "low", + ReasoningEffort::Medium => "medium", + ReasoningEffort::High => "high", + }); + OpenAIRequest { model: model.to_string(), max_completion_tokens, @@ -149,6 +176,7 @@ impl OpenAIScheme { messages, tools, tool_choice: None, + reasoning_effort, } } @@ -294,13 +322,24 @@ impl OpenAIScheme { #[cfg(test)] mod tests { use super::*; + use crate::llm_client::capability::{CacheStrategy, StructuredOutput, ToolCallingSupport}; + + fn cap() -> ModelCapability { + ModelCapability { + tool_calling: ToolCallingSupport::Parallel, + structured_output: StructuredOutput::JsonSchema, + reasoning: None, + vision: false, + prompt_caching: CacheStrategy::Auto, + } + } #[test] fn test_build_simple_request() { let scheme = OpenAIScheme::new(); let request = Request::new().system("System prompt").user("Hello"); - let body = scheme.build_request("gpt-4o", &request); + let body = scheme.build_request("gpt-4o", &request, &cap()); assert_eq!(body.model, "gpt-4o"); assert_eq!(body.messages.len(), 2); @@ -321,7 +360,7 @@ mod tests { .user("Check weather") .tool(ToolDefinition::new("weather").description("Get weather")); - let body = scheme.build_request("gpt-4o", &request); + let body = scheme.build_request("gpt-4o", &request, &cap()); assert_eq!(body.tools.len(), 1); assert_eq!(body.tools[0].function.name, "weather"); } @@ -331,7 +370,7 @@ mod tests { let scheme = OpenAIScheme::new().with_legacy_max_tokens(true); let request = Request::new().user("Hello").max_tokens(100); - let body = scheme.build_request("llama3", &request); + let body = scheme.build_request("llama3", &request, &cap()); assert_eq!(body.max_tokens, Some(100)); assert!(body.max_completion_tokens.is_none()); @@ -342,7 +381,7 @@ mod tests { let scheme = OpenAIScheme::new(); let request = Request::new().user("Hello").max_tokens(100); - let body = scheme.build_request("gpt-4o", &request); + let body = scheme.build_request("gpt-4o", &request, &cap()); assert_eq!(body.max_completion_tokens, Some(100)); assert!(body.max_tokens.is_none()); @@ -360,7 +399,7 @@ mod tests { )) .item(Item::tool_result("call_123", "Sunny, 25°C")); - let body = scheme.build_request("gpt-4o", &request); + let body = scheme.build_request("gpt-4o", &request, &cap()); assert_eq!(body.messages.len(), 3); assert_eq!(body.messages[0].role, "user"); diff --git a/crates/llm-worker/src/llm_client/scheme/openai_chat/scheme_impl.rs b/crates/llm-worker/src/llm_client/scheme/openai_chat/scheme_impl.rs new file mode 100644 index 00000000..149338ee --- /dev/null +++ b/crates/llm-worker/src/llm_client/scheme/openai_chat/scheme_impl.rs @@ -0,0 +1,57 @@ +//! `impl Scheme for OpenAIScheme` + +use serde_json::Value; + +use crate::llm_client::{ + ClientError, + capability::ModelCapability, + event::Event, + auth::AuthRequirement, + scheme::Scheme, + types::Request, +}; + +use super::OpenAIScheme; + +impl Scheme for OpenAIScheme { + type State = (); + + fn default_base_url(&self) -> &'static str { + "https://api.openai.com" + } + + fn path(&self, _model_id: &str) -> String { + "/v1/chat/completions".to_string() + } + + fn required_auth(&self) -> AuthRequirement { + AuthRequirement::Bearer + } + + fn build_request_body( + &self, + model_id: &str, + request: &Request, + capability: &ModelCapability, + ) -> Value { + let req = self.build_request(model_id, request, capability); + serde_json::to_value(&req).expect("OpenAIRequest is always serialisable") + } + + fn parse_sse( + &self, + _event_type: &str, + data: &str, + _state: &mut Self::State, + ) -> Result, ClientError> { + // `data: [DONE]` は終端マーカー + if data.trim() == "[DONE]" { + return Ok(Vec::new()); + } + Ok(self.parse_event(data)?.unwrap_or_default()) + } + + fn capability_for(&self, model_id: &str) -> Option { + super::capability::lookup(model_id) + } +} diff --git a/crates/llm-worker/src/llm_client/transport.rs b/crates/llm-worker/src/llm_client/transport.rs new file mode 100644 index 00000000..ee075962 --- /dev/null +++ b/crates/llm-worker/src/llm_client/transport.rs @@ -0,0 +1,226 @@ +//! `HttpTransport`: すべての LLM wire scheme を共通の 1 本の +//! HTTP クライアントで扱う。 +//! +//! 旧 `providers/{anthropic,openai,gemini,ollama}.rs` を置き換える。 +//! scheme 固有の差分は [`Scheme`] trait 実装に委譲する。 + +use std::pin::Pin; + +use async_trait::async_trait; +use eventsource_stream::Eventsource; +use futures::{Stream, StreamExt, TryStreamExt}; +use reqwest::header::{CONTENT_TYPE, HeaderMap, HeaderValue}; + +use super::capability::ModelCapability; +use super::client::LlmClient; +use super::error::ClientError; +use super::event::Event; +use super::auth::AuthRequirement; +use super::scheme::Scheme; +use super::types::Request; + +/// `AuthRef` を解決したランタイム表現。`crates/provider` が構築する。 +/// +/// `AuthRef::ApiKey` → 読み取った文字列、`AuthRef::None` → `None`。 +/// `CodexOAuth` 等、動的に更新される認証は別途 `Custom` バリアントを +/// 追加する余地を残す(本チケットでは未実装)。 +#[derive(Debug, Clone)] +pub enum ResolvedAuth { + None, + ApiKey(String), +} + +impl ResolvedAuth { + /// 認証要件と実際の解決値が噛み合うか検査する。構築時検証用。 + /// + /// `ResolvedAuth::None` は認証を付けないという宣言なので、どの + /// `AuthRequirement` でも受け入れる(Ollama の Anthropic scheme + /// 流用は `required_auth = XApiKey` だが認証ヘッダなしで動く)。 + pub fn matches(&self, req: AuthRequirement) -> bool { + match (self, req) { + (Self::None, _) => true, + ( + Self::ApiKey(_), + AuthRequirement::Bearer | AuthRequirement::XApiKey | AuthRequirement::QueryParam { .. }, + ) => true, + _ => false, + } + } +} + +/// scheme 共通の HTTP 通信層。 +pub struct HttpTransport { + http_client: reqwest::Client, + scheme: S, + model_id: String, + base_url: String, + auth: ResolvedAuth, + capability: ModelCapability, +} + +impl HttpTransport { + /// 新しい transport を作る。`base_url` は末尾スラッシュの有無を + /// どちらでも受け付ける(内部で正規化)。 + pub fn new( + scheme: S, + model_id: impl Into, + base_url: impl Into, + auth: ResolvedAuth, + capability: ModelCapability, + ) -> Self { + let base_url = base_url.into(); + let base_url = base_url.trim_end_matches('/').to_string(); + Self { + http_client: reqwest::Client::new(), + scheme, + model_id: model_id.into(), + base_url, + auth, + capability, + } + } + + /// カスタム HTTP クライアントを差し込む(テスト等)。 + pub fn with_http_client(mut self, client: reqwest::Client) -> Self { + self.http_client = client; + self + } + + fn build_url(&self) -> String { + let path = self.scheme.path(&self.model_id); + let url = format!("{}{}", self.base_url, path); + // Gemini のようにクエリパラメータで認証する場合は URL にキーを追記する + if let (AuthRequirement::QueryParam { name }, ResolvedAuth::ApiKey(key)) = + (self.scheme.required_auth(), &self.auth) + { + let sep = if url.contains('?') { '&' } else { '?' }; + format!("{url}{sep}{name}={key}") + } else { + url + } + } + + fn build_headers(&self) -> Result { + let mut headers = HeaderMap::new(); + headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json")); + + match (self.scheme.required_auth(), &self.auth) { + (AuthRequirement::None, _) | (_, ResolvedAuth::None) => {} + (AuthRequirement::Bearer, ResolvedAuth::ApiKey(key)) => { + let mut val = HeaderValue::from_str(&format!("Bearer {key}")) + .map_err(|e| ClientError::Config(format!("invalid api key: {e}")))?; + val.set_sensitive(true); + headers.insert("Authorization", val); + } + (AuthRequirement::XApiKey, ResolvedAuth::ApiKey(key)) => { + let mut val = HeaderValue::from_str(key.as_str()) + .map_err(|e| ClientError::Config(format!("invalid api key: {e}")))?; + val.set_sensitive(true); + headers.insert("x-api-key", val); + } + (AuthRequirement::QueryParam { .. }, _) => { + // クエリパラメータは `build_url` で付与済み + } + (AuthRequirement::Custom, _) => { + // 今チケットでは Custom は使わない。Codex OAuth で追加予定 + } + } + + for (name, value) in self.scheme.additional_headers() { + let hv = HeaderValue::from_str(&value) + .map_err(|e| ClientError::Config(format!("invalid header {name}: {e}")))?; + headers.insert(name, hv); + } + + Ok(headers) + } +} + +impl Clone for HttpTransport { + fn clone(&self) -> Self { + Self { + http_client: self.http_client.clone(), + scheme: self.scheme.clone(), + model_id: self.model_id.clone(), + base_url: self.base_url.clone(), + auth: self.auth.clone(), + capability: self.capability.clone(), + } + } +} + +#[async_trait] +impl LlmClient for HttpTransport { + fn clone_boxed(&self) -> Box { + Box::new(self.clone()) + } + + async fn stream( + &self, + request: Request, + ) -> Result> + Send>>, ClientError> { + let url = self.build_url(); + let headers = self.build_headers()?; + let body = self + .scheme + .build_request_body(&self.model_id, &request, &self.capability); + + let response = self + .http_client + .post(&url) + .headers(headers) + .json(&body) + .send() + .await?; + + if !response.status().is_success() { + let status = response.status().as_u16(); + let text = response.text().await.unwrap_or_default(); + if let Ok(json) = serde_json::from_str::(&text) { + let error = json.get("error").unwrap_or(&json); + let code = error.get("type").and_then(|v| v.as_str()).map(String::from); + let message = error + .get("message") + .and_then(|v| v.as_str()) + .unwrap_or(&text) + .to_string(); + return Err(ClientError::Api { + status: Some(status), + code, + message, + }); + } + return Err(ClientError::Api { + status: Some(status), + code: None, + message: text, + }); + } + + let scheme = self.scheme.clone(); + let byte_stream = response.bytes_stream().map_err(std::io::Error::other); + let event_stream = byte_stream.eventsource(); + + // scheme 固有のパース状態をストリーム単位で保持する + let mut state = ::default(); + + let stream = event_stream + .map(move |result| match result { + Ok(frame) => match scheme.parse_sse(&frame.event, &frame.data, &mut state) { + Ok(events) => Ok(events), + Err(e) => Err(e), + }, + Err(e) => Err(ClientError::Sse(e.to_string())), + }) + .map(|res| { + let s: Pin> + Send>> = match res { + Ok(events) => Box::pin(futures::stream::iter(events.into_iter().map(Ok))), + Err(e) => Box::pin(futures::stream::once(async move { Err(e) })), + }; + s + }) + .flatten(); + + Ok(Box::pin(stream)) + } +} diff --git a/crates/llm-worker/src/llm_client/types.rs b/crates/llm-worker/src/llm_client/types.rs index 030380c5..e88999c4 100644 --- a/crates/llm-worker/src/llm_client/types.rs +++ b/crates/llm-worker/src/llm_client/types.rs @@ -565,6 +565,12 @@ pub struct RequestConfig { pub top_k: Option, /// Stop sequences pub stop_sequences: Vec, + /// Reasoning / extended-thinking 制御(共通型、scheme 側で各社形式に投影)。 + /// + /// `None` のときは何も送らない。`Some` でも scheme の + /// `ModelCapability::reasoning` が `None` なら無視される。 + #[serde(default)] + pub reasoning: Option, } impl RequestConfig { diff --git a/crates/llm-worker/tests/ui/locked_register_tool.rs b/crates/llm-worker/tests/ui/locked_register_tool.rs index 4f8be011..6df5ddb9 100644 --- a/crates/llm-worker/tests/ui/locked_register_tool.rs +++ b/crates/llm-worker/tests/ui/locked_register_tool.rs @@ -1,9 +1,26 @@ use llm_worker::Worker; -use llm_worker::llm_client::providers::ollama::OllamaClient; +use llm_worker::llm_client::capability::{ + CacheStrategy, ModelCapability, StructuredOutput, ToolCallingSupport, +}; +use llm_worker::llm_client::scheme::anthropic::AnthropicScheme; +use llm_worker::llm_client::transport::{HttpTransport, ResolvedAuth}; use std::sync::Arc; fn main() { - let client = OllamaClient::new("dummy-model"); + let cap = ModelCapability { + tool_calling: ToolCallingSupport::Parallel, + structured_output: StructuredOutput::JsonSchema, + reasoning: None, + vision: false, + prompt_caching: CacheStrategy::Auto, + }; + let client = HttpTransport::new( + AnthropicScheme::new(), + "dummy-model".to_string(), + "http://localhost:11434".to_string(), + ResolvedAuth::None, + cap, + ); let worker = Worker::new(client); let mut locked = worker.lock(); let def: llm_worker::tool::ToolDefinition = Arc::new(|| panic!("unused")); diff --git a/crates/llm-worker/tests/ui/locked_register_tool.stderr b/crates/llm-worker/tests/ui/locked_register_tool.stderr index accf7c59..b4e5ecd4 100644 --- a/crates/llm-worker/tests/ui/locked_register_tool.stderr +++ b/crates/llm-worker/tests/ui/locked_register_tool.stderr @@ -1,8 +1,8 @@ -error[E0599]: no method named `register_tool` found for struct `Worker` in the current scope - --> tests/ui/locked_register_tool.rs:10:20 +error[E0599]: no method named `register_tool` found for struct `Worker, Locked>` in the current scope + --> tests/ui/locked_register_tool.rs:27:20 | -10 | let _ = locked.register_tool(def); - | ^^^^^^^^^^^^^ method not found in `Worker` +27 | let _ = locked.register_tool(def); + | ^^^^^^^^^^^^^ method not found in `Worker, Locked>` | = note: the method was found for - `Worker` diff --git a/crates/llm-worker/tests/ui/tool_server_handle_register_tool.rs b/crates/llm-worker/tests/ui/tool_server_handle_register_tool.rs index ff75d0c0..5d710c92 100644 --- a/crates/llm-worker/tests/ui/tool_server_handle_register_tool.rs +++ b/crates/llm-worker/tests/ui/tool_server_handle_register_tool.rs @@ -1,9 +1,26 @@ use llm_worker::Worker; -use llm_worker::llm_client::providers::ollama::OllamaClient; +use llm_worker::llm_client::capability::{ + CacheStrategy, ModelCapability, StructuredOutput, ToolCallingSupport, +}; +use llm_worker::llm_client::scheme::anthropic::AnthropicScheme; +use llm_worker::llm_client::transport::{HttpTransport, ResolvedAuth}; use std::sync::Arc; fn main() { - let client = OllamaClient::new("dummy-model"); + let cap = ModelCapability { + tool_calling: ToolCallingSupport::Parallel, + structured_output: StructuredOutput::JsonSchema, + reasoning: None, + vision: false, + prompt_caching: CacheStrategy::Auto, + }; + let client = HttpTransport::new( + AnthropicScheme::new(), + "dummy-model".to_string(), + "http://localhost:11434".to_string(), + ResolvedAuth::None, + cap, + ); let worker = Worker::new(client); let handle = worker.tool_server_handle(); let def: llm_worker::tool::ToolDefinition = Arc::new(|| panic!("unused")); diff --git a/crates/llm-worker/tests/ui/tool_server_handle_register_tool.stderr b/crates/llm-worker/tests/ui/tool_server_handle_register_tool.stderr index 9410713d..e90a37c4 100644 --- a/crates/llm-worker/tests/ui/tool_server_handle_register_tool.stderr +++ b/crates/llm-worker/tests/ui/tool_server_handle_register_tool.stderr @@ -1,7 +1,7 @@ error[E0624]: method `register_tool` is private - --> tests/ui/tool_server_handle_register_tool.rs:10:20 + --> tests/ui/tool_server_handle_register_tool.rs:27:20 | -10 | let _ = handle.register_tool(def); +27 | let _ = handle.register_tool(def); | ^^^^^^^^^^^^^ private method | ::: src/tool_server.rs diff --git a/crates/llm-worker/tests/validation_test.rs b/crates/llm-worker/tests/validation_test.rs deleted file mode 100644 index 671140bb..00000000 --- a/crates/llm-worker/tests/validation_test.rs +++ /dev/null @@ -1,39 +0,0 @@ -use llm_worker::llm_client::providers::openai::OpenAIClient; -use llm_worker::{Worker, WorkerError}; - -#[test] -fn test_openai_top_k_warning() { - // Create client with dummy key (validate_config doesn't make network calls, so safe) - let client = OpenAIClient::new("dummy-key", "gpt-4o"); - - // Create Worker with top_k set (OpenAI doesn't support top_k) - let worker = Worker::new(client).top_k(50); - - // Run validate() - let result = worker.validate(); - - // Verify error is returned and ConfigWarnings is included - match result { - Err(WorkerError::ConfigWarnings(warnings)) => { - assert_eq!(warnings.len(), 1); - assert_eq!(warnings[0].option_name, "top_k"); - println!("Got expected warning: {}", warnings[0]); - } - Ok(_) => panic!("Should have returned validation error"), - Err(e) => panic!("Unexpected error type: {:?}", e), - } -} - -#[test] -fn test_openai_valid_config() { - let client = OpenAIClient::new("dummy-key", "gpt-4o"); - - // Valid configuration (temperature only) - let worker = Worker::new(client).temperature(0.7); - - // Run validate() - let result = worker.validate(); - - // Verify success - assert!(result.is_ok()); -} diff --git a/crates/manifest/README.md b/crates/manifest/README.md index 1cb56524..53a4bacb 100644 --- a/crates/manifest/README.md +++ b/crates/manifest/README.md @@ -1,13 +1,14 @@ # manifest -Pod の宣言的設定を TOML マニフェストとして定義・パースするクレート。プロバイダ設定、ワーカー設定、ディレクトリスコープ制約を記述できる。 +Pod の宣言的設定を TOML マニフェストとして定義・パースするクレート。モデル設定、ワーカー設定、ディレクトリスコープ制約を記述できる。 ## 公開型 - `PodManifest` — Pod 設定全体(`from_toml()` でパース) - `PodMeta` — Pod メタデータ(名前、pwd) -- `ProviderConfig` — LLM プロバイダ設定(種別、モデル、APIキー環境変数、ベースURL) -- `ProviderKind` — プロバイダ種別(`Anthropic`, `Openai`, `Gemini`, `Ollama`) +- `ModelConfig` — LLM モデル設定(scheme、base_url、model_id、auth) +- `SchemeKind` — wire scheme 種別(`Anthropic`, `OpenaiChat`, `OpenaiResponses`, `Gemini`) +- `AuthRef` — 認証参照(`None`, `ApiKey { env, file }`, `CodexOAuth`) - `WorkerManifest` — ワーカー設定(システムプロンプト、max_tokens、temperature) - `ScopeConfig` / `ScopeRule` / `Permission` — allow / deny の宣言的スコープ設定 - `Scope` — 実行時スコープ。`from_config(&ScopeConfig, pwd)` で構築し、`is_readable` / `is_writable` / `permission_at` で問い合わせる diff --git a/crates/manifest/src/config.rs b/crates/manifest/src/config.rs index 9b3c9e57..49ccbd8c 100644 --- a/crates/manifest/src/config.rs +++ b/crates/manifest/src/config.rs @@ -13,10 +13,8 @@ use std::path::{Path, PathBuf}; use serde::{Deserialize, Serialize}; use crate::defaults; -use crate::{ - CompactionConfig, PodManifest, PodMeta, ProviderConfig, ProviderKind, ScopeConfig, - ToolOutputLimits, WorkerManifest, -}; +use crate::model::{AuthRef, ModelConfig, SchemeKind}; +use crate::{CompactionConfig, PodManifest, PodMeta, ScopeConfig, ToolOutputLimits, WorkerManifest}; /// Partial-form Pod manifest. Every field is optional; one or more /// instances merge via [`PodManifestConfig::merge`] before being @@ -26,7 +24,7 @@ pub struct PodManifestConfig { #[serde(default)] pub pod: PodMetaConfig, #[serde(default)] - pub provider: ProviderConfigPartial, + pub model: ModelConfigPartial, #[serde(default)] pub worker: WorkerManifestConfig, #[serde(default)] @@ -41,16 +39,17 @@ pub struct PodMetaConfig { pub name: Option, } +/// Partial-form of [`ModelConfig`]. カスケード層で個別に与えられる。 #[derive(Debug, Clone, Default, Serialize, Deserialize)] -pub struct ProviderConfigPartial { +pub struct ModelConfigPartial { #[serde(default)] - pub kind: Option, - #[serde(default)] - pub model: Option, - #[serde(default)] - pub api_key_file: Option, + pub scheme: Option, #[serde(default)] pub base_url: Option, + #[serde(default)] + pub model_id: Option, + #[serde(default)] + pub auth: Option, } #[derive(Debug, Clone, Default, Serialize, Deserialize)] @@ -92,7 +91,7 @@ pub struct CompactionConfigPartial { #[serde(default)] pub compact_worker_max_input_tokens: Option, #[serde(default)] - pub provider: Option, + pub model: Option, } /// Errors raised when converting a [`PodManifestConfig`] to a validated @@ -148,18 +147,16 @@ impl PodManifestConfig { /// rules from different layers do not accidentally inherit another /// layer's base. /// - /// Affected fields: `provider.api_key_file`, + /// Affected fields: `model.auth.file`, /// `scope.allow[].target`, `scope.deny[].target`, - /// `compaction.provider.api_key_file`. + /// `compaction.model.auth.file`. pub fn resolve_paths(mut self, base: &Path) -> Self { debug_assert!( base.is_absolute(), "resolve_paths base must be absolute: {}", base.display() ); - if let Some(ref mut p) = self.provider.api_key_file { - *p = join_if_relative(base, p); - } + resolve_auth_file(&mut self.model.auth, base); for rule in &mut self.scope.allow { rule.target = join_if_relative(base, &rule.target); } @@ -167,10 +164,9 @@ impl PodManifestConfig { rule.target = join_if_relative(base, &rule.target); } if let Some(ref mut compaction) = self.compaction - && let Some(ref mut cp) = compaction.provider - && let Some(ref mut p) = cp.api_key_file + && let Some(ref mut cp) = compaction.model { - *p = join_if_relative(base, p); + resolve_auth_file(&mut cp.auth, base); } self } @@ -182,7 +178,7 @@ impl PodManifestConfig { pub fn merge(self, upper: PodManifestConfig) -> Self { Self { pod: self.pod.merge(upper.pod), - provider: self.provider.merge(upper.provider), + model: self.model.merge(upper.model), worker: self.worker.merge(upper.worker), scope: merge_scope(self.scope, upper.scope), compaction: merge_option( @@ -202,13 +198,13 @@ impl PodMetaConfig { } } -impl ProviderConfigPartial { +impl ModelConfigPartial { fn merge(self, upper: Self) -> Self { Self { - kind: upper.kind.or(self.kind), - model: upper.model.or(self.model), - api_key_file: upper.api_key_file.or(self.api_key_file), + scheme: upper.scheme.or(self.scheme), base_url: upper.base_url.or(self.base_url), + model_id: upper.model_id.or(self.model_id), + auth: upper.auth.or(self.auth), } } } @@ -254,7 +250,7 @@ impl CompactionConfigPartial { compact_worker_max_input_tokens: upper .compact_worker_max_input_tokens .or(self.compact_worker_max_input_tokens), - provider: merge_option(self.provider, upper.provider, ProviderConfigPartial::merge), + model: merge_option(self.model, upper.model, ModelConfigPartial::merge), } } } @@ -295,25 +291,35 @@ fn ensure_absolute(field: &'static str, path: &Path) -> Result<(), ResolveError> } } -fn resolve_provider( - cfg: ProviderConfigPartial, - kind_field: &'static str, - model_field: &'static str, - api_key_field: &'static str, -) -> Result { - let kind = cfg.kind.ok_or(ResolveError::MissingField(kind_field))?; - let model = cfg.model.ok_or(ResolveError::MissingField(model_field))?; - if let Some(ref p) = cfg.api_key_file { - ensure_absolute(api_key_field, p)?; +fn resolve_model( + cfg: ModelConfigPartial, + scheme_field: &'static str, + model_id_field: &'static str, + auth_file_field: &'static str, +) -> Result { + let scheme = cfg.scheme.ok_or(ResolveError::MissingField(scheme_field))?; + let model_id = cfg + .model_id + .ok_or(ResolveError::MissingField(model_id_field))?; + let auth = cfg.auth.unwrap_or_default(); + if let AuthRef::ApiKey { file: Some(p), .. } = &auth { + ensure_absolute(auth_file_field, p)?; } - Ok(ProviderConfig { - kind, - model, - api_key_file: cfg.api_key_file, + Ok(ModelConfig { + scheme, base_url: cfg.base_url, + model_id, + auth, }) } +/// `AuthRef::ApiKey { file, .. }` が相対パスのとき `base` を前置する。 +fn resolve_auth_file(auth: &mut Option, base: &Path) { + if let Some(AuthRef::ApiKey { file: Some(p), .. }) = auth.as_mut() { + *p = join_if_relative(base, p); + } +} + impl TryFrom for PodManifest { type Error = ResolveError; @@ -323,11 +329,11 @@ impl TryFrom for PodManifest { .name .ok_or(ResolveError::MissingField("pod.name"))?; - let provider = resolve_provider( - cfg.provider, - "provider.kind", - "provider.model", - "provider.api_key_file", + let model = resolve_model( + cfg.model, + "model.scheme", + "model.model_id", + "model.auth.file", )?; let worker = WorkerManifest { @@ -361,14 +367,14 @@ impl TryFrom for PodManifest { let compaction = cfg .compaction .map(|c| -> Result { - let comp_provider = c - .provider + let comp_model = c + .model .map(|p| { - resolve_provider( + resolve_model( p, - "compaction.provider.kind", - "compaction.provider.model", - "compaction.provider.api_key_file", + "compaction.model.scheme", + "compaction.model.model_id", + "compaction.model.auth.file", ) }) .transpose()?; @@ -390,14 +396,14 @@ impl TryFrom for PodManifest { compact_worker_max_input_tokens: c .compact_worker_max_input_tokens .unwrap_or(defaults::COMPACT_WORKER_MAX_INPUT_TOKENS), - provider: comp_provider, + model: comp_model, }) }) .transpose()?; Ok(PodManifest { pod: PodMeta { name }, - provider, + model, worker, scope: cfg.scope, compaction, @@ -414,14 +420,21 @@ mod tests { PathBuf::from(format!("/tmp/insomnia-test{path}")) } + fn api_key_file_auth(path: PathBuf) -> AuthRef { + AuthRef::ApiKey { + env: None, + file: Some(path), + } + } + fn minimal_valid() -> PodManifestConfig { PodManifestConfig { pod: PodMetaConfig { name: Some("test".into()), }, - provider: ProviderConfigPartial { - kind: Some(ProviderKind::Anthropic), - model: Some("claude-sonnet-4-20250514".into()), + model: ModelConfigPartial { + scheme: Some(SchemeKind::Anthropic), + model_id: Some("claude-sonnet-4-20250514".into()), ..Default::default() }, worker: WorkerManifestConfig::default(), @@ -441,16 +454,20 @@ mod tests { fn resolve_minimal_succeeds() { let manifest: PodManifest = minimal_valid().try_into().unwrap(); assert_eq!(manifest.pod.name, "test"); - assert_eq!(manifest.provider.kind, ProviderKind::Anthropic); + assert_eq!(manifest.model.scheme, SchemeKind::Anthropic); } #[test] - fn resolve_paths_joins_relative_api_key_file() { + fn resolve_paths_joins_relative_auth_file() { let mut cfg = minimal_valid(); - cfg.provider.api_key_file = Some(PathBuf::from("keys/anthropic")); + cfg.model.auth = Some(api_key_file_auth(PathBuf::from("keys/anthropic"))); let resolved = cfg.resolve_paths(Path::new("/home/user/.config/insomnia")); + let file = match resolved.model.auth { + Some(AuthRef::ApiKey { file, .. }) => file, + _ => panic!("expected ApiKey"), + }; assert_eq!( - resolved.provider.api_key_file.as_deref(), + file.as_deref(), Some(Path::new("/home/user/.config/insomnia/keys/anthropic")) ); } @@ -458,12 +475,13 @@ mod tests { #[test] fn resolve_paths_leaves_absolute_paths_untouched() { let mut cfg = minimal_valid(); - cfg.provider.api_key_file = Some(PathBuf::from("/etc/already/abs")); + cfg.model.auth = Some(api_key_file_auth(PathBuf::from("/etc/already/abs"))); let resolved = cfg.resolve_paths(Path::new("/home/user")); - assert_eq!( - resolved.provider.api_key_file.as_deref(), - Some(Path::new("/etc/already/abs")) - ); + let file = match resolved.model.auth { + Some(AuthRef::ApiKey { file, .. }) => file, + _ => panic!("expected ApiKey"), + }; + assert_eq!(file.as_deref(), Some(Path::new("/etc/already/abs"))); } #[test] @@ -484,16 +502,14 @@ mod tests { } #[test] - fn try_from_invariant_rejects_lingering_relative_api_key_file() { + fn try_from_invariant_rejects_lingering_relative_auth_file() { let mut cfg = minimal_valid(); - cfg.provider.api_key_file = Some(PathBuf::from("keys/relative")); - // Skipping resolve_paths on purpose: TryFrom must catch the - // invariant violation. + cfg.model.auth = Some(api_key_file_auth(PathBuf::from("keys/relative"))); let err = PodManifest::try_from(cfg).unwrap_err(); assert!(matches!( err, ResolveError::RelativePath { - field: "provider.api_key_file", + field: "model.auth.file", .. } )); @@ -535,8 +551,8 @@ mod tests { pod: PodMetaConfig { name: Some("lower".into()), }, - provider: ProviderConfigPartial { - model: Some("lower-model".into()), + model: ModelConfigPartial { + model_id: Some("lower-model".into()), ..Default::default() }, ..Default::default() @@ -549,8 +565,8 @@ mod tests { }; let merged = lower.merge(upper); assert_eq!(merged.pod.name.as_deref(), Some("upper")); - // model not present in upper — retain lower - assert_eq!(merged.provider.model.as_deref(), Some("lower-model")); + // model_id not present in upper — retain lower + assert_eq!(merged.model.model_id.as_deref(), Some("lower-model")); } #[test] @@ -706,9 +722,9 @@ permission = "write" pod: PodMetaConfig { name: Some("x".into()), }, - provider: ProviderConfigPartial { - kind: Some(ProviderKind::Anthropic), - model: Some("m".into()), + model: ModelConfigPartial { + scheme: Some(SchemeKind::Anthropic), + model_id: Some("m".into()), ..Default::default() }, scope: ScopeConfig { @@ -734,9 +750,9 @@ permission = "write" let builtin = PodManifestConfig::default(); let user = PodManifestConfig::from_toml( r#" -[provider] -kind = "anthropic" -model = "claude-sonnet-4-20250514" +[model] +scheme = "anthropic" +model_id = "claude-sonnet-4-20250514" "#, ) .unwrap(); @@ -759,7 +775,7 @@ name = "dbg" let merged = builtin.merge(user).merge(project).merge(overlay); let manifest: PodManifest = merged.try_into().unwrap(); assert_eq!(manifest.pod.name, "dbg"); - assert_eq!(manifest.provider.kind, ProviderKind::Anthropic); + assert_eq!(manifest.model.scheme, SchemeKind::Anthropic); assert_eq!(manifest.scope.allow.len(), 1); } } diff --git a/crates/manifest/src/lib.rs b/crates/manifest/src/lib.rs index e50c9893..a48ca0aa 100644 --- a/crates/manifest/src/lib.rs +++ b/crates/manifest/src/lib.rs @@ -1,30 +1,31 @@ mod config; pub mod defaults; +mod model; mod scope; pub use config::{ - CompactionConfigPartial, PodManifestConfig, PodMetaConfig, ProviderConfigPartial, ResolveError, + CompactionConfigPartial, ModelConfigPartial, PodManifestConfig, PodMetaConfig, ResolveError, ToolOutputLimitsPartial, WorkerManifestConfig, }; +pub use model::{AuthRef, ModelConfig, SchemeKind}; pub use protocol::{Permission, ScopeRule}; pub use scope::{Scope, ScopeError}; use std::collections::HashMap; use std::num::NonZeroU32; -use std::path::PathBuf; use serde::{Deserialize, Serialize}; /// Declarative configuration for a Pod. /// -/// Parsed from a TOML manifest file. Describes the provider, model, -/// system prompt, and directory scope (required). The Pod's working -/// directory is **not** part of the manifest — it is the process's -/// `std::env::current_dir()` at construction time. +/// Parsed from a TOML manifest file. Describes the model, system prompt, +/// and directory scope (required). The Pod's working directory is **not** +/// part of the manifest — it is the process's `std::env::current_dir()` +/// at construction time. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct PodManifest { pub pod: PodMeta, - pub provider: ProviderConfig, + pub model: ModelConfig, pub worker: WorkerManifest, pub scope: ScopeConfig, #[serde(default)] @@ -37,44 +38,6 @@ pub struct PodMeta { pub name: String, } -/// LLM provider configuration. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ProviderConfig { - pub kind: ProviderKind, - pub model: String, - /// Path to a file containing the API key (read and trimmed at startup). - #[serde(default)] - pub api_key_file: Option, - /// Custom base URL for the provider API. - #[serde(default)] - pub base_url: Option, -} - -/// Supported LLM providers. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "lowercase")] -pub enum ProviderKind { - Anthropic, - Openai, - Gemini, - Ollama, -} - -impl ProviderKind { - /// Conventional environment variable name for the API key. - /// - /// Returns `INSOMNIA_API_KEY_{KIND}` (e.g. `INSOMNIA_API_KEY_ANTHROPIC`). - pub fn env_var_name(self) -> String { - let kind = match self { - Self::Anthropic => "ANTHROPIC", - Self::Openai => "OPENAI", - Self::Gemini => "GEMINI", - Self::Ollama => "OLLAMA", - }; - format!("INSOMNIA_API_KEY_{kind}") - } -} - /// Worker-level configuration embedded in the manifest. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct WorkerManifest { @@ -211,10 +174,10 @@ pub struct CompactionConfig { #[serde(default = "default_compact_worker_max_input_tokens")] pub compact_worker_max_input_tokens: u64, - /// Optional provider for the compactor (summary) LLM. - /// If omitted, the main provider is cloned via `clone_boxed()`. + /// Optional model for the compactor (summary) LLM. + /// If omitted, the main model is cloned via `clone_boxed()`. #[serde(default)] - pub provider: Option, + pub model: Option, } fn default_prune_protected_turns() -> usize { @@ -243,7 +206,7 @@ impl Default for CompactionConfig { compact_retained_tokens: default_compact_retained_tokens(), compact_auto_read_budget: default_compact_auto_read_budget(), compact_worker_max_input_tokens: default_compact_worker_max_input_tokens(), - provider: None, + model: None, } } } @@ -263,9 +226,9 @@ mod tests { [pod] name = "test-agent" -[provider] -kind = "anthropic" -model = "claude-sonnet-4-20250514" +[model] +scheme = "anthropic" +model_id = "claude-sonnet-4-20250514" [worker] @@ -278,9 +241,9 @@ permission = "write" fn parse_minimal_manifest() { let manifest = PodManifest::from_toml(MINIMAL_REQUIRED).unwrap(); assert_eq!(manifest.pod.name, "test-agent"); - assert_eq!(manifest.provider.kind, ProviderKind::Anthropic); - assert_eq!(manifest.provider.model, "claude-sonnet-4-20250514"); - assert!(manifest.provider.api_key_file.is_none()); + assert_eq!(manifest.model.scheme, SchemeKind::Anthropic); + assert_eq!(manifest.model.model_id, "claude-sonnet-4-20250514"); + assert_eq!(manifest.model.auth, AuthRef::None); assert_eq!(manifest.scope.allow.len(), 1); assert!(manifest.scope.deny.is_empty()); assert_eq!(manifest.worker.instruction, defaults::DEFAULT_INSTRUCTION); @@ -292,10 +255,10 @@ permission = "write" [pod] name = "code-reviewer" -[provider] -kind = "anthropic" -model = "claude-sonnet-4-20250514" -api_key_file = "/abs/keys/anthropic" +[model] +scheme = "anthropic" +model_id = "claude-sonnet-4-20250514" +auth = { kind = "api_key", file = "/abs/keys/anthropic" } [worker] instruction = "$user/reviewer" @@ -317,10 +280,11 @@ permission = "write" "#; let manifest = PodManifest::from_toml(toml).unwrap(); assert_eq!(manifest.pod.name, "code-reviewer"); - assert_eq!( - manifest.provider.api_key_file.as_deref(), - Some(std::path::Path::new("/abs/keys/anthropic")) - ); + let file = match &manifest.model.auth { + AuthRef::ApiKey { file, .. } => file.as_deref(), + _ => panic!("expected ApiKey"), + }; + assert_eq!(file, Some(std::path::Path::new("/abs/keys/anthropic"))); assert_eq!(manifest.worker.instruction, "$user/reviewer"); assert_eq!(manifest.worker.max_tokens, Some(4096)); assert_eq!(manifest.worker.temperature, Some(0.3)); @@ -340,9 +304,9 @@ permission = "write" [pod] name = "missing-scope" -[provider] -kind = "anthropic" -model = "claude-sonnet-4-20250514" +[model] +scheme = "anthropic" +model_id = "claude-sonnet-4-20250514" [worker] "#; @@ -408,20 +372,20 @@ model = "claude-sonnet-4-20250514" } #[test] - fn parse_compaction_with_provider() { + fn parse_compaction_with_model() { let toml = format!( "{MINIMAL_REQUIRED}\n\ [compaction]\n\ compact_threshold = 80000\n\n\ - [compaction.provider]\n\ - kind = \"gemini\"\n\ - model = \"gemini-2.0-flash\"\n" + [compaction.model]\n\ + scheme = \"gemini\"\n\ + model_id = \"gemini-2.0-flash\"\n" ); let manifest = PodManifest::from_toml(&toml).unwrap(); let c = manifest.compaction.unwrap(); - let p = c.provider.unwrap(); - assert_eq!(p.kind, ProviderKind::Gemini); - assert_eq!(p.model, "gemini-2.0-flash"); + let p = c.model.unwrap(); + assert_eq!(p.scheme, SchemeKind::Gemini); + assert_eq!(p.model_id, "gemini-2.0-flash"); } #[test] @@ -431,8 +395,9 @@ model = "claude-sonnet-4-20250514" } #[test] - fn reject_unknown_provider() { - let toml = MINIMAL_REQUIRED.replace("kind = \"anthropic\"", "kind = \"unknown_provider\""); + fn reject_unknown_scheme() { + let toml = + MINIMAL_REQUIRED.replace("scheme = \"anthropic\"", "scheme = \"unknown_scheme\""); assert!(PodManifest::from_toml(&toml).is_err()); } diff --git a/crates/manifest/src/model.rs b/crates/manifest/src/model.rs new file mode 100644 index 00000000..5631fe6e --- /dev/null +++ b/crates/manifest/src/model.rs @@ -0,0 +1,74 @@ +//! LLM モデル宣言型 +//! +//! Pod マニフェストの `[model]` セクションで記述する型。`scheme` と +//! `auth` を直交軸として表現し、1 つの汎用アダプタ(`crates/provider`) +//! で任意の wire / 認証組合せを受け止める。 + +use std::path::PathBuf; + +use serde::{Deserialize, Serialize}; + +/// Pod が使う LLM モデルの宣言。 +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct ModelConfig { + /// wire format + pub scheme: SchemeKind, + /// API のベース URL。未指定なら scheme の既定値にフォールバック + #[serde(default)] + pub base_url: Option, + /// プロバイダが受け付けるモデル ID + pub model_id: String, + /// 認証方式 + #[serde(default)] + pub auth: AuthRef, +} + +/// サポートする wire scheme の種類。 +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum SchemeKind { + /// Anthropic Messages API (`/v1/messages`)。Ollama `/v1/messages` もこれで扱う + Anthropic, + /// OpenAI Chat Completions (`/v1/chat/completions`)。OpenAI 互換ルーター共通枠 + OpenaiChat, + /// OpenAI Responses API (`/v1/responses`)。別チケットで scheme 新設予定 + OpenaiResponses, + /// Google Gemini (`/v1beta/models/...:streamGenerateContent`) + Gemini, +} + +/// 認証の参照。 +/// +/// 実際のトークン値の解決(env / file 読取、OAuth refresh 等)は +/// `crates/provider` で行う。ここはあくまで「どこから取るか」の宣言。 +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)] +#[serde(tag = "kind", rename_all = "snake_case")] +pub enum AuthRef { + /// 認証不要(ローカル Ollama 等) + #[default] + None, + /// API key。env / file のいずれか(両方指定された場合は env が優先) + ApiKey { + /// 環境変数名。未指定のときは scheme ごとの既定(`INSOMNIA_API_KEY_*`) + #[serde(default)] + env: Option, + /// key を書き込んだファイル(絶対パス) + #[serde(default)] + file: Option, + }, + /// ChatGPT OAuth(`~/.codex/auth.json`)。実装は `llm-auth-codex-oauth` チケット + CodexOAuth, +} + +impl SchemeKind { + /// 既定の環境変数名(`INSOMNIA_API_KEY_*`)。 + /// + /// `AuthRef::ApiKey { env: None, .. }` の env 未指定時に使う。 + pub fn default_env_var(self) -> &'static str { + match self { + Self::Anthropic => "INSOMNIA_API_KEY_ANTHROPIC", + Self::OpenaiChat | Self::OpenaiResponses => "INSOMNIA_API_KEY_OPENAI", + Self::Gemini => "INSOMNIA_API_KEY_GEMINI", + } + } +} diff --git a/crates/pod/examples/pod_cli.rs b/crates/pod/examples/pod_cli.rs index 5ccb82c6..bf8436f7 100644 --- a/crates/pod/examples/pod_cli.rs +++ b/crates/pod/examples/pod_cli.rs @@ -22,9 +22,9 @@ fn manifest_toml(pwd: &std::path::Path) -> String { name = "hello-pod" pwd = "{pwd}" -[provider] -kind = "anthropic" -model = "claude-sonnet-4-20250514" +[model] +scheme = "anthropic" +model_id = "claude-sonnet-4-20250514" [worker] system_prompt = "You are a concise assistant. Reply in one or two sentences." diff --git a/crates/pod/examples/pod_protocol.rs b/crates/pod/examples/pod_protocol.rs index 6e4ffb8b..baf5421d 100644 --- a/crates/pod/examples/pod_protocol.rs +++ b/crates/pod/examples/pod_protocol.rs @@ -16,9 +16,9 @@ fn manifest_toml(pwd: &std::path::Path) -> String { name = "protocol-demo" pwd = "{pwd}" -[provider] -kind = "anthropic" -model = "claude-sonnet-4-20250514" +[model] +scheme = "anthropic" +model_id = "claude-sonnet-4-20250514" [worker] system_prompt = "You are a concise assistant. Reply in one or two sentences." diff --git a/crates/pod/src/controller.rs b/crates/pod/src/controller.rs index 4e4d83a3..a75d5fbc 100644 --- a/crates/pod/src/controller.rs +++ b/crates/pod/src/controller.rs @@ -113,7 +113,7 @@ impl PodController { let scope_for_tools = pod.scope().clone(); let pwd_for_tools = pod.pwd().to_path_buf(); let spawner_name = pod.manifest().pod.name.clone(); - let spawner_provider = pod.manifest().provider.clone(); + let spawner_model = pod.manifest().model.clone(); // Parent callback socket (this Pod's own parent, used for // `PodEvent` upward reports). `None` for top-level Pods. @@ -230,7 +230,7 @@ impl PodController { pwd_for_tools, spawned_registry.clone(), self_parent_socket.clone(), - spawner_provider.clone(), + spawner_model.clone(), )); worker.register_tool(send_to_pod_tool(spawned_registry.clone())); worker.register_tool(read_pod_output_tool(spawned_registry.clone())); @@ -663,11 +663,11 @@ where St: Store, { let manifest = pod.manifest(); - let provider = match manifest.provider.kind { - manifest::ProviderKind::Anthropic => "anthropic", - manifest::ProviderKind::Openai => "openai", - manifest::ProviderKind::Gemini => "gemini", - manifest::ProviderKind::Ollama => "ollama", + let provider = match manifest.model.scheme { + manifest::SchemeKind::Anthropic => "anthropic", + manifest::SchemeKind::OpenaiChat => "openai_chat", + manifest::SchemeKind::OpenaiResponses => "openai_responses", + manifest::SchemeKind::Gemini => "gemini", }; // The tool list mirrors what `spawn()` registers on the Worker: // builtin filesystem tools plus the pod-orchestration tools. @@ -689,7 +689,7 @@ where pod_name: manifest.pod.name.clone(), cwd: pod.pwd().display().to_string(), provider: provider.into(), - model: manifest.provider.model.clone(), + model: manifest.model.model_id.clone(), scope_summary: pod.scope().summary(), tools: tool_names, } diff --git a/crates/pod/src/factory.rs b/crates/pod/src/factory.rs index 0b0614cd..d87a4f54 100644 --- a/crates/pod/src/factory.rs +++ b/crates/pod/src/factory.rs @@ -317,9 +317,9 @@ mod tests { [pod] name = "solo" -[provider] -kind = "anthropic" -model = "claude-sonnet-4-20250514" +[model] +scheme = "anthropic" +model_id = "claude-sonnet-4-20250514" [[scope.allow]] target = "{pwd}" @@ -342,9 +342,9 @@ permission = "write" let pwd = tmp.path().canonicalize().unwrap(); let user_cfg = PodManifestConfig::from_toml(&format!( r#" -[provider] -kind = "anthropic" -model = "user-model" +[model] +scheme = "anthropic" +model_id = "user-model" [[scope.allow]] target = "{pwd}" @@ -355,8 +355,8 @@ permission = "read" .unwrap(); let project_cfg = PodManifestConfig::from_toml(&format!( r#" -[provider] -model = "project-model" +[model] +model_id = "project-model" [[scope.allow]] target = "{pwd}" @@ -387,7 +387,7 @@ name = "overlay-name" // overlay layer so later calls win. This also exercises the // scope union across layers (two allow rules). assert_eq!(manifest.pod.name, "overlay-name"); - assert_eq!(manifest.provider.model, "project-model"); + assert_eq!(manifest.model.model_id, "project-model"); assert_eq!(manifest.scope.allow.len(), 2); } @@ -406,9 +406,9 @@ name = "overlay-name" [pod] name = "from-user" -[provider] -kind = "anthropic" -model = "user-model" +[model] +scheme = "anthropic" +model_id = "user-model" [[scope.allow]] target = "{pwd}" @@ -423,8 +423,8 @@ permission = "write" write( &project_manifest, r#" -[provider] -model = "project-model" +[model] +model_id = "project-model" "#, ); @@ -436,8 +436,8 @@ model = "project-model" .resolve() .unwrap(); - // project layer overrides user layer on provider.model - assert_eq!(manifest.provider.model, "project-model"); + // project layer overrides user layer on model.model_id + assert_eq!(manifest.model.model_id, "project-model"); // user layer provides the rest assert_eq!(manifest.pod.name, "from-user"); } @@ -454,9 +454,9 @@ model = "project-model" [pod] name = "walked-up" -[provider] -kind = "anthropic" -model = "claude-sonnet-4-20250514" +[model] +scheme = "anthropic" +model_id = "claude-sonnet-4-20250514" [[scope.allow]] target = "{root}" @@ -487,9 +487,9 @@ permission = "write" [pod] name = "standalone" -[provider] -kind = "anthropic" -model = "m" +[model] +scheme = "anthropic" +model_id = "m" [[scope.allow]] target = "{pwd}" @@ -529,9 +529,9 @@ permission = "write" [pod] name = "rel-user" -[provider] -kind = "anthropic" -model = "m" +[model] +scheme = "anthropic" +model_id = "m" [[scope.allow]] target = "./workspace" @@ -565,9 +565,9 @@ permission = "write" [pod] name = "rel-project" -[provider] -kind = "anthropic" -model = "m" +[model] +scheme = "anthropic" +model_id = "m" [[scope.allow]] target = "." @@ -604,9 +604,9 @@ permission = "write" [pod] name = "factory-pod" -[provider] -kind = "anthropic" -model = "m" +[model] +scheme = "anthropic" +model_id = "m" [[scope.allow]] target = "{root}" @@ -661,9 +661,9 @@ permission = "write" // pod.name missing — resolver must reject. let overlay = format!( r#" -[provider] -kind = "anthropic" -model = "m" +[model] +scheme = "anthropic" +model_id = "m" [[scope.allow]] target = "{pwd}" diff --git a/crates/pod/src/lib.rs b/crates/pod/src/lib.rs index 32370b93..06aeb5b7 100644 --- a/crates/pod/src/lib.rs +++ b/crates/pod/src/lib.rs @@ -31,7 +31,7 @@ pub use factory::{FactoryError, PodFactory}; pub use notifier::Notifier; pub use hook::{Hook, HookEventKind, HookRegistryBuilder}; pub use manifest::{ - PodManifest, PodManifestConfig, PodMetaConfig, ProviderConfig, ProviderKind, Scope, + AuthRef, ModelConfig, PodManifest, PodManifestConfig, PodMetaConfig, Scope, SchemeKind, }; pub use pod::{Pod, PodError, PodRunResult, apply_worker_manifest}; pub use prompt_loader::PromptLoader; diff --git a/crates/pod/src/pod.rs b/crates/pod/src/pod.rs index b3b4355a..f36f8cef 100644 --- a/crates/pod/src/pod.rs +++ b/crates/pod/src/pod.rs @@ -818,7 +818,7 @@ impl Pod { /// `[summary, ...recent_turns]` and creating a new session. /// /// The summary Worker uses: - /// - `compaction.provider` from the manifest if configured, or + /// - `compaction.model` from the manifest if configured, or /// - a clone of the main LlmClient via `clone_boxed()`. /// /// Returns the new session ID. @@ -1056,12 +1056,12 @@ impl Pod { /// Build the LlmClient for the compactor Worker. /// - /// Uses `compaction.provider` from manifest if set, otherwise clones + /// Uses `compaction.model` from manifest if set, otherwise clones /// the main client. fn build_compactor_client(&self) -> Result, PodError> { if let Some(ref compaction) = self.manifest.compaction { - if let Some(ref provider_config) = compaction.provider { - let client = provider::build_client(provider_config)?; + if let Some(ref model_config) = compaction.model { + let client = provider::build_client(model_config)?; return Ok(client); } } @@ -1109,7 +1109,7 @@ impl Pod, St> { scope.allow_rules(), )?; - let client = provider::build_client(&manifest.provider)?; + let client = provider::build_client(&manifest.model)?; let mut worker = Worker::new(client); apply_worker_manifest(&mut worker, &manifest.worker); @@ -1176,7 +1176,7 @@ impl Pod, St> { let scope_allocation = scope_lock::adopt_allocation(manifest.pod.name.clone(), std::process::id())?; - let client = provider::build_client(&manifest.provider)?; + let client = provider::build_client(&manifest.model)?; let mut worker = Worker::new(client); apply_worker_manifest(&mut worker, &manifest.worker); diff --git a/crates/pod/src/spawn_pod.rs b/crates/pod/src/spawn_pod.rs index 95492297..801125f7 100644 --- a/crates/pod/src/spawn_pod.rs +++ b/crates/pod/src/spawn_pod.rs @@ -14,8 +14,8 @@ use std::time::Duration; use async_trait::async_trait; use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta, ToolOutput}; use manifest::{ - Permission, PodManifestConfig, PodMetaConfig, ProviderConfig, ProviderConfigPartial, - ScopeConfig, ScopeRule, WorkerManifestConfig, + ModelConfig, ModelConfigPartial, Permission, PodManifestConfig, PodMetaConfig, ScopeConfig, + ScopeRule, WorkerManifestConfig, }; use protocol::Method; use protocol::stream::JsonLineWriter; @@ -118,7 +118,7 @@ pub struct SpawnPodTool { /// Pod's overlay TOML so the child does not need its own provider /// configuration in the manifest cascade. Per-spawn override is /// out of scope here (see `tickets/spawn-inherit-provider.md`). - spawner_provider: ProviderConfig, + spawner_model: ModelConfig, } impl SpawnPodTool { @@ -129,7 +129,7 @@ impl SpawnPodTool { spawner_pwd: PathBuf, registry: Arc, parent_socket: Option, - spawner_provider: ProviderConfig, + spawner_model: ModelConfig, ) -> Self { Self { spawner_name, @@ -138,7 +138,7 @@ impl SpawnPodTool { spawner_pwd, registry, parent_socket, - spawner_provider, + spawner_model, } } } @@ -196,7 +196,7 @@ impl Tool for SpawnPodTool { &input.name, &instruction, &scope_allow, - &self.spawner_provider, + &self.spawner_model, ) { Ok(s) => s, Err(e) => { @@ -350,17 +350,17 @@ fn build_overlay_toml( name: &str, instruction: &str, scope_allow: &[ScopeRule], - provider: &ProviderConfig, + model: &ModelConfig, ) -> Result { let overlay = PodManifestConfig { pod: PodMetaConfig { name: Some(name.to_string()), }, - provider: ProviderConfigPartial { - kind: Some(provider.kind), - model: Some(provider.model.clone()), - api_key_file: provider.api_key_file.clone(), - base_url: provider.base_url.clone(), + model: ModelConfigPartial { + scheme: Some(model.scheme), + base_url: model.base_url.clone(), + model_id: Some(model.model_id.clone()), + auth: Some(model.auth.clone()), }, worker: WorkerManifestConfig { instruction: Some(instruction.to_string()), @@ -458,7 +458,7 @@ pub fn spawn_pod_tool( spawner_pwd: PathBuf, registry: Arc, parent_socket: Option, - spawner_provider: ProviderConfig, + spawner_model: ModelConfig, ) -> ToolDefinition { Arc::new(move || { let schema = schemars::schema_for!(SpawnPodInput); @@ -473,7 +473,7 @@ pub fn spawn_pod_tool( spawner_pwd.clone(), registry.clone(), parent_socket.clone(), - spawner_provider.clone(), + spawner_model.clone(), )); (meta, tool) }) @@ -482,29 +482,30 @@ pub fn spawn_pod_tool( #[cfg(test)] mod tests { use super::*; - use manifest::ProviderKind; + use manifest::{AuthRef, SchemeKind}; #[test] - fn overlay_inherits_spawner_provider() { - let provider = ProviderConfig { - kind: ProviderKind::Anthropic, - model: "claude-sonnet-4".into(), - api_key_file: Some(PathBuf::from("/etc/keys/anthropic")), + fn overlay_inherits_spawner_model() { + let model = ModelConfig { + scheme: SchemeKind::Anthropic, base_url: Some("https://example.test".into()), + model_id: "claude-sonnet-4".into(), + auth: AuthRef::ApiKey { + env: None, + file: Some(PathBuf::from("/etc/keys/anthropic")), + }, }; - let toml_str = build_overlay_toml("child", "$insomnia/default", &[], &provider).unwrap(); + let toml_str = build_overlay_toml("child", "$insomnia/default", &[], &model).unwrap(); let parsed = PodManifestConfig::from_toml(&toml_str).unwrap(); - assert_eq!(parsed.provider.kind, Some(ProviderKind::Anthropic)); - assert_eq!(parsed.provider.model.as_deref(), Some("claude-sonnet-4")); - assert_eq!( - parsed.provider.api_key_file.as_deref(), - Some(Path::new("/etc/keys/anthropic")) - ); - assert_eq!( - parsed.provider.base_url.as_deref(), - Some("https://example.test") - ); + assert_eq!(parsed.model.scheme, Some(SchemeKind::Anthropic)); + assert_eq!(parsed.model.model_id.as_deref(), Some("claude-sonnet-4")); + assert_eq!(parsed.model.base_url.as_deref(), Some("https://example.test")); + let file = match parsed.model.auth { + Some(AuthRef::ApiKey { file, .. }) => file, + _ => panic!("expected ApiKey"), + }; + assert_eq!(file.as_deref(), Some(Path::new("/etc/keys/anthropic"))); } } diff --git a/crates/pod/tests/controller_test.rs b/crates/pod/tests/controller_test.rs index f694cb45..e3f0f030 100644 --- a/crates/pod/tests/controller_test.rs +++ b/crates/pod/tests/controller_test.rs @@ -110,9 +110,9 @@ const MANIFEST_TOML: &str = r#" name = "test-pod" pwd = "./" -[provider] -kind = "anthropic" -model = "test-model" +[model] +scheme = "anthropic" +model_id = "test-model" [worker] max_tokens = 100 diff --git a/crates/pod/tests/spawn_pod_test.rs b/crates/pod/tests/spawn_pod_test.rs index cad5d102..94859c91 100644 --- a/crates/pod/tests/spawn_pod_test.rs +++ b/crates/pod/tests/spawn_pod_test.rs @@ -11,7 +11,7 @@ use std::path::{Path, PathBuf}; use std::sync::{LazyLock, Mutex}; use llm_worker::tool::{ToolError, ToolOutput}; -use manifest::{Permission, ProviderConfig, ProviderKind, ScopeRule}; +use manifest::{AuthRef, ModelConfig, Permission, SchemeKind, ScopeRule}; use pod::runtime_dir::{RuntimeDir, SpawnedPodRecord}; use pod::scope_lock::{self, LockFileGuard}; use pod::spawn_pod::spawn_pod_tool; @@ -132,15 +132,15 @@ fn which_true() -> String { "/bin/true".into() } -/// Tests don't exercise the provider — they intercept the spawned +/// Tests don't exercise the model — they intercept the spawned /// child via a mock socket — but `spawn_pod_tool` needs a value to -/// embed in the overlay TOML. Any well-formed `ProviderConfig` works. -fn dummy_provider() -> ProviderConfig { - ProviderConfig { - kind: ProviderKind::Anthropic, - model: "claude-test".into(), - api_key_file: None, +/// embed in the overlay TOML. Any well-formed `ModelConfig` works. +fn dummy_model() -> ModelConfig { + ModelConfig { + scheme: SchemeKind::Anthropic, base_url: None, + model_id: "claude-test".into(), + auth: AuthRef::None, } } @@ -171,7 +171,7 @@ async fn spawn_pod_delegates_scope_and_sends_run() { allow_root.path().to_path_buf(), registry, None, - dummy_provider(), + dummy_model(), ); let (_meta, tool) = def(); @@ -234,7 +234,7 @@ async fn spawn_pod_rejects_scope_outside_spawner() { allow_root.path().to_path_buf(), registry, None, - dummy_provider(), + dummy_model(), ); let (_meta, tool) = def(); @@ -293,7 +293,7 @@ async fn spawn_pod_rolls_back_reservation_when_socket_never_appears() { allow_root.path().to_path_buf(), registry, None, - dummy_provider(), + dummy_model(), ); let (_meta, tool) = def(); diff --git a/crates/pod/tests/system_prompt_template_test.rs b/crates/pod/tests/system_prompt_template_test.rs index f7d7f3ac..2a214027 100644 --- a/crates/pod/tests/system_prompt_template_test.rs +++ b/crates/pod/tests/system_prompt_template_test.rs @@ -79,9 +79,9 @@ const MINIMAL_MANIFEST_TOML: &str = r#" name = "test-pod" pwd = "./" -[provider] -kind = "anthropic" -model = "test-model" +[model] +scheme = "anthropic" +model_id = "test-model" [worker] max_tokens = 100 diff --git a/crates/provider/README.md b/crates/provider/README.md index 62576473..ce6eda33 100644 --- a/crates/provider/README.md +++ b/crates/provider/README.md @@ -1,8 +1,15 @@ # provider -マニフェストの設定から適切な LLM クライアントを構築するファクトリクレート。APIキーの環境変数解決を含む。 +マニフェストの `ModelConfig` から適切な `LlmClient`(`HttpTransport`)を構築するファクトリクレート。APIキーの環境変数 / ファイル解決と scheme ↔ auth の整合検証を担う。 ## 公開型 -- `build_client(config: &ProviderConfig) -> Result, ProviderError>` — プロバイダ設定に応じたクライアント生成(Anthropic, OpenAI, Gemini, Ollama) +- `build_client(config: &ModelConfig) -> Result, ProviderError>` — `SchemeKind` と `AuthRef` から `HttpTransport` を構築 - `ProviderError` — クライアント構築エラー + +## 責務 + +- `AuthRef::ApiKey` を `ResolvedAuth::ApiKey` に解決(env → file の優先順位) +- `AuthRef::None` を `ResolvedAuth::None` に変換 +- `Scheme::required_auth()` と `ResolvedAuth` の妥当性検証(非対応組合せは構築エラー) +- 既知モデルは scheme の静的テーブル、未知モデルは scheme 既定の `ModelCapability` を採用 diff --git a/crates/provider/src/lib.rs b/crates/provider/src/lib.rs index ffc10915..40cbee58 100644 --- a/crates/provider/src/lib.rs +++ b/crates/provider/src/lib.rs @@ -1,99 +1,148 @@ -use llm_worker::llm_client::client::LlmClient; -use llm_worker::llm_client::providers::anthropic::AnthropicClient; -use llm_worker::llm_client::providers::gemini::GeminiClient; -use llm_worker::llm_client::providers::ollama::OllamaClient; -use llm_worker::llm_client::providers::openai::OpenAIClient; +//! Pod マニフェストの [`ModelConfig`] を [`Box`] +//! に落とすファクトリ。 +//! +//! * `SchemeKind` を各 `Scheme` 実装にマップ +//! * `AuthRef` を環境変数 / ファイルから解決して [`ResolvedAuth`] に +//! * `scheme.required_auth()` と解決値を照合(非対応組合せは構築エラー) +//! * `ModelCapability` は明示指定 → scheme 静的テーブル → 未知時はデフォルト +//! +//! llm-worker は低レベル基盤に留める方針なので、高レベル側で必要に +//! なる認証ストア解決(Codex OAuth の `~/.codex/auth.json` 読取等)は +//! このクレートに追加する。 -use manifest::{ProviderConfig, ProviderKind}; +use llm_worker::llm_client::{ + LlmClient, + capability::{CacheStrategy, ModelCapability, StructuredOutput, ToolCallingSupport}, + scheme::{ + Scheme, anthropic::AnthropicScheme, gemini::GeminiScheme, openai_chat::OpenAIScheme, + }, + transport::{HttpTransport, ResolvedAuth}, +}; -/// Errors from provider client construction. +use manifest::{AuthRef, ModelConfig, SchemeKind}; + +/// プロバイダ構築時のエラー。 #[derive(Debug, thiserror::Error)] pub enum ProviderError { - #[error("provider configuration error: {0}")] + #[error("model configuration error: {0}")] Config(String), - #[error("API key not provided for {provider}")] - ApiKeyMissing { provider: String }, + #[error("API key not provided for scheme {scheme:?}")] + ApiKeyMissing { scheme: SchemeKind }, + + #[error("scheme {scheme:?} does not support this auth")] + AuthMismatch { scheme: SchemeKind }, + + #[error("scheme {scheme:?} is not implemented yet")] + SchemeNotImplemented { scheme: SchemeKind }, } -/// Resolve the API key for the given provider configuration. +/// `AuthRef` をランタイムで使える [`ResolvedAuth`] に解決する。 /// -/// Resolution order: -/// 1. Environment variable `INSOMNIA_API_KEY_{KIND}` -/// 2. File specified by `api_key_file` (must be an absolute path; the -/// cascade layer is responsible for normalisation) -/// 3. `None` -fn resolve_api_key(config: &ProviderConfig) -> Result, ProviderError> { - let env_name = config.kind.env_var_name(); - if let Ok(val) = std::env::var(&env_name) { - return Ok(Some(val)); - } - - if let Some(ref path) = config.api_key_file { - if !path.is_absolute() { - return Err(ProviderError::Config(format!( - "api_key_file must be absolute: {}", - path.display() - ))); +/// 解決順: +/// 1. `AuthRef::ApiKey { env, .. }` で env が指定されていればその変数を参照 +/// 2. そうでなければ scheme 既定の環境変数 (`SchemeKind::default_env_var`) +/// 3. それでも無ければ `file` を読む(絶対パスのみ) +fn resolve_auth( + scheme: SchemeKind, + auth: &AuthRef, +) -> Result { + match auth { + AuthRef::None => Ok(ResolvedAuth::None), + AuthRef::ApiKey { env, file } => { + let env_name = env.as_deref().unwrap_or(scheme.default_env_var()); + if let Ok(val) = std::env::var(env_name) + && !val.is_empty() + { + return Ok(ResolvedAuth::ApiKey(val)); + } + if let Some(path) = file { + if !path.is_absolute() { + return Err(ProviderError::Config(format!( + "auth.file must be absolute: {}", + path.display() + ))); + } + let contents = std::fs::read_to_string(path).map_err(|e| { + ProviderError::Config(format!( + "failed to read auth.file {}: {e}", + path.display() + )) + })?; + return Ok(ResolvedAuth::ApiKey(contents.trim().to_owned())); + } + Err(ProviderError::ApiKeyMissing { scheme }) } - let contents = std::fs::read_to_string(path).map_err(|e| { - ProviderError::Config(format!( - "failed to read api_key_file {}: {e}", - path.display() - )) - })?; - return Ok(Some(contents.trim().to_owned())); + AuthRef::CodexOAuth => Err(ProviderError::Config( + "codex_oauth auth not yet implemented (tickets/llm-auth-codex-oauth)".into(), + )), } - - Ok(None) } -/// Build an [`LlmClient`] from a [`ProviderConfig`]. -/// -/// `api_key_file` (if set) must already be an absolute path — relative -/// paths are rejected because cascade resolution is the sole source of -/// path normalisation. -pub fn build_client(config: &ProviderConfig) -> Result, ProviderError> { - let api_key = resolve_api_key(config)?; +/// `SchemeKind` ごとに固定のデフォルト capability(未知モデル用)。 +fn default_capability(scheme: SchemeKind) -> ModelCapability { + match scheme { + SchemeKind::Anthropic => ModelCapability { + tool_calling: ToolCallingSupport::Parallel, + structured_output: StructuredOutput::JsonSchema, + reasoning: None, + vision: false, + // Ollama の /v1/messages 流用時に cache_control を拒否されないよう Auto + prompt_caching: CacheStrategy::Auto, + }, + SchemeKind::OpenaiChat | SchemeKind::OpenaiResponses => ModelCapability { + tool_calling: ToolCallingSupport::Parallel, + structured_output: StructuredOutput::JsonSchema, + reasoning: None, + vision: false, + prompt_caching: CacheStrategy::Auto, + }, + SchemeKind::Gemini => ModelCapability { + tool_calling: ToolCallingSupport::Parallel, + structured_output: StructuredOutput::JsonSchema, + reasoning: None, + vision: true, + prompt_caching: CacheStrategy::Auto, + }, + } +} - match config.kind { - ProviderKind::Anthropic => { - let key = api_key.ok_or_else(|| ProviderError::ApiKeyMissing { - provider: "anthropic".into(), - })?; - let mut client = AnthropicClient::new(key, &config.model); - if let Some(ref url) = config.base_url { - client = client.with_base_url(url); - } - Ok(Box::new(client)) - } - ProviderKind::Openai => { - let key = api_key.ok_or_else(|| ProviderError::ApiKeyMissing { - provider: "openai".into(), - })?; - let mut client = OpenAIClient::new(key, &config.model); - if let Some(ref url) = config.base_url { - client = client.with_base_url(url); - } - Ok(Box::new(client)) - } - ProviderKind::Gemini => { - let key = api_key.ok_or_else(|| ProviderError::ApiKeyMissing { - provider: "gemini".into(), - })?; - let mut client = GeminiClient::new(key, &config.model); - if let Some(ref url) = config.base_url { - client = client.with_base_url(url); - } - Ok(Box::new(client)) - } - ProviderKind::Ollama => { - let mut client = OllamaClient::new(&config.model); - if let Some(ref url) = config.base_url { - client = client.with_base_url(url); - } - Ok(Box::new(client)) - } +fn build_transport( + scheme: S, + config: &ModelConfig, + resolved: ResolvedAuth, +) -> Result, ProviderError> { + if !resolved.matches(scheme.required_auth()) { + return Err(ProviderError::AuthMismatch { + scheme: config.scheme, + }); + } + let capability = scheme + .capability_for(&config.model_id) + .unwrap_or_else(|| default_capability(config.scheme)); + let base_url = config + .base_url + .clone() + .unwrap_or_else(|| scheme.default_base_url().to_string()); + Ok(Box::new(HttpTransport::new( + scheme, + config.model_id.clone(), + base_url, + resolved, + capability, + ))) +} + +/// [`ModelConfig`] から [`LlmClient`] を構築する。 +pub fn build_client(config: &ModelConfig) -> Result, ProviderError> { + let resolved = resolve_auth(config.scheme, &config.auth)?; + match config.scheme { + SchemeKind::Anthropic => build_transport(AnthropicScheme::new(), config, resolved), + SchemeKind::OpenaiChat => build_transport(OpenAIScheme::new(), config, resolved), + SchemeKind::Gemini => build_transport(GeminiScheme::new(), config, resolved), + SchemeKind::OpenaiResponses => Err(ProviderError::SchemeNotImplemented { + scheme: config.scheme, + }), } } @@ -104,23 +153,29 @@ mod tests { use std::io::Write; use std::path::PathBuf; - fn anthropic_config() -> ProviderConfig { - ProviderConfig { - kind: ProviderKind::Anthropic, - model: "test-model".into(), - api_key_file: None, + fn anthropic_config() -> ModelConfig { + ModelConfig { + scheme: SchemeKind::Anthropic, base_url: None, + model_id: "claude-sonnet-4-20250514".into(), + auth: AuthRef::ApiKey { + env: None, + file: None, + }, } } #[test] #[serial] fn resolve_from_env() { - let env_name = ProviderKind::Anthropic.env_var_name(); - unsafe { std::env::set_var(&env_name, "sk-from-env") }; - let key = resolve_api_key(&anthropic_config()).unwrap(); - unsafe { std::env::remove_var(&env_name) }; - assert_eq!(key.as_deref(), Some("sk-from-env")); + let env_name = SchemeKind::Anthropic.default_env_var(); + unsafe { std::env::set_var(env_name, "sk-from-env") }; + let auth = resolve_auth(SchemeKind::Anthropic, &anthropic_config().auth).unwrap(); + unsafe { std::env::remove_var(env_name) }; + match auth { + ResolvedAuth::ApiKey(k) => assert_eq!(k, "sk-from-env"), + _ => panic!("expected ApiKey"), + } } #[test] @@ -129,14 +184,20 @@ mod tests { let key_path = dir.path().join("key.txt"); { let mut f = std::fs::File::create(&key_path).unwrap(); - write!(f, " sk-from-file\n").unwrap(); + writeln!(f, " sk-from-file").unwrap(); } - let config = ProviderConfig { - api_key_file: Some(key_path), + let config = ModelConfig { + auth: AuthRef::ApiKey { + env: Some("INSOMNIA_API_KEY_NONEXISTENT".into()), + file: Some(key_path), + }, ..anthropic_config() }; - let key = resolve_api_key(&config).unwrap(); - assert_eq!(key.as_deref(), Some("sk-from-file")); + let auth = resolve_auth(config.scheme, &config.auth).unwrap(); + match auth { + ResolvedAuth::ApiKey(k) => assert_eq!(k, "sk-from-file"), + _ => panic!("expected ApiKey"), + } } #[test] @@ -146,43 +207,57 @@ mod tests { let key_path = dir.path().join("key.txt"); std::fs::write(&key_path, "sk-from-file").unwrap(); - let env_name = ProviderKind::Anthropic.env_var_name(); - unsafe { std::env::set_var(&env_name, "sk-from-env") }; + let env_name = SchemeKind::Anthropic.default_env_var(); + unsafe { std::env::set_var(env_name, "sk-from-env") }; - let config = ProviderConfig { - api_key_file: Some(key_path), + let config = ModelConfig { + auth: AuthRef::ApiKey { + env: None, + file: Some(key_path), + }, ..anthropic_config() }; - let key = resolve_api_key(&config).unwrap(); - unsafe { std::env::remove_var(&env_name) }; - assert_eq!(key.as_deref(), Some("sk-from-env")); + let auth = resolve_auth(config.scheme, &config.auth).unwrap(); + unsafe { std::env::remove_var(env_name) }; + match auth { + ResolvedAuth::ApiKey(k) => assert_eq!(k, "sk-from-env"), + _ => panic!("expected ApiKey"), + } } #[test] - fn relative_api_key_file_is_rejected() { - let config = ProviderConfig { - api_key_file: Some(PathBuf::from("keys/anthropic")), + fn relative_auth_file_is_rejected() { + let config = ModelConfig { + auth: AuthRef::ApiKey { + env: Some("INSOMNIA_API_KEY_NONEXISTENT".into()), + file: Some(PathBuf::from("keys/anthropic")), + }, ..anthropic_config() }; - let err = resolve_api_key(&config).unwrap_err(); + let err = resolve_auth(config.scheme, &config.auth).unwrap_err(); assert!(matches!(err, ProviderError::Config(_))); } #[test] + #[serial] fn missing_key_returns_api_key_missing() { - let config = anthropic_config(); - let result = build_client(&config); + let env_name = SchemeKind::Anthropic.default_env_var(); + unsafe { std::env::remove_var(env_name) }; + let result = build_client(&anthropic_config()); assert!(matches!(result, Err(ProviderError::ApiKeyMissing { .. }))); } #[test] fn ollama_succeeds_without_key() { - let config = ProviderConfig { - kind: ProviderKind::Ollama, - model: "llama3".into(), - api_key_file: None, - base_url: None, + // Ollama = Anthropic scheme + base_url 差し替え + AuthRef::None + let config = ModelConfig { + scheme: SchemeKind::Anthropic, + base_url: Some("http://localhost:11434".into()), + model_id: "llama3".into(), + auth: AuthRef::None, }; + // scheme.required_auth() が XApiKey でも ResolvedAuth::None は許容する + // (None は全 scheme で受け入れるため) assert!(build_client(&config).is_ok()); } } diff --git a/tickets/llm-model-config.md b/tickets/llm-model-config.md index 14c5fa41..8d8dbdac 100644 --- a/tickets/llm-model-config.md +++ b/tickets/llm-model-config.md @@ -1,5 +1,9 @@ # LLM モデル設定の再編 +> **レビュー中** — 詳細は [`llm-model-config.review.md`](llm-model-config.review.md) +> 主な指摘: 要件 6 の `ModelConfig.capability` override 未実装、`validate_config` の機能退化(OpenAI top_k warning 消失)。どちらも判断待ち。 + + ## 背景 決定済みの LLM プロバイダサポート方針(`docs/plan/llm_providers.md`)に従って llm-worker のプロバイダ層を再編する。Pod 側で「使う LLM モデル」を宣言する構造にし、共通の通信層 + scheme の組合せで任意のプロバイダを収容できるようにする。 diff --git a/tickets/llm-model-config.review.md b/tickets/llm-model-config.review.md new file mode 100644 index 00000000..983dde31 --- /dev/null +++ b/tickets/llm-model-config.review.md @@ -0,0 +1,105 @@ +# LLM モデル設定の再編 — レビュー + +## 前提・要件の再確認 + +`docs/plan/llm_providers.md` の第一級/二次/非サポート方針と、チケット本体の 9 要件 + 6 設計決定 を前提に、実装が意図と整合しているかを確認した。変更量は +1608 / -1174 行、41 ファイル。 + +`cargo check` および `cargo test --workspace --lib` は通過。 + +## 要件達成度 + +| # | 要件 | 状況 | メモ | +|---|---|---|---| +| 1 | Pod マニフェスト `[model]` 宣言 | ✓ | `manifest/src/model.rs` + `config.rs::ModelConfigPartial` | +| 2 | `providers/` 層廃止 | ✓ | 4 ファイル削除、`HttpTransport` 1 本に集約 | +| 3 | 既存 scheme 再編(openai → openai_chat、Ollama は流用) | ✓ | 各 scheme に `scheme_impl.rs` + `capability.rs` 追加 | +| 4 | `AuthRef` 分離 | ✓ | `manifest/model.rs` + `llm_client/auth.rs::AuthRequirement` | +| 5 | 第一級/二次 方針整合 | ✓ | `provider/lib.rs::build_client` + `SchemeKind` | +| 6 | `ModelCapability` 分離 | △ | 型定義・静的テーブル・default はあり。**`ModelConfig` からのマニフェスト override は未実装** | +| 7 | Streaming 現状維持 | ✓ | Event 型変更なし、`Scheme::State` で Anthropic block_type 補完 | +| 8 | Ollama 運用注意点 | ✓ | `default_capability` で Anthropic scheme が `CacheStrategy::Auto`、`ollama_succeeds_without_key` テストあり | +| 9 | 完了時動作 | ✓ | ビルド通過、既存テスト通過 | + +## 設計決定の反映 + +| # | 決定 | 反映 | +|---|---|---| +| 1 | `Scheme` trait 方針A(全面抽象化) | ✓ `scheme/mod.rs::Scheme` に URL/認証/ヘッダ/body/SSE を集約。`State` associated type で Anthropic の `block_type` 補完を綺麗に処理 | +| 2 | `AuthRef` 組合せ検証 方針B(構築時) | ✓ `ResolvedAuth::matches` + `build_transport` で照合 | +| 3 | `crates/provider` 方針A(残す) | ✓ 薄いファクトリとして維持 | +| 4 | `ModelCapability` ハイブリッド | △ scheme 側静的テーブルはあるが、マニフェスト override 側が欠落 | +| 5 | フィールド単位 override | ✓ `ModelConfigPartial::merge` | +| 6 | TOML 後方互換切り | ✓ 旧 `[provider]` は完全に新 `[model]` に置換 | + +## 指摘事項 + +### 優先度: 中 + +#### 1. 要件 6 の「ModelConfig で明示宣言すれば override」が未実装 + +`ModelConfig` 構造体に `capability: Option` フィールドがない。設計決定 4(ハイブリッド: scheme 側テーブル → マニフェスト override → デフォルト)の **override 側が欠落**している。 + +影響: +- scheme 静的テーブルに無いモデル(OpenRouter / xAI の Grok / Groq の Kimi / OpenAI 互換ルーター系)は必ず `default_capability(scheme)` に落ちる +- 二次サポートの共通枠の実用性に直結(例: Grok の `ReasoningSupport::Effort` が効かず reasoning 送れない) + +対応案: +- A. 今チケットで `ModelConfig.capability: Option` を追加し `build_transport` で優先順位 `config.capability → scheme.capability_for → default_capability` に +- B. Scope 外として明示し別チケットに切り出す + +ユーザーの決定方針(二次サポートを共通枠でカバー)からすると A が自然。ticket 本体の Scope 外にも記載がないため、A を推奨。 + +#### 2. `validate_config` の機能退化 + +旧 `providers/openai.rs` の `OpenAIClient` は `LlmClient::validate_config` をオーバーライドし「OpenAI は `top_k` 非対応」の warning を出していた。削除された `tests/validation_test.rs` はこの warning をテストしていた。 + +今の `HttpTransport` は `validate_config` をオーバーライドしておらず、`LlmClient` trait のデフォルト実装(空 `Vec`)が使われる。つまり **`Worker::validate()` は scheme による制約(top_k, logprobs 等)を検出できなくなっている**。 + +対応案: +- `Scheme` trait に `validate_config(&RequestConfig) -> Vec` を追加し、`HttpTransport` 側で scheme に委譲 +- 最低限 OpenAI Chat scheme で旧と同じ top_k warning を再実装 +- 合わせて新形式での regression test を追加(旧 `validation_test.rs` の代替) + +### 優先度: 低 + +#### 3. `default_capability` の配置 + +`crates/provider/lib.rs::default_capability` が `SchemeKind` ごとに直書き(同じ情報が `Scheme` trait 実装側と分離して存在)。`Scheme::default_capability()` メソッドに移動する方が関心事が集約される。今の形でも動作するが、新 scheme 追加時に 2 箇所編集が必要な点が弱い。 + +#### 4. `AuthRequirement` 判定の緩さ + +`ResolvedAuth::matches` は `(None, _) => true` で常にパス。これは Ollama Anthropic 流用(`AuthRef::None` で `XApiKey` 要求)のための意図的設計だが、本来認証必須の scheme(Anthropic 本家)に誤って `AuthRef::None` を渡しても構築成功し、実行時の 401 で初めて失敗する。 + +より厳密にするなら `AuthRequirement::XApiKeyOptional` のようなバリアント導入で分離できるが、実害は小さいので現状維持も許容範囲。 + +#### 5. rustdoc のクロス crate リンク + +`scheme/mod.rs:47` の `[AuthRef](../../../manifest/enum.AuthRef.html)` は相対リンクで、`cargo doc --workspace` 時に切れる可能性。`[`manifest::AuthRef`]` 形式のクロス crate リンクにしておくと rustdoc が解決できる。 + +## アーキテクチャ評価 + +### 良い点 +- `Scheme::State` associated type の導入で「Anthropic の `content_block_stop` に `block_type` が載らない」といった具体的な痛みを抽象内で解決 +- `ResolvedAuth::matches` による構築時検証が `build_transport` 1 箇所に集約、分岐が明瞭 +- `ModelConfigPartial::merge` のフィールド単位 override が既存の cascade layer と自然に噛み合う +- Ollama 運用の制約(`cache_control`/`tool_choice`/`metadata` 不可)が capability + scheme 側送出制御で分散、`provider::tests::ollama_succeeds_without_key` で境界条件がテストされている +- `spawn_pod.rs::overlay_inherits_spawner_model` テストで親 Pod の `ModelConfig` が子にシームレスに伝播することを確認 + +### コードベースを歪めていないか +- 旧 `providers/` の 4 ファイルは綺麗に削除、重複は残っていない +- `SchemeKind::OpenaiResponses` はマニフェスト側に先行存在するが、`build_client` で `SchemeNotImplemented` エラーを明示的に返す(別チケットで肉付け前提)。これは依存チケット設計通り +- `AuthRef::CodexOAuth` も同様に予約のみ、`resolve_auth` でエラーを返す + +### 不必要な実装 +- `AuthRequirement::Custom` バリアントは Codex OAuth 用の先行予約で、今チケットでは使われない。将来の拡張点として小さい負債、許容範囲 + +## 総合判定 + +**コア要件は達成されている。構造再編は綺麗で、Scope の切り方も妥当**。実装ミス的な重大欠陥はなく、既存テストも全て通過している。 + +ただし以下 2 点の判断が必要: + +1. **要件 6 の `ModelConfig.capability` override**: 今チケットで追加するか、Scope 外として明示するか +2. **`validate_config` の退化**: 復活させるか、ticket 本体に「OpenAI の top_k warning 等の validate 機能は scheme 再編で意図的に落とした」旨を明記するか + +どちらも「今 close して後で別チケット」でも進められるが、最低限ticket 本体への記載(Scope 外明示 or 後続タスク言及)が必要。