From 97326eef04b07960bc402be695cf4c3acd6ac397 Mon Sep 17 00:00:00 2001 From: Hare Date: Mon, 20 Apr 2026 02:59:16 +0900 Subject: [PATCH] =?UTF-8?q?openai-responses=E5=AF=BE=E5=BF=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../llm-worker/src/llm_client/scheme/mod.rs | 1 + .../scheme/openai_chat/capability.rs | 56 +- .../src/llm_client/scheme/openai_chat/mod.rs | 2 +- .../scheme/openai_responses/capability.rs | 46 ++ .../scheme/openai_responses/events.rs | 782 ++++++++++++++++++ .../llm_client/scheme/openai_responses/mod.rs | 59 ++ .../scheme/openai_responses/request.rs | 461 +++++++++++ .../scheme/openai_responses/scheme_impl.rs | 59 ++ crates/llm-worker/src/llm_client/types.rs | 31 +- crates/provider/src/lib.rs | 7 +- tickets/llm-scheme-openai-responses.md | 78 +- tickets/llm-scheme-openai-responses.review.md | 77 ++ 12 files changed, 1613 insertions(+), 46 deletions(-) create mode 100644 crates/llm-worker/src/llm_client/scheme/openai_responses/capability.rs create mode 100644 crates/llm-worker/src/llm_client/scheme/openai_responses/events.rs create mode 100644 crates/llm-worker/src/llm_client/scheme/openai_responses/mod.rs create mode 100644 crates/llm-worker/src/llm_client/scheme/openai_responses/request.rs create mode 100644 crates/llm-worker/src/llm_client/scheme/openai_responses/scheme_impl.rs create mode 100644 tickets/llm-scheme-openai-responses.review.md diff --git a/crates/llm-worker/src/llm_client/scheme/mod.rs b/crates/llm-worker/src/llm_client/scheme/mod.rs index c6d53047..cc794b3f 100644 --- a/crates/llm-worker/src/llm_client/scheme/mod.rs +++ b/crates/llm-worker/src/llm_client/scheme/mod.rs @@ -10,6 +10,7 @@ pub mod anthropic; pub mod gemini; pub mod openai_chat; +pub mod openai_responses; use serde_json::Value; diff --git a/crates/llm-worker/src/llm_client/scheme/openai_chat/capability.rs b/crates/llm-worker/src/llm_client/scheme/openai_chat/capability.rs index f664a50c..5fa5b94e 100644 --- 
a/crates/llm-worker/src/llm_client/scheme/openai_chat/capability.rs +++ b/crates/llm-worker/src/llm_client/scheme/openai_chat/capability.rs @@ -3,47 +3,71 @@ //! OpenAI 本家の主要モデルのみ網羅する。OpenRouter / xAI / Groq 等は //! モデル ID が各社独自なので、マニフェスト側で明示 override する //! 前提。 +//! +//! [`classify`] はモデル ID から family を判定する一次情報で、 +//! `scheme/openai_responses` からも参照される。 use crate::llm_client::capability::{ CacheStrategy, ModelCapability, ReasoningSupport, StructuredOutput, ToolCallingSupport, }; -pub(crate) fn lookup(model_id: &str) -> Option { - // GPT-5 / o1 / o3 / o4 reasoning 系 +/// OpenAI 本家のモデル family 分類。 +/// +/// `openai_chat` と `openai_responses` で共有する一次情報。各 scheme は +/// この分類に自 scheme 固有の `ReasoningSupport` 等を当てはめて +/// `ModelCapability` を組み立てる。 +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum OpenAiFamily { + /// GPT-5 / o1 / o3 / o4 系 — reasoning 対応 + Reasoning, + /// GPT-4o / GPT-4 系 + Gpt4, + /// GPT-3.5 系(旧式) + Gpt35, +} + +/// モデル ID の prefix から family を判定する。未知は `None`。 +pub(crate) fn classify(model_id: &str) -> Option { if model_id.starts_with("gpt-5") || model_id.starts_with("o1") || model_id.starts_with("o3") || model_id.starts_with("o4") { - return Some(ModelCapability { + return Some(OpenAiFamily::Reasoning); + } + if model_id.starts_with("gpt-4") { + return Some(OpenAiFamily::Gpt4); + } + if model_id.starts_with("gpt-3.5") { + return Some(OpenAiFamily::Gpt35); + } + None +} + +pub(crate) fn lookup(model_id: &str) -> Option { + classify(model_id).map(|family| match family { + OpenAiFamily::Reasoning => ModelCapability { tool_calling: ToolCallingSupport::Parallel, structured_output: StructuredOutput::JsonSchema, reasoning: Some(ReasoningSupport::Effort), vision: true, prompt_caching: CacheStrategy::Auto, - }); - } - // GPT-4o / GPT-4 系 - if model_id.starts_with("gpt-4") { - return Some(ModelCapability { + }, + OpenAiFamily::Gpt4 => ModelCapability { tool_calling: ToolCallingSupport::Parallel, structured_output: StructuredOutput::JsonSchema, 
reasoning: None, vision: true, prompt_caching: CacheStrategy::Auto, - }); - } - // GPT-3.5 系(旧式・structured output 限定) - if model_id.starts_with("gpt-3.5") { - return Some(ModelCapability { + }, + OpenAiFamily::Gpt35 => ModelCapability { tool_calling: ToolCallingSupport::Parallel, structured_output: StructuredOutput::JsonObject, reasoning: None, vision: false, prompt_caching: CacheStrategy::Auto, - }); - } - None + }, + }) } /// Scheme 既定の capability。OpenAI 互換ルーター系(xAI / Groq / OpenRouter 等) diff --git a/crates/llm-worker/src/llm_client/scheme/openai_chat/mod.rs b/crates/llm-worker/src/llm_client/scheme/openai_chat/mod.rs index 60ca62f7..f0e1812d 100644 --- a/crates/llm-worker/src/llm_client/scheme/openai_chat/mod.rs +++ b/crates/llm-worker/src/llm_client/scheme/openai_chat/mod.rs @@ -3,7 +3,7 @@ //! - リクエストJSON生成 //! - SSEイベントパース → Event変換 -mod capability; +pub(crate) mod capability; mod events; mod request; mod scheme_impl; diff --git a/crates/llm-worker/src/llm_client/scheme/openai_responses/capability.rs b/crates/llm-worker/src/llm_client/scheme/openai_responses/capability.rs new file mode 100644 index 00000000..3092d491 --- /dev/null +++ b/crates/llm-worker/src/llm_client/scheme/openai_responses/capability.rs @@ -0,0 +1,46 @@ +//! `model_id → ModelCapability` 静的テーブル(OpenAI Responses API)。 +//! +//! モデル family 判定は `scheme/openai_chat/capability.rs::classify` を +//! 共有する。Responses 側は `ReasoningSupport::Effort` 固定で、prompt +//! 
caching はサーバ側自動(`CacheStrategy::Auto`)。 + +use crate::llm_client::capability::{ + CacheStrategy, ModelCapability, ReasoningSupport, StructuredOutput, ToolCallingSupport, +}; +use crate::llm_client::scheme::openai_chat::capability::{OpenAiFamily, classify}; + +pub(crate) fn lookup(model_id: &str) -> Option { + classify(model_id).map(|family| match family { + OpenAiFamily::Reasoning => ModelCapability { + tool_calling: ToolCallingSupport::Parallel, + structured_output: StructuredOutput::JsonSchema, + reasoning: Some(ReasoningSupport::Effort), + vision: true, + prompt_caching: CacheStrategy::Auto, + }, + OpenAiFamily::Gpt4 => ModelCapability { + tool_calling: ToolCallingSupport::Parallel, + structured_output: StructuredOutput::JsonSchema, + reasoning: None, + vision: true, + prompt_caching: CacheStrategy::Auto, + }, + OpenAiFamily::Gpt35 => ModelCapability { + tool_calling: ToolCallingSupport::Parallel, + structured_output: StructuredOutput::JsonObject, + reasoning: None, + vision: false, + prompt_caching: CacheStrategy::Auto, + }, + }) +} + +pub(crate) fn default_capability() -> ModelCapability { + ModelCapability { + tool_calling: ToolCallingSupport::Parallel, + structured_output: StructuredOutput::JsonSchema, + reasoning: None, + vision: false, + prompt_caching: CacheStrategy::Auto, + } +} diff --git a/crates/llm-worker/src/llm_client/scheme/openai_responses/events.rs b/crates/llm-worker/src/llm_client/scheme/openai_responses/events.rs new file mode 100644 index 00000000..78bab0eb --- /dev/null +++ b/crates/llm-worker/src/llm_client/scheme/openai_responses/events.rs @@ -0,0 +1,782 @@ +//! OpenAI Responses API の SSE イベントパース +//! +//! `response.*` 名前空間の SSE を共通の [`Event`](crate::llm_client::event::Event) +//! に変換する。Responses の (output_index, content_index) 2 次元座標と +//! insomnia 側 1 次元 `BlockStart/Delta/Stop::index` のマッピングは +//! 
[`OpenAIResponsesState`] が保持する。 + +use std::collections::HashMap; + +use serde::Deserialize; + +use crate::llm_client::{ + ClientError, + event::{ + BlockDelta, BlockMetadata, BlockStart, BlockStop, BlockType, DeltaContent, ErrorEvent, + Event, ResponseStatus, StatusEvent, UsageEvent, + }, +}; + +/// SSE パース中の座標 → flat block index マップ。 +#[derive(Debug, Default)] +pub struct OpenAIResponsesState { + slots: HashMap, + next_index: usize, +} + +impl OpenAIResponsesState { + fn allocate(&mut self, key: SlotKey, block_type: BlockType) -> SlotInfo { + let info = SlotInfo { + flat_index: self.next_index, + block_type, + }; + self.next_index += 1; + self.slots.insert(key, info); + info + } + + /// 既存 slot を取得。無ければ `block_type` で暗黙に確保し、 + /// 新規確保したかを併せて返す。delta 先行 / content_part.added が + /// 抜けたときの防御。 + fn get_or_allocate( + &mut self, + key: SlotKey, + block_type: BlockType, + ) -> (SlotInfo, bool) { + if let Some(info) = self.slots.get(&key).copied() { + (info, false) + } else { + (self.allocate(key, block_type), true) + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +enum SlotKey { + /// tool_use (function_call / custom_tool_call) — output_item 全体で 1 block + OutputItem(usize), + /// message の output_text / reasoning item の reasoning_text + ContentPart { output: usize, content: usize }, + /// reasoning item の summary_text (summary_index) + Summary { output: usize, summary: usize }, +} + +#[derive(Debug, Clone, Copy)] +struct SlotInfo { + flat_index: usize, + block_type: BlockType, +} + +// ============================================================================ +// SSE イベントの JSON 構造 +// ============================================================================ + +#[derive(Debug, Deserialize)] +struct OutputItemAdded { + output_index: usize, + item: OutputItem, +} + +#[derive(Debug, Deserialize)] +struct OutputItemDone { + output_index: usize, + #[allow(dead_code)] + item: OutputItem, +} + +/// `response.output_item.added/done` の `item`。 
+#[derive(Debug, Deserialize)] +#[serde(tag = "type", rename_all = "snake_case")] +enum OutputItem { + Message { + #[allow(dead_code)] + id: Option, + }, + Reasoning { + #[allow(dead_code)] + id: Option, + }, + FunctionCall { + #[allow(dead_code)] + #[serde(default)] + id: Option, + call_id: String, + name: String, + }, + CustomToolCall { + #[allow(dead_code)] + #[serde(default)] + id: Option, + call_id: String, + name: String, + }, + #[serde(other)] + Other, +} + +#[derive(Debug, Deserialize)] +struct ContentPartAdded { + output_index: usize, + content_index: usize, + part: ContentPart, +} + +#[derive(Debug, Deserialize)] +struct ContentPartDone { + output_index: usize, + content_index: usize, + #[allow(dead_code)] + part: ContentPart, +} + +/// `response.content_part.added/done` の `part`。 +#[derive(Debug, Deserialize)] +#[serde(tag = "type", rename_all = "snake_case")] +enum ContentPart { + OutputText { + #[allow(dead_code)] + #[serde(default)] + text: String, + }, + ReasoningText { + #[allow(dead_code)] + #[serde(default)] + text: String, + }, + #[serde(other)] + Other, +} + +#[derive(Debug, Deserialize)] +struct OutputTextDelta { + output_index: usize, + content_index: usize, + delta: String, +} + +#[derive(Debug, Deserialize)] +struct ReasoningTextDelta { + output_index: usize, + content_index: usize, + delta: String, +} + +#[derive(Debug, Deserialize)] +struct ReasoningSummaryPartAdded { + output_index: usize, + summary_index: usize, + #[allow(dead_code)] + #[serde(default)] + part: Option, +} + +#[derive(Debug, Deserialize)] +struct ReasoningSummaryTextDelta { + output_index: usize, + summary_index: usize, + delta: String, +} + +#[derive(Debug, Deserialize)] +struct ReasoningSummaryPartDone { + output_index: usize, + summary_index: usize, +} + +#[derive(Debug, Deserialize)] +struct FunctionCallArgumentsDelta { + output_index: usize, + delta: String, +} + +#[derive(Debug, Deserialize)] +struct CustomToolCallInputDelta { + output_index: usize, + delta: String, 
+} + +#[derive(Debug, Deserialize)] +struct ResponseCompleted { + response: CompletedResponse, +} + +#[derive(Debug, Deserialize)] +struct CompletedResponse { + #[serde(default)] + usage: Option, +} + +#[derive(Debug, Deserialize)] +struct ResponsesUsage { + #[serde(default)] + input_tokens: Option, + #[serde(default)] + output_tokens: Option, + #[serde(default)] + total_tokens: Option, +} + +#[derive(Debug, Deserialize)] +struct ResponseFailed { + response: FailedResponse, +} + +#[derive(Debug, Deserialize)] +struct FailedResponse { + #[serde(default)] + error: Option, +} + +#[derive(Debug, Deserialize)] +struct ErrorDetail { + #[serde(rename = "type", default)] + error_type: Option, + #[serde(default)] + message: Option, +} + +#[derive(Debug, Deserialize)] +struct TopLevelError { + #[serde(default)] + message: Option, + #[serde(rename = "type", default)] + error_type: Option, + #[serde(default)] + code: Option, +} + +// ============================================================================ +// parse entry point +// ============================================================================ + +/// SSE フレーム 1 件をパースし、0 個以上の [`Event`] に変換する。 +/// +/// `event_type` は SSE の `event:` フィールド。未対応の event は +/// 静かに無視する。`data` が JSON でない / 必要なフィールドが抜けて +/// いる等は [`ClientError::Api`] で返す。 +pub(crate) fn parse_sse( + event_type: &str, + data: &str, + state: &mut OpenAIResponsesState, +) -> Result, ClientError> { + match event_type { + "response.created" => Ok(vec![Event::Status(StatusEvent { + status: ResponseStatus::Started, + })]), + + "response.completed" => { + let ev: ResponseCompleted = from_json(data)?; + let mut out = Vec::new(); + if let Some(usage) = ev.response.usage { + out.push(Event::Usage(UsageEvent { + input_tokens: usage.input_tokens, + output_tokens: usage.output_tokens, + total_tokens: usage.total_tokens.or_else(|| { + Some(usage.input_tokens.unwrap_or(0) + usage.output_tokens.unwrap_or(0)) + }), + cache_read_input_tokens: None, + 
cache_creation_input_tokens: None, + })); + } + out.push(Event::Status(StatusEvent { + status: ResponseStatus::Completed, + })); + Ok(out) + } + + "response.failed" | "response.incomplete" => { + let ev: ResponseFailed = from_json(data)?; + let (code, message) = match ev.response.error { + Some(err) => (err.error_type, err.message.unwrap_or_default()), + None => (None, format!("response {event_type}")), + }; + Ok(vec![ + Event::Error(ErrorEvent { code, message }), + Event::Status(StatusEvent { + status: ResponseStatus::Failed, + }), + ]) + } + + "response.output_item.added" => { + let ev: OutputItemAdded = from_json(data)?; + match ev.item { + OutputItem::FunctionCall { call_id, name, .. } + | OutputItem::CustomToolCall { call_id, name, .. } => { + let info = state + .allocate(SlotKey::OutputItem(ev.output_index), BlockType::ToolUse); + Ok(vec![Event::BlockStart(BlockStart { + index: info.flat_index, + block_type: BlockType::ToolUse, + metadata: BlockMetadata::ToolUse { + id: call_id, + name, + }, + })]) + } + _ => Ok(Vec::new()), + } + } + + "response.output_item.done" => { + let ev: OutputItemDone = from_json(data)?; + if let Some(info) = state.slots.remove(&SlotKey::OutputItem(ev.output_index)) { + Ok(vec![Event::BlockStop(BlockStop { + index: info.flat_index, + block_type: info.block_type, + stop_reason: None, + })]) + } else { + Ok(Vec::new()) + } + } + + "response.content_part.added" => { + let ev: ContentPartAdded = from_json(data)?; + let (block_type, metadata) = match ev.part { + ContentPart::OutputText { .. } => (BlockType::Text, BlockMetadata::Text), + ContentPart::ReasoningText { .. 
} => (BlockType::Thinking, BlockMetadata::Thinking), + ContentPart::Other => return Ok(Vec::new()), + }; + let info = state.allocate( + SlotKey::ContentPart { + output: ev.output_index, + content: ev.content_index, + }, + block_type, + ); + Ok(vec![Event::BlockStart(BlockStart { + index: info.flat_index, + block_type, + metadata, + })]) + } + + "response.content_part.done" => { + let ev: ContentPartDone = from_json(data)?; + if let Some(info) = state.slots.remove(&SlotKey::ContentPart { + output: ev.output_index, + content: ev.content_index, + }) { + Ok(vec![Event::BlockStop(BlockStop { + index: info.flat_index, + block_type: info.block_type, + stop_reason: None, + })]) + } else { + Ok(Vec::new()) + } + } + + "response.output_text.delta" => { + let ev: OutputTextDelta = from_json(data)?; + Ok(ensure_and_delta( + state, + SlotKey::ContentPart { + output: ev.output_index, + content: ev.content_index, + }, + BlockType::Text, + BlockMetadata::Text, + DeltaContent::Text(ev.delta), + )) + } + + "response.reasoning_text.delta" => { + let ev: ReasoningTextDelta = from_json(data)?; + Ok(ensure_and_delta( + state, + SlotKey::ContentPart { + output: ev.output_index, + content: ev.content_index, + }, + BlockType::Thinking, + BlockMetadata::Thinking, + DeltaContent::Thinking(ev.delta), + )) + } + + "response.reasoning_summary_part.added" => { + let ev: ReasoningSummaryPartAdded = from_json(data)?; + let info = state.allocate( + SlotKey::Summary { + output: ev.output_index, + summary: ev.summary_index, + }, + BlockType::Thinking, + ); + Ok(vec![Event::BlockStart(BlockStart { + index: info.flat_index, + block_type: BlockType::Thinking, + metadata: BlockMetadata::Thinking, + })]) + } + + "response.reasoning_summary_text.delta" => { + let ev: ReasoningSummaryTextDelta = from_json(data)?; + Ok(ensure_and_delta( + state, + SlotKey::Summary { + output: ev.output_index, + summary: ev.summary_index, + }, + BlockType::Thinking, + BlockMetadata::Thinking, + 
DeltaContent::Thinking(ev.delta), + )) + } + + "response.reasoning_summary_part.done" => { + let ev: ReasoningSummaryPartDone = from_json(data)?; + if let Some(info) = state.slots.remove(&SlotKey::Summary { + output: ev.output_index, + summary: ev.summary_index, + }) { + Ok(vec![Event::BlockStop(BlockStop { + index: info.flat_index, + block_type: info.block_type, + stop_reason: None, + })]) + } else { + Ok(Vec::new()) + } + } + + "response.function_call_arguments.delta" => { + let ev: FunctionCallArgumentsDelta = from_json(data)?; + Ok(ensure_and_delta( + state, + SlotKey::OutputItem(ev.output_index), + BlockType::ToolUse, + BlockMetadata::ToolUse { + id: String::new(), + name: String::new(), + }, + DeltaContent::InputJson(ev.delta), + )) + } + + "response.custom_tool_call_input.delta" => { + let ev: CustomToolCallInputDelta = from_json(data)?; + Ok(ensure_and_delta( + state, + SlotKey::OutputItem(ev.output_index), + BlockType::ToolUse, + BlockMetadata::ToolUse { + id: String::new(), + name: String::new(), + }, + DeltaContent::InputJson(ev.delta), + )) + } + + "error" => { + let ev: TopLevelError = from_json(data).unwrap_or(TopLevelError { + message: Some(data.to_string()), + error_type: None, + code: None, + }); + Ok(vec![Event::Error(ErrorEvent { + code: ev.error_type.or(ev.code), + message: ev.message.unwrap_or_default(), + })]) + } + + // 未対応 / 情報系イベントは無視 + _ => Ok(Vec::new()), + } +} + +/// 対応する BlockStart がまだ発行されていなければ発行しつつ、delta を流す。 +/// content_part.added を取りこぼしても delta 単独で復旧できるようにする。 +fn ensure_and_delta( + state: &mut OpenAIResponsesState, + key: SlotKey, + block_type: BlockType, + metadata: BlockMetadata, + delta: DeltaContent, +) -> Vec { + let (info, just_created) = state.get_or_allocate(key, block_type); + let mut out = Vec::with_capacity(2); + if just_created { + out.push(Event::BlockStart(BlockStart { + index: info.flat_index, + block_type, + metadata, + })); + } + out.push(Event::BlockDelta(BlockDelta { + index: info.flat_index, + delta, + })); + 
out +} + +fn from_json Deserialize<'de>>(data: &str) -> Result { + serde_json::from_str(data).map_err(|e| ClientError::Api { + status: None, + code: Some("parse_error".to_string()), + message: format!("Failed to parse SSE data: {e}"), + }) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn run(event_type: &str, data: &str) -> (Vec, OpenAIResponsesState) { + let mut state = OpenAIResponsesState::default(); + let events = parse_sse(event_type, data, &mut state).unwrap(); + (events, state) + } + + fn with( + state: &mut OpenAIResponsesState, + event_type: &str, + data: &str, + ) -> Vec { + parse_sse(event_type, data, state).unwrap() + } + + #[test] + fn created_emits_status_started() { + let (events, _) = run("response.created", r#"{"response":{}}"#); + assert!(matches!( + events[0], + Event::Status(StatusEvent { + status: ResponseStatus::Started + }) + )); + } + + #[test] + fn completed_emits_usage_and_status() { + let data = r#"{"response":{"usage":{"input_tokens":10,"output_tokens":20,"total_tokens":30}}}"#; + let (events, _) = run("response.completed", data); + assert!(matches!(events[0], Event::Usage(_))); + assert!(matches!( + events[1], + Event::Status(StatusEvent { + status: ResponseStatus::Completed + }) + )); + if let Event::Usage(u) = &events[0] { + assert_eq!(u.input_tokens, Some(10)); + assert_eq!(u.output_tokens, Some(20)); + assert_eq!(u.total_tokens, Some(30)); + } + } + + #[test] + fn text_stream_start_delta_stop() { + let mut state = OpenAIResponsesState::default(); + // output_item.added (message) → 無視 + with( + &mut state, + "response.output_item.added", + r#"{"output_index":0,"item":{"type":"message","id":"m1"}}"#, + ); + // content_part.added (output_text) → BlockStart(Text) + let ev = with( + &mut state, + "response.content_part.added", + r#"{"output_index":0,"content_index":0,"item_id":"m1","part":{"type":"output_text","text":""}}"#, + ); + assert_eq!(ev.len(), 1); + assert!(matches!(ev[0], Event::BlockStart(_))); + // delta + let ev = 
with( + &mut state, + "response.output_text.delta", + r#"{"output_index":0,"content_index":0,"item_id":"m1","delta":"hi"}"#, + ); + assert_eq!(ev.len(), 1); + if let Event::BlockDelta(d) = &ev[0] { + assert!(matches!(&d.delta, DeltaContent::Text(t) if t == "hi")); + } else { + panic!("expected delta"); + } + // content_part.done → BlockStop + let ev = with( + &mut state, + "response.content_part.done", + r#"{"output_index":0,"content_index":0,"item_id":"m1","part":{"type":"output_text","text":"hi"}}"#, + ); + assert_eq!(ev.len(), 1); + if let Event::BlockStop(s) = &ev[0] { + assert_eq!(s.block_type, BlockType::Text); + } else { + panic!("expected stop"); + } + } + + #[test] + fn function_call_start_delta_stop() { + let mut state = OpenAIResponsesState::default(); + // output_item.added (function_call) → BlockStart(ToolUse, id, name) + let ev = with( + &mut state, + "response.output_item.added", + r#"{"output_index":1,"item":{"type":"function_call","id":"fc1","call_id":"call_abc","name":"get_weather"}}"#, + ); + assert_eq!(ev.len(), 1); + if let Event::BlockStart(s) = &ev[0] { + assert_eq!(s.block_type, BlockType::ToolUse); + if let BlockMetadata::ToolUse { id, name } = &s.metadata { + assert_eq!(id, "call_abc"); + assert_eq!(name, "get_weather"); + } else { + panic!("expected ToolUse metadata"); + } + } else { + panic!("expected BlockStart"); + } + // arguments delta + let ev = with( + &mut state, + "response.function_call_arguments.delta", + r#"{"output_index":1,"item_id":"fc1","delta":"{\"x\":"}"#, + ); + assert_eq!(ev.len(), 1); + if let Event::BlockDelta(d) = &ev[0] { + assert!(matches!(&d.delta, DeltaContent::InputJson(j) if j == "{\"x\":")); + } + // output_item.done → BlockStop + let ev = with( + &mut state, + "response.output_item.done", + r#"{"output_index":1,"item":{"type":"function_call","call_id":"call_abc","name":"get_weather","arguments":"{\"x\":1}"}}"#, + ); + assert_eq!(ev.len(), 1); + assert!(matches!(ev[0], Event::BlockStop(_))); + } + + #[test] + 
fn custom_tool_call_input_delta_parsed() { + let mut state = OpenAIResponsesState::default(); + with( + &mut state, + "response.output_item.added", + r#"{"output_index":0,"item":{"type":"custom_tool_call","id":"ct1","call_id":"call_xyz","name":"custom"}}"#, + ); + let ev = with( + &mut state, + "response.custom_tool_call_input.delta", + r#"{"output_index":0,"item_id":"ct1","delta":"raw"}"#, + ); + assert_eq!(ev.len(), 1); + if let Event::BlockDelta(d) = &ev[0] { + assert!(matches!(&d.delta, DeltaContent::InputJson(j) if j == "raw")); + } else { + panic!("expected delta"); + } + } + + #[test] + fn reasoning_text_delta_emits_thinking() { + let mut state = OpenAIResponsesState::default(); + with( + &mut state, + "response.content_part.added", + r#"{"output_index":0,"content_index":0,"item_id":"r1","part":{"type":"reasoning_text","text":""}}"#, + ); + let ev = with( + &mut state, + "response.reasoning_text.delta", + r#"{"output_index":0,"content_index":0,"item_id":"r1","delta":"think"}"#, + ); + if let Event::BlockDelta(d) = &ev[0] { + assert!(matches!(&d.delta, DeltaContent::Thinking(t) if t == "think")); + } else { + panic!("expected thinking delta"); + } + } + + #[test] + fn reasoning_summary_start_delta_stop() { + let mut state = OpenAIResponsesState::default(); + let ev = with( + &mut state, + "response.reasoning_summary_part.added", + r#"{"output_index":0,"summary_index":0,"item_id":"r1","part":{"type":"summary_text","text":""}}"#, + ); + assert!(matches!(ev[0], Event::BlockStart(_))); + let ev = with( + &mut state, + "response.reasoning_summary_text.delta", + r#"{"output_index":0,"summary_index":0,"item_id":"r1","delta":"sum"}"#, + ); + if let Event::BlockDelta(d) = &ev[0] { + assert!(matches!(&d.delta, DeltaContent::Thinking(t) if t == "sum")); + } + let ev = with( + &mut state, + "response.reasoning_summary_part.done", + r#"{"output_index":0,"summary_index":0,"item_id":"r1"}"#, + ); + assert!(matches!(ev[0], Event::BlockStop(_))); + } + + #[test] + fn 
delta_without_prior_start_recovers() { + // 防御: content_part.added が落ちても delta 単独で BlockStart+Delta を発行 + let mut state = OpenAIResponsesState::default(); + let ev = with( + &mut state, + "response.output_text.delta", + r#"{"output_index":0,"content_index":0,"item_id":"m1","delta":"hi"}"#, + ); + assert_eq!(ev.len(), 2); + assert!(matches!(ev[0], Event::BlockStart(_))); + assert!(matches!(ev[1], Event::BlockDelta(_))); + } + + #[test] + fn parallel_output_items_get_distinct_indices() { + // 2 つの function_call が並列で output_item.added される場合、 + // flat index が別々になる(Parallel tool calling の基本)。 + let mut state = OpenAIResponsesState::default(); + let ev1 = with( + &mut state, + "response.output_item.added", + r#"{"output_index":0,"item":{"type":"function_call","id":"a","call_id":"c1","name":"t1"}}"#, + ); + let ev2 = with( + &mut state, + "response.output_item.added", + r#"{"output_index":1,"item":{"type":"function_call","id":"b","call_id":"c2","name":"t2"}}"#, + ); + let i1 = if let Event::BlockStart(s) = &ev1[0] { + s.index + } else { + panic!() + }; + let i2 = if let Event::BlockStart(s) = &ev2[0] { + s.index + } else { + panic!() + }; + assert_ne!(i1, i2); + } + + #[test] + fn failed_response_emits_error_and_status() { + let data = + r#"{"response":{"error":{"type":"invalid_request_error","message":"bad"}}}"#; + let (events, _) = run("response.failed", data); + assert_eq!(events.len(), 2); + assert!(matches!(events[0], Event::Error(_))); + assert!(matches!( + events[1], + Event::Status(StatusEvent { + status: ResponseStatus::Failed + }) + )); + } + + #[test] + fn unknown_event_is_ignored() { + let (events, _) = run("response.in_progress", "{}"); + assert!(events.is_empty()); + } +} diff --git a/crates/llm-worker/src/llm_client/scheme/openai_responses/mod.rs b/crates/llm-worker/src/llm_client/scheme/openai_responses/mod.rs new file mode 100644 index 00000000..1043bd6e --- /dev/null +++ b/crates/llm-worker/src/llm_client/scheme/openai_responses/mod.rs @@ -0,0 +1,59 @@ 
+//! OpenAI Responses API スキーマ (`/v1/responses`) +//! +//! Chat Completions とは別物の item-based wire format。reasoning item と +//! function_call item が first-class で、SSE イベントも `response.*` 名前空間で +//! 流れる。ChatGPT OAuth 経路 (codex) は本 scheme 必須。 +//! +//! - リクエスト JSON 生成: [`request`] +//! - SSE イベントパース → [`Event`](crate::llm_client::event::Event) 変換: [`events`] + +mod capability; +mod events; +mod request; +mod scheme_impl; + +pub use scheme_impl::OpenAIResponsesState; + +/// OpenAI Responses scheme 本体。 +/// +/// `store` / `include_encrypted_content` は scheme 固定の wire 設定で、 +/// デフォルトは stateless + ZDR 相当 (`store=false`, `include=[...]`)。 +/// 将来 ZDR 非対応環境で `store=true` にしたくなった場合に限り override +/// する。`ModelCapability` には入れない(これはモデルの能力ではなく、 +/// クライアントの運用方針)。 +#[derive(Debug, Clone)] +pub struct OpenAIResponsesScheme { + /// サーバ側に response を保存するか。ZDR/stateless 運用では `false`。 + pub store: bool, + /// `include: ["reasoning.encrypted_content"]` を付けるか。 + /// `store=false` で reasoning を使うなら必須。 + pub include_encrypted_content: bool, +} + +impl Default for OpenAIResponsesScheme { + fn default() -> Self { + Self { + store: false, + include_encrypted_content: true, + } + } +} + +impl OpenAIResponsesScheme { + /// デフォルト設定 (`store=false`, `include=["reasoning.encrypted_content"]`)。 + pub fn new() -> Self { + Self::default() + } + + /// `store` を上書き。 + pub fn with_store(mut self, store: bool) -> Self { + self.store = store; + self + } + + /// `include: ["reasoning.encrypted_content"]` の有無を上書き。 + pub fn with_include_encrypted_content(mut self, include: bool) -> Self { + self.include_encrypted_content = include; + self + } +} diff --git a/crates/llm-worker/src/llm_client/scheme/openai_responses/request.rs b/crates/llm-worker/src/llm_client/scheme/openai_responses/request.rs new file mode 100644 index 00000000..42889033 --- /dev/null +++ b/crates/llm-worker/src/llm_client/scheme/openai_responses/request.rs @@ -0,0 +1,461 @@ +//! OpenAI Responses API リクエスト body 生成 +//! +//! 
Chat Completions の `messages` と違い、Responses は `input[]` の +//! item 配列で reasoning / function_call / function_call_output が +//! first-class。`Item` を素に近い形で `input[]` に投影できる。 + +use serde::Serialize; +use serde_json::Value; + +use crate::llm_client::{ + Request, + capability::{ModelCapability, ReasoningEffort, ReasoningSupport}, + types::{ContentPart, Item, Role, ToolDefinition, parse_tool_arguments}, +}; + +use super::OpenAIResponsesScheme; + +/// `/v1/responses` のリクエスト body。 +#[derive(Debug, Serialize)] +pub(crate) struct ResponsesRequest { + pub model: String, + /// システムプロンプト相当。`input[]` とは別フィールド。 + #[serde(skip_serializing_if = "Option::is_none")] + pub instructions: Option, + pub input: Vec, + #[serde(skip_serializing_if = "Vec::is_empty")] + pub tools: Vec, + /// 常時 `"auto"` を送る。scheme 固定値。 + pub tool_choice: &'static str, + /// 常時 `true` を送る。scheme 固定値。 + pub parallel_tool_calls: bool, + #[serde(skip_serializing_if = "Option::is_none")] + pub reasoning: Option, + /// ZDR / stateless 運用では `false`。 + pub store: bool, + /// 常時 `true`。 + pub stream: bool, + /// `["reasoning.encrypted_content"]` 等。 + #[serde(skip_serializing_if = "Vec::is_empty")] + pub include: Vec<&'static str>, + #[serde(skip_serializing_if = "Option::is_none")] + pub max_output_tokens: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub temperature: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub top_p: Option, +} + +/// reasoning 制御。 +#[derive(Debug, Serialize)] +pub(crate) struct ReasoningConfig { + #[serde(skip_serializing_if = "Option::is_none")] + pub effort: Option<&'static str>, + /// summary の出力制御。`"auto"` 固定で summary_text を受け取る。 + pub summary: &'static str, +} + +/// `input[]` の 1 要素。 +/// +/// Responses API の item 型を素に近い形で投影する。未対応 type は +/// 無視(reasoning 送信時に `content: []` の場合は `None` として弾く)。 +#[derive(Debug, Serialize)] +#[serde(tag = "type", rename_all = "snake_case")] +pub(crate) enum InputItem { + /// 会話メッセージ。user / assistant / system のいずれか。 + 
Message { + role: &'static str, + content: Vec, + }, + /// 過去の function tool 呼び出し(assistant 側)。 + FunctionCall { + call_id: String, + name: String, + /// JSON 文字列(object でなくても正規化済み)。 + arguments: String, + }, + /// function tool の結果(user 側)。 + FunctionCallOutput { + call_id: String, + /// Responses は文字列 or 構造化 output を許すが、ここでは + /// `summary` + `content` を改行連結した文字列で送る。 + output: String, + }, + /// reasoning item。`encrypted_content` があれば必ず添える。 + Reasoning { + #[serde(skip_serializing_if = "Option::is_none")] + id: Option, + #[serde(skip_serializing_if = "Vec::is_empty")] + summary: Vec, + #[serde(skip_serializing_if = "Vec::is_empty")] + content: Vec, + #[serde(skip_serializing_if = "Option::is_none")] + encrypted_content: Option, + }, +} + +/// メッセージ content_part。role で input/output を使い分ける。 +#[derive(Debug, Serialize)] +#[serde(tag = "type", rename_all = "snake_case")] +pub(crate) enum InputContent { + /// user / system 側のテキスト + InputText { text: String }, + /// assistant 側のテキスト + OutputText { text: String }, +} + +#[derive(Debug, Serialize)] +#[serde(tag = "type", rename_all = "snake_case")] +pub(crate) enum ReasoningSummaryPart { + SummaryText { text: String }, +} + +#[derive(Debug, Serialize)] +#[serde(tag = "type", rename_all = "snake_case")] +pub(crate) enum ReasoningContentPart { + ReasoningText { text: String }, +} + +/// Responses 用 tool 定義。Chat と違い function キーでネストせず +/// トップレベルに `name` / `parameters` が載る。 +#[derive(Debug, Serialize)] +pub(crate) struct ResponseTool { + #[serde(rename = "type")] + pub r#type: &'static str, + pub name: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub description: Option, + pub parameters: Value, + /// Structured output モード制御。デフォルト false。 + pub strict: bool, +} + +impl OpenAIResponsesScheme { + /// `Request` から wire 形式の body を組み立てる。 + pub(crate) fn build_request( + &self, + model: &str, + request: &Request, + capability: &ModelCapability, + ) -> ResponsesRequest { + let input = 
convert_items_to_input(&request.items); + let tools = request.tools.iter().map(convert_tool).collect(); + + // Reasoning 投影: capability が Effort / Both をサポートし、かつ + // request 側で effort が指定されているときだけ reasoning を付ける。 + let supports_effort = matches!( + capability.reasoning, + Some(ReasoningSupport::Effort | ReasoningSupport::Both), + ); + let reasoning = request + .config + .reasoning + .as_ref() + .and_then(|rc| rc.effort) + .filter(|_| supports_effort) + .map(|effort| ReasoningConfig { + effort: Some(match effort { + ReasoningEffort::Low => "low", + ReasoningEffort::Medium => "medium", + ReasoningEffort::High => "high", + }), + summary: "auto", + }); + + let include: Vec<&'static str> = if self.include_encrypted_content { + vec!["reasoning.encrypted_content"] + } else { + Vec::new() + }; + + ResponsesRequest { + model: model.to_string(), + instructions: request.system_prompt.clone(), + input, + tools, + tool_choice: "auto", + parallel_tool_calls: true, + reasoning, + store: self.store, + stream: true, + include, + max_output_tokens: request.config.max_tokens, + temperature: request.config.temperature, + top_p: request.config.top_p, + } + } +} + +/// `Item` 列を `input[]` に変換する。 +fn convert_items_to_input(items: &[Item]) -> Vec { + let mut out = Vec::with_capacity(items.len()); + for item in items { + match item { + Item::Message { role, content, .. 
} => { + let (role_str, text_variant): (&'static str, fn(String) -> InputContent) = match role + { + Role::User => ("user", |t| InputContent::InputText { text: t }), + Role::Assistant => ("assistant", |t| InputContent::OutputText { text: t }), + Role::System => ("system", |t| InputContent::InputText { text: t }), + }; + let parts: Vec = content + .iter() + .map(|p| match p { + ContentPart::Text { text } => text_variant(text.clone()), + ContentPart::Refusal { refusal } => text_variant(refusal.clone()), + }) + .collect(); + out.push(InputItem::Message { + role: role_str, + content: parts, + }); + } + Item::ToolCall { + call_id, + name, + arguments, + .. + } => { + // 非 object / 旧形式の "null" を "{}" に正規化。 + let normalized = parse_tool_arguments(arguments).to_string(); + out.push(InputItem::FunctionCall { + call_id: call_id.clone(), + name: name.clone(), + arguments: normalized, + }); + } + Item::ToolResult { + call_id, + summary, + content, + .. + } => { + let text = match content { + Some(c) => format!("{summary}\n{c}"), + None => summary.clone(), + }; + out.push(InputItem::FunctionCallOutput { + call_id: call_id.clone(), + output: text, + }); + } + Item::Reasoning { + id, + text, + summary, + encrypted_content, + .. 
+ } => { + let summary_parts = summary + .iter() + .filter(|s| !s.is_empty()) + .map(|s| ReasoningSummaryPart::SummaryText { text: s.clone() }) + .collect(); + let content_parts = if text.is_empty() { + Vec::new() + } else { + vec![ReasoningContentPart::ReasoningText { text: text.clone() }] + }; + out.push(InputItem::Reasoning { + id: id.clone(), + summary: summary_parts, + content: content_parts, + encrypted_content: encrypted_content.clone(), + }); + } + } + } + out +} + +fn convert_tool(tool: &ToolDefinition) -> ResponseTool { + ResponseTool { + r#type: "function", + name: tool.name.clone(), + description: tool.description.clone(), + parameters: tool.input_schema.clone(), + strict: false, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::llm_client::capability::{ + CacheStrategy, ModelCapability, ReasoningControl, ReasoningEffort, ReasoningSupport, + StructuredOutput, ToolCallingSupport, + }; + + fn cap_with_reasoning() -> ModelCapability { + ModelCapability { + tool_calling: ToolCallingSupport::Parallel, + structured_output: StructuredOutput::JsonSchema, + reasoning: Some(ReasoningSupport::Effort), + vision: true, + prompt_caching: CacheStrategy::Auto, + } + } + + fn cap_no_reasoning() -> ModelCapability { + ModelCapability { + reasoning: None, + ..cap_with_reasoning() + } + } + + #[test] + fn scheme_defaults_to_stateless_zdr() { + let s = OpenAIResponsesScheme::new(); + assert!(!s.store); + assert!(s.include_encrypted_content); + } + + #[test] + fn includes_encrypted_content_when_enabled() { + let scheme = OpenAIResponsesScheme::new(); + let req = Request::new().user("hi"); + let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning()); + assert_eq!(body.include, vec!["reasoning.encrypted_content"]); + assert!(!body.store); + assert!(body.stream); + } + + #[test] + fn instructions_from_system_prompt() { + let scheme = OpenAIResponsesScheme::new(); + let req = Request::new().system("be terse").user("hi"); + let body = 
scheme.build_request("gpt-5", &req, &cap_with_reasoning()); + assert_eq!(body.instructions.as_deref(), Some("be terse")); + assert_eq!(body.input.len(), 1); + } + + #[test] + fn tool_choice_and_parallel_are_fixed() { + let scheme = OpenAIResponsesScheme::new(); + let req = Request::new().user("hi"); + let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning()); + assert_eq!(body.tool_choice, "auto"); + assert!(body.parallel_tool_calls); + } + + #[test] + fn user_message_uses_input_text() { + let scheme = OpenAIResponsesScheme::new(); + let req = Request::new().user("hi"); + let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning()); + match &body.input[0] { + InputItem::Message { role, content } => { + assert_eq!(*role, "user"); + assert_eq!(content.len(), 1); + assert!(matches!(&content[0], InputContent::InputText { text } if text == "hi")); + } + _ => panic!("expected message"), + } + } + + #[test] + fn assistant_message_uses_output_text() { + let scheme = OpenAIResponsesScheme::new(); + let req = Request::new().user("hi").assistant("hello"); + let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning()); + match &body.input[1] { + InputItem::Message { role, content } => { + assert_eq!(*role, "assistant"); + assert!( + matches!(&content[0], InputContent::OutputText { text } if text == "hello") + ); + } + _ => panic!("expected message"), + } + } + + #[test] + fn tool_call_and_result_become_function_items() { + let scheme = OpenAIResponsesScheme::new(); + let req = Request::new() + .user("run") + .item(Item::tool_call("c1", "t", r#"{"a":1}"#)) + .item(Item::tool_result("c1", "ok")); + let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning()); + assert!(matches!(body.input[1], InputItem::FunctionCall { .. })); + assert!(matches!(body.input[2], InputItem::FunctionCallOutput { .. 
})); + } + + #[test] + fn reasoning_item_round_trips_encrypted_content() { + let scheme = OpenAIResponsesScheme::new(); + let item = Item::reasoning("inner") + .with_reasoning_summary(vec!["s1".into()]) + .with_encrypted_content("ENC"); + let req = Request::new().user("hi").item(item); + let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning()); + match &body.input[1] { + InputItem::Reasoning { + summary, + content, + encrypted_content, + .. + } => { + assert_eq!(summary.len(), 1); + assert_eq!(content.len(), 1); + assert_eq!(encrypted_content.as_deref(), Some("ENC")); + } + _ => panic!("expected reasoning"), + } + } + + #[test] + fn reasoning_effort_projected_when_supported() { + let scheme = OpenAIResponsesScheme::new(); + let mut req = Request::new().user("hi"); + req.config.reasoning = Some(ReasoningControl { + effort: Some(ReasoningEffort::High), + budget_tokens: None, + }); + let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning()); + let reasoning = body.reasoning.expect("reasoning should be set"); + assert_eq!(reasoning.effort, Some("high")); + assert_eq!(reasoning.summary, "auto"); + } + + #[test] + fn reasoning_omitted_when_unsupported() { + let scheme = OpenAIResponsesScheme::new(); + let mut req = Request::new().user("hi"); + req.config.reasoning = Some(ReasoningControl { + effort: Some(ReasoningEffort::High), + budget_tokens: None, + }); + let body = scheme.build_request("gpt-4o", &req, &cap_no_reasoning()); + assert!(body.reasoning.is_none()); + } + + #[test] + fn max_output_tokens_passed_through() { + let scheme = OpenAIResponsesScheme::new(); + let req = Request::new().user("hi").max_tokens(100); + let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning()); + assert_eq!(body.max_output_tokens, Some(100)); + } + + #[test] + fn serialized_body_has_expected_shape() { + // wire 形式が崩れていないかのスモークテスト + let scheme = OpenAIResponsesScheme::new(); + let req = Request::new() + .system("sys") + .user("hi") + 
.tool(ToolDefinition::new("t").description("d")); + let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning()); + let json = serde_json::to_value(&body).unwrap(); + assert_eq!(json["model"], "gpt-5"); + assert_eq!(json["instructions"], "sys"); + assert_eq!(json["tool_choice"], "auto"); + assert_eq!(json["parallel_tool_calls"], true); + assert_eq!(json["store"], false); + assert_eq!(json["stream"], true); + assert_eq!(json["include"][0], "reasoning.encrypted_content"); + assert_eq!(json["tools"][0]["type"], "function"); + assert_eq!(json["tools"][0]["name"], "t"); + } +} diff --git a/crates/llm-worker/src/llm_client/scheme/openai_responses/scheme_impl.rs b/crates/llm-worker/src/llm_client/scheme/openai_responses/scheme_impl.rs new file mode 100644 index 00000000..2a9c5988 --- /dev/null +++ b/crates/llm-worker/src/llm_client/scheme/openai_responses/scheme_impl.rs @@ -0,0 +1,59 @@ +//! `impl Scheme for OpenAIResponsesScheme` + +use serde_json::Value; + +use crate::llm_client::{ + ClientError, + auth::AuthRequirement, + capability::ModelCapability, + event::Event, + scheme::Scheme, + types::Request, +}; + +use super::OpenAIResponsesScheme; + +pub use super::events::OpenAIResponsesState; + +impl Scheme for OpenAIResponsesScheme { + type State = OpenAIResponsesState; + + fn default_base_url(&self) -> &'static str { + "https://api.openai.com" + } + + fn path(&self, _model_id: &str) -> String { + "/v1/responses".to_string() + } + + fn required_auth(&self) -> AuthRequirement { + AuthRequirement::Bearer + } + + fn build_request_body( + &self, + model_id: &str, + request: &Request, + capability: &ModelCapability, + ) -> Value { + let body = self.build_request(model_id, request, capability); + serde_json::to_value(&body).expect("ResponsesRequest is always serialisable") + } + + fn parse_sse( + &self, + event_type: &str, + data: &str, + state: &mut Self::State, + ) -> Result, ClientError> { + super::events::parse_sse(event_type, data, state) + } + + fn 
capability_for(&self, model_id: &str) -> Option { + super::capability::lookup(model_id) + } + + fn default_capability(&self) -> ModelCapability { + super::capability::default_capability() + } +} diff --git a/crates/llm-worker/src/llm_client/types.rs b/crates/llm-worker/src/llm_client/types.rs index e88999c4..de33f598 100644 --- a/crates/llm-worker/src/llm_client/types.rs +++ b/crates/llm-worker/src/llm_client/types.rs @@ -86,8 +86,16 @@ pub enum Item { /// Optional item ID #[serde(skip_serializing_if = "Option::is_none")] id: Option, - /// Reasoning text + /// Reasoning text(reasoning body, `reasoning_text.delta` の累積) text: String, + /// Reasoning summary(OpenAI Responses の `summary_text[]` を格納。 + /// 他 scheme は空) + #[serde(default, skip_serializing_if = "Vec::is_empty")] + summary: Vec, + /// サーバから返された暗号化済み reasoning blob。ZDR / `store=false` + /// 運用で stateless に再送するときそのまま添える必要がある。 + #[serde(default, skip_serializing_if = "Option::is_none")] + encrypted_content: Option, /// Item status #[serde(skip_serializing_if = "Option::is_none")] status: Option, @@ -214,10 +222,31 @@ impl Item { Self::Reasoning { id: None, text: text.into(), + summary: Vec::new(), + encrypted_content: None, status: None, } } + /// Set reasoning summary on a `Reasoning` item. No-op on other variants. + pub fn with_reasoning_summary(mut self, new_summary: Vec) -> Self { + if let Self::Reasoning { summary, .. } = &mut self { + *summary = new_summary; + } + self + } + + /// Set `encrypted_content` on a `Reasoning` item. No-op on other variants. + pub fn with_encrypted_content(mut self, content: impl Into) -> Self { + if let Self::Reasoning { + encrypted_content, .. 
+ } = &mut self + { + *encrypted_content = Some(content.into()); + } + self + } + // ======================================================================== // Builder methods // ======================================================================== diff --git a/crates/provider/src/lib.rs b/crates/provider/src/lib.rs index 9d10faa6..e4e8c2dc 100644 --- a/crates/provider/src/lib.rs +++ b/crates/provider/src/lib.rs @@ -15,6 +15,7 @@ use llm_worker::llm_client::{ capability::ModelCapability, scheme::{ Scheme, anthropic::AnthropicScheme, gemini::GeminiScheme, openai_chat::OpenAIScheme, + openai_responses::OpenAIResponsesScheme, }, transport::{HttpTransport, ResolvedAuth}, }; @@ -119,9 +120,9 @@ pub fn build_client(config: &ModelConfig) -> Result, Provider SchemeKind::Anthropic => build_transport(AnthropicScheme::new(), config, resolved), SchemeKind::OpenaiChat => build_transport(OpenAIScheme::new(), config, resolved), SchemeKind::Gemini => build_transport(GeminiScheme::new(), config, resolved), - SchemeKind::OpenaiResponses => Err(ProviderError::SchemeNotImplemented { - scheme: config.scheme, - }), + SchemeKind::OpenaiResponses => { + build_transport(OpenAIResponsesScheme::new(), config, resolved) + } } } diff --git a/tickets/llm-scheme-openai-responses.md b/tickets/llm-scheme-openai-responses.md index 2319a7b9..219bf608 100644 --- a/tickets/llm-scheme-openai-responses.md +++ b/tickets/llm-scheme-openai-responses.md @@ -1,8 +1,12 @@ # OpenAI Responses scheme の新設 +> **レビュー完了(close 可)** — 詳細は [`llm-scheme-openai-responses.review.md`](llm-scheme-openai-responses.review.md) +> 9 要件すべて達成、指摘事項は優先度低の 2 件のみ(tool 引数 delta 先行時の空メタデータ、`tools[].strict` ハードコード)。いずれも実害薄。 + + ## 背景 -現状の `crates/llm-worker/src/llm_client/scheme/openai` は OpenAI Chat Completions (`/v1/chat/completions`) wire format のみ実装。OpenAI の Responses API (`/v1/responses`) はリクエスト body・SSE イベント構造ともに Chat Completions と別物で、同じ scheme には乗らない。 +現状の `crates/llm-worker/src/llm_client/scheme/openai_chat` は OpenAI Chat 
Completions (`/v1/chat/completions`) wire format のみ実装。OpenAI の Responses API (`/v1/responses`) はリクエスト body・SSE イベント構造ともに Chat Completions と別物で、同じ scheme には乗らない。 Codex CLI (github.com/openai/codex) の実装を確認したところ、ChatGPT OAuth 経路でも OpenAI API Key 経路でもすべて `/v1/responses` を叩いており、Chat Completions は使っていない。Codex 流用(別チケット `llm-auth-codex-oauth`)を実現する前提として、この scheme が必要。 @@ -13,56 +17,80 @@ Codex CLI (github.com/openai/codex) の実装を確認したところ、ChatGPT 1. **`scheme/openai_responses` を新設**し、`HttpTransport` に差し込めるようにする 2. **リクエスト body** は `/v1/responses` の item-based 形式: - - `model`, `instructions` (system prompt 相当), `input: [ResponseItem]`, `tools`, `tool_choice`, `parallel_tool_calls` - - `reasoning: { effort?, summary? }` - - `store`, `stream: true`, `include: [String]` - - `service_tier?`, `prompt_cache_key?`, `text?: { verbosity?, format? }` - - `previous_response_id` は **使わない**(stateless で運用、履歴は insomnia 側管理) + - `model`, `instructions` (system prompt 相当), `input: [ResponseItem]`, `tools` + - `tool_choice: "auto"` / `parallel_tool_calls: true` は **scheme 固定値**で常時送信(将来必要になれば Request / RequestConfig に昇格、今は YAGNI) + - `reasoning: { effort?, summary? }` は `ReasoningControl` から投影 + - **`store: false` + `include: ["reasoning.encrypted_content"]` を scheme 固定値**で送信(stateless 運用 + 再送のため encrypted reasoning を取得) + - `stream: true` 固定 + - `service_tier?`, `prompt_cache_key?`, `text?: { verbosity?, format? }` は当面未使用、フィールドの予約のみ + - `previous_response_id` は **使わない**(stateless、履歴は insomnia 側管理) 3. 
**SSE event パース**: - `response.created` / `response.completed` / `response.failed` / `response.incomplete` - `response.output_item.added` / `response.output_item.done` + - `response.content_part.added` / `response.content_part.done` - `response.output_text.delta` - - `response.custom_tool_call_input.delta`(ToolCall 引数の partial JSON) + - `response.function_call_arguments.delta`(通常 function tool の引数 partial JSON) + - `response.custom_tool_call_input.delta`(custom tool のフリーフォーム入力の差分。JSON とは限らない) - `response.reasoning_text.delta` / `response.reasoning_summary_text.delta` 4. **BlockType / DeltaContent との対応**: + - **text BlockStart** は `response.content_part.added`(Anthropic の `content_block_start` と対称) + - **tool_use BlockStart** は `response.output_item.added`(id と name が確定する時点、streaming に乗せるためここ) - `response.output_text.delta` → `DeltaContent::Text` - `response.reasoning_text.delta` / `response.reasoning_summary_text.delta` → `DeltaContent::Thinking` - - `response.custom_tool_call_input.delta` → `DeltaContent::InputJson` - - `response.output_item.done` (tool_use) → `BlockMetadata::ToolUse { id, name }` の `BlockStart` 生成 + - `response.function_call_arguments.delta` と `response.custom_tool_call_input.delta` → 両方とも `DeltaContent::InputJson` に正規化 + - `response.content_part.done` / `response.output_item.done` → `BlockStop` -5. **reasoning の item 構造対応**: `summary[]` / `encrypted_content` を持つ reasoning item の送受信をロスなく扱える - - 送信時は `BlockMetadata::Thinking` から `input[]` に再構築 - - 受信時は `BlockType::Thinking` のブロックとしてストリームに流す +5.
**`Item::Reasoning` の拡張**(llm-worker/types.rs への変更を含む): + + ```rust + Item::Reasoning { + text: String, + summary: Vec<String>, + encrypted_content: Option<String>, + } + ``` + + - 送信時は `input[]` の reasoning item に再構築(`encrypted_content` があれば添える) + - 受信時は SSE から `text` / `summary[]` / `encrypted_content` を組み立てて `Item::Reasoning` に格納 + - 既存 `Item::Reasoning { text }` の 1 フィールドからの拡張。`summary` は空 Vec、`encrypted_content` は `None` で既存互換を保つ + - 将来 Anthropic の extended thinking で `signature: Option<String>` を追加する余地を残す 6. **認証は `AuthRef::ApiKey` のみ対応**: `Authorization: Bearer ` ヘッダ。`base_url` デフォルトは `https://api.openai.com`、パスは `/v1/responses`。ChatGPT OAuth 経路(`CodexOAuth`)は別チケット(`llm-auth-codex-oauth`)で追加 -7. **Usage の正規化**: `response.completed` の `usage: { input_tokens, output_tokens, total_tokens }` を `UsageEvent` に変換。Chat Completions の `prompt_tokens` 等との表記揺れを scheme 側で吸収 +7. **Usage の正規化**: `response.completed` の `usage: { input_tokens, output_tokens, total_tokens }` を `UsageEvent` に変換 -8. **完了時の動作**: OpenAI API key (`OPENAI_API_KEY`) + モデル `gpt-5` 等で `ModelConfig { scheme: OpenAIResponses, base_url: https://api.openai.com, model_id: "gpt-5", auth: ApiKey }` を宣言すると、reasoning + tool call を含む会話が動作する +8. **capability テーブル**: GPT-5 / o3 / o4 のモデル ID 判定は `scheme/openai_chat/capability.rs` と重複するため **共通関数に切り出して共有**(配置は `scheme/openai_chat/capability.rs` に `pub(crate) fn classify(model_id) -> Option<OpenAiFamily>` を置くか、`scheme/openai_common/` を切り出すかは実装時判断)。Responses 側は `ReasoningSupport::Effort` 固定でマッピング + +9. **完了時の動作**: OpenAI API key (`OPENAI_API_KEY`) + モデル `gpt-5` 等で `ModelConfig { scheme: OpenAIResponses, base_url: https://api.openai.com, model_id: "gpt-5", auth: ApiKey }` を宣言すると、reasoning + tool call を含む会話が動作する ## 設計課題 -### 1. reasoning item の encrypted_content +### 1.
scheme-specific 設定の override フィールド -reasoning item の `encrypted_content` はサーバ側で暗号化された状態で返されることがあり、再送時にそのまま添える必要がある(ZDR 組織や `store=false` 運用時)。insomnia の `Item` enum に透過的に保持する仕組みが要る。 +`store` / `include[]` を scheme 固定値にしたが、将来 ZDR 非対応環境で `store=true` を許したくなる可能性がある。`OpenAIResponsesScheme` 自身にフィールド (`store: bool`, `include_encrypted_content: bool` 等) を持たせ、`new()` 時に上書きできる形にする。`ModelCapability` には入れない(scheme-specific な wire 設定なので)。 -### 2. `include[]` と `store` のデフォルト +### 2. Responses 非対応パラメータ -- `include: ["reasoning.encrypted_content"]` を常に付けるか、capability / config で制御するか -- `store=false` をデフォルトにするか `true` にするか(ZDR 既定なら false) - -### 3. Responses 非対応パラメータ - -`service_tier` / `prompt_cache_key` / `text.verbosity` は当面不要かもしれないが、将来対応時に scheme 拡張で入れられる構造にしておく。 +`service_tier` / `prompt_cache_key` / `text.verbosity` は当面不要だが、将来対応時に scheme 拡張で入れられる構造にしておく。 ## Scope 外 -- ChatGPT OAuth 認証(`llm-auth-codex-oauth`) +- ChatGPT OAuth 認証(`llm-auth-codex-oauth` チケットで実装) - `previous_response_id` を使う stateful 運用 - 高次ツール(`web_search` / `code_interpreter` / `computer_use`)— insomnia では採用しない方針 +- `tool_choice` / `parallel_tool_calls` の Request 昇格(必要性が出てから別チケット) ## 依存 -- `tickets/llm-model-config.md`(`HttpTransport` 構造と `AuthRef` が前提) +- `tickets/llm-model-config.md` 完了済(`HttpTransport` 構造と `AuthRef` が前提) + +## 影響範囲 + +llm-worker 単独ではなく以下にまたがる: +- `crates/llm-worker/src/llm_client/types.rs`: `Item::Reasoning` の拡張 +- `crates/llm-worker/src/llm_client/scheme/openai_responses/`: 新規 +- `crates/llm-worker/src/llm_client/scheme/openai_chat/capability.rs`: モデル family 判定を `pub(crate)` に露出 +- `crates/llm-worker/src/llm_client/scheme/mod.rs`: `pub mod openai_responses;` +- `crates/provider/src/lib.rs`: `build_client` の `SchemeKind::OpenaiResponses` アームを `SchemeNotImplemented` から実装に差し替え diff --git a/tickets/llm-scheme-openai-responses.review.md b/tickets/llm-scheme-openai-responses.review.md new file mode 100644 index 00000000..176911b9 --- /dev/null +++ b/tickets/llm-scheme-openai-responses.review.md 
@@ -0,0 +1,77 @@ +# OpenAI Responses scheme の新設 — レビュー + +## 前提・要件の再確認 + +チケット本体の 9 要件 + 2 設計課題を前提に、実装が意図と整合しているかを確認した。`cargo check`(warning 1 / 旧由来)・`cargo test --workspace`(全 pass)通過。変更量: 新規 5 ファイル(1407 行)+ 既存 5 ファイル微修正。 + +## 要件達成度 + +| # | 要件 | 状況 | メモ | +|---|---|---|---| +| 1 | `scheme/openai_responses` 新設、`HttpTransport` に差し込める | ✓ | `Scheme` trait 実装完了 | +| 2 | Request body: `tool_choice: "auto"` / `parallel_tool_calls: true` / `store: false` / `include` / `stream: true` 固定、`reasoning` 投影 | ✓ | `ResponsesRequest` 構造体に全項目、`build_request` で capability 照合して reasoning 投影 | +| 3 | SSE event パース (response.* 一式) | ✓ + α | ticket 列挙に加えて `response.content_part.done` / `response.reasoning_summary_part.added/done` / top-level `error` もカバー | +| 4 | BlockType / DeltaContent 対応(text は `content_part.added`、tool_use は `output_item.added` で BlockStart) | ✓ | `OpenAIResponsesState` の 3 種 SlotKey(OutputItem / ContentPart / Summary)で (output_index, content_index) → flat index を管理 | +| 5 | `Item::Reasoning` 拡張(text + summary + encrypted_content) | ✓ | `with_reasoning_summary` / `with_encrypted_content` ビルダー追加、既存コンストラクタは空値で互換 | +| 6 | `AuthRef::ApiKey` / `Authorization: Bearer` / base_url `https://api.openai.com` / パス `/v1/responses` | ✓ | scheme_impl.rs の `required_auth` / `default_base_url` / `path` | +| 7 | Usage 正規化 | ✓ | `response.completed` の `usage` を `UsageEvent` に変換、`total_tokens` 未提供時は input+output で補完 | +| 8 | capability 共通判定関数 | ✓ | `openai_chat/capability.rs::classify -> OpenAiFamily` を `pub(crate)` で公開、`openai_responses/capability.rs::lookup` が共有 | +| 9 | 完了時動作 | ✓ | `provider/lib.rs::build_client` の `OpenaiResponses` アームが `SchemeNotImplemented` から実装に差し替え済み | + +## 設計決定への反映 + +| 決定 | 反映 | +|---|---| +| `store` / `include[]` を `OpenAIResponsesScheme` フィールドで override 可能、`ModelCapability` には入れない | ✓ `with_store` / `with_include_encrypted_content` ビルダー、デフォルトは stateless + ZDR 相当 | +| `ReasoningSupport::Effort / Both` 対応時のみ `reasoning` 送信 | ✓ `build_request` で capability 
と `request.config.reasoning.effort` の両方が揃う時のみ投影 | +| Responses 未使用パラメータ (`service_tier` / `prompt_cache_key` / `text.verbosity`) は予約のみ | ✓ `ResponsesRequest` 構造体には入れず、必要時に追加できる構造 | + +## アーキテクチャ評価 + +### 良い点 +- **`ensure_and_delta` による防御的設計**: `content_part.added` が欠落しても delta 単独で BlockStart + Delta を発行できる。`delta_without_prior_start_recovers` テストで確認済 +- **`OpenAIResponsesState` の 3 種 SlotKey**: `OutputItem` (tool 全体) / `ContentPart { output, content }` (text/reasoning) / `Summary { output, summary }` (reasoning 要約) で Responses の 2 次元座標を flat index に自然にマップ。`parallel_output_items_get_distinct_indices` で並列 tool call の独立性を検証 +- **テストの充実**: request 12 ケース (scheme defaults / tool_choice 固定 / role 別 content 型 / reasoning 有無 / round-trip / serialize shape)、events 10 ケース (text / function_call / custom_tool / reasoning_text / summary / 並列 / failed / unknown / 防御) +- **capability の一次情報共有**: `OpenAiFamily` enum を `openai_chat` に置いて両 scheme で共有、DRY と結合度のバランスが良い +- **`Item::Reasoning` 拡張の互換性**: `Vec::is_empty` / `Option::is_none` での skip_serializing、既存 `Item::reasoning()` コンストラクタは空値で互換。他 scheme の `request.rs` は `Item::Reasoning { text, .. }` で `..` 省略しており、追加フィールドで壊れない + +## 指摘事項 + +### 優先度: 低 + +#### 1. tool 引数 delta 先行時の空メタデータ + +`ensure_and_delta` が `function_call_arguments.delta` / `custom_tool_call_input.delta` で BlockStart 未発行の場合、`BlockMetadata::ToolUse { id: String::new(), name: String::new() }` を合成する。実運用では `output_item.added` が先行するはずで発動しないが、仮に発火すると後段で空 `call_id` を使うリスクがある。 + +対応案: +- `ToolUse` では `ensure_and_delta` を使わず、`output_item.added` 必須で、欠落時は warning ログ + Delta 破棄 +- もしくは現状維持で「防御コードとして warning」出す + +現状、`output_item.added` が保証されている Responses API の仕様に依存しており、実害は薄い。 + +#### 2. `tools[].strict: false` ハードコード + +`ResponseTool::strict: false` を常時送信。Responses の `strict: true` は JSON Schema 準拠を強制する。`ModelCapability::structured_output == JsonSchema` のときに `strict: true` に昇格させる余地があるが、本チケットではスコープ外として許容範囲。 + +### 優先度: 極低(構造的なスコープ内) + +#### 3. 
`summary` / `encrypted_content` が他 scheme で落ちる + +`Item::Reasoning { text, .. }` で `..` 省略されている他 scheme(Anthropic / OpenAI Chat / Gemini)の `request.rs` は `summary` と `encrypted_content` を送信しない。ただし: +- Anthropic は独自の `signature` が別途必要で将来拡張 +- OpenAI Chat と Gemini は reasoning を first-class では送らない +- scheme をまたいだ履歴引き継ぎは現状想定外 + +構造的には設計通り、問題なし。 + +#### 4. `OpenAIResponsesScheme` の override が `build_client` から届かない + +`with_store` / `with_include_encrypted_content` は存在するが、`build_client` 経由では常に `OpenAIResponsesScheme::new()` デフォルト。ZDR 非対応環境で `store=true` にする場合は provider 側で新しい経路が必要。 + +チケットの設計課題 1 で「将来対応」と明示されており、スコープ通り。 + +## 総合判定 + +**close 可能**。9 要件すべて達成、設計決定も正確に反映、テストも充実。指摘事項はいずれも実害が薄いか、チケット設計課題として明示されたスコープ内の将来対応。 + +`tickets/llm-auth-codex-oauth.md` (次のチケット) を進められる状態。