diff --git a/crates/llm-worker/src/llm_client/scheme/openai_responses/request.rs b/crates/llm-worker/src/llm_client/scheme/openai_responses/request.rs index c74a3d07..acce55c3 100644 --- a/crates/llm-worker/src/llm_client/scheme/openai_responses/request.rs +++ b/crates/llm-worker/src/llm_client/scheme/openai_responses/request.rs @@ -62,6 +62,9 @@ pub(crate) struct ResponsesRequest { pub(crate) struct ReasoningConfig { #[serde(skip_serializing_if = "Option::is_none")] pub effort: Option, + /// Reasoning encrypted_content は同一 user turn 内だけ再利用する。 + /// 古い turn の reasoning item は request input から除外する。 + pub context: &'static str, /// summary の出力制御。`"auto"` 固定で summary_text を受け取る。 pub summary: &'static str, } @@ -193,6 +196,7 @@ impl OpenAIResponsesScheme { ReasoningControl::Effort(effort) => Some(effort.as_str().to_string()), ReasoningControl::BudgetTokens(_) => None, }, + context: "current_turn", summary: "auto", }) .filter(|reasoning| reasoning.effort.is_some()); @@ -236,8 +240,9 @@ impl OpenAIResponsesScheme { /// `Item` 列を `input[]` に変換する。 fn convert_items_to_input(items: &[Item]) -> Vec { + let current_turn_start = current_turn_start_index(items); let mut out = Vec::with_capacity(items.len()); - for item in items { + for (idx, item) in items.iter().enumerate() { match item { Item::Message { role, content, .. } => { let (role_str, text_variant): (&'static str, fn(String) -> InputContent) = @@ -294,6 +299,9 @@ fn convert_items_to_input(items: &[Item]) -> Vec { encrypted_content, .. } => { + if idx < current_turn_start { + continue; + } let summary_parts = summary .iter() .filter(|s| !s.is_empty()) @@ -316,6 +324,26 @@ fn convert_items_to_input(items: &[Item]) -> Vec { out } +/// Responses の `reasoning.context = "current_turn"` に合わせ、直近の +/// user message 以降だけを current turn とみなす。ToolResult は Responses +/// wire 上では user 側 item だが、新しい人間/外部入力ではなく function-call +/// chain の継続なので turn reset には使わない。System/developer notes も +/// 同一 turn 内の補助入力になり得るため reset しない。 +fn current_turn_start_index(items: &[Item]) -> usize { + items + .iter() + .rposition(|item| { + matches!( + item, + Item::Message { + role: Role::User, + .. + } + ) + }) + .unwrap_or(0) +} + fn convert_tool(tool: &ToolDefinition) -> ResponseTool { ResponseTool { r#type: "function", @@ -477,6 +505,60 @@ mod tests { } } + #[test] + fn old_turn_reasoning_items_are_omitted_for_current_turn_context() { + let scheme = OpenAIResponsesScheme::new(); + let old_reasoning = Item::reasoning("old").with_encrypted_content("OLD_ENC"); + let current_reasoning = Item::reasoning("current").with_encrypted_content("CURRENT_ENC"); + let req = Request::new() + .user("old prompt") + .item(old_reasoning) + .assistant("old answer") + .user("new prompt") + .item(current_reasoning); + let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning()); + let encrypted: Vec<_> = body + .input + .iter() + .filter_map(|item| match item { + InputItem::Reasoning { + encrypted_content, .. + } => encrypted_content.as_deref(), + _ => None, + }) + .collect(); + assert_eq!(encrypted, vec!["CURRENT_ENC"]); + } + + #[test] + fn current_turn_reasoning_is_kept_across_function_call_loop() { + let scheme = OpenAIResponsesScheme::new(); + let req = Request::new() + .user("run tool") + .item(Item::reasoning("plan").with_encrypted_content("ENC")) + .item(Item::tool_call("c1", "tool", "{}")) + .item(Item::tool_result("c1", "ok")); + let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning()); + assert!(matches!(body.input[1], InputItem::Reasoning { .. })); + assert!(matches!(body.input[2], InputItem::FunctionCall { .. })); + assert!(matches!( + body.input[3], + InputItem::FunctionCallOutput { .. } + )); + } + + #[test] + fn reasoning_request_uses_current_turn_context() { + let scheme = OpenAIResponsesScheme::new(); + let mut req = Request::new().user("hi"); + req.config.reasoning = Some(ReasoningControl::Effort(ReasoningEffort::Medium)); + let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning()); + let reasoning = body.reasoning.expect("reasoning should be set"); + assert_eq!(reasoning.context, "current_turn"); + let json = serde_json::to_value(reasoning).unwrap(); + assert_eq!(json["context"], "current_turn"); + } + #[test] fn reasoning_summary_field_is_always_serialized() { // Responses API は reasoning item に `summary` を必須で要求する。 @@ -508,6 +590,7 @@ mod tests { let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning()); let reasoning = body.reasoning.expect("reasoning should be set"); assert_eq!(reasoning.effort.as_deref(), Some("high")); + assert_eq!(reasoning.context, "current_turn"); assert_eq!(reasoning.summary, "auto"); } diff --git a/crates/llm-worker/src/llm_client/transport.rs b/crates/llm-worker/src/llm_client/transport.rs index d3ef2ea6..26d4f9f3 100644 --- a/crates/llm-worker/src/llm_client/transport.rs +++ b/crates/llm-worker/src/llm_client/transport.rs @@ -14,7 +14,7 @@ use futures::{Stream, StreamExt, TryStreamExt}; use reqwest::header::{ ACCEPT, CONTENT_ENCODING, CONTENT_TYPE, HeaderMap, HeaderName, HeaderValue, RETRY_AFTER, }; -use serde_json::{Value, json}; +use serde_json::{Map, Value, json}; use super::auth::{AuthProvider, AuthRequirement}; use super::capability::ModelCapability; @@ -260,6 +260,60 @@ fn json_value_kind(value: &Value) -> &'static str { } } +fn request_body_shape_payload(body: &Value) -> Value { + let mut map = Map::new(); + if let Some(input) = body.get("input").and_then(Value::as_array) { + let items_json_bytes = serde_json::to_vec(input).map(|bytes| bytes.len()).ok(); + let mut reasoning_items = 0usize; + let mut reasoning_encrypted_content_count = 0usize; + let mut reasoning_encrypted_content_bytes = 0usize; + for item in input { + if item.get("type").and_then(Value::as_str) != Some("reasoning") { + continue; + } + reasoning_items += 1; + if let Some(encrypted) = item.get("encrypted_content").and_then(Value::as_str) { + reasoning_encrypted_content_count += 1; + reasoning_encrypted_content_bytes += encrypted.len(); + } + } + map.insert("items_len".to_string(), json!(input.len())); + map.insert("items_json_bytes".to_string(), json!(items_json_bytes)); + map.insert("reasoning_items".to_string(), json!(reasoning_items)); + map.insert( + "reasoning_encrypted_content_count".to_string(), + json!(reasoning_encrypted_content_count), + ); + map.insert( + "reasoning_encrypted_content_bytes".to_string(), + json!(reasoning_encrypted_content_bytes), + ); + } + let reasoning_context = body + .get("reasoning") + .and_then(|reasoning| reasoning.get("context")) + .and_then(Value::as_str); + map.insert("reasoning_context".to_string(), json!(reasoning_context)); + Value::Object(map) +} + +fn api_error_code(error: &ClientError) -> Option<&str> { + match error { + ClientError::Api { code, .. } => code.as_deref(), + _ => None, + } +} + +fn is_context_length_exceeded(error: &ClientError) -> bool { + match error { + ClientError::Api { code, message, .. } => { + code.as_deref() == Some("context_length_exceeded") + || message.contains("context_length_exceeded") + } + _ => false, + } +} + async fn response_with_timeout( future: impl std::future::Future>, timeout: Duration, @@ -296,7 +350,11 @@ async fn classify_error_response(resp: reqwest::Response) -> ClientError { let text = resp.text().await.unwrap_or_default(); if let Ok(json) = serde_json::from_str::(&text) { let error = json.get("error").unwrap_or(&json); - let code = error.get("type").and_then(|v| v.as_str()).map(String::from); + let code = error + .get("code") + .and_then(|v| v.as_str()) + .or_else(|| error.get("type").and_then(|v| v.as_str())) + .map(String::from); let message = error .get("message") .and_then(|v| v.as_str()) @@ -406,12 +464,14 @@ impl LlmClient for HttpTransport { let body = self .scheme .build_request_body(&self.model_id, &request, &self.capability); + let body_shape = request_body_shape_payload(&body); emit_transport_trace( &request, "transport_body_build_done", json!({ "elapsed_ms": body_started.elapsed().as_millis() as u64, "body_kind": json_value_kind(&body), + "request_shape": body_shape.clone(), }), ); @@ -438,6 +498,7 @@ impl LlmClient for HttpTransport { "encoding": request_body.encoding(), "raw_json_bytes": request_body.raw_json_bytes(), "wire_bytes": request_body.wire_bytes(), + "request_shape": body_shape.clone(), }), ); @@ -479,15 +540,23 @@ impl LlmClient for HttpTransport { }; if !response.status().is_success() { + let status = response.status().as_u16(); + let retry_after_present = response.headers().get(RETRY_AFTER).is_some(); + let error = classify_error_response(response).await; + let context_length_exceeded = is_context_length_exceeded(&error); emit_transport_trace( &request, "transport_http_status_error", json!({ - "status": response.status().as_u16(), - "retry_after_present": response.headers().get(RETRY_AFTER).is_some(), + "status": status, + "retry_after_present": retry_after_present, + "api_error_code": api_error_code(&error), + "context_length_exceeded": context_length_exceeded, + "provider_usage_absent": context_length_exceeded, + "request_shape": body_shape.clone(), }), ); - return Err(classify_error_response(response).await); + return Err(error); } emit_transport_trace( @@ -611,6 +680,24 @@ mod tests { ) } + #[test] + fn request_body_shape_counts_reasoning_encrypted_content() { + let payload = request_body_shape_payload(&json!({ + "reasoning": { "context": "current_turn" }, + "input": [ + { "type": "message", "role": "user", "content": [] }, + { "type": "reasoning", "encrypted_content": "abc", "summary": [] }, + { "type": "reasoning", "encrypted_content": "defgh", "summary": [] } + ] + })); + assert_eq!(payload["items_len"], 3); + assert_eq!(payload["reasoning_items"], 2); + assert_eq!(payload["reasoning_encrypted_content_count"], 2); + assert_eq!(payload["reasoning_encrypted_content_bytes"], 8); + assert_eq!(payload["reasoning_context"], "current_turn"); + assert!(payload["items_json_bytes"].as_u64().unwrap() > 0); + } + #[tokio::test] async fn response_timeout_returns_retryable_lifecycle_timeout() { let err = response_with_timeout( diff --git a/crates/llm-worker/src/worker.rs b/crates/llm-worker/src/worker.rs index c4ebe72f..2728838a 100644 --- a/crates/llm-worker/src/worker.rs +++ b/crates/llm-worker/src/worker.rs @@ -2029,12 +2029,31 @@ fn items_trace_payload( _ => None, }; + let mut reasoning_items = 0usize; + let mut reasoning_encrypted_content_count = 0usize; + let mut reasoning_encrypted_content_bytes = 0usize; + for item in items { + if let Item::Reasoning { + encrypted_content, .. + } = item + { + reasoning_items += 1; + if let Some(encrypted) = encrypted_content { + reasoning_encrypted_content_count += 1; + reasoning_encrypted_content_bytes += encrypted.len(); + } + } + } + json!({ "items_len": items.len(), "items_json_bytes": serde_json::to_vec(items).map(|bytes| bytes.len()).ok(), "tools_len": tools_len, "cache_anchor": cache_anchor, "cache_key_present": cache_key_present, + "reasoning_items": reasoning_items, + "reasoning_encrypted_content_count": reasoning_encrypted_content_count, + "reasoning_encrypted_content_bytes": reasoning_encrypted_content_bytes, "last_item_kind": last.map(item_kind), "last_item_json_bytes": last.and_then(|item| serde_json::to_vec(item).ok().map(|bytes| bytes.len())), "last_tool_result": last_tool_result, diff --git a/crates/manifest/src/model.rs b/crates/manifest/src/model.rs index cb6d4d6c..aebc7965 100644 --- a/crates/manifest/src/model.rs +++ b/crates/manifest/src/model.rs @@ -52,10 +52,16 @@ pub struct ModelManifest { /// `default_capability` → scheme 既定の順で解決される。 #[serde(default, skip_serializing_if = "Option::is_none")] pub capability: Option, - /// モデルのコンテキストウィンドウ上限(tokens)。カタログ未掲載 / inline - /// モデルでもここで明示 override できる。 + /// モデルの希望コンテキストウィンドウ(tokens)。カタログ未掲載 / inline + /// モデルでもここで明示 override できる。実効値は `max_context_window` + /// またはカタログ上の backend maximum で clamp される。 #[serde(default, skip_serializing_if = "Option::is_none")] pub context_window: Option, + /// backend が実際に受け付けるコンテキストウィンドウ上限(tokens)。 + /// 表示・安全判定に使う実効 context window は `context_window` とこの値の + /// 小さい方になる。 + #[serde(default, skip_serializing_if = "Option::is_none")] + pub max_context_window: Option, } impl ModelManifest { @@ -70,6 +76,7 @@ impl ModelManifest { auth: upper.auth.or(self.auth), capability: upper.capability.or(self.capability), context_window: upper.context_window.or(self.context_window), + max_context_window: upper.max_context_window.or(self.max_context_window), } } } diff --git a/crates/pod/src/ipc/interceptor.rs b/crates/pod/src/ipc/interceptor.rs index 3f2721fb..a39a08a2 100644 --- a/crates/pod/src/ipc/interceptor.rs +++ b/crates/pod/src/ipc/interceptor.rs @@ -22,6 +22,7 @@ use tracing::info; use tracing::warn; use crate::compact::state::CompactState; +use crate::compact::usage_tracker::UsageTracker; use session_store::{SystemItem, SystemReminder}; use tools::{TaskEntry, TaskStatus, TaskStore}; @@ -91,6 +92,10 @@ pub(crate) struct PodInterceptor { /// per-request `context` to estimate current occupancy for threshold /// checks. `None` when compaction is disabled (both thresholds unset). usage_history: Option>>>, + /// In-flight usage records observed during the current run but not yet + /// persisted into `usage_history`. Subsequent tool-loop LLM calls must + /// see these records during pre-request safety accounting. + usage_tracker: Option>, /// Pending-notification buffer drained into `worker.history` /// via [`Self::pending_history_appends`] just before the next LLM /// request. The Worker `extend`s these into its persistent history @@ -138,6 +143,7 @@ impl PodInterceptor { registry, compact_state, usage_history, + usage_tracker: None, pending_notifies, pending_attachments, task_store, @@ -149,6 +155,11 @@ impl PodInterceptor { } } + pub(crate) fn with_usage_tracker(mut self, usage_tracker: Arc) -> Self { + self.usage_tracker = Some(usage_tracker); + self + } + /// Commit each `SystemItem` as its own `LogEntry::SystemItem` /// entry through the attached writer (no-op when no writer is /// wired). Sync — writes complete before the matching @@ -175,7 +186,10 @@ impl PodInterceptor { /// `usage_history` is not attached (compaction fully disabled). fn estimated_tokens(&self, context: &[Item]) -> Option { let handle = self.usage_history.as_ref()?; - let records = handle.lock().expect("usage_history poisoned").clone(); + let mut records = handle.lock().expect("usage_history poisoned").clone(); + if let Some(tracker) = self.usage_tracker.as_ref() { + records.extend(tracker.records()); + } Some(total_tokens(context, &records).tokens) } @@ -305,9 +319,15 @@ impl Interceptor for PodInterceptor { if !state.is_disabled() && !state.just_compacted() { let current = current_tokens.unwrap_or(0); if state.exceeds_request(current) { + let shape = context_shape(context); info!( input_tokens = current, threshold = state.request_threshold().unwrap_or(0), + items_len = shape.items_len, + items_json_bytes = shape.items_json_bytes, + reasoning_items = shape.reasoning_items, + reasoning_encrypted_content_count = shape.reasoning_encrypted_content_count, + reasoning_encrypted_content_bytes = shape.reasoning_encrypted_content_bytes, "Between-requests compaction threshold exceeded, yielding" ); return PreRequestAction::Yield; @@ -400,6 +420,37 @@ impl Interceptor for PodInterceptor { } } +struct ContextShape { + items_len: usize, + items_json_bytes: Option, + reasoning_items: usize, + reasoning_encrypted_content_count: usize, + reasoning_encrypted_content_bytes: usize, +} + +fn context_shape(context: &[Item]) -> ContextShape { + let mut shape = ContextShape { + items_len: context.len(), + items_json_bytes: serde_json::to_vec(context).ok().map(|bytes| bytes.len()), + reasoning_items: 0, + reasoning_encrypted_content_count: 0, + reasoning_encrypted_content_bytes: 0, + }; + for item in context { + if let Item::Reasoning { + encrypted_content, .. + } = item + { + shape.reasoning_items += 1; + if let Some(encrypted) = encrypted_content { + shape.reasoning_encrypted_content_count += 1; + shape.reasoning_encrypted_content_bytes += encrypted.len(); + } + } + } + shape +} + fn extract_message_text(item: &Item) -> Option { match item { Item::Message { content, .. } => Some( @@ -528,6 +579,40 @@ mod tests { assert_eq!(count.load(Ordering::Relaxed), 0); } + #[tokio::test] + async fn pre_llm_request_counts_in_flight_usage_records() { + let registry = Arc::new(HookRegistryBuilder::new().build()); + let state = Arc::new(CompactState::new(None, Some(100), 2)); + let ctx_items = vec![Item::user_message("hi")]; + let history = usage_handle_with(ctx_items.len(), 50); + let usage_tracker = Arc::new(UsageTracker::new()); + usage_tracker.note_request(ctx_items.len()); + usage_tracker.record_usage(&llm_worker::event::UsageEvent { + input_tokens: Some(150), + output_tokens: Some(0), + total_tokens: Some(150), + cache_read_input_tokens: Some(0), + cache_creation_input_tokens: Some(0), + }); + + let interceptor = PodInterceptor::new( + registry, + Some(state), + Some(history), + NotifyBuffer::new(), + Arc::new(Mutex::new(Vec::new())), + TaskStore::new(), + Arc::new(TaskReminderState::new()), + PromptCatalog::builtins_only().unwrap(), + None, + ) + .with_usage_tracker(usage_tracker); + let mut ctx = ctx_items; + let action = interceptor.pre_llm_request(&mut ctx).await; + + assert!(matches!(action, PreRequestAction::Yield)); + } + #[tokio::test] async fn pre_llm_request_runs_hooks_when_under_threshold() { let count = Arc::new(AtomicUsize::new(0)); diff --git a/crates/pod/src/pod.rs b/crates/pod/src/pod.rs index 198e4593..786f2e50 100644 --- a/crates/pod/src/pod.rs +++ b/crates/pod/src/pod.rs @@ -1270,7 +1270,8 @@ impl Pod { self.task_reminder_state.clone(), self.prompts.clone(), self.log_writer.clone(), - ); + ) + .with_usage_tracker(self.usage_tracker.clone()); self.worker_mut().set_interceptor(interceptor); self.interceptor_installed = true; } diff --git a/crates/provider/src/catalog.rs b/crates/provider/src/catalog.rs index f34890cb..26e4092d 100644 --- a/crates/provider/src/catalog.rs +++ b/crates/provider/src/catalog.rs @@ -117,9 +117,14 @@ pub struct ModelEntry { #[serde(default)] pub capability: Option, /// モデル単位の context window。省略時は provider default → builtin - /// fallback にフォールバックする。 + /// fallback にフォールバックする。実効値は `max_context_window` で clamp + /// される。 #[serde(default)] pub context_window: Option, + /// backend が実際に受け付ける context window の上限。UI や pre-request + /// safety は希望値ではなく clamp 済みの実効値を使う。 + #[serde(default)] + pub max_context_window: Option, } /// 解決済みモデル設定。`build_client` が消費する完成形。 @@ -130,7 +135,10 @@ pub struct ModelConfig { pub model_id: String, pub auth: AuthRef, pub capability: Option, + /// Effective context window after backend maximum clamping. pub context_window: u64, + /// Backend maximum that constrained `context_window`, when known. + pub max_context_window: Option, } #[derive(Debug, Deserialize)] @@ -259,7 +267,8 @@ fn split_ref(s: &str) -> Option<(&str, &str)> { /// manifest 明示 > model catalog > provider.default_capability > /// (`build_client` 側で)`Scheme::default_capability()`。 /// context_window は manifest 明示 > model catalog > provider default > -/// [`DEFAULT_CONTEXT_WINDOW`]。 +/// [`DEFAULT_CONTEXT_WINDOW`]。実効 context_window は manifest/model の +/// max_context_window で clamp される。 pub fn resolve_model_manifest(manifest: &ModelManifest) -> Result { let providers = load_providers().map_err(ResolveError::LoadProviders)?; let models = load_models().map_err(ResolveError::LoadModels)?; @@ -310,11 +319,15 @@ pub fn resolve_with_catalogs( .and_then(|m| m.capability.clone()) .or_else(|| provider.default_capability.clone()) }); - let context_window = manifest + let desired_context_window = manifest .context_window .or_else(|| model_entry.and_then(|m| m.context_window)) .or(provider.default_context_window) .unwrap_or(DEFAULT_CONTEXT_WINDOW); + let max_context_window = manifest + .max_context_window + .or_else(|| model_entry.and_then(|m| m.max_context_window)); + let context_window = clamp_context_window(desired_context_window, max_context_window); Ok(ModelConfig { scheme, base_url, @@ -322,6 +335,7 @@ pub fn resolve_with_catalogs( auth, capability, context_window, + max_context_window, }) } else { let scheme = manifest @@ -335,17 +349,24 @@ pub fn resolve_with_catalogs( .auth .clone() .ok_or(ResolveError::InlineMissing("auth"))?; + let desired_context_window = manifest.context_window.unwrap_or(DEFAULT_CONTEXT_WINDOW); + let max_context_window = manifest.max_context_window; Ok(ModelConfig { scheme, base_url: manifest.base_url.clone(), model_id, auth, capability: manifest.capability.clone(), - context_window: manifest.context_window.unwrap_or(DEFAULT_CONTEXT_WINDOW), + context_window: clamp_context_window(desired_context_window, max_context_window), + max_context_window, }) } } +fn clamp_context_window(desired: u64, max: Option) -> u64 { + max.map(|limit| desired.min(limit)).unwrap_or(desired) +} + #[cfg(test)] mod tests { use super::*; @@ -420,6 +441,52 @@ mod tests { assert_eq!(cfg.context_window, 123_456); } + #[test] + fn context_window_is_clamped_by_catalog_backend_max() { + let providers = load_builtin_providers().unwrap(); + let models = load_builtin_models().unwrap(); + let manifest = ModelManifest { + ref_: Some("codex-oauth/gpt-5.5".into()), + context_window: Some(1_000_000), + ..Default::default() + }; + let cfg = resolve_with_catalogs(&manifest, &providers, &models).unwrap(); + assert_eq!(cfg.context_window, 272_000); + assert_eq!(cfg.max_context_window, Some(272_000)); + } + + #[test] + fn inline_context_window_is_clamped_by_manifest_backend_max() { + let providers = load_builtin_providers().unwrap(); + let models = load_builtin_models().unwrap(); + let manifest = ModelManifest { + scheme: Some(SchemeKind::Anthropic), + model_id: Some("custom".into()), + auth: Some(AuthRef::None), + context_window: Some(1_000_000), + max_context_window: Some(272_000), + ..Default::default() + }; + let cfg = resolve_with_catalogs(&manifest, &providers, &models).unwrap(); + assert_eq!(cfg.context_window, 272_000); + assert_eq!(cfg.max_context_window, Some(272_000)); + } + + #[test] + fn manifest_backend_max_overrides_catalog_backend_max() { + let providers = load_builtin_providers().unwrap(); + let models = load_builtin_models().unwrap(); + let manifest = ModelManifest { + ref_: Some("codex-oauth/gpt-5.5".into()), + context_window: Some(1_000_000), + max_context_window: Some(500_000), + ..Default::default() + }; + let cfg = resolve_with_catalogs(&manifest, &providers, &models).unwrap(); + assert_eq!(cfg.context_window, 500_000); + assert_eq!(cfg.max_context_window, Some(500_000)); + } + #[test] fn resolve_ref_with_inline_overrides() { let providers = load_builtin_providers().unwrap(); diff --git a/crates/provider/src/lib.rs b/crates/provider/src/lib.rs index 6bd3ea5b..c90897ce 100644 --- a/crates/provider/src/lib.rs +++ b/crates/provider/src/lib.rs @@ -187,6 +187,7 @@ mod tests { }, capability: None, context_window: 200_000, + max_context_window: None, } } @@ -315,6 +316,7 @@ mod tests { auth: AuthRef::None, capability: None, context_window: 200_000, + max_context_window: None, }; assert!(build_client_from_config(&config).is_ok()); } diff --git a/docs/ref/model-reasoning-context.md b/docs/ref/model-reasoning-context.md index 1f1d4a31..f44e3b66 100644 --- a/docs/ref/model-reasoning-context.md +++ b/docs/ref/model-reasoning-context.md @@ -85,7 +85,9 @@ reasoning トークンは各ターンの後に破棄される。次ターンに 1. `previous_response_id` パラメータで過去のレスポンスを参照 2. `response.output` の全アイテムを次の `input` に手動で渡す -ステートレス利用(`store=false`、ZDR組織)の場合は `include=["reasoning.encrypted_content"]` を指定すれば暗号化された推論コンテンツを受け取り、次リクエストに渡すことで推論を引き継げる。 +ステートレス利用(`store=false`、ZDR組織)の場合は `include=["reasoning.encrypted_content"]` を指定すれば暗号化された推論コンテンツを受け取り、次リクエストに渡すことで推論を引き継げる。ただし Insomnia では Responses リクエストに `reasoning.context="current_turn"` を明示し、直近の user message 以降の同一ターン内 reasoning item だけを `input` に残す。過去ターンの persisted `encrypted_content` は、履歴に残っていても次ターンへ盲目的には再送しない。 + +同一ターン内の function-call loop では、`reasoning item → function_call → function_call_output → 次の Responses request` の連続性を保つため、直近 user message 以降の reasoning item は保持する。ToolResult は wire 上で user 側 item に見えるが、新しい user turn ではなく function-call chain の継続なので reasoning reset の境界にはしない。 #### モデル世代差 @@ -185,7 +187,7 @@ Ollamaはローカル実行プラットフォームで、モデルごとに思 **ChatGPT を使うとき** - 新規実装は **Responses API** を選ぶ(Chat Completions は推論引き継ぎが弱い) -- ZDR組織でも `reasoning.encrypted_content` で推論を引き継げる +- ZDR組織でも `reasoning.encrypted_content` で推論を引き継げるが、Insomnia では `reasoning.context="current_turn"` に合わせて同一 user turn / function-call loop 内だけ再送する - raw reasoning の抽出を試みない(規約違反の可能性) **Ollama を使うとき** diff --git a/resources/models/builtin.toml b/resources/models/builtin.toml index 562c1d9e..ae56682f 100644 --- a/resources/models/builtin.toml +++ b/resources/models/builtin.toml @@ -38,6 +38,13 @@ provider = "codex-oauth" context_window = 400000 capability = { tool_calling = "parallel", structured_output = "json_schema", reasoning = "effort", vision = true, prompt_caching = { kind = "auto" } } +[[model]] +id = "gpt-5.5" +provider = "codex-oauth" +context_window = 1000000 +max_context_window = 272000 +capability = { tool_calling = "parallel", structured_output = "json_schema", reasoning = "effort", vision = true, prompt_caching = { kind = "auto" } } + # OpenRouter [[model]] id = "anthropic/claude-sonnet-4"