diff --git a/crates/llm-worker/src/llm_client/scheme/openai_responses/request.rs b/crates/llm-worker/src/llm_client/scheme/openai_responses/request.rs
index c74a3d07..acce55c3 100644
--- a/crates/llm-worker/src/llm_client/scheme/openai_responses/request.rs
+++ b/crates/llm-worker/src/llm_client/scheme/openai_responses/request.rs
@@ -62,6 +62,9 @@ pub(crate) struct ResponsesRequest {
 pub(crate) struct ReasoningConfig {
     #[serde(skip_serializing_if = "Option::is_none")]
     pub effort: Option<String>,
+    /// Reasoning encrypted_content は同一 user turn 内だけ再利用する。
+    /// 古い turn の reasoning item は request input から除外する。
+    pub context: &'static str,
     /// summary の出力制御。`"auto"` 固定で summary_text を受け取る。
     pub summary: &'static str,
 }
@@ -193,6 +196,7 @@ impl OpenAIResponsesScheme {
                     ReasoningControl::Effort(effort) => Some(effort.as_str().to_string()),
                     ReasoningControl::BudgetTokens(_) => None,
                 },
+                context: "current_turn",
                 summary: "auto",
             })
             .filter(|reasoning| reasoning.effort.is_some());
@@ -236,8 +240,9 @@ impl OpenAIResponsesScheme {
 
 /// `Item` 列を `input[]` に変換する。
 fn convert_items_to_input(items: &[Item]) -> Vec<InputItem> {
+    let current_turn_start = current_turn_start_index(items);
     let mut out = Vec::with_capacity(items.len());
-    for item in items {
+    for (idx, item) in items.iter().enumerate() {
         match item {
             Item::Message { role, content, .. } => {
                 let (role_str, text_variant): (&'static str, fn(String) -> InputContent) =
@@ -294,6 +299,9 @@ fn convert_items_to_input(items: &[Item]) -> Vec<InputItem> {
                 encrypted_content,
                 ..
             } => {
+                if idx < current_turn_start {
+                    continue;
+                }
                 let summary_parts = summary
                     .iter()
                     .filter(|s| !s.is_empty())
@@ -316,6 +324,26 @@ fn convert_items_to_input(items: &[Item]) -> Vec<InputItem> {
     out
 }
 
+/// Responses の `reasoning.context = "current_turn"` に合わせ、直近の
+/// user message 以降だけを current turn とみなす。ToolResult は Responses
+/// wire 上では user 側 item だが、新しい人間/外部入力ではなく function-call
+/// chain の継続なので turn reset には使わない。System/developer notes も
+/// 同一 turn 内の補助入力になり得るため reset しない。
+fn current_turn_start_index(items: &[Item]) -> usize {
+    items
+        .iter()
+        .rposition(|item| {
+            matches!(
+                item,
+                Item::Message {
+                    role: Role::User,
+                    ..
+                }
+            )
+        })
+        .unwrap_or(0)
+}
+
 fn convert_tool(tool: &ToolDefinition) -> ResponseTool {
     ResponseTool {
         r#type: "function",
@@ -477,6 +505,60 @@ mod tests {
         }
     }
 
+    #[test]
+    fn old_turn_reasoning_items_are_omitted_for_current_turn_context() {
+        let scheme = OpenAIResponsesScheme::new();
+        let old_reasoning = Item::reasoning("old").with_encrypted_content("OLD_ENC");
+        let current_reasoning = Item::reasoning("current").with_encrypted_content("CURRENT_ENC");
+        let req = Request::new()
+            .user("old prompt")
+            .item(old_reasoning)
+            .assistant("old answer")
+            .user("new prompt")
+            .item(current_reasoning);
+        let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
+        let encrypted: Vec<_> = body
+            .input
+            .iter()
+            .filter_map(|item| match item {
+                InputItem::Reasoning {
+                    encrypted_content, ..
+                } => encrypted_content.as_deref(),
+                _ => None,
+            })
+            .collect();
+        assert_eq!(encrypted, vec!["CURRENT_ENC"]);
+    }
+
+    #[test]
+    fn current_turn_reasoning_is_kept_across_function_call_loop() {
+        let scheme = OpenAIResponsesScheme::new();
+        let req = Request::new()
+            .user("run tool")
+            .item(Item::reasoning("plan").with_encrypted_content("ENC"))
+            .item(Item::tool_call("c1", "tool", "{}"))
+            .item(Item::tool_result("c1", "ok"));
+        let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
+        assert!(matches!(body.input[1], InputItem::Reasoning { .. }));
+        assert!(matches!(body.input[2], InputItem::FunctionCall { .. }));
+        assert!(matches!(
+            body.input[3],
+            InputItem::FunctionCallOutput { .. }
+        ));
+    }
+
+    #[test]
+    fn reasoning_request_uses_current_turn_context() {
+        let scheme = OpenAIResponsesScheme::new();
+        let mut req = Request::new().user("hi");
+        req.config.reasoning = Some(ReasoningControl::Effort(ReasoningEffort::Medium));
+        let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
+        let reasoning = body.reasoning.expect("reasoning should be set");
+        assert_eq!(reasoning.context, "current_turn");
+        let json = serde_json::to_value(reasoning).unwrap();
+        assert_eq!(json["context"], "current_turn");
+    }
+
     #[test]
     fn reasoning_summary_field_is_always_serialized() {
         // Responses API は reasoning item に `summary` を必須で要求する。
@@ -508,6 +590,7 @@ mod tests {
         let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
         let reasoning = body.reasoning.expect("reasoning should be set");
         assert_eq!(reasoning.effort.as_deref(), Some("high"));
+        assert_eq!(reasoning.context, "current_turn");
         assert_eq!(reasoning.summary, "auto");
     }
 
diff --git a/crates/llm-worker/src/llm_client/transport.rs b/crates/llm-worker/src/llm_client/transport.rs
index d3ef2ea6..26d4f9f3 100644
--- a/crates/llm-worker/src/llm_client/transport.rs
+++ b/crates/llm-worker/src/llm_client/transport.rs
@@ -14,7 +14,7 @@ use futures::{Stream, StreamExt, TryStreamExt};
 use reqwest::header::{
     ACCEPT, CONTENT_ENCODING, CONTENT_TYPE, HeaderMap, HeaderName, HeaderValue, RETRY_AFTER,
 };
-use serde_json::{Value, json};
+use serde_json::{Map, Value, json};
 
 use super::auth::{AuthProvider, AuthRequirement};
 use super::capability::ModelCapability;
@@ -260,6 +260,60 @@ fn json_value_kind(value: &Value) -> &'static str {
     }
 }
 
+fn request_body_shape_payload(body: &Value) -> Value {
+    let mut map = Map::new();
+    if let Some(input) = body.get("input").and_then(Value::as_array) {
+        let items_json_bytes = serde_json::to_vec(input).map(|bytes| bytes.len()).ok();
+        let mut reasoning_items = 0usize;
+        let mut reasoning_encrypted_content_count = 0usize;
+        let mut reasoning_encrypted_content_bytes = 0usize;
+        for item in input {
+            if item.get("type").and_then(Value::as_str) != Some("reasoning") {
+                continue;
+            }
+            reasoning_items += 1;
+            if let Some(encrypted) = item.get("encrypted_content").and_then(Value::as_str) {
+                reasoning_encrypted_content_count += 1;
+                reasoning_encrypted_content_bytes += encrypted.len();
+            }
+        }
+        map.insert("items_len".to_string(), json!(input.len()));
+        map.insert("items_json_bytes".to_string(), json!(items_json_bytes));
+        map.insert("reasoning_items".to_string(), json!(reasoning_items));
+        map.insert(
+            "reasoning_encrypted_content_count".to_string(),
+            json!(reasoning_encrypted_content_count),
+        );
+        map.insert(
+            "reasoning_encrypted_content_bytes".to_string(),
+            json!(reasoning_encrypted_content_bytes),
+        );
+    }
+    let reasoning_context = body
+        .get("reasoning")
+        .and_then(|reasoning| reasoning.get("context"))
+        .and_then(Value::as_str);
+    map.insert("reasoning_context".to_string(), json!(reasoning_context));
+    Value::Object(map)
+}
+
+fn api_error_code(error: &ClientError) -> Option<&str> {
+    match error {
+        ClientError::Api { code, .. } => code.as_deref(),
+        _ => None,
+    }
+}
+
+fn is_context_length_exceeded(error: &ClientError) -> bool {
+    match error {
+        ClientError::Api { code, message, .. } => {
+            code.as_deref() == Some("context_length_exceeded")
+                || message.contains("context_length_exceeded")
+        }
+        _ => false,
+    }
+}
+
 async fn response_with_timeout(
     future: impl std::future::Future<Output = Result<reqwest::Response, reqwest::Error>>,
     timeout: Duration,
@@ -296,7 +350,11 @@ async fn classify_error_response(resp: reqwest::Response) -> ClientError {
     let text = resp.text().await.unwrap_or_default();
     if let Ok(json) = serde_json::from_str::<serde_json::Value>(&text) {
         let error = json.get("error").unwrap_or(&json);
-        let code = error.get("type").and_then(|v| v.as_str()).map(String::from);
+        let code = error
+            .get("code")
+            .and_then(|v| v.as_str())
+            .or_else(|| error.get("type").and_then(|v| v.as_str()))
+            .map(String::from);
         let message = error
             .get("message")
             .and_then(|v| v.as_str())
@@ -406,12 +464,14 @@ impl<S: Scheme + Clone + 'static> LlmClient for HttpTransport<S> {
         let body = self
             .scheme
             .build_request_body(&self.model_id, &request, &self.capability);
+        let body_shape = request_body_shape_payload(&body);
         emit_transport_trace(
             &request,
             "transport_body_build_done",
             json!({
                 "elapsed_ms": body_started.elapsed().as_millis() as u64,
                 "body_kind": json_value_kind(&body),
+                "request_shape": body_shape.clone(),
             }),
         );
 
@@ -438,6 +498,7 @@ impl<S: Scheme + Clone + 'static> LlmClient for HttpTransport<S> {
                 "encoding": request_body.encoding(),
                 "raw_json_bytes": request_body.raw_json_bytes(),
                 "wire_bytes": request_body.wire_bytes(),
+                "request_shape": body_shape.clone(),
             }),
         );
 
@@ -479,15 +540,23 @@ impl<S: Scheme + Clone + 'static> LlmClient for HttpTransport<S> {
             };
 
         if !response.status().is_success() {
+            let status = response.status().as_u16();
+            let retry_after_present = response.headers().get(RETRY_AFTER).is_some();
+            let error = classify_error_response(response).await;
+            let context_length_exceeded = is_context_length_exceeded(&error);
             emit_transport_trace(
                 &request,
                 "transport_http_status_error",
                 json!({
-                    "status": response.status().as_u16(),
-                    "retry_after_present": response.headers().get(RETRY_AFTER).is_some(),
+                    "status": status,
+                    "retry_after_present": retry_after_present,
+                    "api_error_code": api_error_code(&error),
+                    "context_length_exceeded": context_length_exceeded,
+                    "provider_usage_absent": context_length_exceeded,
+                    "request_shape": body_shape.clone(),
                 }),
             );
-            return Err(classify_error_response(response).await);
+            return Err(error);
         }
 
         emit_transport_trace(
@@ -611,6 +680,24 @@ mod tests {
         )
     }
 
+    #[test]
+    fn request_body_shape_counts_reasoning_encrypted_content() {
+        let payload = request_body_shape_payload(&json!({
+            "reasoning": { "context": "current_turn" },
+            "input": [
+                { "type": "message", "role": "user", "content": [] },
+                { "type": "reasoning", "encrypted_content": "abc", "summary": [] },
+                { "type": "reasoning", "encrypted_content": "defgh", "summary": [] }
+            ]
+        }));
+        assert_eq!(payload["items_len"], 3);
+        assert_eq!(payload["reasoning_items"], 2);
+        assert_eq!(payload["reasoning_encrypted_content_count"], 2);
+        assert_eq!(payload["reasoning_encrypted_content_bytes"], 8);
+        assert_eq!(payload["reasoning_context"], "current_turn");
+        assert!(payload["items_json_bytes"].as_u64().unwrap() > 0);
+    }
+
     #[tokio::test]
     async fn response_timeout_returns_retryable_lifecycle_timeout() {
         let err = response_with_timeout(
diff --git a/crates/llm-worker/src/worker.rs b/crates/llm-worker/src/worker.rs
index c4ebe72f..2728838a 100644
--- a/crates/llm-worker/src/worker.rs
+++ b/crates/llm-worker/src/worker.rs
@@ -2029,12 +2029,31 @@ fn items_trace_payload(
         _ => None,
     };
 
+    let mut reasoning_items = 0usize;
+    let mut reasoning_encrypted_content_count = 0usize;
+    let mut reasoning_encrypted_content_bytes = 0usize;
+    for item in items {
+        if let Item::Reasoning {
+            encrypted_content, ..
+        } = item
+        {
+            reasoning_items += 1;
+            if let Some(encrypted) = encrypted_content {
+                reasoning_encrypted_content_count += 1;
+                reasoning_encrypted_content_bytes += encrypted.len();
+            }
+        }
+    }
+
     json!({
         "items_len": items.len(),
         "items_json_bytes": serde_json::to_vec(items).map(|bytes| bytes.len()).ok(),
         "tools_len": tools_len,
         "cache_anchor": cache_anchor,
         "cache_key_present": cache_key_present,
+        "reasoning_items": reasoning_items,
+        "reasoning_encrypted_content_count": reasoning_encrypted_content_count,
+        "reasoning_encrypted_content_bytes": reasoning_encrypted_content_bytes,
         "last_item_kind": last.map(item_kind),
         "last_item_json_bytes": last.and_then(|item| serde_json::to_vec(item).ok().map(|bytes| bytes.len())),
         "last_tool_result": last_tool_result,
diff --git a/crates/manifest/src/model.rs b/crates/manifest/src/model.rs
index cb6d4d6c..aebc7965 100644
--- a/crates/manifest/src/model.rs
+++ b/crates/manifest/src/model.rs
@@ -52,10 +52,16 @@ pub struct ModelManifest {
     /// `default_capability` → scheme 既定の順で解決される。
     #[serde(default, skip_serializing_if = "Option::is_none")]
     pub capability: Option<ModelCapability>,
-    /// モデルのコンテキストウィンドウ上限（tokens）。カタログ未掲載 / inline
-    /// モデルでもここで明示 override できる。
+    /// モデルの希望コンテキストウィンドウ（tokens）。カタログ未掲載 / inline
+    /// モデルでもここで明示 override できる。実効値は `max_context_window`
+    /// またはカタログ上の backend maximum で clamp される。
     #[serde(default, skip_serializing_if = "Option::is_none")]
     pub context_window: Option<u64>,
+    /// backend が実際に受け付けるコンテキストウィンドウ上限（tokens）。
+    /// 表示・安全判定に使う実効 context window は `context_window` とこの値の
+    /// 小さい方になる。
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub max_context_window: Option<u64>,
 }
 
 impl ModelManifest {
@@ -70,6 +76,7 @@ impl ModelManifest {
             auth: upper.auth.or(self.auth),
             capability: upper.capability.or(self.capability),
             context_window: upper.context_window.or(self.context_window),
+            max_context_window: upper.max_context_window.or(self.max_context_window),
         }
     }
 }
diff --git a/crates/pod/src/ipc/interceptor.rs b/crates/pod/src/ipc/interceptor.rs
index 3f2721fb..a39a08a2 100644
--- a/crates/pod/src/ipc/interceptor.rs
+++ b/crates/pod/src/ipc/interceptor.rs
@@ -22,6 +22,7 @@ use tracing::info;
 use tracing::warn;
 
 use crate::compact::state::CompactState;
+use crate::compact::usage_tracker::UsageTracker;
 use session_store::{SystemItem, SystemReminder};
 use tools::{TaskEntry, TaskStatus, TaskStore};
 
@@ -91,6 +92,10 @@ pub(crate) struct PodInterceptor {
     /// per-request `context` to estimate current occupancy for threshold
     /// checks. `None` when compaction is disabled (both thresholds unset).
     usage_history: Option<Arc<Mutex<Vec<UsageRecord>>>>,
+    /// In-flight usage records observed during the current run but not yet
+    /// persisted into `usage_history`. Subsequent tool-loop LLM calls must
+    /// see these records during pre-request safety accounting.
+    usage_tracker: Option<Arc<UsageTracker>>,
     /// Pending-notification buffer drained into `worker.history`
     /// via [`Self::pending_history_appends`] just before the next LLM
     /// request. The Worker `extend`s these into its persistent history
@@ -138,6 +143,7 @@ impl PodInterceptor {
             registry,
             compact_state,
             usage_history,
+            usage_tracker: None,
             pending_notifies,
             pending_attachments,
             task_store,
@@ -149,6 +155,11 @@ impl PodInterceptor {
         }
     }
 
+    pub(crate) fn with_usage_tracker(mut self, usage_tracker: Arc<UsageTracker>) -> Self {
+        self.usage_tracker = Some(usage_tracker);
+        self
+    }
+
     /// Commit each `SystemItem` as its own `LogEntry::SystemItem`
     /// entry through the attached writer (no-op when no writer is
     /// wired). Sync — writes complete before the matching
@@ -175,7 +186,10 @@ impl PodInterceptor {
     /// `usage_history` is not attached (compaction fully disabled).
     fn estimated_tokens(&self, context: &[Item]) -> Option<u64> {
         let handle = self.usage_history.as_ref()?;
-        let records = handle.lock().expect("usage_history poisoned").clone();
+        let mut records = handle.lock().expect("usage_history poisoned").clone();
+        if let Some(tracker) = self.usage_tracker.as_ref() {
+            records.extend(tracker.records());
+        }
         Some(total_tokens(context, &records).tokens)
     }
 
@@ -305,9 +319,15 @@ impl Interceptor for PodInterceptor {
             if !state.is_disabled() && !state.just_compacted() {
                 let current = current_tokens.unwrap_or(0);
                 if state.exceeds_request(current) {
+                    let shape = context_shape(context);
                     info!(
                         input_tokens = current,
                         threshold = state.request_threshold().unwrap_or(0),
+                        items_len = shape.items_len,
+                        items_json_bytes = shape.items_json_bytes,
+                        reasoning_items = shape.reasoning_items,
+                        reasoning_encrypted_content_count = shape.reasoning_encrypted_content_count,
+                        reasoning_encrypted_content_bytes = shape.reasoning_encrypted_content_bytes,
                         "Between-requests compaction threshold exceeded, yielding"
                     );
                     return PreRequestAction::Yield;
@@ -400,6 +420,37 @@ impl Interceptor for PodInterceptor {
     }
 }
 
+struct ContextShape {
+    items_len: usize,
+    items_json_bytes: Option<usize>,
+    reasoning_items: usize,
+    reasoning_encrypted_content_count: usize,
+    reasoning_encrypted_content_bytes: usize,
+}
+
+fn context_shape(context: &[Item]) -> ContextShape {
+    let mut shape = ContextShape {
+        items_len: context.len(),
+        items_json_bytes: serde_json::to_vec(context).ok().map(|bytes| bytes.len()),
+        reasoning_items: 0,
+        reasoning_encrypted_content_count: 0,
+        reasoning_encrypted_content_bytes: 0,
+    };
+    for item in context {
+        if let Item::Reasoning {
+            encrypted_content, ..
+        } = item
+        {
+            shape.reasoning_items += 1;
+            if let Some(encrypted) = encrypted_content {
+                shape.reasoning_encrypted_content_count += 1;
+                shape.reasoning_encrypted_content_bytes += encrypted.len();
+            }
+        }
+    }
+    shape
+}
+
 fn extract_message_text(item: &Item) -> Option<String> {
     match item {
         Item::Message { content, .. } => Some(
@@ -528,6 +579,40 @@ mod tests {
         assert_eq!(count.load(Ordering::Relaxed), 0);
     }
 
+    #[tokio::test]
+    async fn pre_llm_request_counts_in_flight_usage_records() {
+        let registry = Arc::new(HookRegistryBuilder::new().build());
+        let state = Arc::new(CompactState::new(None, Some(100), 2));
+        let ctx_items = vec![Item::user_message("hi")];
+        let history = usage_handle_with(ctx_items.len(), 50);
+        let usage_tracker = Arc::new(UsageTracker::new());
+        usage_tracker.note_request(ctx_items.len());
+        usage_tracker.record_usage(&llm_worker::event::UsageEvent {
+            input_tokens: Some(150),
+            output_tokens: Some(0),
+            total_tokens: Some(150),
+            cache_read_input_tokens: Some(0),
+            cache_creation_input_tokens: Some(0),
+        });
+
+        let interceptor = PodInterceptor::new(
+            registry,
+            Some(state),
+            Some(history),
+            NotifyBuffer::new(),
+            Arc::new(Mutex::new(Vec::new())),
+            TaskStore::new(),
+            Arc::new(TaskReminderState::new()),
+            PromptCatalog::builtins_only().unwrap(),
+            None,
+        )
+        .with_usage_tracker(usage_tracker);
+        let mut ctx = ctx_items;
+        let action = interceptor.pre_llm_request(&mut ctx).await;
+
+        assert!(matches!(action, PreRequestAction::Yield));
+    }
+
     #[tokio::test]
     async fn pre_llm_request_runs_hooks_when_under_threshold() {
         let count = Arc::new(AtomicUsize::new(0));
diff --git a/crates/pod/src/pod.rs b/crates/pod/src/pod.rs
index 198e4593..786f2e50 100644
--- a/crates/pod/src/pod.rs
+++ b/crates/pod/src/pod.rs
@@ -1270,7 +1270,8 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
                 self.task_reminder_state.clone(),
                 self.prompts.clone(),
                 self.log_writer.clone(),
-            );
+            )
+            .with_usage_tracker(self.usage_tracker.clone());
             self.worker_mut().set_interceptor(interceptor);
             self.interceptor_installed = true;
         }
diff --git a/crates/provider/src/catalog.rs b/crates/provider/src/catalog.rs
index f34890cb..26e4092d 100644
--- a/crates/provider/src/catalog.rs
+++ b/crates/provider/src/catalog.rs
@@ -117,9 +117,14 @@ pub struct ModelEntry {
     #[serde(default)]
     pub capability: Option<ModelCapability>,
     /// モデル単位の context window。省略時は provider default → builtin
-    /// fallback にフォールバックする。
+    /// fallback にフォールバックする。実効値は `max_context_window` で clamp
+    /// される。
     #[serde(default)]
     pub context_window: Option<u64>,
+    /// backend が実際に受け付ける context window の上限。UI や pre-request
+    /// safety は希望値ではなく clamp 済みの実効値を使う。
+    #[serde(default)]
+    pub max_context_window: Option<u64>,
 }
 
 /// 解決済みモデル設定。`build_client` が消費する完成形。
@@ -130,7 +135,10 @@ pub struct ModelConfig {
     pub model_id: String,
     pub auth: AuthRef,
     pub capability: Option<ModelCapability>,
+    /// Effective context window after backend maximum clamping.
     pub context_window: u64,
+    /// Backend maximum that constrained `context_window`, when known.
+    pub max_context_window: Option<u64>,
 }
 
 #[derive(Debug, Deserialize)]
@@ -259,7 +267,8 @@ fn split_ref(s: &str) -> Option<(&str, &str)> {
 /// manifest 明示 > model catalog > provider.default_capability >
 /// （`build_client` 側で）`Scheme::default_capability()`。
 /// context_window は manifest 明示 > model catalog > provider default >
-/// [`DEFAULT_CONTEXT_WINDOW`]。
+/// [`DEFAULT_CONTEXT_WINDOW`]。実効 context_window は manifest/model の
+/// max_context_window で clamp される。
 pub fn resolve_model_manifest(manifest: &ModelManifest) -> Result<ModelConfig, ResolveError> {
     let providers = load_providers().map_err(ResolveError::LoadProviders)?;
     let models = load_models().map_err(ResolveError::LoadModels)?;
@@ -310,11 +319,15 @@ pub fn resolve_with_catalogs(
                 .and_then(|m| m.capability.clone())
                 .or_else(|| provider.default_capability.clone())
         });
-        let context_window = manifest
+        let desired_context_window = manifest
             .context_window
             .or_else(|| model_entry.and_then(|m| m.context_window))
             .or(provider.default_context_window)
             .unwrap_or(DEFAULT_CONTEXT_WINDOW);
+        let max_context_window = manifest
+            .max_context_window
+            .or_else(|| model_entry.and_then(|m| m.max_context_window));
+        let context_window = clamp_context_window(desired_context_window, max_context_window);
         Ok(ModelConfig {
             scheme,
             base_url,
@@ -322,6 +335,7 @@ pub fn resolve_with_catalogs(
             auth,
             capability,
             context_window,
+            max_context_window,
         })
     } else {
         let scheme = manifest
@@ -335,17 +349,24 @@ pub fn resolve_with_catalogs(
             .auth
             .clone()
             .ok_or(ResolveError::InlineMissing("auth"))?;
+        let desired_context_window = manifest.context_window.unwrap_or(DEFAULT_CONTEXT_WINDOW);
+        let max_context_window = manifest.max_context_window;
         Ok(ModelConfig {
             scheme,
             base_url: manifest.base_url.clone(),
             model_id,
             auth,
             capability: manifest.capability.clone(),
-            context_window: manifest.context_window.unwrap_or(DEFAULT_CONTEXT_WINDOW),
+            context_window: clamp_context_window(desired_context_window, max_context_window),
+            max_context_window,
         })
     }
 }
 
+fn clamp_context_window(desired: u64, max: Option<u64>) -> u64 {
+    max.map(|limit| desired.min(limit)).unwrap_or(desired)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -420,6 +441,52 @@ mod tests {
         assert_eq!(cfg.context_window, 123_456);
     }
 
+    #[test]
+    fn context_window_is_clamped_by_catalog_backend_max() {
+        let providers = load_builtin_providers().unwrap();
+        let models = load_builtin_models().unwrap();
+        let manifest = ModelManifest {
+            ref_: Some("codex-oauth/gpt-5.5".into()),
+            context_window: Some(1_000_000),
+            ..Default::default()
+        };
+        let cfg = resolve_with_catalogs(&manifest, &providers, &models).unwrap();
+        assert_eq!(cfg.context_window, 272_000);
+        assert_eq!(cfg.max_context_window, Some(272_000));
+    }
+
+    #[test]
+    fn inline_context_window_is_clamped_by_manifest_backend_max() {
+        let providers = load_builtin_providers().unwrap();
+        let models = load_builtin_models().unwrap();
+        let manifest = ModelManifest {
+            scheme: Some(SchemeKind::Anthropic),
+            model_id: Some("custom".into()),
+            auth: Some(AuthRef::None),
+            context_window: Some(1_000_000),
+            max_context_window: Some(272_000),
+            ..Default::default()
+        };
+        let cfg = resolve_with_catalogs(&manifest, &providers, &models).unwrap();
+        assert_eq!(cfg.context_window, 272_000);
+        assert_eq!(cfg.max_context_window, Some(272_000));
+    }
+
+    #[test]
+    fn manifest_backend_max_overrides_catalog_backend_max() {
+        let providers = load_builtin_providers().unwrap();
+        let models = load_builtin_models().unwrap();
+        let manifest = ModelManifest {
+            ref_: Some("codex-oauth/gpt-5.5".into()),
+            context_window: Some(1_000_000),
+            max_context_window: Some(500_000),
+            ..Default::default()
+        };
+        let cfg = resolve_with_catalogs(&manifest, &providers, &models).unwrap();
+        assert_eq!(cfg.context_window, 500_000);
+        assert_eq!(cfg.max_context_window, Some(500_000));
+    }
+
     #[test]
     fn resolve_ref_with_inline_overrides() {
         let providers = load_builtin_providers().unwrap();
diff --git a/crates/provider/src/lib.rs b/crates/provider/src/lib.rs
index 6bd3ea5b..c90897ce 100644
--- a/crates/provider/src/lib.rs
+++ b/crates/provider/src/lib.rs
@@ -187,6 +187,7 @@ mod tests {
             },
             capability: None,
             context_window: 200_000,
+            max_context_window: None,
         }
     }
 
@@ -315,6 +316,7 @@ mod tests {
             auth: AuthRef::None,
             capability: None,
             context_window: 200_000,
+            max_context_window: None,
         };
         assert!(build_client_from_config(&config).is_ok());
     }
diff --git a/docs/ref/model-reasoning-context.md b/docs/ref/model-reasoning-context.md
index 1f1d4a31..f44e3b66 100644
--- a/docs/ref/model-reasoning-context.md
+++ b/docs/ref/model-reasoning-context.md
@@ -85,7 +85,9 @@ reasoning トークンは各ターンの後に破棄される。次ターンに
 1. `previous_response_id` パラメータで過去のレスポンスを参照
 2. `response.output` の全アイテムを次の `input` に手動で渡す
 
-ステートレス利用（`store=false`、ZDR組織）の場合は `include=["reasoning.encrypted_content"]` を指定すれば暗号化された推論コンテンツを受け取り、次リクエストに渡すことで推論を引き継げる。
+ステートレス利用（`store=false`、ZDR組織）の場合は `include=["reasoning.encrypted_content"]` を指定すれば暗号化された推論コンテンツを受け取り、次リクエストに渡すことで推論を引き継げる。ただし Insomnia では Responses リクエストに `reasoning.context="current_turn"` を明示し、直近の user message 以降の同一ターン内 reasoning item だけを `input` に残す。過去ターンの persisted `encrypted_content` は、履歴に残っていても次ターンへ盲目的には再送しない。
+
+同一ターン内の function-call loop では、`reasoning item → function_call → function_call_output → 次の Responses request` の連続性を保つため、直近 user message 以降の reasoning item は保持する。ToolResult は wire 上で user 側 item に見えるが、新しい user turn ではなく function-call chain の継続なので reasoning reset の境界にはしない。
 
 #### モデル世代差
 
@@ -185,7 +187,7 @@ Ollamaはローカル実行プラットフォームで、モデルごとに思
 
 **ChatGPT を使うとき**
 - 新規実装は **Responses API** を選ぶ（Chat Completions は推論引き継ぎが弱い）
-- ZDR組織でも `reasoning.encrypted_content` で推論を引き継げる
+- ZDR組織でも `reasoning.encrypted_content` で推論を引き継げるが、Insomnia では `reasoning.context="current_turn"` に合わせて同一 user turn / function-call loop 内だけ再送する
 - raw reasoning の抽出を試みない（規約違反の可能性）
 
 **Ollama を使うとき**
diff --git a/resources/models/builtin.toml b/resources/models/builtin.toml
index 562c1d9e..ae56682f 100644
--- a/resources/models/builtin.toml
+++ b/resources/models/builtin.toml
@@ -38,6 +38,13 @@ provider = "codex-oauth"
 context_window = 400000
 capability = { tool_calling = "parallel", structured_output = "json_schema", reasoning = "effort", vision = true, prompt_caching = { kind = "auto" } }
 
+[[model]]
+id = "gpt-5.5"
+provider = "codex-oauth"
+context_window = 1000000
+max_context_window = 272000
+capability = { tool_calling = "parallel", structured_output = "json_schema", reasoning = "effort", vision = true, prompt_caching = { kind = "auto" } }
+
 # OpenRouter
 [[model]]
 id = "anthropic/claude-sonnet-4"