fix: guard responses reasoning context
This commit is contained in:
parent
8f3c935f52
commit
b870a77a55
|
|
@ -62,6 +62,9 @@ pub(crate) struct ResponsesRequest {
|
||||||
pub(crate) struct ReasoningConfig {
|
pub(crate) struct ReasoningConfig {
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
pub effort: Option<String>,
|
pub effort: Option<String>,
|
||||||
|
/// Reasoning encrypted_content は同一 user turn 内だけ再利用する。
|
||||||
|
/// 古い turn の reasoning item は request input から除外する。
|
||||||
|
pub context: &'static str,
|
||||||
/// summary の出力制御。`"auto"` 固定で summary_text を受け取る。
|
/// summary の出力制御。`"auto"` 固定で summary_text を受け取る。
|
||||||
pub summary: &'static str,
|
pub summary: &'static str,
|
||||||
}
|
}
|
||||||
|
|
@ -193,6 +196,7 @@ impl OpenAIResponsesScheme {
|
||||||
ReasoningControl::Effort(effort) => Some(effort.as_str().to_string()),
|
ReasoningControl::Effort(effort) => Some(effort.as_str().to_string()),
|
||||||
ReasoningControl::BudgetTokens(_) => None,
|
ReasoningControl::BudgetTokens(_) => None,
|
||||||
},
|
},
|
||||||
|
context: "current_turn",
|
||||||
summary: "auto",
|
summary: "auto",
|
||||||
})
|
})
|
||||||
.filter(|reasoning| reasoning.effort.is_some());
|
.filter(|reasoning| reasoning.effort.is_some());
|
||||||
|
|
@ -236,8 +240,9 @@ impl OpenAIResponsesScheme {
|
||||||
|
|
||||||
/// `Item` 列を `input[]` に変換する。
|
/// `Item` 列を `input[]` に変換する。
|
||||||
fn convert_items_to_input(items: &[Item]) -> Vec<InputItem> {
|
fn convert_items_to_input(items: &[Item]) -> Vec<InputItem> {
|
||||||
|
let current_turn_start = current_turn_start_index(items);
|
||||||
let mut out = Vec::with_capacity(items.len());
|
let mut out = Vec::with_capacity(items.len());
|
||||||
for item in items {
|
for (idx, item) in items.iter().enumerate() {
|
||||||
match item {
|
match item {
|
||||||
Item::Message { role, content, .. } => {
|
Item::Message { role, content, .. } => {
|
||||||
let (role_str, text_variant): (&'static str, fn(String) -> InputContent) =
|
let (role_str, text_variant): (&'static str, fn(String) -> InputContent) =
|
||||||
|
|
@ -294,6 +299,9 @@ fn convert_items_to_input(items: &[Item]) -> Vec<InputItem> {
|
||||||
encrypted_content,
|
encrypted_content,
|
||||||
..
|
..
|
||||||
} => {
|
} => {
|
||||||
|
if idx < current_turn_start {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
let summary_parts = summary
|
let summary_parts = summary
|
||||||
.iter()
|
.iter()
|
||||||
.filter(|s| !s.is_empty())
|
.filter(|s| !s.is_empty())
|
||||||
|
|
@ -316,6 +324,26 @@ fn convert_items_to_input(items: &[Item]) -> Vec<InputItem> {
|
||||||
out
|
out
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Responses の `reasoning.context = "current_turn"` に合わせ、直近の
|
||||||
|
/// user message 以降だけを current turn とみなす。ToolResult は Responses
|
||||||
|
/// wire 上では user 側 item だが、新しい人間/外部入力ではなく function-call
|
||||||
|
/// chain の継続なので turn reset には使わない。System/developer notes も
|
||||||
|
/// 同一 turn 内の補助入力になり得るため reset しない。
|
||||||
|
fn current_turn_start_index(items: &[Item]) -> usize {
|
||||||
|
items
|
||||||
|
.iter()
|
||||||
|
.rposition(|item| {
|
||||||
|
matches!(
|
||||||
|
item,
|
||||||
|
Item::Message {
|
||||||
|
role: Role::User,
|
||||||
|
..
|
||||||
|
}
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.unwrap_or(0)
|
||||||
|
}
|
||||||
|
|
||||||
fn convert_tool(tool: &ToolDefinition) -> ResponseTool {
|
fn convert_tool(tool: &ToolDefinition) -> ResponseTool {
|
||||||
ResponseTool {
|
ResponseTool {
|
||||||
r#type: "function",
|
r#type: "function",
|
||||||
|
|
@ -477,6 +505,60 @@ mod tests {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn old_turn_reasoning_items_are_omitted_for_current_turn_context() {
|
||||||
|
let scheme = OpenAIResponsesScheme::new();
|
||||||
|
let old_reasoning = Item::reasoning("old").with_encrypted_content("OLD_ENC");
|
||||||
|
let current_reasoning = Item::reasoning("current").with_encrypted_content("CURRENT_ENC");
|
||||||
|
let req = Request::new()
|
||||||
|
.user("old prompt")
|
||||||
|
.item(old_reasoning)
|
||||||
|
.assistant("old answer")
|
||||||
|
.user("new prompt")
|
||||||
|
.item(current_reasoning);
|
||||||
|
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
|
||||||
|
let encrypted: Vec<_> = body
|
||||||
|
.input
|
||||||
|
.iter()
|
||||||
|
.filter_map(|item| match item {
|
||||||
|
InputItem::Reasoning {
|
||||||
|
encrypted_content, ..
|
||||||
|
} => encrypted_content.as_deref(),
|
||||||
|
_ => None,
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
assert_eq!(encrypted, vec!["CURRENT_ENC"]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn current_turn_reasoning_is_kept_across_function_call_loop() {
|
||||||
|
let scheme = OpenAIResponsesScheme::new();
|
||||||
|
let req = Request::new()
|
||||||
|
.user("run tool")
|
||||||
|
.item(Item::reasoning("plan").with_encrypted_content("ENC"))
|
||||||
|
.item(Item::tool_call("c1", "tool", "{}"))
|
||||||
|
.item(Item::tool_result("c1", "ok"));
|
||||||
|
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
|
||||||
|
assert!(matches!(body.input[1], InputItem::Reasoning { .. }));
|
||||||
|
assert!(matches!(body.input[2], InputItem::FunctionCall { .. }));
|
||||||
|
assert!(matches!(
|
||||||
|
body.input[3],
|
||||||
|
InputItem::FunctionCallOutput { .. }
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn reasoning_request_uses_current_turn_context() {
|
||||||
|
let scheme = OpenAIResponsesScheme::new();
|
||||||
|
let mut req = Request::new().user("hi");
|
||||||
|
req.config.reasoning = Some(ReasoningControl::Effort(ReasoningEffort::Medium));
|
||||||
|
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
|
||||||
|
let reasoning = body.reasoning.expect("reasoning should be set");
|
||||||
|
assert_eq!(reasoning.context, "current_turn");
|
||||||
|
let json = serde_json::to_value(reasoning).unwrap();
|
||||||
|
assert_eq!(json["context"], "current_turn");
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn reasoning_summary_field_is_always_serialized() {
|
fn reasoning_summary_field_is_always_serialized() {
|
||||||
// Responses API は reasoning item に `summary` を必須で要求する。
|
// Responses API は reasoning item に `summary` を必須で要求する。
|
||||||
|
|
@ -508,6 +590,7 @@ mod tests {
|
||||||
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
|
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
|
||||||
let reasoning = body.reasoning.expect("reasoning should be set");
|
let reasoning = body.reasoning.expect("reasoning should be set");
|
||||||
assert_eq!(reasoning.effort.as_deref(), Some("high"));
|
assert_eq!(reasoning.effort.as_deref(), Some("high"));
|
||||||
|
assert_eq!(reasoning.context, "current_turn");
|
||||||
assert_eq!(reasoning.summary, "auto");
|
assert_eq!(reasoning.summary, "auto");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -14,7 +14,7 @@ use futures::{Stream, StreamExt, TryStreamExt};
|
||||||
use reqwest::header::{
|
use reqwest::header::{
|
||||||
ACCEPT, CONTENT_ENCODING, CONTENT_TYPE, HeaderMap, HeaderName, HeaderValue, RETRY_AFTER,
|
ACCEPT, CONTENT_ENCODING, CONTENT_TYPE, HeaderMap, HeaderName, HeaderValue, RETRY_AFTER,
|
||||||
};
|
};
|
||||||
use serde_json::{Value, json};
|
use serde_json::{Map, Value, json};
|
||||||
|
|
||||||
use super::auth::{AuthProvider, AuthRequirement};
|
use super::auth::{AuthProvider, AuthRequirement};
|
||||||
use super::capability::ModelCapability;
|
use super::capability::ModelCapability;
|
||||||
|
|
@ -260,6 +260,60 @@ fn json_value_kind(value: &Value) -> &'static str {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn request_body_shape_payload(body: &Value) -> Value {
|
||||||
|
let mut map = Map::new();
|
||||||
|
if let Some(input) = body.get("input").and_then(Value::as_array) {
|
||||||
|
let items_json_bytes = serde_json::to_vec(input).map(|bytes| bytes.len()).ok();
|
||||||
|
let mut reasoning_items = 0usize;
|
||||||
|
let mut reasoning_encrypted_content_count = 0usize;
|
||||||
|
let mut reasoning_encrypted_content_bytes = 0usize;
|
||||||
|
for item in input {
|
||||||
|
if item.get("type").and_then(Value::as_str) != Some("reasoning") {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
reasoning_items += 1;
|
||||||
|
if let Some(encrypted) = item.get("encrypted_content").and_then(Value::as_str) {
|
||||||
|
reasoning_encrypted_content_count += 1;
|
||||||
|
reasoning_encrypted_content_bytes += encrypted.len();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
map.insert("items_len".to_string(), json!(input.len()));
|
||||||
|
map.insert("items_json_bytes".to_string(), json!(items_json_bytes));
|
||||||
|
map.insert("reasoning_items".to_string(), json!(reasoning_items));
|
||||||
|
map.insert(
|
||||||
|
"reasoning_encrypted_content_count".to_string(),
|
||||||
|
json!(reasoning_encrypted_content_count),
|
||||||
|
);
|
||||||
|
map.insert(
|
||||||
|
"reasoning_encrypted_content_bytes".to_string(),
|
||||||
|
json!(reasoning_encrypted_content_bytes),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
let reasoning_context = body
|
||||||
|
.get("reasoning")
|
||||||
|
.and_then(|reasoning| reasoning.get("context"))
|
||||||
|
.and_then(Value::as_str);
|
||||||
|
map.insert("reasoning_context".to_string(), json!(reasoning_context));
|
||||||
|
Value::Object(map)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn api_error_code(error: &ClientError) -> Option<&str> {
|
||||||
|
match error {
|
||||||
|
ClientError::Api { code, .. } => code.as_deref(),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_context_length_exceeded(error: &ClientError) -> bool {
|
||||||
|
match error {
|
||||||
|
ClientError::Api { code, message, .. } => {
|
||||||
|
code.as_deref() == Some("context_length_exceeded")
|
||||||
|
|| message.contains("context_length_exceeded")
|
||||||
|
}
|
||||||
|
_ => false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
async fn response_with_timeout(
|
async fn response_with_timeout(
|
||||||
future: impl std::future::Future<Output = Result<reqwest::Response, reqwest::Error>>,
|
future: impl std::future::Future<Output = Result<reqwest::Response, reqwest::Error>>,
|
||||||
timeout: Duration,
|
timeout: Duration,
|
||||||
|
|
@ -296,7 +350,11 @@ async fn classify_error_response(resp: reqwest::Response) -> ClientError {
|
||||||
let text = resp.text().await.unwrap_or_default();
|
let text = resp.text().await.unwrap_or_default();
|
||||||
if let Ok(json) = serde_json::from_str::<serde_json::Value>(&text) {
|
if let Ok(json) = serde_json::from_str::<serde_json::Value>(&text) {
|
||||||
let error = json.get("error").unwrap_or(&json);
|
let error = json.get("error").unwrap_or(&json);
|
||||||
let code = error.get("type").and_then(|v| v.as_str()).map(String::from);
|
let code = error
|
||||||
|
.get("code")
|
||||||
|
.and_then(|v| v.as_str())
|
||||||
|
.or_else(|| error.get("type").and_then(|v| v.as_str()))
|
||||||
|
.map(String::from);
|
||||||
let message = error
|
let message = error
|
||||||
.get("message")
|
.get("message")
|
||||||
.and_then(|v| v.as_str())
|
.and_then(|v| v.as_str())
|
||||||
|
|
@ -406,12 +464,14 @@ impl<S: Scheme + Clone + 'static> LlmClient for HttpTransport<S> {
|
||||||
let body = self
|
let body = self
|
||||||
.scheme
|
.scheme
|
||||||
.build_request_body(&self.model_id, &request, &self.capability);
|
.build_request_body(&self.model_id, &request, &self.capability);
|
||||||
|
let body_shape = request_body_shape_payload(&body);
|
||||||
emit_transport_trace(
|
emit_transport_trace(
|
||||||
&request,
|
&request,
|
||||||
"transport_body_build_done",
|
"transport_body_build_done",
|
||||||
json!({
|
json!({
|
||||||
"elapsed_ms": body_started.elapsed().as_millis() as u64,
|
"elapsed_ms": body_started.elapsed().as_millis() as u64,
|
||||||
"body_kind": json_value_kind(&body),
|
"body_kind": json_value_kind(&body),
|
||||||
|
"request_shape": body_shape.clone(),
|
||||||
}),
|
}),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
@ -438,6 +498,7 @@ impl<S: Scheme + Clone + 'static> LlmClient for HttpTransport<S> {
|
||||||
"encoding": request_body.encoding(),
|
"encoding": request_body.encoding(),
|
||||||
"raw_json_bytes": request_body.raw_json_bytes(),
|
"raw_json_bytes": request_body.raw_json_bytes(),
|
||||||
"wire_bytes": request_body.wire_bytes(),
|
"wire_bytes": request_body.wire_bytes(),
|
||||||
|
"request_shape": body_shape.clone(),
|
||||||
}),
|
}),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
@ -479,15 +540,23 @@ impl<S: Scheme + Clone + 'static> LlmClient for HttpTransport<S> {
|
||||||
};
|
};
|
||||||
|
|
||||||
if !response.status().is_success() {
|
if !response.status().is_success() {
|
||||||
|
let status = response.status().as_u16();
|
||||||
|
let retry_after_present = response.headers().get(RETRY_AFTER).is_some();
|
||||||
|
let error = classify_error_response(response).await;
|
||||||
|
let context_length_exceeded = is_context_length_exceeded(&error);
|
||||||
emit_transport_trace(
|
emit_transport_trace(
|
||||||
&request,
|
&request,
|
||||||
"transport_http_status_error",
|
"transport_http_status_error",
|
||||||
json!({
|
json!({
|
||||||
"status": response.status().as_u16(),
|
"status": status,
|
||||||
"retry_after_present": response.headers().get(RETRY_AFTER).is_some(),
|
"retry_after_present": retry_after_present,
|
||||||
|
"api_error_code": api_error_code(&error),
|
||||||
|
"context_length_exceeded": context_length_exceeded,
|
||||||
|
"provider_usage_absent": context_length_exceeded,
|
||||||
|
"request_shape": body_shape.clone(),
|
||||||
}),
|
}),
|
||||||
);
|
);
|
||||||
return Err(classify_error_response(response).await);
|
return Err(error);
|
||||||
}
|
}
|
||||||
|
|
||||||
emit_transport_trace(
|
emit_transport_trace(
|
||||||
|
|
@ -611,6 +680,24 @@ mod tests {
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn request_body_shape_counts_reasoning_encrypted_content() {
|
||||||
|
let payload = request_body_shape_payload(&json!({
|
||||||
|
"reasoning": { "context": "current_turn" },
|
||||||
|
"input": [
|
||||||
|
{ "type": "message", "role": "user", "content": [] },
|
||||||
|
{ "type": "reasoning", "encrypted_content": "abc", "summary": [] },
|
||||||
|
{ "type": "reasoning", "encrypted_content": "defgh", "summary": [] }
|
||||||
|
]
|
||||||
|
}));
|
||||||
|
assert_eq!(payload["items_len"], 3);
|
||||||
|
assert_eq!(payload["reasoning_items"], 2);
|
||||||
|
assert_eq!(payload["reasoning_encrypted_content_count"], 2);
|
||||||
|
assert_eq!(payload["reasoning_encrypted_content_bytes"], 8);
|
||||||
|
assert_eq!(payload["reasoning_context"], "current_turn");
|
||||||
|
assert!(payload["items_json_bytes"].as_u64().unwrap() > 0);
|
||||||
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn response_timeout_returns_retryable_lifecycle_timeout() {
|
async fn response_timeout_returns_retryable_lifecycle_timeout() {
|
||||||
let err = response_with_timeout(
|
let err = response_with_timeout(
|
||||||
|
|
|
||||||
|
|
@ -2029,12 +2029,31 @@ fn items_trace_payload(
|
||||||
_ => None,
|
_ => None,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let mut reasoning_items = 0usize;
|
||||||
|
let mut reasoning_encrypted_content_count = 0usize;
|
||||||
|
let mut reasoning_encrypted_content_bytes = 0usize;
|
||||||
|
for item in items {
|
||||||
|
if let Item::Reasoning {
|
||||||
|
encrypted_content, ..
|
||||||
|
} = item
|
||||||
|
{
|
||||||
|
reasoning_items += 1;
|
||||||
|
if let Some(encrypted) = encrypted_content {
|
||||||
|
reasoning_encrypted_content_count += 1;
|
||||||
|
reasoning_encrypted_content_bytes += encrypted.len();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
json!({
|
json!({
|
||||||
"items_len": items.len(),
|
"items_len": items.len(),
|
||||||
"items_json_bytes": serde_json::to_vec(items).map(|bytes| bytes.len()).ok(),
|
"items_json_bytes": serde_json::to_vec(items).map(|bytes| bytes.len()).ok(),
|
||||||
"tools_len": tools_len,
|
"tools_len": tools_len,
|
||||||
"cache_anchor": cache_anchor,
|
"cache_anchor": cache_anchor,
|
||||||
"cache_key_present": cache_key_present,
|
"cache_key_present": cache_key_present,
|
||||||
|
"reasoning_items": reasoning_items,
|
||||||
|
"reasoning_encrypted_content_count": reasoning_encrypted_content_count,
|
||||||
|
"reasoning_encrypted_content_bytes": reasoning_encrypted_content_bytes,
|
||||||
"last_item_kind": last.map(item_kind),
|
"last_item_kind": last.map(item_kind),
|
||||||
"last_item_json_bytes": last.and_then(|item| serde_json::to_vec(item).ok().map(|bytes| bytes.len())),
|
"last_item_json_bytes": last.and_then(|item| serde_json::to_vec(item).ok().map(|bytes| bytes.len())),
|
||||||
"last_tool_result": last_tool_result,
|
"last_tool_result": last_tool_result,
|
||||||
|
|
|
||||||
|
|
@ -52,10 +52,16 @@ pub struct ModelManifest {
|
||||||
/// `default_capability` → scheme 既定の順で解決される。
|
/// `default_capability` → scheme 既定の順で解決される。
|
||||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
pub capability: Option<ModelCapability>,
|
pub capability: Option<ModelCapability>,
|
||||||
/// モデルのコンテキストウィンドウ上限(tokens)。カタログ未掲載 / inline
|
/// モデルの希望コンテキストウィンドウ(tokens)。カタログ未掲載 / inline
|
||||||
/// モデルでもここで明示 override できる。
|
/// モデルでもここで明示 override できる。実効値は `max_context_window`
|
||||||
|
/// またはカタログ上の backend maximum で clamp される。
|
||||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
pub context_window: Option<u64>,
|
pub context_window: Option<u64>,
|
||||||
|
/// backend が実際に受け付けるコンテキストウィンドウ上限(tokens)。
|
||||||
|
/// 表示・安全判定に使う実効 context window は `context_window` とこの値の
|
||||||
|
/// 小さい方になる。
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub max_context_window: Option<u64>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ModelManifest {
|
impl ModelManifest {
|
||||||
|
|
@ -70,6 +76,7 @@ impl ModelManifest {
|
||||||
auth: upper.auth.or(self.auth),
|
auth: upper.auth.or(self.auth),
|
||||||
capability: upper.capability.or(self.capability),
|
capability: upper.capability.or(self.capability),
|
||||||
context_window: upper.context_window.or(self.context_window),
|
context_window: upper.context_window.or(self.context_window),
|
||||||
|
max_context_window: upper.max_context_window.or(self.max_context_window),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -22,6 +22,7 @@ use tracing::info;
|
||||||
use tracing::warn;
|
use tracing::warn;
|
||||||
|
|
||||||
use crate::compact::state::CompactState;
|
use crate::compact::state::CompactState;
|
||||||
|
use crate::compact::usage_tracker::UsageTracker;
|
||||||
use session_store::{SystemItem, SystemReminder};
|
use session_store::{SystemItem, SystemReminder};
|
||||||
use tools::{TaskEntry, TaskStatus, TaskStore};
|
use tools::{TaskEntry, TaskStatus, TaskStore};
|
||||||
|
|
||||||
|
|
@ -91,6 +92,10 @@ pub(crate) struct PodInterceptor {
|
||||||
/// per-request `context` to estimate current occupancy for threshold
|
/// per-request `context` to estimate current occupancy for threshold
|
||||||
/// checks. `None` when compaction is disabled (both thresholds unset).
|
/// checks. `None` when compaction is disabled (both thresholds unset).
|
||||||
usage_history: Option<Arc<Mutex<Vec<UsageRecord>>>>,
|
usage_history: Option<Arc<Mutex<Vec<UsageRecord>>>>,
|
||||||
|
/// In-flight usage records observed during the current run but not yet
|
||||||
|
/// persisted into `usage_history`. Subsequent tool-loop LLM calls must
|
||||||
|
/// see these records during pre-request safety accounting.
|
||||||
|
usage_tracker: Option<Arc<UsageTracker>>,
|
||||||
/// Pending-notification buffer drained into `worker.history`
|
/// Pending-notification buffer drained into `worker.history`
|
||||||
/// via [`Self::pending_history_appends`] just before the next LLM
|
/// via [`Self::pending_history_appends`] just before the next LLM
|
||||||
/// request. The Worker `extend`s these into its persistent history
|
/// request. The Worker `extend`s these into its persistent history
|
||||||
|
|
@ -138,6 +143,7 @@ impl PodInterceptor {
|
||||||
registry,
|
registry,
|
||||||
compact_state,
|
compact_state,
|
||||||
usage_history,
|
usage_history,
|
||||||
|
usage_tracker: None,
|
||||||
pending_notifies,
|
pending_notifies,
|
||||||
pending_attachments,
|
pending_attachments,
|
||||||
task_store,
|
task_store,
|
||||||
|
|
@ -149,6 +155,11 @@ impl PodInterceptor {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) fn with_usage_tracker(mut self, usage_tracker: Arc<UsageTracker>) -> Self {
|
||||||
|
self.usage_tracker = Some(usage_tracker);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
/// Commit each `SystemItem` as its own `LogEntry::SystemItem`
|
/// Commit each `SystemItem` as its own `LogEntry::SystemItem`
|
||||||
/// entry through the attached writer (no-op when no writer is
|
/// entry through the attached writer (no-op when no writer is
|
||||||
/// wired). Sync — writes complete before the matching
|
/// wired). Sync — writes complete before the matching
|
||||||
|
|
@ -175,7 +186,10 @@ impl PodInterceptor {
|
||||||
/// `usage_history` is not attached (compaction fully disabled).
|
/// `usage_history` is not attached (compaction fully disabled).
|
||||||
fn estimated_tokens(&self, context: &[Item]) -> Option<u64> {
|
fn estimated_tokens(&self, context: &[Item]) -> Option<u64> {
|
||||||
let handle = self.usage_history.as_ref()?;
|
let handle = self.usage_history.as_ref()?;
|
||||||
let records = handle.lock().expect("usage_history poisoned").clone();
|
let mut records = handle.lock().expect("usage_history poisoned").clone();
|
||||||
|
if let Some(tracker) = self.usage_tracker.as_ref() {
|
||||||
|
records.extend(tracker.records());
|
||||||
|
}
|
||||||
Some(total_tokens(context, &records).tokens)
|
Some(total_tokens(context, &records).tokens)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -305,9 +319,15 @@ impl Interceptor for PodInterceptor {
|
||||||
if !state.is_disabled() && !state.just_compacted() {
|
if !state.is_disabled() && !state.just_compacted() {
|
||||||
let current = current_tokens.unwrap_or(0);
|
let current = current_tokens.unwrap_or(0);
|
||||||
if state.exceeds_request(current) {
|
if state.exceeds_request(current) {
|
||||||
|
let shape = context_shape(context);
|
||||||
info!(
|
info!(
|
||||||
input_tokens = current,
|
input_tokens = current,
|
||||||
threshold = state.request_threshold().unwrap_or(0),
|
threshold = state.request_threshold().unwrap_or(0),
|
||||||
|
items_len = shape.items_len,
|
||||||
|
items_json_bytes = shape.items_json_bytes,
|
||||||
|
reasoning_items = shape.reasoning_items,
|
||||||
|
reasoning_encrypted_content_count = shape.reasoning_encrypted_content_count,
|
||||||
|
reasoning_encrypted_content_bytes = shape.reasoning_encrypted_content_bytes,
|
||||||
"Between-requests compaction threshold exceeded, yielding"
|
"Between-requests compaction threshold exceeded, yielding"
|
||||||
);
|
);
|
||||||
return PreRequestAction::Yield;
|
return PreRequestAction::Yield;
|
||||||
|
|
@ -400,6 +420,37 @@ impl Interceptor for PodInterceptor {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct ContextShape {
|
||||||
|
items_len: usize,
|
||||||
|
items_json_bytes: Option<usize>,
|
||||||
|
reasoning_items: usize,
|
||||||
|
reasoning_encrypted_content_count: usize,
|
||||||
|
reasoning_encrypted_content_bytes: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn context_shape(context: &[Item]) -> ContextShape {
|
||||||
|
let mut shape = ContextShape {
|
||||||
|
items_len: context.len(),
|
||||||
|
items_json_bytes: serde_json::to_vec(context).ok().map(|bytes| bytes.len()),
|
||||||
|
reasoning_items: 0,
|
||||||
|
reasoning_encrypted_content_count: 0,
|
||||||
|
reasoning_encrypted_content_bytes: 0,
|
||||||
|
};
|
||||||
|
for item in context {
|
||||||
|
if let Item::Reasoning {
|
||||||
|
encrypted_content, ..
|
||||||
|
} = item
|
||||||
|
{
|
||||||
|
shape.reasoning_items += 1;
|
||||||
|
if let Some(encrypted) = encrypted_content {
|
||||||
|
shape.reasoning_encrypted_content_count += 1;
|
||||||
|
shape.reasoning_encrypted_content_bytes += encrypted.len();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
shape
|
||||||
|
}
|
||||||
|
|
||||||
fn extract_message_text(item: &Item) -> Option<String> {
|
fn extract_message_text(item: &Item) -> Option<String> {
|
||||||
match item {
|
match item {
|
||||||
Item::Message { content, .. } => Some(
|
Item::Message { content, .. } => Some(
|
||||||
|
|
@ -528,6 +579,40 @@ mod tests {
|
||||||
assert_eq!(count.load(Ordering::Relaxed), 0);
|
assert_eq!(count.load(Ordering::Relaxed), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn pre_llm_request_counts_in_flight_usage_records() {
|
||||||
|
let registry = Arc::new(HookRegistryBuilder::new().build());
|
||||||
|
let state = Arc::new(CompactState::new(None, Some(100), 2));
|
||||||
|
let ctx_items = vec![Item::user_message("hi")];
|
||||||
|
let history = usage_handle_with(ctx_items.len(), 50);
|
||||||
|
let usage_tracker = Arc::new(UsageTracker::new());
|
||||||
|
usage_tracker.note_request(ctx_items.len());
|
||||||
|
usage_tracker.record_usage(&llm_worker::event::UsageEvent {
|
||||||
|
input_tokens: Some(150),
|
||||||
|
output_tokens: Some(0),
|
||||||
|
total_tokens: Some(150),
|
||||||
|
cache_read_input_tokens: Some(0),
|
||||||
|
cache_creation_input_tokens: Some(0),
|
||||||
|
});
|
||||||
|
|
||||||
|
let interceptor = PodInterceptor::new(
|
||||||
|
registry,
|
||||||
|
Some(state),
|
||||||
|
Some(history),
|
||||||
|
NotifyBuffer::new(),
|
||||||
|
Arc::new(Mutex::new(Vec::new())),
|
||||||
|
TaskStore::new(),
|
||||||
|
Arc::new(TaskReminderState::new()),
|
||||||
|
PromptCatalog::builtins_only().unwrap(),
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
.with_usage_tracker(usage_tracker);
|
||||||
|
let mut ctx = ctx_items;
|
||||||
|
let action = interceptor.pre_llm_request(&mut ctx).await;
|
||||||
|
|
||||||
|
assert!(matches!(action, PreRequestAction::Yield));
|
||||||
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn pre_llm_request_runs_hooks_when_under_threshold() {
|
async fn pre_llm_request_runs_hooks_when_under_threshold() {
|
||||||
let count = Arc::new(AtomicUsize::new(0));
|
let count = Arc::new(AtomicUsize::new(0));
|
||||||
|
|
|
||||||
|
|
@ -1270,7 +1270,8 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
|
||||||
self.task_reminder_state.clone(),
|
self.task_reminder_state.clone(),
|
||||||
self.prompts.clone(),
|
self.prompts.clone(),
|
||||||
self.log_writer.clone(),
|
self.log_writer.clone(),
|
||||||
);
|
)
|
||||||
|
.with_usage_tracker(self.usage_tracker.clone());
|
||||||
self.worker_mut().set_interceptor(interceptor);
|
self.worker_mut().set_interceptor(interceptor);
|
||||||
self.interceptor_installed = true;
|
self.interceptor_installed = true;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -117,9 +117,14 @@ pub struct ModelEntry {
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub capability: Option<ModelCapability>,
|
pub capability: Option<ModelCapability>,
|
||||||
/// モデル単位の context window。省略時は provider default → builtin
|
/// モデル単位の context window。省略時は provider default → builtin
|
||||||
/// fallback にフォールバックする。
|
/// fallback にフォールバックする。実効値は `max_context_window` で clamp
|
||||||
|
/// される。
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub context_window: Option<u64>,
|
pub context_window: Option<u64>,
|
||||||
|
/// backend が実際に受け付ける context window の上限。UI や pre-request
|
||||||
|
/// safety は希望値ではなく clamp 済みの実効値を使う。
|
||||||
|
#[serde(default)]
|
||||||
|
pub max_context_window: Option<u64>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// 解決済みモデル設定。`build_client` が消費する完成形。
|
/// 解決済みモデル設定。`build_client` が消費する完成形。
|
||||||
|
|
@ -130,7 +135,10 @@ pub struct ModelConfig {
|
||||||
pub model_id: String,
|
pub model_id: String,
|
||||||
pub auth: AuthRef,
|
pub auth: AuthRef,
|
||||||
pub capability: Option<ModelCapability>,
|
pub capability: Option<ModelCapability>,
|
||||||
|
/// Effective context window after backend maximum clamping.
|
||||||
pub context_window: u64,
|
pub context_window: u64,
|
||||||
|
/// Backend maximum that constrained `context_window`, when known.
|
||||||
|
pub max_context_window: Option<u64>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Deserialize)]
|
#[derive(Debug, Deserialize)]
|
||||||
|
|
@ -259,7 +267,8 @@ fn split_ref(s: &str) -> Option<(&str, &str)> {
|
||||||
/// manifest 明示 > model catalog > provider.default_capability >
|
/// manifest 明示 > model catalog > provider.default_capability >
|
||||||
/// (`build_client` 側で)`Scheme::default_capability()`。
|
/// (`build_client` 側で)`Scheme::default_capability()`。
|
||||||
/// context_window は manifest 明示 > model catalog > provider default >
|
/// context_window は manifest 明示 > model catalog > provider default >
|
||||||
/// [`DEFAULT_CONTEXT_WINDOW`]。
|
/// [`DEFAULT_CONTEXT_WINDOW`]。実効 context_window は manifest/model の
|
||||||
|
/// max_context_window で clamp される。
|
||||||
pub fn resolve_model_manifest(manifest: &ModelManifest) -> Result<ModelConfig, ResolveError> {
|
pub fn resolve_model_manifest(manifest: &ModelManifest) -> Result<ModelConfig, ResolveError> {
|
||||||
let providers = load_providers().map_err(ResolveError::LoadProviders)?;
|
let providers = load_providers().map_err(ResolveError::LoadProviders)?;
|
||||||
let models = load_models().map_err(ResolveError::LoadModels)?;
|
let models = load_models().map_err(ResolveError::LoadModels)?;
|
||||||
|
|
@ -310,11 +319,15 @@ pub fn resolve_with_catalogs(
|
||||||
.and_then(|m| m.capability.clone())
|
.and_then(|m| m.capability.clone())
|
||||||
.or_else(|| provider.default_capability.clone())
|
.or_else(|| provider.default_capability.clone())
|
||||||
});
|
});
|
||||||
let context_window = manifest
|
let desired_context_window = manifest
|
||||||
.context_window
|
.context_window
|
||||||
.or_else(|| model_entry.and_then(|m| m.context_window))
|
.or_else(|| model_entry.and_then(|m| m.context_window))
|
||||||
.or(provider.default_context_window)
|
.or(provider.default_context_window)
|
||||||
.unwrap_or(DEFAULT_CONTEXT_WINDOW);
|
.unwrap_or(DEFAULT_CONTEXT_WINDOW);
|
||||||
|
let max_context_window = manifest
|
||||||
|
.max_context_window
|
||||||
|
.or_else(|| model_entry.and_then(|m| m.max_context_window));
|
||||||
|
let context_window = clamp_context_window(desired_context_window, max_context_window);
|
||||||
Ok(ModelConfig {
|
Ok(ModelConfig {
|
||||||
scheme,
|
scheme,
|
||||||
base_url,
|
base_url,
|
||||||
|
|
@ -322,6 +335,7 @@ pub fn resolve_with_catalogs(
|
||||||
auth,
|
auth,
|
||||||
capability,
|
capability,
|
||||||
context_window,
|
context_window,
|
||||||
|
max_context_window,
|
||||||
})
|
})
|
||||||
} else {
|
} else {
|
||||||
let scheme = manifest
|
let scheme = manifest
|
||||||
|
|
@ -335,17 +349,24 @@ pub fn resolve_with_catalogs(
|
||||||
.auth
|
.auth
|
||||||
.clone()
|
.clone()
|
||||||
.ok_or(ResolveError::InlineMissing("auth"))?;
|
.ok_or(ResolveError::InlineMissing("auth"))?;
|
||||||
|
let desired_context_window = manifest.context_window.unwrap_or(DEFAULT_CONTEXT_WINDOW);
|
||||||
|
let max_context_window = manifest.max_context_window;
|
||||||
Ok(ModelConfig {
|
Ok(ModelConfig {
|
||||||
scheme,
|
scheme,
|
||||||
base_url: manifest.base_url.clone(),
|
base_url: manifest.base_url.clone(),
|
||||||
model_id,
|
model_id,
|
||||||
auth,
|
auth,
|
||||||
capability: manifest.capability.clone(),
|
capability: manifest.capability.clone(),
|
||||||
context_window: manifest.context_window.unwrap_or(DEFAULT_CONTEXT_WINDOW),
|
context_window: clamp_context_window(desired_context_window, max_context_window),
|
||||||
|
max_context_window,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn clamp_context_window(desired: u64, max: Option<u64>) -> u64 {
|
||||||
|
max.map(|limit| desired.min(limit)).unwrap_or(desired)
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
@ -420,6 +441,52 @@ mod tests {
|
||||||
assert_eq!(cfg.context_window, 123_456);
|
assert_eq!(cfg.context_window, 123_456);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn context_window_is_clamped_by_catalog_backend_max() {
|
||||||
|
let providers = load_builtin_providers().unwrap();
|
||||||
|
let models = load_builtin_models().unwrap();
|
||||||
|
let manifest = ModelManifest {
|
||||||
|
ref_: Some("codex-oauth/gpt-5.5".into()),
|
||||||
|
context_window: Some(1_000_000),
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
|
let cfg = resolve_with_catalogs(&manifest, &providers, &models).unwrap();
|
||||||
|
assert_eq!(cfg.context_window, 272_000);
|
||||||
|
assert_eq!(cfg.max_context_window, Some(272_000));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn inline_context_window_is_clamped_by_manifest_backend_max() {
|
||||||
|
let providers = load_builtin_providers().unwrap();
|
||||||
|
let models = load_builtin_models().unwrap();
|
||||||
|
let manifest = ModelManifest {
|
||||||
|
scheme: Some(SchemeKind::Anthropic),
|
||||||
|
model_id: Some("custom".into()),
|
||||||
|
auth: Some(AuthRef::None),
|
||||||
|
context_window: Some(1_000_000),
|
||||||
|
max_context_window: Some(272_000),
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
|
let cfg = resolve_with_catalogs(&manifest, &providers, &models).unwrap();
|
||||||
|
assert_eq!(cfg.context_window, 272_000);
|
||||||
|
assert_eq!(cfg.max_context_window, Some(272_000));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn manifest_backend_max_overrides_catalog_backend_max() {
|
||||||
|
let providers = load_builtin_providers().unwrap();
|
||||||
|
let models = load_builtin_models().unwrap();
|
||||||
|
let manifest = ModelManifest {
|
||||||
|
ref_: Some("codex-oauth/gpt-5.5".into()),
|
||||||
|
context_window: Some(1_000_000),
|
||||||
|
max_context_window: Some(500_000),
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
|
let cfg = resolve_with_catalogs(&manifest, &providers, &models).unwrap();
|
||||||
|
assert_eq!(cfg.context_window, 500_000);
|
||||||
|
assert_eq!(cfg.max_context_window, Some(500_000));
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn resolve_ref_with_inline_overrides() {
|
fn resolve_ref_with_inline_overrides() {
|
||||||
let providers = load_builtin_providers().unwrap();
|
let providers = load_builtin_providers().unwrap();
|
||||||
|
|
|
||||||
|
|
@ -187,6 +187,7 @@ mod tests {
|
||||||
},
|
},
|
||||||
capability: None,
|
capability: None,
|
||||||
context_window: 200_000,
|
context_window: 200_000,
|
||||||
|
max_context_window: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -315,6 +316,7 @@ mod tests {
|
||||||
auth: AuthRef::None,
|
auth: AuthRef::None,
|
||||||
capability: None,
|
capability: None,
|
||||||
context_window: 200_000,
|
context_window: 200_000,
|
||||||
|
max_context_window: None,
|
||||||
};
|
};
|
||||||
assert!(build_client_from_config(&config).is_ok());
|
assert!(build_client_from_config(&config).is_ok());
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -85,7 +85,9 @@ reasoning トークンは各ターンの後に破棄される。次ターンに
|
||||||
1. `previous_response_id` パラメータで過去のレスポンスを参照
|
1. `previous_response_id` パラメータで過去のレスポンスを参照
|
||||||
2. `response.output` の全アイテムを次の `input` に手動で渡す
|
2. `response.output` の全アイテムを次の `input` に手動で渡す
|
||||||
|
|
||||||
ステートレス利用(`store=false`、ZDR組織)の場合は `include=["reasoning.encrypted_content"]` を指定すれば暗号化された推論コンテンツを受け取り、次リクエストに渡すことで推論を引き継げる。
|
ステートレス利用(`store=false`、ZDR組織)の場合は `include=["reasoning.encrypted_content"]` を指定すれば暗号化された推論コンテンツを受け取り、次リクエストに渡すことで推論を引き継げる。ただし Insomnia では Responses リクエストに `reasoning.context="current_turn"` を明示し、直近の user message 以降の同一ターン内 reasoning item だけを `input` に残す。過去ターンの persisted `encrypted_content` は、履歴に残っていても次ターンへ盲目的には再送しない。
|
||||||
|
|
||||||
|
同一ターン内の function-call loop では、`reasoning item → function_call → function_call_output → 次の Responses request` の連続性を保つため、直近 user message 以降の reasoning item は保持する。ToolResult は wire 上で user 側 item に見えるが、新しい user turn ではなく function-call chain の継続なので reasoning reset の境界にはしない。
|
||||||
|
|
||||||
#### モデル世代差
|
#### モデル世代差
|
||||||
|
|
||||||
|
|
@ -185,7 +187,7 @@ Ollamaはローカル実行プラットフォームで、モデルごとに思
|
||||||
|
|
||||||
**ChatGPT を使うとき**
|
**ChatGPT を使うとき**
|
||||||
- 新規実装は **Responses API** を選ぶ(Chat Completions は推論引き継ぎが弱い)
|
- 新規実装は **Responses API** を選ぶ(Chat Completions は推論引き継ぎが弱い)
|
||||||
- ZDR組織でも `reasoning.encrypted_content` で推論を引き継げる
|
- ZDR組織でも `reasoning.encrypted_content` で推論を引き継げるが、Insomnia では `reasoning.context="current_turn"` に合わせて同一 user turn / function-call loop 内だけ再送する
|
||||||
- raw reasoning の抽出を試みない(規約違反の可能性)
|
- raw reasoning の抽出を試みない(規約違反の可能性)
|
||||||
|
|
||||||
**Ollama を使うとき**
|
**Ollama を使うとき**
|
||||||
|
|
|
||||||
|
|
@ -38,6 +38,13 @@ provider = "codex-oauth"
|
||||||
context_window = 400000
|
context_window = 400000
|
||||||
capability = { tool_calling = "parallel", structured_output = "json_schema", reasoning = "effort", vision = true, prompt_caching = { kind = "auto" } }
|
capability = { tool_calling = "parallel", structured_output = "json_schema", reasoning = "effort", vision = true, prompt_caching = { kind = "auto" } }
|
||||||
|
|
||||||
|
[[model]]
|
||||||
|
id = "gpt-5.5"
|
||||||
|
provider = "codex-oauth"
|
||||||
|
context_window = 1000000
|
||||||
|
max_context_window = 272000
|
||||||
|
capability = { tool_calling = "parallel", structured_output = "json_schema", reasoning = "effort", vision = true, prompt_caching = { kind = "auto" } }
|
||||||
|
|
||||||
# OpenRouter
|
# OpenRouter
|
||||||
[[model]]
|
[[model]]
|
||||||
id = "anthropic/claude-sonnet-4"
|
id = "anthropic/claude-sonnet-4"
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user