Compare commits
2 Commits
1b1f8f40c6
...
d2ee84775b
| Author | SHA1 | Date | |
|---|---|---|---|
| d2ee84775b | |||
| cb1d3e72e4 |
1
TODO.md
1
TODO.md
|
|
@ -5,6 +5,7 @@
|
|||
- [ ] Bash ツール (Permission 層と統合) → [tickets/bash-tool.md](tickets/bash-tool.md)
|
||||
- [ ] パーミッション: パターンベースのツール実行制御 → [tickets/permission-extension-point.md](tickets/permission-extension-point.md)
|
||||
- [ ] Pod CLI: マニフェスト関連フラグの整理 → [tickets/pod-cli-manifest-flags.md](tickets/pod-cli-manifest-flags.md)
|
||||
- [ ] OpenAI Responses: sampling パラメータの取り扱い → [tickets/responses-sampling-params.md](tickets/responses-sampling-params.md)
|
||||
- [ ] Pod オーケストレーション
|
||||
- [ ] 動的 Scope 変更 → [tickets/dynamic-scope.md](tickets/dynamic-scope.md)
|
||||
- [ ] ネイティブ GUI クライアント MVP → [tickets/native-gui-mvp.md](tickets/native-gui-mvp.md)
|
||||
|
|
|
|||
|
|
@ -16,12 +16,13 @@ pub use scheme_impl::OpenAIResponsesState;
|
|||
|
||||
/// OpenAI Responses scheme 本体。
|
||||
///
|
||||
/// `store` / `include_encrypted_content` / `send_max_output_tokens` は
|
||||
/// scheme 固定の wire 設定で、デフォルトは公式 OpenAI Responses API
|
||||
/// 向け (stateless + ZDR + `max_output_tokens` 送出可)。ChatGPT backend
|
||||
/// (codex-oauth) のように受理パラメータが subset の経路では provider 層で
|
||||
/// `send_max_output_tokens=false` 等に上書きする。`ModelCapability` には
|
||||
/// 入れない(モデル能力ではなく wire policy)。
|
||||
/// `store` / `include_encrypted_content` / `send_max_output_tokens` /
|
||||
/// `send_sampling_params` は scheme 固定の wire 設定で、デフォルトは
|
||||
/// 公式 OpenAI Responses API 向け (stateless + ZDR + `max_output_tokens`
|
||||
/// / `temperature` / `top_p` 送出可)。ChatGPT backend (codex-oauth) の
|
||||
/// ように受理パラメータが subset の経路では provider 層で
|
||||
/// `send_max_output_tokens=false` / `send_sampling_params=false` に
|
||||
/// 上書きする。`ModelCapability` には入れない(モデル能力ではなく wire policy)。
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct OpenAIResponsesScheme {
|
||||
/// サーバ側に response を保存するか。ZDR/stateless 運用では `false`。
|
||||
|
|
@ -33,6 +34,10 @@ pub struct OpenAIResponsesScheme {
|
|||
/// 受理するが、ChatGPT backend (codex-oauth) は `Unsupported parameter`
|
||||
/// で 400 を返すため、その経路では `false` にする。
|
||||
pub send_max_output_tokens: bool,
|
||||
/// `temperature` / `top_p` を body に載せるか。公式 OpenAI Responses API
|
||||
/// は受理するが、ChatGPT backend (codex-oauth) は `Unsupported parameter`
|
||||
/// で 400 を返すため、その経路では `false` にする。
|
||||
pub send_sampling_params: bool,
|
||||
}
|
||||
|
||||
impl Default for OpenAIResponsesScheme {
|
||||
|
|
@ -41,13 +46,14 @@ impl Default for OpenAIResponsesScheme {
|
|||
store: false,
|
||||
include_encrypted_content: true,
|
||||
send_max_output_tokens: true,
|
||||
send_sampling_params: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl OpenAIResponsesScheme {
|
||||
/// デフォルト設定 (`store=false`, `include=["reasoning.encrypted_content"]`,
|
||||
/// `send_max_output_tokens=true`)。
|
||||
/// `send_max_output_tokens=true`, `send_sampling_params=true`)。
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
|
@ -69,4 +75,10 @@ impl OpenAIResponsesScheme {
|
|||
self.send_max_output_tokens = send;
|
||||
self
|
||||
}
|
||||
|
||||
/// `temperature` / `top_p` を body に載せるかを上書き。
|
||||
pub fn with_send_sampling_params(mut self, send: bool) -> Self {
|
||||
self.send_sampling_params = send;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -43,6 +43,9 @@ pub(crate) struct ResponsesRequest {
|
|||
/// が `false` のときは `None` のまま送る (skip_serializing_if で除外)。
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub max_output_tokens: Option<u32>,
|
||||
/// 公式 OpenAI Responses API では受理されるが、ChatGPT backend
|
||||
/// (codex-oauth) は `temperature` / `top_p` を 400 で弾く。scheme の
|
||||
/// `send_sampling_params` が `false` のときは `None` のまま送る。
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub temperature: Option<f32>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
|
|
@ -203,8 +206,16 @@ impl OpenAIResponsesScheme {
|
|||
} else {
|
||||
None
|
||||
},
|
||||
temperature: request.config.temperature,
|
||||
top_p: request.config.top_p,
|
||||
temperature: if self.send_sampling_params {
|
||||
request.config.temperature
|
||||
} else {
|
||||
None
|
||||
},
|
||||
top_p: if self.send_sampling_params {
|
||||
request.config.top_p
|
||||
} else {
|
||||
None
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -471,6 +482,29 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sampling_params_passed_through_by_default() {
|
||||
let scheme = OpenAIResponsesScheme::new();
|
||||
let req = Request::new().user("hi").temperature(0.4).top_p(0.9);
|
||||
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
|
||||
assert_eq!(body.temperature, Some(0.4));
|
||||
assert_eq!(body.top_p, Some(0.9));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sampling_params_dropped_when_send_disabled() {
|
||||
let scheme = OpenAIResponsesScheme::new().with_send_sampling_params(false);
|
||||
let req = Request::new().user("hi").temperature(0.4).top_p(0.9);
|
||||
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
|
||||
assert_eq!(body.temperature, None);
|
||||
assert_eq!(body.top_p, None);
|
||||
let json = serde_json::to_value(&body).unwrap();
|
||||
assert!(
|
||||
json.get("temperature").is_none() && json.get("top_p").is_none(),
|
||||
"temperature/top_p keys must not appear in serialised body, got: {json}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tool_schema_without_properties_is_normalized() {
|
||||
// schemars は引数なし struct から `type:"object"` だけのスキーマを
|
||||
|
|
|
|||
|
|
@ -3,8 +3,12 @@
|
|||
use serde_json::Value;
|
||||
|
||||
use crate::llm_client::{
|
||||
ClientError, auth::AuthRequirement, capability::ModelCapability,
|
||||
client::ConfigWarning, event::Event, scheme::Scheme,
|
||||
ClientError,
|
||||
auth::AuthRequirement,
|
||||
capability::ModelCapability,
|
||||
client::ConfigWarning,
|
||||
event::Event,
|
||||
scheme::Scheme,
|
||||
types::{Request, RequestConfig},
|
||||
};
|
||||
|
||||
|
|
@ -64,6 +68,21 @@ impl Scheme for OpenAIResponsesScheme {
|
|||
"OpenAI Responses (ChatGPT backend)",
|
||||
));
|
||||
}
|
||||
// 同上、`temperature` / `top_p` も ChatGPT backend では 400 で弾かれる。
|
||||
if !self.send_sampling_params {
|
||||
if config.temperature.is_some() {
|
||||
warnings.push(ConfigWarning::unsupported(
|
||||
"temperature",
|
||||
"OpenAI Responses (ChatGPT backend)",
|
||||
));
|
||||
}
|
||||
if config.top_p.is_some() {
|
||||
warnings.push(ConfigWarning::unsupported(
|
||||
"top_p",
|
||||
"OpenAI Responses (ChatGPT backend)",
|
||||
));
|
||||
}
|
||||
}
|
||||
warnings
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,9 +23,7 @@ pub struct ExtractPointerPayload {
|
|||
|
||||
/// `RestoredState.extensions` から最新の Phase 1 pointer を取り出す。
|
||||
/// 未抽出セッションでは `None`。
|
||||
pub fn fold_pointer(
|
||||
extensions: &[(String, serde_json::Value)],
|
||||
) -> Option<ExtractPointerPayload> {
|
||||
pub fn fold_pointer(extensions: &[(String, serde_json::Value)]) -> Option<ExtractPointerPayload> {
|
||||
extensions
|
||||
.iter()
|
||||
.rev()
|
||||
|
|
@ -48,10 +46,7 @@ mod tests {
|
|||
"staging_id": "old"
|
||||
}),
|
||||
),
|
||||
(
|
||||
"other.domain".to_string(),
|
||||
serde_json::json!({ "x": 1 }),
|
||||
),
|
||||
("other.domain".to_string(), serde_json::json!({ "x": 1 })),
|
||||
(
|
||||
EXTRACT_DOMAIN.to_string(),
|
||||
serde_json::json!({
|
||||
|
|
@ -69,21 +64,16 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn fold_returns_none_when_absent() {
|
||||
let exts = vec![(
|
||||
"other.domain".to_string(),
|
||||
serde_json::json!({ "x": 1 }),
|
||||
)];
|
||||
let exts = vec![("other.domain".to_string(), serde_json::json!({ "x": 1 }))];
|
||||
assert!(fold_pointer(&exts).is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fold_skips_malformed_entries() {
|
||||
let exts = vec![
|
||||
(
|
||||
EXTRACT_DOMAIN.to_string(),
|
||||
serde_json::json!({ "wrong_shape": true }),
|
||||
),
|
||||
];
|
||||
let exts = vec![(
|
||||
EXTRACT_DOMAIN.to_string(),
|
||||
serde_json::json!({ "wrong_shape": true }),
|
||||
)];
|
||||
// 現状は最新を取り出して JSON 不一致なら None。古いものに fallback
|
||||
// しないのは、壊れた最新を黙って無視すると意図しない再抽出を招くため。
|
||||
assert!(fold_pointer(&exts).is_none());
|
||||
|
|
|
|||
|
|
@ -84,7 +84,12 @@ mod tests {
|
|||
};
|
||||
let (id, path) = write_staging(&layout, source.clone(), payload).unwrap();
|
||||
assert_eq!(path.parent().unwrap(), layout.staging_dir());
|
||||
assert!(path.file_name().unwrap().to_string_lossy().contains(&id.to_string()));
|
||||
assert!(
|
||||
path.file_name()
|
||||
.unwrap()
|
||||
.to_string_lossy()
|
||||
.contains(&id.to_string())
|
||||
);
|
||||
|
||||
let written: StagingRecord =
|
||||
serde_json::from_str(&fs::read_to_string(&path).unwrap()).unwrap();
|
||||
|
|
@ -101,8 +106,7 @@ mod tests {
|
|||
session_id: "sess".into(),
|
||||
range: [0, 0],
|
||||
};
|
||||
let (_, path) =
|
||||
write_staging(&layout, source, ExtractedPayload::default()).unwrap();
|
||||
let (_, path) = write_staging(&layout, source, ExtractedPayload::default()).unwrap();
|
||||
let written: StagingRecord =
|
||||
serde_json::from_str(&fs::read_to_string(&path).unwrap()).unwrap();
|
||||
assert!(written.payload.is_empty());
|
||||
|
|
|
|||
|
|
@ -134,8 +134,9 @@ mod tests {
|
|||
let ctx = Arc::new(ExtractWorkerContext::new());
|
||||
let tool: Arc<dyn Tool> = Arc::new(WriteExtractedTool { ctx: ctx.clone() });
|
||||
|
||||
let first = serde_json::json!({"decisions": [], "discussions": [], "attempts": [], "requests": []})
|
||||
.to_string();
|
||||
let first =
|
||||
serde_json::json!({"decisions": [], "discussions": [], "attempts": [], "requests": []})
|
||||
.to_string();
|
||||
tool.execute(&first).await.unwrap();
|
||||
|
||||
let second = serde_json::json!({
|
||||
|
|
|
|||
|
|
@ -548,7 +548,8 @@ mod tests {
|
|||
// `db-pol` (1 deletion), `db-pools` (1 insertion).
|
||||
for slug in ["db-pol", "db-pools"] {
|
||||
write(
|
||||
&dir.path().join(format!(".insomnia/memory/decisions/{slug}.md")),
|
||||
&dir.path()
|
||||
.join(format!(".insomnia/memory/decisions/{slug}.md")),
|
||||
&format!(
|
||||
"---\ncreated_at: {n}\nupdated_at: {n}\nsources: []\nstatus: open\n---\n",
|
||||
n = iso_now()
|
||||
|
|
@ -577,7 +578,8 @@ mod tests {
|
|||
let (dir, linter) = workspace();
|
||||
for slug in ["alpha", "bravo"] {
|
||||
write(
|
||||
&dir.path().join(format!(".insomnia/memory/decisions/{slug}.md")),
|
||||
&dir.path()
|
||||
.join(format!(".insomnia/memory/decisions/{slug}.md")),
|
||||
&format!(
|
||||
"---\ncreated_at: {n}\nupdated_at: {n}\nsources: []\nstatus: open\n---\n",
|
||||
n = iso_now()
|
||||
|
|
|
|||
|
|
@ -155,7 +155,11 @@ mod tests {
|
|||
fn non_md_files_ignored() {
|
||||
let (dir, layout) = setup();
|
||||
write_knowledge(dir.path(), "good", "ok", true, "");
|
||||
std::fs::write(dir.path().join(".insomnia/knowledge/note.txt"), "not markdown\n").unwrap();
|
||||
std::fs::write(
|
||||
dir.path().join(".insomnia/knowledge/note.txt"),
|
||||
"not markdown\n",
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let got = collect_resident_knowledge(&layout);
|
||||
assert_eq!(got.len(), 1);
|
||||
|
|
|
|||
|
|
@ -450,7 +450,9 @@ mod tests {
|
|||
}
|
||||
|
||||
fn write_decision(dir: &Path, slug: &str, body: &str) {
|
||||
let path = dir.join(".insomnia/memory/decisions").join(format!("{slug}.md"));
|
||||
let path = dir
|
||||
.join(".insomnia/memory/decisions")
|
||||
.join(format!("{slug}.md"));
|
||||
let content = format!(
|
||||
"---\ncreated_at: {n}\nupdated_at: {n}\nsources: []\nstatus: open\n---\n{body}",
|
||||
n = now()
|
||||
|
|
@ -501,12 +503,14 @@ mod tests {
|
|||
assert_eq!(records.len(), 1);
|
||||
assert_eq!(records[0].slug, "alpha");
|
||||
assert_eq!(records[0].kind, "decision");
|
||||
assert!(records[0]
|
||||
.excerpt
|
||||
.as_deref()
|
||||
.unwrap()
|
||||
.to_lowercase()
|
||||
.contains("ollama"));
|
||||
assert!(
|
||||
records[0]
|
||||
.excerpt
|
||||
.as_deref()
|
||||
.unwrap()
|
||||
.to_lowercase()
|
||||
.contains("ollama")
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
|
@ -634,12 +638,14 @@ mod tests {
|
|||
assert_eq!(records[0].kind.as_deref(), Some("policy"));
|
||||
assert_eq!(records[0].description.as_deref(), Some("the policy doc"));
|
||||
assert_eq!(records[0].model_invokation, Some(false));
|
||||
assert!(records[0]
|
||||
.excerpt
|
||||
.as_deref()
|
||||
.unwrap()
|
||||
.to_lowercase()
|
||||
.contains("ollama"));
|
||||
assert!(
|
||||
records[0]
|
||||
.excerpt
|
||||
.as_deref()
|
||||
.unwrap()
|
||||
.to_lowercase()
|
||||
.contains("ollama")
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
|
|
|||
|
|
@ -150,13 +150,14 @@ pub fn lookup_session(session_id: SessionId) -> Result<Option<SessionLockInfo>,
|
|||
let lock_path = default_registry_path()?;
|
||||
let mut guard = LockFileGuard::open(&lock_path)?;
|
||||
crate::mutate::reclaim_stale(&mut guard);
|
||||
Ok(guard.data().find_by_session(session_id).map(|a| {
|
||||
SessionLockInfo {
|
||||
Ok(guard
|
||||
.data()
|
||||
.find_by_session(session_id)
|
||||
.map(|a| SessionLockInfo {
|
||||
pod_name: a.pod_name.clone(),
|
||||
socket: a.socket.clone(),
|
||||
pid: a.pid,
|
||||
}
|
||||
}))
|
||||
}))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
|
|||
|
|
@ -249,10 +249,7 @@ mod tests {
|
|||
assert!(g.data().find_by_session(target_session).is_none());
|
||||
|
||||
// After adopt-style rewrite, the same allocation is now found.
|
||||
g.data_mut()
|
||||
.find_mut("child")
|
||||
.unwrap()
|
||||
.session_id = Some(target_session);
|
||||
g.data_mut().find_mut("child").unwrap().session_id = Some(target_session);
|
||||
let found = g.data().find_by_session(target_session).unwrap();
|
||||
assert_eq!(found.pod_name, "child");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,22 +12,22 @@ use std::sync::{Arc, Mutex};
|
|||
|
||||
use async_trait::async_trait;
|
||||
use llm_worker::Item;
|
||||
use llm_worker::UsageRecord;
|
||||
use llm_worker::interceptor::{
|
||||
Interceptor, PostToolAction, PreRequestAction, PreToolAction, PromptAction, ToolCallInfo,
|
||||
ToolResultInfo, TurnEndAction,
|
||||
};
|
||||
use llm_worker::UsageRecord;
|
||||
use llm_worker::tool::ToolOutput;
|
||||
use tracing::info;
|
||||
|
||||
use crate::compact::state::CompactState;
|
||||
use llm_worker::token_counter::total_tokens;
|
||||
use crate::hook::{
|
||||
AbortInfo, HookRegistry, PreRequestInfo, PromptSubmitInfo, ToolCallSummary, ToolResultSummary,
|
||||
TurnEndInfo,
|
||||
};
|
||||
use crate::ipc::notify_buffer::{NotifyBuffer, format_notify};
|
||||
use crate::prompt::catalog::PromptCatalog;
|
||||
use llm_worker::token_counter::total_tokens;
|
||||
use tracing::warn;
|
||||
|
||||
/// Maximum number of bytes copied into `TurnEndInfo::final_text_preview`.
|
||||
|
|
|
|||
|
|
@ -1030,9 +1030,7 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
|
|||
.prompts
|
||||
.compact_system()
|
||||
.map_err(PodError::PromptCatalog)?;
|
||||
let mut summary_worker = Worker::new(summary_client)
|
||||
.system_prompt(summary_system_prompt)
|
||||
.temperature(0.0);
|
||||
let mut summary_worker = Worker::new(summary_client).system_prompt(summary_system_prompt);
|
||||
|
||||
// Cumulative input-token meter + interceptor. The meter is bumped
|
||||
// from the on_usage callback and read on every pre_llm_request.
|
||||
|
|
@ -1209,10 +1207,7 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
|
|||
// segments; only the user_messages surviving in retained_items
|
||||
// keep them. They are always the trailing K entries of
|
||||
// `self.user_segments` because submissions are appended in order.
|
||||
let drop_n = self
|
||||
.user_segments
|
||||
.len()
|
||||
.saturating_sub(retained_user_msgs);
|
||||
let drop_n = self.user_segments.len().saturating_sub(retained_user_msgs);
|
||||
if drop_n > 0 {
|
||||
self.user_segments.drain(..drop_n);
|
||||
}
|
||||
|
|
@ -1373,7 +1368,12 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
|
|||
return Ok(ExtractDecision::Skipped);
|
||||
}
|
||||
|
||||
let current_history_len = self.worker.as_ref().expect("worker present").history().len();
|
||||
let current_history_len = self
|
||||
.worker
|
||||
.as_ref()
|
||||
.expect("worker present")
|
||||
.history()
|
||||
.len();
|
||||
if current_history_len <= processed_history_len {
|
||||
return Ok(ExtractDecision::Skipped);
|
||||
}
|
||||
|
|
@ -1394,11 +1394,8 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
|
|||
return Ok(ExtractDecision::Skipped);
|
||||
}
|
||||
|
||||
let items_to_extract = self
|
||||
.worker
|
||||
.as_ref()
|
||||
.expect("worker present")
|
||||
.history()[processed_history_len..current_history_len]
|
||||
let items_to_extract = self.worker.as_ref().expect("worker present").history()
|
||||
[processed_history_len..current_history_len]
|
||||
.to_vec();
|
||||
|
||||
let layout = memory::WorkspaceLayout::resolve(memory_cfg, &self.pwd);
|
||||
|
|
@ -1407,9 +1404,7 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
|
|||
.unwrap_or(manifest::defaults::MEMORY_EXTRACT_WORKER_MAX_INPUT_TOKENS);
|
||||
|
||||
let client = self.build_extractor_client(memory_cfg)?;
|
||||
let mut extract_worker = Worker::new(client)
|
||||
.system_prompt(extract::EXTRACT_SYSTEM_PROMPT)
|
||||
.temperature(0.0);
|
||||
let mut extract_worker = Worker::new(client).system_prompt(extract::EXTRACT_SYSTEM_PROMPT);
|
||||
|
||||
// Cumulative input-token meter + interceptor (mirror of
|
||||
// CompactWorkerInterceptor). Aborts the extract worker if its
|
||||
|
|
@ -1432,7 +1427,10 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
|
|||
extract_worker.register_tool(extract::write_extracted_tool(ctx.clone()));
|
||||
|
||||
let input_text = extract::build_extract_input(&items_to_extract);
|
||||
extract_worker.run(input_text).await.map_err(PodError::Worker)?;
|
||||
extract_worker
|
||||
.run(input_text)
|
||||
.await
|
||||
.map_err(PodError::Worker)?;
|
||||
|
||||
let payload = ctx.take_payload().unwrap_or_else(|| {
|
||||
tracing::warn!(
|
||||
|
|
@ -1606,8 +1604,11 @@ impl<St: Store> Pod<Box<dyn LlmClient>, St> {
|
|||
let common = prepare_pod_common(&manifest, &loader, /* parse_template */ true)?;
|
||||
|
||||
let session_id = session_store::new_session_id();
|
||||
let scope_allocation =
|
||||
pod_registry::adopt_allocation(manifest.pod.name.clone(), std::process::id(), session_id)?;
|
||||
let scope_allocation = pod_registry::adopt_allocation(
|
||||
manifest.pod.name.clone(),
|
||||
std::process::id(),
|
||||
session_id,
|
||||
)?;
|
||||
|
||||
let mut worker = Worker::new(common.client);
|
||||
apply_worker_manifest(&mut worker, &manifest.worker);
|
||||
|
|
|
|||
|
|
@ -43,13 +43,9 @@ async fn restore_from_manifest_rejects_unknown_session() {
|
|||
// NotFound, which `Pod::restore_from_manifest` surfaces verbatim
|
||||
// as `PodError::Store`.
|
||||
let unknown = session_store::new_session_id();
|
||||
let result = Pod::restore_from_manifest(
|
||||
unknown,
|
||||
manifest,
|
||||
store,
|
||||
pod::PromptLoader::builtins_only(),
|
||||
)
|
||||
.await;
|
||||
let result =
|
||||
Pod::restore_from_manifest(unknown, manifest, store, pod::PromptLoader::builtins_only())
|
||||
.await;
|
||||
|
||||
match result {
|
||||
Err(PodError::Store(StoreError::NotFound(id))) => assert_eq!(id, unknown),
|
||||
|
|
|
|||
|
|
@ -142,11 +142,13 @@ fn build_from_config(config: &ModelConfig) -> Result<Box<dyn LlmClient>, Provide
|
|||
SchemeKind::OpenaiChat => build_transport(OpenAIScheme::new(), config, resolved),
|
||||
SchemeKind::Gemini => build_transport(GeminiScheme::new(), config, resolved),
|
||||
SchemeKind::OpenaiResponses => {
|
||||
// ChatGPT backend (codex-oauth) は `max_output_tokens` を
|
||||
// 400 で弾くため、その経路では送出を止める。
|
||||
let scheme = OpenAIResponsesScheme::new().with_send_max_output_tokens(
|
||||
!matches!(config.auth, AuthRef::CodexOAuth),
|
||||
);
|
||||
// ChatGPT backend (codex-oauth) は `max_output_tokens` /
|
||||
// `temperature` / `top_p` を 400 で弾くため、その経路では
|
||||
// 送出を止める。
|
||||
let send_to_official = !matches!(config.auth, AuthRef::CodexOAuth);
|
||||
let scheme = OpenAIResponsesScheme::new()
|
||||
.with_send_max_output_tokens(send_to_official)
|
||||
.with_send_sampling_params(send_to_official);
|
||||
build_transport(scheme, config, resolved)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -35,14 +35,14 @@ pub mod store;
|
|||
|
||||
pub use event_trace::TraceEntry;
|
||||
pub use fs_store::FsStore;
|
||||
pub use llm_worker::UsageRecord;
|
||||
pub use llm_worker::llm_client::types::{ContentPart, Item, Role};
|
||||
pub use logged_item::{LoggedContentPart, LoggedItem, LoggedRole, from_logged, to_logged};
|
||||
pub use session::{
|
||||
SessionStartState, create_compacted_session, create_session, create_session_with_id,
|
||||
ensure_head_or_fork, fork, fork_at, restore, save_config_changed, save_delta, save_extension,
|
||||
save_run_completed, save_run_errored, save_turn_end, save_usage, save_user_input,
|
||||
};
|
||||
pub use llm_worker::UsageRecord;
|
||||
pub use llm_worker::llm_client::types::{ContentPart, Item, Role};
|
||||
pub use session_log::{
|
||||
EntryHash, HashedEntry, LogEntry, RestoredState, SessionOrigin, build_chain, collect_state,
|
||||
compute_hash,
|
||||
|
|
|
|||
|
|
@ -257,19 +257,13 @@ pub fn collect_state(entries: &[HashedEntry]) -> RestoredState {
|
|||
state.user_segments.push(segments.clone());
|
||||
}
|
||||
LogEntry::AssistantItems { items, .. } => {
|
||||
state
|
||||
.history
|
||||
.extend(items.iter().cloned().map(Item::from));
|
||||
state.history.extend(items.iter().cloned().map(Item::from));
|
||||
}
|
||||
LogEntry::ToolResults { items, .. } => {
|
||||
state
|
||||
.history
|
||||
.extend(items.iter().cloned().map(Item::from));
|
||||
state.history.extend(items.iter().cloned().map(Item::from));
|
||||
}
|
||||
LogEntry::HookInjectedItems { items, .. } => {
|
||||
state
|
||||
.history
|
||||
.extend(items.iter().cloned().map(Item::from));
|
||||
state.history.extend(items.iter().cloned().map(Item::from));
|
||||
}
|
||||
LogEntry::TurnEnd { turn_count, .. } => {
|
||||
state.turn_count = *turn_count;
|
||||
|
|
@ -419,9 +413,7 @@ mod tests {
|
|||
},
|
||||
LogEntry::AssistantItems {
|
||||
ts: 3000,
|
||||
items: vec![
|
||||
Item::tool_call("call_1", "get_weather", r#"{"city":"Tokyo"}"#).into(),
|
||||
],
|
||||
items: vec![Item::tool_call("call_1", "get_weather", r#"{"city":"Tokyo"}"#).into()],
|
||||
},
|
||||
LogEntry::ToolResults {
|
||||
ts: 3500,
|
||||
|
|
@ -721,10 +713,7 @@ mod tests {
|
|||
assert_eq!(content.len(), 1);
|
||||
match &content[0] {
|
||||
llm_worker::ContentPart::Text { text } => {
|
||||
assert_eq!(
|
||||
text,
|
||||
"see line1\nline2[unresolved file ref: src/main.rs]"
|
||||
);
|
||||
assert_eq!(text, "see line1\nline2[unresolved file ref: src/main.rs]");
|
||||
}
|
||||
other => panic!("unexpected content: {other:?}"),
|
||||
}
|
||||
|
|
|
|||
|
|
@ -460,7 +460,9 @@ impl App {
|
|||
// single text segment.
|
||||
let segments = item
|
||||
.get("segments")
|
||||
.and_then(|v| serde_json::from_value::<Vec<Segment>>(v.clone()).ok())
|
||||
.and_then(|v| {
|
||||
serde_json::from_value::<Vec<Segment>>(v.clone()).ok()
|
||||
})
|
||||
.unwrap_or_else(|| {
|
||||
if text.is_empty() {
|
||||
Vec::new()
|
||||
|
|
|
|||
|
|
@ -71,9 +71,7 @@ fn char_class(c: char) -> AtomClass {
|
|||
let cp = c as u32;
|
||||
match cp {
|
||||
0x3040..=0x309F => AtomClass::Word(WordKind::Hiragana),
|
||||
0x30A0..=0x30FF | 0x31F0..=0x31FF | 0xFF65..=0xFF9F => {
|
||||
AtomClass::Word(WordKind::Katakana)
|
||||
}
|
||||
0x30A0..=0x30FF | 0x31F0..=0x31FF | 0xFF65..=0xFF9F => AtomClass::Word(WordKind::Katakana),
|
||||
0x3400..=0x4DBF | 0x4E00..=0x9FFF | 0xF900..=0xFAFF | 0x20000..=0x2FFFF => {
|
||||
AtomClass::Word(WordKind::Han)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ use std::io;
|
|||
use std::time::Duration;
|
||||
|
||||
use crossterm::event::{self, Event as TermEvent, KeyCode, KeyEventKind, KeyModifiers};
|
||||
use pod_registry::lookup_session;
|
||||
use ratatui::Terminal;
|
||||
use ratatui::backend::CrosstermBackend;
|
||||
use ratatui::layout::{Constraint, Layout};
|
||||
|
|
@ -19,7 +20,6 @@ use ratatui::style::{Color, Modifier, Style};
|
|||
use ratatui::text::{Line, Span};
|
||||
use ratatui::widgets::Paragraph;
|
||||
use ratatui::{Frame, TerminalOptions, Viewport};
|
||||
use pod_registry::lookup_session;
|
||||
use session_store::{
|
||||
FsStore, HashedEntry, LogEntry, LoggedContentPart, LoggedItem, SessionId, Store,
|
||||
};
|
||||
|
|
@ -138,9 +138,7 @@ pub async fn run() -> Result<PickerOutcome, PickerError> {
|
|||
/// scrolls the terminal up exactly one row, so the next inline
|
||||
/// viewport opens immediately below the picker rather than on top of
|
||||
/// it.
|
||||
fn close_viewport(
|
||||
terminal: &mut Terminal<CrosstermBackend<io::Stdout>>,
|
||||
) -> io::Result<()> {
|
||||
fn close_viewport(terminal: &mut Terminal<CrosstermBackend<io::Stdout>>) -> io::Result<()> {
|
||||
let area = terminal.get_frame().area();
|
||||
let last_row = area.bottom().saturating_sub(1);
|
||||
terminal.set_cursor_position((0, last_row))?;
|
||||
|
|
@ -253,8 +251,7 @@ fn poll_event() -> io::Result<Option<Action>> {
|
|||
|
||||
fn draw(f: &mut Frame<'_>, rows: &[Row], selected: usize) {
|
||||
let area = f.area();
|
||||
let mut constraints: Vec<Constraint> =
|
||||
Vec::with_capacity(rows.len() + 3);
|
||||
let mut constraints: Vec<Constraint> = Vec::with_capacity(rows.len() + 3);
|
||||
constraints.push(Constraint::Length(1)); // title
|
||||
for _ in rows {
|
||||
constraints.push(Constraint::Length(1));
|
||||
|
|
@ -272,10 +269,7 @@ fn draw(f: &mut Frame<'_>, rows: &[Row], selected: usize) {
|
|||
);
|
||||
|
||||
for (i, row) in rows.iter().enumerate() {
|
||||
f.render_widget(
|
||||
Paragraph::new(row_line(row, i == selected)),
|
||||
layout[i + 1],
|
||||
);
|
||||
f.render_widget(Paragraph::new(row_line(row, i == selected)), layout[i + 1]);
|
||||
}
|
||||
|
||||
f.render_widget(
|
||||
|
|
@ -325,4 +319,3 @@ fn short_session(id: SessionId) -> String {
|
|||
let s = id.to_string();
|
||||
s.chars().take(8).collect()
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -20,13 +20,13 @@ use std::time::Duration;
|
|||
use crossterm::event::{self, Event as TermEvent, KeyCode, KeyEventKind, KeyModifiers};
|
||||
use manifest::{PodManifestConfig, find_project_manifest_from, load_layer, user_manifest_path};
|
||||
use ratatui::Terminal;
|
||||
use session_store::SessionId;
|
||||
use ratatui::backend::CrosstermBackend;
|
||||
use ratatui::layout::{Constraint, Layout};
|
||||
use ratatui::style::{Color, Modifier, Style};
|
||||
use ratatui::text::{Line, Span};
|
||||
use ratatui::widgets::Paragraph;
|
||||
use ratatui::{Frame, TerminalOptions, Viewport};
|
||||
use session_store::SessionId;
|
||||
use tokio::io::{AsyncBufReadExt, BufReader};
|
||||
use tokio::process::{Child, Command};
|
||||
use tokio::task::JoinHandle;
|
||||
|
|
|
|||
|
|
@ -47,9 +47,11 @@ model, input, instructions, stream, store, include,
|
|||
tools, tool_choice, reasoning, previous_response_id, truncation
|
||||
```
|
||||
|
||||
`max_output_tokens`, `max_tokens`, `max_completion_tokens`, `temperature`, `user`, `metadata`, `context_management` はすべて拒否される。
|
||||
`max_output_tokens`, `max_tokens`, `max_completion_tokens`, `temperature`, `top_p`, `user`, `metadata`, `context_management` はすべて拒否される(実観測でも `temperature` 同梱リクエストは `{"detail":"Unsupported parameter: temperature"}` を返す)。
|
||||
|
||||
Codex CLI 自身も `config.toml` の `model_max_output_tokens` を API リクエストに載せない実装になっており (https://github.com/openai/codex/issues/4138)、これはバグではなく ChatGPT backend の制約に対する回避策と解釈できる。
|
||||
Codex CLI 自身も `config.toml` の `model_max_output_tokens` を API リクエストに載せない実装になっており (https://github.com/openai/codex/issues/4138)、これはバグではなく ChatGPT backend の制約に対する回避策と解釈できる。同 CLI は `temperature` / `top_p` も送出しない。
|
||||
|
||||
本リポジトリでは `OpenAIResponsesScheme` の `send_max_output_tokens` / `send_sampling_params` フラグでこれらの送出を一括制御し、`provider/src/lib.rs` 内で `AuthRef::CodexOAuth` 指定時に両方 `false` にする。
|
||||
|
||||
## 6. ドキュメント URL
|
||||
|
||||
|
|
|
|||
102
tickets/responses-sampling-params.md
Normal file
102
tickets/responses-sampling-params.md
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
# OpenAI Responses: sampling パラメータの取り扱い
|
||||
|
||||
## 背景
|
||||
|
||||
ChatGPT backend (`https://chatgpt.com/backend-api/codex/responses`) は公式
|
||||
OpenAI Responses API のサブセットしか受け付けず、サポート外パラメータを
|
||||
含むリクエストを 400 (`Unsupported parameter: ...`) で拒否する。
|
||||
受理パラメータは概ね以下に限られる(出典: `docs/research/openai_responses_max_output_tokens.md`):
|
||||
|
||||
```
|
||||
model, input, instructions, stream, store, include,
|
||||
tools, tool_choice, reasoning, previous_response_id, truncation
|
||||
```
|
||||
|
||||
`max_output_tokens` については先行修正 (commit `af57d5b`) で
|
||||
`OpenAIResponsesScheme::send_max_output_tokens` を導入し、
|
||||
`AuthRef::CodexOAuth` 経路では送らないようにしてある。
|
||||
|
||||
今回、同じ経路で `temperature` も 400 を返すことが確認された:
|
||||
|
||||
```
|
||||
[notice] pod: memory Phase 1 extract failed:
|
||||
Client error: API error (status: 400):
|
||||
{"detail":"Unsupported parameter: temperature"}
|
||||
```
|
||||
|
||||
加えて、Pod の compactor / extract worker は `pod.rs` で
|
||||
`.temperature(0.0)` をハードコードしている。「決定論的に振る舞う」程度の
|
||||
動機で 0.0 が選ばれているが:
|
||||
|
||||
- 公式 reasoning モデル (`gpt-5`, o 系) は temperature を無視/固定する
|
||||
- 他プロバイダ (Claude / Gemini / Ollama) でも 0.0 が extract / 要約に
|
||||
最適という自前検証は無い
|
||||
- そもそもプロバイダ既定値がそれぞれの妥当な値になっているはず
|
||||
|
||||
ハードコードを残す積極的理由が弱く、かつ codex-oauth で実害が出ている。
|
||||
|
||||
## 方針
|
||||
|
||||
二段で対処する。
|
||||
|
||||
1. **wire-level**: `OpenAIResponsesScheme` に
|
||||
`send_sampling_params: bool` を追加し、`AuthRef::CodexOAuth` 経路では
|
||||
`false` に設定する。`false` のとき `temperature` / `top_p` を
|
||||
body に載せない。`max_tokens` (`send_max_output_tokens` フラグ) と同じ枠組みなので構造は揃える。
|
||||
2. **pod-level**: `pod.rs` の `.temperature(0.0)` ハードコード 2 箇所を
|
||||
撤去する。プロバイダ既定値に任せる。
|
||||
|
||||
(2) だけでも codex-oauth の現症状は消えるが、ユーザが manifest で
|
||||
明示的に `temperature` を設定しているケース(非 0.0)でも codex-oauth
|
||||
配下では 400 になるため、(1) も併せて入れる。
|
||||
|
||||
## 要件
|
||||
|
||||
### Scheme 側
|
||||
|
||||
- `OpenAIResponsesScheme` に `send_sampling_params: bool` フィールドを
|
||||
追加(デフォルト `true` = 公式 OpenAI API 向け)
|
||||
- `with_send_sampling_params(bool)` ビルダを生やす
|
||||
- `request.rs` の `ResponsesRequest` で `temperature` / `top_p` を
|
||||
`send_sampling_params == false` のときは `None` のまま送る
|
||||
(`#[serde(skip_serializing_if = "Option::is_none")]` で除外)
|
||||
- `validate_config` で `send_sampling_params == false` かつ
|
||||
`config.temperature.is_some()` または `config.top_p.is_some()` の
|
||||
ときに `ConfigWarning::unsupported` を返す(`max_tokens` と同じ流儀)
|
||||
- `provider/src/lib.rs` の `SchemeKind::OpenaiResponses` 分岐で、
|
||||
`AuthRef::CodexOAuth` のとき `send_sampling_params=false` を渡す
|
||||
|
||||
### Pod 側
|
||||
|
||||
- `crates/pod/src/pod.rs:1011` の compactor worker `.temperature(0.0)` を撤去
|
||||
- `crates/pod/src/pod.rs:1368` の extract worker `.temperature(0.0)` を撤去
|
||||
- 既存テストが落ちないことを確認(`pod.rs:2034` のテスト assert は
|
||||
`RequestConfig` に直接 `temperature: Some(0.2)` を入れているので
|
||||
ハードコード撤去とは独立)
|
||||
|
||||
### docs
|
||||
|
||||
- `docs/research/openai_responses_max_output_tokens.md` の
|
||||
「ChatGPT backend が拒否するパラメータ一覧」を補足するか、
|
||||
もしくは sampling 用の研究 doc を新設して `temperature` / `top_p`
|
||||
の扱いを明文化する(max_output_tokens の doc に追記する形で十分)
|
||||
|
||||
## 完了条件
|
||||
|
||||
- `OpenAIResponsesScheme::new().with_send_sampling_params(false)` で
|
||||
作った scheme から生成した body に `temperature` / `top_p` キーが
|
||||
載らない(unit test)
|
||||
- `provider::build_client` で `AuthRef::CodexOAuth` + `OpenaiResponses`
|
||||
の組合せから作った client が `temperature` を含まないリクエストを送る
|
||||
- pod の compaction / memory extract が codex-oauth 経由で 400 にならず
|
||||
最後まで走る
|
||||
- `pod.rs` から `.temperature(0.0)` のハードコードが消えている
|
||||
- `cargo check` / `cargo test` が `llm-worker`, `provider`, `pod` で通る
|
||||
|
||||
## 範囲外
|
||||
|
||||
- `user` / `metadata` 等、現状コードで送出していない他の拒否パラメータ
|
||||
- 公式 OpenAI Responses API 側の `temperature` 挙動の変更
|
||||
- 「extract / 要約タスクに最適な temperature は何か」という検証
|
||||
(必要になったら manifest で per-model 設定に逃がすのが筋であり、
|
||||
pod.rs 内に再ハードコードはしない)
|
||||
Loading…
Reference in New Issue
Block a user