temperatureがcodexエンドポイントで使えない件の修正

2026-04-29 23:20:16 +09:00 · 2026-04-29 23:20:16 +09:00 · 3fc65e6f6b
commit 3fc65e6f6b
parent b0393d2fe9
8 changed files with 186 additions and 22 deletions
--- a/TODO.md
+++ b/TODO.md
@@ -5,6 +5,7 @@
  - [ ] Bash ツール (Permission 層と統合) → [tickets/bash-tool.md](tickets/bash-tool.md)
 - [ ] パーミッション: パターンベースのツール実行制御 → [tickets/permission-extension-point.md](tickets/permission-extension-point.md)
 - [ ] Pod CLI: マニフェスト関連フラグの整理 → [tickets/pod-cli-manifest-flags.md](tickets/pod-cli-manifest-flags.md)
+- [ ] OpenAI Responses: sampling パラメータの取り扱い → [tickets/responses-sampling-params.md](tickets/responses-sampling-params.md)
 - [ ] Pod オーケストレーション
  - [ ] 動的 Scope 変更 → [tickets/dynamic-scope.md](tickets/dynamic-scope.md)
 - [ ] ネイティブ GUI クライアント MVP → [tickets/native-gui-mvp.md](tickets/native-gui-mvp.md)
--- a/crates/llm-worker/src/llm_client/scheme/openai_responses/mod.rs
+++ b/crates/llm-worker/src/llm_client/scheme/openai_responses/mod.rs
@@ -16,12 +16,13 @@ pub use scheme_impl::OpenAIResponsesState;

 /// OpenAI Responses scheme 本体。
 ///
-/// `store` / `include_encrypted_content` / `send_max_output_tokens` は
-/// scheme 固定の wire 設定で、デフォルトは公式 OpenAI Responses API
-/// 向け (stateless + ZDR + `max_output_tokens` 送出可)。ChatGPT backend
-/// (codex-oauth) のように受理パラメータが subset の経路では provider 層で
-/// `send_max_output_tokens=false` 等に上書きする。`ModelCapability` には
-/// 入れない（モデル能力ではなく wire policy）。
+/// `store` / `include_encrypted_content` / `send_max_output_tokens` /
+/// `send_sampling_params` は scheme 固定の wire 設定で、デフォルトは
+/// 公式 OpenAI Responses API 向け (stateless + ZDR + `max_output_tokens`
+/// / `temperature` / `top_p` 送出可)。ChatGPT backend (codex-oauth) の
+/// ように受理パラメータが subset の経路では provider 層で
+/// `send_max_output_tokens=false` / `send_sampling_params=false` に
+/// 上書きする。`ModelCapability` には入れない（モデル能力ではなく wire policy）。
 #[derive(Debug, Clone)]
 pub struct OpenAIResponsesScheme {
    /// サーバ側に response を保存するか。ZDR/stateless 運用では `false`。
@@ -33,6 +34,10 @@ pub struct OpenAIResponsesScheme {
    /// 受理するが、ChatGPT backend (codex-oauth) は `Unsupported parameter`
    /// で 400 を返すため、その経路では `false` にする。
    pub send_max_output_tokens: bool,
+    /// `temperature` / `top_p` を body に載せるか。公式 OpenAI Responses API
+    /// は受理するが、ChatGPT backend (codex-oauth) は `Unsupported parameter`
+    /// で 400 を返すため、その経路では `false` にする。
+    pub send_sampling_params: bool,
 }

 impl Default for OpenAIResponsesScheme {
@@ -41,13 +46,14 @@ impl Default for OpenAIResponsesScheme {
            store: false,
            include_encrypted_content: true,
            send_max_output_tokens: true,
+            send_sampling_params: true,
        }
    }
 }

 impl OpenAIResponsesScheme {
    /// デフォルト設定 (`store=false`, `include=["reasoning.encrypted_content"]`,
-    /// `send_max_output_tokens=true`)。
+    /// `send_max_output_tokens=true`, `send_sampling_params=true`)。
    pub fn new() -> Self {
        Self::default()
    }
@@ -69,4 +75,10 @@ impl OpenAIResponsesScheme {
        self.send_max_output_tokens = send;
        self
    }
+
+    /// `temperature` / `top_p` を body に載せるかを上書き。
+    pub fn with_send_sampling_params(mut self, send: bool) -> Self {
+        self.send_sampling_params = send;
+        self
+    }
 }
--- a/crates/llm-worker/src/llm_client/scheme/openai_responses/request.rs
+++ b/crates/llm-worker/src/llm_client/scheme/openai_responses/request.rs
@@ -43,6 +43,9 @@ pub(crate) struct ResponsesRequest {
    /// が `false` のときは `None` のまま送る (skip_serializing_if で除外)。
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_output_tokens: Option<u32>,
+    /// 公式 OpenAI Responses API では受理されるが、ChatGPT backend
+    /// (codex-oauth) は `temperature` / `top_p` を 400 で弾く。scheme の
+    /// `send_sampling_params` が `false` のときは `None` のまま送る。
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,
    #[serde(skip_serializing_if = "Option::is_none")]
@@ -203,8 +206,16 @@ impl OpenAIResponsesScheme {
            } else {
                None
            },
-            temperature: request.config.temperature,
-            top_p: request.config.top_p,
+            temperature: if self.send_sampling_params {
+                request.config.temperature
+            } else {
+                None
+            },
+            top_p: if self.send_sampling_params {
+                request.config.top_p
+            } else {
+                None
+            },
        }
    }
 }
@@ -471,6 +482,29 @@ mod tests {
        );
    }

+    #[test]
+    fn sampling_params_passed_through_by_default() {
+        let scheme = OpenAIResponsesScheme::new();
+        let req = Request::new().user("hi").temperature(0.4).top_p(0.9);
+        let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
+        assert_eq!(body.temperature, Some(0.4));
+        assert_eq!(body.top_p, Some(0.9));
+    }
+
+    #[test]
+    fn sampling_params_dropped_when_send_disabled() {
+        let scheme = OpenAIResponsesScheme::new().with_send_sampling_params(false);
+        let req = Request::new().user("hi").temperature(0.4).top_p(0.9);
+        let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
+        assert_eq!(body.temperature, None);
+        assert_eq!(body.top_p, None);
+        let json = serde_json::to_value(&body).unwrap();
+        assert!(
+            json.get("temperature").is_none() && json.get("top_p").is_none(),
+            "temperature/top_p keys must not appear in serialised body, got: {json}"
+        );
+    }
+
    #[test]
    fn tool_schema_without_properties_is_normalized() {
        // schemars は引数なし struct から `type:"object"` だけのスキーマを
--- a/crates/llm-worker/src/llm_client/scheme/openai_responses/scheme_impl.rs
+++ b/crates/llm-worker/src/llm_client/scheme/openai_responses/scheme_impl.rs
@@ -64,6 +64,21 @@ impl Scheme for OpenAIResponsesScheme {
                "OpenAI Responses (ChatGPT backend)",
            ));
        }
+        // 同上、`temperature` / `top_p` も ChatGPT backend では 400 で弾かれる。
+        if !self.send_sampling_params {
+            if config.temperature.is_some() {
+                warnings.push(ConfigWarning::unsupported(
+                    "temperature",
+                    "OpenAI Responses (ChatGPT backend)",
+                ));
+            }
+            if config.top_p.is_some() {
+                warnings.push(ConfigWarning::unsupported(
+                    "top_p",
+                    "OpenAI Responses (ChatGPT backend)",
+                ));
+            }
+        }
        warnings
    }
 }
--- a/crates/pod/src/pod.rs
+++ b/crates/pod/src/pod.rs
@@ -1030,9 +1030,7 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
            .prompts
            .compact_system()
            .map_err(PodError::PromptCatalog)?;
-        let mut summary_worker = Worker::new(summary_client)
-            .system_prompt(summary_system_prompt)
-            .temperature(0.0);
+        let mut summary_worker = Worker::new(summary_client).system_prompt(summary_system_prompt);

        // Cumulative input-token meter + interceptor. The meter is bumped
        // from the on_usage callback and read on every pre_llm_request.
@@ -1407,9 +1405,7 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
            .unwrap_or(manifest::defaults::MEMORY_EXTRACT_WORKER_MAX_INPUT_TOKENS);

        let client = self.build_extractor_client(memory_cfg)?;
-        let mut extract_worker = Worker::new(client)
-            .system_prompt(extract::EXTRACT_SYSTEM_PROMPT)
-            .temperature(0.0);
+        let mut extract_worker = Worker::new(client).system_prompt(extract::EXTRACT_SYSTEM_PROMPT);

        // Cumulative input-token meter + interceptor (mirror of
        // CompactWorkerInterceptor). Aborts the extract worker if its
--- a/crates/provider/src/lib.rs
+++ b/crates/provider/src/lib.rs
@@ -142,11 +142,13 @@ fn build_from_config(config: &ModelConfig) -> Result<Box<dyn LlmClient>, Provide
        SchemeKind::OpenaiChat => build_transport(OpenAIScheme::new(), config, resolved),
        SchemeKind::Gemini => build_transport(GeminiScheme::new(), config, resolved),
        SchemeKind::OpenaiResponses => {
-            // ChatGPT backend (codex-oauth) は `max_output_tokens` を
-            // 400 で弾くため、その経路では送出を止める。
-            let scheme = OpenAIResponsesScheme::new().with_send_max_output_tokens(
-                !matches!(config.auth, AuthRef::CodexOAuth),
-            );
+            // ChatGPT backend (codex-oauth) は `max_output_tokens` /
+            // `temperature` / `top_p` を 400 で弾くため、その経路では
+            // 送出を止める。
+            let send_to_official = !matches!(config.auth, AuthRef::CodexOAuth);
+            let scheme = OpenAIResponsesScheme::new()
+                .with_send_max_output_tokens(send_to_official)
+                .with_send_sampling_params(send_to_official);
            build_transport(scheme, config, resolved)
        }
    }
--- a/docs/research/openai_responses_max_output_tokens.md
+++ b/docs/research/openai_responses_max_output_tokens.md
@@ -47,9 +47,11 @@ model, input, instructions, stream, store, include,
 tools, tool_choice, reasoning, previous_response_id, truncation
 ```

-`max_output_tokens`, `max_tokens`, `max_completion_tokens`, `temperature`, `user`, `metadata`, `context_management` はすべて拒否される。
+`max_output_tokens`, `max_tokens`, `max_completion_tokens`, `temperature`, `top_p`, `user`, `metadata`, `context_management` はすべて拒否される（実観測でも `temperature` 同梱リクエストは `{"detail":"Unsupported parameter: temperature"}` を返す）。

-Codex CLI 自身も `config.toml` の `model_max_output_tokens` を API リクエストに載せない実装になっており (https://github.com/openai/codex/issues/4138)、これはバグではなく ChatGPT backend の制約に対する回避策と解釈できる。
+Codex CLI 自身も `config.toml` の `model_max_output_tokens` を API リクエストに載せない実装になっており (https://github.com/openai/codex/issues/4138)、これはバグではなく ChatGPT backend の制約に対する回避策と解釈できる。同 CLI は `temperature` / `top_p` も送出しない。
+
+本リポジトリでは `OpenAIResponsesScheme` の `send_max_output_tokens` / `send_sampling_params` フラグでこれらの送出を一括制御し、`provider/src/lib.rs` 内で `AuthRef::CodexOAuth` 指定時に両方 `false` にする。

 ## 6. ドキュメント URL

--- a/tickets/responses-sampling-params.md
+++ b/tickets/responses-sampling-params.md
@@ -0,0 +1,102 @@
+# OpenAI Responses: sampling パラメータの取り扱い
+
+## 背景
+
+ChatGPT backend (`https://chatgpt.com/backend-api/codex/responses`) は公式
+OpenAI Responses API のサブセットしか受け付けず、サポート外パラメータを
+含むリクエストを 400 (`Unsupported parameter: ...`) で拒否する。
+受理パラメータは概ね以下に限られる（`docs/research/openai_responses_max_output_tokens.md`）:
+
+```
+model, input, instructions, stream, store, include,
+tools, tool_choice, reasoning, previous_response_id, truncation
+```
+
+`max_output_tokens` については先行修正 (commit `af57d5b`) で
+`OpenAIResponsesScheme::send_max_output_tokens` を導入し、
+`AuthRef::CodexOAuth` 経路では送らないようにしてある。
+
+今回、同じ経路で `temperature` を含むリクエストも 400 で拒否されることが確認された:
+
+```
+[notice] pod: memory Phase 1 extract failed:
+Client error: API error (status: 400):
+{"detail":"Unsupported parameter: temperature"}
+```
+
+加えて、Pod の compactor / extract worker は `pod.rs` で
+`.temperature(0.0)` をハードコードしている。「決定論的に振る舞う」程度の
+動機で 0.0 が選ばれているが:
+
+- 公式 reasoning モデル (`gpt-5`, o 系) は temperature を無視/固定する
+- 他プロバイダ (Claude / Gemini / Ollama) でも 0.0 が extract / 要約に
+  最適という自前検証は無い
+- そもそもプロバイダ既定値がそれぞれの妥当な値になっているはず
+
+ハードコードを残す積極的理由が弱く、かつ codex-oauth で実害が出ている。
+
+## 方針
+
+二段で対処する。
+
+1. **wire-level**: `OpenAIResponsesScheme` に
+   `send_sampling_params: bool` を追加し、`AuthRef::CodexOAuth` 経路では
+   `false` に設定する。`false` のとき `temperature` / `top_p` を
+   body に載せない。`max_output_tokens` (`send_max_output_tokens`) と同じ枠組みなので構造は揃える。
+2. **pod-level**: `pod.rs` の `.temperature(0.0)` ハードコード 2 箇所を
+   撤去する。プロバイダ既定値に任せる。
+
+(2) だけでも codex-oauth の現症状は消えるが、ユーザが manifest で
+明示的に `temperature` を設定しているケース（非 0.0）でも codex-oauth
+配下では 400 になるため、(1) も併せて入れる。
+
+## 要件
+
+### Scheme 側
+
+- `OpenAIResponsesScheme` に `send_sampling_params: bool` フィールドを
+  追加（デフォルト `true` = 公式 OpenAI API 向け）
+- `with_send_sampling_params(bool)` ビルダを生やす
+- `request.rs` の `ResponsesRequest` で `temperature` / `top_p` を
+  `send_sampling_params == false` のときは `None` のまま送る
+  （`#[serde(skip_serializing_if = "Option::is_none")]` で除外）
+- `validate_config` で `send_sampling_params == false` かつ
+  `config.temperature.is_some()` または `config.top_p.is_some()` の
+  ときに `ConfigWarning::unsupported` を返す（`max_tokens` と同じ流儀）
+- `provider/src/lib.rs` の `SchemeKind::OpenaiResponses` 分岐で、
+  `AuthRef::CodexOAuth` のとき `send_sampling_params=false` を渡す
+
+### Pod 側
+
+- `crates/pod/src/pod.rs:1011` の compactor worker `.temperature(0.0)` を撤去
+- `crates/pod/src/pod.rs:1368` の extract worker `.temperature(0.0)` を撤去
+- 既存テストが落ちないことを確認（`pod.rs:2034` のテスト assert は
+  `RequestConfig` に直接 `temperature: Some(0.2)` を入れているので
+  ハードコード撤去とは独立）
+
+### docs
+
+- `docs/research/openai_responses_max_output_tokens.md` の
+  「ChatGPT backend が拒否するパラメータ一覧」を補足するか、
+  もしくは sampling 用の研究 doc を新設して `temperature` / `top_p`
+  の扱いを明文化する（max_output_tokens の doc に追記する形で十分）
+
+## 完了条件
+
+- `OpenAIResponsesScheme::new().with_send_sampling_params(false)` で
+  作った scheme から生成した body に `temperature` / `top_p` キーが
+  載らない（unit test）
+- `provider::build_client` で `AuthRef::CodexOAuth` + `OpenaiResponses`
+  の組合せから作った client が `temperature` を含まないリクエストを送る
+- pod の compaction / memory extract が codex-oauth 経由で 400 にならず
+  最後まで走る
+- `pod.rs` から `.temperature(0.0)` のハードコードが消えている
+- `cargo check` / `cargo test` が `llm-worker`, `provider`, `pod` で通る
+
+## 範囲外
+
+- `user` / `metadata` 等、現状コードで送出していない他の拒否パラメータ
+- 公式 OpenAI Responses API 側の `temperature` 挙動の変更
+- 「extract / 要約タスクに最適な temperature は何か」という検証
+  （必要になったら manifest で per-model 設定に逃がすのが筋であり、
+  pod.rs 内に再ハードコードはしない）