merge: anthropic assistant burst bundling
This commit is contained in:
commit
a1b9c865df
1
TODO.md
1
TODO.md
|
|
@ -13,7 +13,6 @@
|
||||||
- Exchange / Turn / Call セマンティクス整理 → [tickets/exchange-turn-call-semantics.md](tickets/exchange-turn-call-semantics.md)
|
- Exchange / Turn / Call セマンティクス整理 → [tickets/exchange-turn-call-semantics.md](tickets/exchange-turn-call-semantics.md)
|
||||||
- llm-worker のエラー耐性
|
- llm-worker のエラー耐性
|
||||||
- ストリーム途中失敗時の継続 → [tickets/llm-worker-stream-continuation.md](tickets/llm-worker-stream-continuation.md)
|
- ストリーム途中失敗時の継続 → [tickets/llm-worker-stream-continuation.md](tickets/llm-worker-stream-continuation.md)
|
||||||
- llm-worker: Anthropic projection で assistant ターン内ブロックを 1 message に束ねる → [tickets/anthropic-assistant-burst-bundling.md](tickets/anthropic-assistant-burst-bundling.md)
|
|
||||||
- ネイティブ GUI クライアント MVP → [tickets/native-gui-mvp.md](tickets/native-gui-mvp.md)
|
- ネイティブ GUI クライアント MVP → [tickets/native-gui-mvp.md](tickets/native-gui-mvp.md)
|
||||||
- E2E テストハーネス(`tests/e2e/`、opt-in) → [tickets/e2e-harness.md](tickets/e2e-harness.md)
|
- E2E テストハーネス(`tests/e2e/`、opt-in) → [tickets/e2e-harness.md](tickets/e2e-harness.md)
|
||||||
- TUI 拡充
|
- TUI 拡充
|
||||||
|
|
|
||||||
|
|
@ -242,10 +242,13 @@ impl AnthropicScheme {
|
||||||
/// - Tool calls are content parts within assistant messages
|
/// - Tool calls are content parts within assistant messages
|
||||||
/// - Tool results are content parts within user messages
|
/// - Tool results are content parts within user messages
|
||||||
///
|
///
|
||||||
/// Each non-`Message` item produces exactly one content part, so
|
/// Assistant-side items are accumulated until a user/system message or
|
||||||
/// "last part for the item" is always well-defined. For breakpoint
|
/// tool result boundary so one logical assistant burst becomes one
|
||||||
/// `Message` items the output is forced into the array form so a
|
/// Anthropic assistant message content array. Pending parts carry their
|
||||||
/// marker has a part to attach to.
|
/// origin item index; when flushed, the final part for each item records
|
||||||
|
/// the `(msg_idx, part_idx)` used by breakpoint attachment. User/system
|
||||||
|
/// `Message` items keep the single-text shorthand unless a breakpoint
|
||||||
|
/// needs a concrete part to live on.
|
||||||
fn convert_items_to_messages(
|
fn convert_items_to_messages(
|
||||||
&self,
|
&self,
|
||||||
items: &[Item],
|
items: &[Item],
|
||||||
|
|
@ -261,19 +264,6 @@ impl AnthropicScheme {
|
||||||
for (i, item) in items.iter().enumerate() {
|
for (i, item) in items.iter().enumerate() {
|
||||||
match item {
|
match item {
|
||||||
Item::Message { role, content, .. } => {
|
Item::Message { role, content, .. } => {
|
||||||
flush_pending(
|
|
||||||
&mut messages,
|
|
||||||
&mut pending_assistant,
|
|
||||||
"assistant",
|
|
||||||
&mut locations,
|
|
||||||
);
|
|
||||||
flush_pending(&mut messages, &mut pending_user, "user", &mut locations);
|
|
||||||
|
|
||||||
let anthropic_role = match role {
|
|
||||||
Role::User | Role::System => "user",
|
|
||||||
Role::Assistant => "assistant",
|
|
||||||
};
|
|
||||||
|
|
||||||
let parts: Vec<AnthropicContentPart> = content
|
let parts: Vec<AnthropicContentPart> = content
|
||||||
.iter()
|
.iter()
|
||||||
.map(|p| match p {
|
.map(|p| match p {
|
||||||
|
|
@ -284,27 +274,43 @@ impl AnthropicScheme {
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
let force_parts = breakpoints.contains(&i);
|
match role {
|
||||||
let msg_idx = messages.len();
|
Role::Assistant => {
|
||||||
|
flush_pending(&mut messages, &mut pending_user, "user", &mut locations);
|
||||||
|
pending_assistant.extend(parts.into_iter().map(|part| (i, part)));
|
||||||
|
}
|
||||||
|
Role::User | Role::System => {
|
||||||
|
flush_pending(
|
||||||
|
&mut messages,
|
||||||
|
&mut pending_assistant,
|
||||||
|
"assistant",
|
||||||
|
&mut locations,
|
||||||
|
);
|
||||||
|
flush_pending(&mut messages, &mut pending_user, "user", &mut locations);
|
||||||
|
|
||||||
// Preserve the single-text shorthand unless a
|
let force_parts = breakpoints.contains(&i);
|
||||||
// breakpoint needs a concrete part to live on.
|
let msg_idx = messages.len();
|
||||||
if parts.len() == 1 && !force_parts {
|
|
||||||
if let AnthropicContentPart::Text { text, .. } = &parts[0] {
|
// Preserve the single-text shorthand unless a
|
||||||
|
// breakpoint needs a concrete part to live on.
|
||||||
|
if parts.len() == 1 && !force_parts {
|
||||||
|
if let AnthropicContentPart::Text { text, .. } = &parts[0] {
|
||||||
|
messages.push(AnthropicMessage {
|
||||||
|
role: "user".to_string(),
|
||||||
|
content: AnthropicContent::Text(text.clone()),
|
||||||
|
});
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let last_part_idx = parts.len().saturating_sub(1);
|
||||||
messages.push(AnthropicMessage {
|
messages.push(AnthropicMessage {
|
||||||
role: anthropic_role.to_string(),
|
role: "user".to_string(),
|
||||||
content: AnthropicContent::Text(text.clone()),
|
content: AnthropicContent::Parts(parts),
|
||||||
});
|
});
|
||||||
continue;
|
locations[i] = Some((msg_idx, last_part_idx));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let last_part_idx = parts.len().saturating_sub(1);
|
|
||||||
messages.push(AnthropicMessage {
|
|
||||||
role: anthropic_role.to_string(),
|
|
||||||
content: AnthropicContent::Parts(parts),
|
|
||||||
});
|
|
||||||
locations[i] = Some((msg_idx, last_part_idx));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Item::ToolCall {
|
Item::ToolCall {
|
||||||
|
|
@ -626,6 +632,109 @@ mod tests {
|
||||||
out
|
out
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn assistant_burst_bundles_reasoning_text_and_tool_call() {
|
||||||
|
let scheme = AnthropicScheme::new();
|
||||||
|
let request = Request::new()
|
||||||
|
.user("question?")
|
||||||
|
.item(Item::reasoning("thinking").with_signature("SIG-A"))
|
||||||
|
.item(Item::assistant_message("answer"))
|
||||||
|
.item(Item::tool_call("c1", "tool_a", r#"{"x":1}"#));
|
||||||
|
|
||||||
|
let req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit());
|
||||||
|
|
||||||
|
assert_eq!(req.messages.len(), 2, "messages: {:?}", req.messages);
|
||||||
|
assert_eq!(req.messages[0].role, "user");
|
||||||
|
assert_eq!(req.messages[1].role, "assistant");
|
||||||
|
let AnthropicContent::Parts(parts) = &req.messages[1].content else {
|
||||||
|
panic!("assistant burst must be emitted as content parts");
|
||||||
|
};
|
||||||
|
assert_eq!(parts.len(), 3, "parts: {:?}", parts);
|
||||||
|
assert!(matches!(parts[0], AnthropicContentPart::Thinking { .. }));
|
||||||
|
assert!(matches!(parts[1], AnthropicContentPart::Text { .. }));
|
||||||
|
assert!(matches!(parts[2], AnthropicContentPart::ToolUse { .. }));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn tool_result_and_user_messages_bound_assistant_bursts() {
|
||||||
|
let scheme = AnthropicScheme::new();
|
||||||
|
let request = Request::new()
|
||||||
|
.user("question?")
|
||||||
|
.item(Item::reasoning("thinking").with_signature("SIG-A"))
|
||||||
|
.item(Item::assistant_message("answer"))
|
||||||
|
.item(Item::tool_call("c1", "tool_a", "{}"))
|
||||||
|
.item(Item::tool_result("c1", "result"))
|
||||||
|
.item(Item::assistant_message("final"))
|
||||||
|
.user("follow up");
|
||||||
|
|
||||||
|
let req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit());
|
||||||
|
|
||||||
|
let roles: Vec<&str> = req.messages.iter().map(|msg| msg.role.as_str()).collect();
|
||||||
|
assert_eq!(
|
||||||
|
roles,
|
||||||
|
vec!["user", "assistant", "user", "assistant", "user"]
|
||||||
|
);
|
||||||
|
|
||||||
|
let AnthropicContent::Parts(first_assistant) = &req.messages[1].content else {
|
||||||
|
panic!("first assistant burst must be content parts");
|
||||||
|
};
|
||||||
|
assert_eq!(first_assistant.len(), 3);
|
||||||
|
assert!(matches!(
|
||||||
|
first_assistant[0],
|
||||||
|
AnthropicContentPart::Thinking { .. }
|
||||||
|
));
|
||||||
|
assert!(matches!(
|
||||||
|
first_assistant[1],
|
||||||
|
AnthropicContentPart::Text { .. }
|
||||||
|
));
|
||||||
|
assert!(matches!(
|
||||||
|
first_assistant[2],
|
||||||
|
AnthropicContentPart::ToolUse { .. }
|
||||||
|
));
|
||||||
|
|
||||||
|
let AnthropicContent::Parts(tool_result) = &req.messages[2].content else {
|
||||||
|
panic!("tool result must be content parts");
|
||||||
|
};
|
||||||
|
assert_eq!(tool_result.len(), 1);
|
||||||
|
assert!(matches!(
|
||||||
|
tool_result[0],
|
||||||
|
AnthropicContentPart::ToolResult { .. }
|
||||||
|
));
|
||||||
|
|
||||||
|
let AnthropicContent::Parts(second_assistant) = &req.messages[3].content else {
|
||||||
|
panic!("second assistant burst must be content parts");
|
||||||
|
};
|
||||||
|
assert_eq!(second_assistant.len(), 1);
|
||||||
|
assert!(matches!(
|
||||||
|
second_assistant[0],
|
||||||
|
AnthropicContentPart::Text { .. }
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn assistant_message_breakpoint_maps_to_text_part_inside_burst() {
|
||||||
|
let scheme = AnthropicScheme::new();
|
||||||
|
let mut request = Request::new().items(vec![
|
||||||
|
Item::user_message("question?"),
|
||||||
|
Item::reasoning("thinking").with_signature("SIG-A"),
|
||||||
|
Item::assistant_message("answer"),
|
||||||
|
Item::tool_call("c1", "tool_a", "{}"),
|
||||||
|
Item::user_message("next"),
|
||||||
|
]);
|
||||||
|
request.cache_anchor = Some(2);
|
||||||
|
|
||||||
|
let req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit());
|
||||||
|
let AnthropicContent::Parts(parts) = &req.messages[1].content else {
|
||||||
|
panic!("assistant burst must be content parts");
|
||||||
|
};
|
||||||
|
|
||||||
|
assert!(matches!(parts[0], AnthropicContentPart::Thinking { .. }));
|
||||||
|
assert!(matches!(parts[1], AnthropicContentPart::Text { .. }));
|
||||||
|
assert!(matches!(parts[2], AnthropicContentPart::ToolUse { .. }));
|
||||||
|
assert_eq!(part_cache_control(&parts[1]), Some(CacheControl::Ephemeral));
|
||||||
|
assert_eq!(part_cache_control(&parts[2]), Some(CacheControl::Ephemeral));
|
||||||
|
}
|
||||||
|
|
||||||
/// Convenience: a turn that ends with one assistant text, one tool
|
/// Convenience: a turn that ends with one assistant text, one tool
|
||||||
/// call/result pair, and a final assistant text. Produced at
|
/// call/result pair, and a final assistant text. Produced at
|
||||||
/// `history[head..]` indices shown alongside, so tests can reason
|
/// `history[head..]` indices shown alongside, so tests can reason
|
||||||
|
|
|
||||||
|
|
@ -1,31 +0,0 @@
|
||||||
# Anthropic projection: assistant ターン内ブロックを 1 message に束ねる
|
|
||||||
|
|
||||||
## 背景
|
|
||||||
|
|
||||||
`crates/llm-worker/src/llm_client/scheme/anthropic/request.rs` の `convert_items_to_messages` は、Worker が 1 ターンで生成する `[Reasoning, assistant_message, ToolCall]` の並びを、Anthropic wire 上で **複数の隣接した assistant message** に分割している。
|
|
||||||
|
|
||||||
具体的には:
|
|
||||||
- `Item::Reasoning` を `pending_assistant` に push
|
|
||||||
- 次の `Item::Message { Role::Assistant }` が到来すると `pending_assistant` を flush し、自分自身は別 message として messages に直 push
|
|
||||||
- 続く `Item::ToolCall` は再び `pending_assistant` に積まれ、turn 末で flush され 3 つ目の assistant message に
|
|
||||||
|
|
||||||
結果として 1 turn が `assistant[Thinking] / assistant[text] / assistant[tool_use]` の 3 message に展開される。
|
|
||||||
|
|
||||||
Anthropic Messages API は user/assistant の交互を要求し、同一論理 turn 内の thinking/text/tool_use は **1 つの assistant message の `content` 配列** に並べる仕様。新世代 Claude (Opus 4.5+/Sonnet 4.6+) で thinking signature を round-trip する際、隣接 assistant message に分かれていると signature の文脈が崩れて 400 になる懸念がある(reasoning-history-persist のレビュー指摘)。
|
|
||||||
|
|
||||||
なお、本バグは reasoning-history-persist で導入されたものではなく、`assistant_message` + `tool_call` の組合せで以前から存在していた分割であり、Reasoning が同じ flush 経路を継承した形になっている。
|
|
||||||
|
|
||||||
## 要件
|
|
||||||
|
|
||||||
- 同一論理ターンに属する `Item::Reasoning` / `Item::Message(Assistant)` / `Item::ToolCall` を、Anthropic wire 上の **1 つの assistant message の `content` 配列** に束ねる
|
|
||||||
- 順序は arrival 順 (= history 順)。Anthropic 仕様の典型は thinking → text → tool_use
|
|
||||||
- user / system role の `Item::Message` や `Item::ToolResult` を境界として assistant burst を区切る
|
|
||||||
- 既存の breakpoint (cache_control) 計算が壊れないこと: 各 item のオリジン index → (msg_idx, part_idx) マッピングは flush_pending 経由で記録されているので、Item::Message(Assistant) も pending を経由するように揃えれば自然に追従する
|
|
||||||
- Single-text 専用の `AnthropicContent::Text` shorthand を、assistant burst 内が 1 part のみのときに限定して維持するか、簡潔さのために常に `Parts` 形式に統一するかは、実装時に判断する
|
|
||||||
- 既存テスト群(`completed_turn`, `single_text_message_uses_text_shorthand_without_breakpoint`, `breakpoint_on_tool_result_head` 等)の意図を逸脱しないよう更新
|
|
||||||
|
|
||||||
## スコープ外
|
|
||||||
|
|
||||||
- モデル世代別の thinking keep/strip デフォルト分岐(reasoning-history-persist のフォローアップ候補と同じ扱い)
|
|
||||||
- `clear_thinking_20251015` context-edit
|
|
||||||
- prune.rs の reasoning aware 化
|
|
||||||
Loading…
Reference in New Issue
Block a user