Compare commits

...

15 Commits

41 changed files with 2993 additions and 335 deletions

View File

@ -91,16 +91,6 @@ impl Kind for ErrorKind {
type Event = ErrorEvent; type Event = ErrorEvent;
} }
/// Reasoning item Kind - 完成済み reasoning item の永続化用
///
/// 1 reasoning item につき 1 度だけ発火する。Worker は
/// `ReasoningItemCollector` 経由で受け取り、ターン終了時に
/// `Item::Reasoning` として history に append する。
pub struct ReasoningItemKind;
impl Kind for ReasoningItemKind {
type Event = ReasoningItemEvent;
}
// ============================================================================= // =============================================================================
// Block Kind Definitions // Block Kind Definitions
// ============================================================================= // =============================================================================
@ -152,6 +142,7 @@ pub struct ThinkingBlockStart {
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
pub struct ThinkingBlockStop { pub struct ThinkingBlockStop {
pub index: usize, pub index: usize,
pub reasoning: Option<ReasoningBlockData>,
} }
/// ToolUseBlock Kind - for tool use blocks /// ToolUseBlock Kind - for tool use blocks

View File

@ -17,14 +17,12 @@ use serde::{Deserialize, Serialize};
/// ///
/// - **メタイベント**: `Ping`, `Usage`, `Status`, `Error`, `UnhandledSse` /// - **メタイベント**: `Ping`, `Usage`, `Status`, `Error`, `UnhandledSse`
/// - **ブロックイベント**: `BlockStart`, `BlockDelta`, `BlockStop`, `BlockAbort` /// - **ブロックイベント**: `BlockStart`, `BlockDelta`, `BlockStop`, `BlockAbort`
/// - **永続化イベント**: `ReasoningItem` (history に commit すべき完成済み
/// reasoning item。streaming 表示用の Thinking BlockStart/Delta/Stop と
/// は別経路で発火する)
/// ///
/// # ブロックのライフサイクル /// # ブロックのライフサイクル
/// ///
/// テキストやツール呼び出しは、`BlockStart` → `BlockDelta`(複数) → `BlockStop` /// テキスト、thinking、ツール呼び出しは、`BlockStart` → `BlockDelta`(複数) → `BlockStop`
/// の順序でイベントが発生します。 /// の順序でイベントが発生します。thinking の round-trip metadata は
/// `BlockStop.reasoning` に載ります。
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum Event { pub enum Event {
/// ハートビート /// ハートビート
@ -48,18 +46,6 @@ pub enum Event {
BlockStop(BlockStop), BlockStop(BlockStop),
/// ブロック中断 /// ブロック中断
BlockAbort(BlockAbort), BlockAbort(BlockAbort),
/// Reasoning item の完成。scheme が「次の request に送り返すための
/// reasoning material が揃った」点で 1 度だけ発火する。
///
/// - Anthropic: 1 つの `thinking` content_block 完了ごと
/// - OpenAI Responses: 1 つの reasoning output_item 完了ごと
///
/// 上位層Worker / ReasoningItemCollectorはこれを `Item::Reasoning`
/// として `worker.history` に append する。streaming 表示用の
/// `BlockStart(Thinking)` / `BlockDelta(Thinking)` / `BlockStop(Thinking)`
/// は依然として並行発火するlive display と round-trip persist の責務分離)。
ReasoningItem(ReasoningItemEvent),
} }
// ============================================================================= // =============================================================================
@ -218,6 +204,12 @@ pub struct BlockStop {
pub block_type: BlockType, pub block_type: BlockType,
/// 停止理由 /// 停止理由
pub stop_reason: Option<StopReason>, pub stop_reason: Option<StopReason>,
/// Thinking block の停止時に確定した reasoning round-trip metadata。
///
/// `None` の Thinking block は live streaming / trace 用で、history に
/// `Item::Reasoning` として永続化しない。`Some` の場合は block lifecycle
/// が永続化の authoritative source になる。
pub reasoning: Option<ReasoningBlockData>,
} }
impl BlockStop { impl BlockStop {
@ -243,22 +235,17 @@ impl BlockAbort {
} }
} }
// ============================================================================= /// Thinking block stop で確定した reasoning material。
// Reasoning Item Event
// =============================================================================
/// 完成済み reasoning item。scheme が round-trip に必要なすべての
/// materialtext, summary, encrypted_content, signature, idを揃えて
/// 1 度だけ発火する。
/// ///
/// `Item::Reasoning` のフィールドを 1:1 に持つ。 /// `Item::Reasoning` の round-trip に必要な provider material を保持する。
/// `text` は deltas から収集した本文を上書きするために使うmetadata-only
/// reasoning block や provider completion event で全文が届くケース)。
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)] #[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
pub struct ReasoningItemEvent { pub struct ReasoningBlockData {
/// scheme 側で観測した item idOpenAI Responses の `id`)。 /// scheme 側で観測した item idOpenAI Responses の `id`)。
pub id: Option<String>, pub id: Option<String>,
/// reasoning 本体テキスト。Anthropic は `thinking` 累積、OpenAI は /// reasoning 本体テキスト。`None` の場合は block delta 収集結果を使う。
/// `reasoning_text` 累積。redacted_thinking では空。 pub text: Option<String>,
pub text: String,
/// summary (OpenAI Responses の `summary_text[]`)。他 scheme は空。 /// summary (OpenAI Responses の `summary_text[]`)。他 scheme は空。
pub summary: Vec<String>, pub summary: Vec<String>,
/// 暗号化された opaque blobAnthropic `redacted_thinking.data` / /// 暗号化された opaque blobAnthropic `redacted_thinking.data` /
@ -309,6 +296,7 @@ impl Event {
index, index,
block_type: BlockType::Text, block_type: BlockType::Text,
stop_reason, stop_reason,
reasoning: None,
}) })
} }
@ -338,6 +326,7 @@ impl Event {
index, index,
block_type: BlockType::ToolUse, block_type: BlockType::ToolUse,
stop_reason: Some(StopReason::ToolUse), stop_reason: Some(StopReason::ToolUse),
reasoning: None,
}) })
} }

View File

@ -216,6 +216,7 @@ impl AnthropicScheme {
index: event.index, index: event.index,
block_type: BlockType::Text, // Timeline層で上書きされる block_type: BlockType::Text, // Timeline層で上書きされる
stop_reason: None, stop_reason: None,
reasoning: None,
}))) })))
} }
AnthropicEventType::MessageDelta => { AnthropicEventType::MessageDelta => {
@ -286,9 +287,9 @@ impl AnthropicScheme {
/// `parse_event` の単発 Event に加えて、以下を行う: /// `parse_event` の単発 Event に加えて、以下を行う:
/// - `content_block_stop` の `block_type` を直前の Start 値で書き戻す /// - `content_block_stop` の `block_type` を直前の Start 値で書き戻す
/// - `thinking` / `redacted_thinking` ブロックの本体・signature・data を /// - `thinking` / `redacted_thinking` ブロックの本体・signature・data を
/// `state.pending_thinking` に蓄積し、`content_block_stop` /// `state.pending_thinking` に蓄積し、`content_block_stop` の Thinking
/// `Event::ReasoningItem` を追加発火す /// BlockStop metadata に載せ
/// - `signature_delta` を蓄積Stream channel には流さず、reasoning event /// - `signature_delta` を蓄積Stream channel には流さず、reasoning metadata
/// にだけ反映する) /// にだけ反映する)
pub(crate) fn parse_with_state( pub(crate) fn parse_with_state(
&self, &self,
@ -374,16 +375,21 @@ impl AnthropicScheme {
AnthropicEventType::ContentBlockStop => { AnthropicEventType::ContentBlockStop => {
let raw: ContentBlockStopEvent = serde_json::from_str(data)?; let raw: ContentBlockStopEvent = serde_json::from_str(data)?;
let block_type = state.current_block_type.take().unwrap_or(BlockType::Text); let block_type = state.current_block_type.take().unwrap_or(BlockType::Text);
let reasoning = if matches!(block_type, BlockType::Thinking) {
state
.pending_thinking
.take()
.map(PendingThinking::into_reasoning)
} else {
state.pending_thinking.take();
None
};
emitted.push(Event::BlockStop(BlockStop { emitted.push(Event::BlockStop(BlockStop {
index: raw.index, index: raw.index,
block_type, block_type,
stop_reason: None, stop_reason: None,
reasoning,
})); }));
if matches!(block_type, BlockType::Thinking) {
if let Some(pending) = state.pending_thinking.take() {
emitted.push(Event::ReasoningItem(pending.into_event()));
}
}
} }
// 残りは state を必要としない。既存 parse_event に委譲。 // 残りは state を必要としない。既存 parse_event に委譲。
_ => { _ => {
@ -524,8 +530,8 @@ mod tests {
} }
#[test] #[test]
fn thinking_block_emits_reasoning_item_with_signature() { fn thinking_block_stop_carries_reasoning_with_signature() {
// thinking ブロックが完了したら ReasoningItem に text+signature が乗ること // thinking ブロックが完了したら reasoning metadata に text+signature が乗ること
let scheme = AnthropicScheme::new(); let scheme = AnthropicScheme::new();
let mut state = AnthropicState::default(); let mut state = AnthropicState::default();
@ -567,18 +573,18 @@ mod tests {
&mut state, &mut state,
) )
.unwrap(); .unwrap();
// BlockStop と ReasoningItem の 2 件が並ぶ assert_eq!(stop_evs.len(), 1);
assert!(matches!(stop_evs[0], Event::BlockStop(_))); let Event::BlockStop(stop) = &stop_evs[0] else {
let Event::ReasoningItem(reasoning) = &stop_evs[1] else { panic!("expected BlockStop, got {:?}", stop_evs[0]);
panic!("expected ReasoningItem, got {:?}", stop_evs[1]);
}; };
assert_eq!(reasoning.text, "hello world"); let reasoning = stop.reasoning.as_ref().expect("reasoning metadata");
assert_eq!(reasoning.text.as_deref(), Some("hello world"));
assert_eq!(reasoning.signature.as_deref(), Some("SIG-XYZ")); assert_eq!(reasoning.signature.as_deref(), Some("SIG-XYZ"));
assert!(reasoning.encrypted_content.is_none()); assert!(reasoning.encrypted_content.is_none());
} }
#[test] #[test]
fn redacted_thinking_emits_reasoning_item_with_data() { fn redacted_thinking_stop_carries_reasoning_with_data() {
let scheme = AnthropicScheme::new(); let scheme = AnthropicScheme::new();
let mut state = AnthropicState::default(); let mut state = AnthropicState::default();
@ -596,16 +602,18 @@ mod tests {
&mut state, &mut state,
) )
.unwrap(); .unwrap();
let Event::ReasoningItem(reasoning) = &stop_evs[1] else { assert_eq!(stop_evs.len(), 1);
panic!("expected ReasoningItem"); let Event::BlockStop(stop) = &stop_evs[0] else {
panic!("expected BlockStop");
}; };
assert!(reasoning.text.is_empty()); let reasoning = stop.reasoning.as_ref().expect("reasoning metadata");
assert_eq!(reasoning.text.as_deref(), Some(""));
assert!(reasoning.signature.is_none()); assert!(reasoning.signature.is_none());
assert_eq!(reasoning.encrypted_content.as_deref(), Some("opaque-blob")); assert_eq!(reasoning.encrypted_content.as_deref(), Some("opaque-blob"));
} }
#[test] #[test]
fn text_block_does_not_emit_reasoning_item() { fn text_block_stop_has_no_reasoning_metadata() {
let scheme = AnthropicScheme::new(); let scheme = AnthropicScheme::new();
let mut state = AnthropicState::default(); let mut state = AnthropicState::default();
@ -631,7 +639,10 @@ mod tests {
) )
.unwrap(); .unwrap();
assert_eq!(stop_evs.len(), 1); assert_eq!(stop_evs.len(), 1);
assert!(matches!(stop_evs[0], Event::BlockStop(_))); let Event::BlockStop(stop) = &stop_evs[0] else {
panic!("expected BlockStop");
};
assert!(stop.reasoning.is_none());
} }
#[test] #[test]

View File

@ -9,7 +9,7 @@ use crate::llm_client::{
ClientError, ClientError,
auth::AuthRequirement, auth::AuthRequirement,
capability::ModelCapability, capability::ModelCapability,
event::{BlockType, Event, ReasoningItemEvent}, event::{BlockType, Event, ReasoningBlockData},
scheme::Scheme, scheme::Scheme,
types::Request, types::Request,
}; };
@ -23,7 +23,7 @@ use super::AnthropicScheme;
/// `BlockStop` に書き戻す。 /// `BlockStop` に書き戻す。
/// 2. `thinking` ブロック中の `thinking_delta` テキストと `signature_delta` /// 2. `thinking` ブロック中の `thinking_delta` テキストと `signature_delta`
/// 署名、および `redacted_thinking` ブロックの `data` を蓄積し、 /// 署名、および `redacted_thinking` ブロックの `data` を蓄積し、
/// `content_block_stop` で `Event::ReasoningItem` を発火する /// `content_block_stop` の Thinking block metadata として返す
/// round-trip 永続化のため)。 /// round-trip 永続化のため)。
#[derive(Debug, Default)] #[derive(Debug, Default)]
pub struct AnthropicState { pub struct AnthropicState {
@ -40,10 +40,10 @@ pub(crate) struct PendingThinking {
} }
impl PendingThinking { impl PendingThinking {
pub(crate) fn into_event(self) -> ReasoningItemEvent { pub(crate) fn into_reasoning(self) -> ReasoningBlockData {
ReasoningItemEvent { ReasoningBlockData {
id: None, id: None,
text: self.text, text: Some(self.text),
summary: Vec::new(), summary: Vec::new(),
encrypted_content: self.redacted_data, encrypted_content: self.redacted_data,
signature: self.signature, signature: self.signature,

View File

@ -205,6 +205,7 @@ impl GeminiScheme {
index: candidate_index, index: candidate_index,
block_type: BlockType::Text, block_type: BlockType::Text,
stop_reason, stop_reason,
reasoning: None,
})); }));
} }
} }

View File

@ -14,7 +14,7 @@ use crate::llm_client::{
ClientError, ClientError,
event::{ event::{
BlockDelta, BlockMetadata, BlockStart, BlockStop, BlockType, DeltaContent, ErrorEvent, BlockDelta, BlockMetadata, BlockStart, BlockStop, BlockType, DeltaContent, ErrorEvent,
Event, ReasoningItemEvent, ResponseStatus, StatusEvent, UnhandledSseEvent, UsageEvent, Event, ReasoningBlockData, ResponseStatus, StatusEvent, UnhandledSseEvent, UsageEvent,
}, },
}; };
@ -25,8 +25,8 @@ pub struct OpenAIResponsesState {
next_index: usize, next_index: usize,
/// 蓄積中の reasoning output_item。`output_item.added`(Reasoning) で /// 蓄積中の reasoning output_item。`output_item.added`(Reasoning) で
/// 確保し、`reasoning_text.delta` / `reasoning_summary_text.delta` で /// 確保し、`reasoning_text.delta` / `reasoning_summary_text.delta` で
/// 蓄積、`output_item.done`(Reasoning) で `Event::ReasoningItem` を /// 蓄積、`output_item.done`(Reasoning) で既存 reasoning_text block または
/// 発火してエントリを除去する。 /// metadata-only Thinking block に reasoning persistence material を載せる。
pending_reasoning: HashMap<usize, PendingReasoning>, pending_reasoning: HashMap<usize, PendingReasoning>,
} }
@ -38,6 +38,24 @@ struct PendingReasoning {
text: String, text: String,
/// `reasoning_summary_text.delta` を summary_index 順に蓄積。 /// `reasoning_summary_text.delta` を summary_index 順に蓄積。
summary: Vec<String>, summary: Vec<String>,
/// `response.content_part.done` が先に到着した reasoning/thinking block。
///
/// `response.output_item.done` まで待たないと encrypted_content や最終
/// summary が揃わないため、live-visible な余分な synthetic Thinking block
/// を作らず、既存 block の stop に persistence metadata を載せる。
deferred_thinking_stops: Vec<SlotInfo>,
}
impl PendingReasoning {
fn into_reasoning_data(self, encrypted_content: Option<String>) -> ReasoningBlockData {
ReasoningBlockData {
id: self.id,
text: Some(self.text),
summary: self.summary.into_iter().filter(|s| !s.is_empty()).collect(),
encrypted_content,
signature: None,
}
}
} }
impl OpenAIResponsesState { impl OpenAIResponsesState {
@ -73,6 +91,31 @@ impl OpenAIResponsesState {
} }
entry.summary[summary_index].push_str(text); entry.summary[summary_index].push_str(text);
} }
fn defer_reasoning_stop(&mut self, output_index: usize, info: SlotInfo) {
self.ensure_reasoning(output_index)
.deferred_thinking_stops
.push(info);
}
fn take_active_reasoning_slots(&mut self, output_index: usize) -> Vec<SlotInfo> {
let mut keys: Vec<_> = self
.slots
.iter()
.filter_map(|(key, info)| match key {
SlotKey::ContentPart { output, content }
if *output == output_index && info.block_type == BlockType::Thinking =>
{
Some((*content, *key))
}
_ => None,
})
.collect();
keys.sort_by_key(|(content, _)| *content);
keys.into_iter()
.filter_map(|(_, key)| self.slots.remove(&key))
.collect()
}
} }
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
@ -380,9 +423,10 @@ pub(crate) fn parse_sse(
"response.output_item.done" => { "response.output_item.done" => {
let ev: OutputItemDone = from_json(data)?; let ev: OutputItemDone = from_json(data)?;
// Reasoning wrapper の done で蓄積分を ReasoningItem として発火。 // Reasoning wrapper の done で蓄積分を既存 reasoning_text block の
// これは `slots` の OutputItem slot とは独立している // stop に載せる。content_part.done が先に来た場合は stop を defer
// (FunctionCall は slots、Reasoning は pending_reasoning)。 // しておき、ここで encrypted_content / summary と一緒に完了させる。
// reasoning_text が無い metadata-only item だけ synthetic block を作る。
if let OutputItem::Reasoning { if let OutputItem::Reasoning {
id, id,
encrypted_content, encrypted_content,
@ -396,23 +440,50 @@ pub(crate) fn parse_sse(
if pending.id.is_none() { if pending.id.is_none() {
pending.id = id; pending.id = id;
} }
return Ok(vec![Event::ReasoningItem(ReasoningItemEvent {
id: pending.id, let mut stop_blocks = std::mem::take(&mut pending.deferred_thinking_stops);
text: pending.text, stop_blocks.extend(state.take_active_reasoning_slots(ev.output_index));
summary: pending let reasoning = pending.into_reasoning_data(encrypted_content);
.summary
.into_iter() if stop_blocks.is_empty() {
.filter(|s| !s.is_empty()) let info =
.collect(), state.allocate(SlotKey::OutputItem(ev.output_index), BlockType::Thinking);
encrypted_content, state.slots.remove(&SlotKey::OutputItem(ev.output_index));
signature: None, return Ok(vec![
})]); Event::BlockStart(BlockStart {
index: info.flat_index,
block_type: BlockType::Thinking,
metadata: BlockMetadata::Thinking,
}),
Event::BlockStop(BlockStop {
index: info.flat_index,
block_type: BlockType::Thinking,
stop_reason: None,
reasoning: Some(reasoning),
}),
]);
}
let last = stop_blocks.len() - 1;
return Ok(stop_blocks
.into_iter()
.enumerate()
.map(|(idx, info)| {
Event::BlockStop(BlockStop {
index: info.flat_index,
block_type: info.block_type,
stop_reason: None,
reasoning: (idx == last).then(|| reasoning.clone()),
})
})
.collect());
} }
if let Some(info) = state.slots.remove(&SlotKey::OutputItem(ev.output_index)) { if let Some(info) = state.slots.remove(&SlotKey::OutputItem(ev.output_index)) {
Ok(vec![Event::BlockStop(BlockStop { Ok(vec![Event::BlockStop(BlockStop {
index: info.flat_index, index: info.flat_index,
block_type: info.block_type, block_type: info.block_type,
stop_reason: None, stop_reason: None,
reasoning: None,
})]) })])
} else { } else {
Ok(Vec::new()) Ok(Vec::new())
@ -446,11 +517,19 @@ pub(crate) fn parse_sse(
output: ev.output_index, output: ev.output_index,
content: ev.content_index, content: ev.content_index,
}) { }) {
Ok(vec![Event::BlockStop(BlockStop { if matches!(ev.part, ContentPart::ReasoningText { .. })
index: info.flat_index, || info.block_type == BlockType::Thinking
block_type: info.block_type, {
stop_reason: None, state.defer_reasoning_stop(ev.output_index, info);
})]) Ok(Vec::new())
} else {
Ok(vec![Event::BlockStop(BlockStop {
index: info.flat_index,
block_type: info.block_type,
stop_reason: None,
reasoning: None,
})])
}
} else { } else {
Ok(Vec::new()) Ok(Vec::new())
} }
@ -531,6 +610,7 @@ pub(crate) fn parse_sse(
index: info.flat_index, index: info.flat_index,
block_type: info.block_type, block_type: info.block_type,
stop_reason: None, stop_reason: None,
reasoning: None,
})]) })])
} else { } else {
Ok(Vec::new()) Ok(Vec::new())
@ -1116,9 +1196,9 @@ mod tests {
} }
#[test] #[test]
fn reasoning_output_item_emits_reasoning_item_with_text_summary_encrypted() { fn reasoning_output_item_completes_metadata_thinking_block_with_text_summary_encrypted() {
// 完成済み reasoning wrapper が text + summary[] + encrypted_content を持って // 完成済み reasoning wrapper が text + summary[] + encrypted_content を持って
// ReasoningItem として届くこと。 // Thinking BlockStop metadata として届くこと。
let mut state = OpenAIResponsesState::default(); let mut state = OpenAIResponsesState::default();
// wrapper added (id だけ持つ) // wrapper added (id だけ持つ)
@ -1128,11 +1208,15 @@ mod tests {
r#"{"output_index":0,"item":{"type":"reasoning","id":"r1"}}"#, r#"{"output_index":0,"item":{"type":"reasoning","id":"r1"}}"#,
); );
// 内側の reasoning_text 用 content_part // 内側の reasoning_text 用 content_part
with( let start = with(
&mut state, &mut state,
"response.content_part.added", "response.content_part.added",
r#"{"output_index":0,"content_index":0,"item_id":"r1","part":{"type":"reasoning_text","text":""}}"#, r#"{"output_index":0,"content_index":0,"item_id":"r1","part":{"type":"reasoning_text","text":""}}"#,
); );
let start_index = match start.as_slice() {
[Event::BlockStart(start)] => start.index,
other => panic!("expected one BlockStart, got {other:?}"),
};
with( with(
&mut state, &mut state,
"response.reasoning_text.delta", "response.reasoning_text.delta",
@ -1143,11 +1227,12 @@ mod tests {
"response.reasoning_text.delta", "response.reasoning_text.delta",
r#"{"output_index":0,"content_index":0,"item_id":"r1","delta":"world"}"#, r#"{"output_index":0,"content_index":0,"item_id":"r1","delta":"world"}"#,
); );
with( let part_done = with(
&mut state, &mut state,
"response.content_part.done", "response.content_part.done",
r#"{"output_index":0,"content_index":0,"item_id":"r1","part":{"type":"reasoning_text","text":"hello world"}}"#, r#"{"output_index":0,"content_index":0,"item_id":"r1","part":{"type":"reasoning_text","text":"hello world"}}"#,
); );
assert!(part_done.is_empty());
// summary 1 件 // summary 1 件
with( with(
&mut state, &mut state,
@ -1172,11 +1257,13 @@ mod tests {
r#"{"output_index":0,"item":{"type":"reasoning","id":"r1","encrypted_content":"ENC-XYZ"}}"#, r#"{"output_index":0,"item":{"type":"reasoning","id":"r1","encrypted_content":"ENC-XYZ"}}"#,
); );
assert_eq!(evs.len(), 1); assert_eq!(evs.len(), 1);
let Event::ReasoningItem(reasoning) = &evs[0] else { let Event::BlockStop(stop) = &evs[0] else {
panic!("expected ReasoningItem, got {:?}", evs[0]); panic!("expected BlockStop, got {:?}", evs[0]);
}; };
assert_eq!(stop.index, start_index);
let reasoning = stop.reasoning.as_ref().expect("reasoning metadata");
assert_eq!(reasoning.id.as_deref(), Some("r1")); assert_eq!(reasoning.id.as_deref(), Some("r1"));
assert_eq!(reasoning.text, "hello world"); assert_eq!(reasoning.text.as_deref(), Some("hello world"));
assert_eq!(reasoning.summary, vec!["sum-A".to_string()]); assert_eq!(reasoning.summary, vec!["sum-A".to_string()]);
assert_eq!(reasoning.encrypted_content.as_deref(), Some("ENC-XYZ")); assert_eq!(reasoning.encrypted_content.as_deref(), Some("ENC-XYZ"));
assert!(reasoning.signature.is_none()); assert!(reasoning.signature.is_none());
@ -1184,10 +1271,94 @@ mod tests {
assert!(state.pending_reasoning.is_empty()); assert!(state.pending_reasoning.is_empty());
} }
#[test]
fn reasoning_text_done_then_output_done_emits_single_existing_block_stop() {
let mut state = OpenAIResponsesState::default();
with(
&mut state,
"response.output_item.added",
r#"{"output_index":0,"item":{"type":"reasoning","id":"r1"}}"#,
);
let mut lifecycle = Vec::new();
lifecycle.extend(with(
&mut state,
"response.content_part.added",
r#"{"output_index":0,"content_index":0,"item_id":"r1","part":{"type":"reasoning_text","text":""}}"#,
));
lifecycle.extend(with(
&mut state,
"response.reasoning_text.delta",
r#"{"output_index":0,"content_index":0,"item_id":"r1","delta":"think"}"#,
));
lifecycle.extend(with(
&mut state,
"response.content_part.done",
r#"{"output_index":0,"content_index":0,"item_id":"r1","part":{"type":"reasoning_text","text":"think"}}"#,
));
lifecycle.extend(with(
&mut state,
"response.output_item.done",
r#"{"output_index":0,"item":{"type":"reasoning","id":"r1","encrypted_content":"ENC"}}"#,
));
let starts: Vec<_> = lifecycle
.iter()
.filter_map(|event| match event {
Event::BlockStart(start) => Some(start.index),
_ => None,
})
.collect();
let stops: Vec<_> = lifecycle
.iter()
.filter_map(|event| match event {
Event::BlockStop(stop) => Some(stop),
_ => None,
})
.collect();
assert_eq!(starts.len(), 1, "no synthetic second Thinking start");
assert_eq!(stops.len(), 1, "no duplicate empty Thinking stop");
assert_eq!(stops[0].index, starts[0]);
let reasoning = stops[0].reasoning.as_ref().expect("reasoning metadata");
assert_eq!(reasoning.text.as_deref(), Some("think"));
assert_eq!(reasoning.encrypted_content.as_deref(), Some("ENC"));
struct StopRecorder(std::sync::Arc<std::sync::Mutex<Vec<(usize, String, bool)>>>);
impl crate::handler::Handler<crate::handler::ThinkingBlockKind> for StopRecorder {
type Scope = String;
fn on_event(
&mut self,
scope: &mut Self::Scope,
event: &crate::handler::ThinkingBlockEvent,
) {
match event {
crate::handler::ThinkingBlockEvent::Start(_) => scope.clear(),
crate::handler::ThinkingBlockEvent::Delta(delta) => scope.push_str(delta),
crate::handler::ThinkingBlockEvent::Stop(stop) => self
.0
.lock()
.unwrap()
.push((stop.index, scope.clone(), stop.reasoning.is_some())),
}
}
}
let stops = std::sync::Arc::new(std::sync::Mutex::new(Vec::new()));
let mut timeline = crate::timeline::Timeline::new();
timeline.on_thinking_block(StopRecorder(stops.clone()));
for event in &lifecycle {
timeline.dispatch(event);
}
let stops = stops.lock().unwrap().clone();
assert_eq!(stops, vec![(starts[0], "think".to_string(), true)]);
}
#[test] #[test]
fn reasoning_wrapper_without_inner_content_emits_empty_text() { fn reasoning_wrapper_without_inner_content_emits_empty_text() {
// encrypted_content だけ届くreasoning_text 無し)ケースでも // encrypted_content だけ届くreasoning_text 無し)ケースでも
// ReasoningItem は発火する。 // reasoning metadata は届く
let mut state = OpenAIResponsesState::default(); let mut state = OpenAIResponsesState::default();
with( with(
&mut state, &mut state,
@ -1199,10 +1370,13 @@ mod tests {
"response.output_item.done", "response.output_item.done",
r#"{"output_index":2,"item":{"type":"reasoning","id":"r9","encrypted_content":"BLOB"}}"#, r#"{"output_index":2,"item":{"type":"reasoning","id":"r9","encrypted_content":"BLOB"}}"#,
); );
let Event::ReasoningItem(r) = &evs[0] else { assert_eq!(evs.len(), 2);
panic!() assert!(matches!(evs[0], Event::BlockStart(_)));
let Event::BlockStop(stop) = &evs[1] else {
panic!("expected BlockStop")
}; };
assert!(r.text.is_empty()); let r = stop.reasoning.as_ref().expect("reasoning metadata");
assert_eq!(r.text.as_deref(), Some(""));
assert!(r.summary.is_empty()); assert!(r.summary.is_empty());
assert_eq!(r.encrypted_content.as_deref(), Some("BLOB")); assert_eq!(r.encrypted_content.as_deref(), Some("BLOB"));
} }

View File

@ -7,18 +7,19 @@
//! - [`Timeline`] - イベントストリームの管理とディスパッチ //! - [`Timeline`] - イベントストリームの管理とディスパッチ
//! - [`Handler`] - イベントを処理するトレイト //! - [`Handler`] - イベントを処理するトレイト
//! - [`TextBlockCollector`] - テキストブロックを収集するHandler //! - [`TextBlockCollector`] - テキストブロックを収集するHandler
//! - [`ThinkingBlockCollector`] - reasoning material を Thinking block から収集するHandler
//! - [`ToolCallCollector`] - ツール呼び出しを収集するHandler //! - [`ToolCallCollector`] - ツール呼び出しを収集するHandler
pub mod event; pub mod event;
mod reasoning_item_collector;
mod text_block_collector; mod text_block_collector;
mod thinking_block_collector;
mod timeline; mod timeline;
mod tool_call_collector; mod tool_call_collector;
// 公開API // 公開API
pub use event::*; pub use event::*;
pub use reasoning_item_collector::ReasoningItemCollector;
pub use text_block_collector::TextBlockCollector; pub use text_block_collector::TextBlockCollector;
pub use thinking_block_collector::ThinkingBlockCollector;
pub use timeline::Timeline; pub use timeline::Timeline;
pub use tool_call_collector::ToolCallCollector; pub use tool_call_collector::ToolCallCollector;
@ -30,7 +31,6 @@ pub use crate::handler::{
Handler, Handler,
Kind, Kind,
PingKind, PingKind,
ReasoningItemKind,
StatusKind, StatusKind,
// Block Events // Block Events
TextBlockEvent, TextBlockEvent,

View File

@ -1,77 +0,0 @@
//! `ReasoningItemCollector` - 完成済み reasoning item を収集する Handler
//!
//! Timeline の `ReasoningItemKind` Handler として登録し、scheme 側が
//! `Event::ReasoningItem` を発火するたびに 1 件ずつバッファに溜める。
//! Worker はターン終了時に `take_collected()` でドレインして
//! `Item::Reasoning` として `worker.history` に append する。
use std::sync::{Arc, Mutex};
use crate::handler::{Handler, ReasoningItemKind};
use crate::llm_client::event::ReasoningItemEvent;
/// 収集された reasoning item の連列。
#[derive(Clone, Default)]
pub struct ReasoningItemCollector {
collected: Arc<Mutex<Vec<ReasoningItemEvent>>>,
}
impl ReasoningItemCollector {
pub fn new() -> Self {
Self::default()
}
/// 収集済み item を取り出してクリア
pub fn take_collected(&self) -> Vec<ReasoningItemEvent> {
let mut guard = self.collected.lock().unwrap();
std::mem::take(&mut *guard)
}
/// 収集をクリア
pub fn clear(&self) {
self.collected.lock().unwrap().clear();
}
}
impl Handler<ReasoningItemKind> for ReasoningItemCollector {
type Scope = ();
fn on_event(&mut self, _scope: &mut Self::Scope, event: &ReasoningItemEvent) {
self.collected.lock().unwrap().push(event.clone());
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::llm_client::event::Event;
use crate::timeline::Timeline;
#[test]
fn collects_in_order() {
let collector = ReasoningItemCollector::new();
let mut timeline = Timeline::new();
timeline.on_reasoning_item(collector.clone());
timeline.dispatch(&Event::ReasoningItem(ReasoningItemEvent {
id: Some("r1".into()),
text: "first".into(),
signature: Some("sig1".into()),
..Default::default()
}));
timeline.dispatch(&Event::ReasoningItem(ReasoningItemEvent {
id: Some("r2".into()),
text: "second".into(),
..Default::default()
}));
let items = collector.take_collected();
assert_eq!(items.len(), 2);
assert_eq!(items[0].text, "first");
assert_eq!(items[0].signature.as_deref(), Some("sig1"));
assert_eq!(items[1].text, "second");
// take は drain なので 2 度目は空
assert!(collector.take_collected().is_empty());
}
}

View File

@ -0,0 +1,138 @@
//! `ThinkingBlockCollector` - reasoning material from Thinking block lifecycle.
//!
//! Scheme implementations emit Thinking BlockStart/Delta/Stop events for live
//! streaming. A Thinking block stop with `ReasoningBlockData` is also the single
//! authoritative persistence signal for `Item::Reasoning` round-trip material.
use std::sync::{Arc, Mutex};
use crate::handler::{Handler, ThinkingBlockEvent, ThinkingBlockKind};
use crate::llm_client::event::ReasoningBlockData;
/// Reasoning material collected from completed Thinking blocks.
#[derive(Clone, Default)]
pub struct ThinkingBlockCollector {
collected: Arc<Mutex<Vec<ReasoningBlockData>>>,
}
impl ThinkingBlockCollector {
pub fn new() -> Self {
Self::default()
}
/// 収集済み item を取り出してクリア
pub fn take_collected(&self) -> Vec<ReasoningBlockData> {
let mut guard = self.collected.lock().unwrap();
std::mem::take(&mut *guard)
}
/// 収集をクリア
pub fn clear(&self) {
self.collected.lock().unwrap().clear();
}
}
impl Handler<ThinkingBlockKind> for ThinkingBlockCollector {
type Scope = String;
fn on_event(&mut self, scope: &mut Self::Scope, event: &ThinkingBlockEvent) {
match event {
ThinkingBlockEvent::Start(_) => scope.clear(),
ThinkingBlockEvent::Delta(text) => scope.push_str(text),
ThinkingBlockEvent::Stop(stop) => {
if let Some(mut reasoning) = stop.reasoning.clone() {
if reasoning.text.is_none() {
reasoning.text = Some(scope.clone());
}
self.collected.lock().unwrap().push(reasoning);
}
}
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::llm_client::event::{
BlockMetadata, BlockStart, BlockStop, BlockType, DeltaContent, Event, ReasoningBlockData,
};
use crate::timeline::Timeline;
#[test]
fn collects_in_order_from_thinking_block_stops() {
let collector = ThinkingBlockCollector::new();
let mut timeline = Timeline::new();
timeline.on_thinking_block(collector.clone());
timeline.dispatch(&Event::BlockStart(BlockStart {
index: 0,
block_type: BlockType::Thinking,
metadata: BlockMetadata::Thinking,
}));
timeline.dispatch(&Event::BlockDelta(crate::llm_client::event::BlockDelta {
index: 0,
delta: DeltaContent::Thinking("first".into()),
}));
timeline.dispatch(&Event::BlockStop(BlockStop {
index: 0,
block_type: BlockType::Thinking,
stop_reason: None,
reasoning: Some(ReasoningBlockData {
id: Some("r1".into()),
signature: Some("sig1".into()),
..Default::default()
}),
}));
timeline.dispatch(&Event::BlockStart(BlockStart {
index: 1,
block_type: BlockType::Thinking,
metadata: BlockMetadata::Thinking,
}));
timeline.dispatch(&Event::BlockStop(BlockStop {
index: 1,
block_type: BlockType::Thinking,
stop_reason: None,
reasoning: Some(ReasoningBlockData {
id: Some("r2".into()),
text: Some("second".into()),
..Default::default()
}),
}));
let items = collector.take_collected();
assert_eq!(items.len(), 2);
assert_eq!(items[0].text.as_deref(), Some("first"));
assert_eq!(items[0].signature.as_deref(), Some("sig1"));
assert_eq!(items[1].text.as_deref(), Some("second"));
// take は drain なので 2 度目は空
assert!(collector.take_collected().is_empty());
}
#[test]
fn ignores_streaming_only_thinking_blocks() {
let collector = ThinkingBlockCollector::new();
let mut timeline = Timeline::new();
timeline.on_thinking_block(collector.clone());
timeline.dispatch(&Event::BlockStart(BlockStart {
index: 0,
block_type: BlockType::Thinking,
metadata: BlockMetadata::Thinking,
}));
timeline.dispatch(&Event::BlockDelta(crate::llm_client::event::BlockDelta {
index: 0,
delta: DeltaContent::Thinking("live only".into()),
}));
timeline.dispatch(&Event::BlockStop(BlockStop {
index: 0,
block_type: BlockType::Thinking,
stop_reason: None,
reasoning: None,
}));
assert!(collector.take_collected().is_empty());
}
}

View File

@ -3,7 +3,7 @@
//! LLMからのイベントストリームを受信し、登録されたHandlerにディスパッチします。 //! LLMからのイベントストリームを受信し、登録されたHandlerにディスパッチします。
//! 通常はWorker経由で使用しますが、直接使用することも可能です。 //! 通常はWorker経由で使用しますが、直接使用することも可能です。
use std::marker::PhantomData; use std::{collections::HashMap, marker::PhantomData};
use super::event::*; use super::event::*;
use crate::handler::*; use crate::handler::*;
@ -189,7 +189,7 @@ where
H: Handler<ThinkingBlockKind>, H: Handler<ThinkingBlockKind>,
{ {
handler: H, handler: H,
scope: Option<H::Scope>, scopes: HashMap<usize, H::Scope>,
} }
impl<H> ThinkingBlockHandlerWrapper<H> impl<H> ThinkingBlockHandlerWrapper<H>
@ -199,7 +199,7 @@ where
fn new(handler: H) -> Self { fn new(handler: H) -> Self {
Self { Self {
handler, handler,
scope: None, scopes: HashMap::new(),
} }
} }
} }
@ -210,44 +210,43 @@ where
H::Scope: Send + Sync, H::Scope: Send + Sync,
{ {
fn dispatch_start(&mut self, start: &BlockStart) { fn dispatch_start(&mut self, start: &BlockStart) {
if let Some(scope) = &mut self.scope { let scope = self.scopes.entry(start.index).or_default();
self.handler.on_event( self.handler.on_event(
scope, scope,
&ThinkingBlockEvent::Start(ThinkingBlockStart { index: start.index }), &ThinkingBlockEvent::Start(ThinkingBlockStart { index: start.index }),
); );
}
} }
fn dispatch_delta(&mut self, delta: &BlockDelta) { fn dispatch_delta(&mut self, delta: &BlockDelta) {
if let Some(scope) = &mut self.scope { if let DeltaContent::Thinking(text) = &delta.delta {
if let DeltaContent::Thinking(text) = &delta.delta { let scope = self.scopes.entry(delta.index).or_default();
self.handler self.handler
.on_event(scope, &ThinkingBlockEvent::Delta(text.clone())); .on_event(scope, &ThinkingBlockEvent::Delta(text.clone()));
}
} }
} }
fn dispatch_stop(&mut self, stop: &BlockStop) { fn dispatch_stop(&mut self, stop: &BlockStop) {
if let Some(scope) = &mut self.scope { if let Some(mut scope) = self.scopes.remove(&stop.index) {
self.handler.on_event( self.handler.on_event(
scope, &mut scope,
&ThinkingBlockEvent::Stop(ThinkingBlockStop { index: stop.index }), &ThinkingBlockEvent::Stop(ThinkingBlockStop {
index: stop.index,
reasoning: stop.reasoning.clone(),
}),
); );
} }
} }
fn dispatch_abort(&mut self, _abort: &BlockAbort) {} fn dispatch_abort(&mut self, _abort: &BlockAbort) {
self.scopes.clear();
fn start_scope(&mut self) {
self.scope = Some(H::Scope::default());
} }
fn end_scope(&mut self) { fn start_scope(&mut self) {}
self.scope = None;
} fn end_scope(&mut self) {}
fn has_scope(&self) -> bool { fn has_scope(&self) -> bool {
self.scope.is_some() !self.scopes.is_empty()
} }
} }
@ -375,8 +374,6 @@ pub struct Timeline {
ping_handlers: Vec<Box<dyn ErasedHandler<PingKind>>>, ping_handlers: Vec<Box<dyn ErasedHandler<PingKind>>>,
status_handlers: Vec<Box<dyn ErasedHandler<StatusKind>>>, status_handlers: Vec<Box<dyn ErasedHandler<StatusKind>>>,
error_handlers: Vec<Box<dyn ErasedHandler<ErrorKind>>>, error_handlers: Vec<Box<dyn ErasedHandler<ErrorKind>>>,
reasoning_item_handlers: Vec<Box<dyn ErasedHandler<ReasoningItemKind>>>,
// Block系ハンドラーBlockTypeごとにグループ化 // Block系ハンドラーBlockTypeごとにグループ化
text_block_handlers: Vec<Box<dyn ErasedBlockHandler>>, text_block_handlers: Vec<Box<dyn ErasedBlockHandler>>,
thinking_block_handlers: Vec<Box<dyn ErasedBlockHandler>>, thinking_block_handlers: Vec<Box<dyn ErasedBlockHandler>>,
@ -405,7 +402,6 @@ impl Timeline {
ping_handlers: Vec::new(), ping_handlers: Vec::new(),
status_handlers: Vec::new(), status_handlers: Vec::new(),
error_handlers: Vec::new(), error_handlers: Vec::new(),
reasoning_item_handlers: Vec::new(),
text_block_handlers: Vec::new(), text_block_handlers: Vec::new(),
thinking_block_handlers: Vec::new(), thinking_block_handlers: Vec::new(),
tool_use_block_handlers: Vec::new(), tool_use_block_handlers: Vec::new(),
@ -467,18 +463,6 @@ impl Timeline {
self self
} }
/// `ReasoningItemKind` 用 Handler を登録
pub fn on_reasoning_item<H>(&mut self, handler: H) -> &mut Self
where
H: Handler<ReasoningItemKind> + Send + Sync + 'static,
H::Scope: Send + Sync,
{
let mut wrapper = HandlerWrapper::new(handler);
wrapper.start_scope();
self.reasoning_item_handlers.push(Box::new(wrapper));
self
}
/// TextBlockKind用のHandlerを登録 /// TextBlockKind用のHandlerを登録
pub fn on_text_block<H>(&mut self, handler: H) -> &mut Self pub fn on_text_block<H>(&mut self, handler: H) -> &mut Self
where where
@ -532,9 +516,6 @@ impl Timeline {
Event::BlockDelta(d) => self.handle_block_delta(d), Event::BlockDelta(d) => self.handle_block_delta(d),
Event::BlockStop(s) => self.handle_block_stop(s), Event::BlockStop(s) => self.handle_block_stop(s),
Event::BlockAbort(a) => self.handle_block_abort(a), Event::BlockAbort(a) => self.handle_block_abort(a),
// 完成済み reasoning item: 即時ディスパッチ
Event::ReasoningItem(r) => self.dispatch_reasoning_item(r),
} }
} }
@ -577,12 +558,6 @@ impl Timeline {
} }
} }
fn dispatch_reasoning_item(&mut self, event: &ReasoningItemEvent) {
for handler in &mut self.reasoning_item_handlers {
handler.dispatch(event);
}
}
fn handle_block_start(&mut self, start: &BlockStart) { fn handle_block_start(&mut self, start: &BlockStart) {
self.current_block = Some(start.block_type); self.current_block = Some(start.block_type);

View File

@ -24,7 +24,7 @@ use crate::{
}, },
state::{Locked, Mutable, WorkerState}, state::{Locked, Mutable, WorkerState},
timeline::event::{ErrorEvent, StatusEvent, UsageEvent}, timeline::event::{ErrorEvent, StatusEvent, UsageEvent},
timeline::{ReasoningItemCollector, TextBlockCollector, Timeline, ToolCallCollector}, timeline::{TextBlockCollector, ThinkingBlockCollector, Timeline, ToolCallCollector},
tool::{ tool::{
ToolCall, ToolDefinition as WorkerToolDefinition, ToolError, ToolOutputLimits, ToolResult, ToolCall, ToolDefinition as WorkerToolDefinition, ToolError, ToolOutputLimits, ToolResult,
truncate_content, truncate_content,
@ -163,9 +163,9 @@ pub struct Worker<C: LlmClient, S: WorkerState = Mutable> {
text_block_collector: TextBlockCollector, text_block_collector: TextBlockCollector,
/// Tool call collector (Timeline handler) /// Tool call collector (Timeline handler)
tool_call_collector: ToolCallCollector, tool_call_collector: ToolCallCollector,
/// Reasoning item collector (Timeline handler)。完成済み reasoning /// Thinking block collector (Timeline handler)。metadata 付きで完了した
/// item を 1 ターン分バッファし、history に append する。 /// Thinking block を 1 ターン分バッファし、history に append する。
reasoning_item_collector: ReasoningItemCollector, thinking_block_collector: ThinkingBlockCollector,
/// Tool server handle /// Tool server handle
tool_server: ToolServerHandle, tool_server: ToolServerHandle,
/// Interceptor for control-flow decisions /// Interceptor for control-flow decisions
@ -771,14 +771,14 @@ impl<C: LlmClient, S: WorkerState> Worker<C, S> {
/// 置くことを要求するためでもある。 /// 置くことを要求するためでもある。
fn build_assistant_items( fn build_assistant_items(
&self, &self,
reasoning_items: &[crate::llm_client::event::ReasoningItemEvent], reasoning_items: &[crate::llm_client::event::ReasoningBlockData],
text_blocks: &[String], text_blocks: &[String],
tool_calls: &[ToolCall], tool_calls: &[ToolCall],
) -> Vec<Item> { ) -> Vec<Item> {
let mut items = Vec::new(); let mut items = Vec::new();
for r in reasoning_items { for r in reasoning_items {
let mut item = Item::reasoning(r.text.clone()); let mut item = Item::reasoning(r.text.clone().unwrap_or_default());
if let Some(id) = &r.id { if let Some(id) = &r.id {
item = item.with_id(id); item = item.with_id(id);
} }
@ -1238,7 +1238,7 @@ impl<C: LlmClient, S: WorkerState> Worker<C, S> {
self.timeline.abort_current_block(); self.timeline.abort_current_block();
self.timeline.flush_usage(); self.timeline.flush_usage();
let reasoning_items = self.reasoning_item_collector.take_collected(); let reasoning_items = self.thinking_block_collector.take_collected();
let text_blocks = self.text_block_collector.take_collected(); let text_blocks = self.text_block_collector.take_collected();
// Do not recover tool calls from an interrupted stream. A completed // Do not recover tool calls from an interrupted stream. A completed
// tool_use is executable only when the provider finishes the stream. // tool_use is executable only when the provider finishes the stream.
@ -1270,7 +1270,7 @@ impl<C: LlmClient, S: WorkerState> Worker<C, S> {
// `append_history_items` so observers (e.g. the // `append_history_items` so observers (e.g. the
// Pod-side per-item session-log committer) see each item // Pod-side per-item session-log committer) see each item
// as it lands. // as it lands.
let reasoning_items = self.reasoning_item_collector.take_collected(); let reasoning_items = self.thinking_block_collector.take_collected();
let text_blocks = self.text_block_collector.take_collected(); let text_blocks = self.text_block_collector.take_collected();
let tool_calls = self.tool_call_collector.take_collected(); let tool_calls = self.tool_call_collector.take_collected();
let assistant_items = let assistant_items =
@ -1595,14 +1595,14 @@ impl<C: LlmClient> Worker<C, Mutable> {
pub fn new(client: C) -> Self { pub fn new(client: C) -> Self {
let text_block_collector = TextBlockCollector::new(); let text_block_collector = TextBlockCollector::new();
let tool_call_collector = ToolCallCollector::new(); let tool_call_collector = ToolCallCollector::new();
let reasoning_item_collector = ReasoningItemCollector::new(); let thinking_block_collector = ThinkingBlockCollector::new();
let mut timeline = Timeline::new(); let mut timeline = Timeline::new();
let (cancel_tx, cancel_rx) = mpsc::channel(1); let (cancel_tx, cancel_rx) = mpsc::channel(1);
// Register collectors with Timeline // Register collectors with Timeline
timeline.on_text_block(text_block_collector.clone()); timeline.on_text_block(text_block_collector.clone());
timeline.on_tool_use_block(tool_call_collector.clone()); timeline.on_tool_use_block(tool_call_collector.clone());
timeline.on_reasoning_item(reasoning_item_collector.clone()); timeline.on_thinking_block(thinking_block_collector.clone());
Self { Self {
client, client,
@ -1610,7 +1610,7 @@ impl<C: LlmClient> Worker<C, Mutable> {
timeline, timeline,
text_block_collector, text_block_collector,
tool_call_collector, tool_call_collector,
reasoning_item_collector, thinking_block_collector,
tool_server: ToolServer::new().handle(), tool_server: ToolServer::new().handle(),
interceptor: Box::new(DefaultInterceptor), interceptor: Box::new(DefaultInterceptor),
system_prompt: None, system_prompt: None,
@ -1874,7 +1874,7 @@ impl<C: LlmClient> Worker<C, Mutable> {
timeline: self.timeline, timeline: self.timeline,
text_block_collector: self.text_block_collector, text_block_collector: self.text_block_collector,
tool_call_collector: self.tool_call_collector, tool_call_collector: self.tool_call_collector,
reasoning_item_collector: self.reasoning_item_collector, thinking_block_collector: self.thinking_block_collector,
tool_server: self.tool_server, tool_server: self.tool_server,
interceptor: self.interceptor, interceptor: self.interceptor,
system_prompt: self.system_prompt, system_prompt: self.system_prompt,
@ -1966,7 +1966,7 @@ impl<C: LlmClient> Worker<C, Locked> {
timeline: self.timeline, timeline: self.timeline,
text_block_collector: self.text_block_collector, text_block_collector: self.text_block_collector,
tool_call_collector: self.tool_call_collector, tool_call_collector: self.tool_call_collector,
reasoning_item_collector: self.reasoning_item_collector, thinking_block_collector: self.thinking_block_collector,
tool_server: self.tool_server, tool_server: self.tool_server,
interceptor: self.interceptor, interceptor: self.interceptor,
system_prompt: self.system_prompt, system_prompt: self.system_prompt,

View File

@ -16,27 +16,53 @@ mod common;
use common::MockLlmClient; use common::MockLlmClient;
use llm_worker::Item; use llm_worker::Item;
use llm_worker::Worker; use llm_worker::Worker;
use llm_worker::llm_client::event::{Event, ReasoningItemEvent, ResponseStatus, StatusEvent}; use llm_worker::llm_client::event::{
BlockMetadata, BlockStart, BlockStop, BlockType, Event, ReasoningBlockData, ResponseStatus,
StatusEvent,
};
fn reasoning_block(text: impl Into<String>, data: ReasoningBlockData) -> Vec<Event> {
vec![
Event::BlockStart(BlockStart {
index: 100,
block_type: BlockType::Thinking,
metadata: BlockMetadata::Thinking,
}),
Event::BlockDelta(llm_worker::llm_client::event::BlockDelta {
index: 100,
delta: llm_worker::llm_client::event::DeltaContent::Thinking(text.into()),
}),
Event::BlockStop(BlockStop {
index: 100,
block_type: BlockType::Thinking,
stop_reason: None,
reasoning: Some(data),
}),
]
}
/// Anthropic 風: thinking ブロック → text → 終了 のシーケンス。 /// Anthropic 風: thinking ブロック → text → 終了 のシーケンス。
/// Worker history に Reasoning(signature 付き) → assistant_message が並ぶ。 /// Worker history に Reasoning(signature 付き) → assistant_message が並ぶ。
#[tokio::test] #[tokio::test]
async fn anthropic_thinking_round_trips_signature_into_history() { async fn anthropic_thinking_round_trips_signature_into_history() {
let events = vec![ let mut events = reasoning_block(
Event::ReasoningItem(ReasoningItemEvent { "let me think...",
ReasoningBlockData {
id: None, id: None,
text: "let me think...".into(), text: None,
summary: Vec::new(), summary: Vec::new(),
encrypted_content: None, encrypted_content: None,
signature: Some("SIG-OPUS".into()), signature: Some("SIG-OPUS".into()),
}), },
);
events.extend([
Event::text_block_start(0), Event::text_block_start(0),
Event::text_delta(0, "Here's the answer"), Event::text_delta(0, "Here's the answer"),
Event::text_block_stop(0, None), Event::text_block_stop(0, None),
Event::Status(StatusEvent { Event::Status(StatusEvent {
status: ResponseStatus::Completed, status: ResponseStatus::Completed,
}), }),
]; ]);
let client = MockLlmClient::new(events); let client = MockLlmClient::new(events);
let worker = Worker::new(client); let worker = Worker::new(client);
let out = worker.run("question?").await.expect("run ok"); let out = worker.run("question?").await.expect("run ok");
@ -63,21 +89,24 @@ async fn anthropic_thinking_round_trips_signature_into_history() {
/// `Item::Reasoning` のフィールドに展開されること。 /// `Item::Reasoning` のフィールドに展開されること。
#[tokio::test] #[tokio::test]
async fn openai_reasoning_round_trips_encrypted_and_summary() { async fn openai_reasoning_round_trips_encrypted_and_summary() {
let events = vec![ let mut events = reasoning_block(
Event::ReasoningItem(ReasoningItemEvent { "",
ReasoningBlockData {
id: Some("r1".into()), id: Some("r1".into()),
text: "inner reasoning".into(), text: Some("inner reasoning".into()),
summary: vec!["sum-A".into(), "sum-B".into()], summary: vec!["sum-A".into(), "sum-B".into()],
encrypted_content: Some("ENC-OPAQUE".into()), encrypted_content: Some("ENC-OPAQUE".into()),
signature: None, signature: None,
}), },
);
events.extend([
Event::text_block_start(0), Event::text_block_start(0),
Event::text_delta(0, "answer"), Event::text_delta(0, "answer"),
Event::text_block_stop(0, None), Event::text_block_stop(0, None),
Event::Status(StatusEvent { Event::Status(StatusEvent {
status: ResponseStatus::Completed, status: ResponseStatus::Completed,
}), }),
]; ]);
let client = MockLlmClient::new(events); let client = MockLlmClient::new(events);
let worker = Worker::new(client); let worker = Worker::new(client);
let out = worker.run("q").await.expect("run ok"); let out = worker.run("q").await.expect("run ok");
@ -107,21 +136,23 @@ async fn openai_reasoning_round_trips_encrypted_and_summary() {
/// が thinking を assistant メッセージの先頭に要求するため)。 /// が thinking を assistant メッセージの先頭に要求するため)。
#[tokio::test] #[tokio::test]
async fn reasoning_precedes_text_in_assistant_burst() { async fn reasoning_precedes_text_in_assistant_burst() {
let events = vec![ let mut events = vec![
// text/tool_call とは独立に、ReasoningItem が中盤で発火しても、 // text/tool_call とは独立に、reasoning block が中盤で完了しても、
// history append 時には assistant items の先頭に置かれる。 // history append 時には assistant items の先頭に置かれる。
Event::text_block_start(0), Event::text_block_start(0),
Event::text_delta(0, "intermediate"), Event::text_delta(0, "intermediate"),
Event::text_block_stop(0, None), Event::text_block_stop(0, None),
Event::ReasoningItem(ReasoningItemEvent { ];
text: "after text".into(), events.extend(reasoning_block(
"after text",
ReasoningBlockData {
signature: Some("SIG".into()), signature: Some("SIG".into()),
..Default::default() ..Default::default()
}), },
Event::Status(StatusEvent { ));
status: ResponseStatus::Completed, events.push(Event::Status(StatusEvent {
}), status: ResponseStatus::Completed,
]; }));
let client = MockLlmClient::new(events); let client = MockLlmClient::new(events);
let worker = Worker::new(client); let worker = Worker::new(client);
let out = worker.run("q").await.expect("run ok"); let out = worker.run("q").await.expect("run ok");

View File

@ -2,8 +2,7 @@
//! //!
//! Hooks are the **public** orchestration extension point. They receive //! Hooks are the **public** orchestration extension point. They receive
//! read-only summary information about each event in the Worker //! read-only summary information about each event in the Worker
//! execution loop and return a control-flow action //! execution loop and return a safe public control-flow action.
//! (continue / skip / abort / pause).
//! //!
//! Hooks intentionally cannot mutate the Worker's context, history, tool //! Hooks intentionally cannot mutate the Worker's context, history, tool
//! call, or tool result. Internal mechanisms that need such access (e.g. //! call, or tool result. Internal mechanisms that need such access (e.g.
@ -18,7 +17,7 @@ use async_trait::async_trait;
use llm_worker::interceptor::{ use llm_worker::interceptor::{
PostToolAction, PreRequestAction, PreToolAction, PromptAction, TurnEndAction, PostToolAction, PreRequestAction, PreToolAction, PromptAction, TurnEndAction,
}; };
use llm_worker::tool::ToolOutput; use llm_worker::tool::{ToolOutput, ToolResult};
use serde_json::Value; use serde_json::Value;
/// Hook-facing prompt-submit action. /// Hook-facing prompt-submit action.
@ -32,7 +31,7 @@ use serde_json::Value;
pub enum HookPromptAction { pub enum HookPromptAction {
/// Proceed normally. /// Proceed normally.
Continue, Continue,
/// Cancel with a reason. /// Cancel this submitted prompt with a reason.
Cancel(String), Cancel(String),
} }
@ -45,6 +44,109 @@ impl From<HookPromptAction> for PromptAction {
} }
} }
/// Hook-facing pre-LLM-request action.
///
/// Public hooks may observe the request boundary, cancel the run, or yield
/// control back to the caller. They cannot return
/// `PreRequestAction::ContinueWith(Vec<Item>)`; model-visible request/history
/// additions must use durable host-owned paths such as notifications or
/// system-item commits.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum HookPreRequestAction {
/// Proceed normally.
Continue,
/// Cancel the run with a reason.
Cancel(String),
/// Yield control to the caller for host-owned processing/resume.
Yield,
}
impl From<HookPreRequestAction> for PreRequestAction {
fn from(action: HookPreRequestAction) -> Self {
match action {
HookPreRequestAction::Continue => PreRequestAction::Continue,
HookPreRequestAction::Cancel(reason) => PreRequestAction::Cancel(reason),
HookPreRequestAction::Yield => PreRequestAction::Yield,
}
}
}
/// Hook-facing pre-tool-call action.
///
/// Hooks may continue, pause/abort the call, or deny it with an error
/// string that Pod converts into a synthetic tool result for the current
/// tool call. Hooks cannot express the internal no-result skip path, mutate
/// the tool call arguments, or construct arbitrary `ToolResult` values.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum HookPreToolAction {
/// Proceed with tool execution.
Continue,
/// Deny this tool call and commit a synthetic error result.
Deny(String),
/// Abort the entire run.
Abort(String),
/// Pause execution.
Pause,
}
impl HookPreToolAction {
pub(crate) fn into_worker_action(self, call_id: String) -> PreToolAction {
match self {
HookPreToolAction::Continue => PreToolAction::Continue,
HookPreToolAction::Deny(reason) => {
PreToolAction::SyntheticResult(ToolResult::error(call_id, reason))
}
HookPreToolAction::Abort(reason) => PreToolAction::Abort(reason),
HookPreToolAction::Pause => PreToolAction::Pause,
}
}
}
/// Hook-facing post-tool-call action.
///
/// Post-tool hooks are observational except that they may abort the run. They
/// cannot rewrite the tool output; adding an explicit bounded transform would
/// require a separate safe public type.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum HookPostToolAction {
/// Proceed normally.
Continue,
/// Abort the entire run.
Abort(String),
}
impl From<HookPostToolAction> for PostToolAction {
fn from(action: HookPostToolAction) -> Self {
match action {
HookPostToolAction::Continue => PostToolAction::Continue,
HookPostToolAction::Abort(reason) => PostToolAction::Abort(reason),
}
}
}
/// Hook-facing turn-end action.
///
/// Turn-end hooks may observe a completed turn and optionally pause further
/// execution. They cannot return
/// `TurnEndAction::ContinueWithMessages(Vec<Item>)`; public hooks must not
/// append arbitrary model-visible messages at turn boundaries.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum HookTurnEndAction {
/// Finish the turn normally.
Finish,
/// Pause execution.
Pause,
}
impl From<HookTurnEndAction> for TurnEndAction {
fn from(action: HookTurnEndAction) -> Self {
match action {
HookTurnEndAction::Finish => TurnEndAction::Finish,
HookTurnEndAction::Pause => TurnEndAction::Pause,
}
}
}
// ============================================================================= // =============================================================================
// Hook input summary types (read-only) // Hook input summary types (read-only)
// ============================================================================= // =============================================================================
@ -121,8 +223,8 @@ pub struct AbortInfo {
/// Marker trait for hook event kinds. /// Marker trait for hook event kinds.
/// ///
/// Each event kind specifies its read-only input and the control-flow /// Each event kind specifies its read-only input and the safe public
/// action returned by hooks. /// control-flow action returned by hooks.
pub trait HookEventKind: Send + Sync + 'static { pub trait HookEventKind: Send + Sync + 'static {
/// Read-only input passed to the hook. /// Read-only input passed to the hook.
type Input: Send + Sync; type Input: Send + Sync;
@ -130,17 +232,18 @@ pub trait HookEventKind: Send + Sync + 'static {
type Output; type Output;
} }
/// After receiving user input, before adding to history. /// After receiving user input, before adding to history; may continue or cancel.
pub struct OnPromptSubmit; pub struct OnPromptSubmit;
/// Before each LLM request. /// Before each LLM request; may continue, cancel, or yield.
pub struct PreLlmRequest; pub struct PreLlmRequest;
/// Before each tool is executed. /// Before each tool is executed; may continue, deny with a synthetic result,
/// abort, or pause.
pub struct PreToolCall; pub struct PreToolCall;
/// After each tool completes. /// After each tool completes; observational except it may abort the run.
pub struct PostToolCall; pub struct PostToolCall;
/// When a turn ends with no tool calls. /// When a turn ends with no tool calls; observational except it may pause.
pub struct OnTurnEnd; pub struct OnTurnEnd;
/// When execution is interrupted. /// When execution is interrupted; observational only.
pub struct OnAbort; pub struct OnAbort;
impl HookEventKind for OnPromptSubmit { impl HookEventKind for OnPromptSubmit {
@ -150,22 +253,22 @@ impl HookEventKind for OnPromptSubmit {
impl HookEventKind for PreLlmRequest { impl HookEventKind for PreLlmRequest {
type Input = PreRequestInfo; type Input = PreRequestInfo;
type Output = PreRequestAction; type Output = HookPreRequestAction;
} }
impl HookEventKind for PreToolCall { impl HookEventKind for PreToolCall {
type Input = ToolCallSummary; type Input = ToolCallSummary;
type Output = PreToolAction; type Output = HookPreToolAction;
} }
impl HookEventKind for PostToolCall { impl HookEventKind for PostToolCall {
type Input = ToolResultSummary; type Input = ToolResultSummary;
type Output = PostToolAction; type Output = HookPostToolAction;
} }
impl HookEventKind for OnTurnEnd { impl HookEventKind for OnTurnEnd {
type Input = TurnEndInfo; type Input = TurnEndInfo;
type Output = TurnEndAction; type Output = HookTurnEndAction;
} }
impl HookEventKind for OnAbort { impl HookEventKind for OnAbort {
@ -180,9 +283,9 @@ impl HookEventKind for OnAbort {
/// Async hook for a specific event kind. /// Async hook for a specific event kind.
/// ///
/// Hooks receive a shared reference to the event's read-only input /// Hooks receive a shared reference to the event's read-only input
/// and return a control-flow action. Multiple hooks can be registered /// and return a safe public control-flow action. Multiple hooks can be
/// per event; they are evaluated in registration order and /// registered per event; they are evaluated in registration order and
/// short-circuit on the first non-Continue (or non-Finish) result. /// short-circuit on the first non-continue action.
#[async_trait] #[async_trait]
pub trait Hook<E: HookEventKind>: Send + Sync { pub trait Hook<E: HookEventKind>: Send + Sync {
async fn call(&self, input: &E::Input) -> E::Output; async fn call(&self, input: &E::Input) -> E::Output;
@ -257,3 +360,32 @@ pub struct HookRegistry {
pub(crate) on_turn_end: Vec<Box<dyn Hook<OnTurnEnd>>>, pub(crate) on_turn_end: Vec<Box<dyn Hook<OnTurnEnd>>>,
pub(crate) on_abort: Vec<Box<dyn Hook<OnAbort>>>, pub(crate) on_abort: Vec<Box<dyn Hook<OnAbort>>>,
} }
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn public_pre_tool_hook_actions_cannot_emit_internal_no_result_skip() {
let continue_action = HookPreToolAction::Continue.into_worker_action("call_1".into());
assert!(matches!(continue_action, PreToolAction::Continue));
let deny_action =
HookPreToolAction::Deny("blocked".into()).into_worker_action("call_2".into());
match deny_action {
PreToolAction::SyntheticResult(result) => {
assert_eq!(result.tool_use_id, "call_2");
assert_eq!(result.summary, "blocked");
assert!(result.is_error);
}
other => panic!("public deny must produce synthetic result, got {other:?}"),
}
let abort_action =
HookPreToolAction::Abort("stop".into()).into_worker_action("call_3".into());
assert!(matches!(abort_action, PreToolAction::Abort(reason) if reason == "stop"));
let pause_action = HookPreToolAction::Pause.into_worker_action("call_4".into());
assert!(matches!(pause_action, PreToolAction::Pause));
}
}

View File

@ -27,7 +27,8 @@ use session_store::{SystemItem, SystemReminder};
use tools::{TaskEntry, TaskStatus, TaskStore}; use tools::{TaskEntry, TaskStatus, TaskStore};
use crate::hook::{ use crate::hook::{
AbortInfo, HookPromptAction, HookRegistry, PreRequestInfo, PromptSubmitInfo, ToolCallSummary, AbortInfo, HookPostToolAction, HookPreRequestAction, HookPreToolAction, HookPromptAction,
HookRegistry, HookTurnEndAction, PreRequestInfo, PromptSubmitInfo, ToolCallSummary,
ToolResultSummary, TurnEndInfo, ToolResultSummary, TurnEndInfo,
}; };
use crate::ipc::notify_buffer::{NotifyBuffer, build_system_item}; use crate::ipc::notify_buffer::{NotifyBuffer, build_system_item};
@ -343,8 +344,8 @@ impl Interceptor for PodInterceptor {
}; };
for hook in &self.registry.pre_llm_request { for hook in &self.registry.pre_llm_request {
let action = hook.call(&info).await; let action = hook.call(&info).await;
if !matches!(action, PreRequestAction::Continue) { if !matches!(action, HookPreRequestAction::Continue) {
return action; return action.into();
} }
} }
PreRequestAction::Continue PreRequestAction::Continue
@ -358,8 +359,8 @@ impl Interceptor for PodInterceptor {
}; };
for hook in &self.registry.pre_tool_call { for hook in &self.registry.pre_tool_call {
let action = hook.call(&summary).await; let action = hook.call(&summary).await;
if !matches!(action, PreToolAction::Continue) { if !matches!(action, HookPreToolAction::Continue) {
return action; return action.into_worker_action(summary.call_id.clone());
} }
} }
if is_task_management_tool(&info.call.name) { if is_task_management_tool(&info.call.name) {
@ -381,8 +382,8 @@ impl Interceptor for PodInterceptor {
}; };
for hook in &self.registry.post_tool_call { for hook in &self.registry.post_tool_call {
let action = hook.call(&summary).await; let action = hook.call(&summary).await;
if !matches!(action, PostToolAction::Continue) { if !matches!(action, HookPostToolAction::Continue) {
return action; return action.into();
} }
} }
PostToolAction::Continue PostToolAction::Continue
@ -403,8 +404,8 @@ impl Interceptor for PodInterceptor {
}; };
for hook in &self.registry.on_turn_end { for hook in &self.registry.on_turn_end {
let action = hook.call(&info).await; let action = hook.call(&info).await;
if !matches!(action, TurnEndAction::Finish) { if !matches!(action, HookTurnEndAction::Finish) {
return action; return action.into();
} }
} }
TurnEndAction::Finish TurnEndAction::Finish
@ -480,16 +481,19 @@ mod tests {
use std::sync::atomic::{AtomicBool, AtomicUsize}; use std::sync::atomic::{AtomicBool, AtomicUsize};
use super::*; use super::*;
use crate::hook::{Hook, HookRegistryBuilder, PreLlmRequest}; use crate::hook::{
Hook, HookPostToolAction, HookPreRequestAction, HookPreToolAction, HookRegistryBuilder,
HookTurnEndAction, OnTurnEnd, PostToolCall, PreLlmRequest, PreToolCall,
};
use session_store::SystemReminderSource; use session_store::SystemReminderSource;
struct CountingHook(Arc<AtomicUsize>); struct CountingHook(Arc<AtomicUsize>);
#[async_trait] #[async_trait]
impl Hook<PreLlmRequest> for CountingHook { impl Hook<PreLlmRequest> for CountingHook {
async fn call(&self, _info: &PreRequestInfo) -> PreRequestAction { async fn call(&self, _info: &PreRequestInfo) -> HookPreRequestAction {
self.0.fetch_add(1, Ordering::Relaxed); self.0.fetch_add(1, Ordering::Relaxed);
PreRequestAction::Continue HookPreRequestAction::Continue
} }
} }
@ -516,7 +520,7 @@ mod tests {
) )
} }
async fn call_pre_tool(interceptor: &PodInterceptor, name: &str) { fn task_tool_call_info(name: &str, input: serde_json::Value) -> ToolCallInfo {
let def = tools::task_tools(TaskStore::new()) let def = tools::task_tools(TaskStore::new())
.into_iter() .into_iter()
.find(|def| { .find(|def| {
@ -525,15 +529,19 @@ mod tests {
}) })
.expect("task tool definition"); .expect("task tool definition");
let (meta, tool) = def(); let (meta, tool) = def();
let mut info = ToolCallInfo { ToolCallInfo {
call: llm_worker::tool::ToolCall { call: llm_worker::tool::ToolCall {
id: "call-id".into(), id: "call-id".into(),
name: name.into(), name: name.into(),
input: serde_json::json!({}), input,
}, },
meta, meta,
tool, tool,
}; }
}
async fn call_pre_tool(interceptor: &PodInterceptor, name: &str) {
let mut info = task_tool_call_info(name, serde_json::json!({}));
let action = interceptor.pre_tool_call(&mut info).await; let action = interceptor.pre_tool_call(&mut info).await;
assert!(matches!(action, PreToolAction::Continue)); assert!(matches!(action, PreToolAction::Continue));
} }
@ -739,12 +747,160 @@ mod tests {
#[async_trait] #[async_trait]
impl Hook<PreLlmRequest> for AbortingHook { impl Hook<PreLlmRequest> for AbortingHook {
async fn call(&self, _info: &PreRequestInfo) -> PreRequestAction { async fn call(&self, _info: &PreRequestInfo) -> HookPreRequestAction {
self.0.store(true, Ordering::Relaxed); self.0.store(true, Ordering::Relaxed);
PreRequestAction::Cancel("nope".into()) HookPreRequestAction::Cancel("nope".into())
} }
} }
#[tokio::test]
async fn public_pre_tool_hook_deny_becomes_synthetic_error_and_short_circuits() {
struct DenyToolHook(Arc<AtomicUsize>);
struct CountingToolHook(Arc<AtomicUsize>);
#[async_trait]
impl Hook<PreToolCall> for DenyToolHook {
async fn call(&self, input: &ToolCallSummary) -> HookPreToolAction {
self.0.fetch_add(1, Ordering::Relaxed);
assert_eq!(input.call_id, "call-id");
assert_eq!(input.tool_name, "TaskList");
assert_eq!(input.arguments, serde_json::json!({"scope": "all"}));
HookPreToolAction::Deny("blocked by public hook".into())
}
}
#[async_trait]
impl Hook<PreToolCall> for CountingToolHook {
async fn call(&self, _input: &ToolCallSummary) -> HookPreToolAction {
self.0.fetch_add(1, Ordering::Relaxed);
HookPreToolAction::Continue
}
}
let first_count = Arc::new(AtomicUsize::new(0));
let second_count = Arc::new(AtomicUsize::new(0));
let mut builder = HookRegistryBuilder::new();
builder.add_pre_tool_call(DenyToolHook(first_count.clone()));
builder.add_pre_tool_call(CountingToolHook(second_count.clone()));
let registry = Arc::new(builder.build());
let interceptor = PodInterceptor::new(
registry,
None,
None,
NotifyBuffer::new(),
Arc::new(Mutex::new(Vec::new())),
TaskStore::new(),
Arc::new(TaskReminderState::new()),
PromptCatalog::builtins_only().unwrap(),
None,
);
let mut info = task_tool_call_info("TaskList", serde_json::json!({"scope": "all"}));
let action = interceptor.pre_tool_call(&mut info).await;
match action {
PreToolAction::SyntheticResult(result) => {
assert_eq!(result.tool_use_id, "call-id");
assert_eq!(result.summary, "blocked by public hook");
assert_eq!(result.content, None);
assert!(result.is_error);
}
other => panic!("expected synthetic denial, got {other:?}"),
}
assert_eq!(first_count.load(Ordering::Relaxed), 1);
assert_eq!(second_count.load(Ordering::Relaxed), 0);
}
#[tokio::test]
async fn public_post_tool_hooks_observe_output_but_only_abort() {
struct AbortAfterToolHook(Arc<AtomicUsize>);
#[async_trait]
impl Hook<PostToolCall> for AbortAfterToolHook {
async fn call(&self, input: &ToolResultSummary) -> HookPostToolAction {
self.0.fetch_add(1, Ordering::Relaxed);
assert_eq!(input.call_id, "call-id");
assert_eq!(input.tool_name, "TaskList");
assert!(!input.is_error);
assert_eq!(input.output.summary, "ok");
assert_eq!(input.output.content.as_deref(), Some("full"));
HookPostToolAction::Abort("post tool abort".into())
}
}
let count = Arc::new(AtomicUsize::new(0));
let mut builder = HookRegistryBuilder::new();
builder.add_post_tool_call(AbortAfterToolHook(count.clone()));
let registry = Arc::new(builder.build());
let interceptor = PodInterceptor::new(
registry,
None,
None,
NotifyBuffer::new(),
Arc::new(Mutex::new(Vec::new())),
TaskStore::new(),
Arc::new(TaskReminderState::new()),
PromptCatalog::builtins_only().unwrap(),
None,
);
let info = task_tool_call_info("TaskList", serde_json::json!({}));
let mut result_info = ToolResultInfo {
call: info.call,
result: llm_worker::tool::ToolResult::from_output(
"call-id",
ToolOutput {
summary: "ok".into(),
content: Some("full".into()),
},
),
meta: info.meta,
tool: info.tool,
};
let action = interceptor.post_tool_call(&mut result_info).await;
assert_eq!(action, PostToolAction::Abort("post tool abort".to_string()));
assert_eq!(count.load(Ordering::Relaxed), 1);
}
#[tokio::test]
async fn public_turn_end_hooks_are_observational_or_pause_only() {
struct PauseTurnEndHook(Arc<AtomicUsize>);
#[async_trait]
impl Hook<OnTurnEnd> for PauseTurnEndHook {
async fn call(&self, input: &TurnEndInfo) -> HookTurnEndAction {
self.0.fetch_add(1, Ordering::Relaxed);
assert_eq!(input.turn_index, 0);
assert_eq!(input.tool_calls_count, 0);
assert_eq!(input.final_text_preview, "done");
HookTurnEndAction::Pause
}
}
let count = Arc::new(AtomicUsize::new(0));
let mut builder = HookRegistryBuilder::new();
builder.add_on_turn_end(PauseTurnEndHook(count.clone()));
let registry = Arc::new(builder.build());
let interceptor = PodInterceptor::new(
registry,
None,
None,
NotifyBuffer::new(),
Arc::new(Mutex::new(Vec::new())),
TaskStore::new(),
Arc::new(TaskReminderState::new()),
PromptCatalog::builtins_only().unwrap(),
None,
);
let history = vec![Item::user_message("hi"), Item::assistant_message("done")];
let action = interceptor.on_turn_end(&history).await;
assert!(matches!(action, TurnEndAction::Pause));
assert_eq!(count.load(Ordering::Relaxed), 1);
}
#[tokio::test] #[tokio::test]
async fn pending_history_appends_drains_buffer_into_items() { async fn pending_history_appends_drains_buffer_into_items() {
let registry = Arc::new(HookRegistryBuilder::new().build()); let registry = Arc::new(HookRegistryBuilder::new().build());

View File

@ -1,13 +1,11 @@
use async_trait::async_trait; use async_trait::async_trait;
use llm_worker::interceptor::PreToolAction;
use llm_worker::llm_client::client::LlmClient; use llm_worker::llm_client::client::LlmClient;
use llm_worker::tool::ToolResult;
use manifest::{ToolPermissionAction, ToolPermissionConfig}; use manifest::{ToolPermissionAction, ToolPermissionConfig};
use serde_json::Value; use serde_json::Value;
use session_store::Store; use session_store::Store;
use crate::Pod; use crate::Pod;
use crate::hook::{Hook, PreToolCall, ToolCallSummary}; use crate::hook::{Hook, HookPreToolAction, PreToolCall, ToolCallSummary};
/// Built-in manifest permission policy for `PreToolCall`. /// Built-in manifest permission policy for `PreToolCall`.
/// ///
@ -47,34 +45,28 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
#[async_trait] #[async_trait]
impl Hook<PreToolCall> for PermissionHook { impl Hook<PreToolCall> for PermissionHook {
async fn call(&self, input: &ToolCallSummary) -> PreToolAction { async fn call(&self, input: &ToolCallSummary) -> HookPreToolAction {
match self.action_for(input) { match self.action_for(input) {
ToolPermissionAction::Allow => PreToolAction::Continue, ToolPermissionAction::Allow => HookPreToolAction::Continue,
ToolPermissionAction::Deny => PreToolAction::SyntheticResult(permission_denied(input)), ToolPermissionAction::Deny => HookPreToolAction::Deny(permission_denied_message(input)),
ToolPermissionAction::Ask => { ToolPermissionAction::Ask => {
PreToolAction::SyntheticResult(permission_ask_unsupported(input)) HookPreToolAction::Deny(permission_ask_unsupported_message(input))
} }
} }
} }
} }
fn permission_denied(input: &ToolCallSummary) -> ToolResult { fn permission_denied_message(input: &ToolCallSummary) -> String {
ToolResult::error( format!(
input.call_id.clone(), "permission denied: tool `{}` arguments matched the manifest permission policy",
format!( input.tool_name
"permission denied: tool `{}` arguments matched the manifest permission policy",
input.tool_name
),
) )
} }
fn permission_ask_unsupported(input: &ToolCallSummary) -> ToolResult { fn permission_ask_unsupported_message(input: &ToolCallSummary) -> String {
ToolResult::error( format!(
input.call_id.clone(), "permission ask unsupported: tool `{}` requires approval, but this runtime has no permission approval protocol; denied fail-closed",
format!( input.tool_name
"permission ask unsupported: tool `{}` requires approval, but this runtime has no permission approval protocol; denied fail-closed",
input.tool_name
),
) )
} }
@ -123,6 +115,7 @@ fn wildcard_match(pattern: &str, text: &str) -> bool {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use crate::hook::HookPreToolAction;
use manifest::ToolPermissionRule; use manifest::ToolPermissionRule;
fn summary(tool_name: &str, arguments: Value) -> ToolCallSummary { fn summary(tool_name: &str, arguments: Value) -> ToolCallSummary {
@ -168,6 +161,45 @@ mod tests {
assert_eq!(hook.action_for(&input), ToolPermissionAction::Deny); assert_eq!(hook.action_for(&input), ToolPermissionAction::Deny);
} }
#[tokio::test]
async fn deny_and_ask_fail_closed_as_public_deny_actions() {
let deny = PermissionHook::new(ToolPermissionConfig {
default_action: ToolPermissionAction::Deny,
rules: Vec::new(),
});
let denied = deny
.call(&summary(
"Bash",
serde_json::json!({ "command": "rm -rf target" }),
))
.await;
match denied {
HookPreToolAction::Deny(message) => {
assert!(message.contains("permission denied"));
assert!(message.contains("Bash"));
}
other => panic!("expected fail-closed deny action, got {other:?}"),
}
let ask = PermissionHook::new(ToolPermissionConfig {
default_action: ToolPermissionAction::Ask,
rules: Vec::new(),
});
let asked = ask
.call(&summary(
"Bash",
serde_json::json!({ "command": "git status" }),
))
.await;
match asked {
HookPreToolAction::Deny(message) => {
assert!(message.contains("permission ask unsupported"));
assert!(message.contains("denied fail-closed"));
}
other => panic!("expected ask fail-closed deny action, got {other:?}"),
}
}
#[test] #[test]
fn target_prefers_known_builtin_argument_fields() { fn target_prefers_known_builtin_argument_fields() {
assert_eq!( assert_eq!(

View File

@ -29,8 +29,8 @@ use manifest::{
use crate::compact::state::CompactState; use crate::compact::state::CompactState;
use crate::compact::usage_tracker::UsageTracker; use crate::compact::usage_tracker::UsageTracker;
use crate::hook::{ use crate::hook::{
Hook, HookRegistryBuilder, OnAbort, OnPromptSubmit, OnTurnEnd, PostToolCall, PreLlmRequest, Hook, HookPreRequestAction, HookRegistryBuilder, OnAbort, OnPromptSubmit, OnTurnEnd,
PreRequestInfo, PreToolCall, PostToolCall, PreLlmRequest, PreRequestInfo, PreToolCall,
}; };
use crate::ipc::alerter::Alerter; use crate::ipc::alerter::Alerter;
use crate::ipc::interceptor::{PodInterceptor, TaskReminderState}; use crate::ipc::interceptor::{PodInterceptor, TaskReminderState};
@ -43,7 +43,6 @@ use crate::runtime::dir;
use crate::runtime::pod_registry::{self, ScopeAllocationGuard, ScopeLockError}; use crate::runtime::pod_registry::{self, ScopeAllocationGuard, ScopeLockError};
use crate::workflow::WorkflowResolveError; use crate::workflow::WorkflowResolveError;
use async_trait::async_trait; use async_trait::async_trait;
use llm_worker::interceptor::PreRequestAction;
use protocol::{ use protocol::{
AlertLevel, AlertSource, Event, RewindSummary, RewindTarget, RewindTargetId, Segment, AlertLevel, AlertSource, Event, RewindSummary, RewindTarget, RewindTargetId, Segment,
}; };
@ -221,9 +220,9 @@ struct UsageTrackingHook {
#[async_trait] #[async_trait]
impl Hook<PreLlmRequest> for UsageTrackingHook { impl Hook<PreLlmRequest> for UsageTrackingHook {
async fn call(&self, info: &PreRequestInfo) -> PreRequestAction { async fn call(&self, info: &PreRequestInfo) -> HookPreRequestAction {
self.tracker.note_request(info.item_count); self.tracker.note_request(info.item_count);
PreRequestAction::Continue HookPreRequestAction::Continue
} }
} }

View File

@ -0,0 +1,63 @@
# Delegation intent: unify reasoning persistence with block lifecycle
Intent:
- Implement the `unify-reasoning-block-lifecycle` ticket by removing the separate `ReasoningItem` side channel and making the reasoning/thinking block lifecycle the single authoritative path for live streaming and persistence.
Requirements:
- Start by mapping the current reasoning paths in `llm-worker`: Anthropic thinking stream, OpenAI Responses reasoning items, Timeline block handlers, `ReasoningItemCollector`, Worker finalization, and request serialization round-trip tests.
- Remove the separate finalized reasoning event path:
- `ReasoningItemEvent`;
- `ReasoningItemKind`;
- `ReasoningItemCollector`;
- `Timeline::on_reasoning_item` / `dispatch_reasoning_item` / `reasoning_item_handlers`.
- Extend the reasoning/thinking block lifecycle so stop/finalization carries all provider material needed to build `Item::Reasoning`:
- text;
- reasoning item id;
- summary;
- encrypted content;
- Anthropic thinking signature;
- redacted thinking payload metadata.
- Convert Anthropic `thinking_delta` / `signature_delta` / redacted thinking handling to finalize through reasoning block stop metadata, without emitting a separate `ReasoningItem` event.
- Convert OpenAI Responses completed reasoning items into reasoning block lifecycle events, including metadata-only reasoning where there is no streamed text delta.
- Update Worker collection/finalization so `Item::Reasoning` is built from reasoning block lifecycle state.
- Preserve live streaming thinking/reasoning callbacks for UI/trace consumers.
- Preserve persisted reasoning history round-trip behavior for Anthropic and OpenAI Responses.
- Remove misleading comments that treat reasoning as meta/single-event content.
- Do not add backward compatibility aliases or keep duplicate old/new reasoning concepts.
Invariants:
- Do not drop provider material required for reasoning round-trip (`signature`, redacted thinking metadata, `id`, `summary`, `encrypted_content`).
- Do not hide model-affecting reasoning persistence in a context-only path; persisted reasoning must remain explainable through committed history items.
- Do not redesign unrelated provider request serialization.
- Do not read ignored secret-like file contents.
- Do not edit the parent workspace; work only in the delegated worktree.
- Do not close the ticket, merge the branch, delete worktrees, or push.
Non-goals:
- No E2E spawned-process test framework.
- No product-level behavior changes beyond event model cleanup.
- No dependency changes.
- No compatibility layer for the removed `ReasoningItem` event path.
Escalate if:
- A provider requires reasoning persistence material that cannot naturally fit in block lifecycle metadata.
- OpenAI Responses reasoning items cannot be represented as synthetic block lifecycle events without losing ordering or identity semantics.
- The change requires broad public API churn outside `llm-worker` and its direct consumers.
- Existing tests imply a behavior conflict between live streaming callbacks and persistence.
Validation:
- Run focused reasoning/timeline/provider tests that cover Anthropic thinking signature/redacted material and OpenAI Responses `id` / `summary` / `encrypted_content` round-trip.
- Run `cargo test -p llm-worker --lib`.
- Run `cargo check --workspace --all-targets`.
- Run `./tickets.sh doctor` and `git diff --check`.
- Run `nix build .#yoi` if feasible; record if skipped and why.
- Commit the implementation in the worktree when reviewable.
Completion report:
- investigation summary;
- implementation summary;
- changed files;
- commit hash(es);
- validation commands and results;
- unresolved risks or parent decisions needed;
- whether ready for external review.

View File

@ -0,0 +1,41 @@
# Implementation report: reasoning block lifecycle
## Investigation
Initial implementation unified reasoning persistence through `BlockStop.reasoning`, but OpenAI Responses text-bearing reasoning items still had two Thinking lifecycles:
1. `response.reasoning_text.delta` streamed through the real reasoning content-part block.
2. `response.content_part.done` stopped that block with no persistence metadata.
3. `response.output_item.done` emitted a second synthetic metadata-only Thinking `BlockStart`/`BlockStop` pair.
That preserved persistence but changed live callback semantics: UI/trace consumers that listen to Thinking block stop callbacks could observe an extra empty Thinking stop after the real streamed reasoning block.
## Fix summary
OpenAI Responses now defers the stop for reasoning `content_part.done` when the part is a Thinking/reasoning-text content block. At `response.output_item.done`, the provider finalizes the deferred existing block with `ReasoningBlockData` instead of creating a second synthetic live-visible block.
Thinking block handler scopes are also keyed by block index, so a deferred reasoning-text stop still uses its original streamed buffer even if another Thinking block (for example a reasoning summary block) starts and stops before `output_item.done`.
Metadata-only reasoning items with no reasoning content-part still emit a synthetic metadata-bearing Thinking block so encrypted/id-only reasoning can be persisted and round-tripped.
The fix preserves:
- live `reasoning_text.delta` Thinking deltas;
- OpenAI Responses `id`, `summary`, and `encrypted_content` persistence;
- a single Thinking lifecycle for text-bearing reasoning items;
- metadata-only reasoning coverage.
## Validation
Passed:
- `cargo test -p llm-worker openai_responses::events::tests::reasoning --lib`
- `cargo test -p llm-worker --lib`
- `cargo check --workspace --all-targets`
- `./tickets.sh doctor`
- `git diff --check`
- `nix build .#yoi`
## Residual risk
The provider delays the stop event for OpenAI Responses reasoning text blocks until `response.output_item.done` so final encrypted/summary metadata can be attached to the same block. This avoids duplicate live stops but means the block stop is slightly later than the raw `content_part.done` SSE for reasoning text. This is intentional for the unified persistence model and covered by focused provider tests for the reviewed sequence.

View File

@ -0,0 +1,57 @@
# Review: unify reasoning block lifecycle
Reviewer Pods:
- Initial review: `reasoning-block-lifecycle-reviewer-20260603`
- Re-review: `reasoning-block-lifecycle-rereviewer-20260603`
## Result
Approved after fixes. No remaining blockers.
## Initial blocker
The initial reviewer found that OpenAI Responses text-bearing reasoning items produced two live-visible Thinking lifecycles:
1. streamed `reasoning_text.delta` used the real Thinking block;
2. later `response.output_item.done` emitted a second synthetic empty Thinking block to carry persistence metadata.
That preserved persistence but changed live callback/trace semantics beyond the intended event-model cleanup.
## Fix verification
Coder added commit `abb6adb fix: preserve openai reasoning live stops`.
The re-review confirmed:
- text-bearing OpenAI reasoning now attaches `ReasoningBlockData` to the deferred existing Thinking block stop;
- no second synthetic empty Thinking start/stop is emitted for text-bearing reasoning;
- metadata-only OpenAI reasoning still persists through a synthetic metadata-bearing Thinking block when no live reasoning text block exists;
- Anthropic signature/redacted material and OpenAI id/summary/encrypted_content remain carried through `ReasoningBlockData`;
- the old `ReasoningItem` event/collector/Timeline side channel was not reintroduced.
## Validation evidence
Coder reported:
- `cargo test -p llm-worker --lib`
- `cargo test -p llm-worker --test reasoning_round_trip_test`
- `cargo check --workspace --all-targets`
- `./tickets.sh doctor`
- `git diff --check`
- `nix build .#yoi`
Parent/orchestrator reran after merge:
- `cargo test -p llm-worker openai_responses::events::tests::reasoning --lib`
- `cargo test -p llm-worker --lib`
- `cargo test -p llm-worker --test reasoning_round_trip_test`
- `cargo check --workspace --all-targets`
- `./tickets.sh doctor`
- `git diff --check`
- `nix build .#yoi`
- `./result/bin/yoi pod --help`
## Residual risk
Low. OpenAI Responses reasoning-text block stop is intentionally delayed until `response.output_item.done` so provider persistence metadata can be attached to the same Thinking block lifecycle. The remaining edge risk is unusual ordering/error behavior around multiple concurrent Thinking scopes, but the implementation now keys Thinking scopes by block index and focused tests cover the reviewed duplicate-stop sequence.

View File

@ -2,12 +2,12 @@
id: 20260603-001124-unify-reasoning-block-lifecycle id: 20260603-001124-unify-reasoning-block-lifecycle
slug: unify-reasoning-block-lifecycle slug: unify-reasoning-block-lifecycle
title: Unify reasoning persistence with block lifecycle title: Unify reasoning persistence with block lifecycle
status: open status: closed
kind: task kind: task
priority: P2 priority: P2
labels: [llm-worker, reasoning, timeline] labels: [llm-worker, reasoning, timeline]
created_at: 2026-06-03T00:11:24Z created_at: 2026-06-03T00:11:24Z
updated_at: 2026-06-03T00:11:24Z updated_at: 2026-06-03T02:15:02Z
assignee: null assignee: null
legacy_ticket: null legacy_ticket: null
--- ---

View File

@ -0,0 +1 @@
Unified reasoning persistence with the Thinking block lifecycle. Removed the separate ReasoningItem event/collector path; Anthropic and OpenAI Responses reasoning metadata now persist via block stop metadata; reviewer blocker fixed; focused tests and nix build passed.

View File

@ -0,0 +1,152 @@
<!-- event: create author: tickets.sh at: 2026-06-03T00:11:24Z -->
## Created
Created by tickets.sh create.
---
<!-- event: plan author: hare at: 2026-06-03T00:51:09Z -->
## Plan
# Delegation intent: unify reasoning persistence with block lifecycle
Intent:
- Implement the `unify-reasoning-block-lifecycle` ticket by removing the separate `ReasoningItem` side channel and making the reasoning/thinking block lifecycle the single authoritative path for live streaming and persistence.
Requirements:
- Start by mapping the current reasoning paths in `llm-worker`: Anthropic thinking stream, OpenAI Responses reasoning items, Timeline block handlers, `ReasoningItemCollector`, Worker finalization, and request serialization round-trip tests.
- Remove the separate finalized reasoning event path:
- `ReasoningItemEvent`;
- `ReasoningItemKind`;
- `ReasoningItemCollector`;
- `Timeline::on_reasoning_item` / `dispatch_reasoning_item` / `reasoning_item_handlers`.
- Extend the reasoning/thinking block lifecycle so stop/finalization carries all provider material needed to build `Item::Reasoning`:
- text;
- reasoning item id;
- summary;
- encrypted content;
- Anthropic thinking signature;
- redacted thinking payload metadata.
- Convert Anthropic `thinking_delta` / `signature_delta` / redacted thinking handling to finalize through reasoning block stop metadata, without emitting a separate `ReasoningItem` event.
- Convert OpenAI Responses completed reasoning items into reasoning block lifecycle events, including metadata-only reasoning where there is no streamed text delta.
- Update Worker collection/finalization so `Item::Reasoning` is built from reasoning block lifecycle state.
- Preserve live streaming thinking/reasoning callbacks for UI/trace consumers.
- Preserve persisted reasoning history round-trip behavior for Anthropic and OpenAI Responses.
- Remove misleading comments that treat reasoning as meta/single-event content.
- Do not add backward compatibility aliases or keep duplicate old/new reasoning concepts.
Invariants:
- Do not drop provider material required for reasoning round-trip (`signature`, redacted thinking metadata, `id`, `summary`, `encrypted_content`).
- Do not hide model-affecting reasoning persistence in a context-only path; persisted reasoning must remain explainable through committed history items.
- Do not redesign unrelated provider request serialization.
- Do not read ignored secret-like file contents.
- Do not edit the parent workspace; work only in the delegated worktree.
- Do not close the ticket, merge the branch, delete worktrees, or push.
Non-goals:
- No E2E spawned-process test framework.
- No product-level behavior changes beyond event model cleanup.
- No dependency changes.
- No compatibility layer for the removed `ReasoningItem` event path.
Escalate if:
- A provider requires reasoning persistence material that cannot naturally fit in block lifecycle metadata.
- OpenAI Responses reasoning items cannot be represented as synthetic block lifecycle events without losing ordering or identity semantics.
- The change requires broad public API churn outside `llm-worker` and its direct consumers.
- Existing tests imply a behavior conflict between live streaming callbacks and persistence.
Validation:
- Run focused reasoning/timeline/provider tests that cover Anthropic thinking signature/redacted material and OpenAI Responses `id` / `summary` / `encrypted_content` round-trip.
- Run `cargo test -p llm-worker --lib`.
- Run `cargo check --workspace --all-targets`.
- Run `./tickets.sh doctor` and `git diff --check`.
- Run `nix build .#yoi` if feasible; record if skipped and why.
- Commit the implementation in the worktree when reviewable.
Completion report:
- investigation summary;
- implementation summary;
- changed files;
- commit hash(es);
- validation commands and results;
- unresolved risks or parent decisions needed;
- whether ready for external review.
---
<!-- event: review author: hare at: 2026-06-03T02:15:01Z status: approve -->
## Review: approve
# Review: unify reasoning block lifecycle
Reviewer Pods:
- Initial review: `reasoning-block-lifecycle-reviewer-20260603`
- Re-review: `reasoning-block-lifecycle-rereviewer-20260603`
## Result
Approved after fixes. No remaining blockers.
## Initial blocker
The initial reviewer found that OpenAI Responses text-bearing reasoning items produced two live-visible Thinking lifecycles:
1. streamed `reasoning_text.delta` used the real Thinking block;
2. later `response.output_item.done` emitted a second synthetic empty Thinking block to carry persistence metadata.
That preserved persistence but changed live callback/trace semantics beyond the intended event-model cleanup.
## Fix verification
Coder added commit `abb6adb fix: preserve openai reasoning live stops`.
The re-review confirmed:
- text-bearing OpenAI reasoning now attaches `ReasoningBlockData` to the deferred existing Thinking block stop;
- no second synthetic empty Thinking start/stop is emitted for text-bearing reasoning;
- metadata-only OpenAI reasoning still persists through a synthetic metadata-bearing Thinking block when no live reasoning text block exists;
- Anthropic signature/redacted material and OpenAI id/summary/encrypted_content remain carried through `ReasoningBlockData`;
- the old `ReasoningItem` event/collector/Timeline side channel was not reintroduced.
## Validation evidence
Coder reported:
- `cargo test -p llm-worker --lib`
- `cargo test -p llm-worker --test reasoning_round_trip_test`
- `cargo check --workspace --all-targets`
- `./tickets.sh doctor`
- `git diff --check`
- `nix build .#yoi`
Parent/orchestrator reran after merge:
- `cargo test -p llm-worker openai_responses::events::tests::reasoning --lib`
- `cargo test -p llm-worker --lib`
- `cargo test -p llm-worker --test reasoning_round_trip_test`
- `cargo check --workspace --all-targets`
- `./tickets.sh doctor`
- `git diff --check`
- `nix build .#yoi`
- `./result/bin/yoi pod --help`
## Residual risk
Low. OpenAI Responses reasoning-text block stop is intentionally delayed until `response.output_item.done` so provider persistence metadata can be attached to the same Thinking block lifecycle. The remaining edge risk is unusual ordering/error behavior around multiple concurrent Thinking scopes, but the implementation now keys Thinking scopes by block index and focused tests cover the reviewed duplicate-stop sequence.
---
<!-- event: close author: hare at: 2026-06-03T02:15:02Z status: closed -->
## Closed
Unified reasoning persistence with the Thinking block lifecycle. Removed the separate ReasoningItem event/collector path; Anthropic and OpenAI Responses reasoning metadata now persist via block stop metadata; reviewer blocker fixed; focused tests and nix build passed.
---

View File

@ -0,0 +1,84 @@
# Delegation intent: Hook public surface hardening
## Intent
Harden `pod::hook` so it can be safely used as the public Hook contribution boundary for the feature/plugin registry. Public Hooks must not expose internal `llm_worker::Interceptor` action types that can inject raw model-visible `Item` values into request/history paths.
## Requirements
- Audit the current public Hook API in `crates/pod/src/hook.rs` and its bridge in `crates/pod/src/ipc/interceptor.rs`.
- Replace or wrap public Hook outputs that currently reuse internal interceptor action types with safe public action subsets.
- `OnPromptSubmit` already uses `HookPromptAction`; use the same pattern for events that need public actions.
- Public Hook APIs must not expose raw `Item` vector injection such as `PreRequestAction::ContinueWith(Vec<Item>)` or `TurnEndAction::ContinueWithMessages(Vec<Item>)`.
- Preserve internal mechanisms that legitimately need richer `llm_worker::Interceptor` actions, but keep them internal and separate from public feature/plugin Hooks.
- Preserve current manifest permission policy behavior.
- `PreToolCall` deny/ask still fails closed through the existing synthetic tool result path.
- Preserve usage tracking behavior.
- Clarify through names/types/tests which Hook events are observation-only and which can cancel/abort/yield/deny.
- Add focused Pod-layer tests for public Hook behavior and short-circuit ordering.
## Invariants
- Do not add hidden prompt/context injection paths.
- Do not mutate session history from public Hooks except through already-approved durable host paths.
- Do not expose `llm_worker::Item` or raw history/message vectors through public plugin/feature Hook actions.
- Do not implement plugin runtime, feature registry, MCP, or WorkItem tools in this ticket.
- Do not weaken manifest permission enforcement.
- Keep `llm_worker::Interceptor` internal capabilities available where currently required by Pod internals.
## Non-goals
- Implementing `plugin-feature-contribution-registry`.
- Adding new Hook event kinds unless the audit finds a strict safety gap.
- Allowing Hooks to rewrite tool outputs or arbitrary model context.
- Broad refactors of Pod/Worker runtime unrelated to the Hook surface.
## Suggested files to inspect
- `crates/pod/src/hook.rs`
- `crates/pod/src/ipc/interceptor.rs`
- `crates/pod/src/permission.rs`
- `crates/pod/src/pod.rs`
- `crates/llm-worker/src/interceptor.rs`
- `crates/llm-worker/tests/parallel_execution_test.rs`
## Known observations from pre-delegation investigation
- Current production Hook use is light:
- `PermissionHook` implements `Hook<PreToolCall>` in `crates/pod/src/permission.rs`.
- `UsageTrackingHook` uses `PreLlmRequest` from `crates/pod/src/pod.rs`.
- `OnPromptSubmit` already has a safe public subset action: `HookPromptAction`.
- `PreLlmRequest` and `OnTurnEnd` currently expose internal action types with raw `Item` injection capability.
- `PostToolCall` is currently observation + abort only and does not rewrite tool output; keep that conservative unless a strictly bounded explicit transform is justified, which is not expected for this ticket.
- Existing `llm-worker` interceptor tests cover some lower-level behavior, but Pod-layer Hook coverage should be improved.
- `cargo test -p pod hook --lib` passed during investigation.
- Individual relevant `llm-worker` interceptor tests passed, but the full `parallel_execution_test` file had an unrelated timing-sensitive failure (`test_parallel_tool_execution` took ~1.37s instead of ~100ms). Do not treat that file-wide failure as a Hook blocker without confirming.
## Validation
Run at least:
- `cargo test -p pod hook --lib`
- focused Pod Hook tests added/updated by this ticket
- `cargo test -p pod --lib`
- `cargo test -p llm-worker --lib`
- `cargo check --workspace --all-targets`
- `cargo fmt --check`
- `./tickets.sh doctor`
- `git diff --check`
If broader validation fails due to pre-existing unrelated timing flakes, report exact command/output and run focused commands that isolate this change.
## Completion report
Report:
- worktree path / branch
- commit hash
- changed files
- public Hook API changes
- internal mechanism separation
- tests added/updated
- validation commands and results
- unresolved risks or follow-up recommendations
- whether the work is ready for external review

View File

@ -0,0 +1,52 @@
# External rereview: hook public surface hardening
## 1. Result: approve
Approve. The previous blocker is fixed: the public pre-tool hook action surface no longer exposes a no-result skip action, and public deny still maps to a synthetic error result.
## 2. Summary of rereviewed changes
The follow-up commit `a4e30e2 fix: remove public hook skip action` narrowed `HookPreToolAction` by removing the `Skip` variant and its conversion to `llm_worker::interceptor::PreToolAction::Skip`. The public pre-tool actions are now `Continue`, `Deny(String)`, `Abort(String)`, and `Pause`.
`HookPreToolAction::Deny` still converts internally to `PreToolAction::SyntheticResult(ToolResult::error(call_id, reason))`, preserving the fail-closed/synthetic-result path needed by manifest permissions and future public hooks.
## 3. Requirement-by-requirement assessment
- Previous blocker fixed: satisfied. `HookPreToolAction::Skip` is gone, and grep found no `HookPreToolAction::Skip`, `PreToolAction::Skip`, or standalone `Skip` usage under `crates/pod/src`.
- Public pre-tool deny maps to synthetic error result: satisfied. `HookPreToolAction::Deny` converts to `PreToolAction::SyntheticResult(ToolResult::error(...))`; existing interceptor tests also assert the synthetic result has the expected call id, summary, no content, and `is_error = true`.
- Public Hook API exposes no no-result skip: satisfied. The only no-result skip capability remains in the lower-level `llm_worker` interceptor model; it is no longer reachable through the public `pod::hook::HookPreToolAction` surface.
- Public Hook API exposes no raw `Item` injection: satisfied. `PreLlmRequest` and `OnTurnEnd` use safe public action types rather than raw `PreRequestAction::ContinueWith(Vec<Item>)` or `TurnEndAction::ContinueWithMessages(Vec<Item>)` as hook outputs.
- Public Hook API exposes no arbitrary `ToolResult` construction: satisfied for action outputs. Public pre-tool hooks provide only a denial message; Pod constructs the synthetic error result internally.
- Internal capabilities remain internal: satisfied. Internal Pod/Worker code still uses richer `llm_worker::Interceptor` actions where needed, including durable host-owned prompt append paths and compact/internal mechanisms.
- Tests cover the fixed path sufficiently: satisfied. The added `public_pre_tool_hook_actions_cannot_emit_internal_no_result_skip` unit test verifies the available public pre-tool conversions and asserts public deny produces a synthetic result. The existing `public_pre_tool_hook_deny_becomes_synthetic_error_and_short_circuits` integration-style interceptor test covers bridge behavior and ordering.
## 4. Blockers
None.
## 5. Non-blockers / follow-ups
- As noted in the original review, `ToolResultSummary` still exposes `llm_worker::tool::ToolOutput` as public hook input. This is not a blocker because it does not allow output rewriting or arbitrary `ToolResult` construction, but the future plugin/feature registry may still want a Pod-owned summary type before freezing a public API boundary.
## 6. Validation assessed or rerun
Rerun/read-only checks from `/home/hare/Projects/yoi/.worktree/hook-public-surface-hardening`:
- `git rev-parse HEAD` reported `a4e30e292abf5c640b923e3307a75eded366351a`.
- `git status --short` was clean.
- `git diff --check develop...HEAD` passed.
- `cargo fmt --check` passed.
- `./tickets.sh doctor` passed.
- `git grep -n -E 'HookPreToolAction::Skip|PreToolAction::Skip|pub (use|type).*\b(PreRequestAction|TurnEndAction|PreToolAction|PostToolAction|ToolResult|Item)\b' crates/pod/src` produced no matches.
Assessed by source review:
- Follow-up commit `a4e30e2` diff.
- Full diff `develop...HEAD` for `crates/pod/src/hook.rs`, `crates/pod/src/ipc/interceptor.rs`, `crates/pod/src/permission.rs`, and `crates/pod/src/pod.rs`.
- Relevant lower-level `llm-worker` pre-tool handling to confirm internal skip remains lower-level only.
I did not run `cargo test`, `cargo check`, or `nix build`; those commands would write build artifacts outside this review artifact scope. The fixed-path tests were assessed from source.
## 7. Residual risk
No merge-blocking residual risk found. The remaining risk is API polish for the future feature/plugin registry, especially whether all hook input summary types should become Pod-owned wrapper types before being treated as a stable plugin boundary.

View File

@ -0,0 +1,65 @@
# External review: hook public surface hardening
## 1. Result: request changes
Request changes. The implementation largely moves prompt/request/turn-end hook actions behind public wrapper types and preserves the internal `llm_worker::Interceptor` action model, but one public pre-tool action still exposes the unsafe internal skip semantics instead of the ticketed fail-closed/synthetic-result behavior.
## 2. Summary of implementation
The coder introduced a public `pod::hook` action surface with event-specific wrapper actions:
- `HookPreRequestAction` and `HookTurnEndAction` expose `Continue`, `Abort`, and bounded textual prompt actions instead of raw `llm_worker::Item` continuation actions.
- `HookPreToolAction` exposes `Continue`, `Skip`, `Deny`, `Pause`, and `Abort`, with `Deny` carrying a public message string that is converted into an internal synthetic tool result.
- `HookPostToolAction` exposes only `Continue` and `Abort`.
- `PodInterceptor` now adapts public hook outputs to the richer internal `llm_worker::Interceptor` actions, so internal code can still use `PreRequestAction::ContinueWith`, `TurnEndAction::ContinueWithMessages`, and synthetic `ToolResult` construction where needed.
- Permission policy was adapted onto `HookPreToolAction::Deny`, preserving synthetic denial results for `deny` and fail-closed `ask`.
## 3. Requirement-by-requirement assessment
- Public `pod::hook` surface no longer exposes raw request/turn continuation injection: mostly satisfied. I did not find a public re-export or alias of `PreRequestAction`, `TurnEndAction`, raw `Item`, or arbitrary `ToolResult` construction through `pod::hook`. Public pre-request and turn-end hooks can only emit textual prompt actions plus continue/abort, and the raw `Item` conversions remain internal to the adapter.
- Internal mechanisms that need richer `llm_worker::Interceptor` actions remain internal: satisfied. The bridge still maps public prompt actions into internal `PreRequestAction::ContinueWith` / `TurnEndAction::ContinueWithMessages`, and compact/internal interceptors can keep using the richer worker-level API.
- Manifest permission policy fail-closed behavior: satisfied for deny/ask. `PolicyDecision::Deny` and `PolicyDecision::Ask` both convert to public `HookPreToolAction::Deny`, and the bridge converts that to internal `PreToolAction::SyntheticResult` with `is_error = true`.
- Public hooks cannot invisibly mutate prompt context/history: not fully satisfied. Pre-request and turn-end prompt mutations are explicit textual hook actions, but public pre-tool `Skip` still maps to the internal no-result skip path; see blocker below.
- Public hook names/types are usable for a future feature/plugin API: broadly satisfied. The event-specific `Hook*Action` types are clearer than exporting internal worker actions. One follow-up API tightening is noted below.
- No unnecessary compatibility aliases or broad refactors: satisfied. The diff is limited to the hook bridge, permission adapter, `Pod` registration plumbing, and tests.
- Tests cover public hook behavior and short-circuit ordering: partially satisfied. Added tests cover pre-request/turn-end public prompt actions, pre-request abort short-circuiting, pre-tool deny synthetic result, post-tool abort, and registration ordering. They do not cover the public `Skip` behavior required by the ticket.
## 4. Blockers
### Blocker: public `HookPreToolAction::Skip` keeps the internal no-result skip semantics
`crates/pod/src/hook.rs` exposes `HookPreToolAction::Skip` as a public action, documented as skipping the tool call without executing it, and converts it directly to `llm_worker::interceptor::PreToolAction::Skip`. In `llm-worker`, `PreToolAction::Skip` removes the call from the approved tool list and does not construct a synthetic `ToolResult`.
That conflicts with the ticket/delegation requirement that public pre-tool hooks can deny/skip only through the intended synthetic-result path. It also means a public feature/plugin hook can cause a model-emitted tool call to have no corresponding tool result, which is an invisible conversation/tool-history mutation and can break the assistant/tool-call pairing expected by later model requests.
Required fix before merge: either remove public `Skip`, or define the public skip/deny path as a synthetic error/result action rather than mapping to internal `PreToolAction::Skip`. Add a test that the public skip/deny path produces a synthetic tool result and preserves tool-call/result pairing or, if public skip is intentionally unsupported, that the public API cannot express it.
## 5. Non-blockers / follow-ups
- `ToolResultSummary` still exposes `llm_worker::tool::ToolOutput` as part of the public hook API. This does not allow arbitrary `ToolResult` construction or output rewriting, so I am not treating it as a merge blocker for this ticket. For the plugin/feature registry API, a dedicated bounded public summary type would reduce coupling to `llm_worker` internals.
- The current public action names are mechanically clear but somewhat verbose (`HookPreRequestAction`, `HookTurnEndAction`, etc.). They are acceptable for this hardening step; any naming polish can happen as part of the broader registry/API design.
## 6. Validation assessed or rerun
Rerun/read-only checks from `/home/hare/Projects/yoi/.worktree/hook-public-surface-hardening`:
- `git status --short` was clean.
- `git rev-parse HEAD` reported `2f020ed0bb3b9487ebc4671afd969ee5c8727cfa`.
- `git diff --check develop...HEAD` passed.
- `cargo fmt --check` passed.
- `./tickets.sh doctor` passed.
Assessed by source review:
- `git diff develop...HEAD`
- `crates/pod/src/hook.rs`
- `crates/pod/src/ipc/interceptor.rs`
- `crates/pod/src/permission.rs`
- `crates/pod/src/pod.rs`
- relevant `llm-worker` pre-tool handling
I did not run `cargo test`, `cargo check`, or `nix build`; those commands would write build artifacts outside this review artifact scope. Existing tests were reviewed from the diff/source instead.
## 7. Residual risk
After fixing the public skip path, the main remaining risk is API shape stabilization for the future plugin/feature registry: public hook inputs still share some worker/tool data types, and those should be deliberately frozen or wrapped before becoming a long-term plugin ABI. The raw `Item` injection risk for pre-request and turn-end hooks appears addressed by this implementation.

View File

@ -0,0 +1,58 @@
---
id: 20260603-122317-hook-public-surface-hardening
slug: hook-public-surface-hardening
title: Hook: harden public hook surface before plugin exposure
status: closed
kind: task
priority: P1
labels: [hooks, plugin, safety, tests]
created_at: 2026-06-03T12:23:17Z
updated_at: 2026-06-03T17:07:44Z
assignee: null
legacy_ticket: null
---
## Issue
`crates/pod/src/hook.rs` is already positioned as the public orchestration extension point over the internal `llm_worker::Interceptor`, but actual production usage is still light. Current usage is mostly manifest permission policy (`PreToolCall`) and usage tracking (`PreLlmRequest`). Before external or plugin-like features rely on Hooks, the public Hook surface needs tighter safety boundaries and better Pod-layer test coverage.
The main concern is that some Hook outputs still reuse internal `llm_worker::interceptor` action types. In particular, `PreRequestAction::ContinueWith(Vec<Item>)` and `TurnEndAction::ContinueWithMessages(Vec<Item>)` allow model-visible item injection from a Hook path. That conflicts with the intended public rule that Hooks must not mutate history/context except through approved durable paths.
## Requirements
- Audit the public Hook API against the intended plugin/feature contract.
- Replace or wrap internal interceptor action types with safe public Hook action subsets where needed.
- `OnPromptSubmit` already has a safe `HookPromptAction`; use the same pattern for other public Hook events.
- Public Plugin/Feature Hooks should not be able to return raw `Item` vectors for hidden context/history mutation.
- Preserve internal mechanisms that legitimately need richer interceptor actions, but keep them separate from public feature/plugin Hooks.
- Keep manifest permission policy behavior intact.
- Deny/ask still fails closed with synthetic tool results as today.
- Clarify which Hook events are observational only and which can cancel/abort/yield/deny.
- Add Pod-layer tests for Hook behavior that is currently only lightly covered.
## Suggested test coverage
- `OnPromptSubmit` Hook can cancel a submitted prompt through the safe public action type.
- `PreLlmRequest` public Hook runs under normal conditions and cannot inject raw `Item` context.
- `PreToolCall` Hook can deny/skip with a synthetic result through the intended path.
- `PostToolCall` Hook can observe a bounded summary and abort when allowed, but cannot rewrite tool output unless an explicit safe transform capability is introduced.
- `OnTurnEnd` public Hook cannot append arbitrary messages through raw internal action types.
- `OnAbort` Hook is called on abort paths.
- Multiple Hooks execute in registration order and short-circuit on non-continue actions.
- Internal mechanisms run in the expected order relative to public Hooks.
## Non-goals
- Implementing a Plugin runtime.
- Implementing a feature registry.
- Adding declarative hooks.
- Adding new Hook event kinds unless a gap is found during the audit.
- Allowing Hooks to rewrite arbitrary history/tool/model context.
## Acceptance criteria
- Public `pod::hook` APIs no longer expose internal action types that allow raw model-visible `Item` injection.
- Internal interceptor capabilities remain available only through internal mechanisms where justified.
- Existing manifest permission policy and usage tracking behavior remain intact.
- Focused Pod-layer tests cover the Hook events and short-circuit behavior needed by the feature registry/plugin surface.
- The resulting Hook surface is safe to reference from `plugin-feature-contribution-registry` as the public Hook contribution boundary.

View File

@ -0,0 +1 @@
Hardened public pod::hook actions for plugin/feature exposure. Public hooks no longer expose raw Item injection or no-result pre-tool skip; public pre-tool deny maps to synthetic error results; internal interceptor capabilities remain internal. Added focused Pod-layer hook tests; reviewer blocker fixed and rereview approved. Validation passed: cargo test -p pod hook --lib, cargo test -p pod --lib, cargo test -p llm-worker --lib, cargo fmt --check, cargo check --workspace --all-targets, ./tickets.sh doctor, git diff --check, nix build .#yoi, ./result/bin/yoi pod --help.

View File

@ -0,0 +1,241 @@
<!-- event: create author: tickets.sh at: 2026-06-03T12:23:17Z -->
## Created
Created by tickets.sh create.
---
<!-- event: plan author: hare at: 2026-06-03T16:36:57Z -->
## Plan
# Delegation intent: Hook public surface hardening
## Intent
Harden `pod::hook` so it can be safely used as the public Hook contribution boundary for the feature/plugin registry. Public Hooks must not expose internal `llm_worker::Interceptor` action types that can inject raw model-visible `Item` values into request/history paths.
## Requirements
- Audit the current public Hook API in `crates/pod/src/hook.rs` and its bridge in `crates/pod/src/ipc/interceptor.rs`.
- Replace or wrap public Hook outputs that currently reuse internal interceptor action types with safe public action subsets.
- `OnPromptSubmit` already uses `HookPromptAction`; use the same pattern for events that need public actions.
- Public Hook APIs must not expose raw `Item` vector injection such as `PreRequestAction::ContinueWith(Vec<Item>)` or `TurnEndAction::ContinueWithMessages(Vec<Item>)`.
- Preserve internal mechanisms that legitimately need richer `llm_worker::Interceptor` actions, but keep them internal and separate from public feature/plugin Hooks.
- Preserve current manifest permission policy behavior.
- `PreToolCall` deny/ask still fails closed through the existing synthetic tool result path.
- Preserve usage tracking behavior.
- Clarify through names/types/tests which Hook events are observation-only and which can cancel/abort/yield/deny.
- Add focused Pod-layer tests for public Hook behavior and short-circuit ordering.
## Invariants
- Do not add hidden prompt/context injection paths.
- Do not mutate session history from public Hooks except through already-approved durable host paths.
- Do not expose `llm_worker::Item` or raw history/message vectors through public plugin/feature Hook actions.
- Do not implement plugin runtime, feature registry, MCP, or WorkItem tools in this ticket.
- Do not weaken manifest permission enforcement.
- Keep `llm_worker::Interceptor` internal capabilities available where currently required by Pod internals.
## Non-goals
- Implementing `plugin-feature-contribution-registry`.
- Adding new Hook event kinds unless the audit finds a strict safety gap.
- Allowing Hooks to rewrite tool outputs or arbitrary model context.
- Broad refactors of Pod/Worker runtime unrelated to the Hook surface.
## Suggested files to inspect
- `crates/pod/src/hook.rs`
- `crates/pod/src/ipc/interceptor.rs`
- `crates/pod/src/permission.rs`
- `crates/pod/src/pod.rs`
- `crates/llm-worker/src/interceptor.rs`
- `crates/llm-worker/tests/parallel_execution_test.rs`
## Known observations from pre-delegation investigation
- Current production Hook use is light:
- `PermissionHook` implements `Hook<PreToolCall>` in `crates/pod/src/permission.rs`.
- `UsageTrackingHook` uses `PreLlmRequest` from `crates/pod/src/pod.rs`.
- `OnPromptSubmit` already has a safe public subset action: `HookPromptAction`.
- `PreLlmRequest` and `OnTurnEnd` currently expose internal action types with raw `Item` injection capability.
- `PostToolCall` is currently observation + abort only and does not rewrite tool output; keep that conservative unless a strictly bounded explicit transform is justified, which is not expected for this ticket.
- Existing `llm-worker` interceptor tests cover some lower-level behavior, but Pod-layer Hook coverage should be improved.
- `cargo test -p pod hook --lib` passed during investigation.
- Individual relevant `llm-worker` interceptor tests passed, but the full `parallel_execution_test` file had an unrelated timing-sensitive failure (`test_parallel_tool_execution` took ~1.37s instead of ~100ms). Do not treat that file-wide failure as a Hook blocker without confirming.
## Validation
Run at least:
- `cargo test -p pod hook --lib`
- focused Pod Hook tests added/updated by this ticket
- `cargo test -p pod --lib`
- `cargo test -p llm-worker --lib`
- `cargo check --workspace --all-targets`
- `cargo fmt --check`
- `./tickets.sh doctor`
- `git diff --check`
If broader validation fails due to pre-existing unrelated timing flakes, report exact command/output and run focused commands that isolate this change.
## Completion report
Report:
- worktree path / branch
- commit hash
- changed files
- public Hook API changes
- internal mechanism separation
- tests added/updated
- validation commands and results
- unresolved risks or follow-up recommendations
- whether the work is ready for external review
---
<!-- event: review author: hare at: 2026-06-03T16:58:49Z status: request_changes -->
## Review: request changes
# External review: hook public surface hardening
## 1. Result: request changes
Request changes. The implementation largely moves prompt/request/turn-end hook actions behind public wrapper types and preserves the internal `llm_worker::Interceptor` action model, but one public pre-tool action still exposes the unsafe internal skip semantics instead of the ticketed fail-closed/synthetic-result behavior.
## 2. Summary of implementation
The coder introduced a public `pod::hook` action surface with event-specific wrapper actions:
- `HookPreRequestAction` and `HookTurnEndAction` expose `Continue`, `Abort`, and bounded textual prompt actions instead of raw `llm_worker::Item` continuation actions.
- `HookPreToolAction` exposes `Continue`, `Skip`, `Deny`, `Pause`, and `Abort`, with `Deny` carrying a public message string that is converted into an internal synthetic tool result.
- `HookPostToolAction` exposes only `Continue` and `Abort`.
- `PodInterceptor` now adapts public hook outputs to the richer internal `llm_worker::Interceptor` actions, so internal code can still use `PreRequestAction::ContinueWith`, `TurnEndAction::ContinueWithMessages`, and synthetic `ToolResult` construction where needed.
- Permission policy was adapted onto `HookPreToolAction::Deny`, preserving synthetic denial results for `deny` and fail-closed `ask`.
## 3. Requirement-by-requirement assessment
- Public `pod::hook` surface no longer exposes raw request/turn continuation injection: mostly satisfied. I did not find a public re-export or alias of `PreRequestAction`, `TurnEndAction`, raw `Item`, or arbitrary `ToolResult` construction through `pod::hook`. Public pre-request and turn-end hooks can only emit textual prompt actions plus continue/abort, and the raw `Item` conversions remain internal to the adapter.
- Internal mechanisms that need richer `llm_worker::Interceptor` actions remain internal: satisfied. The bridge still maps public prompt actions into internal `PreRequestAction::ContinueWith` / `TurnEndAction::ContinueWithMessages`, and compact/internal interceptors can keep using the richer worker-level API.
- Manifest permission policy fail-closed behavior: satisfied for deny/ask. `PolicyDecision::Deny` and `PolicyDecision::Ask` both convert to public `HookPreToolAction::Deny`, and the bridge converts that to internal `PreToolAction::SyntheticResult` with `is_error = true`.
- Public hooks cannot invisibly mutate prompt context/history: not fully satisfied. Pre-request and turn-end prompt mutations are explicit textual hook actions, but public pre-tool `Skip` still maps to the internal no-result skip path; see blocker below.
- Public hook names/types are usable for a future feature/plugin API: broadly satisfied. The event-specific `Hook*Action` types are clearer than exporting internal worker actions. One follow-up API tightening is noted below.
- No unnecessary compatibility aliases or broad refactors: satisfied. The diff is limited to the hook bridge, permission adapter, `Pod` registration plumbing, and tests.
- Tests cover public hook behavior and short-circuit ordering: partially satisfied. Added tests cover pre-request/turn-end public prompt actions, pre-request abort short-circuiting, pre-tool deny synthetic result, post-tool abort, and registration ordering. They do not cover the public `Skip` behavior required by the ticket.
## 4. Blockers
### Blocker: public `HookPreToolAction::Skip` keeps the internal no-result skip semantics
`crates/pod/src/hook.rs` exposes `HookPreToolAction::Skip` as a public action, documented as skipping the tool call without executing it, and converts it directly to `llm_worker::interceptor::PreToolAction::Skip`. In `llm-worker`, `PreToolAction::Skip` removes the call from the approved tool list and does not construct a synthetic `ToolResult`.
That conflicts with the ticket/delegation requirement that public pre-tool hooks can deny/skip only through the intended synthetic-result path. It also means a public feature/plugin hook can cause a model-emitted tool call to have no corresponding tool result, which is an invisible conversation/tool-history mutation and can break the assistant/tool-call pairing expected by later model requests.
Required fix before merge: either remove public `Skip`, or define the public skip/deny path as a synthetic error/result action rather than mapping to internal `PreToolAction::Skip`. Add a test that the public skip/deny path produces a synthetic tool result and preserves tool-call/result pairing or, if public skip is intentionally unsupported, that the public API cannot express it.
## 5. Non-blockers / follow-ups
- `ToolResultSummary` still exposes `llm_worker::tool::ToolOutput` as part of the public hook API. This does not allow arbitrary `ToolResult` construction or output rewriting, so I am not treating it as a merge blocker for this ticket. For the plugin/feature registry API, a dedicated bounded public summary type would reduce coupling to `llm_worker` internals.
- The current public action names are mechanically clear but somewhat verbose (`HookPreRequestAction`, `HookTurnEndAction`, etc.). They are acceptable for this hardening step; any naming polish can happen as part of the broader registry/API design.
## 6. Validation assessed or rerun
Rerun/read-only checks from `/home/hare/Projects/yoi/.worktree/hook-public-surface-hardening`:
- `git status --short` was clean.
- `git rev-parse HEAD` reported `2f020ed0bb3b9487ebc4671afd969ee5c8727cfa`.
- `git diff --check develop...HEAD` passed.
- `cargo fmt --check` passed.
- `./tickets.sh doctor` passed.
Assessed by source review:
- `git diff develop...HEAD`
- `crates/pod/src/hook.rs`
- `crates/pod/src/ipc/interceptor.rs`
- `crates/pod/src/permission.rs`
- `crates/pod/src/pod.rs`
- relevant `llm-worker` pre-tool handling
I did not run `cargo test`, `cargo check`, or `nix build`; those commands would write build artifacts outside this review artifact scope. Existing tests were reviewed from the diff/source instead.
## 7. Residual risk
After fixing the public skip path, the main remaining risk is API shape stabilization for the future plugin/feature registry: public hook inputs still share some worker/tool data types, and those should be deliberately frozen or wrapped before becoming a long-term plugin ABI. The raw `Item` injection risk for pre-request and turn-end hooks appears addressed by this implementation.
---
<!-- event: review author: hare at: 2026-06-03T17:05:43Z status: approve -->
## Review: approve
# External rereview: hook public surface hardening
## 1. Result: approve
Approve. The previous blocker is fixed: the public pre-tool hook action surface no longer exposes a no-result skip action, and public deny still maps to a synthetic error result.
## 2. Summary of rereviewed changes
The follow-up commit `a4e30e2 fix: remove public hook skip action` narrowed `HookPreToolAction` by removing the `Skip` variant and its conversion to `llm_worker::interceptor::PreToolAction::Skip`. The public pre-tool actions are now `Continue`, `Deny(String)`, `Abort(String)`, and `Pause`.
`HookPreToolAction::Deny` still converts internally to `PreToolAction::SyntheticResult(ToolResult::error(call_id, reason))`, preserving the fail-closed/synthetic-result path needed by manifest permissions and future public hooks.
## 3. Requirement-by-requirement assessment
- Previous blocker fixed: satisfied. `HookPreToolAction::Skip` is gone, and grep found no `HookPreToolAction::Skip`, `PreToolAction::Skip`, or standalone `Skip` usage under `crates/pod/src`.
- Public pre-tool deny maps to synthetic error result: satisfied. `HookPreToolAction::Deny` converts to `PreToolAction::SyntheticResult(ToolResult::error(...))`; existing interceptor tests also assert the synthetic result has the expected call id, summary, no content, and `is_error = true`.
- Public Hook API exposes no no-result skip: satisfied. The only no-result skip capability remains in the lower-level `llm_worker` interceptor model; it is no longer reachable through the public `pod::hook::HookPreToolAction` surface.
- Public Hook API exposes no raw `Item` injection: satisfied. `PreLlmRequest` and `OnTurnEnd` use safe public action types rather than raw `PreRequestAction::ContinueWith(Vec<Item>)` or `TurnEndAction::ContinueWithMessages(Vec<Item>)` as hook outputs.
- Public Hook API exposes no arbitrary `ToolResult` construction: satisfied for action outputs. Public pre-tool hooks provide only a denial message; Pod constructs the synthetic error result internally.
- Internal capabilities remain internal: satisfied. Internal Pod/Worker code still uses richer `llm_worker::Interceptor` actions where needed, including durable host-owned prompt append paths and compact/internal mechanisms.
- Tests cover the fixed path sufficiently: satisfied. The added `public_pre_tool_hook_actions_cannot_emit_internal_no_result_skip` unit test verifies the available public pre-tool conversions and asserts public deny produces a synthetic result. The existing `public_pre_tool_hook_deny_becomes_synthetic_error_and_short_circuits` integration-style interceptor test covers bridge behavior and ordering.
## 4. Blockers
None.
## 5. Non-blockers / follow-ups
- As noted in the original review, `ToolResultSummary` still exposes `llm_worker::tool::ToolOutput` as public hook input. This is not a blocker because it does not allow output rewriting or arbitrary `ToolResult` construction, but the future plugin/feature registry may still want a Pod-owned summary type before freezing a public API boundary.
## 6. Validation assessed or rerun
Rerun/read-only checks from `/home/hare/Projects/yoi/.worktree/hook-public-surface-hardening`:
- `git rev-parse HEAD` reported `a4e30e292abf5c640b923e3307a75eded366351a`.
- `git status --short` was clean.
- `git diff --check develop...HEAD` passed.
- `cargo fmt --check` passed.
- `./tickets.sh doctor` passed.
- `git grep -n -E 'HookPreToolAction::Skip|PreToolAction::Skip|pub (use|type).*\b(PreRequestAction|TurnEndAction|PreToolAction|PostToolAction|ToolResult|Item)\b' crates/pod/src` produced no matches.
Assessed by source review:
- Follow-up commit `a4e30e2` diff.
- Full diff `develop...HEAD` for `crates/pod/src/hook.rs`, `crates/pod/src/ipc/interceptor.rs`, `crates/pod/src/permission.rs`, and `crates/pod/src/pod.rs`.
- Relevant lower-level `llm-worker` pre-tool handling to confirm internal skip remains lower-level only.
I did not run `cargo test`, `cargo check`, or `nix build`; those commands would write build artifacts outside this review artifact scope. The fixed-path tests were assessed from source.
## 7. Residual risk
No merge-blocking residual risk found. The remaining risk is API polish for the future feature/plugin registry, especially whether all hook input summary types should become Pod-owned wrapper types before being treated as a stable plugin boundary.
---
<!-- event: close author: hare at: 2026-06-03T17:07:44Z status: closed -->
## Closed
Hardened public pod::hook actions for plugin/feature exposure. Public hooks no longer expose raw Item injection or no-result pre-tool skip; public pre-tool deny maps to synthetic error results; internal interceptor capabilities remain internal. Added focused Pod-layer hook tests; reviewer blocker fixed and rereview approved. Validation passed: cargo test -p pod hook --lib, cargo test -p pod --lib, cargo test -p llm-worker --lib, cargo fmt --check, cargo check --workspace --all-targets, ./tickets.sh doctor, git diff --check, nix build .#yoi, ./result/bin/yoi pod --help.
---

View File

@ -0,0 +1,10 @@
# Decision: split feature registry and Hook hardening from Plugin architecture
The Plugin architecture ticket remains the broad architecture surface for Tools, Hooks, runtime kinds, capability model, trust model, discovery/enablement, and MCP/WASM/declarative runtime mapping.
Two implementation-oriented prerequisite tickets are split out:
- `plugin-feature-contribution-registry`: define and implement the Pod-layer feature contribution registry so built-in and future external capabilities register through existing Tool / Hook / Notify paths instead of ad hoc Pod code paths.
- `hook-public-surface-hardening`: audit and harden `pod::hook` before exposing it as a feature/plugin contribution boundary, especially removing public access to raw internal action types that can inject model-visible `Item` values.
This preserves the desired detachable shape: feature state remains in the feature/extension module, while Pod interaction happens through existing durable host surfaces. WorkItem management should be implemented as a built-in feature contribution once the registry boundary is in place, rather than as a special Pod context-injection path.

View File

@ -7,7 +7,7 @@ kind: feature
priority: P2 priority: P2
labels: [plugin, hooks, tools, wasm, mcp] labels: [plugin, hooks, tools, wasm, mcp]
created_at: 2026-05-31T01:00:05Z created_at: 2026-05-31T01:00:05Z
updated_at: 2026-05-31T01:01:09Z updated_at: 2026-06-03T12:25:05Z
assignee: null assignee: null
legacy_ticket: null legacy_ticket: null
--- ---
@ -26,6 +26,8 @@ The plugin surface should not be a grab bag of arbitrary code execution. Candida
## Related work ## Related work
- `work-items/open/20260529-161928-mcp-integration/` — MCP integration as one plugin backend / external capability bridge. - `work-items/open/20260529-161928-mcp-integration/` — MCP integration as one plugin backend / external capability bridge.
- `work-items/open/20260603-122317-plugin-feature-contribution-registry/` — implementation-oriented runtime registry split-out for built-in and external feature contributions.
- `work-items/open/20260603-122317-hook-public-surface-hardening/` — prerequisite hardening for public Hook contribution safety.
- Existing internal hooks/tools code: `crates/pod`, `crates/tools`, `crates/llm-worker`. - Existing internal hooks/tools code: `crates/pod`, `crates/tools`, `crates/llm-worker`.
- Manifest permission policy and scope enforcement must remain authoritative for plugin-provided tools. - Manifest permission policy and scope enforcement must remain authoritative for plugin-provided tools.
@ -69,7 +71,7 @@ The plugin surface should not be a grab bag of arbitrary code execution. Candida
- Define Plugin, PluginManifest, PluginRuntimeKind, Tool contribution, Hook contribution, capability request, and trust/source model. - Define Plugin, PluginManifest, PluginRuntimeKind, Tool contribution, Hook contribution, capability request, and trust/source model.
- Map MCP, declarative hooks, and WASM onto that model. - Map MCP, declarative hooks, and WASM onto that model.
2. **Internal registry boundary** 2. **Internal registry boundary**
- Introduce code structure that can register plugin-provided tool/hook descriptors without changing runtime behavior yet. - Detailed implementation is split to `plugin-feature-contribution-registry` so this ticket can stay focused on the architecture surface and invariants.
3. **Declarative hooks MVP** 3. **Declarative hooks MVP**
- Add a non-code configuration path for simple hook behavior if an immediate use case exists. - Add a non-code configuration path for simple hook behavior if an immediate use case exists.
4. **WASM spike** 4. **WASM spike**

View File

@ -21,4 +21,22 @@ Initial decision note from user discussion:
- General scripting languages were considered, but the initial direction is WASM because it offers a clearer sandbox/capability boundary. - General scripting languages were considered, but the initial direction is WASM because it offers a clearer sandbox/capability boundary.
---
<!-- event: decision author: hare at: 2026-06-03T12:25:05Z -->
## Decision
# Decision: split feature registry and Hook hardening from Plugin architecture
The Plugin architecture ticket remains the broad architecture surface for Tools, Hooks, runtime kinds, capability model, trust model, discovery/enablement, and MCP/WASM/declarative runtime mapping.
Two implementation-oriented prerequisite tickets are split out:
- `plugin-feature-contribution-registry`: define and implement the Pod-layer feature contribution registry so built-in and future external capabilities register through existing Tool / Hook / Notify paths instead of ad hoc Pod code paths.
- `hook-public-surface-hardening`: audit and harden `pod::hook` before exposing it as a feature/plugin contribution boundary, especially removing public access to raw internal action types that can inject model-visible `Item` values.
This preserves the desired detachable shape: feature state remains in the feature/extension module, while Pod interaction happens through existing durable host surfaces. WorkItem management should be implemented as a built-in feature contribution once the registry boundary is in place, rather than as a special Pod context-injection path.
--- ---

View File

@ -7,7 +7,7 @@ kind: task
priority: P1 priority: P1
labels: [work-item, intake, orchestration, tui] labels: [work-item, intake, orchestration, tui]
created_at: 2026-06-01T03:12:52Z created_at: 2026-06-01T03:12:52Z
updated_at: 2026-06-01T05:26:27Z updated_at: 2026-06-03T12:25:05Z
assignee: null assignee: null
legacy_ticket: null legacy_ticket: null
--- ---
@ -21,6 +21,7 @@ legacy_ticket: null
## Direction ## Direction
- `tickets.sh` 相当の work item 管理を、Insomnia の built-in feature / tool surface として設計・実装する。 - `tickets.sh` 相当の work item 管理を、Insomnia の built-in feature / tool surface として設計・実装する。
- WorkItem は Pod 専用の独自 context injection や特殊 queue ではなく、`plugin-feature-contribution-registry` で定義する feature contribution として既存 Tool / Hook / Notify 経路に載せる。
- Intake Pod はユーザーと直接会話し、必要な調査・重複確認・要件同期を行い、合意済み WorkItem / ticket を作成できるようにする。 - Intake Pod はユーザーと直接会話し、必要な調査・重複確認・要件同期を行い、合意済み WorkItem / ticket を作成できるようにする。
- Orchestrator はユーザー意図の一次解釈や ticket draft の再承認者ではなく、登録済み WorkItem の scheduling / prioritization / interruption / implementation delegation を担当する。 - Orchestrator はユーザー意図の一次解釈や ticket draft の再承認者ではなく、登録済み WorkItem の scheduling / prioritization / interruption / implementation delegation を担当する。
- ユーザー向け Inbox 専用 UI は初期スコープにしない。Intake は既存の `--multi` UI の延長として通常の Pod 会話で制御できるようにする。 - ユーザー向け Inbox 専用 UI は初期スコープにしない。Intake は既存の `--multi` UI の延長として通常の Pod 会話で制御できるようにする。
@ -42,6 +43,7 @@ Scope は、エージェントが占有して作業する filesystem space を
- status transition - status transition
- close / resolution - close / resolution
- doctor / consistency check - doctor / consistency check
- WorkItem tool surface は `plugin-feature-contribution-registry` の built-in feature contribution として登録できること。
- WorkItem / ticket の保存形式は、現在の markdown + frontmatter + thread / artifacts 方式との移行可能性を保つこと。 - WorkItem / ticket の保存形式は、現在の markdown + frontmatter + thread / artifacts 方式との移行可能性を保つこと。
- 既存の `tickets.sh` 運用を即座に破壊しないこと。built-in 化の途中でも git history / work-items directory を authority として読めること。 - 既存の `tickets.sh` 運用を即座に破壊しないこと。built-in 化の途中でも git history / work-items directory を authority として読めること。
- Intake Pod がユーザーと合意済み WorkItem を作成できる導線を設計すること。 - Intake Pod がユーザーと合意済み WorkItem を作成できる導線を設計すること。

View File

@ -230,4 +230,22 @@ This does not imply unlimited file access. The WorkItem tool should be limited t
The first implementation can be LocalFilesBackend over current `work-items/`, preserving markdown/frontmatter/thread/artifacts and `tickets.sh` compatibility. The API should already be backend-shaped so GitHub Issues or other trackers can be added later without changing Intake/Orchestrator semantics. The first implementation can be LocalFilesBackend over current `work-items/`, preserving markdown/frontmatter/thread/artifacts and `tickets.sh` compatibility. The API should already be backend-shaped so GitHub Issues or other trackers can be added later without changing Intake/Orchestrator semantics.
---
<!-- event: decision author: hare at: 2026-06-03T12:25:05Z -->
## Decision
# Decision: split feature registry and Hook hardening from Plugin architecture
The Plugin architecture ticket remains the broad architecture surface for Tools, Hooks, runtime kinds, capability model, trust model, discovery/enablement, and MCP/WASM/declarative runtime mapping.
Two implementation-oriented prerequisite tickets are split out:
- `plugin-feature-contribution-registry`: define and implement the Pod-layer feature contribution registry so built-in and future external capabilities register through existing Tool / Hook / Notify paths instead of ad hoc Pod code paths.
- `hook-public-surface-hardening`: audit and harden `pod::hook` before exposing it as a feature/plugin contribution boundary, especially removing public access to raw internal action types that can inject model-visible `Item` values.
This preserves the desired detachable shape: feature state remains in the feature/extension module, while Pod interaction happens through existing durable host surfaces. WorkItem management should be implemented as a built-in feature contribution once the registry boundary is in place, rather than as a special Pod context-injection path.
--- ---

View File

@ -1,7 +0,0 @@
<!-- event: create author: tickets.sh at: 2026-06-03T00:11:24Z -->
## Created
Created by tickets.sh create.
---

View File

@ -0,0 +1,106 @@
# Delegation intent: Plugin base Pod API design
## Intent
Design the public Pod-side API that will serve as the base for Plugin / Feature contributions. The result should make Plugin-provided or built-in extension modules easy to register cleanly without adding ad hoc Pod processing paths.
This is a design task, not an implementation task. The output should be a concise but concrete design document suitable for turning into implementation tickets or acceptance criteria for `plugin-feature-contribution-registry` and `hook-public-surface-hardening`.
## Background
The current direction is that feature state remains owned by the feature/extension module, while interaction with Pod happens through existing durable host surfaces:
- Tools
- Hooks
- notifications / events / durable history append paths
The concern is that adding WorkItem, MCP, memory, plugin, and other capabilities without a common registry will create many unrelated Pod-specific insertion points. The Plugin system should establish a common contribution and authority boundary, even for built-in features.
`hook-public-surface-hardening` is being implemented separately to make public Hook actions safe before plugin exposure.
## Design question
What should the clean public API look like for a feature/plugin module that wants to contribute capabilities to a Pod?
The design should answer:
- What API types should extension modules use to declare/register capabilities?
- What belongs in a pure descriptor vs a runtime install callback?
- How should Tools, Hooks, and notifications be represented in the same public surface?
- How should capability request / host grant / diagnostics be expressed?
- What state should the feature keep itself, and what state may Pod keep?
- What must be impossible through this API?
- Where should the API live initially, and what parts should be movable to a future `plugin`/`extension` crate?
## Required constraints
- Public API must not let features/plugins mutate prompt context or session history invisibly.
- Model-visible additions must go through durable host paths: tool result, committed history append, explicit notification/history append, or user-visible event path.
- Public Hook contribution must depend on the safe Hook surface after `hook-public-surface-hardening`.
- Tool contributions must use the normal ToolRegistry / PreToolCall permission / history result path.
- Feature registry must install into existing Pod/Worker surfaces; it must not create a parallel Pod runtime path.
- Capability grant is host-controlled. A feature may request capabilities but must not assume them.
- Built-in features and future external plugins should fit the same shape.
- Avoid designing package distribution, WASM execution, or MCP implementation details beyond the minimal runtime-kind placeholders needed for the API.
- Avoid broad refactors of Pod/Worker crate boundaries unless needed to explain a clean API boundary.
## Files / records to read
Tickets:
- `/home/hare/Projects/yoi/work-items/open/20260603-122317-plugin-feature-contribution-registry/item.md`
- `/home/hare/Projects/yoi/work-items/open/20260603-122317-hook-public-surface-hardening/item.md`
- `/home/hare/Projects/yoi/work-items/open/20260531-010005-plugin-extension-surface/item.md`
- `/home/hare/Projects/yoi/work-items/open/20260601-031252-builtin-work-item-intake-routing/item.md`
Code:
- `crates/pod/src/hook.rs`
- `crates/pod/src/ipc/interceptor.rs`
- `crates/pod/src/controller.rs`
- `crates/pod/src/pod.rs`
- `crates/pod/src/permission.rs`
- `crates/llm-worker/src/tool.rs`
- `crates/llm-worker/src/interceptor.rs`
- `crates/tools/src/lib.rs`
- `crates/pod/src/workflow/mod.rs`
## Expected output
Write a design document to:
`/home/hare/Projects/yoi/work-items/open/20260603-122317-plugin-feature-contribution-registry/artifacts/pod-api-design.md`
Use this structure:
1. Summary recommendation
2. Current relevant Pod/Worker surfaces
3. Proposed public API shape
- types/modules
- example registration snippet
- Tool contribution
- Hook contribution
- notification/event contribution
- capability request/grant/diagnostics
4. State ownership model
5. Safety invariants / forbidden operations
6. Placement and crate-boundary recommendation
7. Migration path from current built-in registrations
8. Impact on WorkItem / MCP / plugin distribution follow-ups
9. Open questions / risks
## Non-goals
- Do not edit source code.
- Do not implement tests.
- Do not create a worktree.
- Do not close or modify tickets except writing the requested design artifact.
## Completion report
Report:
- whether the artifact was written
- the recommended API placement
- the highest-risk API decision
- any blockers that require parent/user decision

View File

@ -0,0 +1,462 @@
# Public Pod-side API for Feature / Plugin Contributions
## 1. Summary recommendation
Introduce a `pod::feature` public API as the single Pod-side registration layer for built-in features and future external plugins. A feature module should declare its identity, requested capabilities, and contributions, then install those contributions only through typed host registrars for existing Pod/Worker surfaces: `ToolRegistry`, the hardened safe `pod::hook` surface, and host-owned notification/event/history append paths.
The registry should not become a second runtime, a plugin dispatcher tool, or a generic `Pod` mutation escape hatch. Feature state remains inside the feature module; the Pod owns only install metadata, diagnostics, granted host handles, and normal durable session/runtime surfaces.
Recommended placement: create `crates/pod/src/feature.rs` (or `crates/pod/src/feature/mod.rs` once it grows) and export it as `pod::feature`. Keep `llm-worker::Interceptor` internal; expose only hardened `pod::hook` types and contribution registrars.
## 2. Current relevant Pod/Worker surfaces
The design should build on these existing surfaces rather than bypassing them:
- `crates/pod/src/hook.rs`
- Current public-ish hook layer wraps `llm_worker::Interceptor` with `HookRegistry`, `HookRegistryBuilder`, `Hook`, and per-event hook traits.
- It already provides Pod-specific hook events such as pre-request, post-assistant, pre-tool-call, post-tool-call, and turn-end.
- It is not yet safe enough as a public plugin API because some hook actions can carry raw `llm_worker::Item` values (`PreRequestAction::ContinueWith`, `TurnEndAction::ContinueWithMessages`). The feature API must depend on the post-hardening surface, not these raw item mutation forms.
- `crates/pod/src/ipc/interceptor.rs`
- `PodInterceptor` is the bridge between Worker callbacks and Pod behavior.
- It runs hooks, drains pending attachments/notifications, records memory/tool usage, and turns model-visible additions into committed `SystemItem` session log entries before appending them to Worker history.
- This is the right place for host-mediated durable append paths; it is not a plugin API itself.
- `crates/pod/src/controller.rs`
- Controller startup currently registers built-in Pod tools through ad hoc code paths.
- The feature registry should replace those ad hoc registrations incrementally by installing contributions into the same worker/tool/hook surfaces during Pod construction.
- `crates/pod/src/pod.rs`
- `Pod` owns the durable session log, metadata, runtime event channel, notification helpers, pending system attachments, scope, and Worker lifecycle.
- It exposes internal methods that can append history or send alerts/events. The public feature API should not expose `Pod` or `Worker` directly; it should expose narrow sinks that route through these existing methods.
- `crates/pod/src/permission.rs`
- Manifest tool permissions are enforced as a `PreToolCallHook`.
- Feature tools must remain subject to the same PreToolCall permission path. Feature capability grants do not replace per-call tool permission.
- `crates/llm-worker/src/tool.rs` and `crates/llm-worker/src/tool_server.rs`
- `ToolDefinition`, `Tool`, `ToolMeta`, `ToolResult`, `ToolOutput`, and `ToolServerHandle` define the normal tool execution path.
- Tools registered here get normal schema exposure, execution, bounded output handling, and history result recording.
- The public feature API should register `ToolDefinition`s into this registry rather than introducing a separate plugin dispatch layer.
- `crates/llm-worker/src/interceptor.rs`
- The lower-level interceptor is powerful and Worker-oriented. It should remain internal because it can influence model request construction too directly.
- Public features should use `pod::hook` only after that API has been narrowed to durable, auditable actions.
- `crates/tools/src/lib.rs`
- Existing built-in tools already use shared tool abstractions and scoped filesystem/runtime handles.
- Those tool constructors can become built-in feature contributions without changing model-visible tool names.
- `crates/pod/src/workflow/mod.rs`
- Workflow invocation currently resolves user input segments into system items through the Pod's durable attachment path.
- This is a useful pattern for feature-owned model-visible additions: resolve through a host-owned append path and commit what the model sees. It should not become a general plugin context injection mechanism.
## 3. Proposed public API shape
### Types/modules
Add a new module under `pod`:
```rust
pub mod feature {
pub mod capability;
pub mod diagnostic;
pub mod event;
pub mod hook;
pub mod registry;
pub mod tool;
pub use capability::{CapabilityGrantSet, CapabilityRequest, HostCapability};
pub use diagnostic::{FeatureDiagnostic, FeatureInstallReport};
pub use registry::{FeatureDescriptor, FeatureId, FeatureInstallContext, FeatureModule, FeatureRegistryBuilder, FeatureRuntimeKind};
pub use tool::ToolContribution;
}
```
Core trait and registry shape:
```rust
pub trait FeatureModule: Send + Sync + 'static {
fn descriptor(&self) -> FeatureDescriptor;
fn install(&self, ctx: &mut FeatureInstallContext<'_>) -> Result<(), FeatureInstallError>;
}
pub struct FeatureDescriptor {
pub id: FeatureId, // source-qualified identity, e.g. builtin:task
pub display_name: String,
pub version: Option<String>,
pub runtime: FeatureRuntimeKind, // Builtin, ExternalProcess, McpBridge, WasmPlaceholder, DeclarativePlaceholder
pub requested_capabilities: Vec<CapabilityRequest>,
pub declared_tools: Vec<ToolDeclaration>,
pub declared_hooks: Vec<HookDeclaration>,
pub declared_event_channels: Vec<EventChannelDeclaration>,
}
pub enum FeatureRuntimeKind {
Builtin,
ExternalProcess,
McpBridge,
WasmPlaceholder,
DeclarativePlaceholder,
}
pub struct FeatureInstallContext<'a> {
// No Pod or Worker reference.
pub feature_id: &'a FeatureId,
pub grants: &'a CapabilityGrantSet,
pub tools: ToolRegistrar<'a>,
pub hooks: PublicHookRegistrar<'a>,
pub notify: FeatureNotifySink<'a>,
pub events: FeatureEventSink<'a>,
pub diagnostics: FeatureDiagnosticSink<'a>,
pub services: FeatureServiceProvider<'a>,
}
```
Important details:
- `FeatureDescriptor` is declarative and serializable. It is safe to show in diagnostics, profile previews, and `ListFeatures`-style future tooling.
- `FeatureModule::install` is runtime code that wires stateful tool/hook implementations into host registrars.
- `FeatureInstallContext` must not expose `Pod`, `Worker`, raw `ToolServerHandle`, raw `Interceptor`, raw `NotifyBuffer`, raw `LogWriter`, raw `event_tx`, or direct history mutation.
- `FeatureServiceProvider` returns only host services backed by granted capabilities, for example scoped filesystem access, WorkItem store access, memory access, Pod orchestration handles, web provider handles, or secret references. It should return `Denied`/`Unavailable` diagnostics instead of exposing partial internals.
### Example registration snippet
This is illustrative shape, not proposed final exact Rust syntax:
```rust
use pod::feature::{
CapabilityRequest, FeatureDescriptor, FeatureId, FeatureInstallContext,
FeatureModule, FeatureRuntimeKind, HostCapability, ToolContribution,
};
pub struct WorkItemFeature {
state: std::sync::Arc<WorkItemFeatureState>,
}
impl FeatureModule for WorkItemFeature {
fn descriptor(&self) -> FeatureDescriptor {
FeatureDescriptor::builder(FeatureId::builtin("work-item"))
.display_name("WorkItem intake and routing")
.runtime(FeatureRuntimeKind::Builtin)
.request(CapabilityRequest::required(
HostCapability::WorkItemStore { read: true, write: true },
"create and update WorkItem records through host-owned ticket storage",
))
.request(CapabilityRequest::optional(
HostCapability::EmitUserEvent,
"surface routing diagnostics to the TUI/actionbar",
))
.tool("WorkItemCreate")
.tool("WorkItemComment")
.hook("work_item_intake_pre_tool_audit", pod::hook::HookPoint::PreToolCall)
.event_channel("work-item")
.build()
}
fn install(&self, ctx: &mut FeatureInstallContext<'_>) -> Result<(), FeatureInstallError> {
let store = ctx.services.work_item_store()?;
ctx.tools.register(ToolContribution::new(
"WorkItemCreate",
work_item_create_tool(store.clone(), self.state.clone()),
))?;
ctx.hooks.pre_tool_call(
"work_item_intake_pre_tool_audit",
WorkItemAuditHook::new(self.state.clone()),
)?;
ctx.events.declare_channel("work-item")?;
Ok(())
}
}
```
The feature keeps `WorkItemFeatureState`. The Pod keeps only registration records, diagnostics, and the normal host services it already owns.
### Tool contribution
A tool contribution should be a thin wrapper around `llm_worker::ToolDefinition` plus feature metadata:
```rust
pub struct ToolContribution {
pub feature_id: FeatureId,
pub name: ToolName,
pub definition: llm_worker::ToolDefinition,
pub required_capabilities: Vec<HostCapability>,
}
```
Rules:
- Register into the existing `ToolRegistry` / `ToolServerHandle`; do not add a plugin-dispatcher tool that multiplexes plugin calls outside normal tool history.
- Preserve normal `PreToolCall` permission evaluation, tool-call history, result history, output truncation/bounding, and diagnostic behavior.
- Host-controlled feature enablement decides whether a contributed tool is installed. Manifest/profile tool permission still decides whether a model may call it at runtime.
- Duplicate tool names should be rejected during feature registry preflight with a diagnostic, not discovered later through a panic or undefined ordering.
- Public feature identity should be source-qualified (`builtin:memory`, `project:foo`, `plugin:<digest>:bar`), while model-visible tool names should remain explicit stable names. Do not auto-prefix model tool names unless the project deliberately chooses a future namespacing policy.
- Tool schemas/descriptions must be part of the normal `ToolDefinition` path so model-visible surfaces remain inspectable and bounded.
- If a required host service is not granted or configured, the tool should not be registered; the install report should explain the skipped contribution.
### Hook contribution
Hook contribution must depend on the safe hook surface produced by `hook-public-surface-hardening`.
Recommended public hook principles:
- Public hooks register through `PublicHookRegistrar`, which wraps `HookRegistryBuilder` but exposes only hardened hook traits/actions.
- Public hooks receive snapshots/views, not mutable Pod/Worker handles.
- Public hook return values should be decisions such as continue, deny/rewrite a tool decision through a host-defined synthetic result path, emit diagnostics, or request a durable notification/history append through a host sink. They should not return raw `llm_worker::Item` vectors.
- Public hooks must not be able to mutate request context, session history, or Worker state invisibly.
- Permission enforcement hooks remain host/internal and run before feature hooks for `PreToolCall` so a feature cannot approve a denied tool call.
- Hook ordering should be explicit and stable: internal safety hooks first, public feature hooks in registry order or declared priority bands, internal usage/accounting hooks where needed. Priority should be coarse, not arbitrary integer ordering that lets plugins fight for precedence.
Possible hardened hook action shape:
```rust
pub enum PublicPreToolCallDecision {
Continue,
DenyWithSyntheticError { message: String },
EmitDiagnostic { diagnostic: FeatureDiagnostic },
}
pub trait PublicPreToolCallHook: Send + Sync {
fn on_pre_tool_call(&self, event: PublicPreToolCallEvent<'_>) -> PublicPreToolCallDecision;
}
```
If a hook needs to add model-visible text, it should use `FeatureNotifySink::notify_model(...)` or another host-owned durable append API, not return an `Item`.
### Notification/event contribution
Expose two distinct sinks:
```rust
pub struct FeatureNotifySink<'a> { /* host-owned */ }
pub struct FeatureEventSink<'a> { /* host-owned */ }
```
Recommended behavior:
- `FeatureNotifySink::notify_model(...)` creates a model-visible notification through the existing durable notification/system-item path. The host commits the corresponding `SystemItem` before it is appended to Worker history.
- `FeatureNotifySink::notify_user(...)` or `FeatureEventSink::emit(...)` creates user-visible diagnostics/progress/action events through the existing alert/event path. These are not model-visible unless explicitly routed through `notify_model`.
- Event payloads should be typed, bounded, and feature-identified. Avoid arbitrary JSON blobs as the first public API; allow an opaque bounded metadata field only if diagnostics require it.
- Notifications and events should require explicit capabilities such as `EmitModelNotification` and `EmitUserEvent`.
- Background feature tasks must use these sinks; they must not hold raw log writers or append directly to history.
Useful initial event shape:
```rust
pub struct FeatureEvent {
pub feature_id: FeatureId,
pub level: FeatureEventLevel, // Info, Warn, Error
pub channel: String, // e.g. "work-item"
pub summary: String,
pub detail: Option<String>,
pub model_visible: bool, // false unless host routes through notify_model
}
```
`model_visible` should be host-controlled in practice: a feature may request model visibility, but the sink decides whether that capability is granted and records the durable append if it is.
### Capability request/grant/diagnostics
Capabilities are requested by descriptors and granted by the host. A feature may request a capability, but it must not assume the capability exists.
Initial capability categories:
```rust
pub enum HostCapability {
ContributeTool { name: ToolName },
ContributeHook { point: pod::hook::HookPoint },
EmitUserEvent,
EmitModelNotification,
ScopedFs { read: bool, write: bool, execute: bool },
WorkItemStore { read: bool, write: bool },
MemoryStore { read: bool, write: bool },
PodManagement { spawn: bool, message: bool, restore: bool },
Network { purpose: NetworkPurpose },
SecretRef { id: String },
}
```
Important separation:
- Capability grants decide whether a feature may install and receive host services.
- Tool permissions decide whether an installed tool call may execute for a specific Pod/run.
- Scope permissions decide which filesystem paths or delegated Pod capabilities a host service may touch.
Diagnostics should be first-class:
```rust
pub struct FeatureInstallReport {
pub feature_id: FeatureId,
pub enabled: bool,
pub granted: Vec<HostCapability>,
pub denied: Vec<CapabilityDenial>,
pub installed_tools: Vec<ToolName>,
pub installed_hooks: Vec<String>,
pub skipped_contributions: Vec<SkippedContribution>,
pub diagnostics: Vec<FeatureDiagnostic>,
}
```
Diagnostics must avoid secrets and must be safe for session logs, TUI display, and future `ListFeatures`/profile validation output.
## 4. State ownership model
Feature state belongs to the feature module.
- A feature may own `Arc<State>` and clone it into contributed tools, hooks, and background tasks.
- The Pod registry stores descriptors, install reports, enabled/disabled status, and host-owned handles. It does not store feature business state.
- Durable feature data must live in a feature-owned or host-granted store with an explicit API: WorkItem files through a WorkItem service, memory records through memory APIs, plugin config/state through a future plugin-state service, etc.
- Session history is not feature storage. It is an audit/replay record of model-visible interactions and host-visible events.
- A feature that needs restoration after process restart should reconstruct itself from its own durable store/config plus normal Pod metadata, not from private data hidden in Worker context.
- Background tasks are allowed only if they communicate through granted sinks/services and have a defined shutdown/lifecycle policy owned by the host.
This model lets built-ins and plugins share the same contribution shape while keeping Pod runtime ownership clear.
## 5. Safety invariants / forbidden operations
Public features/plugins must not be able to perform these operations:
- Mutate prompt context directly.
- Append, remove, reorder, or rewrite Worker history directly.
- Insert model-visible text that is not committed through a durable host path.
- Return raw `llm_worker::Item` values from public hooks.
- Access raw `Worker`, raw `Pod`, raw `ToolServerHandle`, raw `llm_worker::Interceptor`, raw `NotifyBuffer`, raw session log writer, or raw event sender.
- Register tools outside `ToolRegistry` or bypass normal tool-result history recording.
- Bypass `PreToolCall` permission policy.
- Grant themselves capabilities or infer grants from successful construction.
- Mutate manifest/profile/scope state directly.
- Perform filesystem/process/network/secret access outside granted host services.
- Emit unbounded tool outputs, event payloads, diagnostics, or notification bodies.
- Put secrets into diagnostics, session logs, model context, TUI output, or feature install reports.
- Depend on MCP/WASM/package-distribution mechanics in the base Pod API.
Positive invariant: if the model can see a feature-produced fact, a future replay/resume must have a durable explanation for why that fact was present.
## 6. Placement and crate-boundary recommendation
Recommended placement:
- `crates/pod/src/feature.rs` or `crates/pod/src/feature/mod.rs`
- public feature traits/types
- feature registry builder
- install reports/diagnostics
- capability request/grant model
- typed registrars/sinks
- `crates/pod/src/hook.rs`
- remains the public hook module after hardening
- should expose safe Pod-level hook traits/actions only
- should not re-export `llm_worker::Interceptor` power
- `crates/llm-worker`
- remains owner of generic LLM tools/interceptors/history machinery
- should not depend on `pod::feature`
- `crates/tools`
- remains a source of reusable tool implementations
- built-in feature modules in `pod` can wrap these constructors into `ToolContribution`s
- Future external plugin crates/processes
- should adapt into `FeatureDescriptor` + `FeatureModule` or a host-side adapter that produces equivalent contributions
- should not be called directly by the Pod except through the registry/registrars
Install location in Pod startup:
1. Resolve manifest/profile and host capability policy.
2. Construct `Pod` and internal safety surfaces.
3. Install host/internal hooks such as manifest permission enforcement.
4. Build and install enabled feature modules through `FeatureRegistryBuilder`.
5. Flush/register tools through the existing Worker tool registry.
6. Freeze/install the Pod interceptor and start normal run/attach behavior.
The exact sequencing can be adjusted to match current construction, but the invariant should hold: public feature hooks cannot precede host safety hooks, and feature tools must exist before the model receives the final tool schema for a run.
## 7. Migration path from current built-in registrations
Recommended migration is incremental and behavior-preserving:
1. Land hook public-surface hardening first.
- Remove/replace public raw `Item`-carrying hook actions.
- Define which hook decisions are safe for external contributors.
2. Add `pod::feature` with no behavior change.
- Implement descriptors, capability grants, install reports, and registrars.
- Initially register no external plugins.
3. Wrap current built-in tool registration as built-in feature modules.
- Start with a small built-in feature whose state/services are already cleanly bounded.
- Preserve existing tool names, schemas, and permission behavior.
- Convert duplicate-name failures into registry diagnostics before flushing tools.
4. Move larger built-in groups behind feature modules.
- Filesystem/process tools from `crates/tools`.
- Memory tools.
- Pod orchestration tools.
- Task/WorkItem tools once their stores and hooks have explicit capabilities.
- Web tools as configured provider-backed features.
5. Move built-in hook contributions only after safe hook semantics are stable.
- Keep manifest permission enforcement as an internal host hook, not a feature hook.
- Keep accounting/usage hooks internal unless they become genuine feature behavior.
6. Treat workflow/user-input expansion separately.
- Workflow invocation already uses a durable system-item attachment pattern.
- Do not expose arbitrary workflow-like context injection to plugins until there is a safe typed command/input-contribution API with durable append semantics.
7. Add profile/manifest enablement after built-ins work through the same registry.
- Built-ins and external plugins should share descriptor/capability/install-report mechanics.
- Host policy may grant built-ins by default, but built-ins should still declare what they use.
## 8. Impact on WorkItem / MCP / plugin distribution follow-ups
WorkItem / intake routing:
- WorkItem routing can become a built-in feature that contributes WorkItem tools, optional routing hooks, and user-visible action events.
- It should request `WorkItemStore` and event/notification capabilities instead of reaching into ticket files ad hoc.
- Model-visible routing hints or intake results must be committed through notification/history append paths.
- This registry gives the WorkItem feature a clean way to install without making WorkItem a special Pod runtime mode.
MCP:
- MCP should be an adapter/runtime kind that produces normal `ToolContribution`s and possibly safe event diagnostics.
- MCP tool calls must still pass through `ToolRegistry`, PreToolCall permission, output bounding, and history result recording.
- MCP resources/prompts should not become invisible prompt injection. If exposed later, they should be explicit tools, user-invoked attachments, or durable notification/history appends.
- MCP transport/session details are out of scope for the base API beyond the `FeatureRuntimeKind::McpBridge` placeholder.
Plugin distribution:
- Archive validation, cache extraction, signing/trust, WASM execution, external process supervision, and package update policy should remain separate follow-up designs.
- Distribution mechanisms should eventually produce the same descriptor/capability/contribution objects as built-ins.
- Capability grants are the host trust boundary; package installation alone must not grant runtime authority.
## 9. Open questions / risks
1. Tool naming policy is the highest-risk API decision.
- Recommendation: feature identities are source-qualified, model-visible tool names stay explicit and stable, and collisions are rejected by the host.
- Risk: external plugins may need namespacing later. Auto-prefixing now would avoid collisions but would also change model-facing ergonomics and diverge from current built-in tool names.
2. The exact safe hook action set must be settled by `hook-public-surface-hardening`.
- Especially important: whether public pre-tool hooks may synthesize denials/results, and how durable append requests are represented.
3. Notification/event durability needs precise semantics.
- User-visible events may be live-only, while model-visible notifications must be durable. The public API should make this distinction impossible to miss.
4. Capability granularity can easily become either too coarse or too noisy.
- Start with coarse host-service capabilities plus normal tool permissions, then split only when real features need finer grants.
5. Runtime enable/disable is not designed here.
- Initial registry should be install-at-startup. Hot reload or dynamic plugin enablement needs separate lifecycle, cleanup, and schema-refresh design.
6. Persistent plugin state needs a future host service.
- The base API says state is feature-owned, but external plugins will still need a sanctioned durable state directory/store with migration/versioning rules.
7. Background tasks need lifecycle policy.
- If external plugins can spawn tasks, the host must define shutdown, cancellation, panic handling, diagnostic routing, and whether task output may become model-visible.
8. Existing workflow/input expansion is close to the forbidden boundary.
- It is safe only because it commits system items before model visibility. Any future plugin command/input contribution must preserve that durable replay property.

View File

@ -0,0 +1,89 @@
---
id: 20260603-122317-plugin-feature-contribution-registry
slug: plugin-feature-contribution-registry
title: Plugin: feature contribution registry for built-in and external capabilities
status: open
kind: feature
priority: P1
labels: [plugin, registry, tools, hooks, orchestration]
created_at: 2026-06-03T12:23:17Z
updated_at: 2026-06-03T16:44:05Z
assignee: null
legacy_ticket: null
---
## Issue
Yoi already has many capability surfaces: built-in tools, memory tools, Pod management tools, manifest permission hooks, workflow assets, notifications, and planned WorkItem / MCP / plugin features. If new features keep registering themselves through ad hoc Pod/Worker code paths, Plugin system work will not produce a single management boundary and later features such as WorkItem intake will be hard to detach.
The immediate need is not package distribution or WASM execution. The immediate need is a runtime feature contribution registry that lets built-in features and future external plugins contribute through the same existing host surfaces: Tools, Hooks, and notification/event paths.
## Direction
Introduce a feature registry boundary for Pod runtime capability installation.
- Feature state remains owned by the feature/extension module, not by Pod history or prompt context.
- Pod interaction happens through existing surfaces:
- Tool contributions registered into the normal ToolRegistry / permission / history path.
- Hook contributions registered through the public Pod Hook boundary.
- Notification/event contributions use existing durable Notify / Event paths rather than invisible context injection.
- The registry is responsible for discovery/enablement diagnostics and installation into existing surfaces; it must not create a parallel execution path.
- Built-in features should be expressible as feature contributions first. External plugin runtimes can be added later.
## Placement note
The runtime registry should initially live in the Pod layer, e.g. `crates/pod/src/feature.rs` or an equivalent module, because installation requires access to Worker tool registration, Pod Hook registry setup, manifest/profile-resolved policy, notification buffers, and Pod event bridges.
Pure descriptor types may later move to a separate `plugin` / `extension` crate if needed, but descriptor types must not depend on Pod internals.
## Requirements
- Define feature identity/source/runtime metadata for at least built-in features and future user/project plugins.
- Example sources: builtin, user, project.
- Example runtime kinds: builtin, declarative, mcp bridge future, wasm future.
- Define contribution descriptors or install abstractions for:
- Tools
- Hooks
- Notification/event-facing capabilities where needed
- Define capability request / host grant data structures suitable for policy diagnostics.
- Add a registry/builder/install context that can install enabled feature contributions into existing Pod/Worker surfaces.
- Preserve current behavior while moving registration toward the registry.
- Existing built-in tool registration must still work.
- Existing manifest permission hook behavior must still work.
- Existing notification/history invariants must still hold.
- Keep model-visible plugin/feature output on durable paths: tool result, committed history, or explicit notification/history append paths.
- Do not let feature contributions mutate session history, memory, prompt context, or scope outside approved host APIs.
- Document how WorkItem tools should become a built-in feature contribution rather than a special Pod context path.
## Non-goals
- Implementing WASM execution.
- Implementing plugin package discovery or archive validation.
- Implementing MCP itself.
- Implementing WorkItem management tools.
- Adding UI/TUI rendering plugins.
- Auto-enabling user/project plugins.
## Suggested phases
1. **Registry design**
- Define feature descriptor, source, runtime kind, contribution kinds, capability request/grant, and diagnostics.
2. **Pod runtime registry skeleton**
- Add a Pod-layer feature registry/builder and install context.
- Keep behavior unchanged initially.
3. **Builtin registration migration**
- Move a small, low-risk built-in feature group through the registry first.
- Then migrate built-in tool groups / memory / Pod-management registration as appropriate.
4. **Hook integration**
- Integrate only after `hook-public-surface-hardening` establishes a safe public Hook action surface.
5. **Follow-up alignment**
- Update WorkItem, MCP, and plugin distribution tickets to depend on this registry boundary where they contribute capabilities.
## Acceptance criteria
- The codebase has a first-class feature contribution registry boundary for Pod runtime installation.
- At least one built-in capability group is registered through the new registry without changing behavior.
- The registry can describe Tool and Hook contributions and records source/runtime/capability diagnostics.
- Feature installation uses existing ToolRegistry, HookRegistry, and notification/history paths; no parallel Pod context injection path is introduced.
- WorkItem and MCP follow-up tickets can target this registry instead of adding ad hoc registration code.
- Focused tests cover the migrated built-in registration and capability/diagnostic behavior.

View File

@ -0,0 +1,591 @@
<!-- event: create author: tickets.sh at: 2026-06-03T12:23:17Z -->
## Created
Created by tickets.sh create.
---
<!-- event: plan author: hare at: 2026-06-03T16:38:46Z -->
## Plan
# Delegation intent: Plugin base Pod API design
## Intent
Design the public Pod-side API that will serve as the base for Plugin / Feature contributions. The result should make Plugin-provided or built-in extension modules easy to register cleanly without adding ad hoc Pod processing paths.
This is a design task, not an implementation task. The output should be a concise but concrete design document suitable for turning into implementation tickets or acceptance criteria for `plugin-feature-contribution-registry` and `hook-public-surface-hardening`.
## Background
The current direction is that feature state remains owned by the feature/extension module, while interaction with Pod happens through existing durable host surfaces:
- Tools
- Hooks
- notifications / events / durable history append paths
The concern is that adding WorkItem, MCP, memory, plugin, and other capabilities without a common registry will create many unrelated Pod-specific insertion points. The Plugin system should establish a common contribution and authority boundary, even for built-in features.
`hook-public-surface-hardening` is being implemented separately to make public Hook actions safe before plugin exposure.
## Design question
What should the clean public API look like for a feature/plugin module that wants to contribute capabilities to a Pod?
The design should answer:
- What API types should extension modules use to declare/register capabilities?
- What belongs in a pure descriptor vs a runtime install callback?
- How should Tools, Hooks, and notifications be represented in the same public surface?
- How should capability request / host grant / diagnostics be expressed?
- What state should the feature keep itself, and what state may Pod keep?
- What must be impossible through this API?
- Where should the API live initially, and what parts should be movable to a future `plugin`/`extension` crate?
## Required constraints
- Public API must not let features/plugins mutate prompt context or session history invisibly.
- Model-visible additions must go through durable host paths: tool result, committed history append, explicit notification/history append, or user-visible event path.
- Public Hook contribution must depend on the safe Hook surface after `hook-public-surface-hardening`.
- Tool contributions must use the normal ToolRegistry / PreToolCall permission / history result path.
- Feature registry must install into existing Pod/Worker surfaces; it must not create a parallel Pod runtime path.
- Capability grant is host-controlled. A feature may request capabilities but must not assume them.
- Built-in features and future external plugins should fit the same shape.
- Avoid designing package distribution, WASM execution, or MCP implementation details beyond the minimal runtime-kind placeholders needed for the API.
- Avoid broad refactors of Pod/Worker crate boundaries unless needed to explain a clean API boundary.
## Files / records to read
Tickets:
- `/home/hare/Projects/yoi/work-items/open/20260603-122317-plugin-feature-contribution-registry/item.md`
- `/home/hare/Projects/yoi/work-items/open/20260603-122317-hook-public-surface-hardening/item.md`
- `/home/hare/Projects/yoi/work-items/open/20260531-010005-plugin-extension-surface/item.md`
- `/home/hare/Projects/yoi/work-items/open/20260601-031252-builtin-work-item-intake-routing/item.md`
Code:
- `crates/pod/src/hook.rs`
- `crates/pod/src/ipc/interceptor.rs`
- `crates/pod/src/controller.rs`
- `crates/pod/src/pod.rs`
- `crates/pod/src/permission.rs`
- `crates/llm-worker/src/tool.rs`
- `crates/llm-worker/src/interceptor.rs`
- `crates/tools/src/lib.rs`
- `crates/pod/src/workflow/mod.rs`
## Expected output
Write a design document to:
`/home/hare/Projects/yoi/work-items/open/20260603-122317-plugin-feature-contribution-registry/artifacts/pod-api-design.md`
Use this structure:
1. Summary recommendation
2. Current relevant Pod/Worker surfaces
3. Proposed public API shape
- types/modules
- example registration snippet
- Tool contribution
- Hook contribution
- notification/event contribution
- capability request/grant/diagnostics
4. State ownership model
5. Safety invariants / forbidden operations
6. Placement and crate-boundary recommendation
7. Migration path from current built-in registrations
8. Impact on WorkItem / MCP / plugin distribution follow-ups
9. Open questions / risks
## Non-goals
- Do not edit source code.
- Do not implement tests.
- Do not create a worktree.
- Do not close or modify tickets except writing the requested design artifact.
## Completion report
Report:
- whether the artifact was written
- the recommended API placement
- the highest-risk API decision
- any blockers that require parent/user decision
---
<!-- event: plan author: hare at: 2026-06-03T16:44:05Z -->
## Plan
# Public Pod-side API for Feature / Plugin Contributions
## 1. Summary recommendation
Introduce a `pod::feature` public API as the single Pod-side registration layer for built-in features and future external plugins. A feature module should declare its identity, requested capabilities, and contributions, then install those contributions only through typed host registrars for existing Pod/Worker surfaces: `ToolRegistry`, the hardened safe `pod::hook` surface, and host-owned notification/event/history append paths.
The registry should not become a second runtime, a plugin dispatcher tool, or a generic `Pod` mutation escape hatch. Feature state remains inside the feature module; the Pod owns only install metadata, diagnostics, granted host handles, and normal durable session/runtime surfaces.
Recommended placement: create `crates/pod/src/feature.rs` (or `crates/pod/src/feature/mod.rs` once it grows) and export it as `pod::feature`. Keep `llm-worker::Interceptor` internal; expose only hardened `pod::hook` types and contribution registrars.
## 2. Current relevant Pod/Worker surfaces
The design should build on these existing surfaces rather than bypassing them:
- `crates/pod/src/hook.rs`
- Current public-ish hook layer wraps `llm_worker::Interceptor` with `HookRegistry`, `HookRegistryBuilder`, `Hook`, and per-event hook traits.
- It already provides Pod-specific hook events such as pre-request, post-assistant, pre-tool-call, post-tool-call, and turn-end.
- It is not yet safe enough as a public plugin API because some hook actions can carry raw `llm_worker::Item` values (`PreRequestAction::ContinueWith`, `TurnEndAction::ContinueWithMessages`). The feature API must depend on the post-hardening surface, not these raw item mutation forms.
- `crates/pod/src/ipc/interceptor.rs`
- `PodInterceptor` is the bridge between Worker callbacks and Pod behavior.
- It runs hooks, drains pending attachments/notifications, records memory/tool usage, and turns model-visible additions into committed `SystemItem` session log entries before appending them to Worker history.
- This is the right place for host-mediated durable append paths; it is not a plugin API itself.
- `crates/pod/src/controller.rs`
- Controller startup currently registers built-in Pod tools through ad hoc code paths.
- The feature registry should replace those ad hoc registrations incrementally by installing contributions into the same worker/tool/hook surfaces during Pod construction.
- `crates/pod/src/pod.rs`
- `Pod` owns the durable session log, metadata, runtime event channel, notification helpers, pending system attachments, scope, and Worker lifecycle.
- It exposes internal methods that can append history or send alerts/events. The public feature API should not expose `Pod` or `Worker` directly; it should expose narrow sinks that route through these existing methods.
- `crates/pod/src/permission.rs`
- Manifest tool permissions are enforced as a `PreToolCallHook`.
- Feature tools must remain subject to the same PreToolCall permission path. Feature capability grants do not replace per-call tool permission.
- `crates/llm-worker/src/tool.rs` and `crates/llm-worker/src/tool_server.rs`
- `ToolDefinition`, `Tool`, `ToolMeta`, `ToolResult`, `ToolOutput`, and `ToolServerHandle` define the normal tool execution path.
- Tools registered here get normal schema exposure, execution, bounded output handling, and history result recording.
- The public feature API should register `ToolDefinition`s into this registry rather than introducing a separate plugin dispatch layer.
- `crates/llm-worker/src/interceptor.rs`
- The lower-level interceptor is powerful and Worker-oriented. It should remain internal because it can influence model request construction too directly.
- Public features should use `pod::hook` only after that API has been narrowed to durable, auditable actions.
- `crates/tools/src/lib.rs`
- Existing built-in tools already use shared tool abstractions and scoped filesystem/runtime handles.
- Those tool constructors can become built-in feature contributions without changing model-visible tool names.
- `crates/pod/src/workflow/mod.rs`
- Workflow invocation currently resolves user input segments into system items through the Pod's durable attachment path.
- This is a useful pattern for feature-owned model-visible additions: resolve through a host-owned append path and commit what the model sees. It should not become a general plugin context injection mechanism.
## 3. Proposed public API shape
### Types/modules
Add a new module under `pod`:
```rust
pub mod feature {
pub mod capability;
pub mod diagnostic;
pub mod event;
pub mod hook;
pub mod registry;
pub mod tool;
pub use capability::{CapabilityGrantSet, CapabilityRequest, HostCapability};
pub use diagnostic::{FeatureDiagnostic, FeatureInstallReport};
pub use registry::{FeatureDescriptor, FeatureId, FeatureInstallContext, FeatureModule, FeatureRegistryBuilder, FeatureRuntimeKind};
pub use tool::ToolContribution;
}
```
Core trait and registry shape:
```rust
pub trait FeatureModule: Send + Sync + 'static {
fn descriptor(&self) -> FeatureDescriptor;
fn install(&self, ctx: &mut FeatureInstallContext<'_>) -> Result<(), FeatureInstallError>;
}
pub struct FeatureDescriptor {
pub id: FeatureId, // source-qualified identity, e.g. builtin:task
pub display_name: String,
pub version: Option<String>,
pub runtime: FeatureRuntimeKind, // Builtin, ExternalProcess, McpBridge, WasmPlaceholder, DeclarativePlaceholder
pub requested_capabilities: Vec<CapabilityRequest>,
pub declared_tools: Vec<ToolDeclaration>,
pub declared_hooks: Vec<HookDeclaration>,
pub declared_event_channels: Vec<EventChannelDeclaration>,
}
pub enum FeatureRuntimeKind {
Builtin,
ExternalProcess,
McpBridge,
WasmPlaceholder,
DeclarativePlaceholder,
}
pub struct FeatureInstallContext<'a> {
// No Pod or Worker reference.
pub feature_id: &'a FeatureId,
pub grants: &'a CapabilityGrantSet,
pub tools: ToolRegistrar<'a>,
pub hooks: PublicHookRegistrar<'a>,
pub notify: FeatureNotifySink<'a>,
pub events: FeatureEventSink<'a>,
pub diagnostics: FeatureDiagnosticSink<'a>,
pub services: FeatureServiceProvider<'a>,
}
```
Important details:
- `FeatureDescriptor` is declarative and serializable. It is safe to show in diagnostics, profile previews, and `ListFeatures`-style future tooling.
- `FeatureModule::install` is runtime code that wires stateful tool/hook implementations into host registrars.
- `FeatureInstallContext` must not expose `Pod`, `Worker`, raw `ToolServerHandle`, raw `Interceptor`, raw `NotifyBuffer`, raw `LogWriter`, raw `event_tx`, or direct history mutation.
- `FeatureServiceProvider` returns only host services backed by granted capabilities, for example scoped filesystem access, WorkItem store access, memory access, Pod orchestration handles, web provider handles, or secret references. It should return `Denied`/`Unavailable` diagnostics instead of exposing partial internals.
### Example registration snippet
This is illustrative shape, not proposed final exact Rust syntax:
```rust
use pod::feature::{
CapabilityRequest, FeatureDescriptor, FeatureId, FeatureInstallContext,
FeatureModule, FeatureRuntimeKind, HostCapability, ToolContribution,
};
pub struct WorkItemFeature {
state: std::sync::Arc<WorkItemFeatureState>,
}
impl FeatureModule for WorkItemFeature {
fn descriptor(&self) -> FeatureDescriptor {
FeatureDescriptor::builder(FeatureId::builtin("work-item"))
.display_name("WorkItem intake and routing")
.runtime(FeatureRuntimeKind::Builtin)
.request(CapabilityRequest::required(
HostCapability::WorkItemStore { read: true, write: true },
"create and update WorkItem records through host-owned ticket storage",
))
.request(CapabilityRequest::optional(
HostCapability::EmitUserEvent,
"surface routing diagnostics to the TUI/actionbar",
))
.tool("WorkItemCreate")
.tool("WorkItemComment")
.hook("work_item_intake_pre_tool_audit", pod::hook::HookPoint::PreToolCall)
.event_channel("work-item")
.build()
}
fn install(&self, ctx: &mut FeatureInstallContext<'_>) -> Result<(), FeatureInstallError> {
let store = ctx.services.work_item_store()?;
ctx.tools.register(ToolContribution::new(
"WorkItemCreate",
work_item_create_tool(store.clone(), self.state.clone()),
))?;
ctx.hooks.pre_tool_call(
"work_item_intake_pre_tool_audit",
WorkItemAuditHook::new(self.state.clone()),
)?;
ctx.events.declare_channel("work-item")?;
Ok(())
}
}
```
The feature keeps `WorkItemFeatureState`. The Pod keeps only registration records, diagnostics, and the normal host services it already owns.
### Tool contribution
A tool contribution should be a thin wrapper around `llm_worker::ToolDefinition` plus feature metadata:
```rust
pub struct ToolContribution {
pub feature_id: FeatureId,
pub name: ToolName,
pub definition: llm_worker::ToolDefinition,
pub required_capabilities: Vec<HostCapability>,
}
```
Rules:
- Register into the existing `ToolRegistry` / `ToolServerHandle`; do not add a plugin-dispatcher tool that multiplexes plugin calls outside normal tool history.
- Preserve normal `PreToolCall` permission evaluation, tool-call history, result history, output truncation/bounding, and diagnostic behavior.
- Host-controlled feature enablement decides whether a contributed tool is installed. Manifest/profile tool permission still decides whether a model may call it at runtime.
- Duplicate tool names should be rejected during feature registry preflight with a diagnostic, not discovered later through a panic or undefined ordering.
- Public feature identity should be source-qualified (`builtin:memory`, `project:foo`, `plugin:<digest>:bar`), while model-visible tool names should remain explicit stable names. Do not auto-prefix model tool names unless the project deliberately chooses a future namespacing policy.
- Tool schemas/descriptions must be part of the normal `ToolDefinition` path so model-visible surfaces remain inspectable and bounded.
- If a required host service is not granted or configured, the tool should not be registered; the install report should explain the skipped contribution.
### Hook contribution
Hook contribution must depend on the safe hook surface produced by `hook-public-surface-hardening`.
Recommended public hook principles:
- Public hooks register through `PublicHookRegistrar`, which wraps `HookRegistryBuilder` but exposes only hardened hook traits/actions.
- Public hooks receive snapshots/views, not mutable Pod/Worker handles.
- Public hook return values should be decisions such as continue, deny/rewrite a tool decision through a host-defined synthetic result path, emit diagnostics, or request a durable notification/history append through a host sink. They should not return raw `llm_worker::Item` vectors.
- Public hooks must not be able to mutate request context, session history, or Worker state invisibly.
- Permission enforcement hooks remain host/internal and run before feature hooks for `PreToolCall` so a feature cannot approve a denied tool call.
- Hook ordering should be explicit and stable: internal safety hooks first, public feature hooks in registry order or declared priority bands, internal usage/accounting hooks where needed. Priority should be coarse, not arbitrary integer ordering that lets plugins fight for precedence.
Possible hardened hook action shape:
```rust
pub enum PublicPreToolCallDecision {
Continue,
DenyWithSyntheticError { message: String },
EmitDiagnostic { diagnostic: FeatureDiagnostic },
}
pub trait PublicPreToolCallHook: Send + Sync {
fn on_pre_tool_call(&self, event: PublicPreToolCallEvent<'_>) -> PublicPreToolCallDecision;
}
```
If a hook needs to add model-visible text, it should use `FeatureNotifySink::notify_model(...)` or another host-owned durable append API, not return an `Item`.
### Notification/event contribution
Expose two distinct sinks:
```rust
pub struct FeatureNotifySink<'a> { /* host-owned */ }
pub struct FeatureEventSink<'a> { /* host-owned */ }
```
Recommended behavior:
- `FeatureNotifySink::notify_model(...)` creates a model-visible notification through the existing durable notification/system-item path. The host commits the corresponding `SystemItem` before it is appended to Worker history.
- `FeatureNotifySink::notify_user(...)` or `FeatureEventSink::emit(...)` creates user-visible diagnostics/progress/action events through the existing alert/event path. These are not model-visible unless explicitly routed through `notify_model`.
- Event payloads should be typed, bounded, and feature-identified. Avoid arbitrary JSON blobs as the first public API; allow an opaque bounded metadata field only if diagnostics require it.
- Notifications and events should require explicit capabilities such as `EmitModelNotification` and `EmitUserEvent`.
- Background feature tasks must use these sinks; they must not hold raw log writers or append directly to history.
Useful initial event shape:
```rust
pub struct FeatureEvent {
pub feature_id: FeatureId,
pub level: FeatureEventLevel, // Info, Warn, Error
pub channel: String, // e.g. "work-item"
pub summary: String,
pub detail: Option<String>,
pub model_visible: bool, // false unless host routes through notify_model
}
```
`model_visible` should be host-controlled in practice: a feature may request model visibility, but the sink decides whether that capability is granted and records the durable append if it is.
### Capability request/grant/diagnostics
Capabilities are requested by descriptors and granted by the host. A feature may request a capability, but it must not assume the capability exists.
Initial capability categories:
```rust
pub enum HostCapability {
ContributeTool { name: ToolName },
ContributeHook { point: pod::hook::HookPoint },
EmitUserEvent,
EmitModelNotification,
ScopedFs { read: bool, write: bool, execute: bool },
WorkItemStore { read: bool, write: bool },
MemoryStore { read: bool, write: bool },
PodManagement { spawn: bool, message: bool, restore: bool },
Network { purpose: NetworkPurpose },
SecretRef { id: String },
}
```
Important separation:
- Capability grants decide whether a feature may install and receive host services.
- Tool permissions decide whether an installed tool call may execute for a specific Pod/run.
- Scope permissions decide which filesystem paths or delegated Pod capabilities a host service may touch.
Diagnostics should be first-class:
```rust
pub struct FeatureInstallReport {
pub feature_id: FeatureId,
pub enabled: bool,
pub granted: Vec<HostCapability>,
pub denied: Vec<CapabilityDenial>,
pub installed_tools: Vec<ToolName>,
pub installed_hooks: Vec<String>,
pub skipped_contributions: Vec<SkippedContribution>,
pub diagnostics: Vec<FeatureDiagnostic>,
}
```
Diagnostics must avoid secrets and must be safe for session logs, TUI display, and future `ListFeatures`/profile validation output.
## 4. State ownership model
Feature state belongs to the feature module.
- A feature may own `Arc<State>` and clone it into contributed tools, hooks, and background tasks.
- The Pod registry stores descriptors, install reports, enabled/disabled status, and host-owned handles. It does not store feature business state.
- Durable feature data must live in a feature-owned or host-granted store with an explicit API: WorkItem files through a WorkItem service, memory records through memory APIs, plugin config/state through a future plugin-state service, etc.
- Session history is not feature storage. It is an audit/replay record of model-visible interactions and host-visible events.
- A feature that needs restoration after process restart should reconstruct itself from its own durable store/config plus normal Pod metadata, not from private data hidden in Worker context.
- Background tasks are allowed only if they communicate through granted sinks/services and have a defined shutdown/lifecycle policy owned by the host.
This model lets built-ins and plugins share the same contribution shape while keeping Pod runtime ownership clear.
## 5. Safety invariants / forbidden operations
Public features/plugins must not be able to perform these operations:
- Mutate prompt context directly.
- Append, remove, reorder, or rewrite Worker history directly.
- Insert model-visible text that is not committed through a durable host path.
- Return raw `llm_worker::Item` values from public hooks.
- Access raw `Worker`, raw `Pod`, raw `ToolServerHandle`, raw `llm_worker::Interceptor`, raw `NotifyBuffer`, raw session log writer, or raw event sender.
- Register tools outside `ToolRegistry` or bypass normal tool-result history recording.
- Bypass `PreToolCall` permission policy.
- Grant themselves capabilities or infer grants from successful construction.
- Mutate manifest/profile/scope state directly.
- Perform filesystem/process/network/secret access outside granted host services.
- Emit unbounded tool outputs, event payloads, diagnostics, or notification bodies.
- Put secrets into diagnostics, session logs, model context, TUI output, or feature install reports.
- Depend on MCP/WASM/package-distribution mechanics in the base Pod API.
Positive invariant: if the model can see a feature-produced fact, a future replay/resume must have a durable explanation for why that fact was present.
## 6. Placement and crate-boundary recommendation
Recommended placement:
- `crates/pod/src/feature.rs` or `crates/pod/src/feature/mod.rs`
- public feature traits/types
- feature registry builder
- install reports/diagnostics
- capability request/grant model
- typed registrars/sinks
- `crates/pod/src/hook.rs`
- remains the public hook module after hardening
- should expose safe Pod-level hook traits/actions only
- should not re-export `llm_worker::Interceptor` power
- `crates/llm-worker`
- remains owner of generic LLM tools/interceptors/history machinery
- should not depend on `pod::feature`
- `crates/tools`
- remains a source of reusable tool implementations
- built-in feature modules in `pod` can wrap these constructors into `ToolContribution`s
- Future external plugin crates/processes
- should adapt into `FeatureDescriptor` + `FeatureModule` or a host-side adapter that produces equivalent contributions
- should not be called directly by the Pod except through the registry/registrars
Install location in Pod startup:
1. Resolve manifest/profile and host capability policy.
2. Construct `Pod` and internal safety surfaces.
3. Install host/internal hooks such as manifest permission enforcement.
4. Build and install enabled feature modules through `FeatureRegistryBuilder`.
5. Flush/register tools through the existing Worker tool registry.
6. Freeze/install the Pod interceptor and start normal run/attach behavior.
The exact sequencing can be adjusted to match current construction, but the invariant should hold: public feature hooks cannot precede host safety hooks, and feature tools must exist before the model receives the final tool schema for a run.
## 7. Migration path from current built-in registrations
Recommended migration is incremental and behavior-preserving:
1. Land hook public-surface hardening first.
- Remove/replace public raw `Item`-carrying hook actions.
- Define which hook decisions are safe for external contributors.
2. Add `pod::feature` with no behavior change.
- Implement descriptors, capability grants, install reports, and registrars.
- Initially register no external plugins.
3. Wrap current built-in tool registration as built-in feature modules.
- Start with a small built-in feature whose state/services are already cleanly bounded.
- Preserve existing tool names, schemas, and permission behavior.
- Convert duplicate-name failures into registry diagnostics before flushing tools.
4. Move larger built-in groups behind feature modules.
- Filesystem/process tools from `crates/tools`.
- Memory tools.
- Pod orchestration tools.
- Task/WorkItem tools once their stores and hooks have explicit capabilities.
- Web tools as configured provider-backed features.
5. Move built-in hook contributions only after safe hook semantics are stable.
- Keep manifest permission enforcement as an internal host hook, not a feature hook.
- Keep accounting/usage hooks internal unless they become genuine feature behavior.
6. Treat workflow/user-input expansion separately.
- Workflow invocation already uses a durable system-item attachment pattern.
- Do not expose arbitrary workflow-like context injection to plugins until there is a safe typed command/input-contribution API with durable append semantics.
7. Add profile/manifest enablement after built-ins work through the same registry.
- Built-ins and external plugins should share descriptor/capability/install-report mechanics.
- Host policy may grant built-ins by default, but built-ins should still declare what they use.
## 8. Impact on WorkItem / MCP / plugin distribution follow-ups
WorkItem / intake routing:
- WorkItem routing can become a built-in feature that contributes WorkItem tools, optional routing hooks, and user-visible action events.
- It should request `WorkItemStore` and event/notification capabilities instead of reaching into ticket files ad hoc.
- Model-visible routing hints or intake results must be committed through notification/history append paths.
- This registry gives the WorkItem feature a clean way to install without making WorkItem a special Pod runtime mode.
MCP:
- MCP should be an adapter/runtime kind that produces normal `ToolContribution`s and possibly safe event diagnostics.
- MCP tool calls must still pass through `ToolRegistry`, PreToolCall permission, output bounding, and history result recording.
- MCP resources/prompts should not become invisible prompt injection. If exposed later, they should be explicit tools, user-invoked attachments, or durable notification/history appends.
- MCP transport/session details are out of scope for the base API beyond the `FeatureRuntimeKind::McpBridge` placeholder.
Plugin distribution:
- Archive validation, cache extraction, signing/trust, WASM execution, external process supervision, and package update policy should remain separate follow-up designs.
- Distribution mechanisms should eventually produce the same descriptor/capability/contribution objects as built-ins.
- Capability grants are the host trust boundary; package installation alone must not grant runtime authority.
## 9. Open questions / risks
1. Tool naming policy is the highest-risk API decision.
- Recommendation: feature identities are source-qualified, model-visible tool names stay explicit and stable, and collisions are rejected by the host.
- Risk: external plugins may need namespacing later. Auto-prefixing now would avoid collisions but would also change model-facing ergonomics and diverge from current built-in tool names.
2. The exact safe hook action set must be settled by `hook-public-surface-hardening`.
- Especially important: whether public pre-tool hooks may synthesize denials/results, and how durable append requests are represented.
3. Notification/event durability needs precise semantics.
- User-visible events may be live-only, while model-visible notifications must be durable. The public API should make this distinction impossible to miss.
4. Capability granularity can easily become either too coarse or too noisy.
- Start with coarse host-service capabilities plus normal tool permissions, then split only when real features need finer grants.
5. Runtime enable/disable is not designed here.
- Initial registry should be install-at-startup. Hot reload or dynamic plugin enablement needs separate lifecycle, cleanup, and schema-refresh design.
6. Persistent plugin state needs a future host service.
- The base API says state is feature-owned, but external plugins will still need a sanctioned durable state directory/store with migration/versioning rules.
7. Background tasks need lifecycle policy.
- If external plugins can spawn tasks, the host must define shutdown, cancellation, panic handling, diagnostic routing, and whether task output may become model-visible.
8. Existing workflow/input expansion is close to the forbidden boundary.
- It is safe only because it commits system items before model visibility. Any future plugin command/input contribution must preserve that durable replay property.
---