Compare commits

...

550 Commits

Author SHA1 Message Date
365b8c34fd sanitize: neutralize provider notes and remove claude knowledge 2026-05-28 07:45:49 +09:00
4361385946 sanitize: remove local path references from current tree 2026-05-28 06:26:34 +09:00
9ccbdda27c chore: record spawnpod hang report and local manifest 2026-05-28 06:21:01 +09:00
9a0ef7c799 work-items: close openai unhandled sse observability 2026-05-28 05:44:20 +09:00
732d6a57b7 merge: openai unhandled sse observability 2026-05-28 05:44:14 +09:00
3b90f26dea fix: trace unhandled openai responses sse 2026-05-28 05:18:57 +09:00
6877447616 work-items: add openai unhandled sse observability 2026-05-28 05:13:41 +09:00
b808811843 work-items: add pod orchestration guidance item 2026-05-28 04:45:03 +09:00
838ccbb65f work-items: close tickets sh mvp 2026-05-28 04:29:35 +09:00
4d080ca985 merge: tickets work item thread mvp 2026-05-28 04:27:56 +09:00
b1d8f7f181 fix: repair migrated work item encoding 2026-05-28 04:09:47 +09:00
134d0ce2a1 feat: add tickets work item mvp 2026-05-28 03:59:05 +09:00
2820cbbe53 ticket: clarify workitem migration scope 2026-05-28 03:49:21 +09:00
5c6df298aa ticket: complete openai responses diagnostics 2026-05-28 03:23:54 +09:00
ae196c2a87 ticket: record openai responses diagnostics fix 2026-05-28 03:23:25 +09:00
f56793589f fix: preserve openai responses incomplete diagnostics 2026-05-28 03:22:53 +09:00
33884bd0ce ticket: complete memory consolidation skip observability 2026-05-28 03:09:42 +09:00
2ba35cca23 merge: memory consolidation skip observability 2026-05-28 03:09:14 +09:00
860767a143 ticket: complete llm request timeout fix 2026-05-28 02:44:00 +09:00
d65cfe146d ticket: record llm request timeout fix 2026-05-28 02:43:23 +09:00
1babd021b0 fix: add llm request lifecycle timeouts 2026-05-28 02:42:31 +09:00
bdabe789e3 ticket: openai responses incomplete observability 2026-05-28 02:40:30 +09:00
48c4c9b56b ticket: llm client request timeouts 2026-05-28 02:07:01 +09:00
b1fb3ec0fa ticket: complete codex oauth wire compatibility 2026-05-28 02:05:49 +09:00
b3c739867e fix: align codex oauth wire behavior 2026-05-28 01:57:04 +09:00
5ae886ea99 ticket: codex oauth wire compatibility 2026-05-28 01:44:30 +09:00
2c67f99054 fix: suppress memory idle skip notices 2026-05-27 18:55:58 +09:00
67b5d6354c ticket: complete compact retained split fix 2026-05-26 21:40:18 +09:00
cdc42e5a86 ticket: record compact retained split fix 2026-05-26 21:39:57 +09:00
e49817c2d5 feat: trace pre-stream lifecycle 2026-05-26 21:05:45 +09:00
9405ffc633 feat: add session stream event trace flag 2026-05-26 19:57:47 +09:00
77e2ad0c40 fix: compact retained split uses raw tail size 2026-05-26 17:52:09 +09:00
a2771180cc ticket: compact retained split usage records 2026-05-26 17:04:29 +09:00
2cfc3b63c2 ticket: pod scope persistence authority 2026-05-26 16:50:01 +09:00
b2e53f2f61 chore: complete memory summary resident injection ticket 2026-05-26 13:29:03 +09:00
b22040ac84 chore: complete tui user manifest env overlay ticket 2026-05-26 10:10:00 +09:00
80a4f90004 fix: align spawn user manifest env overlay 2026-05-26 10:09:17 +09:00
0b582faebc merge: memory summary resident injection 2026-05-26 09:55:24 +09:00
d084923878 fix: split resident injection gates 2026-05-26 09:44:24 +09:00
df3373c3f2 docs: add tickets.sh workitem mvp ticket 2026-05-26 09:33:30 +09:00
a3e852c6b3 docs: add memory tool guidance ticket 2026-05-26 09:21:57 +09:00
b25f4c7468 feat: inject memory summary into resident prompt 2026-05-26 09:21:10 +09:00
39d40d391b chore: tune project memory thresholds 2026-05-26 09:05:14 +09:00
f87cf5bd00 docs: add memory summary resident injection ticket 2026-05-26 08:50:58 +09:00
9f5e27f3fd merge: memory consolidation skip observability 2026-05-26 08:37:32 +09:00
c101b42619 fix: confirm SpawnPod initial run delivery 2026-05-26 08:37:24 +09:00
f56ef010a8 chore: ignore generated insomnia memory 2026-05-26 08:14:46 +09:00
8095c86be2 fix: suppress memory idle skip notices 2026-05-26 08:03:17 +09:00
99797b9e40 docs: refine memory consolidation skip ticket 2026-05-26 07:53:37 +09:00
1ac197fc6c chore: complete llm retry continuation ticket 2026-05-26 07:22:45 +09:00
3ff78c03af feat: surface llm retry and continuation state 2026-05-26 07:13:59 +09:00
156a55d1d1 docs: refine llm retry continuation ticket 2026-05-26 05:20:43 +09:00
597c6fc3e9 docs: note spawnpod delivery race precedent 2026-05-25 07:03:00 +09:00
a70fe65ed5 docs: add spawnpod run delivery ticket 2026-05-25 06:37:38 +09:00
fa225eb01d docs: add live pending pod picker ticket 2026-05-25 06:29:13 +09:00
8e21e2f3f2 docs: add memory consolidation skip ticket 2026-05-25 05:43:06 +09:00
f51f17cf93 docs: specify stream continuation policy 2026-05-25 04:48:07 +09:00
7e4d90fc1b chore: complete memory audit log ticket 2026-05-25 03:38:18 +09:00
235ddba9c5 merge: memory-audit-log 2026-05-25 03:38:03 +09:00
fe6f5eb326 memory: add audit log events 2026-05-25 03:24:04 +09:00
06da8c5b00 docs: add actionbar notice api ticket 2026-05-25 02:40:59 +09:00
87b2e8eb16 docs: expand memory audit log ticket 2026-05-25 02:06:42 +09:00
2f3adc3d14 fix: refine command mode footer 2026-05-25 01:08:41 +09:00
21ec057de0 chore: complete tui-system-command-compact ticket 2026-05-24 09:40:41 +09:00
dd571a963e merge: tui-system-command-compact 2026-05-24 09:40:25 +09:00
afd65442c5 test: clean up compact event assertion 2026-05-24 09:39:57 +09:00
a4358eed14 feat: add manual compact command 2026-05-24 08:59:44 +09:00
9685bfffba chore: complete tui-command-mode ticket 2026-05-24 08:39:25 +09:00
3a734c30bf merge: tui-command-mode 2026-05-24 08:38:39 +09:00
6e8aa92e38 feat: add TUI command mode 2026-05-24 08:32:21 +09:00
811a449c28 docs: replace gui mvp with tui spawned pod panel 2026-05-24 08:10:21 +09:00
0fd995c85e docs: split tui command and navigation tickets 2026-05-24 07:59:51 +09:00
e0d7468ebb chore: complete worker-history-append-contract ticket 2026-05-24 07:37:29 +09:00
07dc185032 merge: worker-history-append-contract 2026-05-24 07:37:05 +09:00
efb0ac7da3 docs: split maintainer workflows by role 2026-05-24 07:34:30 +09:00
e7b0a0b20f fix: route worker history appends through callbacks 2026-05-24 06:44:19 +09:00
5508299e76 chore: drop stale tui spawn error todo 2026-05-24 06:29:15 +09:00
59e4aac7f7 chore: complete tui-input-queue ticket 2026-05-23 13:58:09 +09:00
6485632a4c merge: tui-input-queue 2026-05-23 13:57:32 +09:00
8d6b47bef1 feat: queue tui input during runs 2026-05-23 13:57:22 +09:00
6046842242 docs: add manual turn rollback ticket 2026-05-23 13:35:03 +09:00
1c8b349e01 chore: complete tui-empty-turn-restore ticket 2026-05-23 13:30:01 +09:00
d70c10b782 merge: tui-empty-turn-restore 2026-05-23 13:29:07 +09:00
70c0548190 feat: restore rolled back tui input 2026-05-23 13:28:56 +09:00
6acaccccf7 chore: complete pod-empty-turn-rollback ticket 2026-05-23 12:52:42 +09:00
b9dd0ba0d0 merge: pod-empty-turn-rollback 2026-05-23 12:52:12 +09:00
23e218abaa chore: handle rolled back run result clients 2026-05-23 12:51:40 +09:00
55dedd173c feat: rollback empty interrupted turns 2026-05-23 12:50:46 +09:00
df629b4dc6 fix: make visible pod list schema object 2026-05-23 12:29:37 +09:00
7c573f36e2 chore: complete pod-discovery-restore-tools ticket 2026-05-23 12:05:30 +09:00
ca869195dc merge: pod-discovery-restore-tools 2026-05-23 12:04:59 +09:00
e6fa660a5f feat: add visible pod discovery tools 2026-05-23 12:04:45 +09:00
f7a3b0adf1 chore: complete memory-extract-remove-input-cap ticket 2026-05-23 09:14:37 +09:00
ea9e924d35 merge: memory-extract-remove-input-cap 2026-05-23 09:14:15 +09:00
3b582a4f73 fix: remove memory extract input cap 2026-05-23 09:14:07 +09:00
2a721e3776 chore: complete tui-pod-restore-picker ticket 2026-05-23 09:13:57 +09:00
61347362d1 merge: tui-pod-restore-picker 2026-05-23 09:13:19 +09:00
e2688da828 feat: restore tui sessions by pod 2026-05-23 09:13:06 +09:00
a0e544e3e4 chore: complete spawned-delegation-scope-reclaim ticket 2026-05-23 08:39:04 +09:00
96fd9574a2 merge: spawned-delegation-scope-reclaim 2026-05-23 08:38:50 +09:00
ab4611001e fix: reclaim delegated scope from stopped children 2026-05-23 08:38:42 +09:00
5b20d21ea0 docs: refine pod visibility and tui restore flow 2026-05-23 08:33:00 +09:00
48625f5077 update: tui -rの際のリストの時系列ソート 2026-05-23 08:02:05 +09:00
fbe8846393 chore: complete tui-streaming-input-loss ticket 2026-05-23 07:16:08 +09:00
8ae3849cc8 merge: tui-streaming-input-loss 2026-05-23 07:15:55 +09:00
e29861f787 fix: preserve tui input during streaming 2026-05-23 07:15:39 +09:00
fa00c1f188 chore: complete tui-context-usage-indicator ticket 2026-05-23 07:15:30 +09:00
879434e240 merge: tui-context-usage-indicator 2026-05-23 07:15:17 +09:00
4b263f8743 feat: show context usage in tui status 2026-05-23 07:15:03 +09:00
7315114b20 docs: identify tui streaming input loss race 2026-05-23 05:47:59 +09:00
f14c8cb614 Create tui-parts.md 2026-05-23 05:41:48 +09:00
da5d789897 fix: tighten task tool usage guidance 2026-05-23 05:11:48 +09:00
802cbf2f45 chore: complete prune-token-budget ticket 2026-05-23 05:00:30 +09:00
18c30c5f90 merge: prune-token-budget 2026-05-23 05:00:15 +09:00
dfec60438e feat: protect prune tail by token budget 2026-05-23 05:00:06 +09:00
6a5b8ed152 chore: complete pod-event-callback-delivery ticket 2026-05-23 04:57:26 +09:00
baaec0c77f merge: pod-event-callback-delivery 2026-05-23 04:57:10 +09:00
fdd2f16df0 fix: drain snapshots before pod callbacks 2026-05-23 04:57:03 +09:00
3e7a15a2b5 docs: add memory extract input cap ticket 2026-05-23 04:42:38 +09:00
b5219dc862 docs: add pod event callback delivery ticket 2026-05-23 03:29:01 +09:00
c1173dd8a1 docs: add spawned delegation scope reclaim ticket 2026-05-23 03:02:48 +09:00
f03e84a62a refactor: remove legacy plural log entries 2026-05-23 02:03:42 +09:00
e80a3fbf8e docs: track read pod output log entry bug 2026-05-23 00:53:47 +09:00
8947a89e7b docs: add pod discovery restore tools ticket 2026-05-23 00:09:34 +09:00
f46cdd6dbc chore: complete spawned-registry-persist ticket 2026-05-22 23:30:16 +09:00
1a5b5331d6 merge: spawned-registry-persist 2026-05-22 23:30:06 +09:00
530027c62b feat: persist spawned pod registry 2026-05-22 23:30:02 +09:00
8e7126d177 chore: complete pod-name-resume ticket 2026-05-22 22:57:31 +09:00
3fe4a6bc14 merge: pod-name-resume 2026-05-22 22:57:23 +09:00
12a4ba5edf feat: resume pods by name 2026-05-22 22:57:16 +09:00
d3b78234c2 chore: complete pod-state-write-points ticket 2026-05-22 22:29:23 +09:00
baf7403c8c merge: pod-state-write-points 2026-05-22 22:29:12 +09:00
5955695db8 feat: wire pod metadata lifecycle writes 2026-05-22 22:29:08 +09:00
bacba69d31 chore: complete pod-state-backend ticket 2026-05-22 22:03:36 +09:00
08dc6b29f8 style: run cargo fmt 2026-05-22 22:03:27 +09:00
d08ea1734e merge: pod-state-backend 2026-05-22 22:03:17 +09:00
ec5b891fec feat: add pod metadata store backend 2026-05-22 22:03:11 +09:00
5830bb9c85 Merge: live-fork-marker 2026-05-20 06:45:49 +09:00
16ef135f1f chore: 空になった Storage 親見出しを TODO から削除 2026-05-20 06:45:43 +09:00
15f514dfe2 ticket: live-fork-marker 完了 2026-05-20 06:45:19 +09:00
fbd97c3546 chore: auto-fork ロジック二重実装を KNOWN_ISSUES に登録 2026-05-20 06:45:14 +09:00
bb4205b531 ticket: live-fork-marker レビュー (Approve) 2026-05-20 06:44:54 +09:00
077efee13b feat: live auto-fork の marker 形式を確定(seq 比較 + forked_from 記録)
方針: 末尾 entry-count 比較で検知し、元 Segment は immutable のまま
(terminal marker を書き戻さない)。fork lineage は新 Segment の
SegmentStart.forked_from に前向きに記録するため、log だけから辿れる。
過去 fork と対称で、nested fork も marker 位置の調停が不要。

- session-store ensure_head_or_fork に at_turn_index 引数を追加し
  新 Segment へ forked_from を記録
- pod ensure_segment_head の auto-fork も同様に forked_from を記録
  (at_turn_index = writer の現 turn_count)
- fork_at の doc に「元 Segment を mutate しない」invariant を明記
- test: nested past-fork が祖先を不変に保つ / Pod 並行 writer drift で
  auto-fork し forked_from を記録 / 元 Segment に marker が書かれない
2026-05-20 06:42:09 +09:00
b5d5c03412 Merge: session-grouping-introduce 2026-05-20 06:29:48 +09:00
bee41379fa ticket: session-grouping-introduce 完了 2026-05-20 06:29:43 +09:00
842e7a3c58 update: session-grouping review follow-up
- PickerOutcome::Picked から未使用の session_id を除去(pod-cli が lookup_session_of で再解決)
- picker preview が singular AssistantItem も拾うように
- fs_store layout doc に migration(後方互換なし、旧 flat sessions は破棄)を明記
- TaskStore は Session-lifetime、ScopedFs/Tracker は Pod-process lifetime と用語整理
- Pod::session_id / from_manifest_spawned のコメント補強
2026-05-20 06:29:37 +09:00
e8c16be475 feat: Session(Segment 群の grouping)を導入
- SessionId 型を新設、各 SegmentStart に session_id を持たせる
- compaction / 内部 fork は同 SessionId を継承、fork() は新 Session を発行
- Store API を (SessionId, SegmentId) ベースに、FsStore layout は
  <root>/<session_id>/<segment_id>.jsonl に
- Store::list_sessions / list_segments(session_id) / lookup_session_of を追加
- restore_by_segment shim を session-store に提供(pod-cli --session で使用)
- SegmentState に SegmentLocation (session_id, segment_id) を保持し ArcSwap で更新
- RestoredState に session_id: Option<SessionId> を追加
- Picker は Session 単位に列挙、leaf segment を解決して resume
2026-05-20 06:17:56 +09:00
58f54b99f3 Merge: segment-rename 2026-05-20 05:18:11 +09:00
5aea9730c6 ticket: segment-rename 完了 2026-05-20 05:18:04 +09:00
a63f076856 update: 残存 Session 識別子の Segment 化(review follow-up)
レビュー指摘の通り、次の session-grouping-introduce で新 SessionId が
入る前に名称衝突を避けるため取り残しを掃除。

- PodError::Session{Empty,ScopeMissing} → Segment{Empty,ScopeMissing}
- ScopeLockError::SessionConflict → SegmentConflict
- Pod.session_state / SegmentState.set_session_id 系
- source_session_id / prev_session_id / ensure_session_head / short_session
- pod_cli の "Session ID:" 表示
- fs_store の sessions ローカル変数
2026-05-20 05:17:49 +09:00
ac1d8b1c7d update: Session-lifetime/scoped を Pod-lifetime に修正
タスクストア/ファイルトラッカーは compaction を跨いで Pod プロセス寿命まで生きる。
旧 SessionId = Segment の時代の表現を Pod-lifetime に正す。pod_cli の表示も Segment: に。
2026-05-20 05:06:38 +09:00
d5fcbc2125 update: SessionId / SessionStart / SessionOrigin 等を Segment 系名称へ
- Type/Function/Variantを Segment* 系へ統一
  - SessionId/SessionStart/SessionOrigin/SessionStartState/SessionState/SessionLogSink/SessionLockInfo
  - new_session_id / session_id / create_session* / list_sessions / lookup_session / update_session / find_by_session
  - protocol Event::SessionRotated → SegmentRotated、CompactDone.new_session_id → new_segment_id
- Module: session_log → segment_log / session → segment (file mv 含む)
  pod 側の session_log_sink → segment_log_sink も同様
- crate 名 (session-store)、CLI flag (--session)、ResumeWithSession (CLI tied) は据え置き
- session-tests/session_metrics_test 等の Store impl も追従
2026-05-20 05:06:04 +09:00
45db480b0b Merge: entry-hash-abolish 2026-05-20 04:53:52 +09:00
4b8aee909b ticket: entry-hash-abolish 完了 2026-05-20 04:53:47 +09:00
903cfa3060 update: 旧用語コメントの掃除と KNOWN_ISSUES 追記
- 残存していた head_hash / SessionHead 言及コメントを 3 箇所更新
- FsStore::read_entry_count の O(n) 計測コストを KNOWN_ISSUES に登録
2026-05-20 04:53:33 +09:00
27a1d07e98 ticket: entry-hash-abolish レビュー (Approve) 2026-05-20 04:49:17 +09:00
9bfbb2fb4c update: entry hash chain と session_head mutex を撤廃
- HashedEntry / EntryHash / compute_hash / build_chain 撤去、JSONL は 1 行 1 LogEntry
- SessionOrigin.at_hash → at_turn_index (TurnEnd 由来) に置換
- Pod 側 SessionHead mutex を ArcSwap<SessionId> + AtomicUsize の SessionState に置換
- ensure_head_or_fork は store の entry count と writer の append tally で判定
- session-store から sha2 / hex 依存、pod から parking_lot 依存を削除
2026-05-20 04:31:37 +09:00
1a9bb30824 ticket: 永続化整理を 8 個に分割
persistence-semantics と pod-persistent-state を実装可能な粒度に分割。
Storage 層 (Phase 1) を entry-hash-abolish / segment-rename /
session-grouping-introduce / live-fork-marker に、Pod 単位永続化
(Phase 2) を pod-state-backend / pod-state-write-points /
pod-name-resume / spawned-registry-persist に切り出した。
2026-05-20 04:07:44 +09:00
0440d5c6dc Merge: invoke-turn-llmcall-semantics
# Conflicts:
#	crates/pod/src/controller.rs
2026-05-15 22:08:41 +09:00
01200a0d33 ticket: invoke-turn-llmcall-semantics 完了 2026-05-15 21:54:40 +09:00
d3b7663d41 ticket: worker-history-append-contract 作成 2026-05-15 21:53:24 +09:00
b204909c4c chore: KNOWN_ISSUES に controller_test::double_run_returns_error の flakiness を追記 2026-05-15 21:52:40 +09:00
9a89d2419a ticket: pod-interrupt-prep-internalize 完了 2026-05-15 21:52:24 +09:00
af6427ff67 ticket: pod-interrupt-prep-internalize レビュー (Approve with follow-up) 2026-05-15 21:51:57 +09:00
4c8596db38 update: Paused→Run の interrupt 前処理を Pod::run に内包 2026-05-15 21:51:57 +09:00
c779768b6e ticket: invoke-turn-llmcall-semantics review (Approve) 2026-05-15 21:42:43 +09:00
49b78612d6 feat: Invoke marker と LlmCall callback を導入し AgentTurn セマンティクスを明確化
- protocol: InvokeKind enum、Event::InvokeStart / LlmCallStart / LlmCallEnd 追加
- llm-worker: Worker.llm_call_count と on_llm_call_start/end callback、turn_count を AgentTurn 数として doc 更新
- session-store: LogEntry::Invoke { ts, trigger } 追加 (replay は marker のみで state 不変)
- pod: run/run_for_notification 開始時に Invoke marker commit、PendingRun::RunForNotification(InvokeKind) で kind を伝搬
- pod ipc: sink + server で Invoke エントリーを Event::InvokeStart として broadcast
- tui: 新 Event 3種を no-op で受理 (UI 設計はチケット範囲外)
2026-05-15 07:04:26 +09:00
ce6085b5f4 ticket: invoke/turn/llmcall 決定事項と実装範囲を明文化 2026-05-15 06:48:57 +09:00
1b83b2c40a ticket: Exchange語撤廃、Invoke/Turn/LlmCall でセマンティクスを再整理 2026-05-15 05:41:13 +09:00
9d04008123 ticket: pod-input-validate-internlize完了 2026-05-15 05:38:27 +09:00
4ebc2c96b3 update: Controllerで入力のValidationを行っていた部分をPod側に移す 2026-05-15 05:33:33 +09:00
bb6f7e2022 ticket: PodとControllerの責務の抱え違いを修正するチケット 2026-05-15 04:52:39 +09:00
86b48a9fdf ticket: pod-parent-turn-callback完了 2026-05-15 04:43:12 +09:00
59067bd115 ticket: pod-parent-turn-callbackレビュー 2026-05-15 04:42:29 +09:00
6116d72570 ticket: 消し忘れ 2026-05-15 04:39:30 +09:00
8e8c0887de update: 親にターン完了を通達する経路の整理 2026-05-15 04:38:53 +09:00
3143353ddc update: エントリの単数化のフォローアップ 2026-05-14 19:42:23 +09:00
f35d99900f update: 書き込みの不要なasyncを削除 2026-05-14 19:16:48 +09:00
6e7494553b ticket: 書き込みのsync化を計画 2026-05-14 16:45:58 +09:00
904ea6e326 update: SystemItem1本化 2026-05-14 14:36:29 +09:00
b6b158a244 ticket: イベントプロトコルと永続化におけるシステムイベントの統合 2026-05-14 04:12:40 +09:00
e32b210d50 chore: cargo fmt 2026-05-14 03:36:08 +09:00
a02f34437c fix: 実態にそぐわないEvent::Entryを実装した構造を訂正 2026-05-14 03:35:52 +09:00
1ef094f039 refactor: Podのメインループのリファクタリング 2026-05-14 03:27:49 +09:00
e57e23b999 ticket: 追加:Podのメインループとソケット通信周りのリファクタリング 2026-05-13 22:16:25 +09:00
13feb36518 ticket: add tui manual compact command 2026-05-13 06:50:27 +09:00
9e4bdf315f docs: update pod cli manifest flags 2026-05-13 06:44:48 +09:00
068a975488 ticket: note tui user manifest overlay mismatch 2026-05-13 06:41:23 +09:00
3d23c4ed40 close: complete pod manifest and file ref tickets 2026-05-13 06:30:45 +09:00
d2149d11d3 merge: file-ref-directory 2026-05-13 06:30:45 +09:00
ada2988105 merge: pod-cli-manifest-flags 2026-05-13 06:30:45 +09:00
d6cfea463a review: file-ref-directory 2026-05-13 06:30:45 +09:00
43330cf624 review: pod-cli-manifest-flags 2026-05-13 06:30:45 +09:00
21a78fb19e refactor: PodControllerの構造のリファクタリング 2026-05-13 06:07:38 +09:00
0ae6592032 docs(tickets): PodControllerの構造調整チケット作成 2026-05-13 05:43:23 +09:00
0e1539fefa chore: planの更新 2026-05-13 05:42:55 +09:00
dff72e291b feat: handle directory file refs 2026-05-13 02:57:58 +09:00
c6a9007b58 feat: organize pod manifest cli flags 2026-05-13 02:57:50 +09:00
d1c7297f87 feat: Languageインストラクションの追加 2026-05-13 02:27:30 +09:00
3c4a34b13b update: fmt + memoryに用いる言語の構成 2026-05-13 01:57:04 +09:00
076cf9af18 fix: compact時にToolCallとOutputの間でCutしてしまう問題 2026-05-13 00:59:02 +09:00
2f5f5b8a26 chore: workflowの調整・knowledgeの追加テスト 2026-05-13 00:06:33 +09:00
a363546a14 merge: lint common crate 2026-05-12 21:56:49 +09:00
599b24fa9e chore: complete lint common crate ticket 2026-05-12 21:56:39 +09:00
4bdbac6597 refactor: extract shared lint record primitives 2026-05-12 21:56:25 +09:00
20a6748cdd docs(tickets): submit時FileRefでディレクトリを参照した時の挙動 2026-05-12 17:39:40 +09:00
1271d13f26 docs(tickets): mainfest-output-upload-limits完了 2026-05-12 17:27:47 +09:00
5882341b21 feat: add manifest output upload limits 2026-05-12 16:20:15 +09:00
19730ba7c0 Merge branch 'tui-knowledge-completion' into develop 2026-05-12 15:43:29 +09:00
7a76276539 docs(memory): fix knowledge dir path in collect_resident_knowledge doc 2026-05-12 15:07:39 +09:00
64d12f2a6f docs(tickets): review tui knowledge completion (approve) 2026-05-12 14:56:30 +09:00
668bde46f4 feat(pod): wire knowledge slugs into # completion 2026-05-12 14:45:46 +09:00
3647614ab0 docs(tickets): tui knowledge completion unimplemented fix 2026-05-12 14:40:37 +09:00
5a2e69b2bf docs(tickets): define work item query strategy 2026-05-12 02:32:32 +09:00
1d53929250 docs(tickets): use timestamp work item ids 2026-05-12 02:07:29 +09:00
91a0a935b0 docs: add ai maintainer work item plan 2026-05-12 01:53:52 +09:00
bd46491b04 docs(tickets): add lint-common crate ticket 2026-05-12 00:06:06 +09:00
18b0f8b19f merge: workflow crate extraction 2026-05-11 22:50:19 +09:00
f5d69504b5 docs(tickets): complete workflow crate extraction 2026-05-11 22:50:06 +09:00
76e1287cbe review: workflow crate extraction 2026-05-11 22:49:50 +09:00
eb791f9e80 refactor: extract workflow crate 2026-05-11 22:49:07 +09:00
a1b9c865df merge: anthropic assistant burst bundling 2026-05-11 22:24:36 +09:00
985931d6fa docs(tickets): complete anthropic assistant burst bundling 2026-05-11 22:23:53 +09:00
d35c9f40a7 review: anthropic assistant burst bundling 2026-05-11 22:23:38 +09:00
4d6d5b631c fix: bundle anthropic assistant bursts 2026-05-11 22:22:36 +09:00
3354c41e66 merge: memory usage metrics 2026-05-11 21:46:24 +09:00
73d1c05edc docs(tickets): complete memory usage metrics 2026-05-11 21:46:19 +09:00
9e615a41f0 review: memory usage metrics 2026-05-11 21:46:19 +09:00
da4f4cc954 feat: add memory usage event metrics 2026-05-11 21:29:48 +09:00
01d38f042c docs(tickets): complete memory phase naming cleanup 2026-05-11 17:16:36 +09:00
9b99f50264 docs(tickets): simplify memory usage metrics 2026-05-11 16:54:23 +09:00
646b47b40f fix: remove remaining memory phase wording 2026-05-11 01:57:39 +09:00
5cf8eb94c7 docs(tickets): compact-worker-occupancy-cap完了 2026-05-11 01:56:20 +09:00
4d61d044ec update: memoryシステムの"Phase"表記を撤廃 2026-05-11 01:55:28 +09:00
967e57c933 docs(tickets): memory-extract-occupancy-cap 完了 2026-05-11 01:32:45 +09:00
acfe073b29 review: memory-extract-occupancy-cap (approve) 2026-05-11 01:25:20 +09:00
0b79e0ed65 feat: extract worker サーキットブレーカーを占有量ベースに統一 2026-05-11 01:20:37 +09:00
c8871ec4fe docs(tickets): add memory-extract-occupancy-cap ticket 2026-05-11 01:14:59 +09:00
3fece8749b Merge branch 'compact-worker-occupancy-cap' into develop 2026-05-11 01:12:32 +09:00
cac1f4d4fe review: compact-worker-occupancy-cap (set_max_turns 分岐削除) 2026-05-11 00:56:41 +09:00
e664def920 feat: compact worker サーキットブレーカーを占有量ベースに統一 2026-05-11 00:43:16 +09:00
f0a1f98912 docs(tickets): add memory audit log ticket 2026-05-11 00:06:42 +09:00
5ca771ded4 docs(tickets): completed tickets cleanup 2026-05-10 17:31:34 +09:00
9b15135416 merge: memory prompt record policy 2026-05-10 14:40:58 +09:00
b6f99b7651 docs: generalize memory prompt record policy 2026-05-10 14:40:52 +09:00
13c05b1083 docs: memory effectiveness plan 2026-05-10 01:25:10 +09:00
05da79f966 docs: memory prompt ticket policy ticket 2026-05-10 01:13:57 +09:00
92cee690f8 feat: client-crateの実装 2026-05-10 00:57:50 +09:00
6f0ec92f91 chore: E2Eの計画とgit運用の話 2026-05-09 05:04:57 +09:00
32ed5a812c docs(tickets): file-ref-symlink-diagnostics完了 2026-05-09 04:22:27 +09:00
856a0a2432 docs(tickets): file-ref-symlink-diagnosticsレビュー 2026-05-09 04:21:56 +09:00
ced26b952e feat: Toolsのシンボリックリンク対応 2026-05-09 04:21:56 +09:00
e451b07783 docs(tickets): tui-assistant-markdown完了 2026-05-09 03:31:49 +09:00
f6600feab5 docs(tickets): permission既定policy整理チケット追加 2026-05-09 03:27:22 +09:00
553d67a910 docs(tickets): permission-extension-point完了 2026-05-09 03:20:17 +09:00
805be47128 feat: パターンベースのツール権限制御を追加 2026-05-09 03:20:02 +09:00
aa9409869e chore: tui compact progress ticket完了 2026-05-09 03:14:23 +09:00
8ebdd47fbb feat: compactのプログレス表示 2026-05-09 03:11:53 +09:00
ec1eccd10d chore: git方針の変更とセマンティクス変更の計画の帳尻合わせ 2026-05-08 20:17:11 +09:00
42127554d4 docs(tickets): 自己改善workflowの設計 2026-05-08 01:50:55 +09:00
9dbfd15687 docs(tickets): workflow-directory-layout完了 2026-05-08 01:08:25 +09:00
6c31264377 update: Workflowディレクトリ修正のフォローアップ 2026-05-08 00:59:08 +09:00
b6b4168503 feat: Workflowの読み取り位置変更の実装 2026-05-08 00:15:50 +09:00
40cde699a8 docs(tickets): reportの運用・Workflowのディレクトリ位置修正 2026-05-07 23:34:00 +09:00
1ed45032be feat: TUIのmarkdown対応 2026-05-05 18:30:25 +09:00
64814c2e15 docs(tickets): PermissionのチケットとTUIのmd表示 2026-05-05 17:16:03 +09:00
96daebff30 docs(tickets): agent-skills完了 2026-05-05 16:00:40 +09:00
85fe1a094c update: Agent skills実装のレビュー・対応 2026-05-05 13:54:02 +09:00
68249b8072 feat: writingに対する基本的な指示promptを追加 2026-05-05 13:42:34 +09:00
98018972aa feat: agent skillsの互換実装 2026-05-05 13:16:10 +09:00
5b1324a630 fix: Reasoningの永続化のスキーマのミスを修正 2026-05-05 12:30:29 +09:00
4e352bb9ff docs(tickets): turnのセマンティクスを変える計画 2026-05-05 12:29:52 +09:00
5c8d00e49b docs(tickets): reasoning-history-perisit完了 2026-05-04 23:06:21 +09:00
94bb8804f4 update: Reasoningコンテキスト管理のレビュー・対応 2026-05-04 23:05:08 +09:00
30023349b9 feat: Reasoningのコンテキスト管理の対応 2026-05-04 21:31:44 +09:00
b0e6ab16b1 docs(tickets): Reasoningのコンテキスト管理とPruneの調整チケット追加 2026-05-04 21:16:31 +09:00
6e6be6f3ff docs(tickets): tui-task-display完了 2026-05-04 20:43:21 +09:00
eb9bd84b05 feat: Task表示のレビュー・修正 2026-05-04 17:28:39 +09:00
17a7744da1 feat: TUI上に進行中のTaskを表示する実装 2026-05-04 17:06:02 +09:00
a3082072d7 docs(tickets): Compaction進行中のライブ表示 2026-05-04 17:03:51 +09:00
04a471b669 docs(tickets): post-run memory detach 完了 2026-05-04 16:11:38 +09:00
3266ddb2d4 feat: Pos処理の非同期化・Busy状態の削除 2026-05-04 15:52:27 +09:00
7527b55de4 docs(tickets): 追加:タスクリストの表示とコンテキスト長インジケータ 2026-05-04 15:32:40 +09:00
c57d4be413 docs(tickets): Busyの切り離し 2026-05-04 13:20:25 +09:00
344dca6ffa Merge branch 'llm-worker-transient-retry' into develop 2026-05-04 13:16:26 +09:00
93fe2eb0ff docs(tickets): pod状態のTUI同期完了 2026-05-04 13:08:44 +09:00
09e465d583 feat: Podのステータス同期の修正 2026-05-04 12:55:29 +09:00
4eb73fa552 feat: Podのステータスを厳密にし、同期漏れを防ぐ 2026-05-04 12:55:11 +09:00
2d59ddd228 docs(tickets): llm-worker-transient-retry完了 2026-05-04 12:51:41 +09:00
39882263d3 docs(tickets): llm-worker-transient-retry レビュー追記
7183847 のレビュー結果を Approve として記録する。チケット要件
(リトライ対象 / バックオフ / Retry-After 上書き / mid-stream 温存 /
完了条件) はすべて満たしており、コードベースの層構造を歪める変更も
ない。Retry-After テストの方針差 (実時間 1s vs 仮想時間 5s) と
connect refused テストの試行回数未検証は non-blocking として
review.md に記録。
2026-05-04 12:49:13 +09:00
c2caaa21a0 feat(llm-worker): HTTP transient エラーへのリトライを追加
`transport.rs` の HTTP 送信〜ステータスチェック区間に指数バックオフ
+ フルジッターのリトライループを追加する。SSE 読み出し開始後 (
`bytes_stream()` 以降) のエラーは従来どおりそのまま流す。

- `is_retryable(&ClientError)`: 408/425/429/500/502/503/504/529 と
  reqwest の connect/timeout のみ true
- `RetryPolicy` (default: base 500ms / cap 10s / max_attempts 4 /
  total_timeout 30s)
- `Retry-After` ヘッダ (秒数) があればバックオフを上書き
- リトライ発火ごとに warn! でステータス・attempt・wait を出す

ref: tickets/llm-worker-transient-retry.md
2026-05-04 12:45:33 +09:00
20097e8296 Merge branch 'tui-system-message-render' into develop 2026-05-04 12:10:17 +09:00
185db7f8cd docs(tickets): tui-system-message-render完了 2026-05-04 12:05:50 +09:00
8870af800f feat: システムメッセージをTUIで表示させる 2026-05-04 12:04:09 +09:00
56f9bab7b7 update: Taskツールの説明を更新 2026-05-04 11:32:04 +09:00
194d29723e docs(tickets): tuiトークン表示完了 2026-05-04 00:07:59 +09:00
a22cb479f4 docs(tickets): tuiトークン表示レビュー 2026-05-04 00:05:59 +09:00
5efe0e4910 feat: tuiのトークン集計表示の修正 2026-05-04 00:01:37 +09:00
6168e3f924 docs(tickets): TUI表示トークンの集計の修正 2026-05-03 23:28:31 +09:00
9b676238a2 docs(tickets): チケット追加:システムメッセージのTUI表示とセッションのロールバック・フォーク 2026-05-03 22:43:21 +09:00
8df34a1d64 docs(tickets): tui-pod-event-render 完了 (消し忘れ片付け) 2026-05-03 22:14:24 +09:00
45ef661651 update: Taskツール群の説明を更新 2026-05-03 22:09:45 +09:00
2d8767f940 docs(tickets): notify-history-persist 完了 (消し忘れ片付け) 2026-05-03 22:07:18 +09:00
8f7a023897 docs(tickets): session-todo-reminder spec を pending_history_appends に改訂 (AGENTS.md 揮発禁止に整合) 2026-05-03 21:53:20 +09:00
302a1a7f58 Merge branch 'session-todo-tools' into develop
# Conflicts:
#	tickets/session-todo.md
2026-05-03 21:50:30 +09:00
284d07b569 docs(tickets): session-todo (本体) 完了 2026-05-03 21:48:44 +09:00
5fbb9c47dd update: tuiからspawnする際にエラー詳細が落ちていた問題を修正 2026-05-03 21:47:54 +09:00
f18cf7c172 docs(tickets): notify-history-persist完了 2026-05-03 21:37:13 +09:00
cae0c1ea2f docs(tickets): session-todo レビュー反映 (Approve) + reminder spec 段階レビュー 2026-05-03 21:34:54 +09:00
ada1fe6c63 fix: TaskStore snapshot を JSON ブロック化 + 構造ラウンドトリップテスト追加 2026-05-03 21:33:50 +09:00
fde55c96d4 fix: TaskStore snapshot を compact 後 history の末尾に置いて retained 中の TaskCreate 重複を防ぐ 2026-05-03 21:26:49 +09:00
05c2605aae feat: notify-history-persist実装 2026-05-03 19:27:22 +09:00
d1a9b622d4 feat: セッション内 Task ツール (TaskCreate/List/Get/Update + 履歴 replay + compact 跨ぎ) 2026-05-03 19:03:52 +09:00
a87be4cbc2 docs(tickets): セッション内 Task ツールを本体と注意機構に分割 2026-05-03 19:03:48 +09:00
30bb096513 Merge branch 'resume-scope-claim' into develop
# Conflicts:
#	TODO.md
2026-05-03 18:59:01 +09:00
e0261591b6 docs(tickets): resume-scope-claim 完了 2026-05-03 18:56:39 +09:00
eb054b3e88 fix: resume-scope-claim レビュー指摘対応 (deny セマンティクス doc・破損 snapshot の警告ログ) 2026-05-03 18:56:21 +09:00
1be6d34010 docs(tickets): resume-scope-claim レビュー (Approve) 2026-05-03 18:46:15 +09:00
eb0d0433a1 docs(tickets): Notifyが永続化されいない問題についてのチケット 2026-05-03 18:45:10 +09:00
557d5da391 feat: resume時のscope claimを過去の有効scopeに揃える 2026-05-03 17:12:36 +09:00
3f987e9885 feat: session-metrics完了 2026-05-03 15:56:06 +09:00
a86f69fd8d feat: session-metrics実装 2026-05-03 15:10:43 +09:00
cae18a4339 feat: TUIに他Podからの通知を表示する 2026-05-03 12:45:05 +09:00
69a6f63023 docs(tickets): 消し忘れチケットども 2026-05-03 01:16:22 +09:00
1236c68073 chore: TODOから[ ]を削除 2026-05-03 01:08:43 +09:00
d64d1b2ae8 Update AGENTS.md 2026-05-03 01:06:23 +09:00
159ffb0c6d docs(tickets): tuiでPodEventを表示する・セッション中でメトリクスを取るチケットを追加 2026-05-03 01:01:09 +09:00
97a1c10ef7 update: tuiの文字入力のCtrlブロックを追加 2026-05-03 00:44:38 +09:00
eeb570c71f update: memoryシステム周りのプロンプトの整理 2026-05-03 00:27:10 +09:00
9be7caae99 docs(tickets): memory-consolidation-drop-input-cap完了 2026-05-02 23:57:36 +09:00
0e7be01807 update: Consolidationの不要なToken上限の削除 2026-05-02 23:48:33 +09:00
35c8ee3a73 docs(tickets): セッション内TODOツールと注意機構のチケット 2026-05-02 23:48:01 +09:00
c79c54ba9d update: codexのキャッシュ利用が出来てなかった問題 2026-05-02 03:23:44 +09:00
f1d8f42fd5 fix: tuiからのPod作成の挙動を修正・開発時にcargo runでpodを起動する経路を実装 2026-05-02 02:13:30 +09:00
14862fbc37 Merge branch 'workflow-impl' into develop
# Conflicts:
#	crates/pod/src/controller.rs
#	crates/pod/src/pod.rs
2026-05-02 01:47:49 +09:00
ef3f0a8a78 docs(tickets): workflow完了 2026-05-02 01:40:06 +09:00
2ef397b562 update: workflowの実装修正 2026-05-02 01:38:50 +09:00
bebe1169c8 docs(tickets): 消し忘れチケット 2026-05-02 01:36:19 +09:00
ba5b8db9cf feat: dynamic-scopeの実装修正 2026-05-02 01:33:32 +09:00
189ee43a0c feat: dynamic-scopeの実装 2026-05-02 01:26:17 +09:00
6bf1f9a110 fix: SpawnPodの起動経路の問題・を修正 2026-05-02 01:09:57 +09:00
8307ca965c Implement workflow MVP 2026-05-02 00:46:47 +09:00
e97f803104 update: manifestで一部値のzeroの扱いを変更 2026-05-02 00:08:46 +09:00
c4bc994cab fix(llm-worker): openai_responsesのroleの最新の投影を反映 2026-05-01 23:55:26 +09:00
6d84d4df19 chore: dev-depsの整理 2026-05-01 23:50:14 +09:00
ac4133ddf9 docs(tickets): workflowのプロパティ名の修正 2026-05-01 23:40:47 +09:00
6d15d1e2b6 chore: 依存パッケージの集約 2026-05-01 23:35:46 +09:00
ffda357218 Merge branch 'tui-mouse-scroll' into develop 2026-05-01 23:22:58 +09:00
09eb29b0b7 feat: memory P2の修正 2026-05-01 23:22:49 +09:00
300234df57 feat(tui): マウスホイールスクロール完了 2026-05-01 23:16:02 +09:00
7e938b2d3b スキルの整理 2026-05-01 23:14:37 +09:00
0e98d67a5f feat(tui): マウスホイールでスクロールする実装 2026-05-01 23:14:16 +09:00
31eeded4a6 メモリPhase2の実装 2026-05-01 23:00:55 +09:00
ca27d88869 docs: memoryシステムの仕様変更と、動的Tool・VCSの話 2026-05-01 18:47:52 +09:00
38efe82544 bashツール一旦完了 2026-05-01 18:47:09 +09:00
31a1c1d879 bashツール実装 2026-05-01 18:14:13 +09:00
e21f43c70a ClaudeによるTool出力メタ認知 2026-05-01 02:47:44 +09:00
e058dc576d ファイル参照を与えた際に自動的に読ませる実装 2026-04-30 21:58:10 +09:00
a05d7533b0 TUI補完の細かい挙動修正 2026-04-30 14:38:03 +09:00
621acbe224 tuiの補完の実装 2026-04-30 12:46:48 +09:00
e259ab7bd3 claudeの動的ツールの調査レポート 2026-04-30 01:35:42 +09:00
1f3ad13c83 fix: セッション復元時にhistoryが表示されない問題 2026-04-30 00:02:26 +09:00
2c9db5a27b cargo fmt 2026-04-29 23:20:25 +09:00
dcc71e3a14 templatureがcodexエンドポイントで使えない件の修正 2026-04-29 23:20:16 +09:00
426d477584 session-log関連完了 2026-04-29 23:00:55 +09:00
09d56272d8 session-logリファクタのレビュー・修正 2026-04-29 22:55:36 +09:00
de6b8faf55 session-log-segments実装 2026-04-29 22:42:10 +09:00
bb2a6013fa session-log-decouple-item実装 2026-04-29 22:24:18 +09:00
709b17d309 session-storeの永続化形式からllm-workerの内部型を削除 2026-04-29 22:09:30 +09:00
f74716c2e4 tui-input-word-motion完了 2026-04-29 21:45:49 +09:00
f6fe978db4 tui-input-word-motionレビュー・半角カナに関する修正 2026-04-29 21:41:24 +09:00
99d6a4cf4b tuiの単語単位Backspace 2026-04-29 21:31:19 +09:00
9782323885 tuiの単語境界カーソル移動実装 2026-04-29 21:23:29 +09:00
28c2b0eb1c workflowのチケットとtuiの単語境界カーソル移動のチケット 2026-04-29 21:22:49 +09:00
437fe9fe85 pod-registry-rename完了 2026-04-29 21:05:09 +09:00
c647cac983 pod-registry-rename修正 2026-04-29 21:04:47 +09:00
e2d6f00d6d pod-registryのモジュール分割 2026-04-29 20:14:34 +09:00
40d19ca702 scope-lock -> pod-registry 2026-04-29 20:01:32 +09:00
e304b17a7e scope.lockの意味変更に伴うクレート名変更チケット作成 2026-04-29 19:54:08 +09:00
ca0b772242 memory-phase1-extract完了消し忘れ 2026-04-29 19:53:37 +09:00
3962db4d37 tui-session-restore完了 2026-04-29 19:52:24 +09:00
5ea99673fc tuiからセッションを復帰する経路の実装 2026-04-29 19:03:03 +09:00
dad75b592e 不要なforkの削除 2026-04-28 20:19:50 +09:00
d1be97fbc2 resumeの実装 2026-04-28 18:52:58 +09:00
f2b364ec0d max_tokenとreasoning_tokenに関するdocs修正 2026-04-28 18:01:17 +09:00
f1ba5b5686 max_tokensのスキーマ不整合に関する修正 2026-04-28 17:58:24 +09:00
ce7153f6e8 tui-thinking-display完了 2026-04-28 16:23:09 +09:00
04ad20e760 tui-thinking-display修正 2026-04-28 16:22:45 +09:00
fc2c6bc81c TUIにThinkingを表示する実装 2026-04-28 16:10:48 +09:00
31d5de1a37 ThinkingのTUI表示のチケット作成 2026-04-28 16:07:41 +09:00
cfd1879f7e session-store-llm-worker-type-ownership完了 2026-04-28 15:44:16 +09:00
eed3f13e51 セッション関連の責務の分離 2026-04-28 15:43:34 +09:00
a9d30e1c37 memory-phase1の、トークンカウントの実装位置が悪い件 2026-04-28 14:24:38 +09:00
11bd486740 memory-phase1-extract修正 2026-04-28 13:12:21 +09:00
fd88c72e2e memoryを抽出する仕組みの実装 2026-04-28 12:58:33 +09:00
2ef4f26a8f session-restoreの設計更新 2026-04-28 12:42:49 +09:00
cb3642d12c session復帰経路を作るチケット・テスト用のファイルの削除 2026-04-28 12:31:38 +09:00
e4d7cc1924 memoryが.insomnia配下ではなくworkspace root直下を想定していた問題の修正 2026-04-28 11:53:08 +09:00
c4e1a969c1 memoryのクエリと動作のテスト 2026-04-28 11:37:41 +09:00
2e38a24ac2 worker-generation-settings完了 2026-04-28 09:38:23 +09:00
8114d3c4fd 生成設定のmanifest化の実装 2026-04-28 09:37:22 +09:00
cabf9c967c cargo fmt 2026-04-27 22:51:07 +09:00
1c98938b6f model-reasoning-control完了 2026-04-27 22:49:56 +09:00
5fa3d140ab model-reasoning-contolレビュー 2026-04-27 22:41:51 +09:00
7d23cff0a9 model-reasoning-control実装 2026-04-27 22:25:27 +09:00
5246b3ce92 home-dir-layout完了 2026-04-27 22:11:15 +09:00
45ede7a6fc home-dir-layout修正 2026-04-27 22:10:36 +09:00
f8fe6f83aa home-dirの整理 2026-04-27 21:45:30 +09:00
9998539e71 reasoningを利用可能にするチケット 2026-04-27 20:21:22 +09:00
29ea180b18 memory-resident-injection完了 2026-04-27 18:30:21 +09:00
ee60758138 メモリー内容のシステムプロンプトへの埋め込みの実装 2026-04-27 18:25:47 +09:00
db9faa0fad 環境変数に関するチケットの修正 2026-04-27 18:11:40 +09:00
325ae6fa27 pod-spawn-ui完了・設定UI関連のチケット作成 2026-04-27 17:38:32 +09:00
d0a1eaeb57 memory-search-tool完了 2026-04-27 17:26:07 +09:00
56c6758da5 memoryサーチツールを実装 2026-04-27 17:24:08 +09:00
30abefe747 manifest読み込み経路の整理チケット作成 2026-04-27 17:17:00 +09:00
2ed4bd007b manifest側で設定ファイルの収集を行うようにした 2026-04-27 16:52:23 +09:00
5ebdeff76d tuiからSpawnする仮UI 2026-04-27 16:22:06 +09:00
d80d06ff2e memory-file-format完了 2026-04-27 13:59:04 +09:00
f43d8fba3b メモリーに関するクレート作成・ファイル構造の実装 2026-04-27 13:33:31 +09:00
0a676524ae セグメントのセッション永続化チケット 2026-04-27 13:25:16 +09:00
fd89c754f1 submit-segment-protocol完了 2026-04-27 11:42:42 +09:00
2722e0b7ba submitをvec segmentを受け付ける形に変更 2026-04-27 11:03:58 +09:00
e0c4dbdc73 notification-naming完了 2026-04-26 23:30:46 +09:00
e44d49e80f Method::NotifyとEvent::Notificationが紛らわしい問題 2026-04-26 23:25:50 +09:00
123fc3b0ad memory実装チケット 2026-04-26 17:00:38 +09:00
89c2c701fd カタログの実装完了、ドキュメント整理 2026-04-24 13:33:56 +09:00
ce6198102f podのモジュール分割完了 2026-04-24 11:58:11 +09:00
c75d777cec podのモジュール分割 2026-04-24 11:48:27 +09:00
1b1dc73d7f modelsとprovidersをカタログ化 2026-04-24 10:45:03 +09:00
a730717fc7 モデルとプロバイダーをカタログ化するチケット 2026-04-23 16:18:30 +09:00
45b1e7b6de llm-provider-catalog実装 2026-04-23 15:37:51 +09:00
a86c22e6f5 Agents.mdを一定閾値でturncateする仕様を削除 2026-04-23 01:34:25 +09:00
6146b2806f pod-prompt-catalog完了 2026-04-22 17:43:42 +09:00
c68cd64882 Promptを一元管理するファイルから参照する実装 2026-04-22 17:43:05 +09:00
c492765d1a Memoryシステムの整理・Promptカタログチケット 2026-04-22 13:21:15 +09:00
7ce77f0ad5 TUIのEditツール周りの表示とカラー 2026-04-22 01:17:58 +09:00
3717569533 複数クライアント間でのRunメソッドの同期漏れ 2026-04-21 23:59:49 +09:00
676137c246 改行テキストの行計算・Padding設定 2026-04-21 23:26:34 +09:00
84fedd8048 TUIのオーバーホール実装 2026-04-21 23:12:35 +09:00
9bf6378041 protocol-tool-result-shape完了 2026-04-21 20:52:19 +09:00
d4055fb19d TUIに向けたprotocolの詳細調整 2026-04-21 20:50:59 +09:00
3b2bdcb19a TUIオーバーホールチケット 2026-04-21 19:37:14 +09:00
ee694b310f メモリシステムの設計 2026-04-21 19:23:07 +09:00
e513825da9 モデル性能のハードコードを消し飛し、Codexのフォーマットの修正 2026-04-21 18:35:56 +09:00
d37347fe68 Docsのアップロード 2026-04-21 17:39:43 +09:00
225e1bf58e protocol拡張の実装(完了) 2026-04-21 09:30:02 +09:00
b7b315cd39 protocol拡張の実装 2026-04-21 09:27:58 +09:00
13c9923486 protocolの拡張に関するチケット修正 2026-04-21 08:42:54 +09:00
1aa992d07e llm-auth-codex-oauth完了 2026-04-20 23:14:45 +09:00
6c6eb0dcb6 codexのOAuthを使う実装 2026-04-20 23:13:52 +09:00
24ade197d1 openai-responses完了 2026-04-20 03:00:48 +09:00
74a45f86b9 openai-responses対応 2026-04-20 02:59:16 +09:00
5aea67ff5e llm-model-config完了 2026-04-20 00:57:27 +09:00
230936274b llm-model-configの実装 2026-04-19 23:32:14 +09:00
e1d672e9c0 llmのモデル情報の設計チケット 2026-04-19 22:29:37 +09:00
a89701bc43 マニフェストを継承してPodをスポーンさせる 2026-04-19 18:01:47 +09:00
25df7a79c1 SpawnPodツールが落ちる問題の発見 2026-04-19 15:14:15 +09:00
ddd7327290 Pause実装完了 2026-04-19 15:12:06 +09:00
223d06c77e TUIからPauseする実装 2026-04-19 14:27:53 +09:00
605e78468c compact-improvements をマージ
- 閾値の個別指定化 (compact_threshold / compact_request_threshold) と Option 化
- 占有量ソースを UsageRecord timeline に一本化 (last_input_tokens 撤去)
- retained_turns → retained_tokens
- compact worker をツール駆動に再設計 (mark_read_required / add_reference / write_summary / read_file)
- Auto-read budget と compact_worker_max_input_tokens の上限制御
- 新 history は system message のみで構成 [summary, auto-read..., references, retained...]
2026-04-19 12:14:16 +09:00
3c510860fa compact-improvements チケット完了 2026-04-19 12:13:03 +09:00
ec3bf7324b anthropic-cache完了 2026-04-19 12:07:03 +09:00
1b33e63ce2 compact: retained_tokens テスト値を現実的な値に変更
2 を 8_000 に。retained_turns 時代の名残で 2 は "2 トークン保持" と読めてしまい意味不明だったため。
2026-04-19 12:02:11 +09:00
663ec91b45 Anthropicのキャッシュポイントを打つ実装 2026-04-19 11:57:55 +09:00
34d1e78b40 compact: compact worker をツール駆動マルチターンに再設計
段階 4〜9 を一括で実装:
- mark_read_required / add_reference / write_summary + read_file の 4 ツールで
  compact worker を駆動。結果は CompactWorkerContext に集約
- 新セッションの先頭を [summary, ...auto-read, references, ...retained] で構築
- デフォルトリファレンスは tracker.recent_files(5) から
- auto-read は compact_auto_read_budget で総量制限。超過は即エラー
- compact worker 自身は compact_worker_max_input_tokens で累計入力を制限
- 5 セクション要約フォーマットに system prompt を更新
- write_summary 未呼び出し / auto-read 空のときは 1 回追加プロンプトで促す
2026-04-19 09:26:55 +09:00
758ced5e7f compact: retained_turns を retained_tokens に置換
保護単位をターン数からトークン量に変更。compact 時のカット位置は
Pod::split_for_retained() で UsageRecord を逆算ソースとして決定し、
ターン境界ではなくアイテム単位で切る。デフォルトは 8000 トークン。
2026-04-19 08:56:16 +09:00
da16015768 compact: 要約入力から content/arguments/reasoning を除く
ToolCall.arguments, ToolResult.content, Reasoning は auto-read 側の責務。
要約は意思決定と意図のキャプチャに集中させ、コードや tool IO は持ち込まない。
2026-04-19 08:51:04 +09:00
967acd23ee compact: 閾値を個別指定化し占有量ソースを UsageRecord に一本化
- manifest に compact_request_threshold を追加 (proactive と safety net を個別指定)
- CompactState の両閾値を Option<u64> 化、last_input_tokens を撤去
- 閾値判定は Pod::total_tokens() / usage_history 経由の実測値ベースに切替
- turn_threshold → request_threshold にリネーム、Between-requests のログへ
2026-04-19 08:49:25 +09:00
68885a03d8 引数なしでToolCallすると構造エラーになる問題の修正 2026-04-19 08:39:16 +09:00
879858dc94 pod-upstream-event完了 2026-04-19 08:31:42 +09:00
ed412cb6a8 pod-upstream-event修正 2026-04-19 08:31:16 +09:00
255e370856 pod-upstream-event実装 2026-04-19 08:20:07 +09:00
911d3b8d6c マニフェスト改修完了 2026-04-19 08:05:20 +09:00
4ec8f63482 プロジェクトManifestの相対基準の修正 2026-04-19 08:03:59 +09:00
88e29d7bbe マニフェスト解決の相対パス化 2026-04-19 07:53:54 +09:00
cc9fa2d632 Pod操作ツール実装完了 2026-04-19 06:41:20 +09:00
2af7089396 Pod操作ツール修正 2026-04-19 06:40:45 +09:00
5d63d0f6e2 Pod操作ツールの実装 2026-04-19 06:32:44 +09:00
73acfcb7f2 SpawnPodツール完了 2026-04-18 20:31:10 +09:00
e7a4b76c54 scope-lock完了 2026-04-18 19:26:23 +09:00
a7b9b6fa4b Scope-Lockの実装 2026-04-18 19:25:03 +09:00
4ba58723dc Compactのチケット修正 2026-04-18 19:15:39 +09:00
b685fedf1a チケット分割 2026-04-18 18:48:26 +09:00
74ee96ef82 Notificationの実装 2026-04-18 17:48:35 +09:00
b538c2f1ea Interceptorの責務分離完了 2026-04-18 17:27:22 +09:00
84a8bd099b interceptorの修正 2026-04-18 17:19:59 +09:00
79f342ca60 shutdown実装完了 2026-04-16 13:55:17 +09:00
aa138e6583 プロトコル経由のshutdow経路 2026-04-16 13:49:53 +09:00
710220c920 instruction-file-refs完了 2026-04-16 13:08:08 +09:00
381d31a1dc instructionファイルの定義・読み込みの実装 2026-04-16 11:16:16 +09:00
493ed2c781 pod-factory完了 2026-04-16 00:57:26 +09:00
81e28a3c07 podのマニフェストの分離実装 2026-04-16 00:54:27 +09:00
5848954ca8 tui-notification-channel完了 2026-04-15 12:59:15 +09:00
faa8eb5793 warn/errorのTUIへの通知ルート 2026-04-15 12:58:31 +09:00
0c29de1b10 greetingカードの作成 2026-04-15 10:35:15 +09:00
c48abf062e AGENTS.md完了 2026-04-15 05:21:54 +09:00
38e8c66c90 AGENTS.mdの読み取り 2026-04-15 05:21:43 +09:00
41120cf200 tool出力制限の修正 2026-04-15 04:23:07 +09:00
b6ffbe4255 tool出力の制限 2026-04-15 04:08:56 +09:00
92fbd2e3f6 システムプロンプト完了 2026-04-15 02:46:12 +09:00
66c6edec3e システムプロンプトの実装 2026-04-15 02:44:42 +09:00
309dba7203 tuiの文字間隔修正・prompt設計の計画 2026-04-14 13:11:18 +09:00
cbf728d66a scope再設計の完了 2026-04-14 12:10:00 +09:00
2db2c1611c scopeの再設計 2026-04-14 12:09:18 +09:00
3c58b5dde4 prune-savings-estimation完了 2026-04-14 03:42:04 +09:00
a0a9df11c0 cargo fmt 2026-04-14 03:13:36 +09:00
7ec6e88605 prune-projection完了 2026-04-14 02:57:25 +09:00
2e004161e4 pruneのトークン計算置き換え・Podに接続 2026-04-14 02:35:35 +09:00
5a995cf099 pruneで用いるトークン計算の改善 2026-04-14 00:15:09 +09:00
2edc2dc245 token-counter実装 2026-04-13 20:32:02 +09:00
f607a52fbb token-counter実装 2026-04-13 20:21:26 +09:00
7fb2e4bc6c usage永続化のdoc修正 2026-04-13 07:13:49 +09:00
17d0430a4d usageデータの永続化実装 2026-04-13 07:09:05 +09:00
22fe502d71 TODO・Ticketのアップデート 2026-04-13 05:58:33 +09:00
9b9e37cc84 ToolsのTracker実装 2026-04-13 04:26:27 +09:00
5bc4a6d6d6 チケット更新 2026-04-13 04:10:19 +09:00
3d0d5ffe85 組み込みツールの実装 2026-04-13 03:43:02 +09:00
a05eec42d7 Compactの実装 2026-04-13 02:08:25 +09:00
8b120504a7 TUIをinline viewportに変更 2026-04-12 07:32:06 +09:00
bcc7faa0ba compactの実装 2026-04-12 07:09:48 +09:00
47c59a416e TUIのratatuiを0.30.0にした 2026-04-12 06:57:07 +09:00
cdafd5d914 session-storeとして分離 2026-04-12 06:31:34 +09:00
eb670bfba5 Pruneの実装 2026-04-12 06:02:46 +09:00
c0d283b47d TUI上のターンカウンタ・ターン統計の実装 2026-04-12 05:41:22 +09:00
2c5a0edef3 Tool Outputの仕様簡素化 2026-04-12 05:19:00 +09:00
dc1a335e1c コンテキスト圧縮の設計更新 2026-04-12 04:47:42 +09:00
0e7a7b02fe Pod切断時にTUIがハングする問題 2026-04-12 04:22:26 +09:00
b19eb52511 history取得:TUI側の実装 2026-04-12 03:44:45 +09:00
0332d446cd historyを返すプロトコル 2026-04-12 03:37:49 +09:00
29e1bc8253 Tickets整理 2026-04-12 03:19:12 +09:00
8e394005b2 ツールの動的削除の実装 2026-04-11 20:01:55 +09:00
02b266dce7 Workerのリファクタリング 2026-04-11 19:47:34 +09:00
7249a8ee6a Podにキーを渡す実装 2026-04-11 19:28:59 +09:00
9b78c51d0a Workerの自動キャッシュロック 2026-04-11 18:47:33 +09:00
f241dafac8 workerのAPI設計 2026-04-11 17:30:32 +09:00
fc8ff9362e HookのPod側への移動・Interceptorの実装 2026-04-11 17:19:20 +09:00
496038307f プロトコルStreamのユーティリティ共通化 2026-04-11 15:58:52 +09:00
3d2a49e1e4 Sessionのハッシュ 2026-04-11 15:14:02 +09:00
59bfd89940 Remove Pod-ID 2026-04-11 14:18:49 +09:00
89481c2c82 llm-workerのAPI改善 2026-04-11 14:11:40 +09:00
3883fab29d Pod-ID (UUID)の削除 2026-04-11 03:44:37 +09:00
7d1b74fb32 Create remove-pod-id.md 2026-04-11 03:40:55 +09:00
9363c76354 Podのバイナリ実装 2026-04-11 03:26:38 +09:00
f4f398279e tickets 2026-04-11 03:23:48 +09:00
0fe05e502e Max Turnの実装 2026-04-11 03:16:36 +09:00
60505f206b Add README to all crates 2026-04-11 03:07:56 +09:00
4c3f81b4fa cratesの整理 2026-04-11 02:48:50 +09:00
cff082ff3a プロトコルの定義 2026-04-09 05:23:57 +09:00
66d005aa30 Crate設計・mv 2026-04-09 04:55:14 +09:00
437 changed files with 82987 additions and 6484 deletions

View File

@ -0,0 +1,119 @@
---
name: "ticket-reviewer"
description: "Use this agent when a ticket implementation is submitted for review in this project (insomnia). The agent reviews the ticket's premises/requirements and the actual implementation, creates `tickets/<ticket>.review.md` with findings, and updates the original `tickets/<ticket>.md` with review status. Do NOT use this agent for general code review unrelated to a ticket. "
model: opus
color: purple
---
You are a senior reviewer specialized in the `insomnia` project. You are an expert at evaluating ticket-scoped implementations against their stated premises and requirements, and at safeguarding the codebase from unnecessary complexity or architectural drift. You operate strictly within the project's ticket lifecycle conventions defined in `CLAUDE.md`.
## Your Core Responsibility
Given a ticket (normally `tickets/<name>.md`) and its associated implementation (typically the most recent commits or working tree changes), you will:
1. Read the ticket thoroughly to understand its **背景・前提・要件**.
2. Inspect the implementation (diff + surrounding code, not only the diff).
3. Evaluate whether the ticket's requirements are fully and correctly satisfied.
4. Evaluate architectural fit, necessity, and whether the codebase is being distorted (コードベースを歪めていないか、不必要な実装ではないか).
5. Produce `tickets/<name>.review.md` with findings and a clear judgment.
6. Update the original `tickets/<name>.md` to append a review status section (do NOT delete the ticket — deletion is the user's decision at completion).
You must NEVER run `git` write operations (commit, add, push, etc.). Git is the user's responsibility (per CLAUDE.md). You only edit/create files in the working tree.
## Review Methodology (in order)
Per the project's review policy — **architecture and ticket-requirement completion come first**:
### Step 1: Ticket comprehension
- Extract 前提, 要件, 完了条件 from the ticket.
- Note any Phase structure — but remember Phases are internal implementation order, not externally tracked progress.
- Confirm the ticket's intended scope boundary.
### Step 2: Architectural & scope review (先に確認する)
- Does the implementation respect layer boundaries? (e.g., `llm-worker` stays low-level; higher-level features live in upper layers.)
- Are new crates named without the `insomnia-` prefix, short and consistent?
- Were dependencies added via `cargo add` (not manual edits to Cargo.toml)?
- Are impls split into feature modules rather than stuffed into primary files like `pod.rs`?
- Does the implementation match stated factory/lazy-init intents where applicable?
- Does it follow the LLM provider policy (Ollama / Codex OAuth / Anthropic API first-class; router-style common frame; no Claude OAuth reuse)?
- Is the change the minimum necessary to satisfy the ticket, or does it over-reach?
### Step 3: Requirement completion check
- Map each requirement from the ticket to concrete evidence in the diff/code.
- Flag any requirement that is unmet, partially met, or silently deferred.
- Verify the build-through-feature invariant: the tree must build and, unless explicitly documented as not-yet-runnable for a bounded feature, be end-to-end runnable.
### Step 4: Code quality & correctness
- Investigate suspicious behavior by reading local code first (per project policy) before suspecting external causes.
- Look for error handling, edge cases, concurrency, and resource cleanup issues.
- Check tests: presence, meaningful coverage, and alignment with behavior.
- Confirm naming, module organization, and API surface are consistent with existing patterns.
### Step 5: Judgment
Decide one of:
- **Approve (完了可)** — requirements met, no blocking issues.
- **Approve with follow-up (条件付き)** — minor non-blocking items noted; user may complete or defer.
- **Request changes (要修正)** — blocking issues must be addressed.
## Output Artifacts
### A. `tickets/<name>.review.md` (create or overwrite)
Use this structure (Japanese, matching project tone):
```markdown
# Review: <ticket title>
## 前提・要件の確認
- <要件1>: <満たされているか + 根拠>
- <要件2>: ...
## アーキテクチャ・スコープ
- <観点と判断>
## 指摘事項
### Blocking
- <項目>: <理由と該当箇所 path:line>
### Non-blocking / Follow-up
- <項目>: <理由>
### Nits
- <項目>
## 判断
<Approve / Approve with follow-up / Request changes>: <一文の理由>
```
Omit empty sections. Cite concrete file paths and line ranges. Be concise; avoid restating obvious code.
### B. Update `tickets/<name>.md`
Append (or update if present) a trailing section like:
```markdown
## Review
- 状態: <Approve / Approve with follow-up / Request changes>
- レビュー詳細: [./<name>.review.md](./<name>.review.md)
- 日付: <YYYY-MM-DD>
```
Do not modify the ticket's 背景・要件 sections unless the user explicitly asked for it. Do not delete the ticket — deletion is reserved for the completion step (d) performed by the user.
## Operating Principles
- **Do not commit or stage anything.** File edits only. The user will handle git.
- **Do not over-engineer the review.** Focus on whether the ticket is done and whether the codebase stays healthy.
- **Prefer concrete citations** (path:line) over abstract complaints.
- **Ask for clarification** only when the ticket itself is ambiguous and the ambiguity blocks judgment; otherwise make a defensible call and note it.
- **Re-review mode**: if `.review.md` already exists, update it in place, preserving a short history of prior rounds (e.g., `## Round 2` section) so the evolution is visible until the ticket is closed.
- **TODO.md is not your concern** unless a requirement explicitly demands it; ticket lifecycle edits to TODO.md are the user's.
## Quality Self-Check (before finishing)
1. Did I evaluate architectural fit before nitpicks?
2. Did I map every ticket requirement to evidence?
3. Are all blocking issues genuinely blocking (not stylistic)?
4. Did I avoid making git writes?
5. Did I update both `<name>.review.md` and `<name>.md`?
6. Is my judgment line unambiguous?

View File

@ -0,0 +1,26 @@
---
name: worktree-workflow
description: "Worktreeを用いた開発フローを進める。git上の開発におけるミクロな指示で、プロジェクトの管理に関する指示は提供されていない。"
allowed-tools: "Bash(cd *), Bash(git worktree *), Bash(mkdir *), Bash(cp *), Bash(ln *), Bash(ls *), Bash(find *)"
---
# Worktreeを用いた開発
Goal: 実装を完了させ、ブランチをマージ待ちの状態にする。
`./.worktree`にworktreeを作成します。
エージェントの1セッション=1ワークツリーとしており、ブランチ/イシュー/チケット単位で切ります。
このワークフローにおいては、ブランチはローカルで並行開発するためのマージ後削除の運用とし、Worktreeと同名のbranchを同時に作って進めます。メインのディレクトリのブランチから切るものとして扱います。
```
git worktree add .worktree/<task-name> -b <task-name>
```
## flake.nixの無効化
基本的に、CWDを変更できない場合、.envrcによる自動アクティベートは効かないので無視で構わない。
## 完了時
マージウィンドウからこのスキルがinvokeされた際は、ブランチのマージ・worktreeの削除まで行う。対して、実装者がマージしてクローズしてはならない。

3
.gitignore vendored
View File

@ -1,4 +1,5 @@
/target
.direnv
*.local
*.local*
.env
.worktree

1
.insomnia/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/memory/

13
.insomnia/manifest.toml Normal file
View File

@ -0,0 +1,13 @@
[scope]
allow = [
{ target = ".", permission = "write", recursive = true },
]
[session]
record_event_trace = true
[memory]
extract_threshold = 50000
consolidation_threshold_files = 5
consolidation_threshold_bytes = 50000

View File

@ -0,0 +1,143 @@
---
description: TODO / tickets / docs / git history から次の作業候補を見繕い、課題発見や方針決定を半自動でイテレーションする WIP maintainer workflow
model_invokation: false
user_invocable: true
requires: []
---
# Auto Maintain Workflow (WIP)
insomnia を AI maintainer として運用するための半自動 loop。TODO / tickets から「今進められそうな作業」を選ぶだけでなく、課題の発見、設計判断の切り分け、次に人間へ戻すべき問いの整理までを扱う。
これは unattended 自動開発ではない。実装の並列委譲は `multi-agent-workflow`、worktree の機械的作成は `worktree-workflow` に任せる。本 Workflow はその前段として、何を進めるべきか、何をまだ決めるべきかを整理する。
参照:
- `docs/plan/ai-maintainer.md`
- `tickets/auto-maintain-workflow.md`
## 位置づけ
AI maintainer の目的は、コードを書くこと自体ではなく、プロジェクト状態を前に進めることである。
この Workflow は WIP として、以下を行う。
- TODO / tickets / docs / git history を読んで現在地を把握する。
- 実装可能な ticket と、方針決定が必要な ticket を分ける。
- 小さく実装できる候補を提案する。
- 設計相談が必要な論点を人間に戻す。
- 運用上の問題や繰り返し発生する詰まりを report / ticket / workflow 改訂候補として整理する。
## 非目標
現時点では以下をしない。
- 常駐 scheduler として自動実行する。
- 人間の合意なしに新規 ticket を作る。
- 人間の合意なしに既存 ticket を大幅変更する。
- 人間の合意なしに ticket 完了削除を行う。
- push する。
- Workflow を自律生成・自律改訂する。
- scope / permission / history persistence / prompt context 加工原則に関わる判断を勝手に決める。
## 入力として読むもの
必要に応じて以下を読む。
1. `TODO.md`
2. `tickets/*.md`
3. `docs/plan/`
4. `docs/report/`
5. `git log --oneline` / ticket file の git history
6. 既存 worktree / branch 状態
7. 最近の失敗や通知、ユーザーからの観測
TODO と ticket の不整合を見つけたら、勝手に修正せず、まず報告する。ただしユーザーが明示的に「直して」と言った場合は Mode 1 として整理してよい。
## 分類
候補を以下に分ける。
### A. 実装委譲可能
- 要件と完了条件が具体的。
- 影響範囲が限定的。
- test / build で確認できる。
- 大きな設計判断が不要。
- scope を狭く切れる。
この場合は、人間に候補として提示する。人間が実行を許可したら `$user/multi-agent-workflow` に進む。
### B. 方針決定が必要
- 複数の設計方針が自然に導ける。
- protocol / permission / scope / persistence / prompt context に触れる。
- UX の仕様が未確定。
- 既存 ticket の要件が古い。
この場合は、実装せず、決めるべき問いを短く提示する。
### C. ticket 整理が必要
- TODO にあるが ticket がない。
- ticket があるが TODO にない。
- 完了済みに見えるが残っている。
- ticket の前提が変わっている。
この場合は、不整合と修正案を提示する。修正は人間の許可後に行う。
### D. report / workflow 改善候補
- 同じ tool 問題が繰り返し出る。
- Workflow の指示が曖昧で実装 Pod が迷った。
- AI が過剰に Task tool を使うなど、運用上の癖が出た。
- 通知や Pod completion tracking など、開発基盤の不足が観測された。
この場合は、すぐ ticket 化するか、`docs/report/` に観測として残すか、人間に確認する。
## 半自動 iteration
1. 状態把握
- TODO / tickets / git status を読む。
- 最近完了した流れや未完了 branch を確認する。
2. 候補抽出
- 実装可能そうな ticket を 2〜5 件挙げる。
- correctness / developer experience / user-visible UX / cleanup で分類する。
3. 推奨順位
- blocking correctness を最優先。
- 実害が出ている運用問題を次点。
- 小さく完了できる UX / cleanup を次点。
- 大きな設計変更は方針相談に回す。
4. 人間への提示
- 「次に進めるなら X」を1つ推奨する。
- 理由を短く述べる。
- 実装委譲する場合の scope / test 方針を添える。
5. 実行への接続
- 人間が「進めて」と言ったら `$user/multi-agent-workflow` に接続する。
- worktree 作成は `$user/worktree-workflow` に従う。
## エスカレーション基準
以下では実装に進まず、人間へ戻す。
- ticket の要件から複数の設計方針が自然に導ける。
- 長期構造、crate boundary、protocol、permission、scope、history persistence に触れる。
- prompt context 加工原則に関わる。
- 新 ticket の作成、既存 ticket の大幅変更、ticket 完了削除について合意がない。
- test 不能、再現不能、または作業範囲外の不具合に遭遇した。
- WorkItem / Thread / Lease / maintainer state など、まだ設計中の概念が必要になる。
## まだ固定しないもの
以下は `docs/plan/ai-maintainer.md` の上位設計に残し、本 Workflow では詳細を固定しない。
- WorkItemStore / LeaseStore。
- operation inbox / trial log。
- QA feedback を ticket / review / report のどれに落とすか。
- AI 自身の feedback を Knowledge / report / ticket / workflow 改訂のどれにするか。
- maintainer doctor。
- reviewer Pod の評価基準の機械化。

View File

@ -0,0 +1,150 @@
---
description: worktree と子 Pod を使って複数 ticket の実装・レビュー・修正・完了処理を並列に進める orchestration フロー
model_invokation: true
user_invocable: true
requires: []
---
# Multi-agent Worktree Workflow
insomnia を insomnia で開発する際の、worktree + 実装 Pod + 親 Pod review の標準フロー。これは **実装を並列に進めるためのフロー** であり、worktree の機械的作成手順は `$user/worktree-workflow`、ticket 候補選定や方針探索の半自動 loop は `$user/auto-maintain` に分ける。
## 目的
- 実装差分を ticket ごとの child worktree に隔離する。
- 実装 Pod に narrow write scope を渡して並列実装させる。
- 親 Pod が diff / test / ticket 要件を review し、必要なら修正依頼する。
- approve 後に merge / ticket 完了処理 / main workspace での再検証を行う。
## 開始条件
以下が揃っている時に使う。
- 対象 ticket が決まっている。
- ticket の背景・要件・完了条件から実装方針が概ね導ける。
- worktree 作成と git 書き込み操作について、人間の許可がある。
- main workspace の unrelated dirty changes を把握している。
設計方針が複数自然に導ける場合、protocol / scope / permission / history persistence に触れる場合、ticket 自体の再定義が必要な場合は、実装委譲前に人間へ戻す。
## 親 Pod / orchestrator の責務
1. 状態確認
- `git status --short --branch`
- 対象 ticket
- 関連 TODO / docs / 既存 worktree
2. worktree 作成
- `$user/worktree-workflow` に従い `./.worktree/<task-name>` を作る。
- `.insomnia` を sparse checkout で除外する。
3. 実装 Pod spawn
- read scope: main workspace 全体。
- write scope: child worktree、または必要最小 directory。
- task には以下を明示する。
- child worktree path / branch
- 対象 ticket path
- Bash は必ず child worktree に `cd` すること
- main workspace の `TODO.md` / `tickets/` / `docs/report/` / `.insomnia` は編集しないこと
- 範囲外事項
- 実行すべき build / test / format
- 完了報告項目
4. 監督
- `ReadPodOutput` で報告を読む。
- 通知が来ない場合でも、worktree の `git status` / `git diff` / test で完了状態を確認する。
- 必要なら `SendToPod` で修正依頼する。
5. review
- ticket の背景・要件・完了条件・範囲外に照らして diff を確認する。
- build / test / `git diff --check` を確認する。
- 必要なら reviewer Pod を read-only で立てる。
6. merge / lifecycle
- approve 後に main workspace へ merge する。
- `TODO.md` から該当行を削除し、`tickets/foo.md` を削除して完了 commit を作る。
- main workspace で必要な test / `cargo check --workspace` / `cargo fmt --check` を再実行する。
## 実装 Pod の責務
- child worktree 内でのみ実装する。
- main workspace の管理ファイルを書かない。
- 指定された build / test / format を実行する。
- ticket 要件外の設計変更、依存関係追加、scope / permission / history persistence / prompt context 加工原則に触れる変更が必要なら止めて報告する。
- 完了時に以下を報告する。
- worktree path / branch
- commit hash(commit した場合)
- 変更ファイル
- 実装概要
- 実行した build / test / format
- 未解決事項
- review に回せるか
## 実装 Pod の commit 方針
実装 Pod には child worktree 内での commit を許可してよい。
- commit は ticket 内で意味のある粒度にする。
- 例: `feat: ...`、`fix: ...`、`test: ...`、`docs: ...`
- 実装 Pod は merge / push / branch deletion / worktree remove をしない。
- 実装 Pod は `TODO.md` / `tickets/` の完了処理 commit をしない。
- 親 Pod は review 時に commit 粒度も確認する。
- 必要な修正は、原則追加 commit として積む。履歴改変や squash は人間の明示指示がある時だけ行う。
## Review → 修正 → 完了の標準形
### Approve
1. 実装 Pod を停止し、scope を回収する。
2. 親 Pod が main workspace で `git merge --no-ff <branch>` する。
3. 親 Pod が `TODO.md` / `tickets/foo.md` を完了処理して commit する。
4. main workspace で検証コマンドを再実行する。
5. 変更内容・commit・検証結果・残 dirty changes を報告する。
### Request changes
1. blocking finding をファイル / 行 / 理由 / 修正方針つきで整理する。
2. 実装 Pod が生きていれば `SendToPod` で修正依頼する。
3. 停止済みなら、同じ worktree / branch / scope で再 spawn するか、親 Pod が最小修正する。
4. 修正後に focused test と必要な broader test を再実行する。
5. 再 review する。
### Non-blocking comments
- ticket 要件外の改善はその場で混ぜない。
- 必要なら後続 ticket / docs/report にする。
- non-blocking を理由に completion を遅らせない。
## 並列実装時の注意
- 1 ticket = 1 worktree = 1 branch を基本にする。
- 複数 Pod に同じ write scope を渡さない。
- parent は child の write scope 配下を直接編集しない。
- 依存関係がある ticket は、土台 branch を merge してから次 worktree を切る。
- parallel に走らせた Pod の完了通知は取りこぼしうるため、`ReadPodOutput` と worktree 状態で確認する。
## 完了報告の標準形
```text
完了:
- ticket: <path>
- branch: <name>
- commits:
- <hash> <subject>
- 変更概要: ...
- 検証:
- cargo fmt --check
- cargo check --workspace
- cargo test ...
- review: approve / approve with comments / request changes
- 未解決事項: ...
- 残 dirty changes: ...
```
## この Workflow で扱わないもの
以下は `$user/auto-maintain` または別の設計相談で扱う。
- ticket 候補を見繕うこと。
- 新規 ticket 作成判断。
- QA feedback / AI feedback を ticket / report / workflow に落とす判断。
- 長期 maintainer loop / WorkItemStore / LeaseStore の設計。

View File

@ -0,0 +1,98 @@
---
description: insomnia プロジェクトで child git worktree を作成・管理するための機械的手順。実装 Pod に作らせず、親 Pod が main workspace で実行する。
model_invokation: false
user_invocable: true
requires: []
---
# Worktree Workflow
insomnia プロジェクトで実装差分を main workspace から分離するため、`./.worktree/<task-name>` に child git worktree を作る。これは **worktree の扱い方だけ** を定める Workflow であり、ticket 選定、実装委譲、review、merge の運用は `$user/multi-agent-workflow` 側で扱う。
insomnia では Pod の write scope が排他的に委譲されるため、child worktree に `.insomnia` を置かない。main workspace は orchestration / ticket / docs / memory / workflow 管理の場所として残し、child worktree はコード差分専用の作業面として扱う。
## 適用範囲
この Workflow は親 Pod / orchestrator が main workspace で実行する。
- 実装 Pod にこの Workflow を渡して worktree を作らせない。
- 実装 Pod は、親 Pod が作成済みの child worktree を受け取り、その中で実装・build・test・報告を行う。
- ticket 作成、TODO 更新、review artifact、docs/report は main workspace 側で扱う。
## 原則
- 1 ticket / 1 実装 task につき 1 worktree を作る。
- worktree path は `./.worktree/<task-name>`。
- branch 名は原則 `<task-name>` と同じ kebab-case。
- child worktree には `.insomnia` を出さない。
- child worktree は実装差分用。`TODO.md` / `tickets/` / `docs/report/` / workflow / memory は原則 main workspace 側で扱う。
- push はしない。
## 事前確認
作成前に以下を確認する。
1. 対象 ticket / task が決まっているか。
2. `<task-name>` が branch / path 名に使える kebab-case か。
3. `git worktree add` を実行してよい許可があるか。
4. main workspace に混ぜてはいけない未保存差分がないか。
5. 同名 branch / worktree が既に存在しないか。
同名 branch がある場合は、既存 branch を使うか、人間に確認する。`git worktree add -B` で上書きしない。
## 作成手順
main workspace で実行する。
```bash
git worktree add .worktree/<task-name> -b <task-name>
git -C .worktree/<task-name> sparse-checkout init --no-cone
git -C .worktree/<task-name> sparse-checkout set --no-cone \
'/*' \
'!/.insomnia/' \
'!/.insomnia/**'
```
確認する。
```bash
git -C .worktree/<task-name> status --short --branch
test ! -e .worktree/<task-name>/.insomnia
```
失敗した場合は、worktree / branch / lock の状態を確認し、勝手に cleanup せず人間へ報告する。
## 子 Pod へ渡す scope
子 Pod を使う場合、子 Pod の cwd は main workspace のままになる。必ず作業対象が child worktree であることを明示し、Bash 実行時は毎回 `cd <repo>/.worktree/<task-name> && ...` させる。
推奨 scope:
```text
read: <repo>
write: <repo>/.worktree/<task-name>
```
より狭く切れる場合は、write scope を変更対象 crate / directory まで狭めてよい。ただし build / test に必要な生成物を書けることを確認する。
## child worktree 内の禁止事項
- `.insomnia` を作らない / コピーしない。
- main workspace の `TODO.md` / `tickets/` / `docs/report/` を編集しない。
- merge / push / branch deletion / worktree remove をしない。
- scope / permission / history persistence / prompt context 加工原則に関わる設計変更を無断で行わない。
## 完了時の扱い
worktree 作成 Workflow としては、完了時に merge しない。merge、ticket 完了、TODO 削除は `$user/multi-agent-workflow` または人間の明示指示で行う。
実装 Pod へ渡す完了報告項目の標準形:
- worktree path
- branch 名
- commit hash(実装 Pod に commit を許可した場合)
- 変更ファイル
- 実装概要
- 実行した build / test / format
- 未解決事項
- review に回せるか

75
AGENTS.md Normal file
View File

@ -0,0 +1,75 @@
全体設計が概ね固まり、随所の細かい仕様を詰めながら実装を進めている。
## このシステムにおける設計要旨
- プロンプトはすべて resources/promptsに集約している。管理効率の向上と同時に、ユーザーがオーバーライドする形式でもある。
- E2E(実プロセスをスポーンさせてのテスト)は未設計。
- 変更量を最小にするために設計を歪めたり、設計問題に対して不必要な後方互換性を作らない。長期的なメンテナンスと型安全性を追求すること。
### LLM コンテキストの加工原則
LLM に投げる context への割り込みは、大きく2種類に分かれる。**前者は許されるが、後者は禁止**。
Podの状態から純粋に再現可能で、且つ揮発性の無い操作であることが望ましい(pruning、tool result の content 切り詰め、prompt cache anchor の付与等)。
原則として、コンテキストは積み重ねるものであり、一時的にメッセージを差し込むことや、過去のメッセージを改ざんすることはKVキャッシュのヒット率を下げる。
**禁止**: ターンを跨ぐことができない情報に基づいて、history に記録せずに context だけにコンテンツを差し込むこと。これをやると LLM はそれに反応して生成を行う一方、次以降のターンでhistoryに残らないため、「自分がなぜその発言/tool call をしたか」の根拠が消えるうえ、prompt cache のヒット率も低下させることになる。
新しい input を context に乗せたいなら、必ず先に `worker.history` に append して commit すること。`history.json` への永続化はそこから自動的についてくる。Notify / PodEvent / `<system-reminder>` 系はこの原則で扱う(→ `tickets/notify-history-persist.md`)。
また、キャッシュを破壊するタイミングは正確にコントロールされる必要があり、キャッシュ破壊とトークン消費のトレードオフに基づいて慎重に設計されるべきである。
---
## 実際のセッションを読んでデバッグする
`~/.insomnia/sessions`にすべてのセッションがある。jsonlなので、いい感じにBashで読むこと。
---
## Git操作
workflowで明示されない限り、読み取り以外の操作は控えること。
基本はworktree上の一時的なブランチでコミットを重ね、メインブランチに取り込む運用をしている。
コミットメッセージは適当に`<prefix>: *簡潔な1行*`で書いている。
外部の参考プロジェクトは必要に応じてローカルの外部 checkout からReadすること。
---
## Ticketの運用について
`TODO.md`、`tickets/`はgitで管理されていて、時系列の管理はgitを参照して把握すること。
### TODO.md
- 1チケット = 1行。未完了のみ記載し、完了したら行ごと削除する(履歴はgitで追える)
- ネストは同一領域のグルーピング(表示用)にのみ使う。実装上の依存関係はネストで表現しない
- 完了した子は削除し、親は未完了の子がある限り残す。最後の子が完了したら親ごと削除
- Ticketを追加する際は、合わせてTODOも書くこと
### Ticket の粒度
- 1チケット = 完了時点で、実装が仕様又は機能として説明できる粒度。
- 作成時、背景や要件を前提として書き、実装の方針やコードの詳細は不必要に増やさない。
- チケット内のステップ(Phase 1, 2, ...)は実装順序であり、TODO等、外に出さない
- ビルドが通り、その機能に限り、まだ動作できないと明示出来ている場合を除いて全体を通して動作させられる状態である必要がある。
### Ticket のライフサイクル
gitがタイムラインの単一の情報源。ファイル操作とcommitで状態遷移を表現する。
a. 作成: `tickets/foo.md` を作成してcommit
b. 詳細化や前提の変化: `tickets/foo.md` を更新してcommit
c. レビュー: `tickets/foo.md` にレビュー状態を追記 + `tickets/foo.review.md` を作成してcommit
d. 完了: `tickets/foo.md` と `tickets/foo.review.md` を両方削除してcommit
worktreeと併用して作業を進める場合、必ずブランチを切る前に対象のチケットをコミットしてから切ること。
TODO.mdのリンクは完了後に切れるが、そのリンクを元にgitで消されたファイルを読み、内容を把握できる。
`.review.md` にはレビューの指摘事項と判断結果を記載する。
レビューはdiffの確認だけでなく、チケットはどのような前提・要件であり、それが達成されたかの確認まで含めて行う。
常に、提出された実装で良いのか、コードベースを歪めていないか、不必要な実装ではないかを確認すること。
---
insomniaでinsomniaを開発している際、AI自身のフィードバックを元に改善を回すために `docs/report/`ディレクトリに感じた障壁や改善案等を書き残す形にした。 明確に力不足な点/ツールの問題があった場合や、ユーザーからの指示があった際に作ること。

75
CLAUDE.md Normal file
View File

@ -0,0 +1,75 @@
全体設計が概ね固まり、随所の細かい仕様を詰めながら実装を進めている。
## このシステムにおける設計要旨
- プロンプトはすべて `resources/prompts` に集約している。管理効率の向上と同時に、ユーザーがオーバーライドする形式でもある。
- E2E(実プロセスをスポーンさせてのテスト)は未設計。
- 変更量を最小にするために設計を歪めたり、設計問題に対して不必要な後方互換性を作らない。長期的なメンテナンスと型安全性を追求すること。
### LLM コンテキストの加工原則
LLM に投げる context への割り込みは、大きく2種類に分かれる。**前者は許されるが、後者は禁止**。
Podの状態から純粋に再現可能で、且つ揮発性の無い操作であることが望ましい(pruning、tool result の content 切り詰め、prompt cache anchor の付与等)。
原則として、コンテキストは積み重ねるものであり、一時的にメッセージを差し込むことや、過去のメッセージを改ざんすることはKVキャッシュのヒット率を下げる。
**禁止**: ターンを跨ぐことができない情報に基づいて、history に記録せずに context だけにコンテンツを差し込むこと。これをやると LLM はそれに反応して生成を行う一方、次以降のターンでhistoryに残らないため、「自分がなぜその発言/tool call をしたか」の根拠が消えるうえ、prompt cache のヒット率も低下させることになる。
新しい input を context に乗せたいなら、必ず先に `worker.history` に append して commit すること。`history.json` への永続化はそこから自動的についてくる。Notify / PodEvent / `<system-reminder>` 系はこの原則で扱う(→ `tickets/notify-history-persist.md`)。
また、キャッシュを破壊するタイミングは正確にコントロールされる必要があり、キャッシュ破壊とトークン消費のトレードオフに基づいて慎重に設計されるべきである。
---
## 実際のセッションを読んでデバッグする
`~/.insomnia/sessions`にすべてのセッションがある。jsonlなので、いい感じにBashで読むこと。
---
## Git操作
workflowで明示されない限り、読み取り以外の操作は控えること。
基本はworktree上の一時的なブランチでコミットを重ね、メインブランチに取り込む運用をしている。
コミットメッセージは適当に`<prefix>: *簡潔な1行*`で書いている。
外部の参考プロジェクトは必要に応じてローカルの外部 checkout からReadすること。
---
## Ticketの運用について
`TODO.md`、`tickets/`はgitで管理されていて、時系列の管理はgitを参照して把握すること。
### TODO.md
- 1チケット = 1行。未完了のみ記載し、完了したら行ごと削除する(履歴はgitで追える)
- ネストは同一領域のグルーピング(表示用)にのみ使う。実装上の依存関係はネストで表現しない
- 完了した子は削除し、親は未完了の子がある限り残す。最後の子が完了したら親ごと削除
- Ticketを追加する際は、合わせてTODOも書くこと
### Ticket の粒度
- 1チケット = 完了時点で、実装が仕様又は機能として説明できる粒度。
- 作成時、背景や要件を前提として書き、実装の方針やコードの詳細は不必要に増やさない。
- チケット内のステップ(Phase 1, 2, ...)は実装順序であり、TODO等、外に出さない
- ビルドが通り、その機能に限り,まだ動作できないと明示出来ている場合を除いて全体を通して動作させられる状態である必要がある。
### Ticket のライフサイクル
gitがタイムラインの単一の情報源。ファイル操作とcommitで状態遷移を表現する。
a. 作成: `tickets/foo.md` を作成してcommit
b. 詳細化や前提の変化: `tickets/foo.md` を更新してcommit
c. レビュー: `tickets/foo.md` にレビュー状態を追記 + `tickets/foo.review.md` を作成してcommit
d. 完了: `tickets/foo.md` と `tickets/foo.review.md` を両方削除してcommit
worktreeと併用して作業を進める場合、必ずブランチを切る前に対象のチケットをコミットしてから切ること。
TODO.mdのリンクは完了後に切れるが、そのリンクを元にgitで消されたファイルを読み、内容を把握できる。
`.review.md` にはレビューの指摘事項と判断結果を記載する。
レビューはdiffの確認だけでなく、チケットはどのような前提・要件であり、それが達成されたかの確認まで含めて行う。
常に、提出された実装で良いのか、コードベースを歪めていないか、不必要な実装ではないかを確認すること。
---
insomniaでinsomniaを開発している際、AI自身のフィードバックを元に改善を回すために `docs/report/`ディレクトリに感じた障壁や改善案等を書き残す形にした。 明確に力不足な点/ツールの問題があった場合や、ユーザーからの指示があった際に作ること。

2637
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -1,12 +1,57 @@
[workspace]
resolver = "2"
members = [
"crates/insomnia",
"crates/client",
"crates/daemon",
"crates/llm-worker",
"crates/llm-worker-macros",
"crates/llm-worker-persistence",
"crates/session-store",
"crates/manifest",
"crates/pod",
"crates/protocol",
"crates/provider",
"crates/pod-registry",
"crates/session-metrics",
"crates/lint-common",
"crates/tools",
"crates/tui",
"crates/memory",
"crates/workflow",
]
[workspace.package]
edition = "2024"
license = "MIT"
[workspace.dependencies]
# Internal crates
client = { path = "crates/client" }
llm-worker = { path = "crates/llm-worker", version = "0.2" }
llm-worker-macros = { path = "crates/llm-worker-macros", version = "0.2" }
manifest = { path = "crates/manifest" }
lint-common = { path = "crates/lint-common" }
memory = { path = "crates/memory" }
pod-registry = { path = "crates/pod-registry" }
protocol = { path = "crates/protocol" }
provider = { path = "crates/provider" }
session-metrics = { path = "crates/session-metrics" }
session-store = { path = "crates/session-store" }
tools = { path = "crates/tools" }
# External
# Note: `reqwest` and `chrono` are not aggregated here because some crates
# need `default-features = false`, which workspace inheritance cannot override.
async-trait = "0.1"
fs4 = "0.13"
futures = "0.3"
libc = "0.2"
schemars = "1.2"
serde = "1.0"
serde_json = "1.0"
sha2 = "0.11"
tempfile = "3.27"
thiserror = "2.0"
tokio = "1.52"
toml = "1.1"
tracing = "0.1"
uuid = "1.23"

18
KNOWN_ISSUES.md Normal file
View File

@ -0,0 +1,18 @@
# Known Issues
Ticket を切るほどではないが、次に近所を触るときに合わせて拾いたい小粒な所見の置き場。
## 運用
- 1 項目 = 出典 (file:line) + 症状 (一文) + トリガー (いつ拾うか、一文)
- 関連 ticket があれば `→ [tickets/foo.md]` でリンク
- 修正したら同じコミットで該当エントリを削除する (履歴は git)
- ここに溜める基準: 「ticket は重い」「だが忘れたら次の触り手が踏む」もの。明確に作業すべきものは ticket 化する
## エントリ
- `crates/tui/src/app.rs:478-485` — bad workflow slug を含む `Method::Run` 送信時、`Event::UserMessage` の早期 broadcast で `turn_index += 1` されターンヘッダだけ残る ("ghost turn header")。次に TUI のターンヘッダ / エラー表示周りを触るときに整理。→ [tickets/pod-input-validate-internalize.md] の review 由来。
- `crates/pod/src/controller.rs:944` — `worker_error_code` の `PodError::WorkflowResolve(_) => InvalidRequest` が post-commit な resolve エラー (`KnowledgeNotFound` 等) にも適用される。意味論的には妥当方向だが、resolve 系のエラー粒度を分けたくなったタイミングで再評価。
- `crates/pod/tests/controller_test.rs` — `double_run_returns_error` がたまに失敗する flakiness を観測。`pod-interrupt-prep-internalize` 以前から存在する別件。次に controller_test の Run 連投系のタイミングを触るときに併せて原因を切り分け。
- `crates/session-store/src/fs_store.rs:117-122` — `FsStore::read_entry_count` が `fs::read_to_string` で全文ロードしてから行数カウントするため O(n)。`ensure_head_or_fork` は run-start でしか呼ばれず現状は許容範囲だが、長期セッションが普通になった時点で `\n` バイト数の cheap count か末尾 seek に置き換える。
- `crates/session-store/src/segment.rs:121` `ensure_head_or_fork` (free fn, test 専用・本番 caller ゼロ) と `crates/pod/src/pod.rs` `Pod::ensure_segment_head` (本番 inline) に live auto-fork の検知 + forked_from 記録が二重実装されている。entry-hash-abolish 以前からの重複で、両方独立にテスト済みだが drift 必至。session-store 側を本番から呼ぶ形に寄せるか free fn を畳むかは要設計判断。Pod state / fork 周辺を次に触るときに統合を検討。

View File

@ -3,16 +3,3 @@
insomnia(i6a)は不休のエージェントループを回すためのエージェントプラットフォーム。
ワークフローを統括し、四六時中電力を消費し、イテレーションします。
## Crates
| クレート | 概要 |
|---|---|
| `insomnia` | トップレベルアプリケーション(未実装) |
| `llm-worker` | 自律的なLLMシステムを構築するためのライブラリ |
| `llm-worker-macros` | `llm-worker`用の手続きマクロ (`#[tool_registry]`, `#[tool]`) |
## ドキュメント
- [要件](crates/llm-worker/docs/requirements.md) — llm-workerに求める性能 (R1-R4)
- [アーキテクチャ](crates/llm-worker/docs/architecture.md) — 3層構成とモジュール配置

12
TODO.md
View File

@ -1,7 +1,5 @@
- [x] 永続化データ構造の制定
- [ ] テスト設計
- [x] ツール出力の遅延読み込み設計 (ToolOutput / BlobStore / auto_summarize)
- [ ] ツール設計
- [ ] ツールの動的追加/削除 (unregister, replace)
- [ ] ToolDefinition ファクトリの遅延初期化修正 (現状 register 時に即時呼び出しされている。セッション開始=初回メッセージ送信時まで遅延させる)
- [x] inspect ツール実装
# TODO legacy notice
Active repository work items have been migrated to `work-items/`.
Use `./tickets.sh list --status all` for the generated/current view and `./tickets.sh doctor` to validate the migration state.

11
crates/client/Cargo.toml Normal file
View File

@ -0,0 +1,11 @@
[package]
name = "client"
version = "0.1.0"
edition.workspace = true
license.workspace = true
[dependencies]
protocol = { workspace = true }
manifest = { workspace = true }
tokio = { workspace = true, features = ["rt", "macros", "net", "io-util", "sync", "time", "process", "fs"] }
uuid = { workspace = true }

15
crates/client/src/lib.rs Normal file
View File

@ -0,0 +1,15 @@
//! Pod プロトコルを喋るクライアント。
//!
//! - [`PodClient`]: 既存 pod の Unix ソケットへ接続して `Method` を送り、
//! `Event` を受け取る低レベル接続。
//! - [`spawn`]: pod バイナリをサブプロセスとして起動し、`INSOMNIA-READY`
//! ハンドシェイクが終わるまで待つフロー。subprocess を立ち上げる必要が
//! ない呼び出し側 (=既存 pod に attach する場合) は使わなくてよい。
//!
//! TUI / GUI / E2E ハーネスはこの crate に依存して protocol を喋る。
mod pod_client;
pub mod spawn;
pub use pod_client::PodClient;
pub use spawn::{SpawnConfig, SpawnError, SpawnReady, spawn_pod};

View File

@ -0,0 +1,45 @@
use std::io;
use std::path::Path;
use protocol::stream::{JsonLineReader, JsonLineWriter};
use protocol::{Event, Method};
use tokio::net::UnixStream;
use tokio::sync::mpsc;
/// Low-level connection to an already running pod over its Unix socket.
///
/// Outgoing [`Method`]s go through a [`JsonLineWriter`]; incoming [`Event`]s
/// are decoded by a background task and buffered in a bounded channel.
pub struct PodClient {
    writer: JsonLineWriter<tokio::io::WriteHalf<UnixStream>>,
    event_rx: mpsc::Receiver<Event>,
}

impl PodClient {
    /// Connects to the pod socket at `path` and starts the background reader.
    ///
    /// The reader task forwards decoded events into a channel of capacity 256
    /// and stops on the first read error, on end of stream, or once the
    /// receiving side (this client) has been dropped.
    ///
    /// # Errors
    ///
    /// Returns the `io::Error` produced by the socket connect.
    pub async fn connect(path: &Path) -> Result<Self, io::Error> {
        let (read_half, write_half) = tokio::io::split(UnixStream::connect(path).await?);
        let writer = JsonLineWriter::new(write_half);
        let (event_tx, event_rx) = mpsc::channel::<Event>(256);

        tokio::spawn(async move {
            let mut events = JsonLineReader::new(read_half);
            loop {
                let event = match events.next::<Event>().await {
                    Ok(Some(event)) => event,
                    // End of stream and read errors both end the task.
                    _ => break,
                };
                if event_tx.send(event).await.is_err() {
                    break;
                }
            }
        });

        Ok(Self { writer, event_rx })
    }

    /// Writes one `method` to the pod.
    pub async fn send(&mut self, method: &Method) -> Result<(), io::Error> {
        self.writer.write(method).await
    }

    /// Returns a buffered event without waiting.
    ///
    /// `None` covers both "nothing buffered right now" and "reader task has
    /// stopped"; the two cases are not distinguished.
    pub fn try_next_event(&mut self) -> Option<Event> {
        match self.event_rx.try_recv() {
            Ok(event) => Some(event),
            Err(_) => None,
        }
    }

    /// Waits for the next event; `None` means the reader task has stopped and
    /// the buffer is drained.
    pub async fn next_event(&mut self) -> Option<Event> {
        self.event_rx.recv().await
    }
}

299
crates/client/src/spawn.rs Normal file
View File

@ -0,0 +1,299 @@
//! pod バイナリをサブプロセスとして立ち上げ、`INSOMNIA-READY` を待つ
//! ハンドシェイク。
//!
//! - 親プロセス (TUI / GUI / E2E) は overlay TOML を組み立ててこの関数に
//! 渡す。pod はそれを受けて socket を bind し、stderr に
//! `INSOMNIA-READY\t<name>\t<socket>` を吐く。
//! - 待機中の stderr 行は `progress` コールバック越しに呼び出し側へ流す。
//! UI の進捗表示や E2E のログ収集はここで賄う。
//! - `kill_on_drop = false` + `process_group(0)` により、親プロセス
//! ライフサイクルから切り離した detached pod を作る。ready 後の lifecycle
//! 管理は runtime ディレクトリ / socket を介して行う。
use std::io;
use std::path::{Path, PathBuf};
use std::process::Stdio;
use std::time::Duration;
use tokio::process::Command;
use uuid::Uuid;
/// Prefix of the stderr line that announces readiness; the full line is
/// `INSOMNIA-READY\t<name>\t<socket>`.
const READY_PREFIX: &str = "INSOMNIA-READY\t";
/// Deadline shared by the whole handshake: finding the ready line and then
/// connecting to the announced socket.
const READY_TIMEOUT: Duration = Duration::from_secs(20);
/// Input to [`spawn_pod`].
pub struct SpawnConfig {
/// Identifier used as `pod.name`. It resolves the runtime directory
/// (`manifest::paths::pod_runtime_dir`) and is passed as `--pod <pod_name>`
/// when `resume_by_pod_name` is set.
///
/// NOTE(review): the name reported in the ready line is returned to the
/// caller as-is; this module does not compare it against this field.
pub pod_name: String,
/// TOML string handed to the pod via `--overlay`.
pub overlay_toml: String,
/// Working directory (`current_dir`) of the pod process.
pub cwd: PathBuf,
/// When `Some(id)`, `--session <id>` is appended so that the pod resumes
/// from that session.
pub resume_from: Option<Uuid>,
/// When true, `--pod <pod_name>` is appended. The pod is expected to resume
/// name-keyed state if it exists and otherwise start a new pod with that
/// name (pod-side behaviour, not visible in this module).
pub resume_by_pod_name: bool,
}
/// Result of a successful [`spawn_pod`] handshake, parsed from the ready line.
pub struct SpawnReady {
/// Pod name as reported in the ready line.
pub pod_name: String,
/// Socket path reported in the ready line; a connection to it succeeded
/// before this value was returned.
pub socket_path: PathBuf,
}
/// Failure modes of [`spawn_pod`].
#[derive(Debug)]
pub enum SpawnError {
    /// Generic I/O failure (runtime directory setup, stderr log access,
    /// unexpected socket error).
    Io(io::Error),
    /// The runtime directory could not be resolved (e.g. the relevant
    /// environment variables are unset).
    RuntimeDirUnavailable,
    /// The pod process itself could not be started.
    PodLaunchFailed(io::Error),
    /// The pod exited (or printed a malformed ready line) before the
    /// handshake completed; `stderr_tail` carries the last stderr lines.
    PodExitedEarly {
        stderr_tail: String,
    },
    /// The handshake did not complete within `READY_TIMEOUT`.
    Timeout,
}

impl std::fmt::Display for SpawnError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Io(err) => write!(f, "io error: {err}"),
            Self::RuntimeDirUnavailable => f.write_str(
                "could not resolve runtime directory (set INSOMNIA_HOME, INSOMNIA_RUNTIME_DIR, XDG_RUNTIME_DIR, or HOME)",
            ),
            Self::PodLaunchFailed(err) => write!(f, "failed to launch pod: {err}"),
            // Without any captured stderr there is nothing to append.
            Self::PodExitedEarly { stderr_tail } if stderr_tail.is_empty() => {
                f.write_str("pod exited before becoming ready")
            }
            Self::PodExitedEarly { stderr_tail } => {
                write!(f, "pod exited before becoming ready: {stderr_tail}")
            }
            Self::Timeout => {
                let secs = READY_TIMEOUT.as_secs();
                write!(f, "pod did not become ready within {secs}s")
            }
        }
    }
}

impl std::error::Error for SpawnError {}

/// Lets `?` lift plain I/O errors into [`SpawnError::Io`].
impl From<io::Error> for SpawnError {
    fn from(source: io::Error) -> Self {
        SpawnError::Io(source)
    }
}
/// Spawns a pod and waits until the `INSOMNIA-READY` handshake completes.
///
/// The pod's stderr is redirected to `stderr.log` inside the pod's runtime
/// directory. `progress` is invoked for every stderr line observed before the
/// ready line (the ready line itself is not forwarded), which callers can use
/// for UI updates or log capture.
///
/// # Errors
///
/// - [`SpawnError::RuntimeDirUnavailable`] when the runtime directory cannot
///   be resolved.
/// - [`SpawnError::Io`] when the directory or the stderr log cannot be created.
/// - [`SpawnError::PodLaunchFailed`] when the process cannot be started.
/// - Whatever the handshake wait reports; in that case the child is killed
///   and reaped before returning.
pub async fn spawn_pod<F>(config: SpawnConfig, mut progress: F) -> Result<SpawnReady, SpawnError>
where
    F: FnMut(&str),
{
    let executable = resolve_pod_command();
    let Some(runtime_dir) = manifest::paths::pod_runtime_dir(&config.pod_name) else {
        return Err(SpawnError::RuntimeDirUnavailable);
    };
    std::fs::create_dir_all(&runtime_dir)?;
    let stderr_path = runtime_dir.join("stderr.log");
    let stderr_sink = std::fs::File::create(&stderr_path)?;

    let mut command = Command::new(&executable);
    command.arg("--overlay").arg(&config.overlay_toml);
    if config.resume_by_pod_name {
        command.arg("--pod").arg(&config.pod_name);
    }
    if let Some(session_id) = config.resume_from {
        command.arg("--session").arg(session_id.to_string());
    }
    command
        .current_dir(&config.cwd)
        .stdin(Stdio::null())
        .stdout(Stdio::null())
        .stderr(Stdio::from(stderr_sink))
        .process_group(0);

    let mut child = command.spawn().map_err(SpawnError::PodLaunchFailed)?;

    // Default `kill_on_drop = false` plus `process_group(0)` makes this
    // a detached Pod once startup succeeds: dropping the handle does not
    // terminate it, and terminal-generated signals for the parent's
    // process group do not hit the Pod. Runtime state/socket files are
    // the source of truth after that point.
    match wait_for_ready_file(&mut progress, &stderr_path, &mut child).await {
        Ok(ready) => {
            // Keep awaiting the child in the background so its exit status
            // is collected once the detached pod eventually terminates.
            tokio::spawn(async move {
                let _ = child.wait().await;
            });
            Ok(ready)
        }
        Err(failure) => {
            // Startup failed: make sure no half-started pod is left behind.
            let _ = child.start_kill();
            let _ = child.wait().await;
            Err(failure)
        }
    }
}
/// Polls the pod's stderr log until the `INSOMNIA-READY` line shows up, then
/// waits for the announced socket to accept connections.
///
/// Every other complete stderr line is recorded in a bounded tail (used for
/// the `PodExitedEarly` message) and forwarded to `progress`.
///
/// Only newline-terminated lines are consumed. The log is written by another
/// process while it is polled here, so the last line may be observed half
/// written; parsing such a fragment could yield a "malformed ready line"
/// error or a truncated socket path. An unterminated fragment is therefore
/// left in place and re-read on the next poll. If the pod exits, the final
/// drain still picks up whatever remains, terminated or not.
///
/// The log is read as raw bytes and decoded lossily per line, so a multi-byte
/// character that is only partially flushed (or non-UTF-8 output) does not
/// abort the handshake with an I/O error.
///
/// # Errors
///
/// - [`SpawnError::Io`] when the log cannot be read (a missing file is
///   treated as empty).
/// - [`SpawnError::PodExitedEarly`] when the child exits first or the ready
///   line is malformed.
/// - [`SpawnError::Timeout`] when `READY_TIMEOUT` elapses.
async fn wait_for_ready_file<F>(
    progress: &mut F,
    stderr_path: &Path,
    child: &mut tokio::process::Child,
) -> Result<SpawnReady, SpawnError>
where
    F: FnMut(&str),
{
    let mut tail = StderrTail::new();
    let deadline = tokio::time::Instant::now() + READY_TIMEOUT;
    // Byte offset into the log file up to which complete lines were consumed.
    let mut offset = 0usize;
    loop {
        let bytes = match tokio::fs::read(stderr_path).await {
            Ok(bytes) => bytes,
            Err(e) if e.kind() == io::ErrorKind::NotFound => Vec::new(),
            Err(e) => return Err(SpawnError::Io(e)),
        };
        // `get` instead of indexing: a log shorter than `offset` simply means
        // there is nothing new to look at.
        let unread = bytes.get(offset..).unwrap_or(&[]);
        for raw in unread.split_inclusive(|&b| b == b'\n') {
            // An unterminated trailing fragment is still being written.
            let Some(body) = raw.strip_suffix(b"\n") else {
                break;
            };
            // Advance first so that a later drain (e.g. from `wait_for_socket`)
            // does not replay lines that were already recorded.
            offset += raw.len();
            let body = body.strip_suffix(b"\r").unwrap_or(body);
            let decoded = String::from_utf8_lossy(body);
            let line: &str = &decoded;

            if let Some(rest) = line.strip_prefix(READY_PREFIX) {
                let (pod_name, socket) = match rest.split_once('\t') {
                    Some((name, socket)) if !name.is_empty() && !socket.is_empty() => {
                        (name, socket)
                    }
                    _ => {
                        return Err(SpawnError::PodExitedEarly {
                            stderr_tail: format!("malformed ready line: {line}"),
                        });
                    }
                };
                let ready = SpawnReady {
                    pod_name: pod_name.to_string(),
                    socket_path: PathBuf::from(socket),
                };
                wait_for_socket(
                    &ready.socket_path,
                    deadline,
                    child,
                    stderr_path,
                    &mut tail,
                    &mut offset,
                )
                .await?;
                return Ok(ready);
            }
            tail.push(line);
            progress(line);
        }
        if tokio::time::Instant::now() >= deadline {
            return Err(SpawnError::Timeout);
        }
        tokio::select! {
            status = child.wait() => {
                let _ = status;
                // The pod may flush its last stderr line right before it
                // exits. Re-read after `child.wait()` resolved so that the
                // line explaining the failure ends up in `PodExitedEarly`.
                drain_stderr_into_tail(stderr_path, &mut tail, &mut offset).await;
                return Err(SpawnError::PodExitedEarly {
                    stderr_tail: tail.into_string(),
                });
            }
            _ = tokio::time::sleep(Duration::from_millis(100)) => {}
        }
    }
}
async fn wait_for_socket(
socket_path: &Path,
deadline: tokio::time::Instant,
child: &mut tokio::process::Child,
stderr_path: &Path,
tail: &mut StderrTail,
offset: &mut usize,
) -> Result<(), SpawnError> {
loop {
match tokio::net::UnixStream::connect(socket_path).await {
Ok(_) => return Ok(()),
Err(e)
if e.kind() == io::ErrorKind::NotFound
|| e.kind() == io::ErrorKind::ConnectionRefused => {}
Err(e) => return Err(SpawnError::Io(e)),
}
if tokio::time::Instant::now() >= deadline {
return Err(SpawnError::Timeout);
}
tokio::select! {
status = child.wait() => {
let _ = status;
drain_stderr_into_tail(stderr_path, tail, offset).await;
return Err(SpawnError::PodExitedEarly {
stderr_tail: tail.as_string(),
});
}
_ = tokio::time::sleep(Duration::from_millis(50)) => {}
}
}
}
/// Pushes every stderr line past `*offset` into `tail` and moves `*offset` to
/// the end of the log.
///
/// Best effort: a log that cannot be read leaves `tail` and `offset`
/// untouched. Lines starting with the ready prefix are skipped.
async fn drain_stderr_into_tail(stderr_path: &Path, tail: &mut StderrTail, offset: &mut usize) {
    let content = match tokio::fs::read_to_string(stderr_path).await {
        Ok(content) => content,
        Err(_) => return,
    };
    if *offset >= content.len() {
        return;
    }
    content[*offset..]
        .lines()
        .filter(|line| !line.starts_with(READY_PREFIX))
        .for_each(|line| tail.push(line));
    *offset = content.len();
}
/// Picks the executable used to launch a child Pod.
///
/// The result must be a `pod`-compatible binary: the parent scans the child's
/// stderr for `INSOMNIA-READY`, so a wrapper that prints extra stderr lines
/// would pollute the handshake.
///
/// A non-empty `INSOMNIA_POD_COMMAND` takes precedence (tests use it to
/// inject a mock binary). Otherwise the bare name `pod` is returned and
/// resolved through `PATH` at spawn time, where a missing binary surfaces as
/// the spawn `io::Error`.
fn resolve_pod_command() -> PathBuf {
    match std::env::var("INSOMNIA_POD_COMMAND") {
        Ok(overridden) if !overridden.is_empty() => PathBuf::from(overridden),
        _ => PathBuf::from("pod"),
    }
}
/// Bounded buffer holding the most recent stderr lines of a pod, used to
/// build the `PodExitedEarly` message.
struct StderrTail {
    lines: std::collections::VecDeque<String>,
}

impl StderrTail {
    /// Maximum number of lines retained.
    const CAPACITY: usize = 8;

    /// Creates an empty tail.
    fn new() -> Self {
        let lines = std::collections::VecDeque::with_capacity(Self::CAPACITY);
        Self { lines }
    }

    /// Appends `line`, evicting the oldest line once the tail is full.
    fn push(&mut self, line: &str) {
        if self.lines.len() == Self::CAPACITY {
            self.lines.pop_front();
        }
        self.lines.push_back(line.to_owned());
    }

    /// Renders the retained lines, oldest first, separated by `" | "`.
    fn as_string(&self) -> String {
        let mut rendered = String::new();
        for (index, line) in self.lines.iter().enumerate() {
            if index > 0 {
                rendered.push_str(" | ");
            }
            rendered.push_str(line);
        }
        rendered
    }

    /// Consuming variant of [`StderrTail::as_string`].
    fn into_string(self) -> String {
        Vec::from(self.lines).join(" | ")
    }
}

10
crates/daemon/Cargo.toml Normal file
View File

@ -0,0 +1,10 @@
[package]
name = "daemon"
version = "0.1.0"
edition.workspace = true
license.workspace = true
[dependencies]
manifest = { workspace = true }
protocol = { workspace = true }
tokio = { workspace = true, features = ["full"] }

9
crates/daemon/README.md Normal file
View File

@ -0,0 +1,9 @@
# daemon
Pod のライフサイクルを管理する常駐デーモン。未実装。
## 依存クレート
- `manifest` — マニフェスト設定
- `protocol` — 通信プロトコル型
- `tokio` — 非同期ランタイム

1
crates/daemon/src/lib.rs Normal file
View File

@ -0,0 +1 @@

View File

@ -1,18 +0,0 @@
[package]
name = "insomnia"
version = "0.1.0"
edition.workspace = true
license.workspace = true
[dependencies]
llm-worker = { path = "../llm-worker" }
llm-worker-persistence = { path = "../llm-worker-persistence" }
serde = { version = "1.0", features = ["derive"] }
toml = "0.8"
uuid = { version = "1", features = ["v7", "serde"] }
thiserror = "2.0"
tokio = { version = "1.49", features = ["fs"] }
[dev-dependencies]
tokio = { version = "1.49", features = ["macros", "rt-multi-thread"] }
tempfile = "3.24"

View File

@ -1,9 +0,0 @@
pub mod manifest;
pub mod pod;
pub mod provider;
pub mod scope;
pub use manifest::{PodManifest, ProviderConfig, ProviderKind};
pub use pod::{Pod, PodError, PodId, PodRunResult, apply_worker_manifest, new_pod_id};
pub use provider::build_client;
pub use scope::Scope;

View File

@ -1,164 +0,0 @@
use std::path::PathBuf;
use serde::Deserialize;
/// Declarative configuration for a Pod.
///
/// Parsed from a TOML manifest file. Describes the provider, model,
/// system prompt, and optional directory scope.
#[derive(Debug, Clone, Deserialize)]
pub struct PodManifest {
/// `[pod]` table: pod metadata.
pub pod: PodMeta,
/// `[provider]` table: which LLM backend and model to use.
pub provider: ProviderConfig,
/// `[worker]` table: worker-level settings. The table itself is required,
/// but every key inside it is optional.
pub worker: WorkerManifest,
/// Optional `[scope]` table; `None` when the table is absent.
#[serde(default)]
pub scope: Option<ScopeConfig>,
}
/// Pod metadata.
#[derive(Debug, Clone, Deserialize)]
pub struct PodMeta {
/// Name of the pod as declared in the manifest (required).
pub name: String,
}
/// LLM provider configuration.
#[derive(Debug, Clone, Deserialize)]
pub struct ProviderConfig {
/// Which provider backend to talk to.
pub kind: ProviderKind,
/// Model identifier passed to the provider client.
pub model: String,
/// Environment variable name holding the API key.
#[serde(default)]
pub api_key_env: Option<String>,
/// Custom base URL for the provider API.
#[serde(default)]
pub base_url: Option<String>,
}
/// Supported LLM providers.
///
/// Deserialized from the lowercase variant name (`"anthropic"`, `"openai"`,
/// `"gemini"`, `"ollama"`); any other value is a parse error.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ProviderKind {
Anthropic,
Openai,
Gemini,
Ollama,
}
/// Worker-level configuration embedded in the manifest.
///
/// Every field is optional; a missing key deserializes to `None`.
#[derive(Debug, Clone, Deserialize)]
pub struct WorkerManifest {
/// System prompt for the worker, if any.
#[serde(default)]
pub system_prompt: Option<String>,
/// Value for the request's `max_tokens` setting, if any.
#[serde(default)]
pub max_tokens: Option<u32>,
/// Value for the request's `temperature` setting, if any.
#[serde(default)]
pub temperature: Option<f32>,
}
/// Directory scope configuration.
#[derive(Debug, Clone, Deserialize)]
pub struct ScopeConfig {
/// Root directory of the scope, stored exactly as written in the manifest
/// (no canonicalization happens at parse time).
pub root: PathBuf,
}
impl PodManifest {
/// Parse a manifest from a TOML string.
///
/// # Errors
///
/// Returns the `toml` deserialization error when `s` is not valid TOML or
/// does not match the manifest schema (for example an unknown provider kind).
pub fn from_toml(s: &str) -> Result<Self, toml::de::Error> {
toml::from_str(s)
}
}
#[cfg(test)]
mod tests {
use super::*;
// Only the required tables/keys are present; every optional field must
// come back as `None`.
#[test]
fn parse_minimal_manifest() {
let toml = r#"
[pod]
name = "test-agent"
[provider]
kind = "anthropic"
model = "claude-sonnet-4-20250514"
[worker]
"#;
let manifest = PodManifest::from_toml(toml).unwrap();
assert_eq!(manifest.pod.name, "test-agent");
assert_eq!(manifest.provider.kind, ProviderKind::Anthropic);
assert_eq!(manifest.provider.model, "claude-sonnet-4-20250514");
assert!(manifest.provider.api_key_env.is_none());
assert!(manifest.scope.is_none());
assert!(manifest.worker.system_prompt.is_none());
}
// Every optional field is set and must round-trip into the parsed structs.
#[test]
fn parse_full_manifest() {
let toml = r#"
[pod]
name = "code-reviewer"
[provider]
kind = "anthropic"
model = "claude-sonnet-4-20250514"
api_key_env = "ANTHROPIC_API_KEY"
[worker]
system_prompt = "You are a code reviewer."
max_tokens = 4096
temperature = 0.3
[scope]
root = "./src"
"#;
let manifest = PodManifest::from_toml(toml).unwrap();
assert_eq!(manifest.pod.name, "code-reviewer");
assert_eq!(
manifest.provider.api_key_env.as_deref(),
Some("ANTHROPIC_API_KEY")
);
assert_eq!(
manifest.worker.system_prompt.as_deref(),
Some("You are a code reviewer.")
);
assert_eq!(manifest.worker.max_tokens, Some(4096));
assert_eq!(manifest.worker.temperature, Some(0.3));
assert_eq!(
manifest.scope.as_ref().unwrap().root,
PathBuf::from("./src")
);
}
// A provider entry without `api_key_env` parses fine.
#[test]
fn parse_ollama_no_api_key() {
let toml = r#"
[pod]
name = "local-agent"
[provider]
kind = "ollama"
model = "llama3"
[worker]
"#;
let manifest = PodManifest::from_toml(toml).unwrap();
assert_eq!(manifest.provider.kind, ProviderKind::Ollama);
assert!(manifest.provider.api_key_env.is_none());
}
// A provider kind outside `ProviderKind` must be a parse error.
#[test]
fn reject_unknown_provider() {
let toml = r#"
[pod]
name = "test"
[provider]
kind = "unknown_provider"
model = "x"
[worker]
"#;
assert!(PodManifest::from_toml(toml).is_err());
}
}

View File

@ -1,180 +0,0 @@
use llm_worker::llm_client::client::LlmClient;
use llm_worker::llm_client::RequestConfig;
use llm_worker::Worker;
use llm_worker_persistence::{
Session, SessionConfig, SessionError, SessionId, Store, StoreError,
};
use crate::manifest::{PodManifest, WorkerManifest};
use crate::scope::Scope;
/// Pod identifier. UUID v7 (time-ordered).
pub type PodId = uuid::Uuid;
/// Generate a new Pod ID.
///
/// Each call returns a fresh UUID v7 from `uuid::Uuid::now_v7()`.
pub fn new_pod_id() -> PodId {
uuid::Uuid::now_v7()
}
/// An independent agent execution unit.
///
/// Wraps a persistent [`Session`] with manifest metadata and an optional
/// directory scope. This is the primary abstraction in insomnia.
pub struct Pod<C: LlmClient, St: Store> {
id: PodId,
manifest: PodManifest,
session: Session<C, St>,
scope: Option<Scope>,
}
impl<C: LlmClient, St: Store> Pod<C, St> {
/// Create a new Pod from a pre-built Worker and store.
///
/// The caller is responsible for constructing the `LlmClient` from the
/// manifest's provider config. This keeps Pod free of provider-specific
/// dependencies.
pub async fn new(
manifest: PodManifest,
worker: Worker<C>,
store: St,
scope: Option<Scope>,
) -> Result<Self, PodError> {
let session = Session::new(worker, store, SessionConfig::default()).await?;
Ok(Self {
id: new_pod_id(),
manifest,
session,
scope,
})
}
/// Restore a Pod from a persisted session.
pub async fn restore(
id: PodId,
session_id: SessionId,
manifest: PodManifest,
client: C,
store: St,
scope: Option<Scope>,
) -> Result<Self, PodError> {
let session = Session::restore(client, store, session_id, SessionConfig::default()).await?;
Ok(Self {
id,
manifest,
session,
scope,
})
}
/// The Pod's unique identifier.
pub fn id(&self) -> PodId {
self.id
}
/// The session ID used for persistence.
pub fn session_id(&self) -> SessionId {
self.session.session_id()
}
/// The Pod's manifest.
pub fn manifest(&self) -> &PodManifest {
&self.manifest
}
/// The Pod's directory scope, if any.
pub fn scope(&self) -> Option<&Scope> {
self.scope.as_ref()
}
/// Direct access to the underlying session.
///
/// Use this to register tools, hooks, or subscribers on the worker
/// before calling [`run`](Self::run).
pub fn session_mut(&mut self) -> &mut Session<C, St> {
&mut self.session
}
/// Send user input and run until the LLM turn completes.
pub async fn run(&mut self, input: impl Into<String>) -> Result<PodRunResult, PodError> {
let result = self.session.run(input).await?;
Ok(result.into())
}
/// Resume from a paused state.
pub async fn resume(&mut self) -> Result<PodRunResult, PodError> {
let result = self.session.resume().await?;
Ok(result.into())
}
}
impl<St: Store> Pod<Box<dyn LlmClient>, St> {
    /// Creates a Pod using nothing but a manifest.
    ///
    /// The LLM client is built from the provider config, the worker settings
    /// from the manifest are applied, and a new persistent session is
    /// created with a fresh [`PodId`].
    ///
    /// # Errors
    ///
    /// Fails when the provider client cannot be built or the session cannot
    /// be created.
    pub async fn from_manifest(
        manifest: PodManifest,
        store: St,
        scope: Option<Scope>,
    ) -> Result<Self, PodError> {
        let mut worker = Worker::new(crate::provider::build_client(&manifest.provider)?);
        apply_worker_manifest(&mut worker, &manifest.worker);
        // Session creation and id assignment are exactly what `new` does.
        Self::new(manifest, worker, store, scope).await
    }
}
/// Transfers the worker-level manifest settings onto `worker`.
///
/// The system prompt is only touched when the manifest provides one. The
/// request config is always replaced by a fresh `RequestConfig` in which only
/// the manifest-provided `max_tokens` / `temperature` are overridden.
pub fn apply_worker_manifest<C: LlmClient>(worker: &mut Worker<C>, wm: &WorkerManifest) {
    if let Some(prompt) = &wm.system_prompt {
        worker.set_system_prompt(prompt);
    }

    let mut request = RequestConfig::new();
    if wm.max_tokens.is_some() {
        request.max_tokens = wm.max_tokens;
    }
    if wm.temperature.is_some() {
        request.temperature = wm.temperature;
    }
    worker.set_request_config(request);
}
/// Result of a Pod run.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PodRunResult {
/// The LLM finished its turn normally.
Finished,
/// The LLM paused (e.g. awaiting user confirmation via a hook).
Paused,
}
impl From<llm_worker::WorkerResult> for PodRunResult {
fn from(r: llm_worker::WorkerResult) -> Self {
match r {
llm_worker::WorkerResult::Finished => PodRunResult::Finished,
llm_worker::WorkerResult::Paused => PodRunResult::Paused,
}
}
}
/// Pod errors.
#[derive(Debug, thiserror::Error)]
pub enum PodError {
/// Failure reported by the session layer; displayed transparently.
#[error(transparent)]
Session(#[from] SessionError),
/// Failure reported by the persistence store; displayed transparently.
#[error(transparent)]
Store(#[from] StoreError),
/// A path fell outside the Pod's directory scope.
#[error("scope violation: {path} is outside the allowed directory")]
ScopeViolation { path: String },
/// The provider section of the manifest could not be turned into a client
/// (missing API key setting, unreadable environment variable).
#[error("provider configuration error: {0}")]
ProviderConfig(String),
}

View File

@ -1,60 +0,0 @@
use llm_worker::llm_client::client::LlmClient;
use llm_worker::llm_client::providers::anthropic::AnthropicClient;
use llm_worker::llm_client::providers::gemini::GeminiClient;
use llm_worker::llm_client::providers::ollama::OllamaClient;
use llm_worker::llm_client::providers::openai::OpenAIClient;
use crate::manifest::{ProviderConfig, ProviderKind};
use crate::pod::PodError;
/// Builds the [`LlmClient`] described by a [`ProviderConfig`].
///
/// When `api_key_env` is set, the API key is read from that environment
/// variable. Anthropic, OpenAI and Gemini require a key; Ollama does not use
/// one. A configured `base_url` is applied to whichever client is built.
///
/// # Errors
///
/// Returns [`PodError::ProviderConfig`] when the named environment variable
/// cannot be read, or when a provider that needs a key has no `api_key_env`.
pub fn build_client(config: &ProviderConfig) -> Result<Box<dyn LlmClient>, PodError> {
    let api_key = match config.api_key_env.as_deref() {
        Some(var_name) => {
            let value = std::env::var(var_name)
                .map_err(|e| PodError::ProviderConfig(format!("env var: {e}")))?;
            Some(value)
        }
        None => None,
    };
    let model = &config.model;
    let base_url = config.base_url.as_ref();

    match config.kind {
        ProviderKind::Anthropic => {
            let Some(key) = api_key else {
                return Err(PodError::ProviderConfig(
                    "anthropic requires api_key_env".into(),
                ));
            };
            let client = AnthropicClient::new(key, model);
            let client = match base_url {
                Some(url) => client.with_base_url(url),
                None => client,
            };
            Ok(Box::new(client))
        }
        ProviderKind::Openai => {
            let Some(key) = api_key else {
                return Err(PodError::ProviderConfig(
                    "openai requires api_key_env".into(),
                ));
            };
            let client = OpenAIClient::new(key, model);
            let client = match base_url {
                Some(url) => client.with_base_url(url),
                None => client,
            };
            Ok(Box::new(client))
        }
        ProviderKind::Gemini => {
            let Some(key) = api_key else {
                return Err(PodError::ProviderConfig(
                    "gemini requires api_key_env".into(),
                ));
            };
            let client = GeminiClient::new(key, model);
            let client = match base_url {
                Some(url) => client.with_base_url(url),
                None => client,
            };
            Ok(Box::new(client))
        }
        ProviderKind::Ollama => {
            let client = OllamaClient::new(model);
            let client = match base_url {
                Some(url) => client.with_base_url(url),
                None => client,
            };
            Ok(Box::new(client))
        }
    }
}

View File

@ -1,101 +0,0 @@
use std::path::{Path, PathBuf};
/// Directory scope constraining a Pod's write access.
///
/// Read access is unrestricted — only write operations are checked against the scope.
#[derive(Debug, Clone)]
pub struct Scope {
    root: PathBuf,
}

impl Scope {
    /// Create a new scope rooted at the given directory.
    ///
    /// The path is canonicalized to resolve symlinks and relative components.
    ///
    /// # Errors
    ///
    /// Returns the canonicalization error, e.g. when `root` does not exist.
    pub fn new(root: impl Into<PathBuf>) -> std::io::Result<Self> {
        let root = root.into().canonicalize()?;
        Ok(Self { root })
    }

    /// The (canonical) root directory of this scope.
    pub fn root(&self) -> &Path {
        &self.root
    }

    /// Check whether `path` falls within this scope.
    ///
    /// - A path that can be canonicalized is accepted iff its canonical form
    ///   lies under the scope root.
    /// - A path that exists as a directory entry but cannot be canonicalized
    ///   (typically a dangling symlink) is rejected: a write would follow the
    ///   link, and its target cannot be verified to lie inside the scope.
    /// - A path that does not exist yet is accepted iff its parent directory
    ///   canonicalizes to a location under the scope root. This covers
    ///   creating a new file inside the scope.
    pub fn contains(&self, path: &Path) -> bool {
        if let Ok(canonical) = path.canonicalize() {
            return canonical.starts_with(&self.root);
        }

        // `symlink_metadata` does not follow links, so it succeeds for a
        // dangling symlink even though `canonicalize` failed above.
        if path.symlink_metadata().is_ok() {
            return false;
        }

        // Path doesn't exist yet — check the parent directory instead.
        path.parent()
            .and_then(|parent| parent.canonicalize().ok())
            .is_some_and(|parent| parent.starts_with(&self.root))
    }
}
#[cfg(test)]
mod tests {
use super::*;
use std::fs;
use tempfile::TempDir;
// An existing file directly under the scope root is inside the scope.
#[test]
fn contains_file_inside_scope() {
let dir = TempDir::new().unwrap();
let scope = Scope::new(dir.path()).unwrap();
let file = dir.path().join("test.txt");
fs::write(&file, "hello").unwrap();
assert!(scope.contains(&file));
}
// An existing file in an unrelated directory is outside the scope.
#[test]
fn rejects_file_outside_scope() {
let dir = TempDir::new().unwrap();
let outside = TempDir::new().unwrap();
let scope = Scope::new(dir.path()).unwrap();
let file = outside.path().join("test.txt");
fs::write(&file, "hello").unwrap();
assert!(!scope.contains(&file));
}
// Exercises the parent-directory fallback for paths that do not exist yet.
#[test]
fn contains_new_file_in_existing_parent() {
let dir = TempDir::new().unwrap();
let scope = Scope::new(dir.path()).unwrap();
// File doesn't exist yet, but parent dir is inside scope
let new_file = dir.path().join("new.txt");
assert!(scope.contains(&new_file));
}
// A not-yet-existing file in a nested, existing directory is inside the scope.
#[test]
fn contains_nested_directory() {
let dir = TempDir::new().unwrap();
let nested = dir.path().join("a/b/c");
fs::create_dir_all(&nested).unwrap();
let scope = Scope::new(dir.path()).unwrap();
let file = nested.join("test.txt");
assert!(scope.contains(&file));
}
// `..` components that climb out of the scope root must be rejected.
#[test]
fn rejects_traversal_attack() {
let dir = TempDir::new().unwrap();
let scope = Scope::new(dir.path()).unwrap();
let traversal = dir.path().join("../../../etc/passwd");
assert!(!scope.contains(&traversal));
}
}

View File

@ -0,0 +1,13 @@
[package]
name = "lint-common"
version = "0.1.0"
edition.workspace = true
license.workspace = true
[dependencies]
chrono = { version = "0.4", features = ["serde"] }
serde = { workspace = true, features = ["derive"] }
thiserror = { workspace = true }
[dev-dependencies]
serde_json = { workspace = true }

View File

@ -0,0 +1,81 @@
//! Common frontmatter helpers.
use chrono::{DateTime, Utc};
use crate::RecordLintError;
/// Trait record frontmatter types implement so linters can drive them uniformly.
pub trait Frontmatter: Sized {
/// Hard upper bound on body chars (excluding the frontmatter block).
const BODY_LIMIT: usize;
/// Creation timestamp of the record, or `None` when the implementor has none.
/// (Presumably read from the record's frontmatter — confirm against implementors.)
fn created_at(&self) -> Option<DateTime<Utc>>;
/// Last-update timestamp of the record, or `None` when the implementor has none.
/// (Presumably read from the record's frontmatter — confirm against implementors.)
fn updated_at(&self) -> Option<DateTime<Utc>>;
}
/// Delimiter line that opens and closes a frontmatter block.
const FRONTMATTER_DELIM: &str = "---";

/// Split a markdown document into `(yaml_frontmatter, body)`.
///
/// The document must start with a line consisting of exactly `---` (followed
/// by `\n` or `\r\n`) and contain a closing `---` line (terminated by a line
/// break or by EOF) somewhere downstream. Neither delimiter line is part of
/// the returned slices; the line break after the closing delimiter is
/// consumed, so `body` starts on the following line.
///
/// # Errors
///
/// - [`RecordLintError::MissingFrontmatter`] when the first line is not
///   exactly `---`. This includes lines that merely start with the delimiter,
///   such as `----` or `---foo`.
/// - [`RecordLintError::MalformedFrontmatter`] when no closing `---` line
///   exists.
pub fn split_frontmatter(content: &str) -> Result<(&str, &str), RecordLintError> {
    // The opening delimiter must be the very first line, and the whole line.
    let after_delim = content
        .strip_prefix(FRONTMATTER_DELIM)
        .ok_or(RecordLintError::MissingFrontmatter)?;
    let after_open = if after_delim.is_empty() {
        // A bare `---` at EOF opens a block that is never closed; that is
        // reported below as a missing closing delimiter.
        after_delim
    } else {
        after_delim
            .strip_prefix("\r\n")
            .or_else(|| after_delim.strip_prefix('\n'))
            .ok_or(RecordLintError::MissingFrontmatter)?
    };

    // Look for the closing `---` on its own line.
    let mut byte_offset = 0usize;
    for line in after_open.split_inclusive('\n') {
        let trimmed = line.trim_end_matches('\n').trim_end_matches('\r');
        if trimmed == FRONTMATTER_DELIM {
            let yaml = &after_open[..byte_offset];
            let body = &after_open[byte_offset + line.len()..];
            return Ok((yaml, body));
        }
        byte_offset += line.len();
    }

    Err(RecordLintError::MalformedFrontmatter(
        "missing closing `---` line".to_string(),
    ))
}
#[cfg(test)]
mod tests {
use super::*;
// Happy path: both delimiter lines are dropped, YAML and body keep their
// trailing newlines.
#[test]
fn splits_simple() {
let doc = "---\nfoo: 1\n---\nbody here\n";
let (y, b) = split_frontmatter(doc).unwrap();
assert_eq!(y, "foo: 1\n");
assert_eq!(b, "body here\n");
}
// A document that does not start with `---` has no frontmatter at all.
#[test]
fn no_leading_delim_errors() {
let err = split_frontmatter("hello").unwrap_err();
assert!(matches!(err, RecordLintError::MissingFrontmatter));
}
// An opened but never closed block is malformed rather than missing.
#[test]
fn no_closing_delim_errors() {
let err = split_frontmatter("---\nfoo: 1\nno close\n").unwrap_err();
assert!(matches!(err, RecordLintError::MalformedFrontmatter(_)));
}
// Nothing after the closing delimiter yields an empty body.
#[test]
fn handles_empty_body() {
let doc = "---\nfoo: 1\n---\n";
let (_, b) = split_frontmatter(doc).unwrap();
assert_eq!(b, "");
}
}

View File

@ -0,0 +1,20 @@
//! Shared record lint primitives for memory and workflow files.
mod frontmatter;
mod slug;
pub use frontmatter::{Frontmatter, split_frontmatter};
pub use slug::{Slug, is_valid_slug};
/// Common lint errors for Markdown record syntax shared by memory and workflow.
#[derive(Debug, Clone, thiserror::Error, PartialEq, Eq)]
pub enum RecordLintError {
/// The input failed slug validation; carries the rejected string.
#[error("invalid slug `{0}`: must match ^[a-z0-9](?:[a-z0-9-]{{0,62}}[a-z0-9])?$")]
InvalidSlug(String),
/// A frontmatter block was opened but is structurally broken; carries a
/// human-readable reason (e.g. the closing delimiter is missing).
#[error("malformed frontmatter: {0}")]
MalformedFrontmatter(String),
/// The document does not start with a frontmatter delimiter.
#[error("frontmatter is missing or document is empty")]
MissingFrontmatter,
}

View File

@ -0,0 +1,146 @@
//! Slug type and validation.
//!
//! Syntax (agent-skills compatible):
//! ^[a-z0-9](?:[a-z0-9-]{0,62}[a-z0-9])?$
//! - 1-64 chars
//! - lowercase ASCII alphanumerics and `-`
//! - cannot start or end with `-`
//! - no consecutive `--`
use std::fmt;
use std::str::FromStr;
use serde::{Deserialize, Deserializer, Serialize};
use crate::RecordLintError;
/// Shortest accepted slug, in bytes.
const MIN_LEN: usize = 1;
/// Longest accepted slug, in bytes.
const MAX_LEN: usize = 64;
/// A string that has passed [`is_valid_slug`].
///
/// The inner field is private, so values are obtained through
/// [`Slug::parse`] (which the `FromStr` and `Deserialize` impls delegate to).
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
#[serde(transparent)]
pub struct Slug(String);

impl Slug {
    /// Validate `s` and wrap it.
    ///
    /// # Errors
    ///
    /// Returns [`RecordLintError::InvalidSlug`] carrying the rejected input
    /// when it does not satisfy [`is_valid_slug`].
    pub fn parse(s: impl Into<String>) -> Result<Self, RecordLintError> {
        let candidate: String = s.into();
        if !is_valid_slug(&candidate) {
            return Err(RecordLintError::InvalidSlug(candidate));
        }
        Ok(Self(candidate))
    }

    /// Borrow the slug as a string slice.
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }

    /// Consume the slug and return the owned string.
    pub fn into_string(self) -> String {
        let Self(inner) = self;
        inner
    }
}
/// Writes the slug text verbatim (formatter width/padding flags are not applied).
impl fmt::Display for Slug {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text: &str = &self.0;
        f.write_str(text)
    }
}
/// Lets a `Slug` be passed wherever an `AsRef<str>` is accepted.
impl AsRef<str> for Slug {
    fn as_ref(&self) -> &str {
        self.0.as_str()
    }
}
impl FromStr for Slug {
type Err = RecordLintError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Self::parse(s)
}
}
impl<'de> Deserialize<'de> for Slug {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let raw = String::deserialize(deserializer)?;
Self::parse(raw).map_err(serde::de::Error::custom)
}
}
/// Returns `true` when `s` is an acceptable slug.
///
/// Hand-written equivalent of the agent-skills slug regex, so the `regex`
/// crate is not needed. A slug must:
/// - be between `MIN_LEN` and `MAX_LEN` bytes long,
/// - start and end with a lowercase ASCII letter or digit,
/// - contain only lowercase ASCII letters, digits and `-`,
/// - never contain two `-` in a row.
pub fn is_valid_slug(s: &str) -> bool {
    let bytes = s.as_bytes();
    if !(MIN_LEN..=MAX_LEN).contains(&bytes.len()) {
        return false;
    }

    // Both ends must be alphanumeric; this also rules out a leading/trailing `-`.
    match (bytes.first(), bytes.last()) {
        (Some(&head), Some(&tail)) if is_alnum_lower(head) && is_alnum_lower(tail) => {}
        _ => return false,
    }

    let charset_ok = bytes.iter().all(|&b| b == b'-' || is_alnum_lower(b));
    let no_double_dash = !bytes.windows(2).any(|pair| matches!(pair, [b'-', b'-']));
    charset_ok && no_double_dash
}
/// `true` for ASCII digits `0-9` and lowercase ASCII letters `a-z`.
fn is_alnum_lower(b: u8) -> bool {
    matches!(b, b'0'..=b'9' | b'a'..=b'z')
}
// Unit tests for slug validation and the `Slug` wrapper.
#[cfg(test)]
mod tests {
use super::*;
// Typical valid slugs pass both the predicate and the constructor.
#[test]
fn accepts_basic_slugs() {
for s in ["a", "ab", "abc-def", "x9", "a-b-c", "123", "a-1"] {
assert!(is_valid_slug(s), "expected `{s}` valid");
assert!(Slug::parse(s).is_ok());
}
}
// Empty input, edge dashes, uppercase, other punctuation, doubled dashes
// and non-ASCII input are all rejected.
#[test]
fn rejects_bad_slugs() {
for s in [
"", "-", "-foo", "foo-", "Foo", "foo_bar", "foo bar", "foo--bar", "foo.bar", "ä",
] {
assert!(!is_valid_slug(s), "expected `{s}` invalid");
assert!(Slug::parse(s).is_err());
}
}
// Exactly MAX_LEN bytes is accepted; one more is not.
#[test]
fn enforces_length_bounds() {
let too_long = "a".repeat(MAX_LEN + 1);
assert!(!is_valid_slug(&too_long));
let max = "a".repeat(MAX_LEN);
assert!(is_valid_slug(&max));
}
// Deserialization goes through validation: bad input is an error.
#[test]
fn deserializes_via_serde() {
let json = "\"valid-slug\"";
let slug: Slug = serde_json::from_str(json).unwrap();
assert_eq!(slug.as_str(), "valid-slug");
let bad = "\"BAD\"";
let err: Result<Slug, _> = serde_json::from_str(bad);
assert!(err.is_err());
}
}

View File

@ -0,0 +1,9 @@
# llm-worker-macros
Rust メソッドを LLM 呼び出し可能なツールとして自動登録する手続きマクロクレート。引数構造体・Tool トレイト実装・ToolDefinition を自動生成する。
## 公開マクロ
- `#[tool_registry]` — impl ブロックに付与し、内部の `#[tool]` メソッドを一括処理
- `#[tool]` — メソッドをツールとしてマーク
- `#[description = "..."]` — 引数に説明を付与（JSON Schema の description に反映）

View File

@ -192,13 +192,13 @@ fn generate_tool_impl(self_ty: &Type, method: &syn::ImplItemFn) -> proc_macro2::
let result_handling = if is_result_type(&sig.output) {
quote! {
match result {
Ok(val) => Ok(format!("{:?}", val)),
Ok(val) => Ok(format!("{:?}", val).into()),
Err(e) => Err(::llm_worker::tool::ToolError::ExecutionFailed(format!("{}", e))),
}
}
} else {
quote! {
Ok(format!("{:?}", result))
Ok(format!("{:?}", result).into())
}
};
@ -247,7 +247,7 @@ fn generate_tool_impl(self_ty: &Type, method: &syn::ImplItemFn) -> proc_macro2::
#[async_trait::async_trait]
impl ::llm_worker::tool::Tool for #tool_struct_name {
async fn execute(&self, input_json: &str) -> Result<String, ::llm_worker::tool::ToolError> {
async fn execute(&self, input_json: &str) -> Result<::llm_worker::tool::ToolOutput, ::llm_worker::tool::ToolError> {
#execute_body
}
}

View File

@ -1,21 +0,0 @@
[package]
name = "llm-worker-persistence"
description = "Session persistence for llm-worker via append-only JSONL logs"
version = "0.1.0"
edition.workspace = true
license.workspace = true
[dependencies]
llm-worker = { path = "../llm-worker" }
async-trait = "0.1"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
tokio = { version = "1.49", features = ["fs", "io-util"] }
uuid = { version = "1", features = ["v7", "serde"] }
thiserror = "2.0"
[dev-dependencies]
tokio = { version = "1.49", features = ["macros", "rt-multi-thread", "fs", "io-util"] }
tempfile = "3.24"
futures = "0.3"
async-trait = "0.1"

View File

@ -1,47 +0,0 @@
//! [`ToolOutputProcessor`] implementation backed by a [`BlobStore`].
//!
//! Converts large tool output strings into [`ToolOutput::Stored`] and
//! persists the content via a [`BlobStore`], returning a summary with
//! a blob reference for conversation history.
use crate::blob_store::BlobStore;
use async_trait::async_trait;
use llm_worker::tool::{ToolError, ToolOutput, ToolOutputProcessor};
use std::sync::Arc;
/// A [`ToolOutputProcessor`] that stores large outputs in a [`BlobStore`].
///
/// Small outputs (≤ `INLINE_THRESHOLD` bytes) pass through unchanged.
/// Large outputs are stored as blobs, and a summary with a `[blob:<id>]`
/// reference replaces the original content in conversation history.
// NOTE(review): `INLINE_THRESHOLD` is not defined in this file; presumably it
// is applied by `ToolOutput::from` — confirm.
pub struct BlobOutputProcessor<B: BlobStore> {
// Shared handle to the backend that receives stored outputs.
blob_store: Arc<B>,
}
impl<B: BlobStore> BlobOutputProcessor<B> {
/// Create a new processor backed by the given blob store.
pub fn new(blob_store: Arc<B>) -> Self {
Self { blob_store }
}
}
#[async_trait]
impl<B: BlobStore + 'static> ToolOutputProcessor for BlobOutputProcessor<B> {
async fn process(&self, output: String) -> Result<String, ToolError> {
let tool_output = ToolOutput::from(output);
match tool_output {
ToolOutput::Inline(s) => Ok(s),
ToolOutput::Stored { summary, content } => {
let blob_id = self
.blob_store
.store(&content)
.await
.map_err(|e| ToolError::Internal(format!("blob store error: {e}")))?;
// Prepend blob reference to the summary
Ok(format!("[blob:{blob_id}] {summary}"))
}
}
}
}

View File

@ -1,54 +0,0 @@
//! Blob storage abstraction for large tool outputs.
//!
//! [`BlobStore`] provides async storage and retrieval of [`Content`] blobs,
//! keeping them separate from session logs. Session logs reference blobs
//! by [`BlobId`] in tool result summaries.
use llm_worker::tool::Content;
use std::future::Future;
/// Unique blob identifier. UUID v7 (time-ordered).
pub type BlobId = uuid::Uuid;
/// Generate a new blob ID.
pub fn new_blob_id() -> BlobId {
uuid::Uuid::now_v7()
}
/// Errors from the blob store.
#[derive(Debug, thiserror::Error)]
pub enum BlobStoreError {
/// An underlying I/O operation failed (converted automatically via `?`).
#[error("I/O error: {0}")]
Io(#[from] std::io::Error),
/// JSON encoding or decoding of blob content failed (converted automatically via `?`).
#[error("serialization error: {0}")]
Serde(#[from] serde_json::Error),
/// No blob exists for the given ID; carries the ID that was requested.
#[error("blob not found: {0}")]
NotFound(BlobId),
}
/// Async blob storage backend.
///
/// Stores and retrieves [`Content`] blobs independently of session logs.
/// All methods take `&self` — implementations should use interior mutability
/// when needed.
///
/// Every method returns a `Send` future, and implementors must themselves be
/// `Send + Sync`.
pub trait BlobStore: Send + Sync {
/// Store content and return its assigned ID.
fn store(
&self,
content: &Content,
) -> impl Future<Output = Result<BlobId, BlobStoreError>> + Send;
/// Load content by ID.
// NOTE(review): implementations in this crate report an unknown ID as
// `BlobStoreError::NotFound` — confirm this is required of all backends.
fn load(
&self,
id: BlobId,
) -> impl Future<Output = Result<Content, BlobStoreError>> + Send;
/// Check if a blob exists.
fn exists(
&self,
id: BlobId,
) -> impl Future<Output = Result<bool, BlobStoreError>> + Send;
}

View File

@ -1,21 +0,0 @@
//! Debug-only raw stream event recording.
//!
//! [`TraceEntry`] captures every LLM stream event verbatim for debugging
//! and post-hoc analysis. Written to a separate `.trace.jsonl` file,
//! completely independent of the session log used for state restoration.
//!
//! Disabled by default. Enable via `SessionConfig::record_event_trace`.
use llm_worker::llm_client::event::Event;
use serde::{Deserialize, Serialize};
/// A single trace entry recording a raw stream event.
///
/// Derives `Serialize`/`Deserialize` so an entry can be encoded as a JSON value.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TraceEntry {
/// Timestamp in milliseconds since Unix epoch.
pub ts: u64,
/// Turn number at the time of recording.
pub turn: usize,
/// The raw stream event.
pub event: Event,
}

View File

@ -1,83 +0,0 @@
//! Filesystem-backed blob store.
//!
//! Layout:
//! - Text blobs: `{root}/{blob_id}.txt`
//! - Structured blobs: `{root}/{blob_id}.json`
use crate::blob_store::{new_blob_id, BlobId, BlobStore, BlobStoreError};
use llm_worker::tool::Content;
use std::path::PathBuf;
use tokio::fs;
/// Filesystem-backed blob store.
///
/// Each blob is stored as a single file. Text content uses `.txt`,
/// structured (JSON) content uses `.json`.
#[derive(Clone)]
pub struct FsBlobStore {
    /// Directory that holds every blob file.
    root: PathBuf,
}

impl FsBlobStore {
    /// Create a new `FsBlobStore` rooted at the given directory.
    /// Creates the directory if it does not exist.
    ///
    /// # Errors
    ///
    /// Returns [`BlobStoreError::Io`] when the directory cannot be created.
    pub async fn new(root: impl Into<PathBuf>) -> Result<Self, BlobStoreError> {
        let root = root.into();
        fs::create_dir_all(&root).await?;
        Ok(Self { root })
    }

    /// Path of the text file for `id`: `{root}/{id}.txt`.
    fn text_path(&self, id: BlobId) -> PathBuf {
        self.root.join(format!("{id}.txt"))
    }

    /// Path of the JSON file for `id`: `{root}/{id}.json`.
    fn json_path(&self, id: BlobId) -> PathBuf {
        self.root.join(format!("{id}.json"))
    }
}

impl BlobStore for FsBlobStore {
    /// Write `content` under a freshly generated ID and return that ID.
    ///
    /// Text is written verbatim to the `.txt` path; structured content is
    /// pretty-printed as JSON to the `.json` path.
    async fn store(&self, content: &Content) -> Result<BlobId, BlobStoreError> {
        let id = new_blob_id();
        match content {
            Content::Text(text) => {
                fs::write(self.text_path(id), text.as_bytes()).await?;
            }
            Content::Structured(value) => {
                let json = serde_json::to_string_pretty(value)?;
                fs::write(self.json_path(id), json.as_bytes()).await?;
            }
        }
        Ok(id)
    }

    /// Load the blob for `id`, trying the `.txt` file first and then `.json`.
    ///
    /// The files are read directly rather than probed with a blocking
    /// `Path::exists()` first. That keeps the async executor thread free and
    /// removes the window in which a file could vanish between the check and
    /// the read.
    ///
    /// # Errors
    ///
    /// * [`BlobStoreError::NotFound`] when neither file exists.
    /// * [`BlobStoreError::Io`] for any other read failure.
    /// * [`BlobStoreError::Serde`] when the `.json` file is not valid JSON.
    async fn load(&self, id: BlobId) -> Result<Content, BlobStoreError> {
        match fs::read_to_string(self.text_path(id)).await {
            Ok(text) => return Ok(Content::Text(text)),
            // No text file: fall through to the JSON variant.
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
            Err(e) => return Err(e.into()),
        }

        match fs::read_to_string(self.json_path(id)).await {
            Ok(raw) => Ok(Content::Structured(serde_json::from_str(&raw)?)),
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
                Err(BlobStoreError::NotFound(id))
            }
            Err(e) => Err(e.into()),
        }
    }

    /// Report whether a `.txt` or `.json` file exists for `id`.
    ///
    /// Uses the async `metadata` call; as with `Path::exists`, any metadata
    /// error is treated as "does not exist".
    async fn exists(&self, id: BlobId) -> Result<bool, BlobStoreError> {
        let found = fs::metadata(self.text_path(id)).await.is_ok()
            || fs::metadata(self.json_path(id)).await.is_ok();
        Ok(found)
    }
}

View File

@ -1,133 +0,0 @@
//! Filesystem-backed JSONL store.
//!
//! Layout:
//! - Session log: `{root}/{session_id}.jsonl`
//! - Event trace: `{root}/{session_id}.trace.jsonl`
use crate::event_trace::TraceEntry;
use crate::session_log::LogEntry;
use crate::store::{Store, StoreError};
use crate::SessionId;
use std::path::{Path, PathBuf};
use tokio::fs;
use tokio::io::AsyncWriteExt;
/// Filesystem-backed JSONL store.
///
/// Each session is stored as a single `.jsonl` file with one [`LogEntry`]
/// per line. Writes use append mode for crash safety.
#[derive(Clone)]
pub struct FsStore {
    /// Directory that holds every session log and trace file.
    root: PathBuf,
}

impl FsStore {
    /// Create a new `FsStore` rooted at the given directory.
    /// Creates the directory if it does not exist.
    ///
    /// # Errors
    ///
    /// Returns a [`StoreError`] when the directory cannot be created.
    pub async fn new(root: impl Into<PathBuf>) -> Result<Self, StoreError> {
        let root = root.into();
        fs::create_dir_all(&root).await?;
        Ok(Self { root })
    }

    /// Path of the session log: `{root}/{id}.jsonl`.
    fn log_path(&self, id: SessionId) -> PathBuf {
        self.root.join(format!("{id}.jsonl"))
    }

    /// Path of the event trace: `{root}/{id}.trace.jsonl`.
    fn trace_path(&self, id: SessionId) -> PathBuf {
        self.root.join(format!("{id}.trace.jsonl"))
    }

    /// Append `line` plus a trailing newline to `path`, creating the file if
    /// needed, then flush.
    ///
    /// The record and its `\n` terminator are assembled into one buffer and
    /// handed to the file in a single `write_all` call. Issuing them as two
    /// separate writes leaves a window in which a crash or a concurrent
    /// appender can produce an unterminated or interleaved line.
    async fn append_line(&self, path: &Path, line: &str) -> Result<(), StoreError> {
        let mut record = String::with_capacity(line.len() + 1);
        record.push_str(line);
        record.push('\n');

        let mut file = fs::OpenOptions::new()
            .create(true)
            .append(true)
            .open(path)
            .await?;
        file.write_all(record.as_bytes()).await?;
        file.flush().await?;
        Ok(())
    }

    /// Parse JSONL text into a list of `T`, skipping blank lines.
    ///
    /// # Errors
    ///
    /// Returns [`StoreError::Corrupt`] with the 1-based line number of the
    /// first line that fails to deserialize.
    fn parse_jsonl<T: serde::de::DeserializeOwned>(
        content: &str,
    ) -> Result<Vec<T>, StoreError> {
        content
            .lines()
            .enumerate()
            .filter(|(_, line)| !line.trim().is_empty())
            .map(|(i, line)| {
                serde_json::from_str(line).map_err(|e| StoreError::Corrupt {
                    line: i + 1,
                    message: e.to_string(),
                })
            })
            .collect()
    }
}
impl Store for FsStore {
    /// Serialize `entry` as one JSON line and append it to the session log.
    async fn append(&self, id: SessionId, entry: &LogEntry) -> Result<(), StoreError> {
        let line = serde_json::to_string(entry)?;
        self.append_line(&self.log_path(id), &line).await
    }

    /// Read and parse every entry of the session log.
    ///
    /// The file is read directly instead of being probed with a blocking
    /// `Path::exists()` first, so the async executor thread is never blocked
    /// and the file cannot vanish between the check and the read.
    ///
    /// # Errors
    ///
    /// * [`StoreError::NotFound`] when the log file does not exist.
    /// * [`StoreError::Corrupt`] when a line fails to parse.
    /// * An I/O error for any other read failure.
    async fn read_all(&self, id: SessionId) -> Result<Vec<LogEntry>, StoreError> {
        let content = match fs::read_to_string(self.log_path(id)).await {
            Ok(content) => content,
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
                return Err(StoreError::NotFound(id));
            }
            Err(e) => return Err(e.into()),
        };
        Self::parse_jsonl(&content)
    }

    /// List the IDs of all session logs in the root directory, in descending
    /// ID order.
    ///
    /// Files whose stem does not parse as a `SessionId` are ignored, as are
    /// `.trace.jsonl` files.
    async fn list_sessions(&self) -> Result<Vec<SessionId>, StoreError> {
        let mut sessions = Vec::new();
        let mut dir = fs::read_dir(&self.root).await?;
        while let Some(entry) = dir.next_entry().await? {
            let path = entry.path();
            let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
            // Only match .jsonl files, not .trace.jsonl
            if name.ends_with(".trace.jsonl") {
                continue;
            }
            // `strip_suffix` removes the extension exactly once. The previous
            // `trim_end_matches` removed it repeatedly, so a stray file such as
            // `<id>.jsonl.jsonl` was reported as session `<id>`.
            let Some(stem) = name.strip_suffix(".jsonl") else {
                continue;
            };
            if let Ok(id) = stem.parse::<SessionId>() {
                sessions.push(id);
            }
        }
        // Per the original note: IDs are UUID v7, so descending ID order is
        // newest-first.
        sessions.sort_by(|a, b| b.cmp(a));
        Ok(sessions)
    }

    /// Write `entries` as the complete contents of the session log,
    /// one JSON document per line.
    async fn create_session(
        &self,
        id: SessionId,
        entries: &[LogEntry],
    ) -> Result<(), StoreError> {
        let path = self.log_path(id);
        let mut content = String::new();
        for entry in entries {
            content.push_str(&serde_json::to_string(entry)?);
            content.push('\n');
        }
        fs::write(&path, content.as_bytes()).await?;
        Ok(())
    }

    /// Report whether a log file exists for `id`.
    ///
    /// Uses the async `metadata` call; as with `Path::exists`, any metadata
    /// error is treated as "does not exist".
    async fn exists(&self, id: SessionId) -> Result<bool, StoreError> {
        Ok(fs::metadata(self.log_path(id)).await.is_ok())
    }

    /// Serialize `entry` as one JSON line and append it to the trace file.
    async fn append_trace(
        &self,
        id: SessionId,
        entry: &TraceEntry,
    ) -> Result<(), StoreError> {
        let line = serde_json::to_string(entry)?;
        self.append_line(&self.trace_path(id), &line).await
    }
}

View File

@ -1,668 +0,0 @@
//! Built-in `inspect` tool for retrieving stored blob content.
//!
//! When large tool outputs are stored in a [`BlobStore`], only a summary
//! with a `[blob:<id>]` reference is placed in conversation history.
//! This tool lets the LLM retrieve details on demand, with optional
//! selectors for partial access.
use std::sync::Arc;
use async_trait::async_trait;
use serde::Deserialize;
use serde_json::json;
use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta};
use llm_worker::state::Mutable;
use llm_worker::ToolRegistryError;
use llm_worker::Worker;
use llm_worker::llm_client::LlmClient;
use crate::blob_store::{BlobId, BlobStore};
// ─── Constants ───────────────────────────────────────────────────────────────
/// Maximum lines shown in the default text preview.
const DEFAULT_PREVIEW_LINES: usize = 50;
/// Maximum array elements shown in the default preview.
const DEFAULT_PREVIEW_ELEMENTS: usize = 5;
/// Maximum object keys whose values are shown in the default preview.
const DEFAULT_PREVIEW_KEYS: usize = 3;
// ─── Selector ────────────────────────────────────────────────────────────────
/// Parsed selector for partial blob content retrieval.
///
/// Produced by `parse_selector` from the textual forms `lines:N-M`,
/// `slice:N..M` and `key:NAME`.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Selector {
/// Extract a range of lines (1-based, inclusive).
Lines { start: usize, end: usize },
/// Extract a range of array elements (0-based, exclusive end).
Slice { start: usize, end: usize },
/// Extract a specific key from a JSON object.
Key(String),
}
/// Parse the textual selector syntax into a [`Selector`].
///
/// Accepted forms:
/// - `lines:N-M`: 1-based inclusive line range, `N >= 1` and `N <= M`
/// - `slice:N..M`: 0-based half-open element range, `N <= M`
/// - `key:NAME`: non-empty object key (everything after the first `key:`)
///
/// # Errors
///
/// Returns [`ToolError::InvalidArgument`] for an unknown prefix, a missing
/// separator, a non-numeric bound, a zero start line, an inverted range, or
/// an empty key.
fn parse_selector(s: &str) -> Result<Selector, ToolError> {
    if let Some(range) = s.strip_prefix("lines:") {
        let Some((lo, hi)) = range.split_once('-') else {
            return Err(ToolError::InvalidArgument(format!(
                "invalid lines selector '{s}': expected format lines:N-M"
            )));
        };
        let start = lo.parse::<usize>().map_err(|_| {
            ToolError::InvalidArgument(format!("invalid start line number: '{lo}'"))
        })?;
        let end = hi.parse::<usize>().map_err(|_| {
            ToolError::InvalidArgument(format!("invalid end line number: '{hi}'"))
        })?;
        if start == 0 {
            return Err(ToolError::InvalidArgument(
                "line numbers are 1-based, got 0".into(),
            ));
        }
        if start > end {
            return Err(ToolError::InvalidArgument(format!(
                "start line ({start}) must be <= end line ({end})"
            )));
        }
        return Ok(Selector::Lines { start, end });
    }

    if let Some(range) = s.strip_prefix("slice:") {
        let Some((lo, hi)) = range.split_once("..") else {
            return Err(ToolError::InvalidArgument(format!(
                "invalid slice selector '{s}': expected format slice:N..M"
            )));
        };
        let start = lo.parse::<usize>().map_err(|_| {
            ToolError::InvalidArgument(format!("invalid start index: '{lo}'"))
        })?;
        let end = hi.parse::<usize>().map_err(|_| {
            ToolError::InvalidArgument(format!("invalid end index: '{hi}'"))
        })?;
        if start > end {
            return Err(ToolError::InvalidArgument(format!(
                "start index ({start}) must be <= end index ({end})"
            )));
        }
        return Ok(Selector::Slice { start, end });
    }

    match s.strip_prefix("key:") {
        Some("") => Err(ToolError::InvalidArgument(
            "key name must not be empty".into(),
        )),
        Some(name) => Ok(Selector::Key(name.to_string())),
        None => Err(ToolError::InvalidArgument(format!(
            "unrecognized selector format: '{s}'. Expected: lines:N-M, slice:N..M, or key:NAME"
        ))),
    }
}
// ─── InspectTool ─────────────────────────────────────────────────────────────
/// JSON arguments accepted by the `inspect` tool.
#[derive(Deserialize)]
struct InspectArgs {
// Blob ID as text; parsed into a `BlobId` in `execute`.
blob_id: String,
// Optional selector string; parsed by `parse_selector` when present.
selector: Option<String>,
}
/// Built-in tool that retrieves stored blob content.
pub struct InspectTool<B: BlobStore> {
// Shared handle to the store that blobs are loaded from.
blob_store: Arc<B>,
}
impl<B: BlobStore> InspectTool<B> {
/// Create a tool that reads from the given blob store.
pub fn new(blob_store: Arc<B>) -> Self {
Self { blob_store }
}
}
impl<B: BlobStore + 'static> InspectTool<B> {
/// Create a [`ToolDefinition`] factory for this tool.
///
/// The returned closure builds the tool metadata (name `inspect`, a
/// description, and a JSON schema with a required `blob_id` and an optional
/// `selector`) together with a new `InspectTool` sharing `blob_store`.
pub fn tool_definition(blob_store: Arc<B>) -> ToolDefinition {
// The closure owns `blob_store`; each invocation clones the `Arc`.
Arc::new(move || {
let meta = ToolMeta::new("inspect")
.description(
"Retrieve content from a stored blob referenced by [blob:<id>] in conversation history. \
Supports selectors for partial access: \
'lines:N-M' (text line range, 1-based inclusive), \
'slice:N..M' (array element range, 0-based exclusive end), \
'key:NAME' (object key lookup). \
Without a selector, returns metadata and a preview.",
)
.input_schema(json!({
"type": "object",
"properties": {
"blob_id": {
"type": "string",
"description": "The blob UUID from a [blob:<id>] reference"
},
"selector": {
"type": "string",
"description": "Optional: 'lines:N-M', 'slice:N..M', or 'key:NAME'"
}
},
"required": ["blob_id"]
}));
// Erase the concrete store type behind the `Tool` trait object.
let tool = Arc::new(InspectTool::new(Arc::clone(&blob_store))) as Arc<dyn Tool>;
(meta, tool)
})
}
}
#[async_trait]
impl<B: BlobStore + 'static> Tool for InspectTool<B> {
async fn execute(&self, input_json: &str) -> Result<String, ToolError> {
let args: InspectArgs = serde_json::from_str(input_json)
.map_err(|e| ToolError::InvalidArgument(format!("invalid arguments: {e}")))?;
let blob_id: BlobId = args
.blob_id
.parse()
.map_err(|_| ToolError::InvalidArgument(format!(
"invalid blob_id: '{}' is not a valid UUID", args.blob_id
)))?;
let content = self
.blob_store
.load(blob_id)
.await
.map_err(|e| ToolError::ExecutionFailed(format!("{e}")))?;
match args.selector {
None => Ok(default_view(&content)),
Some(sel) => {
let selector = parse_selector(&sel)?;
apply_selector(&content, &selector)
}
}
}
}
// ─── Default view ────────────────────────────────────────────────────────────
use llm_worker::tool::Content;
/// Render the no-selector view of a blob, dispatching on the content kind:
/// text goes to `default_view_text`, structured JSON to `default_view_structured`.
fn default_view(content: &Content) -> String {
match content {
Content::Text(text) => default_view_text(text),
Content::Structured(value) => default_view_structured(value),
}
}
/// Render metadata and a preview for a text blob.
///
/// The output has a header (`type`, line count, byte size), a preview banner,
/// up to `DEFAULT_PREVIEW_LINES` lines each terminated by `\n`, and, when
/// lines were cut off, a trailing `... (N more lines)` marker.
fn default_view_text(text: &str) -> String {
    let total = text.lines().count();
    let shown = total.min(DEFAULT_PREVIEW_LINES);

    let mut out = format!(
        "type: text\nlines: {total}\nsize: {} bytes\n\n",
        text.len()
    );
    out.push_str(&format!("── preview (lines 1-{shown}) ──\n"));

    for line in text.lines().take(shown) {
        out.push_str(line);
        out.push('\n');
    }

    // Only mention hidden lines when there actually are some.
    let hidden = total.saturating_sub(DEFAULT_PREVIEW_LINES);
    if hidden > 0 {
        out.push_str(&format!("... ({hidden} more lines)\n"));
    }
    out
}
/// Render metadata and a preview for a structured (JSON) blob.
///
/// - Arrays: entry count plus the first `DEFAULT_PREVIEW_ELEMENTS` elements,
///   pretty-printed, and a `... (N more entries)` marker when truncated.
/// - Objects: key count, every key with its value type label, then the
///   pretty-printed values of the first `DEFAULT_PREVIEW_KEYS` keys.
/// - Scalars: the pretty-printed value itself.
///
/// Elements that fail to serialize are silently left out of the preview.
fn default_view_structured(value: &serde_json::Value) -> String {
    use serde_json::Value;

    if let Value::Array(items) = value {
        let total = items.len();
        let shown = total.min(DEFAULT_PREVIEW_ELEMENTS);
        let mut out = format!("type: json_array\nentries: {total}\n\n");
        out.push_str(&format!("── preview (0..{shown}) ──\n"));
        let rendered = items
            .iter()
            .take(shown)
            .filter_map(|item| serde_json::to_string_pretty(item).ok());
        for json in rendered {
            out.push_str(&json);
            out.push('\n');
        }
        if total > DEFAULT_PREVIEW_ELEMENTS {
            out.push_str(&format!(
                "... ({} more entries)\n",
                total - DEFAULT_PREVIEW_ELEMENTS
            ));
        }
        return out;
    }

    if let Value::Object(fields) = value {
        let mut out = format!(
            "type: json_object\nkeys: {}\n\n── keys ──\n",
            fields.len()
        );
        for (key, val) in fields.iter() {
            out.push_str(&format!("{key}: {}\n", value_type_label(val)));
        }
        // Preview the values of the first few keys, if there are any keys at all.
        let mut preview = fields.iter().take(DEFAULT_PREVIEW_KEYS).peekable();
        if preview.peek().is_some() {
            out.push_str("\n── preview ──\n");
            for (key, val) in preview {
                if let Ok(json) = serde_json::to_string_pretty(val) {
                    out.push_str(&format!("{key}: {json}\n"));
                }
            }
        }
        return out;
    }

    // Scalar: just show it.
    serde_json::to_string_pretty(value).unwrap_or_default()
}
fn value_type_label(value: &serde_json::Value) -> &'static str {
match value {
serde_json::Value::Null => "null",
serde_json::Value::Bool(_) => "bool",
serde_json::Value::Number(_) => "number",
serde_json::Value::String(_) => "string",
serde_json::Value::Array(_) => "array",
serde_json::Value::Object(_) => "object",
}
}
// ─── Selector application ────────────────────────────────────────────────────
fn apply_selector(content: &Content, selector: &Selector) -> Result<String, ToolError> {
match (content, selector) {
(Content::Text(text), Selector::Lines { start, end }) => {
let lines: Vec<&str> = text.lines().collect();
let total = lines.len();
// Convert 1-based inclusive to 0-based
let from = (*start - 1).min(total);
let to = (*end).min(total);
if from >= total {
return Ok(format!("(no lines — content has {total} lines)"));
}
Ok(lines[from..to].join("\n"))
}
(Content::Structured(serde_json::Value::Array(arr)), Selector::Slice { start, end }) => {
let total = arr.len();
let from = (*start).min(total);
let to = (*end).min(total);
let slice = &arr[from..to];
serde_json::to_string_pretty(slice)
.map_err(|e| ToolError::Internal(format!("JSON serialization error: {e}")))
}
(Content::Structured(serde_json::Value::Object(map)), Selector::Key(key)) => {
match map.get(key.as_str()) {
Some(val) => serde_json::to_string_pretty(val)
.map_err(|e| ToolError::Internal(format!("JSON serialization error: {e}"))),
None => {
let available: Vec<_> = map.keys().collect();
Err(ToolError::InvalidArgument(format!(
"key '{key}' not found. Available keys: {available:?}"
)))
}
}
}
// Type mismatches
(Content::Text(_), Selector::Slice { .. }) => Err(ToolError::InvalidArgument(
"slice selector only applies to JSON arrays, but this blob contains text. Use 'lines:N-M' instead.".into(),
)),
(Content::Text(_), Selector::Key(_)) => Err(ToolError::InvalidArgument(
"key selector only applies to JSON objects, but this blob contains text. Use 'lines:N-M' instead.".into(),
)),
(Content::Structured(_), Selector::Lines { .. }) => Err(ToolError::InvalidArgument(
"lines selector only applies to text content, but this blob contains JSON. Use 'slice:N..M' or 'key:NAME' instead.".into(),
)),
(Content::Structured(serde_json::Value::Object(_)), Selector::Slice { .. }) => Err(ToolError::InvalidArgument(
"slice selector only applies to JSON arrays, but this blob is a JSON object. Use 'key:NAME' instead.".into(),
)),
(Content::Structured(serde_json::Value::Array(_)), Selector::Key(_)) => Err(ToolError::InvalidArgument(
"key selector only applies to JSON objects, but this blob is a JSON array. Use 'slice:N..M' instead.".into(),
)),
(Content::Structured(_), Selector::Slice { .. }) => Err(ToolError::InvalidArgument(
"slice selector only applies to JSON arrays.".into(),
)),
(Content::Structured(_), Selector::Key(_)) => Err(ToolError::InvalidArgument(
"key selector only applies to JSON objects.".into(),
)),
}
}
// ─── Registration helper ─────────────────────────────────────────────────────
/// Register the `inspect` tool on a mutable [`Worker`].
///
/// Intended to be called alongside
/// [`BlobOutputProcessor`](crate::BlobOutputProcessor) setup, so the LLM can
/// read back content that was replaced by a `[blob:<id>]` reference.
///
/// # Errors
///
/// Propagates the [`ToolRegistryError`] returned by `Worker::register_tool`.
pub fn register_inspect_tool<C, B>(
    worker: &mut Worker<C, Mutable>,
    blob_store: Arc<B>,
) -> Result<(), ToolRegistryError>
where
    C: LlmClient,
    B: BlobStore + 'static,
{
    let definition = InspectTool::<B>::tool_definition(blob_store);
    worker.register_tool(definition)
}
// ─── Tests ───────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
use super::*;
use crate::blob_store::{new_blob_id, BlobStoreError};
use llm_worker::tool::Content;
use std::collections::HashMap;
use tokio::sync::Mutex;
// ── In-memory BlobStore for tests ────────────────────────────────────
struct MemBlobStore {
blobs: Mutex<HashMap<BlobId, Content>>,
}
impl MemBlobStore {
fn new() -> Self {
Self {
blobs: Mutex::new(HashMap::new()),
}
}
}
impl BlobStore for MemBlobStore {
async fn store(&self, content: &Content) -> Result<BlobId, BlobStoreError> {
let id = new_blob_id();
self.blobs.lock().await.insert(id, content.clone());
Ok(id)
}
async fn load(&self, id: BlobId) -> Result<Content, BlobStoreError> {
self.blobs
.lock()
.await
.get(&id)
.cloned()
.ok_or(BlobStoreError::NotFound(id))
}
async fn exists(&self, id: BlobId) -> Result<bool, BlobStoreError> {
Ok(self.blobs.lock().await.contains_key(&id))
}
}
// ── Selector parsing ─────────────────────────────────────────────────
#[test]
fn parse_lines_valid() {
assert_eq!(
parse_selector("lines:1-50").unwrap(),
Selector::Lines { start: 1, end: 50 }
);
assert_eq!(
parse_selector("lines:5-5").unwrap(),
Selector::Lines { start: 5, end: 5 }
);
}
#[test]
fn parse_lines_zero_start() {
let err = parse_selector("lines:0-5").unwrap_err();
assert!(matches!(err, ToolError::InvalidArgument(_)));
}
#[test]
fn parse_lines_inverted() {
let err = parse_selector("lines:50-20").unwrap_err();
assert!(matches!(err, ToolError::InvalidArgument(_)));
}
#[test]
fn parse_lines_missing_dash() {
let err = parse_selector("lines:20").unwrap_err();
assert!(matches!(err, ToolError::InvalidArgument(_)));
}
#[test]
fn parse_slice_valid() {
assert_eq!(
parse_selector("slice:0..10").unwrap(),
Selector::Slice { start: 0, end: 10 }
);
assert_eq!(
parse_selector("slice:3..8").unwrap(),
Selector::Slice { start: 3, end: 8 }
);
}
#[test]
fn parse_slice_inverted() {
let err = parse_selector("slice:10..3").unwrap_err();
assert!(matches!(err, ToolError::InvalidArgument(_)));
}
#[test]
fn parse_key_valid() {
assert_eq!(
parse_selector("key:results").unwrap(),
Selector::Key("results".into())
);
// Key name with colon
assert_eq!(
parse_selector("key:nested:key").unwrap(),
Selector::Key("nested:key".into())
);
}
#[test]
fn parse_key_empty() {
let err = parse_selector("key:").unwrap_err();
assert!(matches!(err, ToolError::InvalidArgument(_)));
}
#[test]
fn parse_unknown_prefix() {
let err = parse_selector("unknown:foo").unwrap_err();
assert!(matches!(err, ToolError::InvalidArgument(_)));
}
// ── Default view ─────────────────────────────────────────────────────
#[test]
fn default_view_text_short() {
let text = "line1\nline2\nline3\n";
let content = Content::Text(text.into());
let view = default_view(&content);
assert!(view.contains("type: text"));
assert!(view.contains("lines: 3"));
assert!(view.contains("line1"));
assert!(!view.contains("more lines"));
}
#[test]
fn default_view_text_long() {
let text: String = (1..=100).map(|i| format!("line {i}\n")).collect();
let content = Content::Text(text);
let view = default_view(&content);
assert!(view.contains("type: text"));
assert!(view.contains("lines: 100"));
assert!(view.contains("line 1"));
assert!(view.contains("line 50"));
assert!(!view.contains("line 51\n"));
assert!(view.contains("50 more lines"));
}
#[test]
fn default_view_array() {
let arr: Vec<serde_json::Value> = (0..20).map(|i| json!({"id": i})).collect();
let content = Content::Structured(json!(arr));
let view = default_view(&content);
assert!(view.contains("type: json_array"));
assert!(view.contains("entries: 20"));
assert!(view.contains("15 more entries"));
}
#[test]
fn default_view_object() {
let content = Content::Structured(json!({
"name": "test",
"count": 42,
"items": [1, 2, 3],
"nested": {"a": 1}
}));
let view = default_view(&content);
assert!(view.contains("type: json_object"));
assert!(view.contains("keys: 4"));
assert!(view.contains("── keys ──"));
assert!(view.contains("── preview ──"));
}
// ── Selector application ─────────────────────────────────────────────
#[test]
fn apply_lines_on_text() {
let text = "a\nb\nc\nd\ne\nf\n";
let content = Content::Text(text.into());
let result = apply_selector(&content, &Selector::Lines { start: 2, end: 4 }).unwrap();
assert_eq!(result, "b\nc\nd");
}
#[test]
fn apply_lines_clamp() {
let text = "a\nb\nc\n";
let content = Content::Text(text.into());
let result = apply_selector(&content, &Selector::Lines { start: 2, end: 100 }).unwrap();
assert_eq!(result, "b\nc");
}
#[test]
fn apply_lines_beyond_content() {
let text = "a\nb\n";
let content = Content::Text(text.into());
let result = apply_selector(&content, &Selector::Lines { start: 10, end: 20 }).unwrap();
assert!(result.contains("no lines"));
}
#[test]
fn apply_slice_on_array() {
let content = Content::Structured(json!([10, 20, 30, 40, 50]));
let result = apply_selector(&content, &Selector::Slice { start: 1, end: 3 }).unwrap();
let parsed: Vec<i64> = serde_json::from_str(&result).unwrap();
assert_eq!(parsed, vec![20, 30]);
}
#[test]
fn apply_slice_clamp() {
let content = Content::Structured(json!([10, 20, 30]));
let result = apply_selector(&content, &Selector::Slice { start: 1, end: 100 }).unwrap();
let parsed: Vec<i64> = serde_json::from_str(&result).unwrap();
assert_eq!(parsed, vec![20, 30]);
}
#[test]
fn apply_key_on_object() {
let content = Content::Structured(json!({"name": "test", "count": 42}));
let result = apply_selector(&content, &Selector::Key("name".into())).unwrap();
assert_eq!(result.trim(), "\"test\"");
}
#[test]
fn apply_key_not_found() {
let content = Content::Structured(json!({"name": "test"}));
let err = apply_selector(&content, &Selector::Key("missing".into())).unwrap_err();
match err {
ToolError::InvalidArgument(msg) => {
assert!(msg.contains("missing"));
assert!(msg.contains("name"));
}
_ => panic!("expected InvalidArgument"),
}
}
// ── Type mismatch errors ─────────────────────────────────────────────
#[test]
fn lines_on_json_error() {
let content = Content::Structured(json!([1, 2, 3]));
let err = apply_selector(&content, &Selector::Lines { start: 1, end: 3 }).unwrap_err();
assert!(matches!(err, ToolError::InvalidArgument(_)));
}
#[test]
fn slice_on_text_error() {
    // A slice selector applied to plain text is an invalid argument.
    let selector = Selector::Slice { start: 0, end: 3 };
    let text = Content::Text("hello".into());
    let failure = apply_selector(&text, &selector).unwrap_err();
    assert!(matches!(failure, ToolError::InvalidArgument(_)));
}
#[test]
fn key_on_text_error() {
    // A key selector applied to plain text is an invalid argument.
    let selector = Selector::Key("foo".into());
    let text = Content::Text("hello".into());
    let failure = apply_selector(&text, &selector).unwrap_err();
    assert!(matches!(failure, ToolError::InvalidArgument(_)));
}
#[test]
fn slice_on_object_error() {
    // A slice selector applied to a JSON object (not an array) is an invalid argument.
    let selector = Selector::Slice { start: 0, end: 3 };
    let object = Content::Structured(json!({"a": 1}));
    let failure = apply_selector(&object, &selector).unwrap_err();
    assert!(matches!(failure, ToolError::InvalidArgument(_)));
}
#[test]
fn key_on_array_error() {
    // A key selector applied to a JSON array (not an object) is an invalid argument.
    let selector = Selector::Key("foo".into());
    let array = Content::Structured(json!([1, 2, 3]));
    let failure = apply_selector(&array, &selector).unwrap_err();
    assert!(matches!(failure, ToolError::InvalidArgument(_)));
}
// ── Integration via execute() ────────────────────────────────────────
#[tokio::test]
async fn execute_default_view() {
    // With no selector, the tool output is expected to report the content type
    // and the line count of the stored blob.
    let store = Arc::new(MemBlobStore::new());
    let lines: Vec<String> = (1..=100).map(|n| format!("line {n}")).collect();
    let blob_id = store
        .store(&Content::Text(lines.join("\n")))
        .await
        .unwrap();
    let tool = InspectTool::new(store);
    let args = json!({"blob_id": blob_id.to_string()}).to_string();
    let summary = tool.execute(&args).await.unwrap();
    assert!(summary.contains("type: text"));
    assert!(summary.contains("lines: 100"));
}
#[tokio::test]
async fn execute_with_selector() {
    // A `"key:name"` selector string passed through `execute` returns the value of `name`.
    let store = Arc::new(MemBlobStore::new());
    let payload = Content::Structured(json!({"name": "test", "value": 42}));
    let blob_id = store.store(&payload).await.unwrap();
    let tool = InspectTool::new(store);
    let args = json!({"blob_id": blob_id.to_string(), "selector": "key:name"}).to_string();
    let rendered = tool.execute(&args).await.unwrap();
    assert_eq!(rendered.trim(), "\"test\"");
}
#[tokio::test]
async fn execute_invalid_blob_id() {
    // A `blob_id` that is not a valid ID string is rejected as an invalid argument.
    let tool = InspectTool::new(Arc::new(MemBlobStore::new()));
    let args = json!({"blob_id": "not-a-uuid"}).to_string();
    let failure = tool.execute(&args).await.unwrap_err();
    assert!(matches!(failure, ToolError::InvalidArgument(_)));
}
#[tokio::test]
async fn execute_blob_not_found() {
    // A well-formed ID that was never stored surfaces as `ExecutionFailed`.
    let tool = InspectTool::new(Arc::new(MemBlobStore::new()));
    let unknown_id = new_blob_id();
    let args = json!({"blob_id": unknown_id.to_string()}).to_string();
    let failure = tool.execute(&args).await.unwrap_err();
    assert!(matches!(failure, ToolError::ExecutionFailed(_)));
}
}

View File

@ -1,49 +0,0 @@
//! Session persistence for `llm-worker` via append-only JSONL logs.
//!
//! # Architecture
//!
//! Sessions are recorded as a sequence of [`LogEntry`] values, one per line
//! in a `.jsonl` file. Reading the log and collecting entries reconstructs
//! the full [`Worker`] state — no separate snapshots or checkpoints needed.
//!
//! Debug-mode [`TraceEntry`] records capture raw stream events in a separate
//! `.trace.jsonl` file, independent of the session log.
//!
//! # Quick start
//!
//! ```ignore
//! use llm_worker_persistence::{Session, SessionConfig, FsStore};
//!
//! let store = FsStore::new("./sessions").await?;
//! let worker = Worker::new(client);
//! let mut session = Session::new(worker, store, SessionConfig::default()).await?;
//! session.run("Hello!").await?;
//! ```
pub mod blob_output_processor;
pub mod blob_store;
pub mod event_trace;
pub mod fs_blob_store;
pub mod fs_store;
pub mod inspect_tool;
pub mod session;
pub mod session_log;
pub mod store;
pub use blob_output_processor::BlobOutputProcessor;
pub use blob_store::{BlobId, BlobStore, BlobStoreError};
pub use inspect_tool::{InspectTool, register_inspect_tool};
pub use event_trace::TraceEntry;
pub use fs_blob_store::FsBlobStore;
pub use fs_store::FsStore;
pub use session::{Session, SessionConfig, SessionError};
pub use session_log::{LogEntry, Outcome, RestoredState, collect_state};
pub use store::{Store, StoreError};
/// Session identifier. UUID v7 (time-ordered, lexicographically sortable).
///
/// This is a plain alias for [`uuid::Uuid`], so every `Uuid` method is available
/// on it. The alias itself does not enforce the version; IDs produced by
/// [`new_session_id`] are v7.
pub type SessionId = uuid::Uuid;
/// Generate a new session ID.
pub fn new_session_id() -> SessionId {
uuid::Uuid::now_v7()
}

View File

@ -1,338 +0,0 @@
//! Persistent session wrapper around [`Worker`].
//!
//! [`Session`] intercepts `Worker` operations and appends [`LogEntry`] records
//! to a [`Store`]. It does not modify `Worker` internals — all persistence
//! happens by observing state before and after each operation.
use crate::session_log::{self, LogEntry, Outcome};
use crate::store::{Store, StoreError};
use crate::SessionId;
use llm_worker::llm_client::client::LlmClient;
use llm_worker::state::Mutable;
use llm_worker::{Worker, WorkerError, WorkerResult};
/// Configuration for session persistence.
///
/// `Default` is derived: every option starts disabled.
#[derive(Debug, Clone, Default)]
pub struct SessionConfig {
    /// Record raw stream events to a separate trace file.
    /// Default: `false`.
    pub record_event_trace: bool,
}
/// Errors from session operations.
///
/// Both variants are `#[error(transparent)]`, so the message and source are
/// those of the wrapped error. The `#[from]` attributes let `?` convert
/// `WorkerError` and `StoreError` into this type.
#[derive(Debug, thiserror::Error)]
pub enum SessionError {
/// Failure reported by the wrapped `Worker`.
#[error(transparent)]
Worker(#[from] WorkerError),
/// Failure reported by the persistence `Store`.
#[error(transparent)]
Store(#[from] StoreError),
}
/// Persistent session wrapping a [`Worker`].
///
/// The `worker` field is public for direct access to Worker APIs
/// (tool registration, hook setup, subscriber management, etc.).
/// State-mutating operations (`run`, `resume`) should go through
/// Session methods to ensure proper logging.
pub struct Session<C: LlmClient, St: Store> {
/// The wrapped worker. Changes made directly through this field are not
/// logged automatically.
pub worker: Worker<C, Mutable>,
// Backend that receives every appended log entry.
store: St,
// ID under which this session's entries are appended.
session_id: SessionId,
// Stored at construction; no method in this file reads it yet.
_config: SessionConfig,
}
impl<C: LlmClient, St: Store> Session<C, St> {
/// Wrap `worker` in a new session and persist its initial state.
///
/// A fresh session ID is generated and a `SessionStart` entry holding the
/// worker's current system prompt, request config and history is appended
/// to `store` before the session is returned.
///
/// # Errors
/// Returns the `StoreError` from the initial append.
pub async fn new(
    worker: Worker<C, Mutable>,
    store: St,
    config: SessionConfig,
) -> Result<Self, StoreError> {
    let session_id = crate::new_session_id();
    let initial_entry = LogEntry::SessionStart {
        ts: session_log::now_millis(),
        system_prompt: worker.get_system_prompt().map(String::from),
        config: worker.request_config().clone(),
        history: worker.history().to_vec(),
    };
    store.append(session_id, &initial_entry).await?;

    let session = Self {
        worker,
        store,
        session_id,
        _config: config,
    };
    Ok(session)
}
/// Rebuild a session from its stored log.
///
/// All entries for `session_id` are read and replayed with
/// `session_log::collect_state`; the resulting system prompt, history,
/// request config, turn count and interrupted flag are applied to a new
/// `Worker` built from `client`. The returned session is ready for
/// `resume()`.
///
/// # Errors
/// Returns `SessionError::Store` if the log cannot be read.
pub async fn restore(
    client: C,
    store: St,
    session_id: SessionId,
    config: SessionConfig,
) -> Result<Self, SessionError> {
    let log = store.read_all(session_id).await?;
    let session_log::RestoredState {
        system_prompt,
        config: request_config,
        history,
        turn_count,
        last_run_interrupted,
        // NOTE(review): the replayed cache-lock prefix is not applied to the worker.
        locked_prefix_len: _,
    } = session_log::collect_state(&log);

    let mut worker = Worker::new(client);
    if let Some(prompt) = &system_prompt {
        worker.set_system_prompt(prompt);
    }
    worker.set_history(history);
    worker.set_request_config(request_config);
    worker.set_turn_count(turn_count);
    worker.set_last_run_interrupted(last_run_interrupted);

    Ok(Self {
        worker,
        store,
        session_id,
        _config: config,
    })
}
/// The session ID.
///
/// This is the ID generated in `new` or passed to `restore`; every entry this
/// session logs is appended under it.
pub fn session_id(&self) -> SessionId {
self.session_id
}
/// Reference to the underlying store.
///
/// Borrows the same store instance this session appends its log entries to.
pub fn store(&self) -> &St {
&self.store
}
/// Run a user turn, logging all state changes.
pub async fn run(
&mut self,
user_input: impl Into<String>,
) -> Result<WorkerResult, SessionError> {
let history_before = self.worker.history().len();
let result = self.worker.run(user_input).await;
self.log_history_delta(history_before).await?;
self.log_turn_end().await?;
self.log_outcome(&result).await?;
result.map_err(SessionError::Worker)
}
/// Resume from a paused state, logging all state changes.
pub async fn resume(&mut self) -> Result<WorkerResult, SessionError> {
let history_before = self.worker.history().len();
let result = self.worker.resume().await;
self.log_history_delta(history_before).await?;
self.log_turn_end().await?;
self.log_outcome(&result).await?;
result.map_err(SessionError::Worker)
}
/// Branch off a new session from the current in-memory state.
///
/// A new session ID is generated and a one-entry log is created for it: a
/// `SessionStart` carrying this worker's system prompt, request config and
/// full history. The current session is left untouched.
///
/// # Errors
/// Returns the `StoreError` from `create_session`.
pub async fn fork(&self) -> Result<SessionId, StoreError> {
    let fork_id = crate::new_session_id();
    let worker = &self.worker;
    let seed = LogEntry::SessionStart {
        ts: session_log::now_millis(),
        system_prompt: worker.get_system_prompt().map(String::from),
        config: worker.request_config().clone(),
        history: worker.history().to_vec(),
    };
    self.store
        .create_session(fork_id, &[seed])
        .await
        .map(|()| fork_id)
}
/// Branch off a new session from an earlier point of a stored log.
///
/// The first `up_to_entry` entries of `source_id` (or all of them, if the log
/// is shorter) are replayed, and the resulting system prompt, config and
/// history seed the `SessionStart` of a newly created session.
///
/// # Errors
/// Returns a `StoreError` if the source log cannot be read or the new session
/// cannot be created.
pub async fn fork_at(
    store: &St,
    source_id: SessionId,
    up_to_entry: usize,
) -> Result<SessionId, StoreError> {
    let entries = store.read_all(source_id).await?;
    // `get` yields `None` when the cut-off is past the end; replay everything then.
    let prefix = entries.get(..up_to_entry).unwrap_or(&entries[..]);
    let replayed = session_log::collect_state(prefix);

    let fork_id = crate::new_session_id();
    let seed = LogEntry::SessionStart {
        ts: session_log::now_millis(),
        system_prompt: replayed.system_prompt,
        config: replayed.config,
        history: replayed.history,
    };
    store.create_session(fork_id, &[seed]).await?;
    Ok(fork_id)
}
/// Append a `CacheLocked` entry recording `locked_prefix_len`, stamped with
/// the current time.
pub async fn log_cache_locked(
    &self,
    locked_prefix_len: usize,
) -> Result<(), StoreError> {
    let entry = LogEntry::CacheLocked {
        ts: session_log::now_millis(),
        locked_prefix_len,
    };
    self.store.append(self.session_id, &entry).await
}
/// Append a `CacheUnlocked` entry stamped with the current time.
pub async fn log_cache_unlocked(&self) -> Result<(), StoreError> {
    let entry = LogEntry::CacheUnlocked {
        ts: session_log::now_millis(),
    };
    self.store.append(self.session_id, &entry).await
}
/// Append a `ConfigChanged` entry holding a snapshot of the worker's current
/// request config. Call this after changing the config through `worker`.
pub async fn log_config_changed(&self) -> Result<(), StoreError> {
    let entry = LogEntry::ConfigChanged {
        ts: session_log::now_millis(),
        config: self.worker.request_config().clone(),
    };
    self.store.append(self.session_id, &entry).await
}
// ── Private helpers ──────────────────────────────────────────────────
/// Append log entries for every history item added after index `before_len`.
///
/// Items are read from the worker's actual history (not from pre-built
/// copies), so modifications made by hooks are what gets persisted. They are
/// logged in order:
/// - each user message as its own `UserInput`,
/// - each consecutive run of tool results as one `ToolResults`,
/// - each consecutive run of assistant messages / tool calls / reasoning
///   items as one `AssistantItems`,
/// - any other item on its own as `HookInjectedItems`.
///
/// All entries written by one call share the same timestamp. Nothing is
/// written when the history did not grow.
async fn log_history_delta(&self, before_len: usize) -> Result<(), StoreError> {
    let history = self.worker.history();
    if before_len >= history.len() {
        return Ok(());
    }
    let ts = session_log::now_millis();

    // `pending` shrinks from the front as items are logged.
    let mut pending = &history[before_len..];
    while let Some(head) = pending.first() {
        let (entry, consumed) = if head.is_user_message() {
            (
                LogEntry::UserInput {
                    ts,
                    item: head.clone(),
                },
                1,
            )
        } else if head.is_tool_result() {
            let run = pending
                .iter()
                .take_while(|it| it.is_tool_result())
                .count();
            (
                LogEntry::ToolResults {
                    ts,
                    items: pending[..run].to_vec(),
                },
                run,
            )
        } else if head.is_assistant_message() || head.is_tool_call() || head.is_reasoning() {
            let run = pending
                .iter()
                .take_while(|it| {
                    it.is_assistant_message() || it.is_tool_call() || it.is_reasoning()
                })
                .count();
            (
                LogEntry::AssistantItems {
                    ts,
                    items: pending[..run].to_vec(),
                },
                run,
            )
        } else {
            (
                LogEntry::HookInjectedItems {
                    ts,
                    items: vec![head.clone()],
                },
                1,
            )
        };
        self.store.append(self.session_id, &entry).await?;
        pending = &pending[consumed..];
    }
    Ok(())
}
/// Append a `TurnEnd` entry with the worker's current turn count.
async fn log_turn_end(&self) -> Result<(), StoreError> {
    let entry = LogEntry::TurnEnd {
        ts: session_log::now_millis(),
        turn_count: self.worker.turn_count(),
    };
    self.store.append(self.session_id, &entry).await
}
async fn log_outcome(
&self,
result: &Result<WorkerResult, WorkerError>,
) -> Result<(), StoreError> {
let outcome = match result {
Ok(WorkerResult::Finished) => Outcome::Finished,
Ok(WorkerResult::Paused) => Outcome::Paused,
Err(e) => Outcome::Error {
message: e.to_string(),
},
};
self.store
.append(
self.session_id,
&LogEntry::RunOutcome {
ts: session_log::now_millis(),
outcome,
interrupted: self.worker.last_run_interrupted(),
},
)
.await
}
}

View File

@ -1,285 +0,0 @@
//! Session log types for append-only JSONL persistence.
//!
//! Each [`LogEntry`] represents a single state transition in a session,
//! serialized as one line in a `.jsonl` file. Reading all entries and
//! collecting them via [`collect_state`] reconstructs the full [`Worker`] state.
use llm_worker::llm_client::types::{Item, RequestConfig};
use serde::{Deserialize, Serialize};
/// A single session log entry, serialized as one JSONL line.
///
/// Variants correspond to specific mutation points in `Worker`:
/// - `SessionStart` — always the first entry; captures initial state
/// - `UserInput` / `AssistantItems` / `ToolResults` / `HookInjectedItems` — history appends
/// - `TurnEnd` — turn boundary marker
/// - `CacheLocked` / `CacheUnlocked` — KV cache state transitions
/// - `RunOutcome` — marks end of a `run()` or `resume()` call
/// - `ConfigChanged` — `RequestConfig` mutation
///
/// Serialized with an internal `kind` tag holding the snake_case variant name.
/// Every variant carries `ts`, a millisecond timestamp (see [`now_millis`]).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum LogEntry {
/// Session start. Always the first entry in a log.
/// For forked sessions, `history` contains the seed state from the parent.
/// On replay this replaces the system prompt, config and history wholesale.
SessionStart {
ts: u64,
system_prompt: Option<String>,
config: RequestConfig,
history: Vec<Item>,
},
/// User input pushed to history (worker.rs:229).
UserInput { ts: u64, item: Item },
/// Assistant response items added to history (worker.rs:1040-1041).
AssistantItems { ts: u64, items: Vec<Item> },
/// Tool execution results added to history (worker.rs:897-900, 1072-1076).
ToolResults { ts: u64, items: Vec<Item> },
/// Items injected by `on_turn_end` hook via `ContinueWithMessages` (worker.rs:1055).
HookInjectedItems { ts: u64, items: Vec<Item> },
/// Turn boundary. Records the turn count after increment.
TurnEnd { ts: u64, turn_count: usize },
/// KV cache locked. Records the history prefix length that is now immutable.
CacheLocked { ts: u64, locked_prefix_len: usize },
/// KV cache unlocked.
CacheUnlocked { ts: u64 },
/// Outcome of a `run()` or `resume()` call.
/// This is metadata for auditing; state collection does not branch on the outcome.
/// The `interrupted` flag, however, is replayed into
/// `RestoredState::last_run_interrupted`.
RunOutcome {
ts: u64,
outcome: Outcome,
interrupted: bool,
},
/// `RequestConfig` changed.
/// On replay the stored config replaces the current one.
ConfigChanged { ts: u64, config: RequestConfig },
}
/// Outcome of a run/resume call. Metadata for auditing only.
///
/// Variant names are serialized in snake_case.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum Outcome {
/// The call returned successfully with a finished result.
Finished,
/// The call returned successfully in a paused state.
Paused,
/// The call failed; `message` holds the error's display text.
Error { message: String },
}
/// State collected from log entries.
///
/// Produced by [`collect_state`]; each field holds the value after the last
/// entry that affected it.
#[derive(Debug, Clone)]
pub struct RestoredState {
/// System prompt from the most recent `SessionStart`, if any.
pub system_prompt: Option<String>,
/// Config from the most recent `SessionStart` or `ConfigChanged`.
pub config: RequestConfig,
/// Seed history plus every item appended by later entries, in log order.
pub history: Vec<Item>,
/// Turn count from the most recent `TurnEnd` (0 if none).
pub turn_count: usize,
/// Prefix length from the most recent `CacheLocked`; reset to 0 by `CacheUnlocked`.
pub locked_prefix_len: usize,
/// `interrupted` flag from the most recent `RunOutcome` (`false` if none).
pub last_run_interrupted: bool,
}
/// Fold a log into the state it describes.
///
/// Starts from an empty state (no prompt, default config, empty history, all
/// counters zero) and applies each entry in order: `SessionStart` replaces the
/// prompt, config and history; the history-append variants extend the
/// history; the remaining variants overwrite their corresponding field.
pub fn collect_state(entries: &[LogEntry]) -> RestoredState {
    let empty = RestoredState {
        system_prompt: None,
        config: RequestConfig::default(),
        history: Vec::new(),
        turn_count: 0,
        locked_prefix_len: 0,
        last_run_interrupted: false,
    };

    entries.iter().fold(empty, |mut acc, entry| {
        match entry {
            LogEntry::SessionStart {
                system_prompt,
                config,
                history,
                ..
            } => {
                acc.system_prompt = system_prompt.clone();
                acc.config = config.clone();
                acc.history = history.clone();
            }
            LogEntry::UserInput { item, .. } => acc.history.push(item.clone()),
            // All three variants simply append their items to the history.
            LogEntry::AssistantItems { items, .. }
            | LogEntry::ToolResults { items, .. }
            | LogEntry::HookInjectedItems { items, .. } => {
                acc.history.extend_from_slice(items);
            }
            LogEntry::TurnEnd { turn_count, .. } => acc.turn_count = *turn_count,
            LogEntry::CacheLocked {
                locked_prefix_len, ..
            } => acc.locked_prefix_len = *locked_prefix_len,
            LogEntry::CacheUnlocked { .. } => acc.locked_prefix_len = 0,
            LogEntry::RunOutcome { interrupted, .. } => {
                acc.last_run_interrupted = *interrupted;
            }
            LogEntry::ConfigChanged { config, .. } => acc.config = config.clone(),
        }
        acc
    })
}
/// Get the current timestamp in milliseconds since Unix epoch.
///
/// # Panics
/// Panics if the system clock reports a time before the Unix epoch.
pub fn now_millis() -> u64 {
    let since_epoch = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .expect("system clock before Unix epoch");
    // `as_millis` returns `u128`; saturate instead of silently truncating with `as`.
    u64::try_from(since_epoch.as_millis()).unwrap_or(u64::MAX)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn replay_empty() {
    // Replaying no entries yields the empty starting state.
    let restored = collect_state(&[]);
    assert_eq!(restored.locked_prefix_len, 0);
    assert_eq!(restored.turn_count, 0);
    assert!(restored.history.is_empty());
}
#[test]
fn replay_session_start_sets_initial_state() {
    // A lone `SessionStart` determines prompt, config and seed history.
    let start = LogEntry::SessionStart {
        ts: 1000,
        system_prompt: Some("You are helpful.".into()),
        config: RequestConfig::default().with_max_tokens(1024),
        history: vec![Item::user_message("seed")],
    };
    let restored = collect_state(&[start]);
    assert_eq!(restored.system_prompt.as_deref(), Some("You are helpful."));
    assert_eq!(restored.config.max_tokens, Some(1024));
    assert_eq!(restored.history.len(), 1);
}
#[test]
fn replay_full_turn() {
    // One user message plus one assistant reply, closed by TurnEnd and RunOutcome.
    let log = [
        LogEntry::SessionStart {
            ts: 1000,
            system_prompt: None,
            config: RequestConfig::default(),
            history: vec![],
        },
        LogEntry::UserInput {
            ts: 2000,
            item: Item::user_message("Hello"),
        },
        LogEntry::AssistantItems {
            ts: 3000,
            items: vec![Item::assistant_message("Hi!")],
        },
        LogEntry::TurnEnd {
            ts: 3100,
            turn_count: 1,
        },
        LogEntry::RunOutcome {
            ts: 3200,
            outcome: Outcome::Finished,
            interrupted: false,
        },
    ];
    let restored = collect_state(&log);
    assert_eq!(restored.history.len(), 2);
    assert_eq!(restored.turn_count, 1);
    assert!(!restored.last_run_interrupted);
}
#[test]
fn replay_with_tool_calls() {
    // user -> tool call -> tool result -> assistant text must replay in log order.
    let log = [
        LogEntry::SessionStart {
            ts: 1000,
            system_prompt: None,
            config: RequestConfig::default(),
            history: vec![],
        },
        LogEntry::UserInput {
            ts: 2000,
            item: Item::user_message("Check weather"),
        },
        LogEntry::AssistantItems {
            ts: 3000,
            items: vec![Item::tool_call("call_1", "get_weather", r#"{"city":"Tokyo"}"#)],
        },
        LogEntry::ToolResults {
            ts: 3500,
            items: vec![Item::tool_result("call_1", "Sunny, 25C")],
        },
        LogEntry::AssistantItems {
            ts: 4000,
            items: vec![Item::assistant_message("It's sunny in Tokyo!")],
        },
        LogEntry::TurnEnd {
            ts: 4100,
            turn_count: 1,
        },
    ];
    let restored = collect_state(&log);
    assert_eq!(restored.history.len(), 4);
    assert!(restored.history[1].is_tool_call());
    assert!(restored.history[2].is_tool_result());
}
#[test]
fn replay_cache_lock_unlock() {
    let log = [
        LogEntry::SessionStart {
            ts: 1000,
            system_prompt: None,
            config: RequestConfig::default(),
            history: vec![Item::user_message("a"), Item::assistant_message("b")],
        },
        LogEntry::CacheLocked {
            ts: 2000,
            locked_prefix_len: 2,
        },
        LogEntry::CacheUnlocked { ts: 3000 },
    ];
    // After the full log the lock has been released again.
    let after_unlock = collect_state(&log);
    assert_eq!(after_unlock.locked_prefix_len, 0);
    // Stopping before the unlock entry shows the locked prefix.
    let while_locked = collect_state(&log[..2]);
    assert_eq!(while_locked.locked_prefix_len, 2);
}
#[test]
fn replay_config_changed() {
    // A later `ConfigChanged` overrides the config from `SessionStart`.
    let log = [
        LogEntry::SessionStart {
            ts: 1000,
            system_prompt: None,
            config: RequestConfig::default(),
            history: vec![],
        },
        LogEntry::ConfigChanged {
            ts: 2000,
            config: RequestConfig::default().with_temperature(0.5),
        },
    ];
    let restored = collect_state(&log);
    assert_eq!(restored.config.temperature, Some(0.5));
}
}

View File

@ -1,68 +0,0 @@
//! Persistence backend abstraction.
//!
//! [`Store`] defines the async interface for reading and writing session logs.
//! Implementations handle the physical storage (filesystem, database, etc.).
use crate::event_trace::TraceEntry;
use crate::session_log::LogEntry;
use crate::SessionId;
use std::future::Future;
/// Errors from the persistence store.
///
/// `Io` and `Serde` are created automatically by `?` through their `#[from]`
/// conversions; `NotFound` and `Corrupt` are constructed explicitly by store
/// implementations.
#[derive(Debug, thiserror::Error)]
pub enum StoreError {
/// Underlying I/O failure.
#[error("I/O error: {0}")]
Io(#[from] std::io::Error),
/// A log or trace entry could not be serialized or deserialized as JSON.
#[error("serialization error: {0}")]
Serde(#[from] serde_json::Error),
/// No stored log exists for the given session ID.
#[error("session not found: {0}")]
NotFound(SessionId),
/// The stored log could not be interpreted at `line`.
// NOTE(review): whether `line` is 0- or 1-based is decided by the implementation — confirm.
#[error("log corrupted at line {line}: {message}")]
Corrupt { line: usize, message: String },
}
/// Async persistence backend for session logs.
///
/// All methods take `&self` — implementations should use interior mutability
/// (e.g., append-mode file handles) when needed.
///
/// Methods are declared as returning `impl Future + Send` rather than as
/// `async fn`, so the `Send` bound on every returned future is part of the
/// trait contract.
pub trait Store: Send + Sync {
/// Append a single log entry to the session.
fn append(
&self,
id: SessionId,
entry: &LogEntry,
) -> impl Future<Output = Result<(), StoreError>> + Send;
/// Read all log entries for a session, in order.
fn read_all(
&self,
id: SessionId,
) -> impl Future<Output = Result<Vec<LogEntry>, StoreError>> + Send;
/// List all session IDs, most recent first.
fn list_sessions(&self)
-> impl Future<Output = Result<Vec<SessionId>, StoreError>> + Send;
/// Create a new session with initial entries.
// NOTE(review): behavior when `id` already exists is implementation-defined — confirm.
fn create_session(
&self,
id: SessionId,
entries: &[LogEntry],
) -> impl Future<Output = Result<(), StoreError>> + Send;
/// Check if a session exists.
fn exists(
&self,
id: SessionId,
) -> impl Future<Output = Result<bool, StoreError>> + Send;
/// Append a trace entry to the debug event trace file.
/// Trace entries are kept apart from the session log read by `read_all`.
fn append_trace(
&self,
id: SessionId,
entry: &TraceEntry,
) -> impl Future<Output = Result<(), StoreError>> + Send;
}

View File

@ -1,176 +0,0 @@
use llm_worker::llm_client::types::{Item, RequestConfig};
use llm_worker_persistence::{
FsStore, LogEntry, Outcome, Store, TraceEntry, new_session_id, collect_state,
};
#[tokio::test]
async fn round_trip_write_and_read() {
    let tmp = tempfile::tempdir().unwrap();
    let store = FsStore::new(tmp.path()).await.unwrap();
    let sid = new_session_id();
    let written = [
        LogEntry::SessionStart {
            ts: 1000,
            system_prompt: Some("You are helpful.".into()),
            config: RequestConfig::default().with_max_tokens(1024),
            history: vec![],
        },
        LogEntry::UserInput {
            ts: 2000,
            item: Item::user_message("Hello"),
        },
        LogEntry::AssistantItems {
            ts: 3000,
            items: vec![Item::assistant_message("Hi there!")],
        },
        LogEntry::TurnEnd {
            ts: 3100,
            turn_count: 1,
        },
        LogEntry::RunOutcome {
            ts: 3200,
            outcome: Outcome::Finished,
            interrupted: false,
        },
    ];

    // Append each entry individually, as a live session would.
    for entry in written.iter() {
        store.append(sid, entry).await.unwrap();
    }

    // Everything written must come back.
    let loaded = store.read_all(sid).await.unwrap();
    assert_eq!(loaded.len(), written.len());

    // Replaying the loaded log must reproduce the expected state.
    let restored = collect_state(&loaded);
    assert_eq!(restored.system_prompt.as_deref(), Some("You are helpful."));
    assert_eq!(restored.config.max_tokens, Some(1024));
    assert_eq!(restored.history.len(), 2);
    assert_eq!(restored.turn_count, 1);
    assert!(!restored.last_run_interrupted);
}
#[tokio::test]
async fn create_session_writes_all_entries() {
    // `create_session` persists the seed entry, including its two-item history.
    let tmp = tempfile::tempdir().unwrap();
    let store = FsStore::new(tmp.path()).await.unwrap();
    let sid = new_session_id();
    let seed = [LogEntry::SessionStart {
        ts: 1000,
        system_prompt: None,
        config: RequestConfig::default(),
        history: vec![Item::user_message("seed"), Item::assistant_message("ok")],
    }];
    store.create_session(sid, &seed).await.unwrap();

    let loaded = store.read_all(sid).await.unwrap();
    assert_eq!(loaded.len(), 1);
    let restored = collect_state(&loaded);
    assert_eq!(restored.history.len(), 2);
}
#[tokio::test]
async fn list_sessions_returns_newest_first() {
    let tmp = tempfile::tempdir().unwrap();
    let store = FsStore::new(tmp.path()).await.unwrap();

    let older = new_session_id();
    // Wait briefly so the two UUID v7 values get different timestamps.
    tokio::time::sleep(std::time::Duration::from_millis(2)).await;
    let newer = new_session_id();

    let start = LogEntry::SessionStart {
        ts: 1000,
        system_prompt: None,
        config: RequestConfig::default(),
        history: vec![],
    };
    store.append(older, &start).await.unwrap();
    store.append(newer, &start).await.unwrap();

    let listed = store.list_sessions().await.unwrap();
    assert_eq!(listed.len(), 2);
    assert_eq!(listed[0], newer); // newest first
    assert_eq!(listed[1], older);
}
#[tokio::test]
async fn exists_returns_correct_state() {
    let tmp = tempfile::tempdir().unwrap();
    let store = FsStore::new(tmp.path()).await.unwrap();
    let sid = new_session_id();

    // Nothing has been written yet.
    assert!(!store.exists(sid).await.unwrap());

    let start = LogEntry::SessionStart {
        ts: 1000,
        system_prompt: None,
        config: RequestConfig::default(),
        history: vec![],
    };
    store.append(sid, &start).await.unwrap();

    // The first append makes the session visible.
    assert!(store.exists(sid).await.unwrap());
}
#[tokio::test]
async fn not_found_error_for_missing_session() {
    // Reading a session that was never written must fail.
    let tmp = tempfile::tempdir().unwrap();
    let store = FsStore::new(tmp.path()).await.unwrap();
    let unknown = new_session_id();
    let outcome = store.read_all(unknown).await;
    assert!(outcome.is_err());
}
#[tokio::test]
async fn trace_entries_in_separate_file() {
    use llm_worker::llm_client::event::{Event, PingEvent};

    let tmp = tempfile::tempdir().unwrap();
    let store = FsStore::new(tmp.path()).await.unwrap();
    let sid = new_session_id();

    // One ordinary log entry...
    let start = LogEntry::SessionStart {
        ts: 1000,
        system_prompt: None,
        config: RequestConfig::default(),
        history: vec![],
    };
    store.append(sid, &start).await.unwrap();

    // ...and one trace entry for the same session.
    let ping = TraceEntry {
        ts: 1500,
        turn: 0,
        event: Event::Ping(PingEvent { timestamp: None }),
    };
    store.append_trace(sid, &ping).await.unwrap();

    // The session log still holds exactly the one entry.
    let loaded = store.read_all(sid).await.unwrap();
    assert_eq!(loaded.len(), 1);

    // The trace lives in its own `<id>.trace.jsonl` file.
    let trace_file = tmp.path().join(format!("{sid}.trace.jsonl"));
    assert!(trace_file.exists());
}

View File

@ -1,335 +0,0 @@
mod common;
use std::sync::Arc;
use async_trait::async_trait;
use common::MockLlmClient;
use llm_worker::hook::{Hook, HookError, OnTurnEnd, OnTurnEndResult};
use llm_worker::llm_client::event::{Event, ResponseStatus, StatusEvent};
use llm_worker::llm_client::types::{Item, RequestConfig};
use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta};
use llm_worker::Worker;
use llm_worker_persistence::{
FsStore, LogEntry, Outcome, Session, SessionConfig, Store, collect_state,
};
// =============================================================================
// Helpers
// =============================================================================
/// Event stream for a single text response ("Hello!") that ends with a
/// `Completed` status.
fn simple_text_events() -> Vec<Event> {
    let completed = Event::Status(StatusEvent {
        status: ResponseStatus::Completed,
    });
    vec![
        Event::text_block_start(0),
        Event::text_delta(0, "Hello!"),
        Event::text_block_stop(0, None),
        completed,
    ]
}
/// Two scripted responses: first a `get_weather` tool call, then the final
/// text answer. Each response ends with a `Completed` status.
fn tool_call_events() -> Vec<Vec<Event>> {
    let completed = || {
        Event::Status(StatusEvent {
            status: ResponseStatus::Completed,
        })
    };
    let tool_call_response = vec![
        Event::tool_use_start(0, "call_1", "get_weather"),
        Event::tool_input_delta(0, r#"{"city":"Tokyo"}"#),
        Event::tool_use_stop(0),
        completed(),
    ];
    let final_text_response = vec![
        Event::text_block_start(0),
        Event::text_delta(0, "It's sunny in Tokyo!"),
        Event::text_block_stop(0, None),
        completed(),
    ];
    vec![tool_call_response, final_text_response]
}
/// Test tool that ignores its input and always reports the same weather.
#[derive(Clone)]
struct MockWeatherTool;

#[async_trait]
impl Tool for MockWeatherTool {
    async fn execute(&self, _input_json: &str) -> Result<String, ToolError> {
        let report = String::from("Sunny, 25C");
        Ok(report)
    }
}
/// Tool definition for `get_weather`: one required string parameter `city`,
/// backed by [`MockWeatherTool`].
fn weather_tool_definition() -> ToolDefinition {
    Arc::new(|| {
        let schema = serde_json::json!({
            "type": "object",
            "properties": {
                "city": { "type": "string" }
            },
            "required": ["city"]
        });
        let meta = ToolMeta::new("get_weather")
            .description("Get weather")
            .input_schema(schema);
        let tool: Arc<dyn Tool> = Arc::new(MockWeatherTool);
        (meta, tool)
    })
}
/// `on_turn_end` hook that requests a pause.
///
/// It returns `OnTurnEndResult::Paused` on every call, so the first turn end
/// it sees pauses the worker.
struct PauseOnFirstTurnEnd;

#[async_trait]
impl Hook<OnTurnEnd> for PauseOnFirstTurnEnd {
    async fn call(&self, _input: &mut Vec<Item>) -> Result<OnTurnEndResult, HookError> {
        let decision = OnTurnEndResult::Paused;
        Ok(decision)
    }
}
/// Create an `FsStore` rooted in a fresh temporary directory.
///
/// The `TempDir` is returned so the caller keeps the directory alive for the
/// duration of the test.
async fn make_store() -> (tempfile::TempDir, FsStore) {
    let tmp = tempfile::tempdir().unwrap();
    let fs_store = FsStore::new(tmp.path()).await.unwrap();
    (tmp, fs_store)
}
// =============================================================================
// Tests
// =============================================================================
#[tokio::test]
async fn session_run_logs_entries() {
    let (_dir, store) = make_store().await;
    let worker = Worker::new(MockLlmClient::new(simple_text_events()));
    let mut session = Session::new(worker, store.clone(), SessionConfig::default())
        .await
        .unwrap();
    let sid = session.session_id();

    session.run("Hi").await.unwrap();

    let log = store.read_all(sid).await.unwrap();
    // A full run is expected to log SessionStart, UserInput, AssistantItems,
    // TurnEnd and RunOutcome; the check below only requires four entries.
    let count = log.len();
    assert!(count >= 4, "expected at least 4 entries, got {}", count);

    // The log must open with SessionStart.
    assert!(matches!(log[0], LogEntry::SessionStart { .. }));

    // Some RunOutcome must report Finished.
    let has_finished = log.iter().any(|entry| {
        matches!(
            entry,
            LogEntry::RunOutcome {
                outcome: Outcome::Finished,
                ..
            }
        )
    });
    assert!(has_finished, "should have a Finished outcome");
}
#[tokio::test]
async fn session_restore_round_trip() {
    let (_dir, store) = make_store().await;
    let mut worker = Worker::new(MockLlmClient::new(simple_text_events()));
    worker.set_system_prompt("You are helpful.");
    let mut session = Session::new(worker, store.clone(), SessionConfig::default())
        .await
        .unwrap();
    let sid = session.session_id();

    session.run("Hi").await.unwrap();
    let expected_history = session.worker.history().to_vec();
    let expected_turns = session.worker.turn_count();

    // Rebuild from the log; this client has no scripted events and is never called.
    let idle_client = MockLlmClient::new(vec![]);
    let restored = Session::restore(idle_client, store.clone(), sid, SessionConfig::default())
        .await
        .unwrap();

    assert_eq!(restored.worker.history().len(), expected_history.len());
    assert_eq!(restored.worker.turn_count(), expected_turns);
    assert_eq!(
        restored.worker.get_system_prompt().map(String::from),
        Some("You are helpful.".to_string())
    );
}
#[tokio::test]
async fn session_run_with_tool_call() {
    let (_dir, store) = make_store().await;
    let mut worker = Worker::new(MockLlmClient::with_responses(tool_call_events()));
    worker.register_tool(weather_tool_definition()).unwrap();
    let mut session = Session::new(worker, store.clone(), SessionConfig::default())
        .await
        .unwrap();
    let sid = session.session_id();

    session.run("What's the weather?").await.unwrap();

    // A tool round trip must leave both tool results and assistant items in the log.
    let log = store.read_all(sid).await.unwrap();
    let has_tool_results = log
        .iter()
        .any(|entry| matches!(entry, LogEntry::ToolResults { .. }));
    assert!(has_tool_results, "should have ToolResults entry");
    let has_assistant = log
        .iter()
        .any(|entry| matches!(entry, LogEntry::AssistantItems { .. }));
    assert!(has_assistant, "should have AssistantItems entry");
}
#[tokio::test]
async fn session_resume_after_pause() {
    let (_dir, store) = make_store().await;

    // First run: the pause hook makes the tool-call run end as Paused.
    let mut worker = Worker::new(MockLlmClient::with_responses(tool_call_events()));
    worker.register_tool(weather_tool_definition()).unwrap();
    worker.add_on_turn_end_hook(PauseOnFirstTurnEnd);
    let mut session = Session::new(worker, store.clone(), SessionConfig::default())
        .await
        .unwrap();
    let sid = session.session_id();

    let first_run = session.run("Weather?").await.unwrap();
    assert!(matches!(first_run, llm_worker::WorkerResult::Paused));

    // The pause must be recorded as a RunOutcome.
    let log = store.read_all(sid).await.unwrap();
    let has_paused = log.iter().any(|entry| {
        matches!(
            entry,
            LogEntry::RunOutcome {
                outcome: Outcome::Paused,
                ..
            }
        )
    });
    assert!(has_paused, "should have Paused outcome");

    // Restore with a client scripted for one follow-up text response.
    let follow_up = vec![
        Event::text_block_start(0),
        Event::text_delta(0, "After resume"),
        Event::text_block_stop(0, None),
        Event::Status(StatusEvent {
            status: ResponseStatus::Completed,
        }),
    ];
    let resume_client = MockLlmClient::with_responses(vec![follow_up]);
    let mut restored =
        Session::restore(resume_client, store.clone(), sid, SessionConfig::default())
            .await
            .unwrap();
    assert!(restored.worker.last_run_interrupted());

    // resume may or may not succeed depending on Worker internal state,
    // but the restore itself should work
    let _ = restored.resume().await;
}
#[tokio::test]
async fn session_fork_preserves_state() {
    let (_dir, store) = make_store().await;
    let mut worker = Worker::new(MockLlmClient::new(simple_text_events()));
    worker.set_system_prompt("System prompt");
    let mut session = Session::new(worker, store.clone(), SessionConfig::default())
        .await
        .unwrap();

    session.run("Hello").await.unwrap();
    let parent_history_len = session.worker.history().len();

    let fork_id = session.fork().await.unwrap();

    // The fork's log is a single SessionStart seeded with the parent's state.
    let fork_log = store.read_all(fork_id).await.unwrap();
    assert_eq!(fork_log.len(), 1);
    assert!(matches!(&fork_log[0], LogEntry::SessionStart { .. }));

    let forked = collect_state(&fork_log);
    assert_eq!(forked.history.len(), parent_history_len);
    assert_eq!(forked.system_prompt.as_deref(), Some("System prompt"));
}
#[tokio::test]
async fn session_fork_at_truncates() {
    let (_dir, store) = make_store().await;

    let worker = Worker::new(MockLlmClient::new(simple_text_events()));
    let mut session = Session::new(worker, store.clone(), SessionConfig::default())
        .await
        .unwrap();
    let source_id = session.session_id();
    session.run("Hello").await.unwrap();

    // The source log must be longer than the truncation point for the test
    // to be meaningful.
    let source_log = store.read_all(source_id).await.unwrap();
    assert!(source_log.len() > 2);

    // Fork at entry 2 (SessionStart + UserInput only)
    let forked_id = Session::<MockLlmClient, FsStore>::fork_at(&store, source_id, 2)
        .await
        .unwrap();

    let forked_log = store.read_all(forked_id).await.unwrap();
    assert_eq!(forked_log.len(), 1); // Just the new SessionStart

    // The fork's state must equal the state obtained by replaying only the
    // first two entries of the source log.
    let forked_state = collect_state(&forked_log);
    let truncated_state = collect_state(&source_log[..2]);
    assert_eq!(
        forked_state.history.len(),
        truncated_state.history.len()
    );
}
#[tokio::test]
async fn session_config_changed_logged() {
    let (_dir, store) = make_store().await;

    let worker = Worker::new(MockLlmClient::new(vec![]));
    let mut session = Session::new(worker, store.clone(), SessionConfig::default())
        .await
        .unwrap();
    let sid = session.session_id();

    // Change the request config on the worker, then ask the session to log it.
    let updated = RequestConfig::default().with_temperature(0.7);
    session.worker.set_request_config(updated);
    session.log_config_changed().await.unwrap();

    // The log must now contain a ConfigChanged entry carrying the new value.
    let log = store.read_all(sid).await.unwrap();
    let logged = log.iter().any(|entry| {
        matches!(
            entry,
            LogEntry::ConfigChanged { config, .. } if config.temperature == Some(0.7)
        )
    });
    assert!(logged, "should have ConfigChanged entry");
}
#[tokio::test]
async fn session_cache_lock_unlock_logged() {
    let (_dir, store) = make_store().await;

    let worker = Worker::new(MockLlmClient::new(vec![]));
    let session = Session::new(worker, store.clone(), SessionConfig::default())
        .await
        .unwrap();
    let sid = session.session_id();

    // Log a lock at prefix length 5 followed by an unlock.
    session.log_cache_locked(5).await.unwrap();
    session.log_cache_unlocked().await.unwrap();

    let log = store.read_all(sid).await.unwrap();

    let saw_lock = log.iter().any(|entry| {
        matches!(
            entry,
            LogEntry::CacheLocked { locked_prefix_len: 5, .. }
        )
    });
    assert!(saw_lock, "should have CacheLocked entry");

    let saw_unlock = log
        .iter()
        .any(|entry| matches!(entry, LogEntry::CacheUnlocked { .. }));
    assert!(saw_unlock, "should have CacheUnlocked entry");

    // State after all entries: unlocked
    let replayed = collect_state(&log);
    assert_eq!(replayed.locked_prefix_len, 0);
}

View File

@ -6,22 +6,24 @@ edition.workspace = true
license.workspace = true
[dependencies]
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
thiserror = "2.0"
tracing = "0.1"
async-trait = "0.1"
futures = "0.3"
tokio = { version = "1.49", features = ["macros", "rt-multi-thread"] }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
thiserror = { workspace = true }
tracing = { workspace = true }
async-trait = { workspace = true }
futures = { workspace = true }
tokio = { workspace = true, features = ["macros", "rt-multi-thread", "time"] }
tokio-util = "0.7"
reqwest = { version = "0.13.1", default-features = false, features = ["stream", "json", "native-tls", "http2"] }
reqwest = { version = "0.13", default-features = false, features = ["stream", "json", "native-tls", "http2"] }
eventsource-stream = "0.2"
llm-worker-macros = { path = "../llm-worker-macros", version = "0.2" }
zstd = "0.13"
llm-worker-macros = { workspace = true }
[dev-dependencies]
clap = { version = "4.5", features = ["derive", "env"] }
schemars = "1.2"
tempfile = "3.24"
schemars = { workspace = true }
tempfile = { workspace = true }
dotenv = "0.15"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
trybuild = "1.0.116"
wiremock = "0.6.5"

View File

@ -0,0 +1,23 @@
# llm-worker
LLM との対話を管理する低レベル基盤クレート。会話履歴、ツール実行、イベントストリーミング、ライフサイクルフックを統合した `Worker` 抽象を提供する。
## 公開型
### コア
- `Worker<C, S>` — LLM 対話の中央管理(ターン実行、ツール呼び出し、キャンセル)
- `WorkerConfig` / `WorkerResult` / `WorkerError` — 設定・実行結果・エラー
- `Item` / `ContentPart` / `Role` — 会話履歴の構成要素
### モジュール
- `llm_client` — プロバイダ抽象(`LlmClient` トレイト、`Request`, `RequestConfig`, Anthropic/OpenAI/Gemini/Ollama 実装)
- `tool` — ツール定義・実行(`Tool` トレイト、`ToolDefinition`, `ToolOutput`, サイズ判定による Inline/Stored 切替)
- `tool_server` — ツール登録・ルックアップ（`ToolServer`, `ToolServerHandle`）
- `hook` — 実行フローへの介入ポイント(`Hook` トレイト、`PreToolCall`, `PostToolCall`, `OnTurnEnd` など)
- クロージャベースイベント購読(`Worker::on_text_block()`, `on_tool_use_block()`, `on_usage()` 等)
- `timeline` — イベントストリームのディスパッチ(`Handler` トレイト、各ブロックコレクター)。パワーユーザー向けに `timeline_mut()` も提供
- `event` — ストリーミングイベント型(`Event`, `BlockStart`, `BlockDelta` など)
- `state` — 型状態パターンによるキャッシュ保護（`Mutable` / `CacheLocked`）
cratesの整理Add READMEsRE to all crates@@

View File

@ -33,7 +33,7 @@ llm-workerは3層構成でLLMとのインタラクションを管理する。
| `tool` / `tool_server` | ツール定義・登録・実行 | R3 |
| `timeline` | イベントストリーム処理、Handler dispatch | — |
| `handler` | Handler/Kind trait、ブロック別ハンドラ | — |
| `subscriber` | WorkerSubscriber trait、UI向けイベント配信 | — |
| `callback` | クロージャベースイベント購読(`on_text_block`, `on_usage` 等) | — |
| `llm_client` | LLMプロバイダへのHTTPリクエスト/ストリーミング | — |
| `llm_client/scheme` | プロバイダ固有ワイヤーフォーマット変換 | — |
| `llm_client/providers` | Anthropic, OpenAI, Gemini, Ollama実装 | — |

View File

@ -1,132 +0,0 @@
# ツール出力の遅延読み込み設計
## 課題
ツール実行結果(ファイル内容、検索結果等)は サイズが予測不能 で、
全量を `Item::ToolResult { output: String }` として LLM コンテキストに
載せると、トークン消費が爆発する。
## 方針
- ツール出力に **Inline / Stored** の区別を導入する
- Stored な出力は **BlobStore** に保存し、履歴には要約のみ載せる
- LLM が詳細を見たい場合は **inspect ツール** で部分取得する
## データ型
### ToolOutput（llm-worker 側）
```rust
pub enum ToolOutput {
/// 小さな結果: そのまま history に載る
Inline(String),
/// 大きな結果: summary だけ history に載り、全体は BlobStore に保存される
Stored {
summary: String,
content: Content,
},
}
pub enum Content {
Text(String),
Structured(serde_json::Value),
}
```
- `Tool::execute()` の戻り値は `Result<String, ToolError>` のまま据え置き
- `From<String> for ToolOutput` で閾値ベースの自動昇格を行う
- ツール実装者が明示的に `ToolOutput` を返したい場合は別トレイトメソッドを用意
### BlobStore（llm-worker-persistence 側）
```rust
pub type BlobId = uuid::Uuid; // UUID v7
pub trait BlobStore: Send + Sync {
fn store(&self, content: &Content) -> impl Future<Output = Result<BlobId, BlobStoreError>> + Send;
fn load(&self, id: BlobId) -> impl Future<Output = Result<Content, BlobStoreError>> + Send;
fn exists(&self, id: BlobId) -> impl Future<Output = Result<bool, BlobStoreError>> + Send;
}
```
### FsBlobStore レイアウト
```
blobs/
├── {blob_id}.txt # Content::Text
└── {blob_id}.json # Content::Structured
```
セッションとは独立したフラットなストア。セッションとの紐付けは
ログ側の参照（summary 内の `[blob:<id>]`）で行う。
## 自動サマリ
`From<String>` による自動昇格時のサマリ生成ルール:
| 項目 | 値 |
|---|---|
| Inline 閾値 | 800 bytes |
| サマリ上限 | 400 bytes |
| 先頭行数 | 5 行 |
| 末尾行数 | 3 行 |
### Text のサマリ形式
```
[blob:<id>] text | {N} lines
── head ──
{先頭5行}
── tail ──
{末尾3行}
```
### Structured (JSON Array) のサマリ形式
```
[blob:<id>] json_array | {N} entries
── schema ──
{最初の要素のキー: 型}
── head ──
{先頭2要素}
```
### Structured (JSON Object) のサマリ形式
```
[blob:<id>] json_object | {N} keys
── keys ──
{キー一覧と各値の型/サイズ}
```
## Worker への統合
```
Tool::execute() → Result<String, ToolError>
▼ From<String> for ToolOutput
ToolOutput::Inline(s) ← len ≤ 800
ToolOutput::Stored { .. } ← len > 800
▼ Worker が BlobStore に保存
Item::ToolResult { output: summary } ← history に載る
▼ LLM が詳細を見たい場合
inspect(blob_id, selector?) → 部分取得
```
Worker はオプショナルに `BlobStore` を保持する。
BlobStore が未設定の場合は従来通り全量 Inline として扱う。
## inspect ツール
Worker に BlobStore が設定されている場合、自動的に登録される組み込みツール。
```
inspect(blob_id, selector?)
```
- selector 省略: メタ情報 + 先頭部分
- `lines:20-50`: 行範囲（Text 用）
- `slice:3..8`: インデックス範囲（Array 用）
- `key:results`: キー指定（Object 用）

View File

@ -20,9 +20,16 @@ mod recorder;
mod scenarios;
use clap::{Parser, ValueEnum};
use llm_worker::llm_client::providers::anthropic::AnthropicClient;
use llm_worker::llm_client::providers::gemini::GeminiClient;
use llm_worker::llm_client::providers::openai::OpenAIClient;
use llm_worker::llm_client::scheme::{
Scheme, anthropic::AnthropicScheme, gemini::GeminiScheme, openai_chat::OpenAIScheme,
};
use llm_worker::llm_client::transport::{HttpTransport, ResolvedAuth};
/// Builds an `HttpTransport` for `scheme` and `model`, taking the base URL and
/// capability from the scheme's own defaults.
fn make_transport<S: Scheme>(scheme: S, model: &str, auth: ResolvedAuth) -> HttpTransport<S> {
    // Read both defaults before `scheme` is handed over to the transport.
    let capability = scheme.default_capability();
    let endpoint = scheme.default_base_url().to_string();
    HttpTransport::new(scheme, model.to_string(), endpoint, auth, capability)
}
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
@ -60,7 +67,7 @@ async fn run_scenario_with_anthropic(
let api_key = std::env::var("ANTHROPIC_API_KEY")
.expect("ANTHROPIC_API_KEY environment variable must be set");
let model = model.as_deref().unwrap_or("claude-sonnet-4-20250514");
let client = AnthropicClient::new(&api_key, model);
let client = make_transport(AnthropicScheme::new(), model, ResolvedAuth::ApiKey(api_key));
recorder::record_request(
&client,
@ -82,7 +89,7 @@ async fn run_scenario_with_openai(
let api_key =
std::env::var("OPENAI_API_KEY").expect("OPENAI_API_KEY environment variable must be set");
let model = model.as_deref().unwrap_or("gpt-4o");
let client = OpenAIClient::new(&api_key, model);
let client = make_transport(OpenAIScheme::new(), model, ResolvedAuth::ApiKey(api_key));
recorder::record_request(
&client,
@ -101,10 +108,15 @@ async fn run_scenario_with_ollama(
subdir: &str,
model: Option<String>,
) -> Result<(), Box<dyn std::error::Error>> {
use llm_worker::llm_client::providers::ollama::OllamaClient;
// Ollama typically runs local, no key needed or placeholder
let model = model.as_deref().unwrap_or("llama3"); // default example
let client = OllamaClient::new(model); // base_url placeholder, handled by client default
// Ollama = Anthropic scheme + base_url 差し替え + 認証なし
let model = model.as_deref().unwrap_or("llama3");
let client = HttpTransport::new(
AnthropicScheme::new(),
model.to_string(),
"http://localhost:11434".to_string(),
ResolvedAuth::None,
AnthropicScheme::new().default_capability(),
);
recorder::record_request(
&client,
@ -126,7 +138,7 @@ async fn run_scenario_with_gemini(
let api_key =
std::env::var("GEMINI_API_KEY").expect("GEMINI_API_KEY environment variable must be set");
let model = model.as_deref().unwrap_or("gemini-2.0-flash");
let client = GeminiClient::new(&api_key, model);
let client = make_transport(GeminiScheme::new(), model, ResolvedAuth::ApiKey(api_key));
recorder::record_request(
&client,

View File

@ -2,11 +2,10 @@
//!
//! Example of cancelling from another thread during streaming
use llm_worker::llm_client::providers::anthropic::AnthropicClient;
use llm_worker::llm_client::scheme::{Scheme, anthropic::AnthropicScheme};
use llm_worker::llm_client::transport::{HttpTransport, ResolvedAuth};
use llm_worker::{Worker, WorkerResult};
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::Mutex;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
@ -24,46 +23,39 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
let api_key =
std::env::var("ANTHROPIC_API_KEY").expect("ANTHROPIC_API_KEY environment variable not set");
let client = AnthropicClient::new(&api_key, "claude-sonnet-4-20250514");
let worker = Arc::new(Mutex::new(Worker::new(client)));
let scheme = AnthropicScheme::new();
let model = "claude-sonnet-4-20250514".to_string();
let cap = scheme.default_capability();
let base_url = scheme.default_base_url().to_string();
let client = HttpTransport::new(scheme, model, base_url, ResolvedAuth::ApiKey(api_key), cap);
let worker = Worker::new(client);
println!("🚀 Starting Worker...");
println!("💡 Will cancel after 2 seconds\n");
// Get cancel sender first (without holding lock)
let cancel_tx = {
let w = worker.lock().await;
w.cancel_sender()
};
// Get cancel sender before run (Mutable state)
let cancel_tx = worker.cancel_sender();
// Task 1: Run Worker
let worker_clone = worker.clone();
let task = tokio::spawn(async move {
let mut w = worker_clone.lock().await;
println!("📡 Sending request to LLM...");
match w.run("Tell me a very long story about a brave knight. Make it as detailed as possible with many paragraphs.").await {
Ok(WorkerResult::Finished) => {
println!("✅ Task completed normally");
}
Ok(WorkerResult::Paused) => {
println!("⏸️ Task paused");
}
Err(e) => {
println!("❌ Task error: {}", e);
}
}
});
// Task 2: Cancel after 2 seconds
// Task: Cancel after 2 seconds
tokio::spawn(async move {
tokio::time::sleep(Duration::from_secs(2)).await;
println!("\n🛑 Cancelling worker...");
let _ = cancel_tx.send(()).await;
});
// Wait for task completion
task.await?;
println!("📡 Sending request to LLM...");
match worker.run("Tell me a very long story about a brave knight. Make it as detailed as possible with many paragraphs.").await {
Ok(out) => match out.result {
WorkerResult::Finished => println!("✅ Task completed normally"),
WorkerResult::Paused => println!("⏸️ Task paused"),
WorkerResult::LimitReached => println!("🔒 Turn limit reached"),
WorkerResult::Yielded => println!("↩️ Task yielded"),
},
Err(e) => {
println!("❌ Task error: {}", e);
}
}
println!("\n✨ Demo complete!");

View File

@ -41,13 +41,14 @@ use tracing_subscriber::EnvFilter;
use clap::{Parser, ValueEnum};
use llm_worker::{
Worker,
hook::{Hook, HookError, PostToolCall, PostToolCallContext, PostToolCallResult},
interceptor::{Interceptor, PostToolAction, ToolResultInfo},
llm_client::{
LlmClient,
providers::{
anthropic::AnthropicClient, gemini::GeminiClient, ollama::OllamaClient,
openai::OpenAIClient,
capability::{CacheStrategy, ModelCapability, StructuredOutput, ToolCallingSupport},
scheme::{
Scheme, anthropic::AnthropicScheme, gemini::GeminiScheme, openai_chat::OpenAIScheme,
},
transport::{HttpTransport, ResolvedAuth},
},
timeline::{Handler, TextBlockEvent, TextBlockKind, ToolUseBlockEvent, ToolUseBlockKind},
};
@ -270,34 +271,34 @@ impl Handler<ToolUseBlockKind> for ToolCallPrinter {
}
}
/// Hook that displays tool execution results
struct ToolResultPrinterHook {
/// Policy that displays tool execution results.
struct ToolResultPrinterPolicy {
call_names: Arc<Mutex<HashMap<String, String>>>,
}
impl ToolResultPrinterHook {
impl ToolResultPrinterPolicy {
fn new(call_names: Arc<Mutex<HashMap<String, String>>>) -> Self {
Self { call_names }
}
}
#[async_trait]
impl Hook<PostToolCall> for ToolResultPrinterHook {
async fn call(&self, ctx: &mut PostToolCallContext) -> Result<PostToolCallResult, HookError> {
impl Interceptor for ToolResultPrinterPolicy {
async fn post_tool_call(&self, info: &mut ToolResultInfo) -> PostToolAction {
let name = self
.call_names
.lock()
.unwrap()
.remove(&ctx.result.tool_use_id)
.unwrap_or_else(|| ctx.result.tool_use_id.clone());
.remove(&info.result.tool_use_id)
.unwrap_or_else(|| info.result.tool_use_id.clone());
if ctx.result.is_error {
println!(" Result ({}): ❌ {}", name, ctx.result.content);
if info.result.is_error {
println!(" Result ({}): ❌ {}", name, info.result.summary);
} else {
println!(" Result ({}): ✅ {}", name, ctx.result.content);
println!(" Result ({}): ✅ {}", name, info.result.summary);
}
Ok(PostToolCallResult::Continue)
PostToolAction::Continue
}
}
@ -327,6 +328,22 @@ fn get_api_key(args: &Args) -> Result<String, String> {
}
/// Hand-written `ModelCapability` used when the capability is not taken from
/// a scheme: parallel tool calling, JSON-schema structured output, no
/// reasoning, no vision, and automatic prompt caching.
fn default_capability() -> ModelCapability {
ModelCapability {
tool_calling: ToolCallingSupport::Parallel,
structured_output: StructuredOutput::JsonSchema,
reasoning: None,
vision: false,
prompt_caching: CacheStrategy::Auto,
}
}
/// Wraps `scheme` in an `HttpTransport` configured with the scheme's default
/// base URL and capability, and returns it as a boxed `LlmClient`.
fn build_transport<S: Scheme>(scheme: S, model: String, auth: ResolvedAuth) -> Box<dyn LlmClient> {
    // Both defaults are read before `scheme` is passed into the transport.
    let capability = scheme.default_capability();
    let endpoint = scheme.default_base_url().to_string();
    let transport = HttpTransport::new(scheme, model, endpoint, auth, capability);
    Box::new(transport)
}
fn create_client(args: &Args) -> Result<Box<dyn LlmClient>, String> {
let model = args
.model
@ -336,21 +353,32 @@ fn create_client(args: &Args) -> Result<Box<dyn LlmClient>, String> {
let api_key = get_api_key(args)?;
match args.provider {
Provider::Anthropic => {
let client = AnthropicClient::new(&api_key, &model);
Ok(Box::new(client))
}
Provider::Gemini => {
let client = GeminiClient::new(&api_key, &model);
Ok(Box::new(client))
}
Provider::Openai => {
let client = OpenAIClient::new(&api_key, &model);
Ok(Box::new(client))
}
Provider::Anthropic => Ok(build_transport(
AnthropicScheme::new(),
model,
ResolvedAuth::ApiKey(api_key),
)),
Provider::Gemini => Ok(build_transport(
GeminiScheme::new(),
model,
ResolvedAuth::ApiKey(api_key),
)),
Provider::Openai => Ok(build_transport(
OpenAIScheme::new(),
model,
ResolvedAuth::ApiKey(api_key),
)),
Provider::Ollama => {
let client = OllamaClient::new(&model);
Ok(Box::new(client))
// Ollama = Anthropic scheme + base_url 差し替え + 認証なし
let scheme = AnthropicScheme::new();
let cap = default_capability();
Ok(Box::new(HttpTransport::new(
scheme,
model,
"http://localhost:11434".to_string(),
ResolvedAuth::None,
cap,
)))
}
}
}
@ -438,10 +466,8 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
// Register tools (unless --no-tools)
if !args.no_tools {
let app = AppContext;
worker
.register_tool(app.get_current_time_definition())
.unwrap();
worker.register_tool(app.calculate_definition()).unwrap();
worker.register_tool(app.get_current_time_definition());
worker.register_tool(app.calculate_definition());
}
// Register streaming display handlers
@ -450,7 +476,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
.on_text_block(StreamingPrinter::new())
.on_tool_use_block(ToolCallPrinter::new(tool_call_names.clone()));
worker.add_post_tool_call_hook(ToolResultPrinterHook::new(tool_call_names));
worker.set_interceptor(ToolResultPrinterPolicy::new(tool_call_names));
// One-shot mode
if let Some(prompt) = args.prompt {
@ -465,7 +491,27 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
return Ok(());
}
// Interactive loop
// Interactive loop — first input transitions Mutable → Locked
print!("\n👤 You: ");
io::stdout().flush()?;
let mut first_input = String::new();
io::stdin().read_line(&mut first_input)?;
let first_input = first_input.trim();
if first_input == "quit" || first_input == "exit" || first_input.is_empty() {
println!("\n👋 Goodbye!");
return Ok(());
}
let mut locked = match worker.run(first_input).await {
Ok(out) => out.worker,
Err(e) => {
eprintln!("\n❌ Error: {}", e);
return Ok(());
}
};
loop {
print!("\n👤 You: ");
io::stdout().flush()?;
@ -483,8 +529,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
break;
}
// Run Worker (Worker manages history)
match worker.run(input).await {
match locked.run(input).await {
Ok(_) => {}
Err(e) => {
eprintln!("\n❌ Error: {}", e);

View File

@ -0,0 +1,291 @@
//! Closure-based event callback API
//!
//! Provides a closure-based alternative to implementing `Handler<K>` directly.
//! Register callbacks on `Worker` via `on_text_block()`, `on_tool_use_block()`,
//! `on_usage()`, etc.
use std::marker::PhantomData;
use crate::handler::{
Handler, Kind, TextBlockEvent, TextBlockKind, ThinkingBlockEvent, ThinkingBlockKind,
ToolUseBlockEvent, ToolUseBlockKind, ToolUseBlockStart,
};
use crate::tool::ToolCall;
// =============================================================================
// TextBlock Closure Handler
// =============================================================================
/// Registration surface for the callbacks of a single text block.
///
/// An instance is handed to the setup closure registered with
/// `Worker::on_text_block()`; the closure attaches per-block callbacks through
/// `on_delta()` and `on_stop()`. Registering the same kind of callback twice
/// replaces the earlier one.
///
/// # Examples
///
/// ```ignore
/// worker.on_text_block(|block| {
///     block.on_delta(|text| print!("{}", text));
///     block.on_stop(|full_text| println!("\n--- {} chars ---", full_text.len()));
/// });
/// ```
pub struct TextBlockScope {
    pub(crate) on_delta: Option<Box<dyn FnMut(&str) + Send + Sync>>,
    pub(crate) on_stop: Option<Box<dyn FnMut(&str) + Send + Sync>>,
}

impl TextBlockScope {
    /// Creates a scope with no callbacks registered.
    fn new() -> Self {
        let (on_delta, on_stop) = (None, None);
        Self { on_delta, on_stop }
    }

    /// Registers the callback run for every streamed text fragment.
    pub fn on_delta(&mut self, f: impl FnMut(&str) + Send + Sync + 'static) {
        let callback: Box<dyn FnMut(&str) + Send + Sync> = Box::new(f);
        self.on_delta = Some(callback);
    }

    /// Registers the callback run once the block has completed.
    ///
    /// The callback receives the full text accumulated over the block.
    pub fn on_stop(&mut self, f: impl FnMut(&str) + Send + Sync + 'static) {
        let callback: Box<dyn FnMut(&str) + Send + Sync> = Box::new(f);
        self.on_stop = Some(callback);
    }
}
/// Per-block state created by Timeline's scope lifecycle.
///
/// Holds the callbacks produced by the setup closure for the current text
/// block together with the text accumulated from its deltas.
#[derive(Default)]
pub(crate) struct TextBlockClosureState {
/// Called with each delta fragment; installed when the block starts.
on_delta: Option<Box<dyn FnMut(&str) + Send + Sync>>,
/// Called with the accumulated buffer when the block stops.
on_stop: Option<Box<dyn FnMut(&str) + Send + Sync>>,
/// Concatenation of every delta received since the block started.
buffer: String,
}
/// Closure-based `Handler<TextBlockKind>` adapter.
///
/// `setup` is invoked on every `TextBlockEvent::Start` with a fresh
/// `TextBlockScope`, so callbacks are registered anew for each block.
pub(crate) struct ClosureTextBlockHandler {
/// User-supplied closure that registers the per-block callbacks.
pub(crate) setup: Box<dyn FnMut(&mut TextBlockScope) + Send + Sync>,
}
impl Handler<TextBlockKind> for ClosureTextBlockHandler {
type Scope = TextBlockClosureState;
fn on_event(&mut self, scope: &mut Self::Scope, event: &TextBlockEvent) {
match event {
TextBlockEvent::Start(_) => {
scope.buffer.clear();
let mut builder = TextBlockScope::new();
(self.setup)(&mut builder);
scope.on_delta = builder.on_delta;
scope.on_stop = builder.on_stop;
}
TextBlockEvent::Delta(text) => {
scope.buffer.push_str(text);
if let Some(f) = &mut scope.on_delta {
f(text);
}
}
TextBlockEvent::Stop(_) => {
if let Some(f) = &mut scope.on_stop {
f(&scope.buffer);
}
}
}
}
}
// =============================================================================
// ThinkingBlock Closure Handler
// =============================================================================
/// Registration surface for the callbacks of a single thinking block.
///
/// Same shape as `TextBlockScope`. Some providers (or some configurations)
/// emit thinking metadata without plaintext deltas; such a block fires
/// `Start` and `Stop` with no `Delta` in between, which is expected and not
/// an error.
pub struct ThinkingBlockScope {
    pub(crate) on_delta: Option<Box<dyn FnMut(&str) + Send + Sync>>,
    pub(crate) on_stop: Option<Box<dyn FnMut(&str) + Send + Sync>>,
}

impl ThinkingBlockScope {
    /// Creates a scope with no callbacks registered.
    fn new() -> Self {
        let (on_delta, on_stop) = (None, None);
        Self { on_delta, on_stop }
    }

    /// Registers the callback run for every streamed thinking fragment.
    pub fn on_delta(&mut self, f: impl FnMut(&str) + Send + Sync + 'static) {
        let callback: Box<dyn FnMut(&str) + Send + Sync> = Box::new(f);
        self.on_delta = Some(callback);
    }

    /// Registers the callback run once the block has completed.
    ///
    /// The callback receives the full accumulated thinking text, which may be
    /// empty when the provider emitted no plaintext deltas.
    pub fn on_stop(&mut self, f: impl FnMut(&str) + Send + Sync + 'static) {
        let callback: Box<dyn FnMut(&str) + Send + Sync> = Box::new(f);
        self.on_stop = Some(callback);
    }
}
/// Per-block state for the thinking block closure handler.
///
/// Holds the callbacks produced by the setup closure for the current block
/// together with the thinking text accumulated from its deltas.
#[derive(Default)]
pub(crate) struct ThinkingBlockClosureState {
/// Called with each delta fragment; installed when the block starts.
on_delta: Option<Box<dyn FnMut(&str) + Send + Sync>>,
/// Called with the accumulated buffer when the block stops.
on_stop: Option<Box<dyn FnMut(&str) + Send + Sync>>,
/// Concatenation of every delta received since the block started.
buffer: String,
}
/// Closure-based `Handler<ThinkingBlockKind>` adapter.
///
/// `setup` is invoked on every `ThinkingBlockEvent::Start` with a fresh
/// `ThinkingBlockScope`, so callbacks are registered anew for each block.
pub(crate) struct ClosureThinkingBlockHandler {
/// User-supplied closure that registers the per-block callbacks.
pub(crate) setup: Box<dyn FnMut(&mut ThinkingBlockScope) + Send + Sync>,
}
impl Handler<ThinkingBlockKind> for ClosureThinkingBlockHandler {
type Scope = ThinkingBlockClosureState;
fn on_event(&mut self, scope: &mut Self::Scope, event: &ThinkingBlockEvent) {
match event {
ThinkingBlockEvent::Start(_) => {
scope.buffer.clear();
let mut builder = ThinkingBlockScope::new();
(self.setup)(&mut builder);
scope.on_delta = builder.on_delta;
scope.on_stop = builder.on_stop;
}
ThinkingBlockEvent::Delta(text) => {
scope.buffer.push_str(text);
if let Some(f) = &mut scope.on_delta {
f(text);
}
}
ThinkingBlockEvent::Stop(_) => {
if let Some(f) = &mut scope.on_stop {
f(&scope.buffer);
}
}
}
}
}
// =============================================================================
// ToolUseBlock Closure Handler
// =============================================================================
/// Registration surface for the callbacks of a single tool use block.
///
/// An instance is handed to the setup closure registered with
/// `Worker::on_tool_use_block()`. That closure additionally receives a
/// `&ToolUseBlockStart` exposing the block's `id` and `name`.
///
/// # Examples
///
/// ```ignore
/// worker.on_tool_use_block(|start, block| {
///     println!("Tool: {} ({})", start.name, start.id);
///     block.on_delta(|json| { /* streaming JSON fragment */ });
///     block.on_stop(|call| println!("Done: {}", call.name));
/// });
/// ```
pub struct ToolUseBlockScope {
    pub(crate) on_delta: Option<Box<dyn FnMut(&str) + Send + Sync>>,
    pub(crate) on_stop: Option<Box<dyn FnMut(&ToolCall) + Send + Sync>>,
}

impl ToolUseBlockScope {
    /// Creates a scope with no callbacks registered.
    fn new() -> Self {
        let (on_delta, on_stop) = (None, None);
        Self { on_delta, on_stop }
    }

    /// Registers the callback run for every streamed JSON input fragment.
    pub fn on_delta(&mut self, f: impl FnMut(&str) + Send + Sync + 'static) {
        let callback: Box<dyn FnMut(&str) + Send + Sync> = Box::new(f);
        self.on_delta = Some(callback);
    }

    /// Registers the callback run once the block has completed.
    ///
    /// The callback receives the fully assembled `ToolCall` with parsed JSON
    /// input.
    pub fn on_stop(&mut self, f: impl FnMut(&ToolCall) + Send + Sync + 'static) {
        let callback: Box<dyn FnMut(&ToolCall) + Send + Sync> = Box::new(f);
        self.on_stop = Some(callback);
    }
}
/// Per-block state for tool use closure handler.
///
/// Keeps the callbacks for the current block plus the pieces needed to build
/// the final `ToolCall`: the id and name copied from the start event and the
/// raw JSON input accumulated from the deltas.
#[derive(Default)]
pub(crate) struct ToolUseBlockClosureState {
/// Called with each raw JSON fragment; installed when the block starts.
on_delta: Option<Box<dyn FnMut(&str) + Send + Sync>>,
/// Called with the assembled `ToolCall` when the block stops.
on_stop: Option<Box<dyn FnMut(&ToolCall) + Send + Sync>>,
/// Tool call id copied from the start event; moved out on stop.
id: String,
/// Tool name copied from the start event; moved out on stop.
name: String,
/// Concatenation of every JSON fragment received since the block started.
input_json: String,
}
/// Closure-based `Handler<ToolUseBlockKind>` adapter.
///
/// `setup` is invoked on every `ToolUseBlockEvent::Start` with the start
/// event and a fresh `ToolUseBlockScope`, so callbacks are registered anew
/// for each block.
pub(crate) struct ClosureToolUseBlockHandler {
/// User-supplied closure that registers the per-block callbacks.
pub(crate) setup: Box<dyn FnMut(&ToolUseBlockStart, &mut ToolUseBlockScope) + Send + Sync>,
}
impl Handler<ToolUseBlockKind> for ClosureToolUseBlockHandler {
type Scope = ToolUseBlockClosureState;
fn on_event(&mut self, scope: &mut Self::Scope, event: &ToolUseBlockEvent) {
match event {
ToolUseBlockEvent::Start(start) => {
scope.id = start.id.clone();
scope.name = start.name.clone();
scope.input_json.clear();
let mut builder = ToolUseBlockScope::new();
(self.setup)(start, &mut builder);
scope.on_delta = builder.on_delta;
scope.on_stop = builder.on_stop;
}
ToolUseBlockEvent::InputJsonDelta(json) => {
scope.input_json.push_str(json);
if let Some(f) = &mut scope.on_delta {
f(json);
}
}
ToolUseBlockEvent::Stop(_) => {
let input: serde_json::Value =
serde_json::from_str(&scope.input_json).unwrap_or_default();
let tool_call = ToolCall {
id: std::mem::take(&mut scope.id),
name: std::mem::take(&mut scope.name),
input,
};
if let Some(f) = &mut scope.on_stop {
f(&tool_call);
}
}
}
}
}
// =============================================================================
// Generic Meta Event Closure Handler
// =============================================================================
/// Closure-based `Handler<K>` adapter for meta events (Usage, Status, Error).
///
/// Wraps a single callback that is invoked with every event of kind `K`; no
/// per-block state is kept (the handler's scope type is `()`).
pub(crate) struct ClosureMetaHandler<F, K>
where
K: Kind,
{
/// Callback invoked with each `K::Event`.
pub(crate) callback: F,
/// Ties the handler to its event kind without storing a `K` value.
pub(crate) _kind: PhantomData<K>,
}
impl<F, K> Handler<K> for ClosureMetaHandler<F, K>
where
    F: FnMut(&K::Event) + Send + Sync,
    K: Kind,
{
    type Scope = ();

    /// Forwards `event` to the wrapped callback; the unit scope is unused.
    fn on_event(&mut self, _scope: &mut (), event: &K::Event) {
        let Self { callback, .. } = self;
        callback(event);
    }
}

View File

@ -91,6 +91,16 @@ impl Kind for ErrorKind {
type Event = ErrorEvent;
}
/// Reasoning item Kind - used to persist completed reasoning items.
///
/// Fires exactly once per reasoning item. The Worker receives it through
/// `ReasoningItemCollector` and, at the end of the turn, appends it to the
/// history as `Item::Reasoning`.
pub struct ReasoningItemKind;
impl Kind for ReasoningItemKind {
type Event = ReasoningItemEvent;
}
// =============================================================================
// Block Kind Definitions
// =============================================================================

View File

@ -1,310 +0,0 @@
//! Hook-related type definitions
//!
//! Types used for turn control and intervention in the Worker layer
use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use thiserror::Error;
// =============================================================================
// Hook Event Kinds
// =============================================================================
/// Type-level description of a hook event.
///
/// Each marker type implementing this trait fixes the value a `Hook` for that
/// event receives (`Input`) and the value it returns on success (`Output`).
pub trait HookEventKind: Send + Sync + 'static {
/// Value passed by mutable reference to `Hook::call`.
type Input;
/// Value returned by `Hook::call` on success.
type Output;
}
// Marker types, one per hook event. Their input and output types are fixed
// by the `HookEventKind` impls further down in this file.

/// Marker: input `crate::Item`, output `OnPromptSubmitResult`.
pub struct OnPromptSubmit;
/// Marker: input `Vec<crate::Item>`, output `PreLlmRequestResult`.
pub struct PreLlmRequest;
/// Marker: input `ToolCallContext`, output `PreToolCallResult`.
pub struct PreToolCall;
/// Marker: input `PostToolCallContext`, output `PostToolCallResult`.
pub struct PostToolCall;
/// Marker: input `Vec<crate::Item>`, output `OnTurnEndResult`.
pub struct OnTurnEnd;
/// Marker: input `String`, output `()`.
pub struct OnAbort;
/// Marker: input `TextDeltaContext`, output `StreamHookResult`.
pub struct OnTextDelta;
/// Marker: input `ToolCallDeltaContext`, output `StreamHookResult`.
pub struct OnToolCallDelta;
/// Marker: input `StreamChunkContext`, output `StreamHookResult`.
pub struct OnStreamChunk;
/// Marker: input `StreamCompleteContext`, output `StreamHookResult`.
pub struct OnStreamComplete;
// NOTE(review): how the Worker reacts to each variant below is decided outside
// this file; the per-variant notes describe only the payload each one carries.

/// Outcome returned by `OnPromptSubmit` hooks.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum OnPromptSubmitResult {
Continue,
/// Carries a `String` (presumably the cancellation reason; confirm at the call site).
Cancel(String),
}
/// Outcome returned by `PreLlmRequest` hooks.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PreLlmRequestResult {
Continue,
/// Carries a `String` (presumably the cancellation reason; confirm at the call site).
Cancel(String),
}
/// Outcome returned by `PreToolCall` hooks.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PreToolCallResult {
Continue,
Skip,
/// Carries a `String` (presumably the abort reason; confirm at the call site).
Abort(String),
Pause,
}
/// Outcome returned by `PostToolCall` hooks.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PostToolCallResult {
Continue,
/// Carries a `String` (presumably the abort reason; confirm at the call site).
Abort(String),
}
/// Outcome returned by `OnTurnEnd` hooks.
///
/// Unlike the other result enums this one does not derive `PartialEq`/`Eq`.
#[derive(Debug, Clone)]
pub enum OnTurnEndResult {
Finish,
/// Carries the items to continue with.
ContinueWithMessages(Vec<crate::Item>),
Paused,
}
/// Outcome shared by the streaming hooks (`OnTextDelta`, `OnToolCallDelta`,
/// `OnStreamChunk`, `OnStreamComplete`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum StreamHookResult {
Continue,
/// Carries a `String` (presumably the abort reason; confirm at the call site).
Abort(String),
Pause,
}
use std::sync::Arc;
use crate::tool::{Tool, ToolMeta};
/// Input context for PreToolCall
///
/// Passed to `Hook<PreToolCall>` by mutable reference.
pub struct ToolCallContext {
/// Tool call information (modifiable)
pub call: ToolCall,
/// Tool meta information (immutable)
pub meta: ToolMeta,
/// Tool instance (for state access)
pub tool: Arc<dyn Tool>,
}
/// Input context for PostToolCall
///
/// Passed to `Hook<PostToolCall>` by mutable reference.
pub struct PostToolCallContext {
/// Tool call information
pub call: ToolCall,
/// Tool execution result (modifiable)
pub result: ToolResult,
/// Tool meta information (immutable)
pub meta: ToolMeta,
/// Tool instance (for state access)
pub tool: Arc<dyn Tool>,
}
/// Input context for OnTextDelta
///
/// Passed to `Hook<OnTextDelta>` by mutable reference.
#[derive(Debug, Clone)]
pub struct TextDeltaContext {
/// Block index
pub index: usize,
/// Text delta content
pub delta: String,
}
/// Input context for OnToolCallDelta
///
/// Passed to `Hook<OnToolCallDelta>` by mutable reference.
#[derive(Debug, Clone)]
pub struct ToolCallDeltaContext {
/// Block index
pub index: usize,
/// Partial JSON fragment
pub delta_json_fragment: String,
}
/// Input context for OnStreamChunk
///
/// Passed to `Hook<OnStreamChunk>` by mutable reference.
#[derive(Debug, Clone)]
pub struct StreamChunkContext {
/// Public worker-level event
pub event: crate::event::Event,
}
/// Input context for OnStreamComplete
///
/// Passed to `Hook<OnStreamComplete>` by mutable reference.
#[derive(Debug, Clone)]
pub struct StreamCompleteContext {
/// Current turn number
pub turn: usize,
/// Number of streamed events in this request
pub event_count: usize,
}
// Binding of each marker type to the input a hook receives and the output it
// returns. The four streaming events share `StreamHookResult` as output.

// Operates on a single `Item`.
impl HookEventKind for OnPromptSubmit {
type Input = crate::Item;
type Output = OnPromptSubmitResult;
}
// Operates on a list of `Item`s.
impl HookEventKind for PreLlmRequest {
type Input = Vec<crate::Item>;
type Output = PreLlmRequestResult;
}
impl HookEventKind for PreToolCall {
type Input = ToolCallContext;
type Output = PreToolCallResult;
}
impl HookEventKind for PostToolCall {
type Input = PostToolCallContext;
type Output = PostToolCallResult;
}
// Operates on a list of `Item`s.
impl HookEventKind for OnTurnEnd {
type Input = Vec<crate::Item>;
type Output = OnTurnEndResult;
}
// Receives a `String` and produces no value.
impl HookEventKind for OnAbort {
type Input = String;
type Output = ();
}
impl HookEventKind for OnTextDelta {
type Input = TextDeltaContext;
type Output = StreamHookResult;
}
impl HookEventKind for OnToolCallDelta {
type Input = ToolCallDeltaContext;
type Output = StreamHookResult;
}
impl HookEventKind for OnStreamChunk {
type Input = StreamChunkContext;
type Output = StreamHookResult;
}
impl HookEventKind for OnStreamComplete {
type Input = StreamCompleteContext;
type Output = StreamHookResult;
}
// =============================================================================
// Tool Call / Result Types
// =============================================================================
/// Tool call information
///
/// Represents a ToolUse block from LLM, modifiable in Hook processing.
/// Serializable and deserializable through serde.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCall {
/// Tool call ID (used for linking with response)
pub id: String,
/// Tool name
pub name: String,
/// Input arguments (JSON)
pub input: Value,
}
/// Tool execution result
///
/// Represents the result after tool execution, modifiable in Hook processing.
/// Build one with `ToolResult::success` or `ToolResult::error`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolResult {
/// Corresponding tool call ID
pub tool_use_id: String,
/// Result content
pub content: String,
/// Whether this is an error
// `#[serde(default)]`: a missing field deserializes as `false`.
#[serde(default)]
pub is_error: bool,
}
impl ToolResult {
    /// Build a result whose `is_error` flag is `false`.
    ///
    /// `tool_use_id` identifies the tool call this result answers and
    /// `content` is the result payload; both are converted into `String`.
    pub fn success(tool_use_id: impl Into<String>, content: impl Into<String>) -> Self {
        Self::with_status(tool_use_id.into(), content.into(), false)
    }

    /// Build a result whose `is_error` flag is `true`.
    ///
    /// Takes the same arguments as [`ToolResult::success`].
    pub fn error(tool_use_id: impl Into<String>, content: impl Into<String>) -> Self {
        Self::with_status(tool_use_id.into(), content.into(), true)
    }

    /// Shared constructor behind `success` and `error`.
    fn with_status(tool_use_id: String, content: String, is_error: bool) -> Self {
        Self {
            tool_use_id,
            content,
            is_error,
        }
    }
}
// =============================================================================
// Hook Error
// =============================================================================
/// Hook error
///
/// Error type returned by `Hook::call`. Each variant carries a message that
/// is interpolated into the `#[error(...)]` format string declared on it.
#[derive(Debug, Error)]
pub enum HookError {
/// Processing was aborted (formatted as `Aborted: <message>`)
#[error("Aborted: {0}")]
Aborted(String),
/// Internal error (formatted as `Hook error: <message>`)
#[error("Hook error: {0}")]
Internal(String),
}
// =============================================================================
// Hook Trait
// =============================================================================
/// Trait for handling Hook events
///
/// Each event type has a different return type, constrained via `HookEventKind`.
/// Implementors must be `Send + Sync`.
#[async_trait]
pub trait Hook<E: HookEventKind>: Send + Sync {
/// Handle one event of kind `E`.
///
/// `input` is passed by mutable reference, so the hook may modify it in
/// place. Returns the event-specific `E::Output` on success or a
/// `HookError` on failure.
async fn call(&self, input: &mut E::Input) -> Result<E::Output, HookError>;
}
// =============================================================================
// Hook Registry
// =============================================================================
/// Registry holding all Hooks
///
/// Used internally by Worker to manage all Hook types. Holds one list of
/// boxed hooks per event kind; every list starts out empty.
#[derive(Default)]
pub struct HookRegistry {
    /// Hooks for the `OnPromptSubmit` event
    pub(crate) on_prompt_submit: Vec<Box<dyn Hook<OnPromptSubmit>>>,
    /// Hooks for the `PreLlmRequest` event
    pub(crate) pre_llm_request: Vec<Box<dyn Hook<PreLlmRequest>>>,
    /// Hooks for the `PreToolCall` event
    pub(crate) pre_tool_call: Vec<Box<dyn Hook<PreToolCall>>>,
    /// Hooks for the `PostToolCall` event
    pub(crate) post_tool_call: Vec<Box<dyn Hook<PostToolCall>>>,
    /// Hooks for the `OnTurnEnd` event
    pub(crate) on_turn_end: Vec<Box<dyn Hook<OnTurnEnd>>>,
    /// Hooks for the `OnAbort` event
    pub(crate) on_abort: Vec<Box<dyn Hook<OnAbort>>>,
    /// Hooks for the `OnTextDelta` event
    pub(crate) on_text_delta: Vec<Box<dyn Hook<OnTextDelta>>>,
    /// Hooks for the `OnToolCallDelta` event
    pub(crate) on_tool_call_delta: Vec<Box<dyn Hook<OnToolCallDelta>>>,
    /// Hooks for the `OnStreamChunk` event
    pub(crate) on_stream_chunk: Vec<Box<dyn Hook<OnStreamChunk>>>,
    /// Hooks for the `OnStreamComplete` event
    pub(crate) on_stream_complete: Vec<Box<dyn Hook<OnStreamComplete>>>,
}

impl HookRegistry {
    /// Create an empty HookRegistry (no hooks registered for any event).
    pub fn new() -> Self {
        Self::default()
    }
}

View File

@ -0,0 +1,185 @@
//! Interceptor - control flow delegation for the Worker execution loop
//!
//! Defines the [`Interceptor`] trait that upper layers (e.g. Pod) implement
//! to inject orchestration decisions (approval, skip, pause, abort)
//! into the Worker's turn loop without the Worker knowing about
//! higher-level concepts.
use std::sync::Arc;
use async_trait::async_trait;
use crate::Item;
use crate::tool::{Tool, ToolCall, ToolMeta, ToolResult};
// =============================================================================
// Action Enums
// =============================================================================
/// Action after prompt submission.
///
/// Returned by [`Interceptor::on_prompt_submit`].
#[derive(Debug, Clone, PartialEq)]
pub enum PromptAction {
/// Proceed normally.
Continue,
/// Cancel with a reason.
Cancel(String),
/// Proceed, and append these items to history right after the user
/// message. Mirrors [`TurnEndAction::ContinueWithMessages`] for the
/// submit edge: lets the upper layer attach resolver-produced
/// system messages (e.g. `@<path>` file content) so they sit
/// adjacent to the user message that referenced them.
ContinueWith(Vec<Item>),
}
/// Action before an LLM request.
///
/// Returned by [`Interceptor::pre_llm_request`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PreRequestAction {
/// Proceed normally.
Continue,
/// Cancel with a reason (treated as an error).
Cancel(String),
/// Yield control to the caller for external processing.
///
/// The Worker exits the turn loop cleanly with `WorkerResult::Yielded`.
/// The caller is expected to resume execution later.
Yield,
}
/// Action before a tool call.
///
/// Returned by [`Interceptor::pre_tool_call`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PreToolAction {
/// Proceed with execution.
Continue,
/// Skip this tool call (do not execute).
Skip,
/// Do not execute the tool call; commit this synthetic result instead.
///
/// This preserves provider-visible `tool_use` / `tool_result` pairing
/// without aborting the whole turn.
SyntheticResult(ToolResult),
/// Abort the entire run, carrying a reason.
Abort(String),
/// Pause execution (can be resumed later).
Pause,
}
/// Action after a tool call.
///
/// Returned by [`Interceptor::post_tool_call`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PostToolAction {
/// Proceed normally.
Continue,
/// Abort the entire run, carrying a reason.
Abort(String),
}
/// Action at the end of a turn (when LLM produces no tool calls).
///
/// Returned by [`Interceptor::on_turn_end`].
///
/// Derives `PartialEq` like the other action enums so that interceptor
/// decisions can be compared directly (for example in tests).
/// `PromptAction` already derives `PartialEq` over `Vec<Item>`, so `Item`
/// is known to support it.
#[derive(Debug, Clone, PartialEq)]
pub enum TurnEndAction {
    /// Turn is finished, return to caller.
    Finish,
    /// Continue with additional messages injected into history.
    ContinueWithMessages(Vec<Item>),
    /// Pause execution (can be resumed later).
    Pause,
}
// =============================================================================
// Context Types
// =============================================================================
/// Context for pre-tool-call decisions.
///
/// Passed by mutable reference to [`Interceptor::pre_tool_call`].
pub struct ToolCallInfo {
/// Tool call information (modifiable).
pub call: ToolCall,
/// Tool meta information.
pub meta: ToolMeta,
/// Tool instance (for state access).
pub tool: Arc<dyn Tool>,
}
/// Context for post-tool-call decisions.
///
/// Passed by mutable reference to [`Interceptor::post_tool_call`].
pub struct ToolResultInfo {
/// Original tool call.
pub call: ToolCall,
/// Tool execution result (modifiable).
pub result: ToolResult,
/// Tool meta information.
pub meta: ToolMeta,
/// Tool instance (for state access).
pub tool: Arc<dyn Tool>,
}
// =============================================================================
// Interceptor Trait
// =============================================================================
/// Intercepts the Worker execution loop at key decision points.
///
/// All methods have default implementations that let the Worker
/// proceed without intervention. Upper layers (e.g. Pod) provide
/// richer implementations for approval flows, permission checks, etc.
#[async_trait]
pub trait Interceptor: Send + Sync {
/// Called after receiving user input, before adding to history.
///
/// The item is passed mutably. The default returns
/// [`PromptAction::Continue`].
async fn on_prompt_submit(&self, _item: &mut Item) -> PromptAction {
PromptAction::Continue
}
/// Items that should be **committed to `worker.history`** just
/// before the next LLM request. Returned items are `extend`ed into
/// the persistent history (and therefore picked up by the per-turn
/// clone that backs the LLM request, plus the usual
/// history-persistence path).
///
/// Use this for inputs that arrive from outside the LLM and need
/// to be reflected in the on-disk history — notifications,
/// cross-Pod events, system reminders. Do **not** use
/// [`Self::pre_llm_request`] for that purpose: it mutates a
/// per-request clone, so any committed assistant response that
/// reacts to the injection would have no visible trigger on the
/// next turn (or after resume / compaction).
///
/// `pre_llm_request` remains the right place for purely
/// reproducible per-request transformations (pruning, content
/// trimming, cache anchors) that depend only on the existing
/// history.
///
/// The default returns an empty list.
async fn pending_history_appends(&self) -> Vec<Item> {
Vec::new()
}
/// Called before each LLM request. The context starts as a clone
/// of `worker.history` (after `pending_history_appends` and the
/// Worker's own prune projection have been applied) and can be
/// further modified for that single request only — mutations here
/// are **not** persisted back to history. Use
/// [`Self::pending_history_appends`] for inputs that need to land
/// in history.
///
/// The default returns [`PreRequestAction::Continue`].
async fn pre_llm_request(&self, _context: &mut Vec<Item>) -> PreRequestAction {
PreRequestAction::Continue
}
/// Called before each tool is executed.
///
/// The default returns [`PreToolAction::Continue`].
async fn pre_tool_call(&self, _info: &mut ToolCallInfo) -> PreToolAction {
PreToolAction::Continue
}
/// Called after each tool completes.
///
/// The default returns [`PostToolAction::Continue`].
async fn post_tool_call(&self, _info: &mut ToolResultInfo) -> PostToolAction {
PostToolAction::Continue
}
/// Called when a turn ends with no tool calls.
///
/// The default returns [`TurnEndAction::Finish`].
async fn on_turn_end(&self, _history: &[Item]) -> TurnEndAction {
TurnEndAction::Finish
}
/// Called when execution is interrupted (abort or cancel).
///
/// The default does nothing.
async fn on_abort(&self, _reason: &str) {}
}
/// Default interceptor: no intervention. Worker proceeds through the loop
/// without any external control flow decisions.
///
/// The impl block is empty, so every method uses the default provided by
/// the [`Interceptor`] trait.
pub(crate) struct DefaultInterceptor;
#[async_trait]
impl Interceptor for DefaultInterceptor {}

View File

@ -6,8 +6,8 @@
//!
//! - [`Worker`] - Central component for managing LLM interactions
//! - [`tool::Tool`] - Tools that can be invoked by the LLM
//! - [`hook::Hook`] - Hooks for intercepting turn progression
//! - [`subscriber::WorkerSubscriber`] - Subscribing to streaming events
//! - [`interceptor::Interceptor`] - Control-flow delegation for the execution loop
//! - Closure-based event callbacks via `Worker::on_text_block()`, `on_tool_use_block()`, etc.
//!
//! # Quick Start
//!
@ -27,26 +27,38 @@
//!
//! # Cache Protection
//!
//! To maximize KV cache hit rate, transition to the locked state
//! with [`Worker::lock()`] before execution.
//! `run()` automatically locks the cache. To edit state between turns,
//! call `unlock_cache()` first; the next `run()` re-locks automatically.
//!
//! ```ignore
//! let mut locked = worker.lock();
//! locked.run("user input").await?;
//! worker.run("user input").await?;
//! worker.unlock_cache();
//! worker.set_system_prompt("new prompt");
//! worker.run("next input").await?;
//! ```
mod handler;
mod message;
mod worker;
pub(crate) mod callback;
pub mod event;
pub mod hook;
pub mod interceptor;
pub mod llm_client;
pub mod prune;
pub mod state;
pub mod subscriber;
pub mod timeline;
pub mod token_counter;
pub mod tool;
pub mod tool_server;
pub mod usage_record;
pub use callback::{TextBlockScope, ThinkingBlockScope, ToolUseBlockScope};
pub use handler::ToolUseBlockStart;
pub use interceptor::Interceptor;
pub use message::{ContentPart, Item, Message, Role};
pub use worker::{ToolRegistryError, Worker, WorkerConfig, WorkerError, WorkerResult};
pub use tool::{ToolCall, ToolOutputLimits, ToolResult};
pub use usage_record::UsageRecord;
pub use worker::{
LlmRetryNotice, RunOutput, ToolRegistryError, Worker, WorkerConfig, WorkerError, WorkerResult,
};

View File

@ -0,0 +1,57 @@
//! `Scheme` 実装と通信層が要求する認証要件、および動的認証プロバイダ。
//!
//! マニフェスト側の型(`ModelConfig` / `SchemeKind` / `AuthRef`)は
//! `crates/manifest` に置き、llm-worker はそれを知らずに済む。
//! `AuthRequirement` は scheme が宣言する「この scheme はどんな認証を
//! 期待するか」のランタイム記述で、manifest 側の `AuthRef` との
//! 照合(`AuthRef → ResolvedAuth` 変換の適否)は `crates/provider`
//! で行う。
//!
//! Codex OAuth のようにリクエスト毎にトークンが変わり得る認証は
//! [`AuthProvider`] trait を `crates/provider` 側で実装し、
//! [`super::transport::ResolvedAuth::Custom`] 経由で transport に渡す。
use async_trait::async_trait;
use reqwest::header::{HeaderName, HeaderValue};
use super::error::ClientError;
/// Authentication requirement returned by `Scheme::required_auth()`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AuthRequirement {
/// No authentication (e.g. Ollama).
None,
/// `Authorization: Bearer <token>` header (the token is the API-key equivalent).
Bearer,
/// `x-api-key: <token>` header (Anthropic style).
XApiKey,
/// Query parameter `?<name>=<token>` (Gemini style).
QueryParam { name: &'static str },
/// Composite headers (Codex OAuth etc.; resolved on the `crates/provider` side).
Custom,
}
/// Provider that assembles authentication headers dynamically for each request.
///
/// Used for cases such as Codex OAuth, where the access_token is renewed by
/// a refresh, or where several headers such as `ChatGPT-Account-Id` /
/// `X-OpenAI-Fedramp` have to be injected together. The implementation
/// lives in `crates/provider`; llm-worker only knows the trait.
///
/// The returned headers are inserted into the `HeaderMap` as-is. The
/// scheme's default authentication headers, including `Authorization`, are
/// not sent, so the implementation must set them itself when needed.
#[async_trait]
pub trait AuthProvider: Send + Sync + std::fmt::Debug {
/// Returns the authentication headers for one request, performing a
/// refresh internally if one is needed.
async fn headers(&self) -> Result<Vec<(HeaderName, HeaderValue)>, ClientError>;
/// Whether this is the composite authentication for the ChatGPT Codex backend.
///
/// The transport does not know the provider crate's concrete type, so it
/// uses this hook alone to switch on the Codex-CLI-compatible wire
/// behavior (conversation header, request compression, etc.).
/// The default returns `false`.
fn is_codex_backend(&self) -> bool {
false
}
}

View File

@ -0,0 +1,169 @@
//! モデル能力メタデータ
//!
//! `ModelCapability` はモデルが持つ機能差を表現する。scheme は同じでも
//! モデルごとに reasoning 可否や prompt caching 方式が違うため、scheme
//! から分離して保持する。
//!
//! 値の供給経路は 2 通り:
//! 1. scheme 実装側の `model_id → ModelCapability` 静的テーブル(既知モデル)
//! 2. `ModelConfig::capability` での明示 override(未知モデル、または上書き)
use serde::{Deserialize, Deserializer, Serialize, Serializer};
/// Model capability metadata.
///
/// `reasoning` and `vision` are marked `#[serde(default)]`, so they may be
/// omitted from the serialized form.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ModelCapability {
pub tool_calling: ToolCallingSupport,
pub structured_output: StructuredOutput,
#[serde(default)]
pub reasoning: Option<ReasoningSupport>,
#[serde(default)]
pub vision: bool,
pub prompt_caching: CacheStrategy,
}
impl ModelCapability {
/// Conservative default that supports nothing; used as the fallback for
/// unknown models.
pub const fn minimal() -> Self {
Self {
tool_calling: ToolCallingSupport::None,
structured_output: StructuredOutput::None,
reasoning: None,
vision: false,
prompt_caching: CacheStrategy::Auto,
}
}
}
/// Tool-calling support level (serialized in `snake_case`).
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum ToolCallingSupport {
/// Not supported
None,
/// Only one tool per response
Sequential,
/// Multiple tools in parallel within one response
Parallel,
}
/// Structured output support level (serialized in `snake_case`).
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum StructuredOutput {
None,
/// `json_object` mode (forces JSON output without a schema)
JsonObject,
/// Structured output constrained by a JSON Schema
JsonSchema,
}
/// Reasoning (extended thinking) support (serialized in `snake_case`).
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum ReasoningSupport {
/// OpenAI style: `reasoning.effort` (low/medium/high)
Effort,
/// Anthropic style: `thinking.budget_tokens`
BudgetTokens,
/// Both are supported (handled internally as the common
/// `ReasoningControl` and projected by each scheme)
Both,
}
/// Prompt caching strategy.
///
/// Serialized as an internally tagged enum with a `kind` field whose value
/// is the `snake_case` variant name.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum CacheStrategy {
/// Anthropic: `cache_control` markers are inserted explicitly
Explicit { max_breakpoints: u8 },
/// Everything else: automatic server-side prefix caching, or unsupported
Auto,
}
/// Reasoning control (common type; each scheme projects it into the
/// vendor-specific format).
///
/// A string is treated as a provider-native effort label and a number as a
/// provider-native thinking budget in tokens. The type expresses exactly one
/// of the two. Being `#[serde(untagged)]`, the variant is chosen from the
/// shape of the serialized value.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(untagged)]
pub enum ReasoningControl {
Effort(ReasoningEffort),
BudgetTokens(i32),
}
/// Reasoning effort label.
///
/// The five well-known labels have dedicated variants; any other label is
/// kept verbatim in [`ReasoningEffort::Other`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ReasoningEffort {
    Minimal,
    Low,
    Medium,
    High,
    XHigh,
    Other(String),
}

impl ReasoningEffort {
    /// Returns the textual label of this effort level.
    ///
    /// Known variants map to their lowercase names; `Other` returns the
    /// stored label unchanged.
    pub fn as_str(&self) -> &str {
        match self {
            Self::Other(custom) => custom.as_str(),
            Self::XHigh => "xhigh",
            Self::High => "high",
            Self::Medium => "medium",
            Self::Low => "low",
            Self::Minimal => "minimal",
        }
    }
}

impl From<String> for ReasoningEffort {
    /// Parses a label. Matching is exact (case-sensitive); anything that is
    /// not a known label is wrapped in `Other` without modification.
    fn from(value: String) -> Self {
        let preset = match &*value {
            "minimal" => Some(Self::Minimal),
            "low" => Some(Self::Low),
            "medium" => Some(Self::Medium),
            "high" => Some(Self::High),
            "xhigh" => Some(Self::XHigh),
            _ => None,
        };
        preset.unwrap_or_else(|| Self::Other(value))
    }
}
// Serializes the effort as a plain string: the label returned by `as_str()`.
impl Serialize for ReasoningEffort {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.serialize_str(self.as_str())
}
}
// Deserializes from a string and converts it through `From<String>`.
impl<'de> Deserialize<'de> for ReasoningEffort {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        let label = String::deserialize(deserializer)?;
        Ok(Self::from(label))
    }
}
#[cfg(test)]
mod tests {
use super::{ReasoningControl, ReasoningEffort};
// A JSON string selects the `Effort` variant: a known label maps to its
// dedicated variant and an unknown label is kept in `Other`.
#[test]
fn reasoning_control_deserializes_effort_labels() {
let known: ReasoningControl = serde_json::from_str(r#""xhigh""#).unwrap();
assert_eq!(known, ReasoningControl::Effort(ReasoningEffort::XHigh));
let unknown: ReasoningControl = serde_json::from_str(r#""provider-native""#).unwrap();
assert_eq!(
unknown,
ReasoningControl::Effort(ReasoningEffort::Other("provider-native".into()))
);
}
// A JSON number selects `BudgetTokens`; negative values are accepted
// because the budget is an `i32`.
#[test]
fn reasoning_control_deserializes_signed_budget() {
let dynamic: ReasoningControl = serde_json::from_str("-1").unwrap();
assert_eq!(dynamic, ReasoningControl::BudgetTokens(-1));
}
}

View File

@ -36,6 +36,8 @@ impl std::fmt::Display for ConfigWarning {
}
}
pub type ResponseStream = Pin<Box<dyn Stream<Item = Result<Event, ClientError>> + Send>>;
/// LLMクライアントのtrait
///
/// 各プロバイダはこのtraitを実装し、統一されたインターフェースを提供する。
@ -49,10 +51,13 @@ pub trait LlmClient: Send + Sync {
/// # Returns
/// * `Ok(Stream)` - イベントストリーム
/// * `Err(ClientError)` - エラー
async fn stream(
&self,
request: Request,
) -> Result<Pin<Box<dyn Stream<Item = Result<Event, ClientError>> + Send>>, ClientError>;
async fn stream(&self, request: Request) -> Result<ResponseStream, ClientError>;
/// Clone this client into a new `Box<dyn LlmClient>`.
///
/// Used when a second client instance is needed (e.g. for context
/// compaction) without access to the original construction parameters.
fn clone_boxed(&self) -> Box<dyn LlmClient>;
/// 設定をバリデーションし、未サポートの設定があれば警告を返す
///
@ -68,18 +73,25 @@ pub trait LlmClient: Send + Sync {
}
}
/// Makes `Box<dyn LlmClient>` cloneable by delegating to
/// `LlmClient::clone_boxed`.
impl Clone for Box<dyn LlmClient> {
fn clone(&self) -> Self {
self.clone_boxed()
}
}
/// `Box<dyn LlmClient>` に対する `LlmClient` の実装
///
/// これにより、動的ディスパッチを使用するクライアントも `Worker` で利用可能になる。
#[async_trait]
impl LlmClient for Box<dyn LlmClient> {
async fn stream(
&self,
request: Request,
) -> Result<Pin<Box<dyn Stream<Item = Result<Event, ClientError>> + Send>>, ClientError> {
async fn stream(&self, request: Request) -> Result<ResponseStream, ClientError> {
(**self).stream(request).await
}
fn clone_boxed(&self) -> Box<dyn LlmClient> {
(**self).clone_boxed()
}
fn validate_config(&self, config: &RequestConfig) -> Vec<ConfigWarning> {
(**self).validate_config(config)
}

View File

@ -1,6 +1,6 @@
//! LLMクライアントエラー型
use std::fmt;
use std::{fmt, time::Duration};
/// LLMクライアントのエラー
#[derive(Debug)]
@ -16,6 +16,12 @@ pub enum ClientError {
status: Option<u16>,
code: Option<String>,
message: String,
retry_after: Option<Duration>,
},
/// A request lifecycle phase exceeded its hard timeout.
Timeout {
phase: &'static str,
timeout: Duration,
},
/// 設定エラー
Config(String),
@ -31,6 +37,7 @@ impl fmt::Display for ClientError {
status,
code,
message,
..
} => {
write!(f, "API error")?;
if let Some(s) = status {
@ -41,6 +48,9 @@ impl fmt::Display for ClientError {
}
write!(f, ": {}", message)
}
ClientError::Timeout { phase, timeout } => {
write!(f, "{phase} timed out after {}s", timeout.as_secs())
}
ClientError::Config(msg) => write!(f, "Config error: {}", msg),
}
}
@ -67,3 +77,96 @@ impl From<serde_json::Error> for ClientError {
ClientError::Json(err)
}
}
impl ClientError {
    /// HTTP status attached to an `Api` error; `None` for every other
    /// variant and for `Api` errors that carry no status.
    pub fn status(&self) -> Option<u16> {
        if let ClientError::Api { status, .. } = self {
            *status
        } else {
            None
        }
    }

    /// Retry delay attached to an `Api` error; `None` for every other
    /// variant and for `Api` errors that carry no delay.
    pub fn retry_after(&self) -> Option<Duration> {
        if let ClientError::Api { retry_after, .. } = self {
            *retry_after
        } else {
            None
        }
    }
}
/// Decides whether an error is a transient failure that should be retried.
///
/// Retryable:
/// - `Api { status }` with status 408 / 425 / 429 / 500 / 502 / 503 / 504 / 529
/// - `Http(reqwest::Error)` for which `is_connect()` or `is_timeout()` holds
/// - `Timeout { .. }`, the lifecycle hard timeout
///
/// Everything else (Json, Sse, Config, any other Api status, and Api errors
/// without a status) yields `false`. Failures that happen after SSE reading
/// has started are surfaced by the caller as `Sse`, so leaving `Sse` out
/// here rejects them automatically.
pub fn is_retryable(error: &ClientError) -> bool {
    // HTTP statuses treated as transient.
    const RETRYABLE_STATUSES: [u16; 8] = [408, 425, 429, 500, 502, 503, 504, 529];

    match error {
        ClientError::Timeout { .. } => true,
        ClientError::Http(e) => e.is_connect() || e.is_timeout(),
        ClientError::Api { status, .. } => {
            matches!(status, Some(code) if RETRYABLE_STATUSES.contains(code))
        }
        ClientError::Json(_) | ClientError::Sse(_) | ClientError::Config(_) => false,
    }
}
#[cfg(test)]
mod tests {
use super::*;
// Builds an `Api` error that carries only the given status.
fn api_err(status: Option<u16>) -> ClientError {
ClientError::Api {
status,
code: None,
message: String::new(),
retry_after: None,
}
}
// Every status in the retryable set must be classified as retryable.
#[test]
fn retryable_status_codes() {
for code in [408u16, 425, 429, 500, 502, 503, 504, 529] {
assert!(
is_retryable(&api_err(Some(code))),
"status {code} should be retryable",
);
}
}
// A sample of statuses outside the retryable set must be rejected.
#[test]
fn non_retryable_status_codes() {
for code in [400u16, 401, 403, 404, 409, 410, 422, 501] {
assert!(
!is_retryable(&api_err(Some(code))),
"status {code} should not be retryable",
);
}
}
// An `Api` error without a status is never retried.
#[test]
fn api_without_status_not_retryable() {
assert!(!is_retryable(&api_err(None)));
}
// The lifecycle hard timeout is always retryable.
#[test]
fn lifecycle_timeout_is_retryable() {
assert!(is_retryable(&ClientError::Timeout {
phase: "stream_open",
timeout: Duration::from_secs(30),
}));
}
// Json, Sse and Config errors are never retried.
#[test]
fn json_sse_config_not_retryable() {
let json_err = serde_json::from_str::<serde_json::Value>("not json").unwrap_err();
assert!(!is_retryable(&ClientError::Json(json_err)));
assert!(!is_retryable(&ClientError::Sse("boom".into())));
assert!(!is_retryable(&ClientError::Config("boom".into())));
}
}

View File

@ -15,8 +15,11 @@ use serde::{Deserialize, Serialize};
///
/// # イベントの種類
///
/// - **メタイベント**: `Ping`, `Usage`, `Status`, `Error`
/// - **メタイベント**: `Ping`, `Usage`, `Status`, `Error`, `UnhandledSse`
/// - **ブロックイベント**: `BlockStart`, `BlockDelta`, `BlockStop`, `BlockAbort`
/// - **永続化イベント**: `ReasoningItem` (history に commit すべき完成済み
/// reasoning item。streaming 表示用の Thinking BlockStart/Delta/Stop と
/// は別経路で発火する)
///
/// # ブロックのライフサイクル
///
@ -32,6 +35,10 @@ pub enum Event {
Status(StatusEvent),
/// エラー発生
Error(ErrorEvent),
/// Scheme が生成内容として解釈しない未対応 SSE イベント。
///
/// stream trace 用の観測イベントであり、timeline / history には反映しない。
UnhandledSse(UnhandledSseEvent),
/// ブロック開始(テキスト、ツール使用等)
BlockStart(BlockStart),
@ -41,6 +48,18 @@ pub enum Event {
BlockStop(BlockStop),
/// ブロック中断
BlockAbort(BlockAbort),
/// Reasoning item の完成。scheme が「次の request に送り返すための
/// reasoning material が揃った」点で 1 度だけ発火する。
///
/// - Anthropic: 1 つの `thinking` content_block 完了ごと
/// - OpenAI Responses: 1 つの reasoning output_item 完了ごと
///
/// 上位層Worker / ReasoningItemCollectorはこれを `Item::Reasoning`
/// として `worker.history` に append する。streaming 表示用の
/// `BlockStart(Thinking)` / `BlockDelta(Thinking)` / `BlockStop(Thinking)`
/// は依然として並行発火するlive display と round-trip persist の責務分離)。
ReasoningItem(ReasoningItemEvent),
}
// =============================================================================
@ -54,17 +73,27 @@ pub struct PingEvent {
}
/// 使用量イベント
///
/// プロバイダから受信した 1 LLM リクエスト分のトークン会計。
/// 各 scheme で正規化され、フィールドの意味は全プロバイダ共通:
///
/// - `input_tokens` は **送信した prompt prefix 全体の占有量**(プロンプト全長)。
/// キャッシュヒット分も含まれる。Anthropic は raw API では非キャッシュ分のみを
/// `input_tokens` として返すため、`AnthropicScheme::convert_usage` で
/// `cache_read + cache_creation` を加算してこの規約に揃えている。
/// - `cache_read_input_tokens` / `cache_creation_input_tokens` は上記の内訳で、
/// 料金会計用。占有量からは差し引かない。
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
pub struct UsageEvent {
/// 入力トークン数
/// 送信した prompt prefix の総トークン数(占有量、キャッシュ込み)
pub input_tokens: Option<u64>,
/// 出力トークン数
/// このリクエストで生成された出力トークン数
pub output_tokens: Option<u64>,
/// 合計トークン数
/// `input_tokens + output_tokens`
pub total_tokens: Option<u64>,
/// キャッシュ読み込みトークン数
/// `input_tokens` のうちキャッシュから読まれた分(割引料金)
pub cache_read_input_tokens: Option<u64>,
/// キャッシュ作成トークン数
/// `input_tokens` のうちこのリクエストでキャッシュに書かれた分割増料金、Anthropic
pub cache_creation_input_tokens: Option<u64>,
}
@ -94,6 +123,18 @@ pub struct ErrorEvent {
pub message: String,
}
/// Observability meta-event for an unhandled SSE event.
///
/// `data_preview` is a bounded preview of the raw SSE data received from the
/// provider; `data_len` is the byte length of the raw data before it was
/// reduced to the preview.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct UnhandledSseEvent {
pub provider: String,
pub event_type: String,
pub data_preview: String,
pub data_len: usize,
}
// =============================================================================
// Block Types
// =============================================================================
@ -202,6 +243,31 @@ impl BlockAbort {
}
}
// =============================================================================
// Reasoning Item Event
// =============================================================================
/// A completed reasoning item. The scheme fires it exactly once, after all
/// the material needed for a round trip (text, summary, encrypted_content,
/// signature, id) has been gathered.
///
/// Its fields correspond 1:1 to those of `Item::Reasoning`.
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
pub struct ReasoningItemEvent {
/// Item id observed by the scheme (the `id` of OpenAI Responses).
pub id: Option<String>,
/// Reasoning body text: the accumulated `thinking` for Anthropic, the
/// accumulated `reasoning_text` for OpenAI. Empty for redacted_thinking.
pub text: String,
/// Summary (`summary_text[]` of OpenAI Responses). Empty for other schemes.
pub summary: Vec<String>,
/// Encrypted opaque blob (Anthropic `redacted_thinking.data` /
/// OpenAI Responses `encrypted_content`).
pub encrypted_content: Option<String>,
/// Anthropic extended thinking signature. Required for the round trip.
pub signature: Option<String>,
}
/// 停止理由
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum StopReason {

View File

@ -16,14 +16,19 @@
//! - `providers`: プロバイダ固有のクライアント実装
//! - `scheme`: APIスキーマ(リクエスト/レスポンス変換)
pub mod auth;
pub mod capability;
pub mod client;
pub mod error;
pub mod event;
pub mod types;
pub mod providers;
pub mod retry;
pub mod scheme;
pub mod transport;
pub use auth::*;
pub use capability::*;
pub use client::*;
pub use error::*;
pub use event::*;

View File

@ -1,201 +0,0 @@
//! Anthropic プロバイダ実装
//!
//! Anthropic Messages APIと通信し、Eventストリームを出力
use std::pin::Pin;
use crate::llm_client::{
ClientError, LlmClient, Request, event::Event, scheme::anthropic::AnthropicScheme,
};
use async_trait::async_trait;
use eventsource_stream::Eventsource;
use futures::{Stream, StreamExt, TryStreamExt, future::ready};
use reqwest::header::{CONTENT_TYPE, HeaderMap, HeaderValue};
/// Anthropic client
pub struct AnthropicClient {
/// HTTP client
http_client: reqwest::Client,
/// API key
api_key: String,
/// Model name
model: String,
/// Scheme
scheme: AnthropicScheme,
/// Base URL
base_url: String,
}
impl AnthropicClient {
    /// Create a new Anthropic client with a fresh HTTP client, the default
    /// scheme, and `https://api.anthropic.com` as the base URL.
    pub fn new(api_key: impl Into<String>, model: impl Into<String>) -> Self {
        let base_url = String::from("https://api.anthropic.com");
        Self {
            http_client: reqwest::Client::new(),
            api_key: api_key.into(),
            model: model.into(),
            scheme: AnthropicScheme::default(),
            base_url,
        }
    }

    /// Replace the HTTP client.
    pub fn with_http_client(self, client: reqwest::Client) -> Self {
        Self {
            http_client: client,
            ..self
        }
    }

    /// Replace the scheme.
    pub fn with_scheme(self, scheme: AnthropicScheme) -> Self {
        Self { scheme, ..self }
    }

    /// Replace the base URL.
    pub fn with_base_url(self, url: impl Into<String>) -> Self {
        Self {
            base_url: url.into(),
            ..self
        }
    }

    /// Build the request headers.
    ///
    /// Always sets `Content-Type`, `x-api-key` and `anthropic-version`, and
    /// adds `anthropic-beta` when the scheme enables fine-grained tool
    /// streaming. Returns `ClientError::Config` when the API key or the API
    /// version is not a valid header value.
    fn build_headers(&self) -> Result<HeaderMap, ClientError> {
        let api_key = HeaderValue::from_str(&self.api_key)
            .map_err(|e| ClientError::Config(format!("Invalid API key: {}", e)))?;
        let api_version = HeaderValue::from_str(&self.scheme.api_version)
            .map_err(|e| ClientError::Config(format!("Invalid API version: {}", e)))?;

        let mut headers = HeaderMap::new();
        headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
        headers.insert("x-api-key", api_key);
        headers.insert("anthropic-version", api_version);

        // Opt in to fine-grained tool streaming only when the scheme asks for it.
        if self.scheme.fine_grained_tool_streaming {
            headers.insert(
                "anthropic-beta",
                HeaderValue::from_static("fine-grained-tool-streaming-2025-05-14"),
            );
        }
        Ok(headers)
    }
}
#[async_trait]
impl LlmClient for AnthropicClient {
    /// Send the request to `{base_url}/v1/messages` and return the parsed
    /// SSE event stream.
    ///
    /// A non-success HTTP status is turned into `ClientError::Api`; when the
    /// body is JSON its `error.type` / `error.message` are used, otherwise
    /// the raw body becomes the message.
    async fn stream(
        &self,
        request: Request,
    ) -> Result<Pin<Box<dyn Stream<Item = Result<Event, ClientError>> + Send>>, ClientError> {
        let endpoint = format!("{}/v1/messages", self.base_url);
        let headers = self.build_headers()?;
        let payload = self.scheme.build_request(&self.model, &request);
        let response = self
            .http_client
            .post(&endpoint)
            .headers(headers)
            .json(&payload)
            .send()
            .await?;

        // Error responses: extract as much structure as the body offers.
        let http_status = response.status();
        if !http_status.is_success() {
            let text = response.text().await.unwrap_or_default();
            let (code, message) = match serde_json::from_str::<serde_json::Value>(&text) {
                Ok(json) => {
                    let error = json.get("error").unwrap_or(&json);
                    let code = error.get("type").and_then(|v| v.as_str()).map(String::from);
                    let message = error
                        .get("message")
                        .and_then(|v| v.as_str())
                        .unwrap_or(&text)
                        .to_string();
                    (code, message)
                }
                // Body is not JSON: report it verbatim.
                Err(_) => (None, text),
            };
            return Err(ClientError::Api {
                status: Some(http_status.as_u16()),
                code,
                message,
            });
        }

        // Build the SSE stream.
        let scheme = self.scheme.clone();
        let event_stream = response
            .bytes_stream()
            .map_err(|e| std::io::Error::other(e))
            .eventsource();

        // Anthropic's BlockStop event does not carry the correct block_type,
        // so the type seen at BlockStart is remembered and patched in here.
        let mut open_block_type = None;
        let stream = event_stream.filter_map(move |result| {
            let item = match result {
                Err(e) => Some(Err(ClientError::Sse(e.to_string()))),
                Ok(event) => match scheme.parse_event(&event.event, &event.data) {
                    Err(e) => Some(Err(e)),
                    Ok(None) => None,
                    Ok(Some(Event::BlockStart(start))) => {
                        open_block_type = Some(start.block_type);
                        Some(Ok(Event::BlockStart(start)))
                    }
                    Ok(Some(Event::BlockStop(stop))) => {
                        let patched = match open_block_type.take() {
                            // Overwrite with the block type recorded at start.
                            Some(block_type) => crate::llm_client::event::BlockStop {
                                block_type,
                                ..stop
                            },
                            None => stop,
                        };
                        Some(Ok(Event::BlockStop(patched)))
                    }
                    Ok(Some(other)) => Some(Ok(other)),
                },
            };
            ready(item)
        });
        Ok(Box::pin(stream))
    }
}
#[cfg(test)]
mod tests {
use super::*;
// The constructor stores the model name it was given.
#[test]
fn test_client_creation() {
let client = AnthropicClient::new("test-key", "claude-sonnet-4-20250514");
assert_eq!(client.model, "claude-sonnet-4-20250514");
}
// A client built with the default scheme emits the API key, version and
// beta headers.
// NOTE(review): the `anthropic-beta` assertion presumably relies on
// `AnthropicScheme::default()` enabling fine-grained tool streaming — confirm.
#[test]
fn test_build_headers() {
let client = AnthropicClient::new("test-key", "claude-sonnet-4-20250514");
let headers = client.build_headers().unwrap();
assert!(headers.contains_key("x-api-key"));
assert!(headers.contains_key("anthropic-version"));
assert!(headers.contains_key("anthropic-beta"));
}
}

View File

@ -1,185 +0,0 @@
//! Gemini プロバイダ実装
//!
//! Google Gemini APIと通信し、Eventストリームを出力
use std::pin::Pin;
use crate::llm_client::{
ClientError, LlmClient, Request, event::Event, scheme::gemini::GeminiScheme,
};
use async_trait::async_trait;
use eventsource_stream::Eventsource;
use futures::{Stream, StreamExt, TryStreamExt};
use reqwest::header::{CONTENT_TYPE, HeaderMap, HeaderValue};
/// Gemini client
pub struct GeminiClient {
/// HTTP client
http_client: reqwest::Client,
/// API key
api_key: String,
/// Model name
model: String,
/// Scheme
scheme: GeminiScheme,
/// Base URL
base_url: String,
}
impl GeminiClient {
    /// Create a new Gemini client with a fresh HTTP client, the default
    /// scheme, and `https://generativelanguage.googleapis.com` as the base URL.
    pub fn new(api_key: impl Into<String>, model: impl Into<String>) -> Self {
        let base_url = String::from("https://generativelanguage.googleapis.com");
        Self {
            http_client: reqwest::Client::new(),
            api_key: api_key.into(),
            model: model.into(),
            scheme: GeminiScheme::default(),
            base_url,
        }
    }

    /// Replace the HTTP client.
    pub fn with_http_client(self, client: reqwest::Client) -> Self {
        Self {
            http_client: client,
            ..self
        }
    }

    /// Replace the scheme.
    pub fn with_scheme(self, scheme: GeminiScheme) -> Self {
        Self { scheme, ..self }
    }

    /// Replace the base URL.
    pub fn with_base_url(self, url: impl Into<String>) -> Self {
        Self {
            base_url: url.into(),
            ..self
        }
    }

    /// Build the request headers. Only `Content-Type` is set here; the API
    /// key is sent in the URL by `stream`.
    fn build_headers(&self) -> Result<HeaderMap, ClientError> {
        let content_type = HeaderValue::from_static("application/json");
        let mut headers = HeaderMap::new();
        headers.insert(CONTENT_TYPE, content_type);
        Ok(headers)
    }
}
#[async_trait]
impl LlmClient for GeminiClient {
    /// Send the request to the `streamGenerateContent` endpoint and return
    /// the parsed SSE event stream.
    ///
    /// A non-success HTTP status is turned into `ClientError::Api`; when the
    /// body is JSON its `error.status` / `error.message` are used, otherwise
    /// the raw body becomes the message.
    async fn stream(
        &self,
        request: Request,
    ) -> Result<Pin<Box<dyn Stream<Item = Result<Event, ClientError>> + Send>>, ClientError> {
        // URL: base_url/v1beta/models/{model}:streamGenerateContent?alt=sse&key={api_key}
        let endpoint = format!(
            "{}/v1beta/models/{}:streamGenerateContent?alt=sse&key={}",
            self.base_url, self.model, self.api_key
        );
        let headers = self.build_headers()?;
        let payload = self.scheme.build_request(&request);
        let response = self
            .http_client
            .post(&endpoint)
            .headers(headers)
            .json(&payload)
            .send()
            .await?;

        // Error responses: extract as much structure as the body offers.
        let http_status = response.status();
        if !http_status.is_success() {
            let text = response.text().await.unwrap_or_default();
            let (code, message) = match serde_json::from_str::<serde_json::Value>(&text) {
                Ok(json) => {
                    // Gemini error format:
                    // { "error": { "code": xxx, "message": "...", "status": "..." } }
                    let error = json.get("error").unwrap_or(&json);
                    let code = error
                        .get("status")
                        .and_then(|v| v.as_str())
                        .map(String::from);
                    let message = error
                        .get("message")
                        .and_then(|v| v.as_str())
                        .unwrap_or(&text)
                        .to_string();
                    (code, message)
                }
                // Body is not JSON: report it verbatim.
                Err(_) => (None, text),
            };
            return Err(ClientError::Api {
                status: Some(http_status.as_u16()),
                code,
                message,
            });
        }

        // Build the SSE stream.
        let scheme = self.scheme.clone();
        let event_stream = response
            .bytes_stream()
            .map_err(|e| std::io::Error::other(e))
            .eventsource();

        let stream = event_stream
            .map(move |frame| {
                // Gemini sends its payload as "data: {...}"; parse that data.
                let parsed = match frame {
                    Ok(event) => scheme.parse_event(&event.data),
                    Err(e) => Err(ClientError::Sse(e.to_string())),
                };
                // Each frame becomes a small stream so that one frame can
                // yield zero, one or several events.
                let chunk: Pin<Box<dyn Stream<Item = Result<Event, ClientError>> + Send>> =
                    match parsed {
                        Ok(Some(events)) => {
                            Box::pin(futures::stream::iter(events.into_iter().map(Ok)))
                        }
                        Ok(None) => Box::pin(futures::stream::empty()),
                        Err(e) => Box::pin(futures::stream::once(async move { Err(e) })),
                    };
                chunk
            })
            .flatten();
        Ok(Box::pin(stream))
    }
}
// Unit tests for `GeminiClient` construction and its builder methods.
#[cfg(test)]
mod tests {
use super::*;
// The constructor stores the model name it was given.
#[test]
fn test_client_creation() {
let client = GeminiClient::new("test-key", "gemini-2.0-flash");
assert_eq!(client.model, "gemini-2.0-flash");
}
// The built headers contain a content-type entry.
#[test]
fn test_build_headers() {
let client = GeminiClient::new("test-key", "gemini-2.0-flash");
let headers = client.build_headers().unwrap();
assert!(headers.contains_key("content-type"));
}
// `with_base_url` overrides the default endpoint.
#[test]
fn test_custom_base_url() {
let client = GeminiClient::new("test-key", "gemini-2.0-flash")
.with_base_url("https://custom.api.example.com");
assert_eq!(client.base_url, "https://custom.api.example.com");
}
}

View File

@ -1,8 +0,0 @@
//! プロバイダ実装
//!
//! 各プロバイダ固有のHTTPクライアント実装
pub mod anthropic;
pub mod gemini;
pub mod ollama;
pub mod openai;

View File

@ -1,62 +0,0 @@
//! Ollama プロバイダ実装
//!
//! OllamaはOpenAI互換APIを提供するため、OpenAIクライアントと互換性がある。
//! デフォルトのベースURLと認証設定が異なる。
use std::pin::Pin;
use crate::llm_client::{
ClientError, LlmClient, Request, event::Event, providers::openai::OpenAIClient,
scheme::openai::OpenAIScheme,
};
use async_trait::async_trait;
use futures::Stream;
/// Ollama client.
///
/// A wrapper that uses `OpenAIClient` internally, supplying it with a
/// customized, Ollama-specific configuration.
pub struct OllamaClient {
// The wrapped client that requests are delegated to.
inner: OpenAIClient,
}
impl OllamaClient {
    /// Creates an Ollama client for `model`.
    ///
    /// The wrapped `OpenAIClient` is configured with the placeholder API key
    /// `"ollama"`, the base URL `http://localhost:11434`, and an
    /// `OpenAIScheme` with legacy `max_tokens` enabled.
    pub fn new(model: impl Into<String>) -> Self {
        // Ollama usually runs on localhost:11434/v1; the API key is "ollama"
        // or ignored.
        let legacy_scheme = OpenAIScheme::new().with_legacy_max_tokens(true);
        // NOTE(review): OpenAIScheme currently sets include_usage: true; this
        // assumes modern Ollama versions support usage reporting — confirm.
        let inner = OpenAIClient::new("ollama", model)
            .with_base_url("http://localhost:11434")
            .with_scheme(legacy_scheme);
        Self { inner }
    }

    /// Replaces the base URL of the wrapped client.
    pub fn with_base_url(self, url: impl Into<String>) -> Self {
        Self {
            inner: self.inner.with_base_url(url),
        }
    }

    /// Replaces the HTTP client of the wrapped client.
    pub fn with_http_client(self, client: reqwest::Client) -> Self {
        Self {
            inner: self.inner.with_http_client(client),
        }
    }
}
#[async_trait]
impl LlmClient for OllamaClient {
// Streaming is delegated unchanged to the wrapped `OpenAIClient`; its result
// (stream or error) is returned as-is.
async fn stream(
&self,
request: Request,
) -> Result<Pin<Box<dyn Stream<Item = Result<Event, ClientError>> + Send>>, ClientError> {
self.inner.stream(request).await
}
}

View File

@ -1,212 +0,0 @@
//! OpenAI プロバイダ実装
//!
//! OpenAI Chat Completions APIと通信し、Eventストリームを出力
use std::pin::Pin;
use crate::llm_client::{
ClientError, ConfigWarning, LlmClient, Request, RequestConfig, event::Event,
scheme::openai::OpenAIScheme,
};
use async_trait::async_trait;
use eventsource_stream::Eventsource;
use futures::{Stream, StreamExt, TryStreamExt};
use reqwest::header::{CONTENT_TYPE, HeaderMap, HeaderValue};
/// Client for the OpenAI Chat Completions API.
///
/// Bundles the HTTP client, credentials, target model, wire scheme and base
/// URL used when issuing a streaming request.
pub struct OpenAIClient {
/// HTTP client used to send requests.
http_client: reqwest::Client,
/// API key; an empty key means no `Authorization` header is sent.
api_key: String,
/// Model name.
model: String,
/// Scheme used to build the request body and parse SSE data.
scheme: OpenAIScheme,
/// Base URL the chat-completions path is appended to.
base_url: String,
}
impl OpenAIClient {
    /// Creates an OpenAI client for `model`, authenticated with `api_key`.
    ///
    /// The client starts with a fresh `reqwest::Client`, the default
    /// `OpenAIScheme` and `https://api.openai.com` as its base URL.
    pub fn new(api_key: impl Into<String>, model: impl Into<String>) -> Self {
        let api_key = api_key.into();
        let model = model.into();
        Self {
            http_client: reqwest::Client::new(),
            api_key,
            model,
            scheme: OpenAIScheme::default(),
            base_url: String::from("https://api.openai.com"),
        }
    }

    /// Replaces the HTTP client.
    pub fn with_http_client(self, client: reqwest::Client) -> Self {
        Self {
            http_client: client,
            ..self
        }
    }

    /// Replaces the scheme.
    pub fn with_scheme(self, scheme: OpenAIScheme) -> Self {
        Self { scheme, ..self }
    }

    /// Replaces the base URL.
    pub fn with_base_url(self, url: impl Into<String>) -> Self {
        Self {
            base_url: url.into(),
            ..self
        }
    }

    /// Builds the request headers.
    ///
    /// Always sets `Content-Type: application/json`. A non-empty API key is
    /// sent as a sensitive `Authorization: Bearer …` header; an empty key
    /// results in no `Authorization` header at all.
    ///
    /// # Errors
    ///
    /// Returns `ClientError::Config` when the API key cannot be represented
    /// as a header value.
    fn build_headers(&self) -> Result<HeaderMap, ClientError> {
        let mut header_map = HeaderMap::new();
        header_map.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));

        // Providers such as Ollama may run with an empty/dummy key: in that
        // case the header is simply omitted.
        if self.api_key.is_empty() {
            return Ok(header_map);
        }

        let bearer = format!("Bearer {}", self.api_key);
        let mut authorization = HeaderValue::from_str(&bearer)
            .map_err(|e| ClientError::Config(format!("Invalid API key: {}", e)))?;
        authorization.set_sensitive(true);
        header_map.insert("Authorization", authorization);
        Ok(header_map)
    }
}
#[async_trait]
impl LlmClient for OpenAIClient {
    /// Sends `request` to the Chat Completions endpoint and returns the
    /// resulting stream of [`Event`]s.
    ///
    /// The endpoint is derived from `base_url` after removing trailing
    /// slashes: a base ending in `/v1` gets `/chat/completions` appended,
    /// any other base gets `/v1/chat/completions`.
    ///
    /// # Errors
    ///
    /// - Returns `ClientError::Config` (from `build_headers`) for an API key
    ///   that is not a valid header value.
    /// - Returns the converted transport error when the request cannot be sent.
    /// - Returns `ClientError::Api` when the HTTP status is not a success. If
    ///   the body is JSON, `code` (the `type` field) and `message` are read
    ///   from its `error` object (or from the top level when there is no
    ///   `error` key); otherwise the raw body becomes the message.
    /// - Items of the returned stream are `ClientError::Sse` for SSE framing
    ///   or transport failures, or whatever `parse_event` returns for data it
    ///   rejects.
    async fn stream(
        &self,
        request: Request,
    ) -> Result<Pin<Box<dyn Stream<Item = Result<Event, ClientError>> + Send>>, ClientError> {
        // Accept both a bare host ("https://api.openai.com") and a versioned
        // base ("http://localhost:11434/v1"). Trailing slashes are trimmed
        // first so that ".../v1/" does not become ".../v1/v1/chat/completions".
        let base = self.base_url.trim_end_matches('/');
        let url = if base.ends_with("/v1") {
            format!("{}/chat/completions", base)
        } else {
            format!("{}/v1/chat/completions", base)
        };

        let headers = self.build_headers()?;
        let body = self.scheme.build_request(&self.model, &request);

        let response = self
            .http_client
            .post(&url)
            .headers(headers)
            .json(&body)
            .send()
            .await?;

        // Turn a non-success status into a structured API error.
        if !response.status().is_success() {
            let status = response.status().as_u16();
            let text = response.text().await.unwrap_or_default();

            // OpenAI error format: { "error": { "message": "...", "type": "...", ... } }
            let (code, message) = match serde_json::from_str::<serde_json::Value>(&text) {
                Ok(json) => {
                    let error = json.get("error").unwrap_or(&json);
                    let code = error.get("type").and_then(|v| v.as_str()).map(String::from);
                    let message = error
                        .get("message")
                        .and_then(|v| v.as_str())
                        .unwrap_or(&text)
                        .to_string();
                    (code, message)
                }
                // Not JSON: report the raw body.
                Err(_) => (None, text),
            };

            return Err(ClientError::Api {
                status: Some(status),
                code,
                message,
            });
        }

        // Build the SSE stream. OpenAI stream events are "data: {...}".
        let scheme = self.scheme.clone();
        let stream = response
            .bytes_stream()
            .map_err(std::io::Error::other)
            .eventsource()
            .map(move |frame| {
                frame
                    .map_err(|e| ClientError::Sse(e.to_string()))
                    .and_then(|event| {
                        if event.data == "[DONE]" {
                            // End-of-stream sentinel: produces no events.
                            Ok(None)
                        } else {
                            scheme.parse_event(&event.data)
                        }
                    })
            })
            // Flatten Result<Option<Vec<Event>>, _> into individual items
            // without boxing a sub-stream per frame.
            .flat_map(|parsed| {
                let items: Vec<Result<Event, ClientError>> = match parsed {
                    Ok(Some(events)) => events.into_iter().map(Ok).collect(),
                    Ok(None) => Vec::new(),
                    Err(e) => vec![Err(e)],
                };
                futures::stream::iter(items)
            });

        Ok(Box::pin(stream))
    }

    /// Reports request settings this provider does not support.
    ///
    /// Currently only `top_k` is flagged.
    fn validate_config(&self, config: &RequestConfig) -> Vec<ConfigWarning> {
        let mut warnings = Vec::new();

        // OpenAI does not support top_k
        if config.top_k.is_some() {
            warnings.push(ConfigWarning::unsupported("top_k", "OpenAI"));
        }

        warnings
    }
}

View File

@ -0,0 +1,104 @@
//! LLM response stream を開く前の transient error 向けリトライポリシー。
//!
//! Worker が `LlmClient::stream` の open error に対して `is_retryable` を見て
//! retry / backoff / TUI event / cancellation をまとめて管理する。
//! SSE 読み出し開始後の失敗は対象外。
use std::time::Duration;
/// Policy describing exponential backoff + jitter + a cumulative timeout.
///
/// `Default` returns the fixed values used across llm-worker. Overriding them
/// via the manifest can be added if it ever becomes necessary (currently not
/// needed → `tickets/llm-worker-transient-retry.md`).
#[derive(Debug, Clone)]
pub struct RetryPolicy {
    /// Base of the exponential. The actual wait is drawn with full jitter
    /// from `[0, min(base * 2^attempt, cap)]`.
    pub base: Duration,
    /// Upper bound for a single wait.
    pub cap: Duration,
    /// Total number of attempts (initial try + retries). `1` means no retry.
    /// This type only stores the value.
    pub max_attempts: u32,
    /// Cumulative timeout measured from the start of the first send; waits
    /// that would exceed it are cut off. This type only stores the value.
    pub total_timeout: Duration,
}

impl Default for RetryPolicy {
    fn default() -> Self {
        Self {
            base: Duration::from_millis(500),
            cap: Duration::from_secs(10),
            max_attempts: 4,
            total_timeout: Duration::from_secs(30),
        }
    }
}

impl RetryPolicy {
    /// Returns how long to wait after the `attempt`-th failure (0-indexed).
    ///
    /// When a `Retry-After` value should take precedence, do not call this
    /// and use that value directly.
    ///
    /// The result is always within `[0, min(base * 2^attempt, cap)]`.
    /// Durations too large to express as `u64` nanoseconds saturate instead
    /// of being truncated.
    pub fn backoff(&self, attempt: u32) -> Duration {
        // Clamp the exponent so `1 << shift` stays well-defined for any input.
        let shift = attempt.min(20);
        let exp_nanos = saturating_nanos(self.base).saturating_mul(1u64 << shift);
        let upper = exp_nanos.min(saturating_nanos(self.cap));
        Duration::from_nanos(jitter_nanos(upper))
    }
}

/// Converts `d` to whole nanoseconds, saturating at `u64::MAX`.
///
/// A plain `as u64` cast would keep only the low 64 bits of the `u128`
/// value, e.g. turning a duration of exactly 2^64 ns into 0.
fn saturating_nanos(d: Duration) -> u64 {
    u64::try_from(d.as_nanos()).unwrap_or(u64::MAX)
}

/// Picks one pseudo-random value from `[0, max_nanos]`.
///
/// A lightweight implementation that scrambles the current `SystemTime`
/// with the splitmix64 finalizer. It is not cryptographically random, but
/// sufficient for spreading out full-jitter waits.
fn jitter_nanos(max_nanos: u64) -> u64 {
    if max_nanos == 0 {
        return 0;
    }
    let seed = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map(|d| d.as_nanos() as u64) // truncation is fine: only the low bits are used as seed
        .unwrap_or(0);
    let mut x = seed.wrapping_add(0x9E37_79B9_7F4A_7C15);
    x = (x ^ (x >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9);
    x = (x ^ (x >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB);
    x ^= x >> 31;
    match max_nanos.checked_add(1) {
        Some(modulus) => x % modulus,
        // `max_nanos == u64::MAX`: the range is the whole `u64` domain, and
        // `max_nanos + 1` would overflow.
        None => x,
    }
}
// Unit tests for `RetryPolicy` defaults and `backoff` bounds.
#[cfg(test)]
mod tests {
use super::*;
// Pins the fixed default values.
#[test]
fn default_policy_values() {
let p = RetryPolicy::default();
assert_eq!(p.base, Duration::from_millis(500));
assert_eq!(p.cap, Duration::from_secs(10));
assert_eq!(p.max_attempts, 4);
assert_eq!(p.total_timeout, Duration::from_secs(30));
}
// No attempt number may produce a wait above `cap`.
#[test]
fn backoff_respects_cap() {
let p = RetryPolicy::default();
for attempt in 0..30u32 {
assert!(
p.backoff(attempt) <= p.cap,
"attempt {attempt} exceeded cap",
);
}
}
// A zero base yields a zero wait regardless of the attempt number.
#[test]
fn backoff_zero_when_base_zero() {
let p = RetryPolicy {
base: Duration::ZERO,
cap: Duration::from_secs(10),
max_attempts: 4,
total_timeout: Duration::from_secs(30),
};
for attempt in 0..5 {
assert_eq!(p.backoff(attempt), Duration::ZERO);
}
}
}

View File

@ -0,0 +1,23 @@
//! Anthropic scheme の wire-level 既定 capability。
//!
//! モデル ID 固有のテーブル(`claude-*` など)は高レベル構築層
//! (`provider::capability`)の責務。ここでは未知モデルでも「この wire で
//! 安全に送れる最小共通項」を返すだけに留める。
use crate::llm_client::capability::{
CacheStrategy, ModelCapability, StructuredOutput, ToolCallingSupport,
};
/// Default capability of the scheme.
///
/// Uses `CacheStrategy::Auto`, which does not send `cache_control`, with
/// reuse of this wire format by Ollama's `/v1/messages` in mind.
pub(crate) fn default_capability() -> ModelCapability {
ModelCapability {
tool_calling: ToolCallingSupport::Parallel,
structured_output: StructuredOutput::JsonSchema,
reasoning: None,
vision: false,
prompt_caching: CacheStrategy::Auto,
}
}

View File

@ -12,6 +12,7 @@ use crate::llm_client::{
use serde::Deserialize;
use super::AnthropicScheme;
use super::scheme_impl::{AnthropicState, PendingThinking};
/// Anthropic SSEイベントタイプ
#[derive(Debug, Clone, PartialEq, Eq)]
@ -75,7 +76,21 @@ pub(crate) enum ContentBlock {
#[serde(rename = "text")]
Text { text: String },
#[serde(rename = "thinking")]
Thinking { thinking: String },
Thinking {
#[serde(default)]
thinking: String,
/// 非ストリーミングレスポンス由来の初期 signature通常はストリームでは
/// 空 → `signature_delta` で埋まる)。
#[serde(default)]
signature: Option<String>,
},
#[serde(rename = "redacted_thinking")]
RedactedThinking {
/// 暗号化された opaque blob。signature ではなく、まるごと
/// `redacted_thinking.data` として送り返す必要がある。
#[serde(default)]
data: String,
},
#[serde(rename = "tool_use")]
ToolUse {
id: String,
@ -228,7 +243,9 @@ impl AnthropicScheme {
fn convert_block_start(&self, event: &ContentBlockStartEvent) -> Event {
let (block_type, metadata) = match &event.content_block {
ContentBlock::Text { .. } => (BlockType::Text, BlockMetadata::Text),
ContentBlock::Thinking { .. } => (BlockType::Thinking, BlockMetadata::Thinking),
ContentBlock::Thinking { .. } | ContentBlock::RedactedThinking { .. } => {
(BlockType::Thinking, BlockMetadata::Thinking)
}
ContentBlock::ToolUse { id, name, .. } => (
BlockType::ToolUse,
BlockMetadata::ToolUse {
@ -264,13 +281,139 @@ impl AnthropicScheme {
}))
}
/// Higher-level parse that threads `state` across SSE frames.
///
/// In addition to the single `Event` produced by `parse_event`, this:
/// - fills in `block_type` on `content_block_stop` with the value recorded
///   at the preceding start
/// - accumulates the body, signature and data of `thinking` /
///   `redacted_thinking` blocks in `state.pending_thinking` and additionally
///   emits `Event::ReasoningItem` on `content_block_stop`
/// - accumulates `signature_delta` (not forwarded to the stream channel;
///   only reflected in the reasoning event)
///
/// Returns an empty vector when `AnthropicEventType::parse` does not
/// recognise `event_type`.
///
/// # Errors
///
/// Propagates JSON deserialization failures of `data` and any error returned
/// by `parse_event`.
pub(crate) fn parse_with_state(
&self,
event_type: &str,
data: &str,
state: &mut AnthropicState,
) -> Result<Vec<Event>, ClientError> {
let Some(parsed_event_type) = AnthropicEventType::parse(event_type) else {
return Ok(Vec::new());
};
// signature_delta is not forwarded for stream display; it is only
// accumulated in state. Everything else becomes a standard Event.
let mut emitted: Vec<Event> = Vec::new();
match parsed_event_type {
AnthropicEventType::ContentBlockStart => {
let raw: ContentBlockStartEvent = serde_json::from_str(data)?;
// Remember the block type so the matching stop event can report it.
state.current_block_type = Some(match &raw.content_block {
ContentBlock::Text { .. } => BlockType::Text,
ContentBlock::Thinking { .. } | ContentBlock::RedactedThinking { .. } => {
BlockType::Thinking
}
ContentBlock::ToolUse { .. } => BlockType::ToolUse,
});
// Start a fresh accumulation buffer for thinking-like blocks.
match &raw.content_block {
ContentBlock::Thinking {
thinking,
signature,
} => {
state.pending_thinking = Some(PendingThinking {
text: thinking.clone(),
signature: signature.clone(),
redacted_data: None,
});
}
ContentBlock::RedactedThinking { data: blob } => {
state.pending_thinking = Some(PendingThinking {
text: String::new(),
signature: None,
redacted_data: Some(blob.clone()),
});
}
_ => {}
}
emitted.push(self.convert_block_start(&raw));
}
AnthropicEventType::ContentBlockDelta => {
let raw: ContentBlockDeltaEvent = serde_json::from_str(data)?;
match &raw.delta {
DeltaBlock::ThinkingDelta { thinking } => {
// Accumulate for the final ReasoningItem and also forward the delta.
if let Some(pending) = state.pending_thinking.as_mut() {
pending.text.push_str(thinking);
}
emitted.push(Event::BlockDelta(BlockDelta {
index: raw.index,
delta: DeltaContent::Thinking(thinking.clone()),
}));
}
DeltaBlock::SignatureDelta { signature } => {
if let Some(pending) = state.pending_thinking.as_mut() {
// Usually arrives only once, but concatenate in case several fragments come.
match &mut pending.signature {
Some(acc) => acc.push_str(signature),
None => pending.signature = Some(signature.clone()),
}
}
}
DeltaBlock::TextDelta { text } => {
emitted.push(Event::BlockDelta(BlockDelta {
index: raw.index,
delta: DeltaContent::Text(text.clone()),
}));
}
DeltaBlock::InputJsonDelta { partial_json } => {
emitted.push(Event::BlockDelta(BlockDelta {
index: raw.index,
delta: DeltaContent::InputJson(partial_json.clone()),
}));
}
}
}
AnthropicEventType::ContentBlockStop => {
let raw: ContentBlockStopEvent = serde_json::from_str(data)?;
// Falls back to Text when no start event was recorded.
let block_type = state.current_block_type.take().unwrap_or(BlockType::Text);
emitted.push(Event::BlockStop(BlockStop {
index: raw.index,
block_type,
stop_reason: None,
}));
// A finished thinking block additionally yields its accumulated reasoning item.
if matches!(block_type, BlockType::Thinking) {
if let Some(pending) = state.pending_thinking.take() {
emitted.push(Event::ReasoningItem(pending.into_event()));
}
}
}
// The remaining event types need no state; delegate to the existing parse_event.
_ => {
if let Some(event) = self.parse_event(event_type, data)? {
emitted.push(event);
}
}
}
Ok(emitted)
}
fn convert_usage(&self, usage: &UsageData) -> UsageEvent {
let input = usage.input_tokens.unwrap_or(0);
// Anthropic の `input_tokens` は **キャッシュ外** の入力トークンのみで、
// プロンプト全長は input_tokens + cache_read + cache_creation。
// UsageEvent の `input_tokens` には「占有量(プロンプト全長)」を載せる
// 規約に合わせて、ここでキャッシュ分を足し込む。
// cache_read_input_tokens / cache_creation_input_tokens は内訳として
// 別フィールドに残るので、料金計算側で `input - cache_read - cache_creation`
// により非キャッシュ入力分は逆算可能。
let raw_input = usage.input_tokens.unwrap_or(0);
let cache_read = usage.cache_read_input_tokens.unwrap_or(0);
let cache_creation = usage.cache_creation_input_tokens.unwrap_or(0);
let input_total = raw_input + cache_read + cache_creation;
let output = usage.output_tokens.unwrap_or(0);
UsageEvent {
input_tokens: usage.input_tokens,
input_tokens: usage.input_tokens.map(|_| input_total),
output_tokens: usage.output_tokens,
total_tokens: Some(input + output),
total_tokens: Some(input_total + output),
cache_read_input_tokens: usage.cache_read_input_tokens,
cache_creation_input_tokens: usage.cache_creation_input_tokens,
}
@ -289,12 +432,33 @@ mod tests {
let event = scheme.parse_event("message_start", data).unwrap().unwrap();
match event {
Event::Usage(u) => {
// キャッシュなしなので input_total = raw_input = 10
assert_eq!(u.input_tokens, Some(10));
}
_ => panic!("Expected Usage event"),
}
}
#[test]
fn test_convert_usage_includes_cache_in_input_total() {
// Anthropic's input_tokens covers only the non-cached part; the occupied
// size is input + cache_read + cache_creation.
// UsageEvent.input_tokens is normalized to that occupied size.
let scheme = AnthropicScheme::new();
let usage = UsageData {
input_tokens: Some(100),
output_tokens: Some(50),
cache_read_input_tokens: Some(800),
cache_creation_input_tokens: Some(200),
};
let event = scheme.convert_usage(&usage);
// 100 + 800 + 200 = 1100
assert_eq!(event.input_tokens, Some(1100));
assert_eq!(event.cache_read_input_tokens, Some(800));
assert_eq!(event.cache_creation_input_tokens, Some(200));
// 1100 input + 50 output
assert_eq!(event.total_tokens, Some(1150));
}
#[test]
fn test_parse_content_block_start_text() {
let scheme = AnthropicScheme::new();
@ -359,6 +523,117 @@ mod tests {
}
}
#[test]
fn thinking_block_emits_reasoning_item_with_signature() {
// Once a thinking block completes, the ReasoningItem must carry text + signature.
let scheme = AnthropicScheme::new();
let mut state = AnthropicState::default();
let evs = scheme
.parse_with_state(
"content_block_start",
r#"{"type":"content_block_start","index":0,"content_block":{"type":"thinking","thinking":""}}"#,
&mut state,
)
.unwrap();
assert!(matches!(evs[0], Event::BlockStart(_)));
// Two thinking deltas that should be concatenated.
scheme
.parse_with_state(
"content_block_delta",
r#"{"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":"hello "}}"#,
&mut state,
)
.unwrap();
scheme
.parse_with_state(
"content_block_delta",
r#"{"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":"world"}}"#,
&mut state,
)
.unwrap();
// The signature arrives as its own delta.
scheme
.parse_with_state(
"content_block_delta",
r#"{"type":"content_block_delta","index":0,"delta":{"type":"signature_delta","signature":"SIG-XYZ"}}"#,
&mut state,
)
.unwrap();
let stop_evs = scheme
.parse_with_state(
"content_block_stop",
r#"{"type":"content_block_stop","index":0}"#,
&mut state,
)
.unwrap();
// Two events in order: BlockStop, then ReasoningItem.
assert!(matches!(stop_evs[0], Event::BlockStop(_)));
let Event::ReasoningItem(reasoning) = &stop_evs[1] else {
panic!("expected ReasoningItem, got {:?}", stop_evs[1]);
};
assert_eq!(reasoning.text, "hello world");
assert_eq!(reasoning.signature.as_deref(), Some("SIG-XYZ"));
assert!(reasoning.encrypted_content.is_none());
}
// A redacted_thinking block yields a ReasoningItem whose opaque blob is
// carried in `encrypted_content`, with empty text and no signature.
#[test]
fn redacted_thinking_emits_reasoning_item_with_data() {
let scheme = AnthropicScheme::new();
let mut state = AnthropicState::default();
scheme
.parse_with_state(
"content_block_start",
r#"{"type":"content_block_start","index":0,"content_block":{"type":"redacted_thinking","data":"opaque-blob"}}"#,
&mut state,
)
.unwrap();
let stop_evs = scheme
.parse_with_state(
"content_block_stop",
r#"{"type":"content_block_stop","index":0}"#,
&mut state,
)
.unwrap();
// Index 1 follows the BlockStop event at index 0.
let Event::ReasoningItem(reasoning) = &stop_evs[1] else {
panic!("expected ReasoningItem");
};
assert!(reasoning.text.is_empty());
assert!(reasoning.signature.is_none());
assert_eq!(reasoning.encrypted_content.as_deref(), Some("opaque-blob"));
}
// A plain text block ends with a single BlockStop and no ReasoningItem.
#[test]
fn text_block_does_not_emit_reasoning_item() {
let scheme = AnthropicScheme::new();
let mut state = AnthropicState::default();
scheme
.parse_with_state(
"content_block_start",
r#"{"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}"#,
&mut state,
)
.unwrap();
scheme
.parse_with_state(
"content_block_delta",
r#"{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"hi"}}"#,
&mut state,
)
.unwrap();
let stop_evs = scheme
.parse_with_state(
"content_block_stop",
r#"{"type":"content_block_stop","index":0}"#,
&mut state,
)
.unwrap();
assert_eq!(stop_evs.len(), 1);
assert!(matches!(stop_evs[0], Event::BlockStop(_)));
}
#[test]
fn test_parse_ping() {
let scheme = AnthropicScheme::new();

View File

@ -3,8 +3,12 @@
//! - リクエストJSON生成
//! - SSEイベントパース → Event変換
mod capability;
mod events;
mod request;
mod scheme_impl;
pub use scheme_impl::AnthropicState;
/// Anthropicスキーマ
///

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,107 @@
//! `impl Scheme for AnthropicScheme`
//!
//! Anthropic Messages API の wire 表現に必要な URL・ヘッダ・SSE パース・
//! リクエスト body 生成を共通 `Scheme` trait にぶら下げる。
use serde_json::Value;
use crate::llm_client::{
ClientError,
auth::AuthRequirement,
capability::ModelCapability,
event::{BlockType, Event, ReasoningItemEvent},
scheme::Scheme,
types::Request,
};
use super::AnthropicScheme;
/// State required while parsing Anthropic SSE.
///
/// 1. By specification the `content_block_stop` event carries no
///    `block_type`, so the `block_type` observed at the preceding
///    `content_block_start` is kept and written back into `BlockStop`.
/// 2. Accumulates the `thinking_delta` text and `signature_delta` signature
///    of a `thinking` block, as well as the `data` of a `redacted_thinking`
///    block, and fires `Event::ReasoningItem` on `content_block_stop`
///    (for round-trip persistence).
#[derive(Debug, Default)]
pub struct AnthropicState {
// Block type recorded at the most recent `content_block_start`.
pub(crate) current_block_type: Option<BlockType>,
// Accumulation buffer for the thinking-like block in progress, if any.
pub(crate) pending_thinking: Option<PendingThinking>,
}
/// Accumulation buffer for a single `thinking` or `redacted_thinking`
/// content_block.
#[derive(Debug, Default)]
pub(crate) struct PendingThinking {
// Accumulated thinking text.
pub(crate) text: String,
// Accumulated signature, if one was seen.
pub(crate) signature: Option<String>,
// Opaque `data` of a `redacted_thinking` block, if any.
pub(crate) redacted_data: Option<String>,
}
impl PendingThinking {
    /// Consumes the buffer and turns it into a `ReasoningItemEvent`.
    ///
    /// The accumulated text and signature are passed through, the redacted
    /// data becomes `encrypted_content`, and `id` / `summary` are left empty.
    pub(crate) fn into_event(self) -> ReasoningItemEvent {
        let Self {
            text,
            signature,
            redacted_data,
        } = self;
        ReasoningItemEvent {
            id: None,
            text,
            summary: Vec::new(),
            encrypted_content: redacted_data,
            signature,
        }
    }
}
impl Scheme for AnthropicScheme {
    type State = AnthropicState;

    /// Base URL used when none is configured.
    fn default_base_url(&self) -> &'static str {
        "https://api.anthropic.com"
    }

    /// Request path; it does not depend on the model id.
    fn path(&self, _model_id: &str) -> String {
        String::from("/v1/messages")
    }

    /// Authentication this scheme asks for.
    fn required_auth(&self) -> AuthRequirement {
        // Ollama's `/v1/messages` compatibility needs no authentication, but
        // that is absorbed by `AuthRef::None` plus the `build_headers` branch
        // that does nothing for `ResolvedAuth::None`. The current strategy is
        // to require XApiKey while still letting the None side pass, so that
        // the `accepts` check does not reject it.
        AuthRequirement::XApiKey
    }

    /// Scheme-specific headers: always `anthropic-version`, plus
    /// `anthropic-beta` when fine-grained tool streaming is enabled.
    fn additional_headers(&self) -> Vec<(&'static str, String)> {
        let version = ("anthropic-version", self.api_version.clone());
        let beta = self.fine_grained_tool_streaming.then(|| {
            (
                "anthropic-beta",
                "fine-grained-tool-streaming-2025-05-14".to_string(),
            )
        });
        std::iter::once(version).chain(beta).collect()
    }

    /// Builds the request and serializes it to a JSON value.
    ///
    /// Panics only if serialization fails, which the expect message states
    /// cannot happen.
    fn build_request_body(
        &self,
        model_id: &str,
        request: &Request,
        capability: &ModelCapability,
    ) -> Value {
        let wire_request = self.build_request(model_id, request, capability);
        serde_json::to_value(&wire_request).expect("AnthropicRequest is always serialisable")
    }

    /// Parses one SSE frame by delegating to the stateful parser.
    fn parse_sse(
        &self,
        event_type: &str,
        data: &str,
        state: &mut Self::State,
    ) -> Result<Vec<Event>, ClientError> {
        self.parse_with_state(event_type, data, state)
    }

    /// Capability assumed when nothing more specific is known.
    fn default_capability(&self) -> ModelCapability {
        super::capability::default_capability()
    }
}

View File

@ -0,0 +1,20 @@
//! Gemini scheme の wire-level 既定 capability。
//!
//! モデル ID 固有のテーブル(`gemini-*` バージョン別の reasoning 有無)は
//! 高レベル構築層(`provider::capability`)の責務。ここでは wire の
//! 保守的 default のみ。
use crate::llm_client::capability::{
CacheStrategy, ModelCapability, StructuredOutput, ToolCallingSupport,
};
/// Default capability of the scheme (for unknown models / models without an
/// explicit entry).
pub(crate) fn default_capability() -> ModelCapability {
ModelCapability {
tool_calling: ToolCallingSupport::Parallel,
structured_output: StructuredOutput::JsonSchema,
reasoning: None,
vision: true,
prompt_caching: CacheStrategy::Auto,
}
}

View File

@ -131,6 +131,7 @@ impl GeminiScheme {
status: None,
code: Some("parse_error".to_string()),
message: format!("Failed to parse Gemini SSE data: {} -> {}", e, data),
retry_after: None,
})?;
let mut events = Vec::new();

View File

@ -3,8 +3,10 @@
//! - リクエストJSON生成
//! - SSEイベントパース → Event変換
mod capability;
mod events;
mod request;
mod scheme_impl;
/// Geminiスキーマ
///

View File

@ -7,7 +7,8 @@ use serde_json::Value;
use crate::llm_client::{
Request,
types::{Item, Role, ToolDefinition},
capability::{ModelCapability, ReasoningControl, ReasoningSupport},
types::{Item, Role, ToolDefinition, parse_tool_arguments},
};
use super::GeminiScheme;
@ -139,11 +140,26 @@ pub(crate) struct GeminiGenerationConfig {
/// Stop sequences
#[serde(skip_serializing_if = "Vec::is_empty")]
pub stop_sequences: Vec<String>,
/// Thinking / reasoning 設定Gemini 2.5 以降)。
#[serde(skip_serializing_if = "Option::is_none")]
pub thinking_config: Option<GeminiThinkingConfig>,
}
/// Gemini thinking config (gemini-2.5 and later).
///
/// Serialized with camelCase field names, so `thinking_budget` is emitted as
/// `thinkingBudget`.
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct GeminiThinkingConfig {
/// Token budget for thinking. `-1` means dynamic.
pub thinking_budget: i32,
}
impl GeminiScheme {
/// Build Gemini request from Request
pub(crate) fn build_request(&self, request: &Request) -> GeminiRequest {
pub(crate) fn build_request(
&self,
request: &Request,
capability: &ModelCapability,
) -> GeminiRequest {
let contents = self.convert_items_to_contents(&request.items);
// System prompt
@ -177,6 +193,24 @@ impl GeminiScheme {
None
};
// Reasoning の投影: capability が BudgetTokens / Both をサポートし、
// request 側で budget_tokens が指定されているときだけ thinking_config を付ける。
let supports_budget = matches!(
capability.reasoning,
Some(ReasoningSupport::BudgetTokens | ReasoningSupport::Both),
);
let thinking_config = request
.config
.reasoning
.as_ref()
.filter(|_| supports_budget)
.and_then(|rc| match rc {
ReasoningControl::BudgetTokens(budget) => Some(GeminiThinkingConfig {
thinking_budget: *budget,
}),
ReasoningControl::Effort(_) => None,
});
// Generation config
let generation_config = Some(GeminiGenerationConfig {
max_output_tokens: request.config.max_tokens,
@ -184,6 +218,7 @@ impl GeminiScheme {
top_p: request.config.top_p,
top_k: request.config.top_k,
stop_sequences: request.config.stop_sequences.clone(),
thinking_config,
});
GeminiRequest {
@ -216,9 +251,8 @@ impl GeminiScheme {
);
let gemini_role = match role {
Role::User => "user",
Role::User | Role::System => "user",
Role::Assistant => "model",
Role::System => continue, // Skip system role items
};
let parts: Vec<GeminiPart> = content
@ -245,9 +279,8 @@ impl GeminiScheme {
});
}
// Parse arguments
let args = serde_json::from_str(arguments)
.unwrap_or_else(|_| Value::Object(serde_json::Map::new()));
// Parse arguments (normalize non-object / legacy "null" payloads to {})
let args = parse_tool_arguments(arguments);
pending_model_parts.push(GeminiPart::FunctionCall {
function_call: GeminiFunctionCall {
@ -258,7 +291,10 @@ impl GeminiScheme {
}
Item::ToolResult {
call_id, output, ..
call_id,
summary,
content,
..
} => {
// Flush pending model parts first
if !pending_model_parts.is_empty() {
@ -268,12 +304,16 @@ impl GeminiScheme {
});
}
let text = match content {
Some(c) => format!("{summary}\n{c}"),
None => summary.clone(),
};
pending_user_parts.push(GeminiPart::FunctionResponse {
function_response: GeminiFunctionResponse {
name: call_id.clone(),
response: GeminiFunctionResponseContent {
name: call_id.clone(),
content: Value::String(output.clone()),
content: Value::String(text),
},
},
});
@ -336,6 +376,26 @@ impl GeminiScheme {
#[cfg(test)]
mod tests {
use super::*;
use crate::llm_client::capability::{
CacheStrategy, ReasoningEffort, StructuredOutput, ToolCallingSupport,
};
// Baseline capability used by these tests: no reasoning support declared.
fn cap() -> ModelCapability {
ModelCapability {
tool_calling: ToolCallingSupport::Parallel,
structured_output: StructuredOutput::JsonSchema,
reasoning: None,
vision: true,
prompt_caching: CacheStrategy::Auto,
}
}
// Same as `cap()`, but declaring budget-token reasoning support.
fn cap_budget_reasoning() -> ModelCapability {
ModelCapability {
reasoning: Some(ReasoningSupport::BudgetTokens),
..cap()
}
}
#[test]
fn test_build_simple_request() {
@ -344,7 +404,7 @@ mod tests {
.system("You are a helpful assistant.")
.user("Hello!");
let gemini_req = scheme.build_request(&request);
let gemini_req = scheme.build_request(&request, &cap());
assert!(gemini_req.system_instruction.is_some());
assert_eq!(gemini_req.contents.len(), 1);
@ -366,7 +426,7 @@ mod tests {
})),
);
let gemini_req = scheme.build_request(&request);
let gemini_req = scheme.build_request(&request, &cap());
assert_eq!(gemini_req.tools.len(), 1);
assert_eq!(gemini_req.tools[0].function_declarations.len(), 1);
@ -382,7 +442,7 @@ mod tests {
let scheme = GeminiScheme::new();
let request = Request::new().user("Hello").assistant("Hi there!");
let gemini_req = scheme.build_request(&request);
let gemini_req = scheme.build_request(&request, &cap());
assert_eq!(gemini_req.contents.len(), 2);
assert_eq!(gemini_req.contents[0].role, "user");
@ -401,11 +461,36 @@ mod tests {
))
.item(Item::tool_result("call_123", "Sunny, 25°C"));
let gemini_req = scheme.build_request(&request);
let gemini_req = scheme.build_request(&request, &cap());
assert_eq!(gemini_req.contents.len(), 3);
assert_eq!(gemini_req.contents[0].role, "user");
assert_eq!(gemini_req.contents[1].role, "model");
assert_eq!(gemini_req.contents[2].role, "user");
}
// With budget-token support declared, a requested budget is projected into
// the generation config's thinking config unchanged.
#[test]
fn thinking_budget_projected_when_supported() {
let scheme = GeminiScheme::new();
let mut request = Request::new().user("think");
request.config.reasoning = Some(ReasoningControl::BudgetTokens(-1));
let gemini_req = scheme.build_request(&request, &cap_budget_reasoning());
let config = gemini_req.generation_config.expect("generation config");
let thinking = config.thinking_config.expect("thinking config");
assert_eq!(thinking.thinking_budget, -1);
}
// An effort-style reasoning request produces no thinking config, even when
// budget-token support is declared.
#[test]
fn effort_reasoning_not_projected_to_gemini() {
let scheme = GeminiScheme::new();
let mut request = Request::new().user("think");
request.config.reasoning = Some(ReasoningControl::Effort(ReasoningEffort::Medium));
let gemini_req = scheme.build_request(&request, &cap_budget_reasoning());
let config = gemini_req.generation_config.expect("generation config");
assert!(config.thinking_config.is_none());
}
}

View File

@ -0,0 +1,49 @@
//! `impl Scheme for GeminiScheme`
use serde_json::Value;
use crate::llm_client::{
ClientError, auth::AuthRequirement, capability::ModelCapability, event::Event, scheme::Scheme,
types::Request,
};
use super::GeminiScheme;
/// Plugs [`GeminiScheme`] into the generic [`Scheme`] abstraction.
impl Scheme for GeminiScheme {
    /// No state is carried between SSE frames for this scheme.
    type State = ();

    /// Base URL used when the caller does not override it.
    fn default_base_url(&self) -> &'static str {
        "https://generativelanguage.googleapis.com"
    }

    /// Relative request path; the model id is embedded in the path itself.
    fn path(&self, model_id: &str) -> String {
        ["/v1beta/models/", model_id, ":streamGenerateContent?alt=sse"].concat()
    }

    /// The API key is supplied as the `key` query parameter.
    fn required_auth(&self) -> AuthRequirement {
        AuthRequirement::QueryParam { name: "key" }
    }

    /// Builds the typed Gemini request and converts it to a JSON value.
    ///
    /// The model id is unused here because it is carried by [`Scheme::path`].
    ///
    /// # Panics
    ///
    /// Panics if the request cannot be serialised to JSON.
    fn build_request_body(
        &self,
        _model_id: &str,
        request: &Request,
        capability: &ModelCapability,
    ) -> Value {
        let wire_request = self.build_request(request, capability);
        let body = serde_json::to_value(&wire_request);
        body.expect("GeminiRequest is always serialisable")
    }

    /// Parses one SSE `data:` payload; a frame that yields nothing becomes an
    /// empty event list. The event type and state are ignored.
    fn parse_sse(
        &self,
        _event_type: &str,
        data: &str,
        _state: &mut Self::State,
    ) -> Result<Vec<Event>, ClientError> {
        let parsed = self.parse_event(data)?;
        Ok(parsed.unwrap_or_default())
    }

    /// Delegates to the scheme's wire-level default capability.
    fn default_capability(&self) -> ModelCapability {
        super::capability::default_capability()
    }
}

View File

@ -3,7 +3,90 @@
//! 各APIスキーマごとの変換ロジック
//! - リクエスト変換: Request → プロバイダ固有JSON
//! - レスポンス変換: SSEイベント → Event
//!
//! [`Scheme`] trait により `HttpTransport<S>` から scheme 固有の差分
//! (パス、ヘッダ、認証要件、body 生成、SSE パース)をすべて委譲する。
pub mod anthropic;
pub mod gemini;
pub mod openai;
pub mod openai_chat;
pub mod openai_responses;
use serde_json::Value;
use super::auth::AuthRequirement;
use super::capability::ModelCapability;
use super::client::ConfigWarning;
use super::error::ClientError;
use super::event::Event;
use super::types::{Request, RequestConfig};
/// Abstraction over a wire scheme; implemented once per provider API spec.
///
/// `HttpTransport<S: Scheme>` takes care of URL assembly, auth-header
/// insertion, and the SSE parsing loop, while a `Scheme` implementation
/// supplies only the differences specific to its spec.
///
/// # State
///
/// A scheme that must keep state between SSE frames (e.g. Anthropic, to
/// compensate for `BlockStop` not carrying a `block_type`) puts a type
/// describing that intermediate state in [`Scheme::State`].
/// A stateless scheme uses `type State = ()`.
pub trait Scheme: Clone + Send + Sync + 'static {
/// State shared across frames while parsing SSE. `HttpTransport` creates it
/// exactly once via `Default::default()` when the stream starts and passes
/// it as `&mut` for every frame.
type State: Default + Send + 'static;
/// Base URL of the scheme (the default used when `ModelConfig::base_url`
/// is not specified).
fn default_base_url(&self) -> &'static str;
/// Relative path of the request target. Takes the model id because some
/// providers, such as Gemini, embed the model name in the path.
fn path(&self, model_id: &str) -> String;
/// Authentication form this scheme requires. Checked against
/// `manifest::AuthRef` at `build_client` time.
fn required_auth(&self) -> AuthRequirement;
/// Extra headers besides `Content-Type`, e.g. `anthropic-version` /
/// `anthropic-beta`. The default implementation returns none.
fn additional_headers(&self) -> Vec<(&'static str, String)> {
Vec::new()
}
/// Generates the request body. `capability` is passed so the scheme can
/// branch its behaviour on things like `CacheStrategy` and
/// `ReasoningSupport`.
fn build_request_body(
&self,
model_id: &str,
request: &Request,
capability: &ModelCapability,
) -> Value;
/// Converts one SSE event into zero or more [`Event`]s.
///
/// `event_type` is the `event:` field of the SSE frame and `data` is the
/// `data:` field. Terminal markers such as `[DONE]` are detected by the
/// implementation. `state` is mutable state shared per stream.
fn parse_sse(
&self,
event_type: &str,
data: &str,
state: &mut Self::State,
) -> Result<Vec<Event>, ClientError>;
/// Default capability of the scheme. Returns the lowest common denominator
/// that is safe to send over this wire regardless of model id. The
/// capability table for known model ids is the responsibility of
/// `provider::capability::lookup` (the high-level construction layer);
/// the scheme is not involved there.
fn default_capability(&self) -> ModelCapability;
/// Returns warnings for `RequestConfig` fields the scheme does not support
/// (e.g. OpenAI Chat does not support `top_k`).
/// The default is an empty `Vec`.
fn validate_config(&self, config: &RequestConfig) -> Vec<ConfigWarning> {
let _ = config;
Vec::new()
}
}

View File

@ -0,0 +1,20 @@
//! OpenAI Chat Completions scheme の wire-level 既定 capability。
//!
//! モデル ID 固有のテーブル(`gpt-5` 系など)は高レベル構築層
//! (`provider::capability`)の責務。ここでは wire の保守的 default のみ。
use crate::llm_client::capability::{
CacheStrategy, ModelCapability, StructuredOutput, ToolCallingSupport,
};
/// Wire-level default capability of the OpenAI Chat Completions scheme.
///
/// Used as the fallback when an OpenAI-compatible router (xAI / Groq /
/// OpenRouter, etc.) is given a model id that is not known.
pub(crate) fn default_capability() -> ModelCapability {
    ModelCapability {
        prompt_caching: CacheStrategy::Auto,
        vision: false,
        reasoning: None,
        structured_output: StructuredOutput::JsonSchema,
        tool_calling: ToolCallingSupport::Parallel,
    }
}

View File

@ -75,6 +75,7 @@ impl OpenAIScheme {
status: None,
code: Some("parse_error".to_string()),
message: format!("Failed to parse SSE data: {} -> {}", e, data),
retry_after: None,
})?;
let mut events = Vec::new();

View File

@ -3,8 +3,10 @@
//! - リクエストJSON生成
//! - SSEイベントパース → Event変換
pub(crate) mod capability;
mod events;
mod request;
mod scheme_impl;
/// OpenAIスキーマ
///

View File

@ -7,7 +7,8 @@ use serde_json::Value;
use crate::llm_client::{
Request,
types::{Item, Role, ToolDefinition},
capability::{ModelCapability, ReasoningControl, ReasoningSupport},
types::{Item, Role, ToolDefinition, parse_tool_arguments},
};
use super::OpenAIScheme;
@ -34,6 +35,9 @@ pub(crate) struct OpenAIRequest {
pub tools: Vec<OpenAITool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub tool_choice: Option<String>,
/// Reasoning effort(o1 / o3 / o4 / gpt-5 系で有効)。
#[serde(skip_serializing_if = "Option::is_none")]
pub reasoning_effort: Option<String>,
}
#[derive(Debug, Serialize)]
@ -110,7 +114,12 @@ pub(crate) struct OpenAIToolCallFunction {
impl OpenAIScheme {
/// Build OpenAI request from Request
pub(crate) fn build_request(&self, model: &str, request: &Request) -> OpenAIRequest {
pub(crate) fn build_request(
&self,
model: &str,
request: &Request,
capability: &ModelCapability,
) -> OpenAIRequest {
let mut messages = Vec::new();
// Add system message if present
@ -135,6 +144,22 @@ impl OpenAIScheme {
(None, request.config.max_tokens)
};
// Reasoning の投影: capability が Effort / Both をサポートし、
// request 側で effort が指定されているときだけ reasoning_effort を付ける。
let supports_effort = matches!(
capability.reasoning,
Some(ReasoningSupport::Effort | ReasoningSupport::Both),
);
let reasoning_effort = request
.config
.reasoning
.as_ref()
.filter(|_| supports_effort)
.and_then(|rc| match rc {
ReasoningControl::Effort(effort) => Some(effort.as_str().to_string()),
ReasoningControl::BudgetTokens(_) => None,
});
OpenAIRequest {
model: model.to_string(),
max_completion_tokens,
@ -149,6 +174,7 @@ impl OpenAIScheme {
messages,
tools,
tool_choice: None,
reasoning_effort,
}
}
@ -201,18 +227,24 @@ impl OpenAIScheme {
arguments,
..
} => {
// Normalize non-object / legacy "null" payloads to "{}" so
// OpenAI gets a valid JSON object string.
let normalized_args = parse_tool_arguments(arguments).to_string();
pending_tool_calls.push(OpenAIToolCall {
id: call_id.clone(),
r#type: "function".to_string(),
function: OpenAIToolCallFunction {
name: name.clone(),
arguments: arguments.clone(),
arguments: normalized_args,
},
});
}
Item::ToolResult {
call_id, output, ..
call_id,
summary,
content,
..
} => {
// Flush pending tool calls before tool result
self.flush_pending_assistant(
@ -221,9 +253,13 @@ impl OpenAIScheme {
&mut pending_assistant_text,
);
let text = match content {
Some(c) => format!("{summary}\n{c}"),
None => summary.clone(),
};
messages.push(OpenAIMessage {
role: "tool".to_string(),
content: Some(OpenAIContent::Text(output.clone())),
content: Some(OpenAIContent::Text(text)),
tool_calls: vec![],
tool_call_id: Some(call_id.clone()),
name: None,
@ -284,13 +320,26 @@ impl OpenAIScheme {
#[cfg(test)]
mod tests {
use super::*;
use crate::llm_client::capability::{
CacheStrategy, ReasoningEffort, StructuredOutput, ToolCallingSupport,
};
/// Baseline capability for these tests: parallel tool calls, JSON-schema
/// structured output, automatic prompt caching, no reasoning, no vision.
fn cap() -> ModelCapability {
    ModelCapability {
        reasoning: None,
        vision: false,
        prompt_caching: CacheStrategy::Auto,
        structured_output: StructuredOutput::JsonSchema,
        tool_calling: ToolCallingSupport::Parallel,
    }
}
#[test]
fn test_build_simple_request() {
let scheme = OpenAIScheme::new();
let request = Request::new().system("System prompt").user("Hello");
let body = scheme.build_request("gpt-4o", &request);
let body = scheme.build_request("gpt-4o", &request, &cap());
assert_eq!(body.model, "gpt-4o");
assert_eq!(body.messages.len(), 2);
@ -311,7 +360,7 @@ mod tests {
.user("Check weather")
.tool(ToolDefinition::new("weather").description("Get weather"));
let body = scheme.build_request("gpt-4o", &request);
let body = scheme.build_request("gpt-4o", &request, &cap());
assert_eq!(body.tools.len(), 1);
assert_eq!(body.tools[0].function.name, "weather");
}
@ -321,7 +370,7 @@ mod tests {
let scheme = OpenAIScheme::new().with_legacy_max_tokens(true);
let request = Request::new().user("Hello").max_tokens(100);
let body = scheme.build_request("llama3", &request);
let body = scheme.build_request("llama3", &request, &cap());
assert_eq!(body.max_tokens, Some(100));
assert!(body.max_completion_tokens.is_none());
@ -332,12 +381,44 @@ mod tests {
let scheme = OpenAIScheme::new();
let request = Request::new().user("Hello").max_tokens(100);
let body = scheme.build_request("gpt-4o", &request);
let body = scheme.build_request("gpt-4o", &request, &cap());
assert_eq!(body.max_completion_tokens, Some(100));
assert!(body.max_tokens.is_none());
}
/// With an effort-capable model, an `Effort` control is forwarded verbatim as
/// `reasoning_effort`, including provider-native custom values.
#[test]
fn reasoning_effort_projected_when_supported() {
    let effort_capable = ModelCapability {
        reasoning: Some(ReasoningSupport::Effort),
        ..cap()
    };
    let mut req = Request::new().user("Hello");
    req.config.reasoning = Some(ReasoningControl::Effort(ReasoningEffort::Other(
        "provider-native".into(),
    )));

    let wire = OpenAIScheme::new().build_request("gpt-5", &req, &effort_capable);

    assert_eq!(wire.reasoning_effort.as_deref(), Some("provider-native"));
}
/// A `BudgetTokens` control has no Chat Completions equivalent, so no
/// `reasoning_effort` is emitted even when the capability is `Both`.
#[test]
fn budget_reasoning_not_projected_to_openai_chat() {
    let both_capable = ModelCapability {
        reasoning: Some(ReasoningSupport::Both),
        ..cap()
    };
    let mut req = Request::new().user("Hello");
    req.config.reasoning = Some(ReasoningControl::BudgetTokens(4096));

    let wire = OpenAIScheme::new().build_request("gpt-5", &req, &both_capable);

    assert!(wire.reasoning_effort.is_none());
}
#[test]
fn test_tool_call_and_result() {
let scheme = OpenAIScheme::new();
@ -350,7 +431,7 @@ mod tests {
))
.item(Item::tool_result("call_123", "Sunny, 25°C"));
let body = scheme.build_request("gpt-4o", &request);
let body = scheme.build_request("gpt-4o", &request, &cap());
assert_eq!(body.messages.len(), 3);
assert_eq!(body.messages[0].role, "user");

View File

@ -0,0 +1,67 @@
//! `impl Scheme for OpenAIScheme`
use serde_json::Value;
use crate::llm_client::{
ClientError,
auth::AuthRequirement,
capability::ModelCapability,
client::ConfigWarning,
event::Event,
scheme::Scheme,
types::{Request, RequestConfig},
};
use super::OpenAIScheme;
/// Plugs [`OpenAIScheme`] (Chat Completions) into the generic [`Scheme`]
/// abstraction.
impl Scheme for OpenAIScheme {
    /// No state is carried between SSE frames for this scheme.
    type State = ();

    /// Base URL used when the caller does not override it.
    fn default_base_url(&self) -> &'static str {
        "https://api.openai.com"
    }

    /// Fixed endpoint path; the model id travels in the body, not the path.
    fn path(&self, _model_id: &str) -> String {
        String::from("/v1/chat/completions")
    }

    /// Bearer-token authentication.
    fn required_auth(&self) -> AuthRequirement {
        AuthRequirement::Bearer
    }

    /// Builds the typed Chat Completions request and converts it to JSON.
    ///
    /// # Panics
    ///
    /// Panics if the request cannot be serialised to JSON.
    fn build_request_body(
        &self,
        model_id: &str,
        request: &Request,
        capability: &ModelCapability,
    ) -> Value {
        let wire_request = self.build_request(model_id, request, capability);
        let body = serde_json::to_value(&wire_request);
        body.expect("OpenAIRequest is always serialisable")
    }

    /// Parses one SSE `data:` payload into events.
    ///
    /// `data: [DONE]` is the terminal marker and yields no events. The event
    /// type and state are ignored.
    fn parse_sse(
        &self,
        _event_type: &str,
        data: &str,
        _state: &mut Self::State,
    ) -> Result<Vec<Event>, ClientError> {
        match data.trim() {
            "[DONE]" => Ok(Vec::new()),
            _ => {
                let parsed = self.parse_event(data)?;
                Ok(parsed.unwrap_or_default())
            }
        }
    }

    /// Delegates to the scheme's wire-level default capability.
    fn default_capability(&self) -> ModelCapability {
        super::capability::default_capability()
    }

    /// Reports config fields this wire does not accept.
    ///
    /// The OpenAI Chat Completions API does not accept `top_k`, so a set
    /// `top_k` produces a single "unsupported" warning.
    fn validate_config(&self, config: &RequestConfig) -> Vec<ConfigWarning> {
        config
            .top_k
            .iter()
            .map(|_| ConfigWarning::unsupported("top_k", "OpenAI Chat"))
            .collect()
    }
}

View File

@ -0,0 +1,18 @@
//! OpenAI Responses scheme の wire-level 既定 capability。
//!
//! モデル ID 固有のテーブル(`gpt-5` / `codex-` 系など)は高レベル構築層
//! (`provider::capability`)の責務。ここでは wire の保守的 default のみ。
use crate::llm_client::capability::{
CacheStrategy, ModelCapability, StructuredOutput, ToolCallingSupport,
};
/// Wire-level default capability of the OpenAI Responses scheme: parallel
/// tool calls, JSON-schema structured output, automatic prompt caching, and
/// neither reasoning nor vision.
pub(crate) fn default_capability() -> ModelCapability {
    ModelCapability {
        prompt_caching: CacheStrategy::Auto,
        vision: false,
        reasoning: None,
        structured_output: StructuredOutput::JsonSchema,
        tool_calling: ToolCallingSupport::Parallel,
    }
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,84 @@
//! OpenAI Responses API スキーマ (`/v1/responses`)
//!
//! Chat Completions とは別物の item-based wire format。reasoning item と
//! function_call item が first-class で、SSE イベントも `response.*` 名前空間で
//! 流れる。ChatGPT OAuth 経路 (codex) は本 scheme 必須。
//!
//! - リクエスト JSON 生成: [`request`]
//! - SSE イベントパース → [`Event`](crate::llm_client::event::Event) 変換: [`events`]
mod capability;
mod events;
mod request;
mod scheme_impl;
pub use scheme_impl::OpenAIResponsesState;
/// The OpenAI Responses scheme itself.
///
/// `store` / `include_encrypted_content` / `send_max_output_tokens` /
/// `send_sampling_params` are wire settings fixed per scheme. The defaults
/// target the official OpenAI Responses API (stateless + ZDR, with
/// `max_output_tokens` / `temperature` / `top_p` sent). For routes that accept
/// only a subset of parameters, such as the ChatGPT backend (codex-oauth), the
/// provider layer overrides them with `send_max_output_tokens=false` /
/// `send_sampling_params=false`. They are deliberately not part of
/// `ModelCapability`: they are wire policy, not a model capability.
#[derive(Debug, Clone)]
pub struct OpenAIResponsesScheme {
    /// Whether the server should store the response. `false` for
    /// ZDR / stateless operation.
    pub store: bool,
    /// Whether to attach `include: ["reasoning.encrypted_content"]`.
    /// Required when reasoning is used with `store=false`.
    pub include_encrypted_content: bool,
    /// Whether to put `max_output_tokens` in the body. The official OpenAI
    /// Responses API accepts it, but the ChatGPT backend (codex-oauth) answers
    /// 400 `Unsupported parameter`, so that route sets this to `false`.
    pub send_max_output_tokens: bool,
    /// Whether to put `temperature` / `top_p` in the body. The official OpenAI
    /// Responses API accepts them, but the ChatGPT backend (codex-oauth)
    /// answers 400 `Unsupported parameter`, so that route sets this to `false`.
    pub send_sampling_params: bool,
}

impl Default for OpenAIResponsesScheme {
    /// Defaults for the official API: `store=false`, encrypted reasoning
    /// content included, and both optional parameter groups sent.
    fn default() -> Self {
        Self {
            store: false,
            include_encrypted_content: true,
            send_max_output_tokens: true,
            send_sampling_params: true,
        }
    }
}

impl OpenAIResponsesScheme {
    /// Default settings (`store=false`, `include=["reasoning.encrypted_content"]`,
    /// `send_max_output_tokens=true`, `send_sampling_params=true`).
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Overrides `store`.
    ///
    /// Consumes `self` and returns the updated scheme; dropping the return
    /// value discards the change, hence `#[must_use]`.
    #[must_use]
    pub fn with_store(self, store: bool) -> Self {
        Self { store, ..self }
    }

    /// Overrides whether `include: ["reasoning.encrypted_content"]` is sent.
    #[must_use]
    pub fn with_include_encrypted_content(self, include: bool) -> Self {
        Self {
            include_encrypted_content: include,
            ..self
        }
    }

    /// Overrides whether `max_output_tokens` is put in the body.
    #[must_use]
    pub fn with_send_max_output_tokens(self, send: bool) -> Self {
        Self {
            send_max_output_tokens: send,
            ..self
        }
    }

    /// Overrides whether `temperature` / `top_p` are put in the body.
    #[must_use]
    pub fn with_send_sampling_params(self, send: bool) -> Self {
        Self {
            send_sampling_params: send,
            ..self
        }
    }
}

View File

@ -0,0 +1,650 @@
//! OpenAI Responses API リクエスト body 生成
//!
//! Chat Completions の `messages` と違い、Responses は `input[]` の
//! item 配列で reasoning / function_call / function_call_output が
//! first-class。`Item` を素に近い形で `input[]` に投影できる。
use serde::{Serialize, Serializer};
use serde_json::Value;
use crate::llm_client::{
Request,
capability::{ModelCapability, ReasoningControl, ReasoningSupport},
types::{ContentPart, Item, Role, ToolDefinition, parse_tool_arguments},
};
use super::OpenAIResponsesScheme;
/// Request body for `/v1/responses`.
#[derive(Debug, Serialize)]
pub(crate) struct ResponsesRequest {
/// Model id sent as `model`.
pub model: String,
/// Equivalent of the system prompt. A separate field from `input[]`;
/// omitted from the body when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub instructions: Option<String>,
/// Conversation items sent as `input[]`.
pub input: Vec<InputItem>,
/// Tool definitions; omitted from the body when empty.
#[serde(skip_serializing_if = "Vec::is_empty")]
pub tools: Vec<ResponseTool>,
/// Always sent as `"auto"`. Fixed by the scheme.
pub tool_choice: &'static str,
/// Always sent as `true`. Fixed by the scheme.
pub parallel_tool_calls: bool,
/// Reasoning control; omitted from the body when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub reasoning: Option<ReasoningConfig>,
/// `false` for ZDR / stateless operation.
pub store: bool,
/// Always `true`.
pub stream: bool,
/// E.g. `["reasoning.encrypted_content"]`; omitted from the body when empty.
#[serde(skip_serializing_if = "Vec::is_empty")]
pub include: Vec<&'static str>,
/// Accepted by the official OpenAI Responses API but rejected with 400 by
/// the ChatGPT backend (codex-oauth). When the scheme's
/// `send_max_output_tokens` is `false` this stays `None` and is dropped by
/// `skip_serializing_if`.
#[serde(skip_serializing_if = "Option::is_none")]
pub max_output_tokens: Option<u32>,
/// Accepted by the official OpenAI Responses API, but the ChatGPT backend
/// (codex-oauth) rejects `temperature` / `top_p` with 400. When the
/// scheme's `send_sampling_params` is `false` this stays `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub temperature: Option<f32>,
/// Nucleus-sampling parameter; omitted from the body when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub top_p: Option<f32>,
/// Stable per-conversation key. Without an explicit key, prompt caching on
/// the ChatGPT backend (codex-oauth) is almost ineffective. The pod side
/// passes a `SegmentId`. When `Request::cache_key` is `None` the key is not
/// sent at all.
#[serde(skip_serializing_if = "Option::is_none")]
pub prompt_cache_key: Option<String>,
}
/// Reasoning control sent as the `reasoning` object of the request body.
#[derive(Debug, Serialize)]
pub(crate) struct ReasoningConfig {
/// Reasoning effort string; omitted from the object when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub effort: Option<String>,
/// Output control for the summary. Fixed to `"auto"` so that `summary_text`
/// is received.
pub summary: &'static str,
}
/// One element of `input[]`.
///
/// Projects the item types of the Responses API in a close-to-raw form. Each
/// variant is serialised as an object tagged by a snake_case `type` field.
#[derive(Debug, Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub(crate) enum InputItem {
/// Conversation message; one of user / assistant / developer.
/// `Role::System` items are projected as `developer`, because the ChatGPT
/// backend rejects `role: "system"`. (Codex CLI also uses
/// DeveloperInstructions = `role: "developer"` to insert system-equivalent
/// content.)
Message {
role: &'static str,
content: Vec<InputContent>,
},
/// A past function-tool invocation (assistant side).
FunctionCall {
call_id: String,
name: String,
/// JSON string (already normalised even when the source was not an object).
arguments: String,
},
/// Result of a function tool (user side).
FunctionCallOutput {
call_id: String,
/// Responses allows a string or a structured output; here a string made
/// of `summary` + `content` joined by a newline is sent.
output: String,
},
/// Reasoning item. `encrypted_content` is always attached when present.
Reasoning {
#[serde(skip_serializing_if = "Option::is_none")]
id: Option<String>,
/// The Responses API requires the `summary` field on a reasoning item
/// (it must be sent as `[]` even when empty). For turns with a model
/// that returns no summary, such as GPT-5, and no reasoning effort
/// specified, no summary text is attached at all, so skipping this
/// field gets the request rejected with 400
/// "Missing required parameter: 'input[N].summary'".
summary: Vec<ReasoningSummaryPart>,
/// Plain reasoning text parts; omitted from the wire when empty.
#[serde(skip_serializing_if = "Vec::is_empty")]
content: Vec<ReasoningContentPart>,
#[serde(skip_serializing_if = "Option::is_none")]
encrypted_content: Option<String>,
},
}
/// Content part of a message. The input/output variant is chosen by role.
/// Serialised as an object tagged by a snake_case `type` field.
#[derive(Debug, Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub(crate) enum InputContent {
/// Text on the user / developer side.
InputText { text: String },
/// Text on the assistant side.
OutputText { text: String },
}
/// One entry of a reasoning item's `summary` array, serialised as an object
/// tagged by a snake_case `type` field (`summary_text`).
#[derive(Debug, Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub(crate) enum ReasoningSummaryPart {
SummaryText { text: String },
}
/// One entry of a reasoning item's `content` array, serialised as an object
/// tagged by a snake_case `type` field (`reasoning_text`).
#[derive(Debug, Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub(crate) enum ReasoningContentPart {
ReasoningText { text: String },
}
/// Tool definition for Responses. Unlike Chat, `name` / `parameters` sit at
/// the top level instead of being nested under a `function` key.
#[derive(Debug, Serialize)]
pub(crate) struct ResponseTool {
/// Tool kind, serialised under the key `type`.
#[serde(rename = "type")]
pub r#type: &'static str,
/// Tool name.
pub name: String,
/// Optional description; omitted from the wire when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub description: Option<String>,
/// The OpenAI Responses API requires `properties` to be present on a
/// `type:"object"` parameter schema. schemars emits a minimal schema
/// without `properties` for an argument-less struct, so an empty object is
/// filled in at serialisation time.
#[serde(serialize_with = "serialize_parameters")]
pub parameters: Value,
/// Controls structured-output mode. Default is false.
pub strict: bool,
}
/// Serialises a tool parameter schema, filling in an empty `properties`
/// object when the schema is a top-level `type: "object"` without one.
///
/// Any other value is serialised unchanged. The input is never mutated; the
/// patch is applied to a clone.
fn serialize_parameters<S: Serializer>(value: &Value, s: S) -> Result<S::Ok, S::Error> {
    match value {
        Value::Object(fields)
            if fields.get("type").and_then(Value::as_str) == Some("object")
                && !fields.contains_key("properties") =>
        {
            let mut with_properties = fields.clone();
            with_properties.insert("properties".to_string(), Value::Object(Default::default()));
            Value::Object(with_properties).serialize(s)
        }
        _ => value.serialize(s),
    }
}
impl OpenAIResponsesScheme {
    /// Assembles the wire-format body from a [`Request`].
    ///
    /// * `model` is copied into the body's `model` field.
    /// * `request` supplies items, tools, system prompt, sampling config and
    ///   the cache key.
    /// * `capability` gates reasoning: a `reasoning` object is emitted only
    ///   when the capability supports `Effort` / `Both` and the request
    ///   carries an `Effort` control. A `BudgetTokens` control is never
    ///   projected.
    ///
    /// `max_output_tokens` and `temperature` / `top_p` are forwarded only when
    /// the scheme's `send_max_output_tokens` / `send_sampling_params` flags
    /// allow it.
    pub(crate) fn build_request(
        &self,
        model: &str,
        request: &Request,
        capability: &ModelCapability,
    ) -> ResponsesRequest {
        let config = &request.config;

        let effort_supported = matches!(
            capability.reasoning,
            Some(ReasoningSupport::Effort | ReasoningSupport::Both),
        );
        let reasoning = config
            .reasoning
            .as_ref()
            .filter(|_| effort_supported)
            .and_then(|control| match control {
                ReasoningControl::Effort(level) => Some(ReasoningConfig {
                    effort: Some(level.as_str().to_string()),
                    summary: "auto",
                }),
                ReasoningControl::BudgetTokens(_) => None,
            });

        let mut include: Vec<&'static str> = Vec::new();
        if self.include_encrypted_content {
            include.push("reasoning.encrypted_content");
        }

        ResponsesRequest {
            model: model.to_string(),
            instructions: request.system_prompt.clone(),
            input: convert_items_to_input(&request.items),
            tools: request.tools.iter().map(convert_tool).collect(),
            tool_choice: "auto",
            parallel_tool_calls: true,
            reasoning,
            store: self.store,
            stream: true,
            include,
            max_output_tokens: config.max_tokens.filter(|_| self.send_max_output_tokens),
            temperature: config.temperature.filter(|_| self.send_sampling_params),
            top_p: config.top_p.filter(|_| self.send_sampling_params),
            prompt_cache_key: request.cache_key.clone(),
        }
    }
}
/// Converts a sequence of [`Item`]s into `input[]` entries, one per item and
/// in the same order.
///
/// * Messages: user / system text becomes `input_text`, assistant text becomes
///   `output_text`; `Role::System` is sent with role `developer`. Refusal
///   parts are sent as text of the same kind.
/// * Tool calls: arguments are normalised through `parse_tool_arguments`.
/// * Tool results: `summary` and, when present, `content` joined by a newline.
/// * Reasoning: empty summary strings and empty reasoning text are dropped.
fn convert_items_to_input(items: &[Item]) -> Vec<InputItem> {
    items
        .iter()
        .map(|item| match item {
            Item::Message { role, content, .. } => {
                let (role_name, is_output): (&'static str, bool) = match role {
                    Role::User => ("user", false),
                    Role::Assistant => ("assistant", true),
                    Role::System => ("developer", false),
                };
                let wire_parts: Vec<InputContent> = content
                    .iter()
                    .map(|part| {
                        let text = match part {
                            ContentPart::Text { text } => text.clone(),
                            ContentPart::Refusal { refusal } => refusal.clone(),
                        };
                        if is_output {
                            InputContent::OutputText { text }
                        } else {
                            InputContent::InputText { text }
                        }
                    })
                    .collect();
                InputItem::Message {
                    role: role_name,
                    content: wire_parts,
                }
            }
            Item::ToolCall {
                call_id,
                name,
                arguments,
                ..
            } => InputItem::FunctionCall {
                call_id: call_id.clone(),
                name: name.clone(),
                // Normalise non-object / legacy "null" payloads to "{}".
                arguments: parse_tool_arguments(arguments).to_string(),
            },
            Item::ToolResult {
                call_id,
                summary,
                content,
                ..
            } => {
                let output = content
                    .as_ref()
                    .map_or_else(|| summary.clone(), |body| format!("{summary}\n{body}"));
                InputItem::FunctionCallOutput {
                    call_id: call_id.clone(),
                    output,
                }
            }
            Item::Reasoning {
                id,
                text,
                summary,
                encrypted_content,
                ..
            } => {
                let mut summary_parts = Vec::new();
                for entry in summary.iter() {
                    if !entry.is_empty() {
                        summary_parts.push(ReasoningSummaryPart::SummaryText {
                            text: entry.clone(),
                        });
                    }
                }
                let mut content_parts = Vec::new();
                if !text.is_empty() {
                    content_parts.push(ReasoningContentPart::ReasoningText { text: text.clone() });
                }
                InputItem::Reasoning {
                    id: id.clone(),
                    summary: summary_parts,
                    content: content_parts,
                    encrypted_content: encrypted_content.clone(),
                }
            }
        })
        .collect()
}
/// Projects a [`ToolDefinition`] onto the flat Responses tool shape: a
/// `function` tool carrying the definition's name, description and input
/// schema, with `strict` disabled.
fn convert_tool(tool: &ToolDefinition) -> ResponseTool {
    let name = tool.name.clone();
    let description = tool.description.clone();
    let parameters = tool.input_schema.clone();
    ResponseTool {
        r#type: "function",
        name,
        description,
        parameters,
        strict: false,
    }
}
#[cfg(test)]
mod tests {
use super::*;
use crate::llm_client::capability::{
CacheStrategy, ModelCapability, ReasoningControl, ReasoningEffort, ReasoningSupport,
StructuredOutput, ToolCallingSupport,
};
/// Test capability that supports effort-style reasoning and vision.
fn cap_with_reasoning() -> ModelCapability {
    ModelCapability {
        reasoning: Some(ReasoningSupport::Effort),
        vision: true,
        prompt_caching: CacheStrategy::Auto,
        structured_output: StructuredOutput::JsonSchema,
        tool_calling: ToolCallingSupport::Parallel,
    }
}
/// Same as `cap_with_reasoning()` but with reasoning support removed.
fn cap_no_reasoning() -> ModelCapability {
    let mut capability = cap_with_reasoning();
    capability.reasoning = None;
    capability
}
/// A fresh scheme does not store responses and requests encrypted reasoning.
#[test]
fn scheme_defaults_to_stateless_zdr() {
    let OpenAIResponsesScheme {
        store,
        include_encrypted_content,
        ..
    } = OpenAIResponsesScheme::new();
    assert!(!store);
    assert!(include_encrypted_content);
}
/// With default settings the body asks for encrypted reasoning content, is
/// not stored, and is streamed.
#[test]
fn includes_encrypted_content_when_enabled() {
    let request = Request::new().user("hi");
    let wire = OpenAIResponsesScheme::new().build_request("gpt-5", &request, &cap_with_reasoning());
    assert_eq!(wire.include, vec!["reasoning.encrypted_content"]);
    assert!(!wire.store);
    assert!(wire.stream);
}
/// The system prompt travels in `instructions`, not as an `input[]` item.
#[test]
fn instructions_from_system_prompt() {
    let request = Request::new().system("be terse").user("hi");
    let wire = OpenAIResponsesScheme::new().build_request("gpt-5", &request, &cap_with_reasoning());
    assert_eq!(wire.instructions.as_deref(), Some("be terse"));
    assert_eq!(wire.input.len(), 1);
}
/// `tool_choice` is always `"auto"` and parallel tool calls are always on.
#[test]
fn tool_choice_and_parallel_are_fixed() {
    let request = Request::new().user("hi");
    let wire = OpenAIResponsesScheme::new().build_request("gpt-5", &request, &cap_with_reasoning());
    assert_eq!(wire.tool_choice, "auto");
    assert!(wire.parallel_tool_calls);
}
/// A user message becomes a `user` message holding a single `input_text` part.
#[test]
fn user_message_uses_input_text() {
    let request = Request::new().user("hi");
    let wire = OpenAIResponsesScheme::new().build_request("gpt-5", &request, &cap_with_reasoning());
    let InputItem::Message { role, content } = &wire.input[0] else {
        panic!("expected message");
    };
    assert_eq!(*role, "user");
    assert_eq!(content.len(), 1);
    assert!(matches!(&content[0], InputContent::InputText { text } if text == "hi"));
}
/// The ChatGPT backend (codex-oauth) rejects `role: "system"` inside `input[]`
/// with 400 "System messages are not allowed". In-conversation system notes
/// (notify / fs_view auto-read / compaction summary) are therefore projected
/// as `role: "developer"` so that both backends accept them.
#[test]
fn system_role_item_is_projected_as_developer() {
    let request = Request::new()
        .user("hi")
        .item(Item::system_message("[notify] hello"));
    let wire = OpenAIResponsesScheme::new().build_request("gpt-5", &request, &cap_with_reasoning());
    let InputItem::Message { role, content } = &wire.input[1] else {
        panic!("expected message");
    };
    assert_eq!(*role, "developer");
    assert!(
        matches!(&content[0], InputContent::InputText { text } if text == "[notify] hello"),
    );
}
/// An assistant message becomes an `assistant` message with `output_text`.
#[test]
fn assistant_message_uses_output_text() {
    let request = Request::new().user("hi").assistant("hello");
    let wire = OpenAIResponsesScheme::new().build_request("gpt-5", &request, &cap_with_reasoning());
    let InputItem::Message { role, content } = &wire.input[1] else {
        panic!("expected message");
    };
    assert_eq!(*role, "assistant");
    assert!(matches!(&content[0], InputContent::OutputText { text } if text == "hello"));
}
/// A tool call and its result map to `function_call` and
/// `function_call_output` items, in order.
#[test]
fn tool_call_and_result_become_function_items() {
    let request = Request::new()
        .user("run")
        .item(Item::tool_call("c1", "t", r#"{"a":1}"#))
        .item(Item::tool_result("c1", "ok"));
    let wire = OpenAIResponsesScheme::new().build_request("gpt-5", &request, &cap_with_reasoning());
    let call_is_function_call = matches!(wire.input[1], InputItem::FunctionCall { .. });
    let result_is_function_output = matches!(wire.input[2], InputItem::FunctionCallOutput { .. });
    assert!(call_is_function_call);
    assert!(result_is_function_output);
}
/// A reasoning item keeps its summary, its text and its encrypted content.
#[test]
fn reasoning_item_round_trips_encrypted_content() {
    let reasoning = Item::reasoning("inner")
        .with_reasoning_summary(vec!["s1".into()])
        .with_encrypted_content("ENC");
    let request = Request::new().user("hi").item(reasoning);
    let wire = OpenAIResponsesScheme::new().build_request("gpt-5", &request, &cap_with_reasoning());
    let InputItem::Reasoning {
        summary,
        content,
        encrypted_content,
        ..
    } = &wire.input[1]
    else {
        panic!("expected reasoning");
    };
    assert_eq!(summary.len(), 1);
    assert_eq!(content.len(), 1);
    assert_eq!(encrypted_content.as_deref(), Some("ENC"));
}
/// The Responses API requires `summary` on a reasoning item. If it is not kept
/// on the wire as `summary: []` when empty, the ChatGPT backend (codex-oauth)
/// rejects the request with
/// `400 invalid_request_error: Missing required parameter: 'input[N].summary'.`
/// Turns with GPT-5 and no reasoning effort may carry no summary text, so the
/// field must not be skipped even when empty.
#[test]
fn reasoning_summary_field_is_always_serialized() {
    let request = Request::new()
        .user("hi")
        .item(Item::reasoning("").with_encrypted_content("ENC"));
    let wire = OpenAIResponsesScheme::new().build_request("gpt-5", &request, &cap_with_reasoning());
    let json = serde_json::to_value(&wire).unwrap();
    let reasoning_item = &json["input"][1];
    assert_eq!(reasoning_item["type"], "reasoning");
    let summary_present = reasoning_item.get("summary").is_some();
    assert!(
        summary_present,
        "summary key must be present even when empty, got: {reasoning_item}"
    );
    assert_eq!(reasoning_item["summary"], serde_json::json!([]));
}
/// With an effort-capable model, the effort is forwarded and the summary mode
/// is `"auto"`.
#[test]
fn reasoning_effort_projected_when_supported() {
    let mut request = Request::new().user("hi");
    request.config.reasoning = Some(ReasoningControl::Effort(ReasoningEffort::High));
    let wire = OpenAIResponsesScheme::new().build_request("gpt-5", &request, &cap_with_reasoning());
    let ReasoningConfig { effort, summary } = wire.reasoning.expect("reasoning should be set");
    assert_eq!(effort.as_deref(), Some("high"));
    assert_eq!(summary, "auto");
}
/// Without reasoning support in the capability, a requested effort is dropped.
#[test]
fn reasoning_omitted_when_unsupported() {
    let mut request = Request::new().user("hi");
    request.config.reasoning = Some(ReasoningControl::Effort(ReasoningEffort::High));
    let wire = OpenAIResponsesScheme::new().build_request("gpt-4o", &request, &cap_no_reasoning());
    assert!(wire.reasoning.is_none());
}
/// By default `max_tokens` is forwarded as `max_output_tokens`.
#[test]
fn max_output_tokens_passed_through_by_default() {
    let request = Request::new().user("hi").max_tokens(100);
    let wire = OpenAIResponsesScheme::new().build_request("gpt-5", &request, &cap_with_reasoning());
    assert_eq!(wire.max_output_tokens, Some(100));
}
/// With `send_max_output_tokens=false` the limit is neither set on the struct
/// nor present as a key in the serialised body.
#[test]
fn max_output_tokens_dropped_when_send_disabled() {
    let request = Request::new().user("hi").max_tokens(100);
    let wire = OpenAIResponsesScheme::new()
        .with_send_max_output_tokens(false)
        .build_request("gpt-5", &request, &cap_with_reasoning());
    assert_eq!(wire.max_output_tokens, None);
    let json = serde_json::to_value(&wire).unwrap();
    let key_absent = json.get("max_output_tokens").is_none();
    assert!(
        key_absent,
        "max_output_tokens key must not appear in serialised body, got: {json}"
    );
}
/// By default `temperature` and `top_p` are forwarded unchanged.
#[test]
fn sampling_params_passed_through_by_default() {
    let request = Request::new().user("hi").temperature(0.4).top_p(0.9);
    let wire = OpenAIResponsesScheme::new().build_request("gpt-5", &request, &cap_with_reasoning());
    assert_eq!(wire.temperature, Some(0.4));
    assert_eq!(wire.top_p, Some(0.9));
}
/// With `send_sampling_params=false` neither sampling parameter is set on the
/// struct or present as a key in the serialised body.
#[test]
fn sampling_params_dropped_when_send_disabled() {
    let request = Request::new().user("hi").temperature(0.4).top_p(0.9);
    let wire = OpenAIResponsesScheme::new()
        .with_send_sampling_params(false)
        .build_request("gpt-5", &request, &cap_with_reasoning());
    assert_eq!(wire.temperature, None);
    assert_eq!(wire.top_p, None);
    let json = serde_json::to_value(&wire).unwrap();
    let keys_absent = json.get("temperature").is_none() && json.get("top_p").is_none();
    assert!(
        keys_absent,
        "temperature/top_p keys must not appear in serialised body, got: {json}"
    );
}
/// A cache key on the request is forwarded as `prompt_cache_key`, both on the
/// struct and in the serialised JSON.
#[test]
fn prompt_cache_key_passed_through_when_set() {
    let request = Request::new().user("hi").cache_key("session-abc");
    let wire = OpenAIResponsesScheme::new().build_request("gpt-5", &request, &cap_with_reasoning());
    assert_eq!(wire.prompt_cache_key.as_deref(), Some("session-abc"));
    let json = serde_json::to_value(&wire).unwrap();
    assert_eq!(json["prompt_cache_key"], "session-abc");
}
/// Without a cache key the `prompt_cache_key` key is absent from the body.
#[test]
fn prompt_cache_key_omitted_when_none() {
    let request = Request::new().user("hi");
    let wire = OpenAIResponsesScheme::new().build_request("gpt-5", &request, &cap_with_reasoning());
    assert!(wire.prompt_cache_key.is_none());
    let json = serde_json::to_value(&wire).unwrap();
    let key_absent = json.get("prompt_cache_key").is_none();
    assert!(
        key_absent,
        "prompt_cache_key key must not appear in serialised body, got: {json}"
    );
}
/// A tool schema that has no `properties` gets an empty-object `properties`
/// added before it is sent.
#[test]
fn tool_schema_without_properties_is_normalized() {
    // schemars emits a schema containing only `type:"object"` for an
    // argument-less struct. OpenAI Responses rejects a missing `properties`
    // with a 400, so verify that an empty object is filled in right before
    // sending.
    let bare_schema = serde_json::json!({ "type": "object" });
    let tool = ToolDefinition::new("empty")
        .description("no args")
        .input_schema(bare_schema);
    let request = Request::new().tool(tool);

    let responses = OpenAIResponsesScheme::new();
    let built = responses.build_request("gpt-5", &request, &cap_with_reasoning());
    let wire = serde_json::to_value(&built).unwrap();

    let parameters = &wire["tools"][0]["parameters"];
    assert_eq!(parameters["type"], "object");
    assert!(
        parameters["properties"].is_object(),
        "properties must be present as an object, got: {}",
        parameters
    );
}
/// A tool schema that already declares `properties` is forwarded unchanged.
#[test]
fn tool_schema_with_properties_is_untouched() {
    let expected = serde_json::json!({
        "type": "object",
        "properties": { "path": { "type": "string" } },
        "required": ["path"]
    });
    let tool = ToolDefinition::new("t")
        .description("d")
        .input_schema(expected.clone());
    let request = Request::new().tool(tool);

    let responses = OpenAIResponsesScheme::new();
    let built = responses.build_request("gpt-5", &request, &cap_with_reasoning());
    let wire = serde_json::to_value(&built).unwrap();
    assert_eq!(wire["tools"][0]["parameters"], expected);
}
/// Smoke test that the wire format of the serialised body has not drifted.
#[test]
fn serialized_body_has_expected_shape() {
    let tool = ToolDefinition::new("t").description("d");
    let request = Request::new().system("sys").user("hi").tool(tool);

    let responses = OpenAIResponsesScheme::new();
    let built = responses.build_request("gpt-5", &request, &cap_with_reasoning());
    let wire = serde_json::to_value(&built).unwrap();

    // Top-level request fields.
    assert_eq!(wire["model"], "gpt-5");
    assert_eq!(wire["instructions"], "sys");
    assert_eq!(wire["tool_choice"], "auto");
    assert_eq!(wire["parallel_tool_calls"], true);
    assert_eq!(wire["store"], false);
    assert_eq!(wire["stream"], true);
    assert_eq!(wire["include"][0], "reasoning.encrypted_content");

    // First (and only) tool entry.
    let first_tool = &wire["tools"][0];
    assert_eq!(first_tool["type"], "function");
    assert_eq!(first_tool["name"], "t");
}
}

View File

@ -0,0 +1,88 @@
//! `impl Scheme for OpenAIResponsesScheme`
use serde_json::Value;
use crate::llm_client::{
ClientError,
auth::AuthRequirement,
capability::ModelCapability,
client::ConfigWarning,
event::Event,
scheme::Scheme,
types::{Request, RequestConfig},
};
use super::OpenAIResponsesScheme;
pub use super::events::OpenAIResponsesState;
impl Scheme for OpenAIResponsesScheme {
    type State = OpenAIResponsesState;

    /// Default API root for this scheme.
    fn default_base_url(&self) -> &'static str {
        // `/v1` lives on the base_url side. When going through ChatGPT OAuth,
        // using `https://chatgpt.com/backend-api/codex` as the base lets the
        // same `/responses` path serve both backends (following Codex CLI).
        "https://api.openai.com/v1"
    }

    /// Request path appended to the base URL; the model id is not part of it.
    fn path(&self, _model_id: &str) -> String {
        String::from("/responses")
    }

    /// This scheme authenticates with a bearer token.
    fn required_auth(&self) -> AuthRequirement {
        AuthRequirement::Bearer
    }

    /// Builds the typed request via `build_request` and converts it to JSON.
    ///
    /// # Panics
    /// Panics if the built request fails to serialise.
    fn build_request_body(
        &self,
        model_id: &str,
        request: &Request,
        capability: &ModelCapability,
    ) -> Value {
        let typed = self.build_request(model_id, request, capability);
        serde_json::to_value(&typed).expect("ResponsesRequest is always serialisable")
    }

    /// Delegates SSE frame parsing to `super::events::parse_sse`.
    fn parse_sse(
        &self,
        event_type: &str,
        data: &str,
        state: &mut Self::State,
    ) -> Result<Vec<Event>, ClientError> {
        super::events::parse_sse(event_type, data, state)
    }

    /// Delegates to `super::capability::default_capability`.
    fn default_capability(&self) -> ModelCapability {
        super::capability::default_capability()
    }

    /// Reports request settings that this scheme instance will not send.
    ///
    /// The ChatGPT backend (codex-oauth) rejects `max_output_tokens`,
    /// `temperature` and `top_p` with a 400. When the scheme was built with
    /// the corresponding `send_*` flag turned off, the body projection is
    /// already suppressed, so the only thing left to do is tell the user that
    /// their setting is being dropped.
    fn validate_config(&self, config: &RequestConfig) -> Vec<ConfigWarning> {
        const BACKEND: &str = "OpenAI Responses (ChatGPT backend)";
        let sampling_dropped = !self.send_sampling_params;

        // (field name, whether the user set it but it will not be sent),
        // listed in the order the warnings are reported.
        let checks = [
            (
                "max_tokens",
                !self.send_max_output_tokens && config.max_tokens.is_some(),
            ),
            (
                "temperature",
                sampling_dropped && config.temperature.is_some(),
            ),
            ("top_p", sampling_dropped && config.top_p.is_some()),
        ];

        checks
            .into_iter()
            .filter(|&(_, dropped)| dropped)
            .map(|(field, _)| ConfigWarning::unsupported(field, BACKEND))
            .collect()
    }
}

View File

@ -0,0 +1,485 @@
//! `HttpTransport<S: Scheme>`: すべての LLM wire scheme を共通の 1 本の
//! HTTP クライアントで扱う。
//!
//! 旧 `providers/{anthropic,openai,gemini,ollama}.rs` を置き換える。
//! scheme 固有の差分は [`Scheme`] trait 実装に委譲する。
use std::pin::Pin;
use std::sync::Arc;
use std::time::Duration;
use async_trait::async_trait;
use eventsource_stream::Eventsource;
use futures::{Stream, StreamExt, TryStreamExt};
use reqwest::header::{
ACCEPT, CONTENT_ENCODING, CONTENT_TYPE, HeaderMap, HeaderName, HeaderValue, RETRY_AFTER,
};
use super::auth::{AuthProvider, AuthRequirement};
use super::capability::ModelCapability;
use super::client::{ConfigWarning, LlmClient, ResponseStream};
use super::error::ClientError;
use super::event::Event;
use super::scheme::Scheme;
use super::types::{Request, RequestConfig};
pub const DEFAULT_STREAM_OPEN_TIMEOUT: Duration = Duration::from_secs(30);
pub const DEFAULT_FIRST_STREAM_EVENT_TIMEOUT: Duration = Duration::from_secs(30);
/// Runtime representation of a resolved `AuthRef`. Built by `crates/provider`.
///
/// - `None`: send no authentication header (opt-out, e.g. Ollama)
/// - `ApiKey`: a static API key string
/// - `Custom`: headers are assembled dynamically per request (e.g. Codex OAuth)
#[derive(Debug, Clone)]
pub enum ResolvedAuth {
/// No authentication header is sent.
None,
/// Static API key.
ApiKey(String),
/// Provider that produces the authentication headers for each request.
Custom(Arc<dyn AuthProvider>),
}
impl ResolvedAuth {
    /// Checks whether the resolved value is compatible with the scheme's
    /// authentication requirement. Intended for construction-time validation.
    ///
    /// - `ResolvedAuth::None` declares that no authentication is attached, so
    ///   it is accepted for every `AuthRequirement` (Ollama reusing the
    ///   Anthropic scheme has `required_auth = XApiKey` yet works without an
    ///   auth header).
    /// - `ResolvedAuth::Custom` declares that header assembly is done entirely
    ///   by the provider, so it is accepted regardless of what the scheme asks
    ///   for.
    /// - `ResolvedAuth::ApiKey` is accepted only for the `Bearer`, `XApiKey`
    ///   and `QueryParam` requirements.
    pub fn matches(&self, req: AuthRequirement) -> bool {
        match self {
            Self::None | Self::Custom(_) => true,
            Self::ApiKey(_) => matches!(
                req,
                AuthRequirement::Bearer
                    | AuthRequirement::XApiKey
                    | AuthRequirement::QueryParam { .. }
            ),
        }
    }
}
/// HTTP communication layer shared by all schemes.
pub struct HttpTransport<S: Scheme> {
/// HTTP client used to send requests; replaceable via `with_http_client`.
http_client: reqwest::Client,
/// Wire scheme that supplies the path, auth requirement, body and SSE parsing.
scheme: S,
/// Model identifier passed to the scheme when building the path and body.
model_id: String,
/// Base URL with trailing slashes trimmed (see `new`).
base_url: String,
/// Resolved authentication used when building the URL and headers.
auth: ResolvedAuth,
/// Model capability handed to the scheme when building the request body.
capability: ModelCapability,
}
impl<S: Scheme> HttpTransport<S> {
/// Creates a new transport. `base_url` is accepted with or without a
/// trailing slash (trailing `/` characters are trimmed internally).
pub fn new(
scheme: S,
model_id: impl Into<String>,
base_url: impl Into<String>,
auth: ResolvedAuth,
capability: ModelCapability,
) -> Self {
let base_url = base_url.into();
let base_url = base_url.trim_end_matches('/').to_string();
Self {
http_client: reqwest::Client::new(),
scheme,
model_id: model_id.into(),
base_url,
auth,
capability,
}
}
/// Swaps in a custom HTTP client (for tests and similar).
pub fn with_http_client(mut self, client: reqwest::Client) -> Self {
self.http_client = client;
self
}
/// Builds the request URL as `base_url` followed by the scheme's path.
///
/// When the scheme requires query-parameter auth and an API key is
/// configured, `name=key` is appended using `?` or `&` depending on whether
/// the URL already contains a `?`. The key is inserted verbatim; no
/// percent-encoding is applied here.
fn build_url(&self) -> String {
let path = self.scheme.path(&self.model_id);
let url = format!("{}{}", self.base_url, path);
// For schemes that authenticate via a query parameter (e.g. Gemini),
// append the key to the URL.
if let (AuthRequirement::QueryParam { name }, ResolvedAuth::ApiKey(key)) =
(self.scheme.required_auth(), &self.auth)
{
let sep = if url.contains('?') { '&' } else { '?' };
format!("{url}{sep}{name}={key}")
} else {
url
}
}
/// Builds the base request headers: JSON content type, authentication
/// headers chosen from the (auth, requirement) pair, then the scheme's
/// additional headers.
///
/// # Errors
/// Returns `ClientError::Config` when the API key or an additional header
/// value is not a valid header value, and propagates any error from the
/// custom auth provider.
async fn build_headers(&self) -> Result<HeaderMap, ClientError> {
let mut headers = HeaderMap::new();
headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
match (&self.auth, self.scheme.required_auth()) {
// No auth configured, or the scheme needs none: send nothing.
(ResolvedAuth::None, _) | (_, AuthRequirement::None) => {}
(ResolvedAuth::Custom(provider), _) => {
// Provider-supplied headers are all marked sensitive.
for (name, mut value) in provider.headers().await? {
value.set_sensitive(true);
headers.insert(name, value);
}
}
(ResolvedAuth::ApiKey(key), AuthRequirement::Bearer) => {
let mut val = HeaderValue::from_str(&format!("Bearer {key}"))
.map_err(|e| ClientError::Config(format!("invalid api key: {e}")))?;
val.set_sensitive(true);
headers.insert("Authorization", val);
}
(ResolvedAuth::ApiKey(key), AuthRequirement::XApiKey) => {
let mut val = HeaderValue::from_str(key.as_str())
.map_err(|e| ClientError::Config(format!("invalid api key: {e}")))?;
val.set_sensitive(true);
headers.insert("x-api-key", val);
}
(_, AuthRequirement::QueryParam { .. }) => {
// The query parameter is already attached in `build_url`.
}
(ResolvedAuth::ApiKey(_), AuthRequirement::Custom) => {
// An ApiKey is not expected to reach a scheme that requires Custom
// (that combination is rejected by `matches()`). Do nothing, to be
// on the safe side.
}
}
// Scheme-specific headers go last; `insert` replaces any same-named entry.
for (name, value) in self.scheme.additional_headers() {
let hv = HeaderValue::from_str(&value)
.map_err(|e| ClientError::Config(format!("invalid header {name}: {e}")))?;
headers.insert(name, hv);
}
Ok(headers)
}
/// Returns `true` only for a custom auth provider that reports itself as a
/// Codex backend.
fn is_codex_backend(&self) -> bool {
match &self.auth {
ResolvedAuth::Custom(provider) => provider.is_codex_backend(),
_ => false,
}
}
/// Adds the streaming-specific headers: `Accept: text/event-stream` always,
/// plus, for a Codex backend with a cache key on the request, `session_id`
/// and `x-client-request-id` both set to that cache key.
///
/// # Errors
/// Returns `ClientError::Config` if the cache key is not a valid header value.
fn apply_stream_headers(
&self,
headers: &mut HeaderMap,
request: &Request,
) -> Result<(), ClientError> {
headers.insert(ACCEPT, HeaderValue::from_static("text/event-stream"));
if self.is_codex_backend()
&& let Some(cache_key) = request.cache_key.as_deref()
{
let value = HeaderValue::from_str(cache_key).map_err(|e| {
ClientError::Config(format!("invalid Codex conversation header: {e}"))
})?;
headers.insert(HeaderName::from_static("session_id"), value.clone());
headers.insert(HeaderName::from_static("x-client-request-id"), value);
}
Ok(())
}
/// Encodes the JSON body for sending.
///
/// Non-Codex backends get a clone of the JSON value. Codex backends get
/// the serialised JSON compressed with zstd (level 3), and a
/// `Content-Encoding: zstd` header is added to `headers`.
///
/// # Errors
/// Propagates the JSON serialisation error, and returns
/// `ClientError::Config` if compression fails.
fn encode_request_body(
&self,
body: &serde_json::Value,
headers: &mut HeaderMap,
) -> Result<RequestBody, ClientError> {
if !self.is_codex_backend() {
return Ok(RequestBody::Json(body.clone()));
}
let raw = serde_json::to_vec(body)?;
let compressed = zstd::stream::encode_all(std::io::Cursor::new(raw), 3)
.map_err(|e| ClientError::Config(format!("failed to zstd-compress request: {e}")))?;
headers.insert(CONTENT_ENCODING, HeaderValue::from_static("zstd"));
Ok(RequestBody::CompressedJson(compressed))
}
}
/// Request body in the form it will be handed to the HTTP client, as produced
/// by `encode_request_body`.
enum RequestBody {
/// Plain JSON value, sent through the client's JSON body support.
Json(serde_json::Value),
/// zstd-compressed serialised JSON, sent as raw bytes.
CompressedJson(Vec<u8>),
}
/// Awaits `future` for at most `timeout`.
///
/// # Errors
/// - `ClientError::Timeout { phase, timeout }` if the deadline elapses first.
/// - `ClientError::Http` if the future itself resolves to an error.
async fn response_with_timeout(
    future: impl std::future::Future<Output = Result<reqwest::Response, reqwest::Error>>,
    timeout: Duration,
    phase: &'static str,
) -> Result<reqwest::Response, ClientError> {
    match tokio::time::timeout(timeout, future).await {
        Ok(outcome) => outcome.map_err(ClientError::Http),
        Err(_elapsed) => Err(ClientError::Timeout { phase, timeout }),
    }
}
impl<S: Scheme + Clone> Clone for HttpTransport<S> {
    /// Clones every field. Written by hand so the bound is `S: Clone` on this
    /// impl only.
    fn clone(&self) -> Self {
        // Destructure exhaustively so a newly added field cannot be forgotten.
        let Self {
            http_client,
            scheme,
            model_id,
            base_url,
            auth,
            capability,
        } = self;
        Self {
            http_client: http_client.clone(),
            scheme: scheme.clone(),
            model_id: model_id.clone(),
            base_url: base_url.clone(),
            auth: auth.clone(),
            capability: capability.clone(),
        }
    }
}
/// Converts an error response into `ClientError::Api`.
///
/// `retry_after` comes from the `Retry-After` header when it parses as whole
/// seconds. If the body is JSON, `code` and `message` are read from the
/// `type` and `message` fields of its `error` member (or of the top-level
/// value when there is no `error`); otherwise, or when `message` is missing,
/// the raw body text is used as the message.
async fn classify_error_response(resp: reqwest::Response) -> ClientError {
    let status = Some(resp.status().as_u16());
    let retry_after = resp
        .headers()
        .get(RETRY_AFTER)
        .and_then(|raw| raw.to_str().ok())
        .and_then(|secs| secs.trim().parse::<u64>().ok())
        .map(Duration::from_secs);
    let text = resp.text().await.unwrap_or_default();

    let (code, message) = match serde_json::from_str::<serde_json::Value>(&text) {
        Ok(json) => {
            let error = json.get("error").unwrap_or(&json);
            let field = |name: &str| error.get(name).and_then(|v| v.as_str()).map(String::from);
            (field("type"), field("message"))
        }
        Err(_) => (None, None),
    };

    ClientError::Api {
        status,
        code,
        message: message.unwrap_or(text),
        retry_after,
    }
}
#[async_trait]
impl<S: Scheme + Clone + 'static> LlmClient for HttpTransport<S> {
    /// Boxes a clone of this transport.
    fn clone_boxed(&self) -> Box<dyn LlmClient> {
        Box::new(self.clone())
    }

    /// Delegates config validation to the scheme.
    fn validate_config(&self, config: &RequestConfig) -> Vec<ConfigWarning> {
        self.scheme.validate_config(config)
    }

    /// Sends `request` and returns the response as a stream of events.
    ///
    /// # Errors
    /// Fails if header or body construction fails, if no response arrives
    /// within `DEFAULT_STREAM_OPEN_TIMEOUT`, or if the response status is not
    /// a success (classified via `classify_error_response`). SSE and parse
    /// failures after that point are yielded as items of the returned stream.
    async fn stream(&self, request: Request) -> Result<ResponseStream, ClientError> {
        let url = self.build_url();
        let mut headers = self.build_headers().await?;
        self.apply_stream_headers(&mut headers, &request)?;

        let json_body = self
            .scheme
            .build_request_body(&self.model_id, &request, &self.capability);
        // May add `Content-Encoding`, so it has to run before the headers are
        // moved into the request builder.
        let encoded = self.encode_request_body(&json_body, &mut headers)?;

        let pending = self.http_client.post(&url).headers(headers);
        let pending = match encoded {
            RequestBody::Json(value) => pending.json(&value),
            RequestBody::CompressedJson(bytes) => pending.body(bytes),
        };

        let response =
            response_with_timeout(pending.send(), DEFAULT_STREAM_OPEN_TIMEOUT, "stream_open")
                .await?;
        if !response.status().is_success() {
            return Err(classify_error_response(response).await);
        }

        let scheme = self.scheme.clone();
        // Scheme-specific parse state, kept per stream.
        let mut state = <S::State as Default>::default();

        let events = response
            .bytes_stream()
            .map_err(std::io::Error::other)
            .eventsource()
            .map(move |frame| {
                let parsed = match frame {
                    Ok(frame) => scheme.parse_sse(&frame.event, &frame.data, &mut state),
                    Err(e) => Err(ClientError::Sse(e.to_string())),
                };
                // Each frame becomes a small stream: either its parsed events
                // or a single error.
                let chunk: Pin<Box<dyn Stream<Item = Result<Event, ClientError>> + Send>> =
                    match parsed {
                        Ok(events) => Box::pin(futures::stream::iter(events.into_iter().map(Ok))),
                        Err(e) => Box::pin(futures::stream::once(async move { Err(e) })),
                    };
                chunk
            })
            .flatten();
        Ok(Box::pin(events))
    }
}
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
/// Auth provider stub that returns fixed headers and a configurable
/// Codex-backend flag.
#[derive(Debug)]
struct TestAuthProvider {
codex: bool,
}
#[async_trait]
impl AuthProvider for TestAuthProvider {
async fn headers(&self) -> Result<Vec<(HeaderName, HeaderValue)>, ClientError> {
Ok(vec![
(
HeaderName::from_static("authorization"),
HeaderValue::from_static("Bearer test-token"),
),
(
HeaderName::from_static("chatgpt-account-id"),
HeaderValue::from_static("account-1"),
),
])
}
fn is_codex_backend(&self) -> bool {
self.codex
}
}
/// Minimal scheme whose body echoes the model id, item count and cache key,
/// and whose SSE parser yields no events.
#[derive(Clone)]
struct TestScheme;
impl Scheme for TestScheme {
type State = ();
fn default_base_url(&self) -> &'static str {
"https://example.test"
}
fn path(&self, _model_id: &str) -> String {
"/responses".to_string()
}
fn required_auth(&self) -> AuthRequirement {
AuthRequirement::Bearer
}
fn build_request_body(
&self,
model_id: &str,
request: &Request,
_capability: &ModelCapability,
) -> serde_json::Value {
json!({
"model": model_id,
"input_len": request.items.len(),
"prompt_cache_key": request.cache_key,
})
}
fn parse_sse(
&self,
_event_type: &str,
_data: &str,
_state: &mut Self::State,
) -> Result<Vec<Event>, ClientError> {
Ok(Vec::new())
}
fn default_capability(&self) -> ModelCapability {
ModelCapability::minimal()
}
}
/// Builds a `TestScheme` transport with the given auth.
fn transport(auth: ResolvedAuth) -> HttpTransport<TestScheme> {
HttpTransport::new(
TestScheme,
"gpt-test",
"https://example.test",
auth,
ModelCapability::minimal(),
)
}
/// A future that never resolves must yield a retryable `Timeout` error
/// carrying the given phase.
#[tokio::test]
async fn response_timeout_returns_retryable_lifecycle_timeout() {
let err = response_with_timeout(
std::future::pending::<Result<reqwest::Response, reqwest::Error>>(),
Duration::from_millis(5),
"stream_open",
)
.await
.unwrap_err();
assert!(crate::llm_client::error::is_retryable(&err));
assert!(matches!(
err,
ClientError::Timeout {
phase: "stream_open",
..
}
));
}
/// A Codex backend gets the conversation headers derived from the cache key
/// and a zstd-compressed body that decodes back to the original JSON.
#[tokio::test]
async fn codex_backend_adds_conversation_headers_and_zstd_body() {
let transport = transport(ResolvedAuth::Custom(Arc::new(TestAuthProvider {
codex: true,
})));
let request = Request::new().user("hello").cache_key("segment-123");
let mut headers = transport.build_headers().await.unwrap();
transport
.apply_stream_headers(&mut headers, &request)
.unwrap();
let body = transport.scheme.build_request_body(
&transport.model_id,
&request,
&transport.capability,
);
let encoded = transport.encode_request_body(&body, &mut headers).unwrap();
assert_eq!(headers.get(ACCEPT).unwrap(), "text/event-stream");
assert_eq!(headers.get("session_id").unwrap(), "segment-123");
assert_eq!(headers.get("x-client-request-id").unwrap(), "segment-123");
assert_eq!(headers.get(CONTENT_ENCODING).unwrap(), "zstd");
let RequestBody::CompressedJson(compressed) = encoded else {
panic!("Codex backend request body must be zstd-compressed");
};
// Round-trip the compressed bytes to confirm the payload is intact.
let decoded = zstd::stream::decode_all(std::io::Cursor::new(compressed)).unwrap();
let decoded: serde_json::Value = serde_json::from_slice(&decoded).unwrap();
assert_eq!(decoded["prompt_cache_key"], "segment-123");
}
/// A plain API-key transport gets only the `Accept` header: no Codex
/// conversation headers, no `Content-Encoding`, and an uncompressed JSON body.
#[tokio::test]
async fn non_codex_request_does_not_get_codex_only_headers_or_compression() {
let transport = transport(ResolvedAuth::ApiKey("api-key".to_string()));
let request = Request::new().user("hello").cache_key("segment-123");
let mut headers = transport.build_headers().await.unwrap();
transport
.apply_stream_headers(&mut headers, &request)
.unwrap();
let body = transport.scheme.build_request_body(
&transport.model_id,
&request,
&transport.capability,
);
let encoded = transport.encode_request_body(&body, &mut headers).unwrap();
assert_eq!(headers.get(ACCEPT).unwrap(), "text/event-stream");
assert!(headers.get("session_id").is_none());
assert!(headers.get("x-client-request-id").is_none());
assert!(headers.get(CONTENT_ENCODING).is_none());
let RequestBody::Json(decoded) = encoded else {
panic!("non-Codex request body must remain normal JSON");
};
assert_eq!(decoded["prompt_cache_key"], "segment-123");
}
}

View File

@ -9,6 +9,10 @@
use serde::{Deserialize, Serialize};
/// Serde `skip_serializing_if` predicate: returns `true` when the flag is `false`.
fn is_false(value: &bool) -> bool {
    !value
}
// ============================================================================
// Item - The core unit of conversation
// ============================================================================
@ -74,8 +78,14 @@ pub enum Item {
id: Option<ItemId>,
/// Call ID linking to the tool call
call_id: CallId,
/// Output content
output: String,
/// Short summary (always kept in history, survives pruning)
summary: String,
/// Detailed output (removed by pruning when old enough)
#[serde(default, skip_serializing_if = "Option::is_none")]
content: Option<String>,
/// Whether the tool result represents an execution error.
#[serde(default, skip_serializing_if = "is_false")]
is_error: bool,
},
/// Reasoning/thinking item
@ -83,8 +93,23 @@ pub enum Item {
/// Optional item ID
#[serde(skip_serializing_if = "Option::is_none")]
id: Option<ItemId>,
/// Reasoning text
/// Reasoning text(reasoning body, `reasoning_text.delta` の累積)
text: String,
/// Reasoning summary(OpenAI Responses の `summary_text[]` を格納。
/// 他 scheme は空)
#[serde(default, skip_serializing_if = "Vec::is_empty")]
summary: Vec<String>,
/// サーバから返された暗号化済み reasoning blob。ZDR / `store=false`
/// 運用で stateless に再送するときそのまま添える必要がある。
/// Anthropic の `redacted_thinking.data` もここに格納する。
#[serde(default, skip_serializing_if = "Option::is_none")]
encrypted_content: Option<String>,
/// Anthropic extended thinking の `signature`。新世代 Claude
/// (Opus 4.5+/Sonnet 4.6+) では同一論理ターン内の `thinking`
/// ブロックを送り返す際に必須。改ざん検知に使われる。他 scheme
/// では `None`。
#[serde(default, skip_serializing_if = "Option::is_none")]
signature: Option<String>,
/// Item status
#[serde(skip_serializing_if = "Option::is_none")]
status: Option<ItemStatus>,
@ -96,6 +121,20 @@ impl Item {
// Message constructors
// ========================================================================
/// Build a system-role message item holding a single text part.
///
/// System items in history are sent as `role: "system"` on OpenAI,
/// and as `role: "user"` on Anthropic/Gemini (which lack a system
/// role in conversation items).
pub fn system_message(text: impl Into<String>) -> Self {
    let body = ContentPart::Text { text: text.into() };
    Self::Message {
        id: None,
        role: Role::System,
        content: vec![body],
        status: None,
    }
}
/// Create a user message item with text content
pub fn user_message(text: impl Into<String>) -> Self {
Self::Message {
@ -164,15 +203,41 @@ impl Item {
Self::tool_call(call_id, name, arguments.to_string())
}
/// Create a tool result item
pub fn tool_result(call_id: impl Into<String>, output: impl Into<String>) -> Self {
/// Create a successful tool result item that carries only a summary
/// (no detailed content).
pub fn tool_result(call_id: impl Into<String>, summary: impl Into<String>) -> Self {
    let (content, is_error) = (None, false);
    Self::tool_result_item(call_id, summary, content, is_error)
}
/// Create a tool result item flagged as an error that carries only a summary
/// (no detailed content).
pub fn tool_result_error(call_id: impl Into<String>, summary: impl Into<String>) -> Self {
    let (content, is_error) = (None, true);
    Self::tool_result_item(call_id, summary, content, is_error)
}
/// Create a tool result item with summary, optional content, and error flag.
///
/// This is the general constructor the other `tool_result*` helpers delegate
/// to. The item `id` is left unset.
///
/// - `call_id`: ID linking the result to its tool call.
/// - `summary`: short summary stored in the `summary` field.
/// - `content`: optional detailed output stored in the `content` field.
/// - `is_error`: whether the result represents an execution error.
pub fn tool_result_item(
    call_id: impl Into<String>,
    summary: impl Into<String>,
    content: Option<String>,
    is_error: bool,
) -> Self {
    // The former `output` field was replaced by `summary` + `content`; the
    // stale `output: output.into()` initializer named neither a parameter nor
    // a field of the variant and has been dropped.
    Self::ToolResult {
        id: None,
        call_id: call_id.into(),
        summary: summary.into(),
        content,
        is_error,
    }
}
/// Create a successful tool result item carrying both a summary and
/// detailed content.
pub fn tool_result_with_content(
    call_id: impl Into<String>,
    summary: impl Into<String>,
    content: impl Into<String>,
) -> Self {
    let detail: String = content.into();
    Self::tool_result_item(call_id, summary, Some(detail), false)
}
// ========================================================================
// Reasoning constructors
// ========================================================================
@ -182,10 +247,40 @@ impl Item {
Self::Reasoning {
id: None,
text: text.into(),
summary: Vec::new(),
encrypted_content: None,
signature: None,
status: None,
}
}
/// Replace the reasoning summary of a `Reasoning` item.
/// Any other variant is returned unchanged.
pub fn with_reasoning_summary(mut self, new_summary: Vec<String>) -> Self {
    if let Self::Reasoning { summary: slot, .. } = &mut self {
        *slot = new_summary;
    }
    self
}
/// Store `content` as the `encrypted_content` of a `Reasoning` item.
/// Any other variant is returned unchanged.
pub fn with_encrypted_content(mut self, content: impl Into<String>) -> Self {
    if let Self::Reasoning {
        encrypted_content: slot,
        ..
    } = &mut self
    {
        *slot = Some(content.into());
    }
    self
}
/// Store `sig` as the Anthropic `signature` of a `Reasoning` item.
/// Any other variant is returned unchanged.
pub fn with_signature(mut self, sig: impl Into<String>) -> Self {
    if let Self::Reasoning {
        signature: slot, ..
    } = &mut self
    {
        *slot = Some(sig.into());
    }
    self
}
// ========================================================================
// Builder methods
// ========================================================================
@ -285,6 +380,19 @@ impl Item {
}
}
/// Parse a ToolCall `arguments` string into a JSON object.
///
/// Provider APIs require tool call arguments to be a JSON object (Anthropic
/// rejects a non-object `tool_use.input`). Anything that does not parse to an
/// object — the empty string, the literal `"null"`, arrays, scalars, or
/// malformed JSON — is normalized to the empty object `{}`.
pub fn parse_tool_arguments(arguments: &str) -> serde_json::Value {
    serde_json::from_str::<serde_json::Value>(arguments)
        .ok()
        .filter(serde_json::Value::is_object)
        .unwrap_or_else(|| serde_json::Value::Object(serde_json::Map::new()))
}
// ============================================================================
// Content Parts - Components within message items
// ============================================================================
@ -374,6 +482,21 @@ pub struct Request {
pub tools: Vec<ToolDefinition>,
/// Request configuration
pub config: RequestConfig,
/// Index into `items` marking the end of a stable, cacheable prefix.
///
/// Higher layers that know about durable prefix boundaries (e.g. a
/// post-compaction summary) set this so that caching-aware providers
/// (Anthropic today) can place a long-lived cache breakpoint there.
/// Providers without prompt caching ignore the field.
pub cache_anchor: Option<usize>,
/// 会話単位の安定キー。`prompt_cache_key` として送られる
/// (OpenAI Responses)。ChatGPT backend (codex-oauth) は明示キーが
/// 無いと org/project ハッシュ衝突でプロンプトキャッシュが
/// ほぼヒットしないため、pod 側で `SegmentId` を渡す運用を想定。
/// `cache_anchor` と違い名前空間キーであり、`prefix anchor` とは
/// 別の概念。`cache_anchor` を読まない provider と同じく、
/// `prompt_cache_key` を持たない provider は無視する。
pub cache_key: Option<String>,
}
impl Request {
@ -453,6 +576,14 @@ impl Request {
self.config.stop_sequences.push(sequence.into());
self
}
/// Set the conversation cache key.
///
/// 詳細は [`Request::cache_key`] のフィールドコメント参照。
pub fn cache_key(mut self, key: impl Into<String>) -> Self {
self.cache_key = Some(key.into());
self
}
}
// ============================================================================
@ -513,6 +644,12 @@ pub struct RequestConfig {
pub top_k: Option<u32>,
/// Stop sequences
pub stop_sequences: Vec<String>,
/// Reasoning / extended-thinking 制御(共通型、scheme 側で各社形式に投影)。
///
/// `None` のときは何も送らない。`Some` でも scheme の
/// `ModelCapability::reasoning` が `None` なら無視される。
#[serde(default)]
pub reasoning: Option<crate::llm_client::capability::ReasoningControl>,
}
impl RequestConfig {
@ -551,3 +688,54 @@ impl RequestConfig {
self
}
}
#[cfg(test)]
mod parse_tool_arguments_tests {
    use super::parse_tool_arguments;
    use serde_json::{Value, json};

    /// Asserts that `input` is normalized to the empty object `{}`.
    fn assert_normalized(input: &str) {
        let expected = Value::Object(serde_json::Map::new());
        assert_eq!(parse_tool_arguments(input), expected);
    }

    #[test]
    fn empty_string_normalizes_to_object() {
        assert_normalized("");
    }

    #[test]
    fn literal_null_normalizes_to_object() {
        // A "null" left behind in an existing session must be recoverable on resume.
        assert_normalized("null");
    }

    #[test]
    fn array_normalizes_to_object() {
        assert_normalized("[1, 2, 3]");
    }

    #[test]
    fn scalar_normalizes_to_object() {
        for scalar in ["42", "\"str\"", "true"] {
            assert_normalized(scalar);
        }
    }

    #[test]
    fn invalid_json_normalizes_to_object() {
        assert_normalized("{not json");
    }

    #[test]
    fn valid_object_passes_through() {
        let parsed = parse_tool_arguments(r#"{"city":"Tokyo","days":3}"#);
        assert_eq!(parsed, json!({"city": "Tokyo", "days": 3}));
    }

    #[test]
    fn empty_object_passes_through() {
        assert_normalized("{}");
    }
}

View File

@ -0,0 +1,451 @@
//! Prune — context projection for old tool-result content.
//!
//! LLM 送信時のコンテキストから古い [`Item::ToolResult`] の `content` を
//! 省略して、コンテキスト窓のトークンを回収する。`summary` は残すので
//! 「何が起きたか」の痕跡は保たれる。
//!
//! # 設計方針
//!
//! Prune は **コンテキスト射影** であり、history の変換ではない。
//! この crate が提供するのは pure な候補抽出 [`prunable_indices`] のみで、
//! 射影の適用は上位層(`pod::prune_hook` 等)が LLM に送る一時コンテキスト
//! に対してだけ行う。Worker の永続履歴は決して変更されない。
//!
//! 保護境界は末尾 token budget で決めるが、この crate は usage 履歴を
//! 所有しない。prefix ごとの token 推定値と savings 推定は上位層から
//! callback で注入される。
use serde::{Deserialize, Serialize};
use crate::llm_client::types::Item;
use crate::token_counter::{EstimateSource, TokenEstimate};
/// Callback that returns token estimates for every prefix boundary of the
/// supplied request history.
///
/// The returned vector must have `history.len() + 1` entries where entry `i`
/// estimates the token count of `history[..i]`. Returning a vector of the
/// wrong length, or estimates whose source is [`EstimateSource::NoData`],
/// makes prune treat the request as having no candidates.
pub type TokenEstimator = Box<dyn Fn(&[Item]) -> Vec<TokenEstimate> + Send + Sync>;
/// Callback that estimates the token savings for projecting the
/// `ToolResult.content` out of `history[i]` for each `i` in `indices`.
///
/// Injected into [`crate::Worker`] via `set_savings_estimator` so the
/// Worker can make `min_savings` decisions without knowing about usage
/// measurement sources. Return `0` to signal "no data / refuse to prune".
///
/// Note that what is estimated is not "the whole dropped range" but "the
/// difference made by setting `content` to `None`". The item itself (its
/// summary etc.) stays, so this callback must return savings that match the
/// actual projection.
pub type SavingsEstimator = Box<dyn Fn(&[Item], &[usize]) -> u64 + Send + Sync>;
/// Result of one prune evaluation pass, surfaced to the optional
/// [`PruneObserver`] for instrumentation.
///
/// The Worker evaluates prune once per LLM request and, when an observer is
/// registered, reports the outcome through this value. It carries the
/// fire/skip decision together with the inputs to that decision: the number
/// of candidates, the estimated savings, and the first index of the protected
/// region.
#[derive(Debug, Clone)]
pub struct PruneEvaluation {
/// Length of `prunable_indices`. 0 for [`PruneDecision::SkippedNoCandidates`].
pub candidate_count: usize,
/// Estimated savings in tokens. 0 for [`PruneDecision::SkippedNoCandidates`].
pub estimated_savings: u64,
/// Index of the first item of the suffix protected by the token budget.
/// `None` when the usage estimate is `NoData` and the boundary cannot be
/// determined.
pub protected_start_index: Option<usize>,
/// The decision that was reached.
pub decision: PruneDecision,
}
/// Outcome of one prune evaluation. Each variant is one branch of the
/// "fire vs skip" decision tree the Worker walks before each LLM request.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PruneDecision {
/// `prunable_indices` was empty, so nothing was done.
SkippedNoCandidates,
/// There were candidates, but the estimated savings were below
/// `min_savings`, so nothing was done.
SkippedBelowMinSavings,
/// There were candidates and savings >= min_savings, so the projection was
/// applied. `pruned_count` is the number of items `project()` actually
/// rewrote (candidates whose content was already `None` are not counted).
Fired { pruned_count: usize },
}
/// Optional observer invoked after each prune evaluation, regardless of
/// branch. Upper layers such as Pod install one to emit metrics.
pub type PruneObserver = Box<dyn Fn(&PruneEvaluation) + Send + Sync>;
/// Configuration for the Prune algorithm.
///
/// Both fields fall back to their serde defaults when absent from the
/// serialized form.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PruneConfig {
/// Token budget at the history tail protected from pruning.
/// Defaults to 8000 via `default_protected_tokens`.
#[serde(default = "default_protected_tokens")]
pub protected_tokens: u64,
/// Minimum token savings required to actually prune. If the prunable
/// content is smaller than this, the caller should skip to avoid
/// pointless KV-cache invalidation. The unit is tokens; the caller
/// is responsible for measuring savings via a usage-history-aware
/// estimator and comparing against this threshold.
/// Defaults to 4096 via `default_min_savings`.
#[serde(default = "default_min_savings")]
pub min_savings: u64,
}
/// Serde default for `PruneConfig::protected_tokens`.
fn default_protected_tokens() -> u64 {
    8_000
}
/// Serde default for `PruneConfig::min_savings`.
fn default_min_savings() -> u64 {
    4_096
}
impl Default for PruneConfig {
    /// Uses the same values as the serde field defaults.
    fn default() -> Self {
        let protected_tokens = default_protected_tokens();
        let min_savings = default_min_savings();
        Self {
            protected_tokens,
            min_savings,
        }
    }
}
/// Clear the `content` of every `Item::ToolResult` found at `indices`.
///
/// Returns how many items were actually changed; an item whose content was
/// already `None`, or that is not a tool result, does not count. Meant to be
/// applied to a request-context clone, never to a persistent history.
///
/// # Panics
/// Panics if an index is out of bounds for `items`.
pub fn project(items: &mut [Item], indices: &[usize]) -> usize {
    let mut cleared = 0;
    for &idx in indices {
        if let Item::ToolResult { content, .. } = &mut items[idx] {
            // `take()` leaves `None` behind and tells us whether anything was there.
            if content.take().is_some() {
                cleared += 1;
            }
        }
    }
    cleared
}
/// Indices of the `Item::ToolResult` entries that still carry `content` and
/// sit before the suffix protected by `protected_tokens`. Pure: `items` is
/// not modified.
///
/// The result is empty when token estimates are unavailable (`NoData`) or
/// when there is nothing to prune.
pub fn prunable_indices(
    items: &[Item],
    protected_tokens: u64,
    token_estimates: &[TokenEstimate],
) -> Vec<usize> {
    let (candidates, _protected_start) =
        evaluate_candidates(items, protected_tokens, token_estimates);
    candidates
}
/// Like [`prunable_indices`], but additionally reports where the protected
/// suffix begins. A `None` start index means the token boundary could not be
/// determined (currently because usage estimates were `NoData` or malformed);
/// in that case the candidate list is empty.
pub fn evaluate_candidates(
    items: &[Item],
    protected_tokens: u64,
    token_estimates: &[TokenEstimate],
) -> (Vec<usize>, Option<usize>) {
    match protected_start_index(items, protected_tokens, token_estimates) {
        None => (Vec::new(), None),
        Some(boundary) => {
            // Only tool results before the boundary that still have content qualify.
            let candidates = items[..boundary]
                .iter()
                .enumerate()
                .filter(|(_, item)| {
                    matches!(
                        item,
                        Item::ToolResult {
                            content: Some(_),
                            ..
                        }
                    )
                })
                .map(|(idx, _)| idx)
                .collect();
            (candidates, Some(boundary))
        }
    }
}
/// Finds the index of the first item of the protected suffix.
///
/// `token_estimates[i]` is taken as the token estimate of `items[..i]`.
/// Walking backwards from the end, the suffix grows one item at a time until
/// its estimated size reaches `protected_tokens` (the item that crosses the
/// budget is included) or the start of the history is reached.
///
/// Returns `None` when `token_estimates` does not have `items.len() + 1`
/// entries, or when the total or any prefix estimate visited has source
/// `NoData`. A budget of 0 protects nothing and yields `items.len()`.
fn protected_start_index(
    items: &[Item],
    protected_tokens: u64,
    token_estimates: &[TokenEstimate],
) -> Option<usize> {
    let len = items.len();
    if token_estimates.len() != len + 1 {
        return None;
    }
    let total = token_estimates[len];
    if total.source == EstimateSource::NoData {
        return None;
    }
    if protected_tokens == 0 {
        return Some(len);
    }

    let mut boundary = len;
    for (idx, prefix) in token_estimates[..len].iter().enumerate().rev() {
        if prefix.source == EstimateSource::NoData {
            return None;
        }
        boundary = idx;
        // Tokens in `items[idx..]` = total minus everything before `idx`.
        if total.tokens.saturating_sub(prefix.tokens) >= protected_tokens {
            break;
        }
    }
    Some(boundary)
}
// Unit tests for candidate selection (`prunable_indices` /
// `evaluate_candidates`) and for `project`.
#[cfg(test)]
mod tests {
use super::*;
/// Helper: build a history with interleaved user messages and tool results.
///
/// Each turn contributes a user message, an assistant message (`"ok"`), and
/// then one tool call + tool result pair per entry in the turn's list. A
/// `None` content produces a tool result without content.
fn make_history(turns: &[(&str, Vec<(&str, Option<&str>)>)]) -> Vec<Item> {
let mut items = Vec::new();
for (user_msg, tool_results) in turns {
items.push(Item::user_message(*user_msg));
items.push(Item::assistant_message("ok"));
for (i, (summary, content)) in tool_results.iter().enumerate() {
let call_id = format!("call_{}", items.len() + i);
items.push(Item::tool_call(&call_id, "some_tool", "{}"));
match content {
Some(c) => items.push(Item::tool_result_with_content(&call_id, *summary, *c)),
None => items.push(Item::tool_result(&call_id, *summary)),
}
}
}
items
}
/// Wraps raw token counts as `Measured` estimates, preserving order.
fn measured_prefix(tokens: &[u64]) -> Vec<TokenEstimate> {
tokens
.iter()
.copied()
.map(|tokens| TokenEstimate {
tokens,
source: EstimateSource::Measured,
})
.collect()
}
/// Prefix estimates (`items.len() + 1` entries) where every item costs
/// `item_tokens`: entry `i` is `i * item_tokens`.
fn uniform_estimates(items: &[Item], item_tokens: u64) -> Vec<TokenEstimate> {
let mut tokens = Vec::with_capacity(items.len() + 1);
for i in 0..=items.len() {
tokens.push(i as u64 * item_tokens);
}
measured_prefix(&tokens)
}
/// Turns per-item token costs into a running-sum prefix table that starts at 0
/// and has one more entry than `item_tokens`.
fn estimates_from_item_tokens(item_tokens: &[u64]) -> Vec<TokenEstimate> {
let mut prefix = Vec::with_capacity(item_tokens.len() + 1);
let mut acc = 0;
prefix.push(acc);
for tokens in item_tokens {
acc += tokens;
prefix.push(acc);
}
measured_prefix(&prefix)
}
/// Estimates of the right length where only entry 0 is `Measured`; every other
/// entry (including the total) is `NoData`.
fn no_data_estimates(items: &[Item]) -> Vec<TokenEstimate> {
(0..=items.len())
.map(|i| TokenEstimate {
tokens: i as u64,
source: if i == 0 {
EstimateSource::Measured
} else {
EstimateSource::NoData
},
})
.collect()
}
#[test]
fn no_candidates_when_estimate_has_no_data() {
let items = make_history(&[("turn1", vec![("summary1", Some("big content here"))])]);
let estimates = no_data_estimates(&items);
let (candidates, protected_start) = evaluate_candidates(&items, 10, &estimates);
assert!(candidates.is_empty());
assert_eq!(protected_start, None);
}
#[test]
fn no_candidates_when_history_fits_in_protected_tokens() {
let items = make_history(&[
("turn1", vec![("summary1", Some("big content here"))]),
("turn2", vec![("summary2", Some("more content"))]),
]);
let estimates = uniform_estimates(&items, 10);
assert!(prunable_indices(&items, 10_000, &estimates).is_empty());
}
#[test]
fn candidates_before_token_protected_suffix() {
let big = "x".repeat(4096 * 4);
let items = make_history(&[
("turn1", vec![("s1", Some(&big))]),
("turn2", vec![("s2", Some(&big))]),
("turn3", vec![("s3", Some("keep me"))]),
("turn4", vec![("s4", Some("keep me too"))]),
]);
let estimates = uniform_estimates(&items, 10);
let candidates = prunable_indices(&items, 80, &estimates);
assert_eq!(candidates.len(), 2);
// suffix budget 80 tokens protects turn3+turn4 (8 items), so only s1/s2 are candidates.
for &i in &candidates {
if let Item::ToolResult { summary, .. } = &items[i] {
assert!(summary == "s1" || summary == "s2");
} else {
panic!("non tool-result selected");
}
}
}
#[test]
fn single_long_task_gets_candidates_without_multiple_user_turns() {
let big = "x".repeat(4096 * 8);
let items = make_history(&[(
"one long task",
vec![
("s1", Some(&big)),
("s2", Some(&big)),
("s3", Some(&big)),
("s4", Some(&big)),
],
)]);
// user + assistant are cheap; every ToolCall is cheap; every ToolResult is heavy.
let item_tokens = vec![1, 1, 1, 5_000, 1, 5_000, 1, 5_000, 1, 5_000];
let estimates = estimates_from_item_tokens(&item_tokens);
let (candidates, protected_start) = evaluate_candidates(&items, 8_000, &estimates);
// The tail starting at index 7 holds 10_001 tokens, the first suffix >= 8_000.
assert_eq!(protected_start, Some(7));
assert_eq!(candidates.len(), 2);
for &i in &candidates {
if let Item::ToolResult { summary, .. } = &items[i] {
assert!(summary == "s1" || summary == "s2");
} else {
panic!("non tool-result selected");
}
}
}
#[test]
fn already_pruned_items_excluded_from_candidates() {
let items = make_history(&[
("turn1", vec![("s1", None)]), // already pruned (content=None)
("turn2", vec![]),
("turn3", vec![]),
("turn4", vec![]),
]);
let estimates = uniform_estimates(&items, 10);
assert!(prunable_indices(&items, 20, &estimates).is_empty());
}
#[test]
fn project_drops_content_and_counts_modifications() {
let big = "x".repeat(64);
let mut items = make_history(&[
("turn1", vec![("s1", Some(&big))]),
("turn2", vec![("s2", Some(&big))]),
("turn3", vec![("s3", Some("keep me"))]),
("turn4", vec![("s4", Some("keep me too"))]),
]);
let estimates = uniform_estimates(&items, 10);
let candidates = prunable_indices(&items, 80, &estimates);
let count = project(&mut items, &candidates);
assert_eq!(count, 2);
for item in &items {
if let Item::ToolResult {
summary, content, ..
} = item
{
if summary == "s1" || summary == "s2" {
assert!(content.is_none(), "old content should be projected out");
} else {
assert!(content.is_some(), "protected content should remain");
}
}
}
}
#[test]
fn project_skips_already_pruned_items() {
// indices points at an item whose content is already None.
// project() should count it as 0 modifications.
let mut items = make_history(&[
("turn1", vec![("s1", None)]),
("turn2", vec![("s2", Some("hello"))]),
]);
// Manually target s1 even though it's already None.
let target = items
.iter()
.position(|it| matches!(it, Item::ToolResult { summary, .. } if summary == "s1"))
.unwrap();
let count = project(&mut items, &[target]);
assert_eq!(count, 0);
}
#[test]
fn project_is_idempotent() {
let big = "x".repeat(64);
let mut items = make_history(&[
("turn1", vec![("s1", Some(&big))]),
("turn2", vec![]),
("turn3", vec![]),
("turn4", vec![]),
]);
let estimates = uniform_estimates(&items, 10);
let candidates = prunable_indices(&items, 20, &estimates);
assert_eq!(project(&mut items, &candidates), 1);
// Second pass: reusing the same prunable_indices result modifies nothing.
assert_eq!(project(&mut items, &candidates), 0);
}
#[test]
fn evaluate_candidates_returns_protected_start_index() {
let big = "x".repeat(64);
let items = make_history(&[
("turn1", vec![("s1", Some(&big))]),
("turn2", vec![("s2", Some(&big))]),
("turn3", vec![("s3", Some("keep"))]),
("turn4", vec![("s4", Some("keep too"))]),
]);
let estimates = uniform_estimates(&items, 10);
let (candidates, protected_start) = evaluate_candidates(&items, 80, &estimates);
assert_eq!(candidates.len(), 2);
// protected_tokens=80 → protected suffix is turn3+turn4, starting at index 8.
assert_eq!(protected_start, Some(8));
}
#[test]
fn evaluate_candidates_reports_zero_start_when_everything_is_protected() {
let items = make_history(&[("only", vec![("s", Some("x"))])]);
let estimates = uniform_estimates(&items, 10);
let (candidates, protected_start) = evaluate_candidates(&items, 10_000, &estimates);
assert!(candidates.is_empty());
assert_eq!(protected_start, Some(0));
}
#[test]
fn zero_protected_tokens_allows_all_tool_results_as_candidates() {
let big = "x".repeat(64);
let items = make_history(&[("turn1", vec![("s1", Some(&big)), ("s2", Some(&big))])]);
let estimates = uniform_estimates(&items, 10);
let (candidates, protected_start) = evaluate_candidates(&items, 0, &estimates);
assert_eq!(protected_start, Some(items.len()));
assert_eq!(candidates.len(), 2);
}
#[test]
fn malformed_estimate_vector_is_treated_as_no_boundary() {
let items = make_history(&[("turn1", vec![("s1", Some("x"))])]);
// An empty table cannot have `items.len() + 1` entries.
let (candidates, protected_start) = evaluate_candidates(&items, 10, &[]);
assert!(candidates.is_empty());
assert_eq!(protected_start, None);
}
}

View File

@ -1,7 +1,7 @@
//! Worker State
//!
//! State marker types for cache protection using the Type-state pattern.
//! Worker has state transitions from `Mutable` → `CacheLocked`.
//! Worker has state transitions from `Mutable` → `Locked`.
/// Marker trait representing Worker state
///
@ -19,7 +19,7 @@ mod private {
/// - Editing message history (add, delete, clear)
/// - Registering tools and hooks
///
/// Can transition to [`CacheLocked`] state via `Worker::lock()`.
/// Can transition to [`Locked`] state via `Worker::lock()`.
///
/// # Examples
///
@ -54,7 +54,7 @@ impl WorkerState for Mutable {}
/// Can return to [`Mutable`] state via `Worker::unlock()`,
/// but note that cache protection will be released.
#[derive(Debug, Clone, Copy, Default)]
pub struct CacheLocked;
pub struct Locked;
impl private::Sealed for CacheLocked {}
impl WorkerState for CacheLocked {}
impl private::Sealed for Locked {}
impl WorkerState for Locked {}

View File

@ -1,371 +0,0 @@
//! Event Subscription
//!
//! Trait for receiving streaming events from LLM in real-time.
//! Used for stream display to UI and progress display.
use std::sync::{Arc, Mutex};
use crate::{
handler::{
ErrorKind, Handler, StatusKind, TextBlockEvent, TextBlockKind, ToolUseBlockEvent,
ToolUseBlockKind, UsageKind,
},
hook::ToolCall,
timeline::event::{ErrorEvent, StatusEvent, UsageEvent},
};
// =============================================================================
// WorkerSubscriber Trait
// =============================================================================
/// Trait for subscribing to streaming events from LLM
///
/// When registered with Worker, you can receive events from text generation
/// and tool calls in real-time. Ideal for stream display to UI.
///
/// Every callback has an empty default body, so implementors override only
/// the events they care about. The two associated scope types must always be
/// specified (use `()` when no per-block state is needed).
///
/// # Available Events
///
/// - **Block events**: Text, tool use (with scope)
/// - **Meta events**: Usage, status, error
/// - **Completion events**: Text complete, tool call complete
/// - **Turn control**: Turn start, turn end
///
/// # Examples
///
/// ```ignore
/// use llm_worker::subscriber::WorkerSubscriber;
/// use llm_worker::timeline::TextBlockEvent;
///
/// struct StreamPrinter;
///
/// impl WorkerSubscriber for StreamPrinter {
/// type TextBlockScope = ();
/// type ToolUseBlockScope = ();
///
/// fn on_text_block(&mut self, _: &mut (), event: &TextBlockEvent) {
/// if let TextBlockEvent::Delta(text) = event {
/// print!("{}", text); // Real-time output
/// }
/// }
///
/// fn on_text_complete(&mut self, text: &str) {
/// println!("\n--- Complete: {} chars ---", text.len());
/// }
/// }
///
/// // Register with Worker
/// worker.subscribe(StreamPrinter);
/// ```
pub trait WorkerSubscriber: Send {
// =========================================================================
// Scope Types (for block events)
// =========================================================================
/// Scope type for text block processing
///
/// Generated with Default::default() at block start,
/// destroyed at block end.
type TextBlockScope: Default + Send + Sync;
/// Scope type for tool use block processing
///
/// Like the text scope, it must be constructible via `Default`.
type ToolUseBlockScope: Default + Send + Sync;
// =========================================================================
// Block Events (with scope management)
// =========================================================================
/// Text block event
///
/// Has Start/Delta/Stop lifecycle.
/// Scope is generated at block start and destroyed at end.
#[allow(unused_variables)]
fn on_text_block(&mut self, scope: &mut Self::TextBlockScope, event: &TextBlockEvent) {}
/// Tool use block event
///
/// Has Start/InputJsonDelta/Stop lifecycle.
#[allow(unused_variables)]
fn on_tool_use_block(
&mut self,
scope: &mut Self::ToolUseBlockScope,
event: &ToolUseBlockEvent,
) {
}
// =========================================================================
// Single Events (no scope needed)
// =========================================================================
/// Usage event
#[allow(unused_variables)]
fn on_usage(&mut self, event: &UsageEvent) {}
/// Status event
#[allow(unused_variables)]
fn on_status(&mut self, event: &StatusEvent) {}
/// Error event
#[allow(unused_variables)]
fn on_error(&mut self, event: &ErrorEvent) {}
// =========================================================================
// Accumulated Events (added in Worker layer)
// =========================================================================
/// Text complete event
///
/// When a text block completes, the entire accumulated text is passed.
/// Convenient for receiving the final result after block processing.
///
/// The text-block adapter in this module calls this right after forwarding
/// the `Stop` event to [`Self::on_text_block`]; `text` is the concatenation
/// of every `Delta` seen in that block.
#[allow(unused_variables)]
fn on_text_complete(&mut self, text: &str) {}
/// Tool call complete event
///
/// When a tool use block completes, the complete ToolCall is passed.
///
/// The tool-use adapter in this module builds the call from the id/name
/// captured on `Start` and the accumulated `InputJsonDelta` text. If that
/// text is not valid JSON, `input` falls back to the default
/// `serde_json::Value` rather than reporting an error.
#[allow(unused_variables)]
fn on_tool_call_complete(&mut self, call: &ToolCall) {}
// =========================================================================
// Turn Control
// =========================================================================
/// On turn start
///
/// `turn` is a 0-based turn number.
#[allow(unused_variables)]
fn on_turn_start(&mut self, turn: usize) {}
/// On turn end
///
/// NOTE(review): `turn` is presumably the same 0-based number passed to
/// `on_turn_start` — confirm against the caller.
#[allow(unused_variables)]
fn on_turn_end(&mut self, turn: usize) {}
}
// =============================================================================
// SubscriberAdapter - Bridge WorkerSubscriber to Timeline handlers
// =============================================================================
// =============================================================================
// TextBlock Handler Adapter
// =============================================================================
/// Forwards `TextBlockKind` events to a shared [`WorkerSubscriber`].
pub(crate) struct TextBlockSubscriberAdapter<S: WorkerSubscriber> {
    subscriber: Arc<Mutex<S>>,
}

impl<S: WorkerSubscriber> TextBlockSubscriberAdapter<S> {
    /// Builds an adapter around an already-shared subscriber handle.
    pub fn new(subscriber: Arc<Mutex<S>>) -> Self {
        TextBlockSubscriberAdapter { subscriber }
    }
}

// Written by hand (not derived) so that cloning does not require `S: Clone`;
// only the `Arc` handle is duplicated.
impl<S: WorkerSubscriber> Clone for TextBlockSubscriberAdapter<S> {
    fn clone(&self) -> Self {
        let subscriber = Arc::clone(&self.subscriber);
        Self { subscriber }
    }
}
/// Per-block state for the text adapter: the subscriber's own scope plus the
/// text accumulated so far.
pub struct TextBlockScopeWrapper<S: WorkerSubscriber> {
    /// The subscriber-defined scope handed to `on_text_block`.
    inner: S::TextBlockScope,
    /// Concatenated deltas, later passed to `on_text_complete`.
    buffer: String,
}

impl<S: WorkerSubscriber> Default for TextBlockScopeWrapper<S> {
    /// Starts with a default subscriber scope and an empty buffer.
    fn default() -> Self {
        let inner = <S::TextBlockScope as Default>::default();
        let buffer = String::new();
        Self { inner, buffer }
    }
}
impl<S: WorkerSubscriber + 'static> Handler<TextBlockKind> for TextBlockSubscriberAdapter<S> {
    type Scope = TextBlockScopeWrapper<S>;

    /// Buffers delta text, forwards the event to the subscriber, and emits
    /// `on_text_complete` with the buffered text once the block stops.
    ///
    /// If the subscriber mutex cannot be locked the event is not forwarded;
    /// the delta has still been appended to the buffer by then.
    fn on_event(&mut self, scope: &mut Self::Scope, event: &TextBlockEvent) {
        if let TextBlockEvent::Delta(chunk) = event {
            scope.buffer.push_str(chunk);
        }

        let mut guard = match self.subscriber.lock() {
            Ok(guard) => guard,
            Err(_) => return,
        };

        guard.on_text_block(&mut scope.inner, event);
        if let TextBlockEvent::Stop(_) = event {
            guard.on_text_complete(&scope.buffer);
        }
    }
}
// =============================================================================
// ToolUseBlock Handler Adapter
// =============================================================================
/// Forwards `ToolUseBlockKind` events to a shared [`WorkerSubscriber`].
pub(crate) struct ToolUseBlockSubscriberAdapter<S: WorkerSubscriber> {
    subscriber: Arc<Mutex<S>>,
}

impl<S: WorkerSubscriber> ToolUseBlockSubscriberAdapter<S> {
    /// Builds an adapter around an already-shared subscriber handle.
    pub fn new(subscriber: Arc<Mutex<S>>) -> Self {
        ToolUseBlockSubscriberAdapter { subscriber }
    }
}

// Written by hand (not derived) so that cloning does not require `S: Clone`;
// only the `Arc` handle is duplicated.
impl<S: WorkerSubscriber> Clone for ToolUseBlockSubscriberAdapter<S> {
    fn clone(&self) -> Self {
        let subscriber = Arc::clone(&self.subscriber);
        Self { subscriber }
    }
}
/// Per-block state for the tool-use adapter: the subscriber's own scope plus
/// the pieces needed to assemble a `ToolCall` when the block stops.
pub struct ToolUseBlockScopeWrapper<S: WorkerSubscriber> {
    /// The subscriber-defined scope handed to `on_tool_use_block`.
    inner: S::ToolUseBlockScope,
    /// Tool call id, recorded from the `Start` event.
    id: String,
    /// Tool name, recorded from the `Start` event.
    name: String,
    /// Concatenated `InputJsonDelta` fragments.
    input_json: String,
}

impl<S: WorkerSubscriber> Default for ToolUseBlockScopeWrapper<S> {
    /// Starts with a default subscriber scope and empty id/name/JSON text.
    fn default() -> Self {
        let inner = <S::ToolUseBlockScope as Default>::default();
        Self {
            inner,
            id: String::new(),
            name: String::new(),
            input_json: String::new(),
        }
    }
}
impl<S: WorkerSubscriber + 'static> Handler<ToolUseBlockKind> for ToolUseBlockSubscriberAdapter<S> {
    type Scope = ToolUseBlockScopeWrapper<S>;

    /// Tracks id/name/JSON input for the block, forwards the event to the
    /// subscriber, and emits `on_tool_call_complete` once the block stops.
    ///
    /// If the subscriber mutex cannot be locked the event is not forwarded;
    /// the scope bookkeeping has still been updated by then.
    fn on_event(&mut self, scope: &mut Self::Scope, event: &ToolUseBlockEvent) {
        // Bookkeeping first, independent of whether the subscriber is reachable.
        match event {
            ToolUseBlockEvent::Start(start) => {
                scope.id = start.id.clone();
                scope.name = start.name.clone();
            }
            ToolUseBlockEvent::InputJsonDelta(fragment) => scope.input_json.push_str(fragment),
            _ => {}
        }

        let mut guard = match self.subscriber.lock() {
            Ok(guard) => guard,
            Err(_) => return,
        };

        guard.on_tool_use_block(&mut scope.inner, event);

        if let ToolUseBlockEvent::Stop(_) = event {
            // Unparseable input falls back to the default JSON value.
            let input: serde_json::Value =
                serde_json::from_str(&scope.input_json).unwrap_or_default();
            let completed = ToolCall {
                id: scope.id.clone(),
                name: scope.name.clone(),
                input,
            };
            guard.on_tool_call_complete(&completed);
        }
    }
}
// =============================================================================
// Meta Event Handler Adapters
// =============================================================================
/// Forwards `UsageKind` events to a shared [`WorkerSubscriber`].
pub(crate) struct UsageSubscriberAdapter<S: WorkerSubscriber> {
    subscriber: Arc<Mutex<S>>,
}

impl<S: WorkerSubscriber> UsageSubscriberAdapter<S> {
    /// Builds an adapter around an already-shared subscriber handle.
    pub fn new(subscriber: Arc<Mutex<S>>) -> Self {
        UsageSubscriberAdapter { subscriber }
    }
}

// Written by hand (not derived) so that cloning does not require `S: Clone`.
impl<S: WorkerSubscriber> Clone for UsageSubscriberAdapter<S> {
    fn clone(&self) -> Self {
        let subscriber = Arc::clone(&self.subscriber);
        Self { subscriber }
    }
}

impl<S: WorkerSubscriber + 'static> Handler<UsageKind> for UsageSubscriberAdapter<S> {
    type Scope = ();

    /// Passes the usage event to `on_usage`; does nothing if the subscriber
    /// mutex cannot be locked.
    fn on_event(&mut self, _scope: &mut Self::Scope, event: &UsageEvent) {
        match self.subscriber.lock() {
            Ok(mut guard) => guard.on_usage(event),
            Err(_) => {}
        }
    }
}
/// Forwards `StatusKind` events to a shared [`WorkerSubscriber`].
pub(crate) struct StatusSubscriberAdapter<S: WorkerSubscriber> {
    subscriber: Arc<Mutex<S>>,
}

impl<S: WorkerSubscriber> StatusSubscriberAdapter<S> {
    /// Builds an adapter around an already-shared subscriber handle.
    pub fn new(subscriber: Arc<Mutex<S>>) -> Self {
        StatusSubscriberAdapter { subscriber }
    }
}

// Written by hand (not derived) so that cloning does not require `S: Clone`.
impl<S: WorkerSubscriber> Clone for StatusSubscriberAdapter<S> {
    fn clone(&self) -> Self {
        let subscriber = Arc::clone(&self.subscriber);
        Self { subscriber }
    }
}

impl<S: WorkerSubscriber + 'static> Handler<StatusKind> for StatusSubscriberAdapter<S> {
    type Scope = ();

    /// Passes the status event to `on_status`; does nothing if the subscriber
    /// mutex cannot be locked.
    fn on_event(&mut self, _scope: &mut Self::Scope, event: &StatusEvent) {
        match self.subscriber.lock() {
            Ok(mut guard) => guard.on_status(event),
            Err(_) => {}
        }
    }
}
/// Forwards `ErrorKind` events to a shared [`WorkerSubscriber`].
pub(crate) struct ErrorSubscriberAdapter<S: WorkerSubscriber> {
    subscriber: Arc<Mutex<S>>,
}

impl<S: WorkerSubscriber> ErrorSubscriberAdapter<S> {
    /// Builds an adapter around an already-shared subscriber handle.
    pub fn new(subscriber: Arc<Mutex<S>>) -> Self {
        ErrorSubscriberAdapter { subscriber }
    }
}

// Written by hand (not derived) so that cloning does not require `S: Clone`.
impl<S: WorkerSubscriber> Clone for ErrorSubscriberAdapter<S> {
    fn clone(&self) -> Self {
        let subscriber = Arc::clone(&self.subscriber);
        Self { subscriber }
    }
}

impl<S: WorkerSubscriber + 'static> Handler<ErrorKind> for ErrorSubscriberAdapter<S> {
    type Scope = ();

    /// Passes the error event to `on_error`; does nothing if the subscriber
    /// mutex cannot be locked.
    fn on_event(&mut self, _scope: &mut Self::Scope, event: &ErrorEvent) {
        match self.subscriber.lock() {
            Ok(mut guard) => guard.on_error(event),
            Err(_) => {}
        }
    }
}

View File

@ -10,14 +10,16 @@
//! - [`ToolCallCollector`] - ツール呼び出しを収集するHandler
pub mod event;
mod reasoning_item_collector;
mod text_block_collector;
mod timeline;
mod tool_call_collector;
// 公開API
pub use event::*;
pub use reasoning_item_collector::ReasoningItemCollector;
pub use text_block_collector::TextBlockCollector;
pub use timeline::{ErasedHandler, HandlerWrapper, Timeline};
pub use timeline::Timeline;
pub use tool_call_collector::ToolCallCollector;
// 型定義からのre-export
@ -28,6 +30,7 @@ pub use crate::handler::{
Handler,
Kind,
PingKind,
ReasoningItemKind,
StatusKind,
// Block Events
TextBlockEvent,

View File

@ -0,0 +1,77 @@
//! `ReasoningItemCollector` - 完成済み reasoning item を収集する Handler
//!
//! Timeline の `ReasoningItemKind` Handler として登録し、scheme 側が
//! `Event::ReasoningItem` を発火するたびに 1 件ずつバッファに溜める。
//! Worker はターン終了時に `take_collected()` でドレインして
//! `Item::Reasoning` として `worker.history` に append する。
use std::sync::{Arc, Mutex};
use crate::handler::{Handler, ReasoningItemKind};
use crate::llm_client::event::ReasoningItemEvent;
/// Ordered buffer of the reasoning items collected so far.
///
/// The buffer lives behind an `Arc<Mutex<..>>`, so clones of a collector
/// share the same underlying list.
#[derive(Clone, Default)]
pub struct ReasoningItemCollector {
    collected: Arc<Mutex<Vec<ReasoningItemEvent>>>,
}

impl ReasoningItemCollector {
    /// Creates a collector with an empty buffer.
    pub fn new() -> Self {
        Default::default()
    }

    /// Removes and returns every collected item, leaving the buffer empty.
    ///
    /// # Panics
    ///
    /// Panics if the internal mutex is poisoned.
    pub fn take_collected(&self) -> Vec<ReasoningItemEvent> {
        let mut buffer = self.collected.lock().unwrap();
        let drained = std::mem::take(&mut *buffer);
        drained
    }

    /// Discards every collected item.
    ///
    /// # Panics
    ///
    /// Panics if the internal mutex is poisoned.
    pub fn clear(&self) {
        let mut buffer = self.collected.lock().unwrap();
        buffer.clear();
    }
}

impl Handler<ReasoningItemKind> for ReasoningItemCollector {
    type Scope = ();

    /// Appends a clone of `event` to the shared buffer.
    fn on_event(&mut self, _scope: &mut Self::Scope, event: &ReasoningItemEvent) {
        let mut buffer = self.collected.lock().unwrap();
        buffer.push(event.clone());
    }
}
// Unit tests for `ReasoningItemCollector`, driven through a real `Timeline`.
#[cfg(test)]
mod tests {
use super::*;
use crate::llm_client::event::Event;
use crate::timeline::Timeline;
/// Items dispatched through the timeline are collected in dispatch order and
/// `take_collected` drains the buffer.
#[test]
fn collects_in_order() {
let collector = ReasoningItemCollector::new();
let mut timeline = Timeline::new();
// Register a clone; it shares the buffer with `collector`.
timeline.on_reasoning_item(collector.clone());
timeline.dispatch(&Event::ReasoningItem(ReasoningItemEvent {
id: Some("r1".into()),
text: "first".into(),
signature: Some("sig1".into()),
..Default::default()
}));
timeline.dispatch(&Event::ReasoningItem(ReasoningItemEvent {
id: Some("r2".into()),
text: "second".into(),
..Default::default()
}));
let items = collector.take_collected();
assert_eq!(items.len(), 2);
assert_eq!(items[0].text, "first");
assert_eq!(items[0].signature.as_deref(), Some("sig1"));
assert_eq!(items[1].text, "second");
// take drains the buffer, so a second call returns nothing
assert!(collector.take_collected().is_empty());
}
}

View File

@ -8,6 +8,33 @@ use std::marker::PhantomData;
use super::event::*;
use crate::handler::*;
// =============================================================================
// Helpers
// =============================================================================
/// Folds a newly received `UsageEvent` into the accumulator for the current
/// request.
///
/// Each field is merged independently: a `Some` in `new` replaces the
/// accumulated value, a `None` leaves it untouched. Some providers report
/// input/cache counts only on the first event and refine `output_tokens` on
/// later ones, so keeping just the last event would lose data; the latest
/// non-`None` value per field is kept instead.
fn merge_usage(acc: &mut UsageEvent, new: &UsageEvent) {
    acc.input_tokens = new.input_tokens.or(acc.input_tokens);
    acc.output_tokens = new.output_tokens.or(acc.output_tokens);
    acc.total_tokens = new.total_tokens.or(acc.total_tokens);
    acc.cache_read_input_tokens = new
        .cache_read_input_tokens
        .or(acc.cache_read_input_tokens);
    acc.cache_creation_input_tokens = new
        .cache_creation_input_tokens
        .or(acc.cache_creation_input_tokens);
}
// =============================================================================
// Type-erased Handler
// =============================================================================
@ -354,6 +381,7 @@ pub struct Timeline {
ping_handlers: Vec<Box<dyn ErasedHandler<PingKind>>>,
status_handlers: Vec<Box<dyn ErasedHandler<StatusKind>>>,
error_handlers: Vec<Box<dyn ErasedHandler<ErrorKind>>>,
reasoning_item_handlers: Vec<Box<dyn ErasedHandler<ReasoningItemKind>>>,
// Block系ハンドラーBlockTypeごとにグループ化
text_block_handlers: Vec<Box<dyn ErasedBlockHandler>>,
@ -362,6 +390,12 @@ pub struct Timeline {
// 現在アクティブなブロック
current_block: Option<BlockType>,
// 1リクエスト内で受信した Usage event の集約バッファ。
// Anthropic は message_start と message_delta、Gemini は各チャンクと、
// 多くのプロバイダが複数 Usage を発行するため、リクエスト境界で
// 1度だけ発火するためにここでマージする。flush_usage() で発火する。
pending_usage: Option<UsageEvent>,
}
impl Default for Timeline {
@ -377,10 +411,12 @@ impl Timeline {
ping_handlers: Vec::new(),
status_handlers: Vec::new(),
error_handlers: Vec::new(),
reasoning_item_handlers: Vec::new(),
text_block_handlers: Vec::new(),
thinking_block_handlers: Vec::new(),
tool_use_block_handlers: Vec::new(),
current_block: None,
pending_usage: None,
}
}
@ -437,6 +473,18 @@ impl Timeline {
self
}
/// `ReasoningItemKind` 用 Handler を登録
pub fn on_reasoning_item<H>(&mut self, handler: H) -> &mut Self
where
H: Handler<ReasoningItemKind> + Send + Sync + 'static,
H::Scope: Send + Sync,
{
let mut wrapper = HandlerWrapper::new(handler);
wrapper.start_scope();
self.reasoning_item_handlers.push(Box::new(wrapper));
self
}
/// TextBlockKind用のHandlerを登録
pub fn on_text_block<H>(&mut self, handler: H) -> &mut Self
where
@ -482,18 +530,38 @@ impl Timeline {
Event::Ping(p) => self.dispatch_ping(p),
Event::Status(s) => self.dispatch_status(s),
Event::Error(e) => self.dispatch_error(e),
// Observability-only event: stream trace records it before timeline dispatch.
Event::UnhandledSse(_) => {}
// Block系: スコープ管理しながらディスパッチ
Event::BlockStart(s) => self.handle_block_start(s),
Event::BlockDelta(d) => self.handle_block_delta(d),
Event::BlockStop(s) => self.handle_block_stop(s),
Event::BlockAbort(a) => self.handle_block_abort(a),
// 完成済み reasoning item: 即時ディスパッチ
Event::ReasoningItem(r) => self.dispatch_reasoning_item(r),
}
}
/// Buffers a usage event instead of dispatching it right away.
///
/// The first event of a request is stored as-is; later ones are merged into
/// it via `merge_usage`. Handlers should see a single, final usage value per
/// request even when several usage events arrive, so nothing is fired here;
/// `flush_usage()` does the actual dispatch.
fn dispatch_usage(&mut self, event: &UsageEvent) {
    if let Some(pending) = self.pending_usage.as_mut() {
        merge_usage(pending, event);
    } else {
        self.pending_usage = Some(event.clone());
    }
}
/// Dispatches the buffered usage event to every usage handler and clears the
/// buffer.
///
/// Intended to be called once when the stream for a single request ends.
/// Does nothing when no usage event has been buffered, so calling it again
/// after a flush is a no-op.
pub fn flush_usage(&mut self) {
    if let Some(event) = self.pending_usage.take() {
        for handler in &mut self.usage_handlers {
            // `event` is owned here (taken out of the buffer), so it must be
            // lent to each handler by reference; passing it by value would
            // move it on the first iteration.
            handler.dispatch(&event);
        }
    }
}
@ -515,6 +583,12 @@ impl Timeline {
}
}
/// Hands a completed reasoning item to every registered reasoning-item
/// handler, in registration order.
fn dispatch_reasoning_item(&mut self, event: &ReasoningItemEvent) {
    self.reasoning_item_handlers
        .iter_mut()
        .for_each(|handler| handler.dispatch(event));
}
fn handle_block_start(&mut self, start: &BlockStart) {
self.current_block = Some(start.block_type);
@ -606,6 +680,36 @@ mod tests {
assert!(timeline.current_block().is_none());
}
/// An `UnhandledSse` event must neither open a block nor reach text handlers.
#[test]
fn unhandled_sse_is_ignored_by_timeline_handlers() {
    /// Records every text-block event it is handed.
    struct RecordingTextHandler {
        seen: Arc<Mutex<Vec<TextBlockEvent>>>,
    }

    impl Handler<TextBlockKind> for RecordingTextHandler {
        type Scope = ();

        fn on_event(&mut self, _scope: &mut (), event: &TextBlockEvent) {
            let mut log = self.seen.lock().unwrap();
            log.push(event.clone());
        }
    }

    let seen = Arc::new(Mutex::new(Vec::new()));
    let mut timeline = Timeline::new();
    timeline.on_text_block(RecordingTextHandler {
        seen: Arc::clone(&seen),
    });

    let mystery = Event::UnhandledSse(UnhandledSseEvent {
        provider: "openai_responses".to_string(),
        event_type: "response.mystery".to_string(),
        data_preview: "{}".to_string(),
        data_len: 2,
    });
    timeline.dispatch(&mystery);

    assert!(timeline.current_block().is_none());
    assert!(seen.lock().unwrap().is_empty());
}
#[test]
fn test_meta_event_dispatch() {
// シンプルなテスト用構造体
@ -629,9 +733,63 @@ mod tests {
timeline.on_usage(handler);
timeline.dispatch(&Event::usage(100, 50));
// pending_usage に積まれているだけなのでまだ未発火
assert_eq!(calls.lock().unwrap().len(), 0);
// flush で 1 度だけ発火
timeline.flush_usage();
let recorded = calls.lock().unwrap();
assert_eq!(recorded.len(), 1);
assert_eq!(recorded[0].input_tokens, Some(100));
}
/// Several usage events in one request are merged and delivered exactly once
/// on `flush_usage`; a second flush delivers nothing.
#[test]
fn test_usage_aggregation_and_flush() {
    /// Stores a copy of each usage event delivered to it.
    struct RecordingUsageHandler {
        seen: Arc<Mutex<Vec<UsageEvent>>>,
    }

    impl Handler<UsageKind> for RecordingUsageHandler {
        type Scope = ();

        fn on_event(&mut self, _scope: &mut (), event: &UsageEvent) {
            let mut log = self.seen.lock().unwrap();
            log.push(event.clone());
        }
    }

    let seen = Arc::new(Mutex::new(Vec::new()));
    let mut timeline = Timeline::new();
    timeline.on_usage(RecordingUsageHandler {
        seen: Arc::clone(&seen),
    });

    // Anthropic-style: message_start carries input plus a provisional output.
    let provisional = UsageEvent {
        input_tokens: Some(409),
        output_tokens: Some(1),
        total_tokens: Some(410),
        cache_read_input_tokens: Some(0),
        cache_creation_input_tokens: Some(0),
    };
    // message_delta carries the final output count.
    let final_usage = UsageEvent {
        input_tokens: Some(409),
        output_tokens: Some(71),
        total_tokens: Some(480),
        cache_read_input_tokens: Some(0),
        cache_creation_input_tokens: Some(0),
    };
    timeline.dispatch(&Event::Usage(provisional));
    timeline.dispatch(&Event::Usage(final_usage));

    // Nothing is delivered before the flush.
    assert_eq!(seen.lock().unwrap().len(), 0);

    timeline.flush_usage();
    {
        let recorded = seen.lock().unwrap();
        assert_eq!(recorded.len(), 1);
        assert_eq!(recorded[0].input_tokens, Some(409));
        assert_eq!(recorded[0].output_tokens, Some(71));
    }

    // Flushing again after a flush is a no-op.
    timeline.flush_usage();
    assert_eq!(seen.lock().unwrap().len(), 1);
}
}

Some files were not shown because too many files have changed in this diff Show More