Compare commits

...

508 Commits

Author SHA1 Message Date
abe21a5e8a
chore: tune project memory thresholds 2026-05-26 09:05:14 +09:00
9707a0173a
docs: add memory summary resident injection ticket 2026-05-26 08:50:58 +09:00
e95c35b76d
merge: memory consolidation skip observability 2026-05-26 08:37:32 +09:00
28ad8f01ec
fix: confirm SpawnPod initial run delivery 2026-05-26 08:37:24 +09:00
0a07c50be4
chore: ignore generated insomnia memory 2026-05-26 08:14:46 +09:00
5872a53ec1
fix: suppress memory idle skip notices 2026-05-26 08:03:17 +09:00
5b3b16c4b2
docs: refine memory consolidation skip ticket 2026-05-26 07:53:37 +09:00
46765404bf
chore: complete llm retry continuation ticket 2026-05-26 07:22:45 +09:00
3d3db8b6ac
feat: surface llm retry and continuation state 2026-05-26 07:13:59 +09:00
3f750668ba
docs: refine llm retry continuation ticket 2026-05-26 05:20:43 +09:00
fe9b12aa65
docs: note spawnpod delivery race precedent 2026-05-25 07:03:00 +09:00
a414655366
docs: add spawnpod run delivery ticket 2026-05-25 06:37:38 +09:00
0b6f09c112
docs: add live pending pod picker ticket 2026-05-25 06:29:13 +09:00
e5b918283a
docs: add memory consolidation skip ticket 2026-05-25 05:43:06 +09:00
8dc23183c1
docs: specify stream continuation policy 2026-05-25 04:48:07 +09:00
2bd73fdca8
chore: complete memory audit log ticket 2026-05-25 03:38:18 +09:00
6dc696a461
merge: memory-audit-log 2026-05-25 03:38:03 +09:00
7c14b51bac
memory: add audit log events 2026-05-25 03:24:04 +09:00
8653fdd3e5
docs: add actionbar notice api ticket 2026-05-25 02:40:59 +09:00
ef9c23251e
docs: expand memory audit log ticket 2026-05-25 02:06:42 +09:00
9172ad3af1
fix: refine command mode footer 2026-05-25 01:08:41 +09:00
dae7d10fd4
chore: complete tui-system-command-compact ticket 2026-05-24 09:40:41 +09:00
49abacf694
merge: tui-system-command-compact 2026-05-24 09:40:25 +09:00
7054a179d4
test: clean up compact event assertion 2026-05-24 09:39:57 +09:00
2109733cb7
feat: add manual compact command 2026-05-24 08:59:44 +09:00
ebff9a0293
chore: complete tui-command-mode ticket 2026-05-24 08:39:25 +09:00
8daa0f1a01
merge: tui-command-mode 2026-05-24 08:38:39 +09:00
1f8afd1243
feat: add TUI command mode 2026-05-24 08:32:21 +09:00
f6024c0c2c
docs: replace gui mvp with tui spawned pod panel 2026-05-24 08:10:21 +09:00
0fd0a89730
docs: split tui command and navigation tickets 2026-05-24 07:59:51 +09:00
2b547d6dd9
chore: complete worker-history-append-contract ticket 2026-05-24 07:37:29 +09:00
081070f03e
merge: worker-history-append-contract 2026-05-24 07:37:05 +09:00
614d461877
docs: split maintainer workflows by role 2026-05-24 07:34:30 +09:00
f1bd498df7
fix: route worker history appends through callbacks 2026-05-24 06:44:19 +09:00
6e4512afeb
chore: drop stale tui spawn error todo 2026-05-24 06:29:15 +09:00
918ed3900a
chore: complete tui-input-queue ticket 2026-05-23 13:58:09 +09:00
ff747da1a0
merge: tui-input-queue 2026-05-23 13:57:32 +09:00
c8810280af
feat: queue tui input during runs 2026-05-23 13:57:22 +09:00
82775bf9d3
docs: add manual turn rollback ticket 2026-05-23 13:35:03 +09:00
0775b4112b
chore: complete tui-empty-turn-restore ticket 2026-05-23 13:30:01 +09:00
e79e7362f8
merge: tui-empty-turn-restore 2026-05-23 13:29:07 +09:00
e078be443a
feat: restore rolled back tui input 2026-05-23 13:28:56 +09:00
3db9726062
chore: complete pod-empty-turn-rollback ticket 2026-05-23 12:52:42 +09:00
b9498810a4
merge: pod-empty-turn-rollback 2026-05-23 12:52:12 +09:00
fbe8e64410
chore: handle rolled back run result clients 2026-05-23 12:51:40 +09:00
de653f546a
feat: rollback empty interrupted turns 2026-05-23 12:50:46 +09:00
63407f153c
fix: make visible pod list schema object 2026-05-23 12:29:37 +09:00
ee9bedc5de
chore: complete pod-discovery-restore-tools ticket 2026-05-23 12:05:30 +09:00
da7c1a54a1
merge: pod-discovery-restore-tools 2026-05-23 12:04:59 +09:00
9f7c2f3856
feat: add visible pod discovery tools 2026-05-23 12:04:45 +09:00
3f7de349c3
chore: complete memory-extract-remove-input-cap ticket 2026-05-23 09:14:37 +09:00
1feb560ff9
merge: memory-extract-remove-input-cap 2026-05-23 09:14:15 +09:00
902083bd38
fix: remove memory extract input cap 2026-05-23 09:14:07 +09:00
221b1edd92
chore: complete tui-pod-restore-picker ticket 2026-05-23 09:13:57 +09:00
828004a5e2
merge: tui-pod-restore-picker 2026-05-23 09:13:19 +09:00
b26bc420f1
feat: restore tui sessions by pod 2026-05-23 09:13:06 +09:00
fb7abb1b7c
chore: complete spawned-delegation-scope-reclaim ticket 2026-05-23 08:39:04 +09:00
66996f902b
merge: spawned-delegation-scope-reclaim 2026-05-23 08:38:50 +09:00
d62cd09c4d
fix: reclaim delegated scope from stopped children 2026-05-23 08:38:42 +09:00
a4f03e7688
docs: refine pod visibility and tui restore flow 2026-05-23 08:33:00 +09:00
5ade50dec5
update: tui -rの際のリストの時系列ソート 2026-05-23 08:02:05 +09:00
dbb6cca894
chore: complete tui-streaming-input-loss ticket 2026-05-23 07:16:08 +09:00
7c9abb37ad
merge: tui-streaming-input-loss 2026-05-23 07:15:55 +09:00
0535260c8a
fix: preserve tui input during streaming 2026-05-23 07:15:39 +09:00
c435635e5b
chore: complete tui-context-usage-indicator ticket 2026-05-23 07:15:30 +09:00
c78cd28b27
merge: tui-context-usage-indicator 2026-05-23 07:15:17 +09:00
abe890cda5
feat: show context usage in tui status 2026-05-23 07:15:03 +09:00
fa04b03643
docs: identify tui streaming input loss race 2026-05-23 05:47:59 +09:00
1bebd8b6df
Create tui-parts.md 2026-05-23 05:41:48 +09:00
4dec7f916f
fix: tighten task tool usage guidance 2026-05-23 05:11:48 +09:00
8662ca404f
chore: complete prune-token-budget ticket 2026-05-23 05:00:30 +09:00
5f2efeb75e
merge: prune-token-budget 2026-05-23 05:00:15 +09:00
820dea1873
feat: protect prune tail by token budget 2026-05-23 05:00:06 +09:00
8d5ee0f0b8
chore: complete pod-event-callback-delivery ticket 2026-05-23 04:57:26 +09:00
0d39170bbe
merge: pod-event-callback-delivery 2026-05-23 04:57:10 +09:00
16963d15f2
fix: drain snapshots before pod callbacks 2026-05-23 04:57:03 +09:00
2cea02648f
docs: add memory extract input cap ticket 2026-05-23 04:42:38 +09:00
f55503edc3
docs: add pod event callback delivery ticket 2026-05-23 03:29:01 +09:00
d18e3a0256
docs: add spawned delegation scope reclaim ticket 2026-05-23 03:02:48 +09:00
5ba4be1c9b
refactor: remove legacy plural log entries 2026-05-23 02:03:42 +09:00
90d4c8f5ad
docs: track read pod output log entry bug 2026-05-23 00:53:47 +09:00
61c9719da5
docs: add pod discovery restore tools ticket 2026-05-23 00:09:34 +09:00
73fbdcc025
chore: complete spawned-registry-persist ticket 2026-05-22 23:30:16 +09:00
5fdc46db47
merge: spawned-registry-persist 2026-05-22 23:30:06 +09:00
534c6f4cac
feat: persist spawned pod registry 2026-05-22 23:30:02 +09:00
5540ca3d0e
chore: complete pod-name-resume ticket 2026-05-22 22:57:31 +09:00
edfdca3457
merge: pod-name-resume 2026-05-22 22:57:23 +09:00
bd32f704b4
feat: resume pods by name 2026-05-22 22:57:16 +09:00
e55fc9a834
chore: complete pod-state-write-points ticket 2026-05-22 22:29:23 +09:00
7f6e3b949f
merge: pod-state-write-points 2026-05-22 22:29:12 +09:00
0954a4804a
feat: wire pod metadata lifecycle writes 2026-05-22 22:29:08 +09:00
e9cc4b90dc
chore: complete pod-state-backend ticket 2026-05-22 22:03:36 +09:00
8aca67c97c
style: run cargo fmt 2026-05-22 22:03:27 +09:00
d7eabb18c9
merge: pod-state-backend 2026-05-22 22:03:17 +09:00
b13c2735bb
feat: add pod metadata store backend 2026-05-22 22:03:11 +09:00
67f135fbc6
Merge: live-fork-marker 2026-05-20 06:45:49 +09:00
a728b7045d
chore: 空になった Storage 親見出しを TODO から削除 2026-05-20 06:45:43 +09:00
3eabcb6a6d
ticket: live-fork-marker 完了 2026-05-20 06:45:19 +09:00
ffcba3aa54
chore: auto-fork ロジック二重実装を KNOWN_ISSUES に登録 2026-05-20 06:45:14 +09:00
eb752fb295
ticket: live-fork-marker レビュー (Approve) 2026-05-20 06:44:54 +09:00
ac3ee5fcbe
feat: live auto-fork の marker 形式を確定(seq 比較 + forked_from 記録)
方針: 末尾 entry-count 比較で検知し、元 Segment は immutable のまま
(terminal marker を書き戻さない)。fork lineage は新 Segment の
SegmentStart.forked_from に前向きに記録するため、log だけから辿れる。
過去 fork と対称で、nested fork も marker 位置の調停が不要。

- session-store ensure_head_or_fork に at_turn_index 引数を追加し
  新 Segment へ forked_from を記録
- pod ensure_segment_head の auto-fork も同様に forked_from を記録
  (at_turn_index = writer の現 turn_count)
- fork_at の doc に「元 Segment を mutate しない」invariant を明記
- test: nested past-fork が祖先を不変に保つ / Pod 並行 writer drift で
  auto-fork し forked_from を記録 / 元 Segment に marker が書かれない
2026-05-20 06:42:09 +09:00
46b0e20685
Merge: session-grouping-introduce 2026-05-20 06:29:48 +09:00
6a4ee37be8
ticket: session-grouping-introduce 完了 2026-05-20 06:29:43 +09:00
3f3ead3b71
update: session-grouping review follow-up
- PickerOutcome::Picked から未使用の session_id を除去(pod-cli が lookup_session_of で再解決)
- picker preview が singular AssistantItem も拾うように
- fs_store layout doc に migration(後方互換なし、旧 flat sessions は破棄)を明記
- TaskStore は Session-lifetime、ScopedFs/Tracker は Pod-process lifetime と用語整理
- Pod::session_id / from_manifest_spawned のコメント補強
2026-05-20 06:29:37 +09:00
a6cc05f74c
feat: Session(Segment 群の grouping)を導入
- SessionId 型を新設、各 SegmentStart に session_id を持たせる
- compaction / 内部 fork は同 SessionId を継承、fork() は新 Session を発行
- Store API を (SessionId, SegmentId) ベースに、FsStore layout は
  <root>/<session_id>/<segment_id>.jsonl に
- Store::list_sessions / list_segments(session_id) / lookup_session_of を追加
- restore_by_segment shim を session-store に提供(pod-cli --session で使用)
- SegmentState に SegmentLocation (session_id, segment_id) を保持し ArcSwap で更新
- RestoredState に session_id: Option<SessionId> を追加
- Picker は Session 単位に列挙、leaf segment を解決して resume
2026-05-20 06:17:56 +09:00
e4cda5d3f2
Merge: segment-rename 2026-05-20 05:18:11 +09:00
dd9abfee2e
ticket: segment-rename 完了 2026-05-20 05:18:04 +09:00
d7ff25b6a7
update: 残存 Session 識別子の Segment 化(review follow-up)
レビュー指摘の通り、次の session-grouping-introduce で新 SessionId が
入る前に名称衝突を避けるため取り残しを掃除。

- PodError::Session{Empty,ScopeMissing} → Segment{Empty,ScopeMissing}
- ScopeLockError::SessionConflict → SegmentConflict
- Pod.session_state / SegmentState.set_session_id 系
- source_session_id / prev_session_id / ensure_session_head / short_session
- pod_cli の "Session ID:" 表示
- fs_store の sessions ローカル変数
2026-05-20 05:17:49 +09:00
7577577c9f
update: Session-lifetime/scoped を Pod-lifetime に修正
タスクストア/ファイルトラッカーは compaction を跨いで Pod プロセス寿命まで生きる。
旧 SessionId = Segment の時代の表現を Pod-lifetime に正す。pod_cli の表示も Segment: に。
2026-05-20 05:06:38 +09:00
0d7c37f673
update: SessionId / SessionStart / SessionOrigin 等を Segment 系名称へ
- Type/Function/Variantを Segment* 系へ統一
  - SessionId/SessionStart/SessionOrigin/SessionStartState/SessionState/SessionLogSink/SessionLockInfo
  - new_session_id / session_id / create_session* / list_sessions / lookup_session / update_session / find_by_session
  - protocol Event::SessionRotated → SegmentRotated、CompactDone.new_session_id → new_segment_id
- Module: session_log → segment_log / session → segment (file mv 含む)
  pod 側の session_log_sink → segment_log_sink も同様
- crate 名 (session-store)、CLI flag (--session)、ResumeWithSession (CLI tied) は据え置き
- session-tests/session_metrics_test 等の Store impl も追従
2026-05-20 05:06:04 +09:00
d5c7330659
Merge: entry-hash-abolish 2026-05-20 04:53:52 +09:00
9c1f51b4f0
ticket: entry-hash-abolish 完了 2026-05-20 04:53:47 +09:00
1d8a490504
update: 旧用語コメントの掃除と KNOWN_ISSUES 追記
- 残存していた head_hash / SessionHead 言及コメントを 3 箇所更新
- FsStore::read_entry_count の O(n) 計測コストを KNOWN_ISSUES に登録
2026-05-20 04:53:33 +09:00
6e791d8668
ticket: entry-hash-abolish レビュー (Approve) 2026-05-20 04:49:17 +09:00
d5dff6d17b
update: entry hash chain と session_head mutex を撤廃
- HashedEntry / EntryHash / compute_hash / build_chain 撤去、JSONL は 1 行 1 LogEntry
- SessionOrigin.at_hash → at_turn_index (TurnEnd 由来) に置換
- Pod 側 SessionHead mutex を ArcSwap<SessionId> + AtomicUsize の SessionState に置換
- ensure_head_or_fork は store の entry count と writer の append tally で判定
- session-store から sha2 / hex 依存、pod から parking_lot 依存を削除
2026-05-20 04:31:37 +09:00
35c15923db
ticket: 永続化整理を 8 個に分割
persistence-semantics と pod-persistent-state を実装可能な粒度に分割。
Storage 層 (Phase 1) を entry-hash-abolish / segment-rename /
session-grouping-introduce / live-fork-marker に、Pod 単位永続化
(Phase 2) を pod-state-backend / pod-state-write-points /
pod-name-resume / spawned-registry-persist に切り出した。
2026-05-20 04:07:44 +09:00
be5e413b55
Merge: invoke-turn-llmcall-semantics
# Conflicts:
#	crates/pod/src/controller.rs
2026-05-15 22:08:41 +09:00
58c2a51ae1
ticket: invoke-turn-llmcall-semantics 完了 2026-05-15 21:54:40 +09:00
e00e284d8c
ticket: worker-history-append-contract 作成 2026-05-15 21:53:24 +09:00
e5f5670f68
chore: KNOWN_ISSUES に controller_test::double_run_returns_error の flakiness を追記 2026-05-15 21:52:40 +09:00
a2376b0742
ticket: pod-interrupt-prep-internalize 完了 2026-05-15 21:52:24 +09:00
fbd7d8acb7
ticket: pod-interrupt-prep-internalize レビュー (Approve with follow-up) 2026-05-15 21:51:57 +09:00
282a857248
update: Paused→Run の interrupt 前処理を Pod::run に内包 2026-05-15 21:51:57 +09:00
9304b52f17
ticket: invoke-turn-llmcall-semantics review (Approve) 2026-05-15 21:42:43 +09:00
d0dbac109d
feat: Invoke marker と LlmCall callback を導入し AgentTurn セマンティクスを明確化
- protocol: InvokeKind enum、Event::InvokeStart / LlmCallStart / LlmCallEnd 追加
- llm-worker: Worker.llm_call_count と on_llm_call_start/end callback、turn_count を AgentTurn 数として doc 更新
- session-store: LogEntry::Invoke { ts, trigger } 追加 (replay は marker のみで state 不変)
- pod: run/run_for_notification 開始時に Invoke marker commit、PendingRun::RunForNotification(InvokeKind) で kind を伝搬
- pod ipc: sink + server で Invoke エントリーを Event::InvokeStart として broadcast
- tui: 新 Event 3種を no-op で受理 (UI 設計はチケット範囲外)
2026-05-15 07:04:26 +09:00
d710cac879
ticket: invoke/turn/llmcall 決定事項と実装範囲を明文化 2026-05-15 06:48:57 +09:00
bca9161a42
ticket: Exchange語撤廃、Invoke/Turn/LlmCall でセマンティクスを再整理 2026-05-15 05:41:13 +09:00
61b4c0f5cd
ticket: pod-input-validate-internalize完了 2026-05-15 05:38:27 +09:00
d076258d30
update: Controllerで入力のValidationを行っていた部分をPod側に移す 2026-05-15 05:33:33 +09:00
b5069a9f82
ticket: PodとControllerの責務の抱え違いを修正するチケット 2026-05-15 04:52:39 +09:00
da417efddd
ticket: pod-parent-turn-callback完了 2026-05-15 04:43:12 +09:00
21053f7d01
ticket: pod-parent-turn-callbackレビュー 2026-05-15 04:42:29 +09:00
456af3167b
ticket: 消し忘れ 2026-05-15 04:39:30 +09:00
8019d0d77c
update: 親にターン完了を通達する経路の整理 2026-05-15 04:38:53 +09:00
6e5b1482e6
update: エントリの単数化のフォローアップ 2026-05-14 19:42:23 +09:00
7520dcad87
update: 書き込みの不要なasyncを削除 2026-05-14 19:16:48 +09:00
112ccb2365
ticket: 書き込みのsync化を計画 2026-05-14 16:45:58 +09:00
fe9cecb51a
update: SystemItem1本化 2026-05-14 14:36:29 +09:00
65a5e68035
ticket: イベントプロトコルと永続化におけるシステムイベントの統合 2026-05-14 04:12:40 +09:00
63e27b2dee
chore: cargo fmt 2026-05-14 03:36:08 +09:00
350bb1afd8
fix: 実態にそぐわないEvent::Entryを実装した構造を訂正 2026-05-14 03:35:52 +09:00
0f76142993
refactor: Podのメインループのリファクタリング 2026-05-14 03:27:49 +09:00
7c66b7e073
ticket: 追加:Podのメインループとソケット通信周りのリファクタリング 2026-05-13 22:16:25 +09:00
69d67ab050
ticket: add tui manual compact command 2026-05-13 06:50:27 +09:00
877e094a53
docs: update pod cli manifest flags 2026-05-13 06:44:48 +09:00
5a16cc6daf
ticket: note tui user manifest overlay mismatch 2026-05-13 06:41:23 +09:00
b0c91049b1
close: complete pod manifest and file ref tickets 2026-05-13 06:30:45 +09:00
418451ebf8
merge: file-ref-directory 2026-05-13 06:30:45 +09:00
3d0dce2a2e
merge: pod-cli-manifest-flags 2026-05-13 06:30:45 +09:00
4bde31e952
review: file-ref-directory 2026-05-13 06:30:45 +09:00
533610f053
review: pod-cli-manifest-flags 2026-05-13 06:30:45 +09:00
8e50a9583a
refactor: PodControllerの構造のリファクタリング 2026-05-13 06:07:38 +09:00
cb24586362
docs(tickets): PodControllerの構造調整チケット作成 2026-05-13 05:43:23 +09:00
e32b208dee
chore: planの更新 2026-05-13 05:42:55 +09:00
7363b105f6
feat: handle directory file refs 2026-05-13 02:57:58 +09:00
0ebe173009
feat: organize pod manifest cli flags 2026-05-13 02:57:50 +09:00
023de0f58d
feat: Languageインストラクションの追加 2026-05-13 02:27:30 +09:00
eae0efb2c0
update: fmt + memoryに用いる言語の構成 2026-05-13 01:57:04 +09:00
0141880b9d
fix: compact時にToolCallとOutputの間でCutしてしまう問題 2026-05-13 00:59:02 +09:00
2b5da965ca
chore: workflowの調整・knowledgeの追加テスト 2026-05-13 00:06:33 +09:00
ba72a66a40
merge: lint common crate 2026-05-12 21:56:49 +09:00
671e07a31e
chore: complete lint common crate ticket 2026-05-12 21:56:39 +09:00
7ce4600a42
refactor: extract shared lint record primitives 2026-05-12 21:56:25 +09:00
2f70411254
docs(tickets): submit時FileRefでディレクトリを参照した時の挙動 2026-05-12 17:39:40 +09:00
7c5b810fa1
docs(tickets): manifest-output-upload-limits完了 2026-05-12 17:27:47 +09:00
86fc889606
feat: add manifest output upload limits 2026-05-12 16:20:15 +09:00
59bf20f2cd
Merge branch 'tui-knowledge-completion' into develop 2026-05-12 15:43:29 +09:00
f7f59dd30c
docs(memory): fix knowledge dir path in collect_resident_knowledge doc 2026-05-12 15:07:39 +09:00
806440ac7a
docs(tickets): review tui knowledge completion (approve) 2026-05-12 14:56:30 +09:00
7b8eb3af8d
feat(pod): wire knowledge slugs into # completion 2026-05-12 14:45:46 +09:00
705c873097
docs(tickets): tui knowledge completion unimplemented fix 2026-05-12 14:40:37 +09:00
ae6c27a5c7
docs(tickets): define work item query strategy 2026-05-12 02:32:32 +09:00
03a577527a
docs(tickets): use timestamp work item ids 2026-05-12 02:07:29 +09:00
b4dff2835e
docs: add ai maintainer work item plan 2026-05-12 01:53:52 +09:00
e87a515474
docs(tickets): add lint-common crate ticket 2026-05-12 00:06:06 +09:00
9df6bd5fcb
merge: workflow crate extraction 2026-05-11 22:50:19 +09:00
6610ef8150
docs(tickets): complete workflow crate extraction 2026-05-11 22:50:06 +09:00
7159a66a60
review: workflow crate extraction 2026-05-11 22:49:50 +09:00
7db4146f3d
refactor: extract workflow crate 2026-05-11 22:49:07 +09:00
d8f29bcbcb
merge: anthropic assistant burst bundling 2026-05-11 22:24:36 +09:00
f444b387be
docs(tickets): complete anthropic assistant burst bundling 2026-05-11 22:23:53 +09:00
d18f536945
review: anthropic assistant burst bundling 2026-05-11 22:23:38 +09:00
19badfe8b7
fix: bundle anthropic assistant bursts 2026-05-11 22:22:36 +09:00
31f94bf791
merge: memory usage metrics 2026-05-11 21:46:24 +09:00
ac09bfcc21
docs(tickets): complete memory usage metrics 2026-05-11 21:46:19 +09:00
76f83a0894
review: memory usage metrics 2026-05-11 21:46:19 +09:00
d581a35426
feat: add memory usage event metrics 2026-05-11 21:29:48 +09:00
f69aa469f8
docs(tickets): complete memory phase naming cleanup 2026-05-11 17:16:36 +09:00
3d4d83db68
docs(tickets): simplify memory usage metrics 2026-05-11 16:54:23 +09:00
9abddb5a95
fix: remove remaining memory phase wording 2026-05-11 01:57:39 +09:00
075730d0a6
docs(tickets): compact-worker-occupancy-cap完了 2026-05-11 01:56:20 +09:00
aae36f2b56
update: memoryシステムの"Phase"表記を撤廃 2026-05-11 01:55:28 +09:00
e418f3996f
docs(tickets): memory-extract-occupancy-cap 完了 2026-05-11 01:32:45 +09:00
240b36d738
review: memory-extract-occupancy-cap (approve) 2026-05-11 01:25:20 +09:00
0356e29707
feat: extract worker サーキットブレーカーを占有量ベースに統一 2026-05-11 01:20:37 +09:00
eec33aba98
docs(tickets): add memory-extract-occupancy-cap ticket 2026-05-11 01:14:59 +09:00
248e3d7aa2
Merge branch 'compact-worker-occupancy-cap' into develop 2026-05-11 01:12:32 +09:00
3beaff7679
review: compact-worker-occupancy-cap (set_max_turns 分岐削除) 2026-05-11 00:56:41 +09:00
ef0cdf75e2
feat: compact worker サーキットブレーカーを占有量ベースに統一 2026-05-11 00:43:16 +09:00
5976aac78d
docs(tickets): add memory audit log ticket 2026-05-11 00:06:42 +09:00
d818b37f3d
docs(tickets): completed tickets cleanup 2026-05-10 17:31:34 +09:00
f8bae4a298
merge: memory prompt record policy 2026-05-10 14:40:58 +09:00
3abf4d4d1a
docs: generalize memory prompt record policy 2026-05-10 14:40:52 +09:00
e2a6c43fea
docs: memory effectiveness plan 2026-05-10 01:25:10 +09:00
df01d8e567
docs: memory prompt ticket policy ticket 2026-05-10 01:13:57 +09:00
e647d1a7c9
feat: client-crateの実装 2026-05-10 00:57:50 +09:00
29f45bee6e
chore: E2Eの計画とgit運用の話 2026-05-09 05:04:57 +09:00
576814ed20
docs(tickets): file-ref-symlink-diagnostics完了 2026-05-09 04:22:27 +09:00
5590fc4ff1
docs(tickets): file-ref-symlink-diagnosticsレビュー 2026-05-09 04:21:56 +09:00
25d22fc4af
feat: Toolsのシンボリックリンク対応 2026-05-09 04:21:56 +09:00
9194b10d50
docs(tickets): tui-assistant-markdown完了 2026-05-09 03:31:49 +09:00
99bc161e43
docs(tickets): permission既定policy整理チケット追加 2026-05-09 03:27:22 +09:00
37dfd7e327
docs(tickets): permission-extension-point完了 2026-05-09 03:20:17 +09:00
7bbc9afc7a
feat: パターンベースのツール権限制御を追加 2026-05-09 03:20:02 +09:00
3337731222
chore: tui compact progress ticket完了 2026-05-09 03:14:23 +09:00
16bd8e3a88
feat: compactのプログレス表示 2026-05-09 03:11:53 +09:00
ed08ee1ce1
chore: git方針の変更とセマンティクス変更の計画の帳尻合わせ 2026-05-08 20:17:11 +09:00
12e7d27a08
docs(tickets): 自己改善workflowの設計 2026-05-08 01:50:55 +09:00
f09e6a0156
docs(tickets): workflow-directory-layout完了 2026-05-08 01:08:25 +09:00
8c12e799da
update: Workflowディレクトリ修正のフォローアップ 2026-05-08 00:59:08 +09:00
04f1837fa9
feat: Workflowの読み取り位置変更の実装 2026-05-08 00:15:50 +09:00
5ec24707f4
docs(tickets): reportの運用・Workflowのディレクトリ位置修正 2026-05-07 23:34:00 +09:00
c0c5eb9ad2
feat: TUIのmarkdown対応 2026-05-05 18:30:25 +09:00
e9e80c5918
docs(tickets): PermissionのチケットとTUIのmd表示 2026-05-05 17:16:03 +09:00
f4ab361889
docs(tickets): agent-skills完了 2026-05-05 16:00:40 +09:00
60c779b80c
update: Agent skills実装のレビュー・対応 2026-05-05 13:54:02 +09:00
50fa2ce3f7
feat: writingに対する基本的な指示promptを追加 2026-05-05 13:42:34 +09:00
760b304969
feat: agent skillsの互換実装 2026-05-05 13:16:10 +09:00
5fe9a5805e
fix: Reasoningの永続化のスキーマのミスを修正 2026-05-05 12:30:29 +09:00
64b5d61a23
docs(tickets): turnのセマンティクスを変える計画 2026-05-05 12:29:52 +09:00
461d7f9142
docs(tickets): reasoning-history-persist完了 2026-05-04 23:06:21 +09:00
6f62ea8ce8
update: Reasoningコンテキスト管理のレビュー・対応 2026-05-04 23:05:08 +09:00
9fd61e8068
feat: Reasoningのコンテキスト管理の対応 2026-05-04 21:31:44 +09:00
dd3903efde
docs(tickets): Reasoningのコンテキスト管理とPruneの調整チケット追加 2026-05-04 21:16:31 +09:00
089db05535
docs(tickets): tui-task-display完了 2026-05-04 20:43:21 +09:00
0a83909f30
feat: Task表示のレビュー・修正 2026-05-04 17:28:39 +09:00
9072ac4e03
feat: TUI上に進行中のTaskを表示する実装 2026-05-04 17:06:02 +09:00
6178812979
docs(tickets): Compaction進行中のライブ表示 2026-05-04 17:03:51 +09:00
d385a72d85
docs(tickets): post-run memory detach 完了 2026-05-04 16:11:38 +09:00
c48b99cfe3
feat: Post処理の非同期化・Busy状態の削除 2026-05-04 15:52:27 +09:00
632d63df33
docs(tickets): 追加:タスクリストの表示とコンテキスト長インジケータ 2026-05-04 15:32:40 +09:00
107dcf6636
docs(tickets): Busyの切り離し 2026-05-04 13:20:25 +09:00
7f9d2f93f9
Merge branch 'llm-worker-transient-retry' into develop 2026-05-04 13:16:26 +09:00
9771533b31
docs(tickets): pod状態のTUI同期完了 2026-05-04 13:08:44 +09:00
36a4e9f9b8
feat: Podのステータス同期の修正 2026-05-04 12:55:29 +09:00
0be30052c1
feat: Podのステータスを厳密にし、同期漏れを防ぐ 2026-05-04 12:55:11 +09:00
72e03f9e8f
docs(tickets): llm-worker-transient-retry完了 2026-05-04 12:51:41 +09:00
09a1cde92c
docs(tickets): llm-worker-transient-retry レビュー追記
7183847 のレビュー結果を Approve として記録する。チケット要件
(リトライ対象 / バックオフ / Retry-After 上書き / mid-stream 温存 /
完了条件) はすべて満たしており、コードベースの層構造を歪める変更も
ない。Retry-After テストの方針差 (実時間 1s vs 仮想時間 5s) と
connect refused テストの試行回数未検証は non-blocking として
review.md に記録。
2026-05-04 12:49:13 +09:00
7183847ee5
feat(llm-worker): HTTP transient エラーへのリトライを追加
`transport.rs` の HTTP 送信〜ステータスチェック区間に指数バックオフ
+ フルジッターのリトライループを追加する。SSE 読み出し開始後 (
`bytes_stream()` 以降) のエラーは従来どおりそのまま流す。

- `is_retryable(&ClientError)`: 408/425/429/500/502/503/504/529 と
  reqwest の connect/timeout のみ true
- `RetryPolicy` (default: base 500ms / cap 10s / max_attempts 4 /
  total_timeout 30s)
- `Retry-After` ヘッダ (秒数) があればバックオフを上書き
- リトライ発火ごとに warn! でステータス・attempt・wait を出す

ref: tickets/llm-worker-transient-retry.md
2026-05-04 12:45:33 +09:00
1451998e0e
Merge branch 'tui-system-message-render' into develop 2026-05-04 12:10:17 +09:00
5bc6fb4b5c
docs(tickets): tui-system-message-render完了 2026-05-04 12:05:50 +09:00
ac1a672973
feat: システムメッセージをTUIで表示させる 2026-05-04 12:04:09 +09:00
4ec1c8b64c
update: Taskツールの説明を更新 2026-05-04 11:32:04 +09:00
771503e69c
docs(tickets): tuiトークン表示完了 2026-05-04 00:07:59 +09:00
89a66f1d58
docs(tickets): tuiトークン表示レビュー 2026-05-04 00:05:59 +09:00
8be579dc3c
feat: tuiのトークン集計表示の修正 2026-05-04 00:01:37 +09:00
ffd59b05a1
docs(tickets): TUI表示トークンの集計の修正 2026-05-03 23:28:31 +09:00
d2f2b7920d
docs(tickets): チケット追加:システムメッセージのTUI表示とセッションのロールバック・フォーク 2026-05-03 22:43:21 +09:00
357fedc1a1
docs(tickets): tui-pod-event-render 完了 (消し忘れ片付け) 2026-05-03 22:14:24 +09:00
a07ccb0158
update: Taskツール群の説明を更新 2026-05-03 22:09:45 +09:00
9cbcd87f20
docs(tickets): notify-history-persist 完了 (消し忘れ片付け) 2026-05-03 22:07:18 +09:00
1602eea2c8
docs(tickets): session-todo-reminder spec を pending_history_appends に改訂 (AGENTS.md 揮発禁止に整合) 2026-05-03 21:53:20 +09:00
878e64597e
Merge branch 'session-todo-tools' into develop
# Conflicts:
#	tickets/session-todo.md
2026-05-03 21:50:30 +09:00
46208a3b45
docs(tickets): session-todo (本体) 完了 2026-05-03 21:48:44 +09:00
c214ea79d4
update: tuiからspawnする際にエラー詳細が落ちていた問題を修正 2026-05-03 21:47:54 +09:00
c693126703
docs(tickets): notify-history-persist完了 2026-05-03 21:37:13 +09:00
46390a9006
docs(tickets): session-todo レビュー反映 (Approve) + reminder spec 段階レビュー 2026-05-03 21:34:54 +09:00
420f74edc6
fix: TaskStore snapshot を JSON ブロック化 + 構造ラウンドトリップテスト追加 2026-05-03 21:33:50 +09:00
ceafff92b6
fix: TaskStore snapshot を compact 後 history の末尾に置いて retained 中の TaskCreate 重複を防ぐ 2026-05-03 21:26:49 +09:00
e8045776f2
feat: notify-history-persist実装 2026-05-03 19:27:22 +09:00
6f2aca84bf
feat: セッション内 Task ツール (TaskCreate/List/Get/Update + 履歴 replay + compact 跨ぎ) 2026-05-03 19:03:52 +09:00
28fe1dae1c
docs(tickets): セッション内 Task ツールを本体と注意機構に分割 2026-05-03 19:03:48 +09:00
ee9c60bec2
Merge branch 'resume-scope-claim' into develop
# Conflicts:
#	TODO.md
2026-05-03 18:59:01 +09:00
7e0d61eb08
docs(tickets): resume-scope-claim 完了 2026-05-03 18:56:39 +09:00
4328cb334a
fix: resume-scope-claim レビュー指摘対応 (deny セマンティクス doc・破損 snapshot の警告ログ) 2026-05-03 18:56:21 +09:00
02dcf182a7
docs(tickets): resume-scope-claim レビュー (Approve) 2026-05-03 18:46:15 +09:00
8ff6318cef
docs(tickets): Notifyが永続化されていない問題についてのチケット 2026-05-03 18:45:10 +09:00
c759307e40
feat: resume時のscope claimを過去の有効scopeに揃える 2026-05-03 17:12:36 +09:00
0c10150b02
feat: session-metrics完了 2026-05-03 15:56:06 +09:00
b9635c5002
feat: session-metrics実装 2026-05-03 15:10:43 +09:00
9010a920a4
feat: TUIに他Podからの通知を表示する 2026-05-03 12:45:05 +09:00
44bc35bd31
docs(tickets): 消し忘れチケットども 2026-05-03 01:16:22 +09:00
f8a3a7838b
chore: TODOから[ ]を削除 2026-05-03 01:08:43 +09:00
8b5f75ecc4
Update AGENTS.md 2026-05-03 01:06:23 +09:00
be22c65af3
docs(tickets): tuiでPodEventを表示する・セッション中でメトリクスを取るチケットを追加 2026-05-03 01:01:09 +09:00
d9f55185f0
update: tuiの文字入力のCtrlブロックを追加 2026-05-03 00:44:38 +09:00
4b9b4f1450
update: memoryシステム周りのプロンプトの整理 2026-05-03 00:27:10 +09:00
670abdc336
docs(tickets): memory-consolidation-drop-input-cap完了 2026-05-02 23:57:36 +09:00
2d5c6aad5f
update: Consolidationの不要なToken上限の削除 2026-05-02 23:48:33 +09:00
f16ccc0a09
docs(tickets): セッション内TODOツールと注意機構のチケット 2026-05-02 23:48:01 +09:00
6ebd10a006
update: codexのキャッシュ利用が出来てなかった問題 2026-05-02 03:23:44 +09:00
d8d802d120
fix: tuiからのPod作成の挙動を修正・開発時にcargo runでpodを起動する経路を実装 2026-05-02 02:13:30 +09:00
288e2239d4
Merge branch 'workflow-impl' into develop
# Conflicts:
#	crates/pod/src/controller.rs
#	crates/pod/src/pod.rs
2026-05-02 01:47:49 +09:00
8194bb10f4
docs(tickets): workflow完了 2026-05-02 01:40:06 +09:00
a9ad0c2e0d
update: workflowの実装修正 2026-05-02 01:38:50 +09:00
8ed739261f
docs(tickets): 消し忘れチケット 2026-05-02 01:36:19 +09:00
5a29c90786
feat: dynamic-scopeの実装修正 2026-05-02 01:33:32 +09:00
0d66b397af
feat: dynamic-scopeの実装 2026-05-02 01:26:17 +09:00
fa84d48c62
fix: SpawnPodの起動経路の問題を修正 2026-05-02 01:09:57 +09:00
433ee0f37c
Implement workflow MVP 2026-05-02 00:46:47 +09:00
00755cf1b8
update: manifestで一部値のzeroの扱いを変更 2026-05-02 00:08:46 +09:00
31d9b9b2b7
fix(llm-worker): openai_responsesのroleの最新の投影を反映 2026-05-01 23:55:26 +09:00
d18de45293
chore: dev-depsの整理 2026-05-01 23:50:14 +09:00
2790a35acf
docs(tickets): workflowのプロパティ名の修正 2026-05-01 23:40:47 +09:00
776a6a29bd
chore: 依存パッケージの集約 2026-05-01 23:35:46 +09:00
0046f1efc9
Merge branch 'tui-mouse-scroll' into develop 2026-05-01 23:22:58 +09:00
1e060914ce
feat: memory P2の修正 2026-05-01 23:22:49 +09:00
2159711cd0
feat(tui): マウスホイールスクロール完了 2026-05-01 23:16:02 +09:00
1afe7c53aa
スキルの整理 2026-05-01 23:14:37 +09:00
25b016f3da
feat(tui): マウスホイールでスクロールする実装 2026-05-01 23:14:16 +09:00
beb6b686a1
メモリPhase2の実装 2026-05-01 23:00:55 +09:00
0070aabd26
docs: memoryシステムの仕様変更と、動的Tool・VCSの話 2026-05-01 18:47:52 +09:00
3e2c9ee32b
bashツール一旦完了 2026-05-01 18:47:09 +09:00
97f9b14ceb
bashツール実装 2026-05-01 18:14:13 +09:00
6e9ef385c8
ClaudeによるTool出力メタ認知 2026-05-01 02:47:44 +09:00
c331936455
ファイル参照を与えた際に自動的に読ませる実装 2026-04-30 21:58:10 +09:00
75c61bd3cb
TUI補完の細かい挙動修正 2026-04-30 14:38:03 +09:00
6788db1ef2
tuiの補完の実装 2026-04-30 12:46:48 +09:00
4b09ff0234
claudeの動的ツールの調査レポート 2026-04-30 01:35:42 +09:00
9177ee8ef3
fix: セッション復元時にhistoryが表示されない問題 2026-04-30 00:02:26 +09:00
d2ee84775b
cargo fmt 2026-04-29 23:20:25 +09:00
cb1d3e72e4
temperatureがcodexエンドポイントで使えない件の修正 2026-04-29 23:20:16 +09:00
1b1f8f40c6
session-log関連完了 2026-04-29 23:00:55 +09:00
c70b0bdc5d
session-logリファクタのレビュー・修正 2026-04-29 22:55:36 +09:00
010edf2c94
session-log-segments実装 2026-04-29 22:42:10 +09:00
1ab6dbcee3
session-log-decouple-item実装 2026-04-29 22:24:18 +09:00
bd8154204a
session-storeの永続化形式からllm-workerの内部型を削除 2026-04-29 22:09:30 +09:00
7c2ef374e6
tui-input-word-motion完了 2026-04-29 21:45:49 +09:00
119a73c112
tui-input-word-motionレビュー・半角カナに関する修正 2026-04-29 21:41:24 +09:00
862c38d7f7
tuiの単語単位Backspace 2026-04-29 21:31:19 +09:00
0ad3923932 tuiの単語境界カーソル移動実装 2026-04-29 21:23:29 +09:00
588c25a570 workflowのチケットとtuiの単語境界カーソル移動のチケット 2026-04-29 21:22:49 +09:00
043c2e862c pod-registry-rename完了 2026-04-29 21:05:09 +09:00
27e5074450 pod-registry-rename修正 2026-04-29 21:04:47 +09:00
5fa060a748 pod-registryのモジュール分割 2026-04-29 20:14:34 +09:00
71434b9d8b scope-lock -> pod-registry 2026-04-29 20:01:32 +09:00
8a1baa5020 scope.lockの意味変更に伴うクレート名変更チケット作成 2026-04-29 19:54:08 +09:00
6fa7f169b4 memory-phase1-extract完了消し忘れ 2026-04-29 19:53:37 +09:00
44d660c894 tui-session-restore完了 2026-04-29 19:52:24 +09:00
b9575f1534 tuiからセッションを復帰する経路の実装 2026-04-29 19:03:03 +09:00
e98a596235 不要なforkの削除 2026-04-28 20:19:50 +09:00
3c90729156 resumeの実装 2026-04-28 18:52:58 +09:00
51309ec5bf max_tokenとreasoning_tokenに関するdocs修正 2026-04-28 18:01:17 +09:00
af57d5b566 max_tokensのスキーマ不整合に関する修正 2026-04-28 17:58:24 +09:00
0f6b724184 tui-thinking-display完了 2026-04-28 16:23:09 +09:00
d5d0e4124b tui-thinking-display修正 2026-04-28 16:22:45 +09:00
b192a3ce4e TUIにThinkingを表示する実装 2026-04-28 16:10:48 +09:00
1466f11a0b ThinkingのTUI表示のチケット作成 2026-04-28 16:07:41 +09:00
a79abb3c27 session-store-llm-worker-type-ownership完了 2026-04-28 15:44:16 +09:00
e72aac8cf2 セッション関連の責務の分離 2026-04-28 15:43:34 +09:00
06c5ecfeb3 memory-phase1の、トークンカウントの実装位置が悪い件 2026-04-28 14:24:38 +09:00
092386d98f memory-phase1-extract修正 2026-04-28 13:12:21 +09:00
03c4f49f73 memoryを抽出する仕組みの実装 2026-04-28 12:58:33 +09:00
ed5c07c301 session-restoreの設計更新 2026-04-28 12:42:49 +09:00
18dd98e05b session復帰経路を作るチケット・テスト用のファイルの削除 2026-04-28 12:31:38 +09:00
46e1b92ade memoryが.insomnia配下ではなくworkspace root直下を想定していた問題の修正 2026-04-28 11:53:08 +09:00
341bd71dc5 memoryのクエリと動作のテスト 2026-04-28 11:37:41 +09:00
1e068f4beb worker-generation-settings完了 2026-04-28 09:38:23 +09:00
bc9f93ab09 生成設定のmanifest化の実装 2026-04-28 09:37:22 +09:00
7d0b639fa4 cargo fmt 2026-04-27 22:51:07 +09:00
553b69b55a model-reasoning-control完了 2026-04-27 22:49:56 +09:00
532218dd40 model-reasoning-controlレビュー 2026-04-27 22:41:51 +09:00
f8948be43d model-reasoning-control実装 2026-04-27 22:25:27 +09:00
34d5c3aab6 home-dir-layout完了 2026-04-27 22:11:15 +09:00
5c91535a74 home-dir-layout修正 2026-04-27 22:10:36 +09:00
915061fd49 home-dirの整理 2026-04-27 21:45:30 +09:00
29812a9262 reasoningを利用可能にするチケット 2026-04-27 20:21:22 +09:00
b62b5c47b2 memory-resident-injection完了 2026-04-27 18:30:21 +09:00
7e921be43e メモリー内容のシステムプロンプトへの埋め込みの実装 2026-04-27 18:25:47 +09:00
2e562dc3e0 環境変数に関するチケットの修正 2026-04-27 18:11:40 +09:00
13974c66fb pod-spawn-ui完了・設定UI関連のチケット作成 2026-04-27 17:38:32 +09:00
97446180cd memory-search-tool完了 2026-04-27 17:26:07 +09:00
c73c870844 memoryサーチツールを実装 2026-04-27 17:24:08 +09:00
8ab1e5a858 manifest読み込み経路の整理チケット作成 2026-04-27 17:17:00 +09:00
d000e777b7 manifest側で設定ファイルの収集を行うようにした 2026-04-27 16:52:23 +09:00
e296320c7a tuiからSpawnする仮UI 2026-04-27 16:22:06 +09:00
db187813ff memory-file-format完了 2026-04-27 13:59:04 +09:00
837e77449d メモリーに関するクレート作成・ファイル構造の実装 2026-04-27 13:33:31 +09:00
be88b4bae5 セグメントのセッション永続化チケット 2026-04-27 13:25:16 +09:00
e122f4aadb submit-segment-protocol完了 2026-04-27 11:42:42 +09:00
ee6d2d2100 submitをvec segmentを受け付ける形に変更 2026-04-27 11:03:58 +09:00
de3e4f78ab notification-naming完了 2026-04-26 23:30:46 +09:00
7903259348 Method::NotifyとEvent::Notificationが紛らわしい問題 2026-04-26 23:25:50 +09:00
30508f7851 memory実装チケット 2026-04-26 17:00:38 +09:00
505d3a8341 カタログの実装完了、ドキュメント整理 2026-04-24 13:33:56 +09:00
bd0e10653d podのモジュール分割完了 2026-04-24 11:58:11 +09:00
79987dd754 podのモジュール分割 2026-04-24 11:48:27 +09:00
539dc604e3 modelsとprovidersをカタログ化 2026-04-24 10:45:03 +09:00
a4bd599d0f モデルとプロバイダーをカタログ化するチケット 2026-04-23 16:18:30 +09:00
553e281ba1 llm-provider-catalog実装 2026-04-23 15:37:51 +09:00
85de0aa2b1 Agents.mdを一定閾値でturncateする仕様を削除 2026-04-23 01:34:25 +09:00
4ae2a46ccb pod-prompt-catalog完了 2026-04-22 17:43:42 +09:00
c6c02f846f Promptを一元管理するファイルから参照する実装 2026-04-22 17:43:05 +09:00
6c406cdfd5 Memoryシステムの整理・Promptカタログチケット 2026-04-22 13:21:15 +09:00
bb14109b4e TUIのEditツール周りの表示とカラー 2026-04-22 01:17:58 +09:00
a39bce779c 複数クライアント間でのRunメソッドの同期漏れ 2026-04-21 23:59:49 +09:00
18533b3580 改行テキストの行計算・Padding設定 2026-04-21 23:26:34 +09:00
789348c252 TUIのオーバーホール実装 2026-04-21 23:12:35 +09:00
e0e6cc8616 protocol-tool-result-shape完了 2026-04-21 20:52:19 +09:00
b8d5398520 TUIに向けたprotocolの詳細調整 2026-04-21 20:50:59 +09:00
94e40c8ee4 TUIオーバーホールチケット 2026-04-21 19:37:14 +09:00
7698619f1b メモリシステムの設計 2026-04-21 19:23:07 +09:00
822f8d9ec2 モデル性能のハードコードを消し飛し、Codexのフォーマットの修正 2026-04-21 18:35:56 +09:00
25906220eb Docsのアップロード 2026-04-21 17:39:43 +09:00
0668bd5213 protocol拡張の実装(完了) 2026-04-21 09:30:02 +09:00
f4d21cd994 protocol拡張の実装 2026-04-21 09:27:58 +09:00
0ea1ca5ec7 protocolの拡張に関するチケット修正 2026-04-21 08:42:54 +09:00
d9358047cf llm-auth-codex-oauth完了 2026-04-20 23:14:45 +09:00
177ff80615 codexのOAuthを使う実装 2026-04-20 23:13:52 +09:00
bd8c5601c7 openai-responses完了 2026-04-20 03:00:48 +09:00
97326eef04 openai-responses対応 2026-04-20 02:59:16 +09:00
bf072cc4f0 llm-model-config完了 2026-04-20 00:57:27 +09:00
b1e4572823 llm-model-configの実装 2026-04-19 23:32:14 +09:00
1a4863d211 llmのモデル情報の設計チケット 2026-04-19 22:29:37 +09:00
ebee0b95ef マニフェストを継承してPodをスポーンさせる 2026-04-19 18:01:47 +09:00
170e0c2099 SpawnPodツールが落ちる問題の発見 2026-04-19 15:14:15 +09:00
79b0e3d51b Pause実装完了 2026-04-19 15:12:06 +09:00
0284b5a76f TUIからPauseする実装 2026-04-19 14:27:53 +09:00
78cf4599a2 compact-improvements をマージ
- 閾値の個別指定化 (compact_threshold / compact_request_threshold) と Option 化
- 占有量ソースを UsageRecord timeline に一本化 (last_input_tokens 撤去)
- retained_turns → retained_tokens
- compact worker をツール駆動に再設計 (mark_read_required / add_reference / write_summary / read_file)
- Auto-read budget と compact_worker_max_input_tokens の上限制御
- 新 history は system message のみで構成 [summary, auto-read..., references, retained...]
2026-04-19 12:14:16 +09:00
18b7556e0a compact-improvements チケット完了 2026-04-19 12:13:03 +09:00
5375698813 anthropic-cache完了 2026-04-19 12:07:03 +09:00
1a3e9030bd compact: retained_tokens テスト値を現実的な値に変更
2 を 8_000 に。retained_turns 時代の名残で 2 は "2 トークン保持" と読めてしまい意味不明だったため。
2026-04-19 12:02:11 +09:00
fc634bcd87 Anthropicのキャッシュポイントを打つ実装 2026-04-19 11:57:55 +09:00
da021103e4 compact: compact worker をツール駆動マルチターンに再設計
段階 4〜9 を一括で実装:
- mark_read_required / add_reference / write_summary + read_file の 4 ツールで
  compact worker を駆動。結果は CompactWorkerContext に集約
- 新セッションの先頭を [summary, ...auto-read, references, ...retained] で構築
- デフォルトリファレンスは tracker.recent_files(5) から
- auto-read は compact_auto_read_budget で総量制限。超過は即エラー
- compact worker 自身は compact_worker_max_input_tokens で累計入力を制限
- 5 セクション要約フォーマットに system prompt を更新
- write_summary 未呼び出し / auto-read 空のときは 1 回追加プロンプトで促す
2026-04-19 09:26:55 +09:00
db2dd8a3c0 compact: retained_turns を retained_tokens に置換
保護単位をターン数からトークン量に変更。compact 時のカット位置は
Pod::split_for_retained() で UsageRecord を逆算ソースとして決定し、
ターン境界ではなくアイテム単位で切る。デフォルトは 8000 トークン。
2026-04-19 08:56:16 +09:00
83f68e35ad compact: 要約入力から content/arguments/reasoning を除く
ToolCall.arguments, ToolResult.content, Reasoning は auto-read 側の責務。
要約は意思決定と意図のキャプチャに集中させ、コードや tool IO は持ち込まない。
2026-04-19 08:51:04 +09:00
f0a865552c compact: 閾値を個別指定化し占有量ソースを UsageRecord に一本化
- manifest に compact_request_threshold を追加 (proactive と safety net を個別指定)
- CompactState の両閾値を Option<u64> 化、last_input_tokens を撤去
- 閾値判定は Pod::total_tokens() / usage_history 経由の実測値ベースに切替
- turn_threshold → request_threshold にリネーム、Between-requests のログへ
2026-04-19 08:49:25 +09:00
a88febc15e 引数なしでToolCallすると構造エラーになる問題の修正 2026-04-19 08:39:16 +09:00
143715fb22 pod-upstream-event完了 2026-04-19 08:31:42 +09:00
3cdd8323de pod-upstream-event修正 2026-04-19 08:31:16 +09:00
7637f0e440 pod-upstream-event実装 2026-04-19 08:20:07 +09:00
cc7bb0b711 マニフェスト改修完了 2026-04-19 08:05:20 +09:00
3e788da7a7 プロジェクトManifestの相対基準の修正 2026-04-19 08:03:59 +09:00
6434b068fe マニフェスト解決の相対パス化 2026-04-19 07:53:54 +09:00
3cb9b251fe Pod操作ツール実装完了 2026-04-19 06:41:20 +09:00
1668e981b4 Pod操作ツール修正 2026-04-19 06:40:45 +09:00
bb71439787 Pod操作ツールの実装 2026-04-19 06:32:44 +09:00
8087349474 SpawnPodツール完了 2026-04-18 20:31:10 +09:00
cdbad36a48 scope-lock完了 2026-04-18 19:26:23 +09:00
2a7ee256f5 Scope-Lockの実装 2026-04-18 19:25:03 +09:00
8e43503bdb Compactのチケット修正 2026-04-18 19:15:39 +09:00
4f5f5bfe76 チケット分割 2026-04-18 18:48:26 +09:00
5786fedc1c Notificationの実装 2026-04-18 17:48:35 +09:00
e9a464a23c Interceptorの責務分離完了 2026-04-18 17:27:22 +09:00
9d038fc3b7 interceptorの修正 2026-04-18 17:19:59 +09:00
cc8c4c8189 shutdown実装完了 2026-04-16 13:55:17 +09:00
aa8a1ee64b プロトコル経由のshutdow経路 2026-04-16 13:49:53 +09:00
ac5265be41 instruction-file-refs完了 2026-04-16 13:08:08 +09:00
9e11cfac7e instructionファイルの定義・読み込みの実装 2026-04-16 11:16:16 +09:00
2052ac498c pod-factory完了 2026-04-16 00:57:26 +09:00
e8a5fe557a podのマニフェストの分離実装 2026-04-16 00:54:27 +09:00
5b25287471 tui-notification-channel完了 2026-04-15 12:59:15 +09:00
62c5cb87dd warn/errorのTUIへの通知ルート 2026-04-15 12:58:31 +09:00
6e10a722c3 greetingカードの作成 2026-04-15 10:35:15 +09:00
7c59b8677b AGENTS.md完了 2026-04-15 05:21:54 +09:00
4b1a73d38f AGENTS.mdの読み取り 2026-04-15 05:21:43 +09:00
3a02358668 tool出力制限の修正 2026-04-15 04:23:07 +09:00
fade875c6f tool出力の制限 2026-04-15 04:08:56 +09:00
61fabbc3b8 システムプロンプト完了 2026-04-15 02:46:12 +09:00
34ac754644 システムプロンプトの実装 2026-04-15 02:44:42 +09:00
203f188dae tuiの文字間隔修正・prompt設計の計画 2026-04-14 13:11:18 +09:00
57fb22ed94 scope再設計の完了 2026-04-14 12:10:00 +09:00
db02afb74f scopeの再設計 2026-04-14 12:09:18 +09:00
f8eabd3ac8 prune-savings-estimation完了 2026-04-14 03:42:04 +09:00
9eef8117c8 cargo fmt 2026-04-14 03:13:36 +09:00
be96efb5ed prune-projection完了 2026-04-14 02:57:25 +09:00
ff88fbc7e4 pruneのトークン計算置き換え・Podに接続 2026-04-14 02:35:35 +09:00
0a1d01d9b5 pruneで用いるトークン計算の改善 2026-04-14 00:15:09 +09:00
a89c4487c9 token-counter実装 2026-04-13 20:32:02 +09:00
13d83e0fd3 token-counter実装 2026-04-13 20:21:26 +09:00
d5e2c3819d usage永続化のdoc修正 2026-04-13 07:13:49 +09:00
101679dbb8 usageデータの永続化実装 2026-04-13 07:09:05 +09:00
313b5158b8 TODO・Ticketのアップデート 2026-04-13 05:58:33 +09:00
6f2362ec77 ToolsのTracker実装 2026-04-13 04:26:27 +09:00
a947922192 チケット更新 2026-04-13 04:10:19 +09:00
c850cdf2b6 組み込みツールの実装 2026-04-13 03:43:02 +09:00
029bb1fba7 Compactの実装 2026-04-13 02:08:25 +09:00
9747bd6d34 TUIをinline viewportに変更 2026-04-12 07:32:06 +09:00
48e62f65df compactの実装 2026-04-12 07:09:48 +09:00
f2aaa3683f TUIのratatuiを0.30.0にした 2026-04-12 06:57:07 +09:00
46526ed262 session-storeとして分離 2026-04-12 06:31:34 +09:00
be1119d859 Pruneの実装 2026-04-12 06:02:46 +09:00
afabd3d7fd TUI上のターンカウンタ・ターン統計の実装 2026-04-12 05:41:22 +09:00
0c9551eef0 Tool Outputの仕様簡素化 2026-04-12 05:19:00 +09:00
444c90d7e4 コンテキスト圧縮の設計更新 2026-04-12 04:47:42 +09:00
37e6301397 Pod切断時にTUIがハングする問題 2026-04-12 04:22:26 +09:00
601d93f8d0 history取得:TUI側の実装 2026-04-12 03:44:45 +09:00
57a8ad6b97 historyを返すプロトコル 2026-04-12 03:37:49 +09:00
1617a982e1 Tickets整理 2026-04-12 03:19:12 +09:00
26f9294f42 ツールの動的削除の実装 2026-04-11 20:01:55 +09:00
982e0d2dbb Workerのリファクタリング 2026-04-11 19:47:34 +09:00
61a977779e Podにキーを渡す実装 2026-04-11 19:28:59 +09:00
e1cf8fad0f Workerの自動キャッシュロック 2026-04-11 18:47:33 +09:00
3151fc27ef workerのAPI設計 2026-04-11 17:30:32 +09:00
748e858ec5 HookのPod側への移動・Interceptorの実装 2026-04-11 17:19:20 +09:00
b15a5ceffc プロトコルStreamのユーティリティ共通化 2026-04-11 15:58:52 +09:00
c936492a29 Sessionのハッシュ 2026-04-11 15:14:02 +09:00
71b3d550a4 Remove Pod-ID 2026-04-11 14:18:49 +09:00
88aa2cf953 llm-workerのAPI改善 2026-04-11 14:11:40 +09:00
1b8e2173bb Pod-ID (UUID)の削除 2026-04-11 03:44:37 +09:00
13ba83d45d Create remove-pod-id.md 2026-04-11 03:40:55 +09:00
b3d3fd524a Podのバイナリ実装 2026-04-11 03:26:38 +09:00
8aa058ef75 tickets 2026-04-11 03:23:48 +09:00
5af96aa191 Max Turnの実装 2026-04-11 03:16:36 +09:00
ee307e2926 Add README to all crates 2026-04-11 03:07:56 +09:00
8d18357460 cratesの整理 2026-04-11 02:48:50 +09:00
1d92680e76 プロトコルの定義 2026-04-09 05:23:57 +09:00
cc1ceee81d Crate設計・mv 2026-04-09 04:55:14 +09:00
375 changed files with 79603 additions and 6469 deletions

View File

@ -0,0 +1,119 @@
---
name: "ticket-reviewer"
description: "Use this agent when a ticket implementation is submitted for review in this project (insomnia). The agent reviews the ticket's premises/requirements and the actual implementation, creates `tickets/<ticket>.review.md` with findings, and updates the original `tickets/<ticket>.md` with review status. Do NOT use this agent for general code review unrelated to a ticket. "
model: opus
color: purple
---
You are a senior reviewer specialized in the `insomnia` project. You are an expert at evaluating ticket-scoped implementations against their stated premises and requirements, and at safeguarding the codebase from unnecessary complexity or architectural drift. You operate strictly within the project's ticket lifecycle conventions defined in `CLAUDE.md`.
## Your Core Responsibility
Given a ticket (normally `tickets/<name>.md`) and its associated implementation (typically the most recent commits or working tree changes), you will:
1. Read the ticket thoroughly to understand its **背景・前提・要件**.
2. Inspect the implementation (diff + surrounding code, not only the diff).
3. Evaluate whether the ticket's requirements are fully and correctly satisfied.
4. Evaluate architectural fit, necessity, and whether the codebase is being distorted (コードベースを歪めていないか、不必要な実装ではないか).
5. Produce `tickets/<name>.review.md` with findings and a clear judgment.
6. Update the original `tickets/<name>.md` to append a review status section (do NOT delete the ticket — deletion is the user's decision at completion).
You must NEVER run `git` write operations (commit, add, push, etc.). Git is the user's responsibility (per CLAUDE.md). You only edit/create files in the working tree.
## Review Methodology (in order)
Per the project's review policy — **architecture and ticket-requirement completion come first**:
### Step 1: Ticket comprehension
- Extract 前提, 要件, 完了条件 from the ticket.
- Note any Phase structure — but remember Phases are internal implementation order, not externally tracked progress.
- Confirm the ticket's intended scope boundary.
### Step 2: Architectural & scope review (先に確認する)
- Does the implementation respect layer boundaries? (e.g., `llm-worker` stays low-level; higher-level features live in upper layers.)
- Are new crates named without the `insomnia-` prefix, short and consistent?
- Were dependencies added via `cargo add` (not manual edits to Cargo.toml)?
- Are impls split into feature modules rather than stuffed into primary files like `pod.rs`?
- Does the implementation match stated factory/lazy-init intents where applicable?
- Does it follow the LLM provider policy (Ollama / Codex OAuth / Anthropic API first-class; router-style common frame; no Claude OAuth reuse)?
- Is the change the minimum necessary to satisfy the ticket, or does it over-reach?
### Step 3: Requirement completion check
- Map each requirement from the ticket to concrete evidence in the diff/code.
- Flag any requirement that is unmet, partially met, or silently deferred.
- Verify the build-through-feature invariant: the tree must build and, unless explicitly documented as not-yet-runnable for a bounded feature, be end-to-end runnable.
### Step 4: Code quality & correctness
- Investigate suspicious behavior by reading local code first (per project policy) before suspecting external causes.
- Look for error handling, edge cases, concurrency, and resource cleanup issues.
- Check tests: presence, meaningful coverage, and alignment with behavior.
- Confirm naming, module organization, and API surface are consistent with existing patterns.
### Step 5: Judgment
Decide one of:
- **Approve (完了可)** — requirements met, no blocking issues.
- **Approve with follow-up (条件付き)** — minor non-blocking items noted; user may complete or defer.
- **Request changes (要修正)** — blocking issues must be addressed.
## Output Artifacts
### A. `tickets/<name>.review.md` (create or overwrite)
Use this structure (Japanese, matching project tone):
```markdown
# Review: <ticket title>
## 前提・要件の確認
- <要件1>: <満たされているか + 根拠>
- <要件2>: ...
## アーキテクチャ・スコープ
- <観点と判断>
## 指摘事項
### Blocking
- <項目>: <理由と該当箇所 path:line>
### Non-blocking / Follow-up
- <項目>: <理由>
### Nits
- <項目>
## 判断
<Approve / Approve with follow-up / Request changes>: <一文の理由>
```
Omit empty sections. Cite concrete file paths and line ranges. Be concise; avoid restating obvious code.
### B. Update `tickets/<name>.md`
Append (or update if present) a trailing section like:
```markdown
## Review
- 状態: <Approve / Approve with follow-up / Request changes>
- レビュー詳細: [./<name>.review.md](./<name>.review.md)
- 日付: <YYYY-MM-DD(レビュー実施日)>
```
Do not modify the ticket's 背景・要件 sections unless the user explicitly asked for it. Do not delete the ticket — deletion is reserved for the completion step (d) performed by the user.
## Operating Principles
- **Do not commit or stage anything.** File edits only. The user will handle git.
- **Do not over-engineer the review.** Focus on whether the ticket is done and whether the codebase stays healthy.
- **Prefer concrete citations** (path:line) over abstract complaints.
- **Ask for clarification** only when the ticket itself is ambiguous and the ambiguity blocks judgment; otherwise make a defensible call and note it.
- **Re-review mode**: if `.review.md` already exists, update it in place, preserving a short history of prior rounds (e.g., `## Round 2` section) so the evolution is visible until the ticket is closed.
- **TODO.md is not your concern** unless a requirement explicitly demands it; ticket lifecycle edits to TODO.md are the user's.
## Quality Self-Check (before finishing)
1. Did I evaluate architectural fit before nitpicks?
2. Did I map every ticket requirement to evidence?
3. Are all blocking issues genuinely blocking (not stylistic)?
4. Did I avoid making git writes?
5. Did I update both `<name>.review.md` and `<name>.md`?
6. Is my judgment line unambiguous?

View File

@ -0,0 +1,26 @@
---
name: worktree-workflow
description: "Worktreeを用いた開発フローを進める。git上の開発におけるミクロな指示で、プロジェクトの管理に関する指示は提供されていない。"
allowed-tools: "Bash(cd *), Bash(git worktree *), Bash(mkdir *), Bash(cp *), Bash(ln *), Bash(ls *), Bash(find *)"
---
# Worktreeを用いた開発
Goal: 実装を完了させ、ブランチをマージ待ちの状態にする。
`./.worktree`にworktreeを作成します。
エージェントの1セッション=1ワークツリーとしており、ブランチ/イシュー/チケット単位で切ります。
このワークフローにおいては、ブランチはローカルで並行開発するためのマージ後削除の運用とし、Worktreeと同名のbranchを同時に作って進めます。メインのディレクトリのブランチから切るものとして扱います。
```
git worktree add .worktree/<task-name> -b <task-name>
```
## flake.nixの無効化
基本的に、CWDを変更できない場合、.envrcによる自動アクティベートは効かないので無視で構わない。
## 完了時
マージウィンドウからこのスキルがinvokeされた際は、ブランチのマージ・worktreeの削除まで行う。対して、実装者がマージしてクローズしてはならない。

3
.gitignore vendored
View File

@ -1,4 +1,5 @@
/target
.direnv
*.local
*.local*
.env
.worktree

1
.insomnia/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/memory/

View File

@ -0,0 +1,20 @@
---
created_at: 2026-05-11T22:10:00Z
updated_at: 2026-05-11T22:10:00Z
kind: policy
description: Claude Codeを用いてレビューやinsomniaだけではできないタスクを行う
model_invokation: false
user_invocable: true
last_sources: []
---
Bashツールを用いて`claude`を呼び出す。
`claude -p "<prompt>"`で非対話モードでのClaude Codeの利用が出来る。
また、`claude -p "<prompt>" --continue`を用いることで、直前のセッションを再開する形で実行できる。
insomniaではまだできないのでclaudeにやらせたいタスク
- WebSearch / WebFetch
-

11
.insomnia/manifest.toml Normal file
View File

@ -0,0 +1,11 @@
[scope]
allow = [
{ target = ".", permission = "write", recursive = true },
{ target = "/home/hare/ghq", permission = "read", recursive = true },
]
[memory]
extract_threshold = 50000
consolidation_threshold_files = 5
consolidation_threshold_bytes = 50000

View File

@ -0,0 +1,143 @@
---
description: TODO / tickets / docs / git history から次の作業候補を見繕い、課題発見や方針決定を半自動でイテレーションする WIP maintainer workflow
model_invokation: false
user_invocable: true
requires: []
---
# Auto Maintain Workflow (WIP)
insomnia を AI maintainer として運用するための半自動 loop。TODO / tickets から「今進められそうな作業」を選ぶだけでなく、課題の発見、設計判断の切り分け、次に人間へ戻すべき問いの整理までを扱う。
これは unattended 自動開発ではない。実装の並列委譲は `multi-agent-workflow`、worktree の機械的作成は `worktree-workflow` に任せる。本 Workflow はその前段として、何を進めるべきか、何をまだ決めるべきかを整理する。
参照:
- `docs/plan/ai-maintainer.md`
- `tickets/auto-maintain-workflow.md`
## 位置づけ
AI maintainer の目的は、コードを書くこと自体ではなく、プロジェクト状態を前に進めることである。
この Workflow は WIP として、以下を行う。
- TODO / tickets / docs / git history を読んで現在地を把握する。
- 実装可能な ticket と、方針決定が必要な ticket を分ける。
- 小さく実装できる候補を提案する。
- 設計相談が必要な論点を人間に戻す。
- 運用上の問題や繰り返し発生する詰まりを report / ticket / workflow 改訂候補として整理する。
## 非目標
現時点では以下をしない。
- 常駐 scheduler として自動実行する。
- 人間の合意なしに新規 ticket を作る。
- 人間の合意なしに既存 ticket を大幅変更する。
- 人間の合意なしに ticket 完了削除を行う。
- push する。
- Workflow を自律生成・自律改訂する。
- scope / permission / history persistence / prompt context 加工原則に関わる判断を勝手に決める。
## 入力として読むもの
必要に応じて以下を読む。
1. `TODO.md`
2. `tickets/*.md`
3. `docs/plan/`
4. `docs/report/`
5. `git log --oneline` / ticket file の git history
6. 既存 worktree / branch 状態
7. 最近の失敗や通知、ユーザーからの観測
TODO と ticket の不整合を見つけたら、勝手に修正せず、まず報告する。ただしユーザーが明示的に「直して」と言った場合は Mode 1 として整理してよい。
## 分類
候補を以下に分ける。
### A. 実装委譲可能
- 要件と完了条件が具体的。
- 影響範囲が限定的。
- test / build で確認できる。
- 大きな設計判断が不要。
- scope を狭く切れる。
この場合は、人間に候補として提示する。人間が実行を許可したら `$user/multi-agent-workflow` に進む。
### B. 方針決定が必要
- 複数の設計方針が自然に導ける。
- protocol / permission / scope / persistence / prompt context に触れる。
- UX の仕様が未確定。
- 既存 ticket の要件が古い。
この場合は、実装せず、決めるべき問いを短く提示する。
### C. ticket 整理が必要
- TODO にあるが ticket がない。
- ticket があるが TODO にない。
- 完了済みに見えるが残っている。
- ticket の前提が変わっている。
この場合は、不整合と修正案を提示する。修正は人間の許可後に行う。
### D. report / workflow 改善候補
- 同じ tool 問題が繰り返し出る。
- Workflow の指示が曖昧で実装 Pod が迷った。
- AI が過剰に Task tool を使うなど、運用上の癖が出た。
- 通知や Pod completion tracking など、開発基盤の不足が観測された。
この場合は、すぐ ticket 化するか、`docs/report/` に観測として残すか、人間に確認する。
## 半自動 iteration
1. 状態把握
- TODO / tickets / git status を読む。
- 最近完了した流れや未完了 branch を確認する。
2. 候補抽出
- 実装可能そうな ticket を 2〜5 件挙げる。
- correctness / developer experience / user-visible UX / cleanup で分類する。
3. 推奨順位
- blocking correctness を最優先。
- 実害が出ている運用問題を次点。
- 小さく完了できる UX / cleanup を次点。
- 大きな設計変更は方針相談に回す。
4. 人間への提示
- 「次に進めるなら X」を1つ推奨する。
- 理由を短く述べる。
- 実装委譲する場合の scope / test 方針を添える。
5. 実行への接続
- 人間が「進めて」と言ったら `$user/multi-agent-workflow` に接続する。
- worktree 作成は `$user/worktree-workflow` に従う。
## エスカレーション基準
以下では実装に進まず、人間へ戻す。
- ticket の要件から複数の設計方針が自然に導ける。
- 長期構造、crate boundary、protocol、permission、scope、history persistence に触れる。
- prompt context 加工原則に関わる。
- 新 ticket の作成、既存 ticket の大幅変更、ticket 完了削除について合意がない。
- test 不能、再現不能、または作業範囲外の不具合に遭遇した。
- WorkItem / Thread / Lease / maintainer state など、まだ設計中の概念が必要になる。
## まだ固定しないもの
以下は `docs/plan/ai-maintainer.md` の上位設計に残し、本 Workflow では詳細を固定しない。
- WorkItemStore / LeaseStore。
- operation inbox / trial log。
- QA feedback を ticket / review / report のどれに落とすか。
- AI 自身の feedback を Knowledge / report / ticket / workflow 改訂のどれにするか。
- maintainer doctor。
- reviewer Pod の評価基準の機械化。

View File

@ -0,0 +1,150 @@
---
description: worktree と子 Pod を使って複数 ticket の実装・レビュー・修正・完了処理を並列に進める orchestration フロー
model_invokation: true
user_invocable: true
requires: []
---
# Multi-agent Worktree Workflow
insomnia を insomnia で開発する際の、worktree + 実装 Pod + 親 Pod review の標準フロー。これは **実装を並列に進めるためのフロー** であり、worktree の機械的作成手順は `$user/worktree-workflow`、ticket 候補選定や方針探索の半自動 loop は `$user/auto-maintain` に分ける。
## 目的
- 実装差分を ticket ごとの child worktree に隔離する。
- 実装 Pod に narrow write scope を渡して並列実装させる。
- 親 Pod が diff / test / ticket 要件を review し、必要なら修正依頼する。
- approve 後に merge / ticket 完了処理 / main workspace での再検証を行う。
## 開始条件
以下が揃っている時に使う。
- 対象 ticket が決まっている。
- ticket の背景・要件・完了条件から実装方針が概ね導ける。
- worktree 作成と git 書き込み操作について、人間の許可がある。
- main workspace の unrelated dirty changes を把握している。
設計方針が複数自然に導ける場合、protocol / scope / permission / history persistence に触れる場合、ticket 自体の再定義が必要な場合は、実装委譲前に人間へ戻す。
## 親 Pod / orchestrator の責務
1. 状態確認
- `git status --short --branch`
- 対象 ticket
- 関連 TODO / docs / 既存 worktree
2. worktree 作成
- `$user/worktree-workflow` に従い `./.worktree/<task-name>` を作る。
- `.insomnia` を sparse checkout で除外する。
3. 実装 Pod spawn
- read scope: main workspace 全体。
- write scope: child worktree、または必要最小 directory。
- task には以下を明示する。
- child worktree path / branch
- 対象 ticket path
- Bash は必ず child worktree に `cd` すること
- main workspace の `TODO.md` / `tickets/` / `docs/report/` / `.insomnia` は編集しないこと
- 範囲外事項
- 実行すべき build / test / format
- 完了報告項目
4. 監督
- `ReadPodOutput` で報告を読む。
- 通知が来ない場合でも、worktree の `git status` / `git diff` / test で完了状態を確認する。
- 必要なら `SendToPod` で修正依頼する。
5. review
- ticket の背景・要件・完了条件・範囲外に照らして diff を確認する。
- build / test / `git diff --check` を確認する。
- 必要なら reviewer Pod を read-only で立てる。
6. merge / lifecycle
- approve 後に main workspace へ merge する。
- `TODO.md` から該当行を削除し、`tickets/foo.md` を削除して完了 commit を作る。
- main workspace で必要な test / `cargo check --workspace` / `cargo fmt --check` を再実行する。
## 実装 Pod の責務
- child worktree 内でのみ実装する。
- main workspace の管理ファイルを書かない。
- 指定された build / test / format を実行する。
- ticket 要件外の設計変更、依存関係追加、scope / permission / history persistence / prompt context 加工原則に触れる変更が必要なら止めて報告する。
- 完了時に以下を報告する。
- worktree path / branch
- commit hash(commit した場合)
- 変更ファイル
- 実装概要
- 実行した build / test / format
- 未解決事項
- review に回せるか
## 実装 Pod の commit 方針
実装 Pod には child worktree 内での commit を許可してよい。
- commit は ticket 内で意味のある粒度にする。
- 例: `feat: ...`、`fix: ...`、`test: ...`、`docs: ...`
- 実装 Pod は merge / push / branch deletion / worktree remove をしない。
- 実装 Pod は `TODO.md` / `tickets/` の完了処理 commit をしない。
- 親 Pod は review 時に commit 粒度も確認する。
- 必要な修正は、原則追加 commit として積む。履歴改変や squash は人間の明示指示がある時だけ行う。
## Review → 修正 → 完了の標準形
### Approve
1. 実装 Pod を停止し、scope を回収する。
2. 親 Pod が main workspace で `git merge --no-ff <branch>` する。
3. 親 Pod が `TODO.md` と `tickets/foo.md` を完了処理して commit する。
4. main workspace で検証コマンドを再実行する。
5. 変更内容・commit・検証結果・残 dirty changes を報告する。
### Request changes
1. blocking finding をファイル / 行 / 理由 / 修正方針つきで整理する。
2. 実装 Pod が生きていれば `SendToPod` で修正依頼する。
3. 停止済みなら、同じ worktree / branch / scope で再 spawn するか、親 Pod が最小修正する。
4. 修正後に focused test と必要な broader test を再実行する。
5. 再 review する。
### Non-blocking comments
- ticket 要件外の改善はその場で混ぜない。
- 必要なら後続 ticket / docs/report にする。
- non-blocking を理由に completion を遅らせない。
## 並列実装時の注意
- 1 ticket = 1 worktree = 1 branch を基本にする。
- 複数 Pod に同じ write scope を渡さない。
- parent は child の write scope 配下を直接編集しない。
- 依存関係がある ticket は、土台 branch を merge してから次 worktree を切る。
- parallel に走らせた Pod の完了通知は取りこぼしうるため、`ReadPodOutput` と worktree 状態で確認する。
## 完了報告の標準形
```text
完了:
- ticket: <path>
- branch: <name>
- commits:
- <hash> <subject>
- 変更概要: ...
- 検証:
- cargo fmt --check
- cargo check --workspace
- cargo test ...
- review: approve / approve with comments / request changes
- 未解決事項: ...
- 残 dirty changes: ...
```
## この Workflow で扱わないもの
以下は `$user/auto-maintain` または別の設計相談で扱う。
- ticket 候補を見繕うこと。
- 新規 ticket 作成判断。
- QA feedback / AI feedback を ticket / report / workflow に落とす判断。
- 長期 maintainer loop / WorkItemStore / LeaseStore の設計。

View File

@ -0,0 +1,98 @@
---
description: insomnia プロジェクトで child git worktree を作成・管理するための機械的手順。実装 Pod に作らせず、親 Pod が main workspace で実行する。
model_invokation: false
user_invocable: true
requires: []
---
# Worktree Workflow
insomnia プロジェクトで実装差分を main workspace から分離するため、`./.worktree/<task-name>` に child git worktree を作る。これは **worktree の扱い方だけ** を定める Workflow であり、ticket 選定、実装委譲、review、merge の運用は `$user/multi-agent-workflow` 側で扱う。
insomnia では Pod の write scope が排他的に委譲されるため、child worktree に `.insomnia` を置かない。main workspace は orchestration / ticket / docs / memory / workflow 管理の場所として残し、child worktree はコード差分専用の作業面として扱う。
## 適用範囲
この Workflow は親 Pod / orchestrator が main workspace で実行する。
- 実装 Pod にこの Workflow を渡して worktree を作らせない。
- 実装 Pod は、親 Pod が作成済みの child worktree を受け取り、その中で実装・build・test・報告を行う。
- ticket 作成、TODO 更新、review artifact、docs/report は main workspace 側で扱う。
## 原則
- 1 ticket / 1 実装 task につき 1 worktree を作る。
- worktree path は `./.worktree/<task-name>` とする。
- branch 名は原則 `<task-name>` と同じ kebab-case。
- child worktree には `.insomnia` を出さない。
- child worktree は実装差分用。`TODO.md` / `tickets/` / `docs/report/` / workflow / memory は原則 main workspace 側で扱う。
- push はしない。
## 事前確認
作成前に以下を確認する。
1. 対象 ticket / task が決まっているか。
2. `<task-name>` が branch / path 名に使える kebab-case か。
3. `git worktree add` を実行してよい許可があるか。
4. main workspace に混ぜてはいけない未保存差分がないか。
5. 同名 branch / worktree が既に存在しないか。
同名 branch がある場合は、既存 branch を使うか、人間に確認する。`git worktree add -b` で上書きしない。
## 作成手順
main workspace で実行する。
```bash
git worktree add .worktree/<task-name> -b <task-name>
git -C .worktree/<task-name> sparse-checkout init --no-cone
git -C .worktree/<task-name> sparse-checkout set --no-cone \
'/*' \
'!/.insomnia/' \
'!/.insomnia/**'
```
確認する。
```bash
git -C .worktree/<task-name> status --short --branch
test ! -e .worktree/<task-name>/.insomnia
```
失敗した場合は、worktree / branch / lock の状態を確認し、勝手に cleanup せず人間へ報告する。
## 子 Pod へ渡す scope
子 Pod を使う場合、子 Pod の cwd は main workspace のままになる。必ず作業対象が child worktree であることを明示し、Bash 実行時は毎回 `cd /home/hare/Projects/insomnia/.worktree/<task-name> && ...` させる。
推奨 scope:
```text
read: /home/hare/Projects/insomnia
write: /home/hare/Projects/insomnia/.worktree/<task-name>
```
より狭く切れる場合は、write scope を変更対象 crate / directory まで狭めてよい。ただし build / test に必要な生成物を書けることを確認する。
## child worktree 内の禁止事項
- `.insomnia` を作らない / コピーしない。
- main workspace の `TODO.md` / `tickets/` / `docs/report/` を編集しない。
- merge / push / branch deletion / worktree remove をしない。
- scope / permission / history persistence / prompt context 加工原則に関わる設計変更を無断で行わない。
## 完了時の扱い
worktree 作成 Workflow としては、完了時に merge しない。merge、ticket 完了、TODO 削除は `$user/multi-agent-workflow` または人間の明示指示で行う。
実装 Pod へ渡す完了報告項目の標準形:
- worktree path
- branch 名
- commit hash(実装 Pod に commit を許可した場合)
- 変更ファイル
- 実装概要
- 実行した build / test / format
- 未解決事項
- review に回せるか

75
AGENTS.md Normal file
View File

@ -0,0 +1,75 @@
全体設計が概ね固まり、随所の細かい仕様を詰めながら実装を進めている。
## このシステムにおける設計要旨
- プロンプトはすべて resources/promptsに集約している。管理効率の向上と同時に、ユーザーがオーバーライドする形式でもある。
- E2E(実プロセスをスポーンさせてのテスト)は未設計。
- 変更量を最小にするために設計を歪めたり、設計問題に対して不必要な後方互換性を作らない。長期的なメンテナンスと型安全性を追求すること。
### LLM コンテキストの加工原則
LLM に投げる context への割り込みは、大きく2種類に分かれる。**前者は許されるが、後者は禁止**。
Podの状態から純粋に再現可能で、且つ揮発性の無い操作であることが望ましい(pruning、tool result の content 切り詰め、prompt cache anchor の付与等)。
原則として、コンテキストは積み重ねるものであり、一時的にメッセージを差し込むことや、過去のメッセージを改ざんすることはKVキャッシュのヒット率を下げる。
**禁止**: ターンを跨ぐことができない情報に基づいて、history に記録せずに context だけにコンテンツを差し込むこと。これをやると LLM はそれに反応して生成を行う一方、次以降のターンでhistoryに残らないため、「自分がなぜその発言/tool call をしたか」の根拠が消えるうえ、prompt cache のヒット率も低下させることになる。
新しい input を context に乗せたいなら、必ず先に `worker.history` に append して commit すること。`history.json` への永続化はそこから自動的についてくる。Notify / PodEvent / `<system-reminder>` 系はこの原則で扱う(→ `tickets/notify-history-persist.md`)。
また、キャッシュを破壊するタイミングは正確にコントロールされる必要があり、キャッシュ破壊とトークン消費のトレードオフに基づいて慎重に設計されるべきである。
---
## 実際のセッションを読んでデバッグする
`~/.insomnia/sessions`にすべてのセッションがある。jsonlなので、いい感じにBashで読むこと。
---
## Git操作
workflowで明示されない限り、読み取り以外の操作は控えること。
基本はworktree上の一時的なブランチでコミットを重ね、メインブランチに取り込む運用をしている。
コミットメッセージは適当に`<prefix>: *簡潔な1行*`で書いている。
外部の参考プロジェクトはghqでgetしており、必要に応じて`~/ghq`からReadすること。
---
## Ticketの運用について
`TODO.md`、`tickets/`はgitで管理されていて、時系列の管理はgitを参照して把握すること。
### TODO.md
- 1チケット = 1行。未完了のみ記載し、完了したら行ごと削除する(履歴はgitで追える)
- ネストは同一領域のグルーピング(表示用)にのみ使う。実装上の依存関係はネストで表現しない
- 完了した子は削除し、親は未完了の子がある限り残す。最後の子が完了したら親ごと削除
- Ticketを追加する際は、合わせてTODOも書くこと
### Ticket の粒度
- 1チケット = 完了時点で、実装が仕様又は機能として説明できる粒度。
- 作成時、背景や要件を前提として書き、実装の方針やコードの詳細は不必要に増やさない。
- チケット内のステップ(Phase 1, 2, ...)は実装順序であり、TODO等、外に出さない
- ビルドが通り、その機能に限り,まだ動作できないと明示出来ている場合を除いて全体を通して動作させられる状態である必要がある。
### Ticket のライフサイクル
gitがタイムラインの単一の情報源。ファイル操作とcommitで状態遷移を表現する。
a. 作成: `tickets/foo.md` を作成してcommit
b. 詳細化や前提の変化: `tickets/foo.md` を更新してcommit
c. レビュー: `tickets/foo.md` にレビュー状態を追記 + `tickets/foo.review.md` を作成してcommit
d. 完了: `tickets/foo.md` と `tickets/foo.review.md` を両方削除してcommit
worktreeと併用して作業を進める場合、必ずブランチを切る前に対象のチケットをコミットしてから切ること。
TODO.mdのリンクは完了後に切れるが、そのリンクを元にgitで消されたファイルを読み、内容を把握できる。
`.review.md` にはレビューの指摘事項と判断結果を記載する。
レビューはdiffの確認だけでなく、チケットはどのような前提・要件であり、それが達成されたかの確認まで含めて行う。
常に、提出された実装で良いのか、コードベースを歪めていないか、不必要な実装ではないかを確認すること。
---
insomniaでinsomniaを開発している際、AI自身のフィードバックを元に改善を回すために `docs/report/`ディレクトリに感じた障壁や改善案等を書き残す形にした。 明確に力不足な点/ツールの問題があった場合や、ユーザーからの指示があった際に作ること。

1
CLAUDE.md Symbolic link
View File

@ -0,0 +1 @@
AGENTS.md

2608
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -1,12 +1,57 @@
[workspace]
resolver = "2"
members = [
"crates/insomnia",
"crates/client",
"crates/daemon",
"crates/llm-worker",
"crates/llm-worker-macros",
"crates/llm-worker-persistence",
"crates/session-store",
"crates/manifest",
"crates/pod",
"crates/protocol",
"crates/provider",
"crates/pod-registry",
"crates/session-metrics",
"crates/lint-common",
"crates/tools",
"crates/tui",
"crates/memory",
"crates/workflow",
]
[workspace.package]
edition = "2024"
license = "MIT"
[workspace.dependencies]
# Internal crates
client = { path = "crates/client" }
llm-worker = { path = "crates/llm-worker", version = "0.2" }
llm-worker-macros = { path = "crates/llm-worker-macros", version = "0.2" }
manifest = { path = "crates/manifest" }
lint-common = { path = "crates/lint-common" }
memory = { path = "crates/memory" }
pod-registry = { path = "crates/pod-registry" }
protocol = { path = "crates/protocol" }
provider = { path = "crates/provider" }
session-metrics = { path = "crates/session-metrics" }
session-store = { path = "crates/session-store" }
tools = { path = "crates/tools" }
# External
# Note: `reqwest` and `chrono` are not aggregated here because some crates
# need `default-features = false`, which workspace inheritance cannot override.
async-trait = "0.1"
fs4 = "0.13"
futures = "0.3"
libc = "0.2"
schemars = "1.2"
serde = "1.0"
serde_json = "1.0"
sha2 = "0.11"
tempfile = "3.27"
thiserror = "2.0"
tokio = "1.52"
toml = "1.1"
tracing = "0.1"
uuid = "1.23"

18
KNOWN_ISSUES.md Normal file
View File

@ -0,0 +1,18 @@
# Known Issues
Ticket を切るほどではないが、次に近所を触るときに合わせて拾いたい小粒な所見の置き場。
## 運用
- 1 項目 = 出典 (file:line) + 症状 (一文) + トリガー (いつ拾うか、一文)
- 関連 ticket があれば `→ [tickets/foo.md]` でリンク
- 修正したら同じコミットで該当エントリを削除する (履歴は git)
- ここに溜める基準: 「ticket は重い」「だが忘れたら次の触り手が踏む」もの。明確に作業すべきものは ticket 化する
## エントリ
- `crates/tui/src/app.rs:478-485` — bad workflow slug を含む `Method::Run` 送信時、`Event::UserMessage` の早期 broadcast で `turn_index += 1` されターンヘッダだけ残る ("ghost turn header")。次に TUI のターンヘッダ / エラー表示周りを触るときに整理。→ [tickets/pod-input-validate-internalize.md] の review 由来。
- `crates/pod/src/controller.rs:944` — `worker_error_code` の `PodError::WorkflowResolve(_) => InvalidRequest` が post-commit な resolve エラー (`KnowledgeNotFound` 等) にも適用される。意味論的には妥当方向だが、resolve 系のエラー粒度を分けたくなったタイミングで再評価。
- `crates/pod/tests/controller_test.rs` — `double_run_returns_error` がたまに失敗する flakiness を観測。`pod-interrupt-prep-internalize` 以前から存在する別件。次に controller_test の Run 連投系のタイミングを触るときに併せて原因を切り分け。
- `crates/session-store/src/fs_store.rs:117-122` — `FsStore::read_entry_count` が `fs::read_to_string` で全文ロードしてから行数カウントするため O(n)。`ensure_head_or_fork` は run-start でしか呼ばれず現状は許容範囲だが、長期セッションが普通になった時点で `\n` バイト数の cheap count か末尾 seek に置き換える。
- `crates/session-store/src/segment.rs:121` `ensure_head_or_fork` (free fn, test 専用・本番 caller ゼロ) と `crates/pod/src/pod.rs` `Pod::ensure_segment_head` (本番 inline) に live auto-fork の検知 + forked_from 記録が二重実装されている。entry-hash-abolish 以前からの重複で、両方独立にテスト済みだが drift 必至。session-store 側を本番から呼ぶ形に寄せるか free fn を畳むかは要設計判断。Pod state / fork 周辺を次に触るときに統合を検討。

View File

@ -3,16 +3,3 @@
insomnia(i6a)は不休のエージェントループを回すためのエージェントプラットフォーム。
ワークフローを統括し、四六時中電力を消費し、イテレーションします。
## Crates
| クレート | 概要 |
|---|---|
| `insomnia` | トップレベルアプリケーション(未実装) |
| `llm-worker` | 自律的なLLMシステムを構築するためのライブラリ |
| `llm-worker-macros` | `llm-worker`用の手続きマクロ (`#[tool_registry]`, `#[tool]`) |
## ドキュメント
- [要件](crates/llm-worker/docs/requirements.md) — llm-workerに求める性能 (R1-R4)
- [アーキテクチャ](crates/llm-worker/docs/architecture.md) — 3層構成とモジュール配置

35
TODO.md
View File

@ -1,7 +1,28 @@
- [x] 永続化データ構造の制定
- [ ] テスト設計
- [x] ツール出力の遅延読み込み設計 (ToolOutput / BlobStore / auto_summarize)
- [ ] ツール設計
- [ ] ツールの動的追加/削除 (unregister, replace)
- [ ] ToolDefinition ファクトリの遅延初期化修正 (現状 register 時に即時呼び出しされている。セッション開始=初回メッセージ送信時まで遅延させる)
- [x] inspect ツール実装
- Workflow / Skills
- 内部 Worker / 内部 Pod の Workflow 化 → [tickets/internal-worker-workflow.md](tickets/internal-worker-workflow.md)
- 半自動開発運用 Workflow → [tickets/auto-maintain-workflow.md](tickets/auto-maintain-workflow.md)
- AI maintainer 用 WorkItem / Thread 抽象 → [tickets/maintainer-work-items.md](tickets/maintainer-work-items.md)
- Prompt / Workflow 評価メトリクスと改善 Offer → [tickets/prompt-eval-metrics.md](tickets/prompt-eval-metrics.md)
- Permission: allow-all 既定 policy への整理 → [tickets/permission-default-policy.md](tickets/permission-default-policy.md)
- Pod: 任意ターンからの Fork(複数ターン巻き戻しを汎用化) → [tickets/pod-session-fork.md](tickets/pod-session-fork.md)
- Pod/TUI: 手動 rollback 導線 → [tickets/manual-turn-rollback.md](tickets/manual-turn-rollback.md)
- Pod: Inbound PodEvent ハンドリングの重複を統合 → [tickets/pod-inbound-pod-event-dedup.md](tickets/pod-inbound-pod-event-dedup.md)
- SpawnPod 初回 task delivery の受理確認 → [tickets/spawnpod-initial-run-confirmation.md](tickets/spawnpod-initial-run-confirmation.md)
- E2E テストハーネス(`tests/e2e/`、opt-in) → [tickets/e2e-harness.md](tickets/e2e-harness.md)
- メモリ機構
- consolidation skip 表示と invalid staging の観測性 → [tickets/memory-consolidation-skip-observability.md](tickets/memory-consolidation-skip-observability.md)
- summary.md の resident 注入 → [tickets/memory-summary-resident-injection.md](tickets/memory-summary-resident-injection.md)
- TUI 拡充
- navigation mode / block focus の設計 → [tickets/tui-navigation-mode-design.md](tickets/tui-navigation-mode-design.md)
- spawned child Pod の一覧と一時 attach → [tickets/tui-spawned-pod-panel.md](tickets/tui-spawned-pod-panel.md)
- actionbar transient notice API → [tickets/tui-actionbar-transient-notice-api.md](tickets/tui-actionbar-transient-notice-api.md)
- tui -r picker で live pending Pod が表示から漏れる → [tickets/tui-picker-live-pending-pods.md](tickets/tui-picker-live-pending-pods.md)
- user manifest env override 時の spawn scope overlay 前提ズレ → [tickets/tui-user-manifest-env-overlay.md](tickets/tui-user-manifest-env-overlay.md)
- ユーザーマニフェストのモデル設定 wizard → [tickets/tui-user-model-setup.md](tickets/tui-user-model-setup.md)
- セッション内 Task ツールの注意機構(無アクティビティで `<system-reminder>` ナッジ) → [tickets/session-todo-reminder.md](tickets/session-todo-reminder.md)
- ワークスペースのメモリーをLintするヘッドレスCLI
- system-reminder 注入機構の汎用化(2件目の利用者が出た時に検討。タグ形式 `<system-reminder>...</system-reminder>` の規約は session-todo-reminder で先行確立。注入された Item は worker.history に append する方針)
- Bashツールがファイル編集に常用されている問題をdescriptionで抑制
- 事前定義したManifestをProfile的に扱い、Orchestrator/Coder/Researcherで別々のモデル/設定を使わせる運用ができるようにする
- 複数のPodのViewを行き来できるUI

11
crates/client/Cargo.toml Normal file
View File

@ -0,0 +1,11 @@
[package]
name = "client"
version = "0.1.0"
edition.workspace = true
license.workspace = true
[dependencies]
protocol = { workspace = true }
manifest = { workspace = true }
tokio = { workspace = true, features = ["rt", "macros", "net", "io-util", "sync", "time", "process", "fs"] }
uuid = { workspace = true }

15
crates/client/src/lib.rs Normal file
View File

@ -0,0 +1,15 @@
//! Pod プロトコルを喋るクライアント。
//!
//! - [`PodClient`]: 既存 pod の Unix ソケットへ接続して `Method` を送り、
//! `Event` を受け取る低レベル接続。
//! - [`spawn`]: pod バイナリをサブプロセスとして起動し、`INSOMNIA-READY`
//! ハンドシェイクが終わるまで待つフロー。subprocess を立ち上げる必要が
//! ない呼び出し側 (=既存 pod に attach する場合) は使わなくてよい。
//!
//! TUI / GUI / E2E ハーネスはこの crate に依存して protocol を喋る。
mod pod_client;
pub mod spawn;
pub use pod_client::PodClient;
pub use spawn::{SpawnConfig, SpawnError, SpawnReady, spawn_pod};

View File

@ -0,0 +1,45 @@
use std::io;
use std::path::Path;
use protocol::stream::{JsonLineReader, JsonLineWriter};
use protocol::{Event, Method};
use tokio::net::UnixStream;
use tokio::sync::mpsc;
/// Low-level connection to a pod over its Unix socket.
///
/// Outgoing [`Method`]s are written through a JSON-line writer; incoming
/// [`Event`]s are decoded on a background task and buffered in a bounded
/// channel until the caller pulls them.
pub struct PodClient {
    writer: JsonLineWriter<tokio::io::WriteHalf<UnixStream>>,
    event_rx: mpsc::Receiver<Event>,
}

impl PodClient {
    /// Connects to the pod socket at `path`.
    ///
    /// The stream is split into halves. The read half is handed to a spawned
    /// task that forwards every decoded event into a channel with room for
    /// 256 events. That task ends when the reader yields `Ok(None)` or an
    /// error, or when the receiving side has been dropped.
    ///
    /// # Errors
    ///
    /// Returns the `io::Error` from `UnixStream::connect`.
    pub async fn connect(path: &Path) -> Result<Self, io::Error> {
        let (read_half, write_half) = tokio::io::split(UnixStream::connect(path).await?);
        let writer = JsonLineWriter::new(write_half);
        let (event_tx, event_rx) = mpsc::channel::<Event>(256);

        tokio::spawn(async move {
            let mut events = JsonLineReader::new(read_half);
            loop {
                let event = match events.next::<Event>().await {
                    Ok(Some(event)) => event,
                    // Nothing more to read, or the line could not be decoded.
                    Ok(None) | Err(_) => break,
                };
                if event_tx.send(event).await.is_err() {
                    // The PodClient (receiver) is gone; stop forwarding.
                    break;
                }
            }
        });

        Ok(Self { writer, event_rx })
    }

    /// Writes a single `method` to the pod.
    pub async fn send(&mut self, method: &Method) -> Result<(), io::Error> {
        self.writer.write(method).await
    }

    /// Returns an already-buffered event without waiting.
    ///
    /// `None` means either that no event is queued right now or that the
    /// channel is closed; the two cases are not distinguished.
    pub fn try_next_event(&mut self) -> Option<Event> {
        match self.event_rx.try_recv() {
            Ok(event) => Some(event),
            Err(_) => None,
        }
    }

    /// Waits for the next event; `None` once the forwarding task has ended
    /// and the buffer is drained.
    pub async fn next_event(&mut self) -> Option<Event> {
        self.event_rx.recv().await
    }
}

299
crates/client/src/spawn.rs Normal file
View File

@ -0,0 +1,299 @@
//! pod バイナリをサブプロセスとして立ち上げ、`INSOMNIA-READY` を待つ
//! ハンドシェイク。
//!
//! - 親プロセス (TUI / GUI / E2E) は overlay TOML を組み立ててこの関数に
//! 渡す。pod はそれを受けて socket を bind し、stderr に
//! `INSOMNIA-READY\t<name>\t<socket>` を吐く。
//! - 待機中の stderr 行は `progress` コールバック越しに呼び出し側へ流す。
//! UI の進捗表示や E2E のログ収集はここで賄う。
//! - `kill_on_drop = false` + `process_group(0)` により、親プロセス
//! ライフサイクルから切り離した detached pod を作る。ready 後の lifecycle
//! 管理は runtime ディレクトリ / socket を介して行う。
use std::io;
use std::path::{Path, PathBuf};
use std::process::Stdio;
use std::time::Duration;
use tokio::process::Command;
use uuid::Uuid;
const READY_PREFIX: &str = "INSOMNIA-READY\t";
const READY_TIMEOUT: Duration = Duration::from_secs(20);
/// Input to `spawn_pod`.
pub struct SpawnConfig {
    /// Identifier used as `pod.name`. It is used to resolve the runtime
    /// directory (`manifest::paths::pod_runtime_dir`) and is meant to be
    /// matched against the name carried on the ready line.
    // NOTE(review): the spawn code visible in this file returns the name from
    // the ready line as-is and does not compare it with `pod_name` — confirm
    // whether the comparison happens elsewhere.
    pub pod_name: String,
    /// TOML string handed to the pod via `--overlay`.
    pub overlay_toml: String,
    /// Working directory (`current_dir`) of the pod process.
    pub cwd: PathBuf,
    /// When `Some(id)`, `--session <id>` is appended so the pod resumes from
    /// that session.
    pub resume_from: Option<Uuid>,
    /// When `true`, `--pod <pod_name>` is appended: the pod resumes from
    /// name-keyed state if it exists, otherwise it starts as a new Pod with
    /// the same name.
    pub resume_by_pod_name: bool,
}
/// Successful outcome of `spawn_pod`: the two fields parsed from the
/// `INSOMNIA-READY\t<name>\t<socket>` line.
pub struct SpawnReady {
    /// Pod name as reported on the ready line.
    pub pod_name: String,
    /// Path of the Unix socket reported on the ready line. `spawn_pod` only
    /// returns after a probe connection to it has succeeded.
    pub socket_path: PathBuf,
}
/// Failure modes of `spawn_pod`.
#[derive(Debug)]
pub enum SpawnError {
    /// An I/O operation failed while preparing the runtime directory,
    /// reading the stderr log, or probing the socket.
    Io(io::Error),
    /// The runtime directory could not be resolved (e.g. the relevant
    /// environment variables are unset).
    RuntimeDirUnavailable,
    /// Starting the pod process itself failed.
    PodLaunchFailed(io::Error),
    /// The pod process exited before the handshake finished. Also reported
    /// when the ready line is malformed.
    PodExitedEarly {
        /// Most recent stderr lines, or a description of the malformed
        /// ready line; may be empty.
        stderr_tail: String,
    },
    /// The handshake did not complete before the `READY_TIMEOUT` deadline.
    Timeout,
}
/// Human-readable messages for each [`SpawnError`] variant.
impl std::fmt::Display for SpawnError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Io(err) => write!(f, "io error: {err}"),
            Self::RuntimeDirUnavailable => f.write_str(
                "could not resolve runtime directory (set INSOMNIA_HOME, INSOMNIA_RUNTIME_DIR, XDG_RUNTIME_DIR, or HOME)",
            ),
            Self::PodLaunchFailed(err) => write!(f, "failed to launch pod: {err}"),
            // Without captured stderr there is nothing to append.
            Self::PodExitedEarly { stderr_tail } if stderr_tail.is_empty() => {
                f.write_str("pod exited before becoming ready")
            }
            Self::PodExitedEarly { stderr_tail } => {
                write!(f, "pod exited before becoming ready: {stderr_tail}")
            }
            Self::Timeout => {
                let secs = READY_TIMEOUT.as_secs();
                write!(f, "pod did not become ready within {secs}s")
            }
        }
    }
}
// Marker impl only: `source()` is not overridden, so the wrapped `io::Error`
// is surfaced through the `Display` text rather than the error chain.
impl std::error::Error for SpawnError {}
impl From<io::Error> for SpawnError {
fn from(e: io::Error) -> Self {
Self::Io(e)
}
}
/// Spawns a pod and waits until the `INSOMNIA-READY` handshake has finished.
///
/// `progress` is called for each stderr line observed before the ready line
/// is found (the ready line itself is excluded). Use it for UI progress
/// display or for collecting logs in E2E runs.
///
/// The pod's stderr is redirected to `stderr.log` inside the pod runtime
/// directory; stdin and stdout are set to null.
///
/// # Errors
///
/// - [`SpawnError::RuntimeDirUnavailable`] when the runtime directory cannot
///   be resolved.
/// - [`SpawnError::Io`] when the runtime directory or the log file cannot be
///   created.
/// - [`SpawnError::PodLaunchFailed`] when the process cannot be started.
/// - Any error from the handshake wait; in that case the child is killed and
///   reaped before returning.
pub async fn spawn_pod<F>(config: SpawnConfig, mut progress: F) -> Result<SpawnReady, SpawnError>
where
    F: FnMut(&str),
{
    let pod_bin = resolve_pod_command();

    let runtime_dir = manifest::paths::pod_runtime_dir(&config.pod_name)
        .ok_or(SpawnError::RuntimeDirUnavailable)?;
    std::fs::create_dir_all(&runtime_dir)?;
    let stderr_path = runtime_dir.join("stderr.log");
    let log_file = std::fs::File::create(&stderr_path)?;

    let mut cmd = Command::new(&pod_bin);
    cmd.args(["--overlay", config.overlay_toml.as_str()]);
    cmd.current_dir(&config.cwd);
    cmd.stdin(Stdio::null());
    cmd.stdout(Stdio::null());
    cmd.stderr(Stdio::from(log_file));
    cmd.process_group(0);
    if config.resume_by_pod_name {
        cmd.args(["--pod", config.pod_name.as_str()]);
    }
    if let Some(session) = config.resume_from.map(|id| id.to_string()) {
        cmd.arg("--session").arg(session);
    }

    let mut child = cmd.spawn().map_err(SpawnError::PodLaunchFailed)?;

    // Default `kill_on_drop = false` plus `process_group(0)` makes this
    // a detached Pod once startup succeeds: dropping the handle does not
    // terminate it, and terminal-generated signals for the parent's
    // process group do not hit the Pod. Runtime state/socket files are
    // the source of truth after that point.
    match wait_for_ready_file(&mut progress, &stderr_path, &mut child).await {
        Ok(ready) => {
            // Keep waiting on the child in the background.
            tokio::spawn(async move {
                let _ = child.wait().await;
            });
            Ok(ready)
        }
        Err(err) => {
            // Startup failed: make sure the half-started pod is gone.
            let _ = child.start_kill();
            let _ = child.wait().await;
            Err(err)
        }
    }
}
/// Polls the pod's stderr log until the `INSOMNIA-READY` line shows up, then
/// waits for the advertised socket to accept a connection.
///
/// Every other line seen before the ready line is recorded in the stderr
/// tail and forwarded to `progress`.
///
/// Only newline-terminated lines are consumed. The log is written by another
/// process while we read it, so the last line of a snapshot may be caught
/// mid-write. Treating such a fragment as a full line could report a
/// truncated ready line as malformed, or consume the first half of the ready
/// line as ordinary output and then never recognise it. An unterminated
/// final line is left in place; if the child exits, `drain_stderr_into_tail`
/// still picks it up.
///
/// # Errors
///
/// - [`SpawnError::Io`] if the log cannot be read (a missing file is treated
///   as empty) or the socket probe fails.
/// - [`SpawnError::PodExitedEarly`] if the child exits first, or if the ready
///   line lacks a name or socket path.
/// - [`SpawnError::Timeout`] once `READY_TIMEOUT` has elapsed.
async fn wait_for_ready_file<F>(
    progress: &mut F,
    stderr_path: &Path,
    child: &mut tokio::process::Child,
) -> Result<SpawnReady, SpawnError>
where
    F: FnMut(&str),
{
    let mut tail = StderrTail::new();
    let deadline = tokio::time::Instant::now() + READY_TIMEOUT;
    // Byte offset into the log up to which complete lines have been consumed.
    let mut offset = 0usize;
    loop {
        let content = match tokio::fs::read_to_string(stderr_path).await {
            Ok(content) => content,
            Err(e) if e.kind() == io::ErrorKind::NotFound => String::new(),
            Err(e) => return Err(SpawnError::Io(e)),
        };
        if content.len() > offset {
            // End of the last complete (newline-terminated) line in this snapshot.
            let complete_end = content[offset..]
                .rfind('\n')
                .map_or(offset, |idx| offset + idx + 1);
            let mut consumed = offset;
            for raw in content[offset..complete_end].split_inclusive('\n') {
                consumed += raw.len();
                // Same line-ending handling as `str::lines`: drop `\n` or `\r\n`.
                let line = raw.strip_suffix('\n').unwrap_or(raw);
                let line = line.strip_suffix('\r').unwrap_or(line);
                if let Some(rest) = line.strip_prefix(READY_PREFIX) {
                    let mut parts = rest.splitn(2, '\t');
                    let pod_name = parts.next().unwrap_or("").to_string();
                    let socket_str = parts.next().unwrap_or("").to_string();
                    if pod_name.is_empty() || socket_str.is_empty() {
                        return Err(SpawnError::PodExitedEarly {
                            stderr_tail: format!("malformed ready line: {line}"),
                        });
                    }
                    let socket_path = PathBuf::from(socket_str);
                    // Everything up to and including the ready line has been
                    // handled; advance the offset so a later drain does not
                    // push the same lines into the tail a second time.
                    offset = consumed;
                    wait_for_socket(
                        &socket_path,
                        deadline,
                        child,
                        stderr_path,
                        &mut tail,
                        &mut offset,
                    )
                    .await?;
                    return Ok(SpawnReady {
                        pod_name,
                        socket_path,
                    });
                }
                tail.push(line);
                progress(line);
            }
            offset = complete_end;
        }
        if tokio::time::Instant::now() >= deadline {
            return Err(SpawnError::Timeout);
        }
        tokio::select! {
            status = child.wait() => {
                let _ = status;
                // The pod may flush its final stderr line right before it
                // exits. Re-read the log after `child.wait()` resolves so the
                // line that explains the failure is not lost and ends up in
                // `PodExitedEarly`.
                drain_stderr_into_tail(stderr_path, &mut tail, &mut offset).await;
                return Err(SpawnError::PodExitedEarly {
                    stderr_tail: tail.into_string(),
                });
            }
            _ = tokio::time::sleep(Duration::from_millis(100)) => {}
        }
    }
}
/// Repeatedly probes `socket_path` until a connection succeeds.
///
/// A probe failing with `NotFound` or `ConnectionRefused` is retried after a
/// 50 ms pause; any other connect error is returned as [`SpawnError::Io`].
/// The wait ends with [`SpawnError::Timeout`] once `deadline` has passed, or
/// with [`SpawnError::PodExitedEarly`] (carrying the refreshed stderr tail)
/// if the child exits first. The probe connection is dropped immediately.
async fn wait_for_socket(
    socket_path: &Path,
    deadline: tokio::time::Instant,
    child: &mut tokio::process::Child,
    stderr_path: &Path,
    tail: &mut StderrTail,
    offset: &mut usize,
) -> Result<(), SpawnError> {
    loop {
        match tokio::net::UnixStream::connect(socket_path).await {
            Ok(_) => return Ok(()),
            Err(err) => {
                // The socket may simply not be bound/listening yet.
                let not_ready_yet = matches!(
                    err.kind(),
                    io::ErrorKind::NotFound | io::ErrorKind::ConnectionRefused
                );
                if !not_ready_yet {
                    return Err(SpawnError::Io(err));
                }
            }
        }

        if tokio::time::Instant::now() >= deadline {
            return Err(SpawnError::Timeout);
        }

        tokio::select! {
            _ = child.wait() => {
                drain_stderr_into_tail(stderr_path, tail, offset).await;
                let stderr_tail = tail.as_string();
                return Err(SpawnError::PodExitedEarly { stderr_tail });
            }
            _ = tokio::time::sleep(Duration::from_millis(50)) => {}
        }
    }
}
/// Best-effort re-read of the stderr log: pushes every line past `*offset`
/// into `tail` (skipping ready lines) and moves `*offset` to the end of the
/// content. Read errors are ignored and leave both arguments untouched.
async fn drain_stderr_into_tail(stderr_path: &Path, tail: &mut StderrTail, offset: &mut usize) {
    let content = match tokio::fs::read_to_string(stderr_path).await {
        Ok(text) => text,
        Err(_) => return,
    };
    if content.len() > *offset {
        content[*offset..]
            .lines()
            .filter(|line| !line.starts_with(READY_PREFIX))
            .for_each(|line| tail.push(line));
        *offset = content.len();
    }
}
/// Picks the executable used to launch a child Pod.
///
/// The result must be a `pod`-compatible binary: the parent scans the
/// child's stderr for `INSOMNIA-READY`, so a wrapper that prints extra
/// stderr lines would pollute the handshake.
///
/// A non-empty `INSOMNIA_POD_COMMAND` takes precedence (tests use it to
/// inject a mock binary). Otherwise the bare name `pod` is returned and
/// lookup is left to `PATH`; a missing binary then surfaces as the spawn
/// `io::Error`.
fn resolve_pod_command() -> PathBuf {
    match std::env::var("INSOMNIA_POD_COMMAND") {
        Ok(overridden) if !overridden.is_empty() => PathBuf::from(overridden),
        // Unset, not valid Unicode, or empty: fall back to the default.
        _ => PathBuf::from("pod"),
    }
}
/// Bounded buffer holding the most recent stderr lines of a pod.
struct StderrTail {
    lines: std::collections::VecDeque<String>,
}

impl StderrTail {
    /// Maximum number of lines retained.
    const CAPACITY: usize = 8;
    /// Separator placed between lines when rendering the tail.
    const SEPARATOR: &'static str = " | ";

    /// Creates an empty tail.
    fn new() -> Self {
        let lines = std::collections::VecDeque::with_capacity(Self::CAPACITY);
        Self { lines }
    }

    /// Appends `line`, evicting the oldest entry once the buffer is full.
    fn push(&mut self, line: &str) {
        if self.lines.len() == Self::CAPACITY {
            self.lines.pop_front();
        }
        self.lines.push_back(line.to_string());
    }

    /// Renders the retained lines, oldest first, without consuming the tail.
    fn as_string(&self) -> String {
        let parts: Vec<&str> = self.lines.iter().map(String::as_str).collect();
        parts.join(Self::SEPARATOR)
    }

    /// Renders the retained lines, oldest first, consuming the tail.
    fn into_string(self) -> String {
        Vec::from(self.lines).join(Self::SEPARATOR)
    }
}

10
crates/daemon/Cargo.toml Normal file
View File

@ -0,0 +1,10 @@
[package]
name = "daemon"
version = "0.1.0"
edition.workspace = true
license.workspace = true
[dependencies]
manifest = { workspace = true }
protocol = { workspace = true }
tokio = { workspace = true, features = ["full"] }

9
crates/daemon/README.md Normal file
View File

@ -0,0 +1,9 @@
# daemon
Pod のライフサイクルを管理する常駐デーモン。未実装。
## 依存クレート
- `manifest` — マニフェスト設定
- `protocol` — 通信プロトコル型
- `tokio` — 非同期ランタイム

1
crates/daemon/src/lib.rs Normal file
View File

@ -0,0 +1 @@

View File

@ -1,18 +0,0 @@
[package]
name = "insomnia"
version = "0.1.0"
edition.workspace = true
license.workspace = true
[dependencies]
llm-worker = { path = "../llm-worker" }
llm-worker-persistence = { path = "../llm-worker-persistence" }
serde = { version = "1.0", features = ["derive"] }
toml = "0.8"
uuid = { version = "1", features = ["v7", "serde"] }
thiserror = "2.0"
tokio = { version = "1.49", features = ["fs"] }
[dev-dependencies]
tokio = { version = "1.49", features = ["macros", "rt-multi-thread"] }
tempfile = "3.24"

View File

@ -1,9 +0,0 @@
pub mod manifest;
pub mod pod;
pub mod provider;
pub mod scope;
pub use manifest::{PodManifest, ProviderConfig, ProviderKind};
pub use pod::{Pod, PodError, PodId, PodRunResult, apply_worker_manifest, new_pod_id};
pub use provider::build_client;
pub use scope::Scope;

View File

@ -1,164 +0,0 @@
use std::path::PathBuf;
use serde::Deserialize;
/// Declarative configuration for a Pod.
///
/// Parsed from a TOML manifest file. Describes the provider, model,
/// system prompt, and optional directory scope.
#[derive(Debug, Clone, Deserialize)]
pub struct PodManifest {
    /// Pod metadata (the `pod` table).
    pub pod: PodMeta,
    /// LLM provider settings (the `provider` table).
    pub provider: ProviderConfig,
    /// Worker-level settings (the `worker` table). The table is required,
    /// but every key inside it is optional.
    pub worker: WorkerManifest,
    /// Directory scope (the `scope` table); `None` when the table is absent.
    #[serde(default)]
    pub scope: Option<ScopeConfig>,
}
/// Pod metadata.
#[derive(Debug, Clone, Deserialize)]
pub struct PodMeta {
    /// Name of the pod as written in the manifest (required).
    pub name: String,
}
/// LLM provider configuration.
#[derive(Debug, Clone, Deserialize)]
pub struct ProviderConfig {
    /// Which provider backend to use.
    pub kind: ProviderKind,
    /// Model identifier handed to the provider client.
    pub model: String,
    /// Environment variable name holding the API key.
    #[serde(default)]
    pub api_key_env: Option<String>,
    /// Custom base URL for the provider API.
    #[serde(default)]
    pub base_url: Option<String>,
}
/// Supported LLM providers.
///
/// Deserialized from the lowercase variant name (`"anthropic"`, `"openai"`,
/// `"gemini"`, `"ollama"`); any other value is rejected.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ProviderKind {
    Anthropic,
    Openai,
    Gemini,
    Ollama,
}
/// Worker-level configuration embedded in the manifest.
///
/// Every key is optional; an omitted key deserializes to `None`.
#[derive(Debug, Clone, Deserialize)]
pub struct WorkerManifest {
    /// System prompt to install on the worker.
    #[serde(default)]
    pub system_prompt: Option<String>,
    /// Value for the request config's `max_tokens`.
    #[serde(default)]
    pub max_tokens: Option<u32>,
    /// Value for the request config's `temperature`.
    #[serde(default)]
    pub temperature: Option<f32>,
}
/// Directory scope configuration.
#[derive(Debug, Clone, Deserialize)]
pub struct ScopeConfig {
    /// Root directory of the scope, exactly as written in the manifest
    /// (not canonicalized at parse time).
    pub root: PathBuf,
}
impl PodManifest {
    /// Deserializes a manifest from TOML text.
    ///
    /// # Errors
    ///
    /// Returns the `toml` deserialization error when `s` is not valid TOML
    /// or does not match the manifest schema (e.g. an unknown provider kind).
    pub fn from_toml(s: &str) -> Result<Self, toml::de::Error> {
        toml::from_str::<Self>(s)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // Only the required keys are present; every optional field must be `None`.
    #[test]
    fn parse_minimal_manifest() {
        let toml = r#"
[pod]
name = "test-agent"
[provider]
kind = "anthropic"
model = "claude-sonnet-4-20250514"
[worker]
"#;
        let manifest = PodManifest::from_toml(toml).unwrap();
        assert_eq!(manifest.pod.name, "test-agent");
        assert_eq!(manifest.provider.kind, ProviderKind::Anthropic);
        assert_eq!(manifest.provider.model, "claude-sonnet-4-20250514");
        assert!(manifest.provider.api_key_env.is_none());
        assert!(manifest.scope.is_none());
        assert!(manifest.worker.system_prompt.is_none());
    }

    // Every table and optional key is populated and must round-trip.
    #[test]
    fn parse_full_manifest() {
        let toml = r#"
[pod]
name = "code-reviewer"
[provider]
kind = "anthropic"
model = "claude-sonnet-4-20250514"
api_key_env = "ANTHROPIC_API_KEY"
[worker]
system_prompt = "You are a code reviewer."
max_tokens = 4096
temperature = 0.3
[scope]
root = "./src"
"#;
        let manifest = PodManifest::from_toml(toml).unwrap();
        assert_eq!(manifest.pod.name, "code-reviewer");
        assert_eq!(
            manifest.provider.api_key_env.as_deref(),
            Some("ANTHROPIC_API_KEY")
        );
        assert_eq!(
            manifest.worker.system_prompt.as_deref(),
            Some("You are a code reviewer.")
        );
        assert_eq!(manifest.worker.max_tokens, Some(4096));
        assert_eq!(manifest.worker.temperature, Some(0.3));
        assert_eq!(
            manifest.scope.as_ref().unwrap().root,
            PathBuf::from("./src")
        );
    }

    // An Ollama manifest parses without `api_key_env`.
    #[test]
    fn parse_ollama_no_api_key() {
        let toml = r#"
[pod]
name = "local-agent"
[provider]
kind = "ollama"
model = "llama3"
[worker]
"#;
        let manifest = PodManifest::from_toml(toml).unwrap();
        assert_eq!(manifest.provider.kind, ProviderKind::Ollama);
        assert!(manifest.provider.api_key_env.is_none());
    }

    // A provider kind outside the enum is a parse error.
    #[test]
    fn reject_unknown_provider() {
        let toml = r#"
[pod]
name = "test"
[provider]
kind = "unknown_provider"
model = "x"
[worker]
"#;
        assert!(PodManifest::from_toml(toml).is_err());
    }
}

View File

@ -1,180 +0,0 @@
use llm_worker::llm_client::client::LlmClient;
use llm_worker::llm_client::RequestConfig;
use llm_worker::Worker;
use llm_worker_persistence::{
Session, SessionConfig, SessionError, SessionId, Store, StoreError,
};
use crate::manifest::{PodManifest, WorkerManifest};
use crate::scope::Scope;
/// Pod identifier. UUID v7 (time-ordered).
pub type PodId = uuid::Uuid;
/// Generate a new Pod ID.
pub fn new_pod_id() -> PodId {
uuid::Uuid::now_v7()
}
/// An independent agent execution unit.
///
/// Wraps a persistent [`Session`] with manifest metadata and an optional
/// directory scope. This is the primary abstraction in insomnia.
pub struct Pod<C: LlmClient, St: Store> {
    /// Unique identifier of this Pod.
    id: PodId,
    /// Manifest the Pod was created or restored with.
    manifest: PodManifest,
    /// Persistent session that owns the worker and the store.
    session: Session<C, St>,
    /// Directory scope, if one was supplied by the caller.
    scope: Option<Scope>,
}
impl<C: LlmClient, St: Store> Pod<C, St> {
/// Create a new Pod from a pre-built Worker and store.
///
/// The caller is responsible for constructing the `LlmClient` from the
/// manifest's provider config. This keeps Pod free of provider-specific
/// dependencies.
pub async fn new(
manifest: PodManifest,
worker: Worker<C>,
store: St,
scope: Option<Scope>,
) -> Result<Self, PodError> {
let session = Session::new(worker, store, SessionConfig::default()).await?;
Ok(Self {
id: new_pod_id(),
manifest,
session,
scope,
})
}
/// Restore a Pod from a persisted session.
pub async fn restore(
id: PodId,
session_id: SessionId,
manifest: PodManifest,
client: C,
store: St,
scope: Option<Scope>,
) -> Result<Self, PodError> {
let session = Session::restore(client, store, session_id, SessionConfig::default()).await?;
Ok(Self {
id,
manifest,
session,
scope,
})
}
/// The Pod's unique identifier.
pub fn id(&self) -> PodId {
self.id
}
/// The session ID used for persistence.
pub fn session_id(&self) -> SessionId {
self.session.session_id()
}
/// The Pod's manifest.
pub fn manifest(&self) -> &PodManifest {
&self.manifest
}
/// The Pod's directory scope, if any.
pub fn scope(&self) -> Option<&Scope> {
self.scope.as_ref()
}
/// Direct access to the underlying session.
///
/// Use this to register tools, hooks, or subscribers on the worker
/// before calling [`run`](Self::run).
pub fn session_mut(&mut self) -> &mut Session<C, St> {
&mut self.session
}
/// Send user input and run until the LLM turn completes.
pub async fn run(&mut self, input: impl Into<String>) -> Result<PodRunResult, PodError> {
let result = self.session.run(input).await?;
Ok(result.into())
}
/// Resume from a paused state.
pub async fn resume(&mut self) -> Result<PodRunResult, PodError> {
let result = self.session.resume().await?;
Ok(result.into())
}
}
impl<St: Store> Pod<Box<dyn LlmClient>, St> {
    /// Builds a Pod using nothing but the manifest.
    ///
    /// The LLM client is created from the provider section, the worker
    /// section is applied to a fresh worker, and the result is handed to
    /// [`Pod::new`], which opens a new persistent session and assigns the id.
    pub async fn from_manifest(
        manifest: PodManifest,
        store: St,
        scope: Option<Scope>,
    ) -> Result<Self, PodError> {
        let mut worker = Worker::new(crate::provider::build_client(&manifest.provider)?);
        apply_worker_manifest(&mut worker, &manifest.worker);
        Self::new(manifest, worker, store, scope).await
    }
}
/// Transfers the worker section of a manifest onto `worker`.
///
/// The system prompt is only touched when the manifest provides one. The
/// request config is always replaced: it starts from `RequestConfig::new()`
/// and only the limits present in the manifest are overridden.
pub fn apply_worker_manifest<C: LlmClient>(worker: &mut Worker<C>, wm: &WorkerManifest) {
    if let Some(prompt) = &wm.system_prompt {
        worker.set_system_prompt(prompt);
    }

    let mut request = RequestConfig::new();
    if let Some(limit) = wm.max_tokens {
        request.max_tokens = Some(limit);
    }
    if let Some(temp) = wm.temperature {
        request.temperature = Some(temp);
    }
    worker.set_request_config(request);
}
/// Outcome of a single Pod run or resume.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PodRunResult {
    /// The LLM finished its turn normally.
    Finished,
    /// The LLM paused (e.g. awaiting user confirmation via a hook).
    Paused,
}

/// One-to-one mapping from the worker's result type.
impl From<llm_worker::WorkerResult> for PodRunResult {
    fn from(outcome: llm_worker::WorkerResult) -> Self {
        match outcome {
            llm_worker::WorkerResult::Paused => Self::Paused,
            llm_worker::WorkerResult::Finished => Self::Finished,
        }
    }
}
/// Pod errors.
#[derive(Debug, thiserror::Error)]
pub enum PodError {
    /// A session operation failed; displayed transparently as the inner error.
    #[error(transparent)]
    Session(#[from] SessionError),
    /// A store operation failed; displayed transparently as the inner error.
    #[error(transparent)]
    Store(#[from] StoreError),
    /// `path` lies outside the Pod's allowed directory.
    // NOTE(review): nothing in the code visible here constructs this variant.
    #[error("scope violation: {path} is outside the allowed directory")]
    ScopeViolation { path: String },
    /// The provider section of the manifest could not be turned into a
    /// client; the payload is a human-readable reason.
    #[error("provider configuration error: {0}")]
    ProviderConfig(String),
}

View File

@ -1,60 +0,0 @@
use llm_worker::llm_client::client::LlmClient;
use llm_worker::llm_client::providers::anthropic::AnthropicClient;
use llm_worker::llm_client::providers::gemini::GeminiClient;
use llm_worker::llm_client::providers::ollama::OllamaClient;
use llm_worker::llm_client::providers::openai::OpenAIClient;
use crate::manifest::{ProviderConfig, ProviderKind};
use crate::pod::PodError;
/// Build an [`LlmClient`] from a [`ProviderConfig`].
///
/// When `api_key_env` is set, the API key is read from that environment
/// variable. Anthropic, OpenAI and Gemini require a key; Ollama does not use
/// one. A configured `base_url` is applied to whichever client is built.
///
/// # Errors
///
/// Returns [`PodError::ProviderConfig`] when
/// - `api_key_env` names a variable that cannot be read (the message
///   includes the variable name so the misconfiguration can be located), or
/// - the selected provider needs a key but `api_key_env` is not set.
pub fn build_client(config: &ProviderConfig) -> Result<Box<dyn LlmClient>, PodError> {
    let api_key = match config.api_key_env.as_deref() {
        Some(var) => {
            // `VarError`'s own message does not say which variable failed.
            let value = std::env::var(var)
                .map_err(|e| PodError::ProviderConfig(format!("env var `{var}`: {e}")))?;
            Some(value)
        }
        None => None,
    };
    match config.kind {
        ProviderKind::Anthropic => {
            let key = require_api_key(api_key, "anthropic")?;
            let mut client = AnthropicClient::new(key, &config.model);
            if let Some(ref url) = config.base_url {
                client = client.with_base_url(url);
            }
            Ok(Box::new(client))
        }
        ProviderKind::Openai => {
            let key = require_api_key(api_key, "openai")?;
            let mut client = OpenAIClient::new(key, &config.model);
            if let Some(ref url) = config.base_url {
                client = client.with_base_url(url);
            }
            Ok(Box::new(client))
        }
        ProviderKind::Gemini => {
            let key = require_api_key(api_key, "gemini")?;
            let mut client = GeminiClient::new(key, &config.model);
            if let Some(ref url) = config.base_url {
                client = client.with_base_url(url);
            }
            Ok(Box::new(client))
        }
        ProviderKind::Ollama => {
            let mut client = OllamaClient::new(&config.model);
            if let Some(ref url) = config.base_url {
                client = client.with_base_url(url);
            }
            Ok(Box::new(client))
        }
    }
}

/// Unwraps the resolved API key or reports that `provider` needs `api_key_env`.
fn require_api_key(api_key: Option<String>, provider: &str) -> Result<String, PodError> {
    api_key.ok_or_else(|| PodError::ProviderConfig(format!("{provider} requires api_key_env")))
}

View File

@ -1,101 +0,0 @@
use std::path::{Path, PathBuf};
/// Directory scope constraining a Pod's write access.
///
/// Read access is unrestricted — only write operations are checked against the scope.
#[derive(Debug, Clone)]
pub struct Scope {
    root: PathBuf,
}

impl Scope {
    /// Create a new scope rooted at the given directory.
    ///
    /// The path is canonicalized to resolve symlinks and relative components.
    ///
    /// # Errors
    ///
    /// Returns the `io::Error` from canonicalization, e.g. when `root` does
    /// not exist.
    pub fn new(root: impl Into<PathBuf>) -> std::io::Result<Self> {
        let root = root.into().canonicalize()?;
        Ok(Self { root })
    }

    /// The root directory of this scope.
    pub fn root(&self) -> &Path {
        &self.root
    }

    /// Check whether `path` falls within this scope.
    ///
    /// The path is canonicalized before comparison. When it cannot be
    /// canonicalized:
    ///
    /// - If a directory entry nevertheless exists at `path` (dangling
    ///   symlink, symlink loop, unresolvable target), the path is rejected.
    ///   Its real destination is unknown, and writing through a dangling
    ///   symlink would create the file wherever the link points, possibly
    ///   outside the scope.
    /// - Otherwise the path does not exist yet and its parent directory is
    ///   checked instead, so a new file inside the scope is accepted. A bare
    ///   relative file name has an empty parent, which is treated as the
    ///   current directory.
    pub fn contains(&self, path: &Path) -> bool {
        if let Ok(canonical) = path.canonicalize() {
            return canonical.starts_with(&self.root);
        }

        // Something exists at `path` but cannot be resolved: never trust it.
        if path.symlink_metadata().is_ok() {
            return false;
        }

        // Path doesn't exist yet — check the parent directory instead.
        // This handles write_file to a new file inside the scope.
        let parent = match path.parent() {
            Some(p) if p.as_os_str().is_empty() => Path::new("."),
            Some(p) => p,
            None => return false,
        };
        match parent.canonicalize() {
            Ok(resolved) => resolved.starts_with(&self.root),
            Err(_) => false,
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    // An existing file directly under the root is in scope.
    #[test]
    fn contains_file_inside_scope() {
        let dir = TempDir::new().unwrap();
        let scope = Scope::new(dir.path()).unwrap();
        let file = dir.path().join("test.txt");
        fs::write(&file, "hello").unwrap();
        assert!(scope.contains(&file));
    }

    // An existing file in an unrelated directory is out of scope.
    #[test]
    fn rejects_file_outside_scope() {
        let dir = TempDir::new().unwrap();
        let outside = TempDir::new().unwrap();
        let scope = Scope::new(dir.path()).unwrap();
        let file = outside.path().join("test.txt");
        fs::write(&file, "hello").unwrap();
        assert!(!scope.contains(&file));
    }

    // The parent-directory fallback accepts a file that is about to be created.
    #[test]
    fn contains_new_file_in_existing_parent() {
        let dir = TempDir::new().unwrap();
        let scope = Scope::new(dir.path()).unwrap();
        // File doesn't exist yet, but parent dir is inside scope
        let new_file = dir.path().join("new.txt");
        assert!(scope.contains(&new_file));
    }

    // Nesting depth does not matter as long as the parent exists under the root.
    #[test]
    fn contains_nested_directory() {
        let dir = TempDir::new().unwrap();
        let nested = dir.path().join("a/b/c");
        fs::create_dir_all(&nested).unwrap();
        let scope = Scope::new(dir.path()).unwrap();
        let file = nested.join("test.txt");
        assert!(scope.contains(&file));
    }

    // `..` components that climb out of the root are rejected.
    #[test]
    fn rejects_traversal_attack() {
        let dir = TempDir::new().unwrap();
        let scope = Scope::new(dir.path()).unwrap();
        let traversal = dir.path().join("../../../etc/passwd");
        assert!(!scope.contains(&traversal));
    }
}

View File

@ -0,0 +1,13 @@
[package]
name = "lint-common"
version = "0.1.0"
edition.workspace = true
license.workspace = true
[dependencies]
chrono = { version = "0.4", features = ["serde"] }
serde = { workspace = true, features = ["derive"] }
thiserror = { workspace = true }
[dev-dependencies]
serde_json = { workspace = true }

View File

@ -0,0 +1,81 @@
//! Common frontmatter helpers.
use chrono::{DateTime, Utc};
use crate::RecordLintError;
/// Trait record frontmatter types implement so linters can drive them uniformly.
pub trait Frontmatter: Sized {
    /// Hard upper bound on body chars (excluding the frontmatter block).
    const BODY_LIMIT: usize;
    /// Creation timestamp in UTC, or `None`.
    // NOTE(review): presumably `None` means the record carries no such field — confirm with implementors.
    fn created_at(&self) -> Option<DateTime<Utc>>;
    /// Last-update timestamp in UTC, or `None`.
    // NOTE(review): presumably `None` means the record carries no such field — confirm with implementors.
    fn updated_at(&self) -> Option<DateTime<Utc>>;
}
const FRONTMATTER_DELIM: &str = "---";

/// Split a markdown document into `(yaml_frontmatter, body)`.
///
/// The document must start with a line consisting of exactly `---`
/// (terminated by `\n` or `\r\n`) and contain a closing `---` line (or `---`
/// at EOF) somewhere downstream. The line terminator after the closing
/// delimiter is consumed, so `body` starts on the following line.
///
/// # Errors
///
/// - [`RecordLintError::MissingFrontmatter`] when the first line is not
///   exactly `---`. This includes text that merely starts with three dashes,
///   such as `----` or `---title`.
/// - [`RecordLintError::MalformedFrontmatter`] when no closing `---` line
///   exists.
pub fn split_frontmatter(content: &str) -> Result<(&str, &str), RecordLintError> {
    // The opening delimiter must be the very first line, on its own.
    let rest = content
        .strip_prefix(FRONTMATTER_DELIM)
        .ok_or(RecordLintError::MissingFrontmatter)?;
    let after_open = if rest.is_empty() {
        // Document is just `---`: opened but never closed (reported below).
        rest
    } else {
        rest.strip_prefix("\r\n")
            .or_else(|| rest.strip_prefix('\n'))
            .ok_or(RecordLintError::MissingFrontmatter)?
    };

    // Look for the closing `---` on its own line, tracking how many bytes
    // precede it so the YAML and the body can be sliced out without copying.
    let mut consumed = 0usize;
    for line in after_open.split_inclusive('\n') {
        let trimmed = line.trim_end_matches('\n').trim_end_matches('\r');
        if trimmed == FRONTMATTER_DELIM {
            let yaml = &after_open[..consumed];
            let body = &after_open[consumed + line.len()..];
            return Ok((yaml, body));
        }
        consumed += line.len();
    }

    Err(RecordLintError::MalformedFrontmatter(
        "missing closing `---` line".to_string(),
    ))
}
#[cfg(test)]
mod tests {
    use super::*;

    // Happy path: YAML and body are separated and the delimiters are dropped.
    #[test]
    fn splits_simple() {
        let doc = "---\nfoo: 1\n---\nbody here\n";
        let (y, b) = split_frontmatter(doc).unwrap();
        assert_eq!(y, "foo: 1\n");
        assert_eq!(b, "body here\n");
    }

    // A document that does not start with `---` has no frontmatter.
    #[test]
    fn no_leading_delim_errors() {
        let err = split_frontmatter("hello").unwrap_err();
        assert!(matches!(err, RecordLintError::MissingFrontmatter));
    }

    // An opened but never closed block is malformed.
    #[test]
    fn no_closing_delim_errors() {
        let err = split_frontmatter("---\nfoo: 1\nno close\n").unwrap_err();
        assert!(matches!(err, RecordLintError::MalformedFrontmatter(_)));
    }

    // Nothing after the closing delimiter yields an empty body.
    #[test]
    fn handles_empty_body() {
        let doc = "---\nfoo: 1\n---\n";
        let (_, b) = split_frontmatter(doc).unwrap();
        assert_eq!(b, "");
    }
}

View File

@ -0,0 +1,20 @@
//! Shared record lint primitives for memory and workflow files.
mod frontmatter;
mod slug;
pub use frontmatter::{Frontmatter, split_frontmatter};
pub use slug::{Slug, is_valid_slug};
/// Common lint errors for Markdown record syntax shared by memory and workflow.
#[derive(Debug, Clone, thiserror::Error, PartialEq, Eq)]
pub enum RecordLintError {
    /// The string is not a valid slug; the payload is the rejected input.
    // NOTE(review): `is_valid_slug` also rejects consecutive `--`, which the
    // regex quoted in this message would accept — the message understates
    // the rule.
    #[error("invalid slug `{0}`: must match ^[a-z0-9](?:[a-z0-9-]{{0,62}}[a-z0-9])?$")]
    InvalidSlug(String),
    /// A frontmatter block was opened but is not well-formed; the payload
    /// describes the problem (e.g. a missing closing delimiter).
    #[error("malformed frontmatter: {0}")]
    MalformedFrontmatter(String),
    /// The document does not begin with a frontmatter block.
    #[error("frontmatter is missing or document is empty")]
    MissingFrontmatter,
}

View File

@ -0,0 +1,146 @@
//! Slug type and validation.
//!
//! Syntax (agent-skills compatible):
//! ^[a-z0-9](?:[a-z0-9-]{0,62}[a-z0-9])?$
//! - 1-64 chars
//! - lowercase ASCII alphanumerics and `-`
//! - cannot start or end with `-`
//! - no consecutive `--`
use std::fmt;
use std::str::FromStr;
use serde::{Deserialize, Deserializer, Serialize};
use crate::RecordLintError;
const MIN_LEN: usize = 1;
const MAX_LEN: usize = 64;
/// A slug that has passed validation. The only way to obtain one is
/// [`Slug::parse`] (directly, or through `FromStr` / `Deserialize`).
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
#[serde(transparent)]
pub struct Slug(String);

impl Slug {
    /// Validates `s` and wraps it.
    ///
    /// # Errors
    ///
    /// Returns [`RecordLintError::InvalidSlug`] carrying the rejected input.
    pub fn parse(s: impl Into<String>) -> Result<Self, RecordLintError> {
        let candidate: String = s.into();
        if !is_valid_slug(&candidate) {
            return Err(RecordLintError::InvalidSlug(candidate));
        }
        Ok(Slug(candidate))
    }

    /// Borrows the slug text.
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }

    /// Unwraps the slug into its owned text.
    pub fn into_string(self) -> String {
        let Slug(inner) = self;
        inner
    }
}
/// Prints the raw slug text; width/padding flags are not applied.
impl fmt::Display for Slug {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(self.as_str())
    }
}
/// Allows a `Slug` wherever `impl AsRef<str>` is accepted.
impl AsRef<str> for Slug {
    fn as_ref(&self) -> &str {
        self.as_str()
    }
}
impl FromStr for Slug {
type Err = RecordLintError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Self::parse(s)
}
}
impl<'de> Deserialize<'de> for Slug {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let raw = String::deserialize(deserializer)?;
Self::parse(raw).map_err(serde::de::Error::custom)
}
}
/// Dependency-free slug predicate (no `regex` crate).
///
/// Accepts strings of `MIN_LEN..=MAX_LEN` bytes made only of lowercase ASCII
/// letters, ASCII digits and `-`, whose first and last characters are
/// alphanumeric and which contain no two consecutive dashes.
pub fn is_valid_slug(s: &str) -> bool {
    if !(MIN_LEN..=MAX_LEN).contains(&s.len()) {
        return false;
    }
    let bytes = s.as_bytes();

    // Both ends must be alphanumeric, which also rules out a lone `-`.
    let edges_ok = matches!(
        (bytes.first(), bytes.last()),
        (Some(&head), Some(&last)) if is_alnum_lower(head) && is_alnum_lower(last)
    );
    if !edges_ok {
        return false;
    }

    let charset_ok = bytes.iter().all(|&b| b == b'-' || is_alnum_lower(b));
    let has_double_dash = bytes.windows(2).any(|pair| matches!(pair, [b'-', b'-']));
    charset_ok && !has_double_dash
}

/// True for ASCII `0-9` and `a-z`.
fn is_alnum_lower(b: u8) -> bool {
    matches!(b, b'0'..=b'9' | b'a'..=b'z')
}
#[cfg(test)]
mod tests {
    use super::*;

    // Representative valid slugs pass both the predicate and the constructor.
    #[test]
    fn accepts_basic_slugs() {
        for s in ["a", "ab", "abc-def", "x9", "a-b-c", "123", "a-1"] {
            assert!(is_valid_slug(s), "expected `{s}` valid");
            assert!(Slug::parse(s).is_ok());
        }
    }

    // Empty input, edge dashes, uppercase, foreign characters and `--` are rejected.
    #[test]
    fn rejects_bad_slugs() {
        for s in [
            "", "-", "-foo", "foo-", "Foo", "foo_bar", "foo bar", "foo--bar", "foo.bar", "ä",
        ] {
            assert!(!is_valid_slug(s), "expected `{s}` invalid");
            assert!(Slug::parse(s).is_err());
        }
    }

    // Exactly MAX_LEN bytes is accepted, one more is not.
    #[test]
    fn enforces_length_bounds() {
        let too_long = "a".repeat(MAX_LEN + 1);
        assert!(!is_valid_slug(&too_long));
        let max = "a".repeat(MAX_LEN);
        assert!(is_valid_slug(&max));
    }

    // Deserialization goes through the same validation as `Slug::parse`.
    #[test]
    fn deserializes_via_serde() {
        let json = "\"valid-slug\"";
        let slug: Slug = serde_json::from_str(json).unwrap();
        assert_eq!(slug.as_str(), "valid-slug");
        let bad = "\"BAD\"";
        let err: Result<Slug, _> = serde_json::from_str(bad);
        assert!(err.is_err());
    }
}

View File

@ -0,0 +1,9 @@
# llm-worker-macros
Rust メソッドを LLM 呼び出し可能なツールとして自動登録する手続きマクロクレート。引数構造体・Tool トレイト実装・ToolDefinition を自動生成する。
## 公開マクロ
- `#[tool_registry]` — impl ブロックに付与し、内部の `#[tool]` メソッドを一括処理
- `#[tool]` — メソッドをツールとしてマーク
- `#[description = "..."]` — 引数に説明を付与(JSON Schema の description に反映)

View File

@ -192,13 +192,13 @@ fn generate_tool_impl(self_ty: &Type, method: &syn::ImplItemFn) -> proc_macro2::
let result_handling = if is_result_type(&sig.output) {
quote! {
match result {
Ok(val) => Ok(format!("{:?}", val)),
Ok(val) => Ok(format!("{:?}", val).into()),
Err(e) => Err(::llm_worker::tool::ToolError::ExecutionFailed(format!("{}", e))),
}
}
} else {
quote! {
Ok(format!("{:?}", result))
Ok(format!("{:?}", result).into())
}
};
@ -247,7 +247,7 @@ fn generate_tool_impl(self_ty: &Type, method: &syn::ImplItemFn) -> proc_macro2::
#[async_trait::async_trait]
impl ::llm_worker::tool::Tool for #tool_struct_name {
async fn execute(&self, input_json: &str) -> Result<String, ::llm_worker::tool::ToolError> {
async fn execute(&self, input_json: &str) -> Result<::llm_worker::tool::ToolOutput, ::llm_worker::tool::ToolError> {
#execute_body
}
}

View File

@ -1,21 +0,0 @@
[package]
name = "llm-worker-persistence"
description = "Session persistence for llm-worker via append-only JSONL logs"
version = "0.1.0"
edition.workspace = true
license.workspace = true
[dependencies]
llm-worker = { path = "../llm-worker" }
async-trait = "0.1"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
tokio = { version = "1.49", features = ["fs", "io-util"] }
uuid = { version = "1", features = ["v7", "serde"] }
thiserror = "2.0"
[dev-dependencies]
tokio = { version = "1.49", features = ["macros", "rt-multi-thread", "fs", "io-util"] }
tempfile = "3.24"
futures = "0.3"
async-trait = "0.1"

View File

@ -1,47 +0,0 @@
//! [`ToolOutputProcessor`] implementation backed by a [`BlobStore`].
//!
//! Converts large tool output strings into [`ToolOutput::Stored`] and
//! persists the content via a [`BlobStore`], returning a summary with
//! a blob reference for conversation history.
use crate::blob_store::BlobStore;
use async_trait::async_trait;
use llm_worker::tool::{ToolError, ToolOutput, ToolOutputProcessor};
use std::sync::Arc;
/// A [`ToolOutputProcessor`] that stores large outputs in a [`BlobStore`].
///
/// Small outputs (≤ `INLINE_THRESHOLD` bytes) pass through unchanged.
/// Large outputs are stored as blobs, and a summary with a `[blob:<id>]`
/// reference replaces the original content in conversation history.
///
/// NOTE(review): `INLINE_THRESHOLD` is not defined in this file; the
/// inline-vs-stored decision is made by `ToolOutput::from` in `llm_worker` —
/// confirm the threshold there.
pub struct BlobOutputProcessor<B: BlobStore> {
// Shared handle to the backing store that receives large outputs.
blob_store: Arc<B>,
}
impl<B: BlobStore> BlobOutputProcessor<B> {
/// Create a new processor backed by the given blob store.
pub fn new(blob_store: Arc<B>) -> Self {
Self { blob_store }
}
}
#[async_trait]
impl<B: BlobStore + 'static> ToolOutputProcessor for BlobOutputProcessor<B> {
    /// Classifies `output` via `ToolOutput::from`.
    ///
    /// Inline output is returned as-is. Stored output has its content written
    /// to the blob store and is replaced by `"[blob:<id>] <summary>"`.
    ///
    /// # Errors
    ///
    /// Returns [`ToolError::Internal`] when the blob store rejects the write.
    async fn process(&self, output: String) -> Result<String, ToolError> {
        let (summary, content) = match ToolOutput::from(output) {
            ToolOutput::Inline(text) => return Ok(text),
            ToolOutput::Stored { summary, content } => (summary, content),
        };

        let blob_id = self
            .blob_store
            .store(&content)
            .await
            .map_err(|e| ToolError::Internal(format!("blob store error: {e}")))?;

        // The reference goes in front of the summary.
        Ok(format!("[blob:{blob_id}] {summary}"))
    }
}

View File

@ -1,54 +0,0 @@
//! Blob storage abstraction for large tool outputs.
//!
//! [`BlobStore`] provides async storage and retrieval of [`Content`] blobs,
//! keeping them separate from session logs. Session logs reference blobs
//! by [`BlobId`] in tool result summaries.
use llm_worker::tool::Content;
use std::future::Future;
/// Unique blob identifier. UUID v7 (time-ordered).
pub type BlobId = uuid::Uuid;
/// Generate a new blob ID.
pub fn new_blob_id() -> BlobId {
uuid::Uuid::now_v7()
}
/// Errors from the blob store.
#[derive(Debug, thiserror::Error)]
pub enum BlobStoreError {
/// An underlying I/O operation failed (`#[from]` enables `?` on `std::io::Error`).
#[error("I/O error: {0}")]
Io(#[from] std::io::Error),
/// Structured content could not be encoded to or decoded from JSON.
#[error("serialization error: {0}")]
Serde(#[from] serde_json::Error),
/// No blob is stored under the requested ID.
#[error("blob not found: {0}")]
NotFound(BlobId),
}
/// Async blob storage backend.
///
/// Stores and retrieves [`Content`] blobs independently of session logs.
/// All methods take `&self` — implementations should use interior mutability
/// when needed.
///
/// Methods return `impl Future + Send`, so implementors can write them as
/// plain `async fn`s (as the implementations in this crate do).
pub trait BlobStore: Send + Sync {
/// Store content and return its assigned ID.
fn store(
&self,
content: &Content,
) -> impl Future<Output = Result<BlobId, BlobStoreError>> + Send;
/// Load content by ID.
///
/// The implementations in this crate report a missing blob as
/// [`BlobStoreError::NotFound`].
fn load(
&self,
id: BlobId,
) -> impl Future<Output = Result<Content, BlobStoreError>> + Send;
/// Check if a blob exists.
fn exists(
&self,
id: BlobId,
) -> impl Future<Output = Result<bool, BlobStoreError>> + Send;
}

View File

@ -1,83 +0,0 @@
//! Filesystem-backed blob store.
//!
//! Layout:
//! - Text blobs: `{root}/{blob_id}.txt`
//! - Structured blobs: `{root}/{blob_id}.json`
use crate::blob_store::{new_blob_id, BlobId, BlobStore, BlobStoreError};
use llm_worker::tool::Content;
use std::path::PathBuf;
use tokio::fs;
/// Filesystem-backed blob store.
///
/// Each blob is stored as a single file. Text content uses `.txt`,
/// structured (JSON) content uses `.json`.
#[derive(Clone)]
pub struct FsBlobStore {
root: PathBuf,
}
impl FsBlobStore {
    /// Opens a blob store rooted at `root`, creating the directory (and any
    /// missing parents) first.
    ///
    /// # Errors
    ///
    /// Returns [`BlobStoreError::Io`] when the directory cannot be created.
    pub async fn new(root: impl Into<PathBuf>) -> Result<Self, BlobStoreError> {
        let store = Self { root: root.into() };
        fs::create_dir_all(&store.root).await?;
        Ok(store)
    }

    /// `{root}/{id}.txt` — where a text blob lives.
    fn text_path(&self, id: BlobId) -> PathBuf {
        let file_name = format!("{id}.txt");
        self.root.join(file_name)
    }

    /// `{root}/{id}.json` — where a structured blob lives.
    fn json_path(&self, id: BlobId) -> PathBuf {
        let file_name = format!("{id}.json");
        self.root.join(file_name)
    }

    /// Finds the file backing `id`, trying the text extension before the
    /// JSON one. The flag is `true` for a text blob and `false` for JSON.
    fn resolve_path(&self, id: BlobId) -> Option<(PathBuf, bool)> {
        let text = self.text_path(id);
        let json = self.json_path(id);
        if text.exists() {
            Some((text, true))
        } else if json.exists() {
            Some((json, false))
        } else {
            None
        }
    }
}
impl BlobStore for FsBlobStore {
    /// Writes `content` to a new file named after a freshly generated ID:
    /// text goes to `{id}.txt`, structured values are pretty-printed to
    /// `{id}.json`.
    ///
    /// # Errors
    ///
    /// [`BlobStoreError::Io`] on write failure, [`BlobStoreError::Serde`] when
    /// a structured value cannot be serialized.
    async fn store(&self, content: &Content) -> Result<BlobId, BlobStoreError> {
        let id = new_blob_id();
        match content {
            Content::Text(text) => {
                fs::write(self.text_path(id), text.as_bytes()).await?;
            }
            Content::Structured(value) => {
                let json = serde_json::to_string_pretty(value)?;
                fs::write(self.json_path(id), json.as_bytes()).await?;
            }
        }
        Ok(id)
    }

    /// Reads the blob back, trying the text file first and the JSON file second.
    ///
    /// The files are read directly instead of being probed with a blocking
    /// `exists()` beforehand: a blob that disappears between a probe and the
    /// read would otherwise surface as an I/O error rather than `NotFound`.
    ///
    /// # Errors
    ///
    /// [`BlobStoreError::NotFound`] when neither file exists,
    /// [`BlobStoreError::Io`] for any other read failure, and
    /// [`BlobStoreError::Serde`] when the JSON file does not parse.
    async fn load(&self, id: BlobId) -> Result<Content, BlobStoreError> {
        match fs::read_to_string(self.text_path(id)).await {
            Ok(text) => return Ok(Content::Text(text)),
            // No text blob: fall through and try the JSON variant.
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
            Err(err) => return Err(err.into()),
        }

        match fs::read_to_string(self.json_path(id)).await {
            Ok(raw) => Ok(Content::Structured(serde_json::from_str(&raw)?)),
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
                Err(BlobStoreError::NotFound(id))
            }
            Err(err) => Err(err.into()),
        }
    }

    /// Reports whether a file for `id` exists under either extension.
    async fn exists(&self, id: BlobId) -> Result<bool, BlobStoreError> {
        Ok(self.resolve_path(id).is_some())
    }
}

View File

@ -1,133 +0,0 @@
//! Filesystem-backed JSONL store.
//!
//! Layout:
//! - Session log: `{root}/{session_id}.jsonl`
//! - Event trace: `{root}/{session_id}.trace.jsonl`
use crate::event_trace::TraceEntry;
use crate::session_log::LogEntry;
use crate::store::{Store, StoreError};
use crate::SessionId;
use std::path::{Path, PathBuf};
use tokio::fs;
use tokio::io::AsyncWriteExt;
/// Filesystem-backed JSONL store.
///
/// Each session is stored as a single `.jsonl` file with one [`LogEntry`]
/// per line. Writes use append mode for crash safety.
#[derive(Clone)]
pub struct FsStore {
root: PathBuf,
}
impl FsStore {
    /// Create a new `FsStore` rooted at the given directory.
    /// Creates the directory if it does not exist.
    ///
    /// # Errors
    ///
    /// Propagates the I/O error when the directory cannot be created.
    pub async fn new(root: impl Into<PathBuf>) -> Result<Self, StoreError> {
        let root = root.into();
        fs::create_dir_all(&root).await?;
        Ok(Self { root })
    }

    /// `{root}/{id}.jsonl` — the session log file.
    fn log_path(&self, id: SessionId) -> PathBuf {
        self.root.join(format!("{id}.jsonl"))
    }

    /// `{root}/{id}.trace.jsonl` — the event trace file.
    fn trace_path(&self, id: SessionId) -> PathBuf {
        self.root.join(format!("{id}.trace.jsonl"))
    }

    /// Appends `line` plus a trailing newline to `path`, creating the file
    /// if needed.
    ///
    /// The record and its terminator are assembled into one buffer and handed
    /// to a single `write_all`, instead of two separate writes, so that an
    /// interruption or a concurrent appender is less likely to leave an
    /// unterminated or interleaved line in the log.
    async fn append_line(&self, path: &Path, line: &str) -> Result<(), StoreError> {
        let mut record = String::with_capacity(line.len() + 1);
        record.push_str(line);
        record.push('\n');

        let mut file = fs::OpenOptions::new()
            .create(true)
            .append(true)
            .open(path)
            .await?;
        file.write_all(record.as_bytes()).await?;
        file.flush().await?;
        Ok(())
    }

    /// Decodes one `T` per non-blank line of `content`.
    ///
    /// # Errors
    ///
    /// Returns [`StoreError::Corrupt`] for the first line that fails to
    /// parse; `line` is the 1-based line number, counting blank lines.
    fn parse_jsonl<T: serde::de::DeserializeOwned>(
        content: &str,
    ) -> Result<Vec<T>, StoreError> {
        content
            .lines()
            .enumerate()
            .filter(|(_, line)| !line.trim().is_empty())
            .map(|(index, line)| {
                serde_json::from_str(line).map_err(|e| StoreError::Corrupt {
                    line: index + 1,
                    message: e.to_string(),
                })
            })
            .collect()
    }
}
impl Store for FsStore {
    /// Serializes `entry` to one JSON line and appends it to the session log.
    async fn append(&self, id: SessionId, entry: &LogEntry) -> Result<(), StoreError> {
        let line = serde_json::to_string(entry)?;
        self.append_line(&self.log_path(id), &line).await
    }

    /// Reads and decodes every entry of the session log.
    ///
    /// The file is read directly and a "not found" I/O error is translated,
    /// rather than probing with a blocking `exists()` first; this removes the
    /// window in which the file could vanish between the check and the read.
    ///
    /// # Errors
    ///
    /// [`StoreError::NotFound`] when the log file does not exist, the I/O
    /// error for other read failures, [`StoreError::Corrupt`] for a bad line.
    async fn read_all(&self, id: SessionId) -> Result<Vec<LogEntry>, StoreError> {
        let content = match fs::read_to_string(self.log_path(id)).await {
            Ok(content) => content,
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
                return Err(StoreError::NotFound(id));
            }
            Err(err) => return Err(err.into()),
        };
        Self::parse_jsonl(&content)
    }

    /// Lists the IDs of all session logs under the root, newest first.
    ///
    /// Only files named exactly `<session-id>.jsonl` count: trace files are
    /// skipped, and the suffix is removed once with `strip_suffix` so that a
    /// stray `<id>.jsonl.jsonl` is not mistaken for a session.
    async fn list_sessions(&self) -> Result<Vec<SessionId>, StoreError> {
        let mut sessions = Vec::new();
        let mut dir = fs::read_dir(&self.root).await?;
        while let Some(entry) = dir.next_entry().await? {
            let file_name = entry.file_name();
            let Some(name) = file_name.to_str() else {
                continue;
            };
            // Only match .jsonl files, not .trace.jsonl
            if name.ends_with(".trace.jsonl") {
                continue;
            }
            let Some(stem) = name.strip_suffix(".jsonl") else {
                continue;
            };
            if let Ok(id) = stem.parse::<SessionId>() {
                sessions.push(id);
            }
        }
        // UUID v7: lexicographic sort = chronological sort, newest first
        sessions.sort_unstable_by(|a, b| b.cmp(a));
        Ok(sessions)
    }

    /// Writes `entries` as the complete log for `id`, one JSON line each.
    /// The file is written in one call, replacing any existing content.
    async fn create_session(
        &self,
        id: SessionId,
        entries: &[LogEntry],
    ) -> Result<(), StoreError> {
        let path = self.log_path(id);
        let mut content = String::new();
        for entry in entries {
            content.push_str(&serde_json::to_string(entry)?);
            content.push('\n');
        }
        fs::write(&path, content.as_bytes()).await?;
        Ok(())
    }

    /// Reports whether a log file exists for `id`.
    ///
    /// Uses the async existence probe so the executor thread is not blocked;
    /// a probe failure is treated as "does not exist", matching `Path::exists`.
    async fn exists(&self, id: SessionId) -> Result<bool, StoreError> {
        Ok(fs::try_exists(self.log_path(id)).await.unwrap_or(false))
    }

    /// Serializes `entry` to one JSON line and appends it to the trace file.
    async fn append_trace(
        &self,
        id: SessionId,
        entry: &TraceEntry,
    ) -> Result<(), StoreError> {
        let line = serde_json::to_string(entry)?;
        self.append_line(&self.trace_path(id), &line).await
    }
}

View File

@ -1,668 +0,0 @@
//! Built-in `inspect` tool for retrieving stored blob content.
//!
//! When large tool outputs are stored in a [`BlobStore`], only a summary
//! with a `[blob:<id>]` reference is placed in conversation history.
//! This tool lets the LLM retrieve details on demand, with optional
//! selectors for partial access.
use std::sync::Arc;
use async_trait::async_trait;
use serde::Deserialize;
use serde_json::json;
use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta};
use llm_worker::state::Mutable;
use llm_worker::ToolRegistryError;
use llm_worker::Worker;
use llm_worker::llm_client::LlmClient;
use crate::blob_store::{BlobId, BlobStore};
// ─── Constants ───────────────────────────────────────────────────────────────
/// Maximum lines shown in the default text preview.
const DEFAULT_PREVIEW_LINES: usize = 50;
/// Maximum array elements shown in the default preview.
const DEFAULT_PREVIEW_ELEMENTS: usize = 5;
/// Maximum object keys whose values are shown in the default preview.
const DEFAULT_PREVIEW_KEYS: usize = 3;
// ─── Selector ────────────────────────────────────────────────────────────────
/// Parsed selector for partial blob content retrieval.
///
/// Produced by `parse_selector` from the textual forms `lines:N-M`,
/// `slice:N..M` and `key:NAME`.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Selector {
/// Extract a range of lines (1-based, inclusive).
Lines { start: usize, end: usize },
/// Extract a range of array elements (0-based, exclusive end).
Slice { start: usize, end: usize },
/// Extract a specific key from a JSON object.
Key(String),
}
/// Parses the textual selector `s` into a [`Selector`].
///
/// Accepted forms:
/// - `lines:N-M` — 1-based inclusive line range, `N >= 1` and `N <= M`
/// - `slice:N..M` — 0-based half-open element range, `N <= M`
/// - `key:NAME` — non-empty object key (everything after the first `key:`)
///
/// # Errors
///
/// Returns [`ToolError::InvalidArgument`] for an unknown prefix, a missing
/// separator, a non-numeric bound, a zero start line, an inverted range or
/// an empty key name.
fn parse_selector(s: &str) -> Result<Selector, ToolError> {
    if let Some(range) = s.strip_prefix("lines:") {
        let Some((first, last)) = range.split_once('-') else {
            return Err(ToolError::InvalidArgument(format!(
                "invalid lines selector '{s}': expected format lines:N-M"
            )));
        };
        let start = first.parse::<usize>().map_err(|_| {
            ToolError::InvalidArgument(format!("invalid start line number: '{first}'"))
        })?;
        let end = last.parse::<usize>().map_err(|_| {
            ToolError::InvalidArgument(format!("invalid end line number: '{last}'"))
        })?;
        if start == 0 {
            return Err(ToolError::InvalidArgument(
                "line numbers are 1-based, got 0".into(),
            ));
        }
        if start > end {
            return Err(ToolError::InvalidArgument(format!(
                "start line ({start}) must be <= end line ({end})"
            )));
        }
        return Ok(Selector::Lines { start, end });
    }

    if let Some(range) = s.strip_prefix("slice:") {
        let Some((first, last)) = range.split_once("..") else {
            return Err(ToolError::InvalidArgument(format!(
                "invalid slice selector '{s}': expected format slice:N..M"
            )));
        };
        let start = first.parse::<usize>().map_err(|_| {
            ToolError::InvalidArgument(format!("invalid start index: '{first}'"))
        })?;
        let end = last.parse::<usize>().map_err(|_| {
            ToolError::InvalidArgument(format!("invalid end index: '{last}'"))
        })?;
        if start > end {
            return Err(ToolError::InvalidArgument(format!(
                "start index ({start}) must be <= end index ({end})"
            )));
        }
        return Ok(Selector::Slice { start, end });
    }

    match s.strip_prefix("key:") {
        Some("") => Err(ToolError::InvalidArgument(
            "key name must not be empty".into(),
        )),
        Some(name) => Ok(Selector::Key(name.to_string())),
        None => Err(ToolError::InvalidArgument(format!(
            "unrecognized selector format: '{s}'. Expected: lines:N-M, slice:N..M, or key:NAME"
        ))),
    }
}
// ─── InspectTool ─────────────────────────────────────────────────────────────
/// JSON arguments accepted by the `inspect` tool.
#[derive(Deserialize)]
struct InspectArgs {
// Blob identifier as text; parsed into a `BlobId` in `execute`.
blob_id: String,
// Optional selector text (see `parse_selector`); absent means "default view".
selector: Option<String>,
}
/// Built-in tool that retrieves stored blob content.
///
/// Holds a shared handle to the [`BlobStore`] it loads blobs from.
pub struct InspectTool<B: BlobStore> {
blob_store: Arc<B>,
}
impl<B: BlobStore> InspectTool<B> {
pub fn new(blob_store: Arc<B>) -> Self {
Self { blob_store }
}
}
impl<B: BlobStore + 'static> InspectTool<B> {
/// Create a [`ToolDefinition`] factory for this tool.
///
/// Each invocation of the returned factory yields the tool metadata (name
/// `inspect`, description and JSON input schema) together with a new
/// `InspectTool` sharing the same `blob_store`.
pub fn tool_definition(blob_store: Arc<B>) -> ToolDefinition {
Arc::new(move || {
let meta = ToolMeta::new("inspect")
.description(
"Retrieve content from a stored blob referenced by [blob:<id>] in conversation history. \
Supports selectors for partial access: \
'lines:N-M' (text line range, 1-based inclusive), \
'slice:N..M' (array element range, 0-based exclusive end), \
'key:NAME' (object key lookup). \
Without a selector, returns metadata and a preview.",
)
.input_schema(json!({
"type": "object",
"properties": {
"blob_id": {
"type": "string",
"description": "The blob UUID from a [blob:<id>] reference"
},
"selector": {
"type": "string",
"description": "Optional: 'lines:N-M', 'slice:N..M', or 'key:NAME'"
}
},
"required": ["blob_id"]
}));
// Clone the Arc so the factory closure can be called repeatedly.
let tool = Arc::new(InspectTool::new(Arc::clone(&blob_store))) as Arc<dyn Tool>;
(meta, tool)
})
}
}
#[async_trait]
impl<B: BlobStore + 'static> Tool for InspectTool<B> {
    /// Runs the tool against the JSON arguments in `input_json`.
    ///
    /// Without a selector the default view (metadata plus preview) is
    /// returned; with one, only the selected part of the blob.
    ///
    /// # Errors
    ///
    /// - [`ToolError::InvalidArgument`] for malformed arguments, a `blob_id`
    ///   that is not a UUID, or a bad / inapplicable selector.
    /// - [`ToolError::ExecutionFailed`] when the blob cannot be loaded.
    async fn execute(&self, input_json: &str) -> Result<String, ToolError> {
        let args: InspectArgs = serde_json::from_str(input_json)
            .map_err(|e| ToolError::InvalidArgument(format!("invalid arguments: {e}")))?;

        let blob_id = args.blob_id.parse::<BlobId>().map_err(|_| {
            ToolError::InvalidArgument(format!(
                "invalid blob_id: '{}' is not a valid UUID",
                args.blob_id
            ))
        })?;

        let content = self
            .blob_store
            .load(blob_id)
            .await
            .map_err(|err| ToolError::ExecutionFailed(err.to_string()))?;

        if let Some(raw_selector) = args.selector.as_deref() {
            let selector = parse_selector(raw_selector)?;
            return apply_selector(&content, &selector);
        }
        Ok(default_view(&content))
    }
}
// ─── Default view ────────────────────────────────────────────────────────────
use llm_worker::tool::Content;
/// Renders the selector-less view of a blob by dispatching on its content kind.
fn default_view(content: &Content) -> String {
    match content {
        Content::Structured(json) => default_view_structured(json),
        Content::Text(body) => default_view_text(body),
    }
}
fn default_view_text(text: &str) -> String {
let lines: Vec<&str> = text.lines().collect();
let total = lines.len();
let size = text.len();
let preview_end = total.min(DEFAULT_PREVIEW_LINES);
let mut out = format!("type: text\nlines: {total}\nsize: {size} bytes\n\n");
out.push_str(&format!("── preview (lines 1-{preview_end}) ──\n"));
for line in &lines[..preview_end] {
out.push_str(line);
out.push('\n');
}
if total > DEFAULT_PREVIEW_LINES {
out.push_str(&format!("... ({} more lines)\n", total - DEFAULT_PREVIEW_LINES));
}
out
}
/// Renders the default view of a JSON blob.
///
/// - Arrays: entry count plus the first `DEFAULT_PREVIEW_ELEMENTS` elements.
/// - Objects: every key with its value type, plus the pretty-printed values
///   of the first `DEFAULT_PREVIEW_KEYS` keys.
/// - Scalars: the pretty-printed value itself.
///
/// Values that fail to serialize are left out of the preview.
fn default_view_structured(value: &serde_json::Value) -> String {
    use serde_json::Value;

    match value {
        Value::Array(items) => {
            let count = items.len();
            let shown = count.min(DEFAULT_PREVIEW_ELEMENTS);
            let mut view = format!(
                "type: json_array\nentries: {count}\n\n── preview (0..{shown}) ──\n"
            );
            let rendered_items = items
                .iter()
                .take(shown)
                .filter_map(|item| serde_json::to_string_pretty(item).ok());
            for rendered in rendered_items {
                view.push_str(&rendered);
                view.push('\n');
            }
            if count > DEFAULT_PREVIEW_ELEMENTS {
                let hidden = count - DEFAULT_PREVIEW_ELEMENTS;
                view.push_str(&format!("... ({hidden} more entries)\n"));
            }
            view
        }
        Value::Object(fields) => {
            let mut view = format!(
                "type: json_object\nkeys: {}\n\n── keys ──\n",
                fields.len()
            );
            for (name, field) in fields {
                view.push_str(&format!("{name}: {}\n", value_type_label(field)));
            }
            // Show the values of the first few keys only.
            let mut preview = fields.iter().take(DEFAULT_PREVIEW_KEYS).peekable();
            if preview.peek().is_some() {
                view.push_str("\n── preview ──\n");
                for (name, field) in preview {
                    if let Ok(rendered) = serde_json::to_string_pretty(field) {
                        view.push_str(&format!("{name}: {rendered}\n"));
                    }
                }
            }
            view
        }
        // Null, bool, number or string: print the value as-is.
        scalar => serde_json::to_string_pretty(scalar).unwrap_or_default(),
    }
}
fn value_type_label(value: &serde_json::Value) -> &'static str {
match value {
serde_json::Value::Null => "null",
serde_json::Value::Bool(_) => "bool",
serde_json::Value::Number(_) => "number",
serde_json::Value::String(_) => "string",
serde_json::Value::Array(_) => "array",
serde_json::Value::Object(_) => "object",
}
}
// ─── Selector application ────────────────────────────────────────────────────
/// Extracts the part of `content` named by `selector`.
///
/// - `Lines` on text: the requested lines joined with `\n`; the end is
///   clamped to the last line, and a start beyond the content yields a short
///   "(no lines …)" notice.
/// - `Slice` on a JSON array: the pretty-printed sub-array, clamped to the
///   array length.
/// - `Key` on a JSON object: the pretty-printed value of that key.
///
/// Range bounds are clamped rather than trusted: a start line of 0 is treated
/// as line 1 and an inverted range yields an empty result, so a `Selector`
/// that did not come from `parse_selector` cannot trigger an arithmetic
/// underflow or an out-of-order slice panic.
///
/// # Errors
///
/// - [`ToolError::InvalidArgument`] when the selector kind does not fit the
///   content kind, or when the requested key is absent.
/// - [`ToolError::Internal`] when the selected JSON cannot be serialized.
fn apply_selector(content: &Content, selector: &Selector) -> Result<String, ToolError> {
    match (content, selector) {
        (Content::Text(text), Selector::Lines { start, end }) => {
            let lines: Vec<&str> = text.lines().collect();
            let total = lines.len();
            // Convert 1-based inclusive to 0-based half-open.
            let from = start.saturating_sub(1);
            if from >= total {
                return Ok(format!("(no lines — content has {total} lines)"));
            }
            // `from < total` here; never let the end fall before the start.
            let to = (*end).min(total).max(from);
            Ok(lines[from..to].join("\n"))
        }
        (Content::Structured(serde_json::Value::Array(arr)), Selector::Slice { start, end }) => {
            let total = arr.len();
            let to = (*end).min(total);
            // Clamping the start to the end keeps the range well-formed.
            let from = (*start).min(to);
            let slice = &arr[from..to];
            serde_json::to_string_pretty(slice)
                .map_err(|e| ToolError::Internal(format!("JSON serialization error: {e}")))
        }
        (Content::Structured(serde_json::Value::Object(map)), Selector::Key(key)) => {
            match map.get(key.as_str()) {
                Some(val) => serde_json::to_string_pretty(val)
                    .map_err(|e| ToolError::Internal(format!("JSON serialization error: {e}"))),
                None => {
                    // List what is available so the caller can retry.
                    let available: Vec<_> = map.keys().collect();
                    Err(ToolError::InvalidArgument(format!(
                        "key '{key}' not found. Available keys: {available:?}"
                    )))
                }
            }
        }
        // Type mismatches
        (Content::Text(_), Selector::Slice { .. }) => Err(ToolError::InvalidArgument(
            "slice selector only applies to JSON arrays, but this blob contains text. Use 'lines:N-M' instead.".into(),
        )),
        (Content::Text(_), Selector::Key(_)) => Err(ToolError::InvalidArgument(
            "key selector only applies to JSON objects, but this blob contains text. Use 'lines:N-M' instead.".into(),
        )),
        (Content::Structured(_), Selector::Lines { .. }) => Err(ToolError::InvalidArgument(
            "lines selector only applies to text content, but this blob contains JSON. Use 'slice:N..M' or 'key:NAME' instead.".into(),
        )),
        (Content::Structured(serde_json::Value::Object(_)), Selector::Slice { .. }) => Err(ToolError::InvalidArgument(
            "slice selector only applies to JSON arrays, but this blob is a JSON object. Use 'key:NAME' instead.".into(),
        )),
        (Content::Structured(serde_json::Value::Array(_)), Selector::Key(_)) => Err(ToolError::InvalidArgument(
            "key selector only applies to JSON objects, but this blob is a JSON array. Use 'slice:N..M' instead.".into(),
        )),
        // Remaining structured values are scalars.
        (Content::Structured(_), Selector::Slice { .. }) => Err(ToolError::InvalidArgument(
            "slice selector only applies to JSON arrays.".into(),
        )),
        (Content::Structured(_), Selector::Key(_)) => Err(ToolError::InvalidArgument(
            "key selector only applies to JSON objects.".into(),
        )),
    }
}
// ─── Registration helper ─────────────────────────────────────────────────────
/// Adds the `inspect` tool, backed by `blob_store`, to `worker`.
///
/// Intended to be called together with
/// [`BlobOutputProcessor`](crate::BlobOutputProcessor) setup so that the LLM
/// can look up the blobs that processor creates.
///
/// # Errors
///
/// Returns whatever [`Worker::register_tool`] reports.
pub fn register_inspect_tool<C, B>(
    worker: &mut Worker<C, Mutable>,
    blob_store: Arc<B>,
) -> Result<(), ToolRegistryError>
where
    C: LlmClient,
    B: BlobStore + 'static,
{
    let definition = InspectTool::<B>::tool_definition(blob_store);
    worker.register_tool(definition)
}
// ─── Tests ───────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
use super::*;
use crate::blob_store::{new_blob_id, BlobStoreError};
use llm_worker::tool::Content;
use std::collections::HashMap;
use tokio::sync::Mutex;
// ── In-memory BlobStore for tests ────────────────────────────────────
struct MemBlobStore {
blobs: Mutex<HashMap<BlobId, Content>>,
}
impl MemBlobStore {
fn new() -> Self {
Self {
blobs: Mutex::new(HashMap::new()),
}
}
}
impl BlobStore for MemBlobStore {
async fn store(&self, content: &Content) -> Result<BlobId, BlobStoreError> {
let id = new_blob_id();
self.blobs.lock().await.insert(id, content.clone());
Ok(id)
}
async fn load(&self, id: BlobId) -> Result<Content, BlobStoreError> {
self.blobs
.lock()
.await
.get(&id)
.cloned()
.ok_or(BlobStoreError::NotFound(id))
}
async fn exists(&self, id: BlobId) -> Result<bool, BlobStoreError> {
Ok(self.blobs.lock().await.contains_key(&id))
}
}
// ── Selector parsing ─────────────────────────────────────────────────
#[test]
fn parse_lines_valid() {
assert_eq!(
parse_selector("lines:1-50").unwrap(),
Selector::Lines { start: 1, end: 50 }
);
assert_eq!(
parse_selector("lines:5-5").unwrap(),
Selector::Lines { start: 5, end: 5 }
);
}
#[test]
fn parse_lines_zero_start() {
let err = parse_selector("lines:0-5").unwrap_err();
assert!(matches!(err, ToolError::InvalidArgument(_)));
}
#[test]
fn parse_lines_inverted() {
let err = parse_selector("lines:50-20").unwrap_err();
assert!(matches!(err, ToolError::InvalidArgument(_)));
}
#[test]
fn parse_lines_missing_dash() {
let err = parse_selector("lines:20").unwrap_err();
assert!(matches!(err, ToolError::InvalidArgument(_)));
}
#[test]
fn parse_slice_valid() {
assert_eq!(
parse_selector("slice:0..10").unwrap(),
Selector::Slice { start: 0, end: 10 }
);
assert_eq!(
parse_selector("slice:3..8").unwrap(),
Selector::Slice { start: 3, end: 8 }
);
}
#[test]
fn parse_slice_inverted() {
let err = parse_selector("slice:10..3").unwrap_err();
assert!(matches!(err, ToolError::InvalidArgument(_)));
}
#[test]
fn parse_key_valid() {
assert_eq!(
parse_selector("key:results").unwrap(),
Selector::Key("results".into())
);
// Key name with colon
assert_eq!(
parse_selector("key:nested:key").unwrap(),
Selector::Key("nested:key".into())
);
}
#[test]
fn parse_key_empty() {
let err = parse_selector("key:").unwrap_err();
assert!(matches!(err, ToolError::InvalidArgument(_)));
}
#[test]
fn parse_unknown_prefix() {
let err = parse_selector("unknown:foo").unwrap_err();
assert!(matches!(err, ToolError::InvalidArgument(_)));
}
// ── Default view ─────────────────────────────────────────────────────
#[test]
fn default_view_text_short() {
let text = "line1\nline2\nline3\n";
let content = Content::Text(text.into());
let view = default_view(&content);
assert!(view.contains("type: text"));
assert!(view.contains("lines: 3"));
assert!(view.contains("line1"));
assert!(!view.contains("more lines"));
}
#[test]
fn default_view_text_long() {
let text: String = (1..=100).map(|i| format!("line {i}\n")).collect();
let content = Content::Text(text);
let view = default_view(&content);
assert!(view.contains("type: text"));
assert!(view.contains("lines: 100"));
assert!(view.contains("line 1"));
assert!(view.contains("line 50"));
assert!(!view.contains("line 51\n"));
assert!(view.contains("50 more lines"));
}
#[test]
fn default_view_array() {
let arr: Vec<serde_json::Value> = (0..20).map(|i| json!({"id": i})).collect();
let content = Content::Structured(json!(arr));
let view = default_view(&content);
assert!(view.contains("type: json_array"));
assert!(view.contains("entries: 20"));
assert!(view.contains("15 more entries"));
}
#[test]
fn default_view_object() {
let content = Content::Structured(json!({
"name": "test",
"count": 42,
"items": [1, 2, 3],
"nested": {"a": 1}
}));
let view = default_view(&content);
assert!(view.contains("type: json_object"));
assert!(view.contains("keys: 4"));
assert!(view.contains("── keys ──"));
assert!(view.contains("── preview ──"));
}
// ── Selector application ─────────────────────────────────────────────
#[test]
fn apply_lines_on_text() {
let text = "a\nb\nc\nd\ne\nf\n";
let content = Content::Text(text.into());
let result = apply_selector(&content, &Selector::Lines { start: 2, end: 4 }).unwrap();
assert_eq!(result, "b\nc\nd");
}
#[test]
fn apply_lines_clamp() {
let text = "a\nb\nc\n";
let content = Content::Text(text.into());
let result = apply_selector(&content, &Selector::Lines { start: 2, end: 100 }).unwrap();
assert_eq!(result, "b\nc");
}
#[test]
fn apply_lines_beyond_content() {
let text = "a\nb\n";
let content = Content::Text(text.into());
let result = apply_selector(&content, &Selector::Lines { start: 10, end: 20 }).unwrap();
assert!(result.contains("no lines"));
}
#[test]
fn apply_slice_on_array() {
let content = Content::Structured(json!([10, 20, 30, 40, 50]));
let result = apply_selector(&content, &Selector::Slice { start: 1, end: 3 }).unwrap();
let parsed: Vec<i64> = serde_json::from_str(&result).unwrap();
assert_eq!(parsed, vec![20, 30]);
}
#[test]
fn apply_slice_clamp() {
let content = Content::Structured(json!([10, 20, 30]));
let result = apply_selector(&content, &Selector::Slice { start: 1, end: 100 }).unwrap();
let parsed: Vec<i64> = serde_json::from_str(&result).unwrap();
assert_eq!(parsed, vec![20, 30]);
}
#[test]
fn apply_key_on_object() {
let content = Content::Structured(json!({"name": "test", "count": 42}));
let result = apply_selector(&content, &Selector::Key("name".into())).unwrap();
assert_eq!(result.trim(), "\"test\"");
}
#[test]
fn apply_key_not_found() {
let content = Content::Structured(json!({"name": "test"}));
let err = apply_selector(&content, &Selector::Key("missing".into())).unwrap_err();
match err {
ToolError::InvalidArgument(msg) => {
assert!(msg.contains("missing"));
assert!(msg.contains("name"));
}
_ => panic!("expected InvalidArgument"),
}
}
// ── Type mismatch errors ─────────────────────────────────────────────
#[test]
fn lines_on_json_error() {
let content = Content::Structured(json!([1, 2, 3]));
let err = apply_selector(&content, &Selector::Lines { start: 1, end: 3 }).unwrap_err();
assert!(matches!(err, ToolError::InvalidArgument(_)));
}
#[test]
fn slice_on_text_error() {
let content = Content::Text("hello".into());
let err = apply_selector(&content, &Selector::Slice { start: 0, end: 3 }).unwrap_err();
assert!(matches!(err, ToolError::InvalidArgument(_)));
}
#[test]
fn key_on_text_error() {
let content = Content::Text("hello".into());
let err = apply_selector(&content, &Selector::Key("foo".into())).unwrap_err();
assert!(matches!(err, ToolError::InvalidArgument(_)));
}
#[test]
fn slice_on_object_error() {
let content = Content::Structured(json!({"a": 1}));
let err = apply_selector(&content, &Selector::Slice { start: 0, end: 3 }).unwrap_err();
assert!(matches!(err, ToolError::InvalidArgument(_)));
}
#[test]
fn key_on_array_error() {
let content = Content::Structured(json!([1, 2, 3]));
let err = apply_selector(&content, &Selector::Key("foo".into())).unwrap_err();
assert!(matches!(err, ToolError::InvalidArgument(_)));
}
// ── Integration via execute() ────────────────────────────────────────
#[tokio::test]
async fn execute_default_view() {
let store = Arc::new(MemBlobStore::new());
let text = (1..=100).map(|i| format!("line {i}")).collect::<Vec<_>>().join("\n");
let blob_id = store.store(&Content::Text(text)).await.unwrap();
let tool = InspectTool::new(store);
let result = tool
.execute(&json!({"blob_id": blob_id.to_string()}).to_string())
.await
.unwrap();
assert!(result.contains("type: text"));
assert!(result.contains("lines: 100"));
}
#[tokio::test]
async fn execute_with_selector() {
let store = Arc::new(MemBlobStore::new());
let blob_id = store
.store(&Content::Structured(json!({"name": "test", "value": 42})))
.await
.unwrap();
let tool = InspectTool::new(store);
let result = tool
.execute(&json!({"blob_id": blob_id.to_string(), "selector": "key:name"}).to_string())
.await
.unwrap();
assert_eq!(result.trim(), "\"test\"");
}
#[tokio::test]
async fn execute_invalid_blob_id() {
let store = Arc::new(MemBlobStore::new());
let tool = InspectTool::new(store);
let err = tool
.execute(&json!({"blob_id": "not-a-uuid"}).to_string())
.await
.unwrap_err();
assert!(matches!(err, ToolError::InvalidArgument(_)));
}
#[tokio::test]
async fn execute_blob_not_found() {
let store = Arc::new(MemBlobStore::new());
let tool = InspectTool::new(store);
let fake_id = new_blob_id();
let err = tool
.execute(&json!({"blob_id": fake_id.to_string()}).to_string())
.await
.unwrap_err();
assert!(matches!(err, ToolError::ExecutionFailed(_)));
}
}

View File

@ -1,49 +0,0 @@
//! Session persistence for `llm-worker` via append-only JSONL logs.
//!
//! # Architecture
//!
//! Sessions are recorded as a sequence of [`LogEntry`] values, one per line
//! in a `.jsonl` file. Reading the log and collecting entries reconstructs
//! the full [`Worker`] state — no separate snapshots or checkpoints needed.
//!
//! Debug-mode [`TraceEntry`] records capture raw stream events in a separate
//! `.trace.jsonl` file, independent of the session log.
//!
//! # Quick start
//!
//! ```ignore
//! use llm_worker_persistence::{Session, SessionConfig, FsStore};
//!
//! let store = FsStore::new("./sessions").await?;
//! let worker = Worker::new(client);
//! let mut session = Session::new(worker, store, SessionConfig::default()).await?;
//! session.run("Hello!").await?;
//! ```
pub mod blob_output_processor;
pub mod blob_store;
pub mod event_trace;
pub mod fs_blob_store;
pub mod fs_store;
pub mod inspect_tool;
pub mod session;
pub mod session_log;
pub mod store;
pub use blob_output_processor::BlobOutputProcessor;
pub use blob_store::{BlobId, BlobStore, BlobStoreError};
pub use inspect_tool::{InspectTool, register_inspect_tool};
pub use event_trace::TraceEntry;
pub use fs_blob_store::FsBlobStore;
pub use fs_store::FsStore;
pub use session::{Session, SessionConfig, SessionError};
pub use session_log::{LogEntry, Outcome, RestoredState, collect_state};
pub use store::{Store, StoreError};
/// Session identifier. UUID v7 (time-ordered, lexicographically sortable).
pub type SessionId = uuid::Uuid;
/// Generate a new session ID.
pub fn new_session_id() -> SessionId {
uuid::Uuid::now_v7()
}

View File

@ -1,338 +0,0 @@
//! Persistent session wrapper around [`Worker`].
//!
//! [`Session`] intercepts `Worker` operations and appends [`LogEntry`] records
//! to a [`Store`]. It does not modify `Worker` internals — all persistence
//! happens by observing state before and after each operation.
use crate::session_log::{self, LogEntry, Outcome};
use crate::store::{Store, StoreError};
use crate::SessionId;
use llm_worker::llm_client::client::LlmClient;
use llm_worker::state::Mutable;
use llm_worker::{Worker, WorkerError, WorkerResult};
/// Configuration for session persistence.
///
/// `Default` is derived, so every option starts disabled.
#[derive(Debug, Clone, Default)]
pub struct SessionConfig {
    /// Record raw stream events to a separate trace file.
    /// Default: `false`.
    pub record_event_trace: bool,
}
/// Errors from session operations.
///
/// Both variants are `transparent`: the `Display` output and error source
/// are those of the wrapped error. The `#[from]` attributes let `?` convert
/// a `WorkerError` or `StoreError` into a `SessionError`.
#[derive(Debug, thiserror::Error)]
pub enum SessionError {
/// Failure reported by the wrapped worker.
#[error(transparent)]
Worker(#[from] WorkerError),
/// Failure reported by the persistence store.
#[error(transparent)]
Store(#[from] StoreError),
}
/// Persistent session wrapping a [`Worker`].
///
/// The `worker` field is public for direct access to Worker APIs
/// (tool registration, hook setup, subscriber management, etc.).
/// State-mutating operations (`run`, `resume`) should go through
/// Session methods to ensure proper logging.
pub struct Session<C: LlmClient, St: Store> {
/// The wrapped worker. Changes made directly through this field are not
/// logged automatically.
pub worker: Worker<C, Mutable>,
/// Backend that receives every log entry for this session.
store: St,
/// ID under which this session's entries are stored.
session_id: SessionId,
/// Configuration supplied at construction; stored, but not read by the
/// methods of this type.
_config: SessionConfig,
}
impl<C: LlmClient, St: Store> Session<C, St> {
/// Start a brand-new session around `worker`.
///
/// Allocates a fresh session ID and appends a `SessionStart` entry that
/// captures the worker's current system prompt, request config and history.
///
/// # Errors
///
/// Returns the [`StoreError`] if the initial entry cannot be appended.
pub async fn new(
    worker: Worker<C, Mutable>,
    store: St,
    config: SessionConfig,
) -> Result<Self, StoreError> {
    let session_id = crate::new_session_id();
    let ts = session_log::now_millis();
    let system_prompt = worker.get_system_prompt().map(String::from);
    let request_config = worker.request_config().clone();
    let history = worker.history().to_vec();
    let opening_entry = LogEntry::SessionStart {
        ts,
        system_prompt,
        config: request_config,
        history,
    };
    store.append(session_id, &opening_entry).await?;
    Ok(Self {
        worker,
        store,
        session_id,
        _config: config,
    })
}
/// Rebuild a session from its stored log.
///
/// Reads every entry for `session_id`, collects them into a state, and
/// applies that state (system prompt, history, request config, turn count
/// and the interrupted flag) to a new worker built on `client`. The
/// collected `locked_prefix_len` is not applied here. The returned session
/// is ready for `resume()`.
///
/// # Errors
///
/// Returns [`SessionError::Store`] if the log cannot be read.
pub async fn restore(
    client: C,
    store: St,
    session_id: SessionId,
    config: SessionConfig,
) -> Result<Self, SessionError> {
    let log = store.read_all(session_id).await?;
    let restored = session_log::collect_state(&log);

    let mut worker = Worker::new(client);
    if let Some(prompt) = restored.system_prompt.as_ref() {
        worker.set_system_prompt(prompt);
    }
    worker.set_history(restored.history);
    worker.set_request_config(restored.config);
    worker.set_turn_count(restored.turn_count);
    worker.set_last_run_interrupted(restored.last_run_interrupted);

    Ok(Self {
        worker,
        store,
        session_id,
        _config: config,
    })
}
/// The ID under which this session's log entries are stored.
///
/// Returned by value.
pub fn session_id(&self) -> SessionId {
self.session_id
}
/// Shared reference to the store this session writes its log entries to.
pub fn store(&self) -> &St {
&self.store
}
/// Execute one user turn on the worker and persist what changed.
///
/// After the worker returns (successfully or not), the history items added
/// during the call, a `TurnEnd` marker and a `RunOutcome` entry are
/// appended, in that order.
///
/// # Errors
///
/// A store failure while logging is returned immediately as
/// [`SessionError::Store`] and takes precedence over the worker's own
/// result. Otherwise a worker failure is returned as
/// [`SessionError::Worker`].
pub async fn run(
    &mut self,
    user_input: impl Into<String>,
) -> Result<WorkerResult, SessionError> {
    let baseline = self.worker.history().len();
    let worker_outcome = self.worker.run(user_input).await;

    self.log_history_delta(baseline).await?;
    self.log_turn_end().await?;
    self.log_outcome(&worker_outcome).await?;

    worker_outcome.map_err(SessionError::from)
}
/// Continue a paused worker and persist what changed.
///
/// Logging mirrors `run`: new history items, then `TurnEnd`, then
/// `RunOutcome`.
///
/// # Errors
///
/// A store failure while logging is returned immediately as
/// [`SessionError::Store`] and takes precedence over the worker's own
/// result. Otherwise a worker failure is returned as
/// [`SessionError::Worker`].
pub async fn resume(&mut self) -> Result<WorkerResult, SessionError> {
    let baseline = self.worker.history().len();
    let worker_outcome = self.worker.resume().await;

    self.log_history_delta(baseline).await?;
    self.log_turn_end().await?;
    self.log_outcome(&worker_outcome).await?;

    worker_outcome.map_err(SessionError::from)
}
/// Branch off a new session from the worker's current state.
///
/// A new session ID is generated and a log is created for it containing a
/// single `SessionStart` entry seeded with the current system prompt,
/// request config and history. This session itself is left untouched.
///
/// Returns the ID of the new session.
pub async fn fork(&self) -> Result<SessionId, StoreError> {
    let fork_id = crate::new_session_id();
    let ts = session_log::now_millis();
    let seed = LogEntry::SessionStart {
        ts,
        system_prompt: self.worker.get_system_prompt().map(String::from),
        config: self.worker.request_config().clone(),
        history: self.worker.history().to_vec(),
    };
    self.store
        .create_session(fork_id, std::slice::from_ref(&seed))
        .await?;
    Ok(fork_id)
}
/// Branch off a new session from an earlier point of a stored log.
///
/// The first `up_to_entry` entries of `source_id` (an exclusive bound,
/// clamped to the log length) are collected into a state. A new session is
/// then created whose only entry is a `SessionStart` carrying that state's
/// system prompt, config and history.
///
/// Returns the ID of the new session.
pub async fn fork_at(
    store: &St,
    source_id: SessionId,
    up_to_entry: usize,
) -> Result<SessionId, StoreError> {
    let source_log = store.read_all(source_id).await?;
    let cutoff = source_log.len().min(up_to_entry);
    let snapshot = session_log::collect_state(&source_log[..cutoff]);

    let fork_id = crate::new_session_id();
    let ts = session_log::now_millis();
    let seed = LogEntry::SessionStart {
        ts,
        system_prompt: snapshot.system_prompt,
        config: snapshot.config,
        history: snapshot.history,
    };
    store
        .create_session(fork_id, std::slice::from_ref(&seed))
        .await?;
    Ok(fork_id)
}
/// Append a `CacheLocked` entry recording `locked_prefix_len`.
///
/// This only writes the log entry; it does not touch the worker.
pub async fn log_cache_locked(
    &self,
    locked_prefix_len: usize,
) -> Result<(), StoreError> {
    let entry = LogEntry::CacheLocked {
        ts: session_log::now_millis(),
        locked_prefix_len,
    };
    self.store.append(self.session_id, &entry).await
}
/// Append a `CacheUnlocked` entry stamped with the current time.
///
/// This only writes the log entry; it does not touch the worker.
pub async fn log_cache_unlocked(&self) -> Result<(), StoreError> {
    let entry = LogEntry::CacheUnlocked {
        ts: session_log::now_millis(),
    };
    self.store.append(self.session_id, &entry).await
}
/// Append a `ConfigChanged` entry holding a snapshot of the worker's
/// current request config.
///
/// Call this after changing the config through `worker`; the change is not
/// detected automatically.
pub async fn log_config_changed(&self) -> Result<(), StoreError> {
    let entry = LogEntry::ConfigChanged {
        ts: session_log::now_millis(),
        config: self.worker.request_config().clone(),
    };
    self.store.append(self.session_id, &entry).await
}
// ── Private helpers ──────────────────────────────────────────────────
/// Persist the history items added since the history had `before_len` items.
///
/// The new tail of the history is walked front to back and split into log
/// entries:
/// - each user message becomes its own `UserInput`;
/// - a run of consecutive tool results becomes one `ToolResults`;
/// - a run of consecutive assistant messages / tool calls / reasoning items
///   becomes one `AssistantItems`;
/// - any other item becomes a single-item `HookInjectedItems`.
///
/// Items are cloned from the live history, so modifications made by hooks
/// (e.g. `on_prompt_submit`) are what gets logged. All entries written by
/// one call share the same timestamp. Nothing is written when the history
/// did not grow.
async fn log_history_delta(&self, before_len: usize) -> Result<(), StoreError> {
    let history = self.worker.history();
    let Some(appended) = history.get(before_len..).filter(|tail| !tail.is_empty()) else {
        return Ok(());
    };

    let ts = session_log::now_millis();
    let mut rest = appended;
    while let Some(head) = rest.first() {
        // Decide which entry the head item starts and how many items it spans.
        let (entry, consumed) = if head.is_user_message() {
            (
                LogEntry::UserInput {
                    ts,
                    item: head.clone(),
                },
                1,
            )
        } else if head.is_tool_result() {
            let run = rest.iter().take_while(|it| it.is_tool_result()).count();
            (
                LogEntry::ToolResults {
                    ts,
                    items: rest[..run].to_vec(),
                },
                run,
            )
        } else if head.is_assistant_message() || head.is_tool_call() || head.is_reasoning() {
            let run = rest
                .iter()
                .take_while(|it| {
                    it.is_assistant_message() || it.is_tool_call() || it.is_reasoning()
                })
                .count();
            (
                LogEntry::AssistantItems {
                    ts,
                    items: rest[..run].to_vec(),
                },
                run,
            )
        } else {
            (
                LogEntry::HookInjectedItems {
                    ts,
                    items: vec![head.clone()],
                },
                1,
            )
        };

        self.store.append(self.session_id, &entry).await?;
        rest = &rest[consumed..];
    }
    Ok(())
}
/// Append a `TurnEnd` marker carrying the worker's current turn count.
async fn log_turn_end(&self) -> Result<(), StoreError> {
    let marker = LogEntry::TurnEnd {
        ts: session_log::now_millis(),
        turn_count: self.worker.turn_count(),
    };
    self.store.append(self.session_id, &marker).await
}
async fn log_outcome(
&self,
result: &Result<WorkerResult, WorkerError>,
) -> Result<(), StoreError> {
let outcome = match result {
Ok(WorkerResult::Finished) => Outcome::Finished,
Ok(WorkerResult::Paused) => Outcome::Paused,
Err(e) => Outcome::Error {
message: e.to_string(),
},
};
self.store
.append(
self.session_id,
&LogEntry::RunOutcome {
ts: session_log::now_millis(),
outcome,
interrupted: self.worker.last_run_interrupted(),
},
)
.await
}
}

View File

@ -1,285 +0,0 @@
//! Session log types for append-only JSONL persistence.
//!
//! Each [`LogEntry`] represents a single state transition in a session,
//! serialized as one line in a `.jsonl` file. Reading all entries and
//! collecting them via [`collect_state`] reconstructs the full [`Worker`] state.
use llm_worker::llm_client::types::{Item, RequestConfig};
use serde::{Deserialize, Serialize};
/// A single session log entry, serialized as one JSONL line.
///
/// Serialization is internally tagged: each JSON object carries a `kind`
/// field holding the variant name in `snake_case`.
///
/// Variants correspond to specific mutation points in `Worker`:
/// - `SessionStart` — always the first entry; captures initial state
/// - `UserInput` / `AssistantItems` / `ToolResults` / `HookInjectedItems` — history appends
/// - `TurnEnd` — turn boundary marker
/// - `CacheLocked` / `CacheUnlocked` — KV cache state transitions
/// - `RunOutcome` — marks end of a `run()` or `resume()` call
/// - `ConfigChanged` — `RequestConfig` mutation
///
/// Every variant carries a `ts` timestamp. NOTE(review): the `worker.rs`
/// line numbers quoted on the variants below refer to another file and may
/// be stale — confirm before relying on them.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum LogEntry {
/// Session start. Always the first entry in a log.
/// For forked sessions, `history` contains the seed state from the parent.
/// On replay this replaces the system prompt, config and history wholesale.
SessionStart {
ts: u64,
system_prompt: Option<String>,
config: RequestConfig,
history: Vec<Item>,
},
/// User input pushed to history (worker.rs:229).
UserInput { ts: u64, item: Item },
/// Assistant response items added to history (worker.rs:1040-1041).
AssistantItems { ts: u64, items: Vec<Item> },
/// Tool execution results added to history (worker.rs:897-900, 1072-1076).
ToolResults { ts: u64, items: Vec<Item> },
/// Items injected by `on_turn_end` hook via `ContinueWithMessages` (worker.rs:1055).
HookInjectedItems { ts: u64, items: Vec<Item> },
/// Turn boundary. Records the turn count after increment.
TurnEnd { ts: u64, turn_count: usize },
/// KV cache locked. Records the history prefix length that is now immutable.
CacheLocked { ts: u64, locked_prefix_len: usize },
/// KV cache unlocked. On replay this resets the locked prefix length to 0.
CacheUnlocked { ts: u64 },
/// Outcome of a `run()` or `resume()` call.
/// This is metadata for auditing; state collection does not branch on the outcome.
/// Only `interrupted` is carried over into the collected state.
RunOutcome {
ts: u64,
outcome: Outcome,
interrupted: bool,
},
/// `RequestConfig` changed. Holds the complete new config, which replaces
/// the previous one on replay.
ConfigChanged { ts: u64, config: RequestConfig },
}
/// Outcome of a run/resume call. Metadata for auditing only.
///
/// Serialized with `snake_case` variant names.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum Outcome {
/// The call completed.
Finished,
/// The call returned in a paused state.
Paused,
/// The call failed; `message` holds the error's text.
Error { message: String },
}
/// State collected from log entries.
///
/// Produced by [`collect_state`]; each field holds the value left by the
/// last entry that affects it.
#[derive(Debug, Clone)]
pub struct RestoredState {
/// System prompt from the most recent `SessionStart`, if any.
pub system_prompt: Option<String>,
/// Request config from the most recent `SessionStart` or `ConfigChanged`.
pub config: RequestConfig,
/// Conversation history: the `SessionStart` seed plus all appended items.
pub history: Vec<Item>,
/// Turn count from the most recent `TurnEnd` (0 if none).
pub turn_count: usize,
/// Locked prefix length from the most recent `CacheLocked`; 0 after a
/// `CacheUnlocked` or if the cache was never locked.
pub locked_prefix_len: usize,
/// `interrupted` flag from the most recent `RunOutcome` (`false` if none).
pub last_run_interrupted: bool,
}
/// Fold a sequence of log entries into the worker state they describe.
///
/// Entries are applied in order on top of an empty default state:
/// - `SessionStart` overwrites the system prompt, config and history;
/// - `UserInput`, `AssistantItems`, `ToolResults` and `HookInjectedItems`
///   append to the history;
/// - `TurnEnd`, `CacheLocked`, `RunOutcome` and `ConfigChanged` overwrite
///   the corresponding field;
/// - `CacheUnlocked` resets the locked prefix length to 0.
///
/// An empty slice yields the default state.
pub fn collect_state(entries: &[LogEntry]) -> RestoredState {
    let mut collected = RestoredState {
        system_prompt: None,
        config: RequestConfig::default(),
        history: Vec::new(),
        turn_count: 0,
        locked_prefix_len: 0,
        last_run_interrupted: false,
    };

    for entry in entries {
        match entry {
            LogEntry::SessionStart {
                system_prompt,
                config,
                history,
                ..
            } => {
                collected.system_prompt = system_prompt.clone();
                collected.config = config.clone();
                collected.history = history.clone();
            }
            LogEntry::UserInput { item, .. } => collected.history.push(item.clone()),
            // All three grouped variants simply append their items.
            LogEntry::AssistantItems { items, .. }
            | LogEntry::ToolResults { items, .. }
            | LogEntry::HookInjectedItems { items, .. } => {
                collected.history.extend_from_slice(items);
            }
            LogEntry::TurnEnd { turn_count, .. } => collected.turn_count = *turn_count,
            LogEntry::CacheLocked {
                locked_prefix_len, ..
            } => collected.locked_prefix_len = *locked_prefix_len,
            LogEntry::CacheUnlocked { .. } => collected.locked_prefix_len = 0,
            LogEntry::RunOutcome { interrupted, .. } => {
                collected.last_run_interrupted = *interrupted;
            }
            LogEntry::ConfigChanged { config, .. } => collected.config = config.clone(),
        }
    }

    collected
}
/// Get the current timestamp in milliseconds since Unix epoch.
///
/// # Panics
///
/// Panics if the system clock reports a time before the Unix epoch.
pub fn now_millis() -> u64 {
    let since_epoch = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .expect("system clock before Unix epoch");
    // `as_millis` yields a `u128`; saturate instead of silently truncating
    // the way an `as u64` cast would.
    u64::try_from(since_epoch.as_millis()).unwrap_or(u64::MAX)
}
#[cfg(test)]
mod tests {
use super::*;
/// An empty log collects to an empty history and zeroed counters.
#[test]
fn replay_empty() {
    let no_entries: [LogEntry; 0] = [];
    let collected = collect_state(&no_entries);
    assert_eq!(collected.history.len(), 0);
    assert_eq!(collected.turn_count, 0);
    assert_eq!(collected.locked_prefix_len, 0);
}
/// A lone `SessionStart` seeds the prompt, config and history.
#[test]
fn replay_session_start_sets_initial_state() {
    let start = LogEntry::SessionStart {
        ts: 1000,
        system_prompt: Some(String::from("You are helpful.")),
        config: RequestConfig::default().with_max_tokens(1024),
        history: vec![Item::user_message("seed")],
    };
    let collected = collect_state(std::slice::from_ref(&start));
    assert_eq!(collected.system_prompt.as_deref(), Some("You are helpful."));
    assert_eq!(collected.config.max_tokens, Some(1024));
    assert_eq!(collected.history.len(), 1);
}
/// One complete user/assistant turn yields two history items, turn count 1
/// and a cleared interrupted flag.
#[test]
fn replay_full_turn() {
    let log = [
        LogEntry::SessionStart {
            ts: 1000,
            system_prompt: None,
            config: RequestConfig::default(),
            history: Vec::new(),
        },
        LogEntry::UserInput {
            ts: 2000,
            item: Item::user_message("Hello"),
        },
        LogEntry::AssistantItems {
            ts: 3000,
            items: vec![Item::assistant_message("Hi!")],
        },
        LogEntry::TurnEnd {
            ts: 3100,
            turn_count: 1,
        },
        LogEntry::RunOutcome {
            ts: 3200,
            outcome: Outcome::Finished,
            interrupted: false,
        },
    ];
    let collected = collect_state(&log);
    assert_eq!(collected.history.len(), 2);
    assert_eq!(collected.turn_count, 1);
    assert!(!collected.last_run_interrupted);
}
/// A tool round-trip is replayed in order: user, tool call, tool result,
/// final assistant message.
#[test]
fn replay_with_tool_calls() {
    let log = [
        LogEntry::SessionStart {
            ts: 1000,
            system_prompt: None,
            config: RequestConfig::default(),
            history: Vec::new(),
        },
        LogEntry::UserInput {
            ts: 2000,
            item: Item::user_message("Check weather"),
        },
        LogEntry::AssistantItems {
            ts: 3000,
            items: vec![Item::tool_call("call_1", "get_weather", r#"{"city":"Tokyo"}"#)],
        },
        LogEntry::ToolResults {
            ts: 3500,
            items: vec![Item::tool_result("call_1", "Sunny, 25C")],
        },
        LogEntry::AssistantItems {
            ts: 4000,
            items: vec![Item::assistant_message("It's sunny in Tokyo!")],
        },
        LogEntry::TurnEnd {
            ts: 4100,
            turn_count: 1,
        },
    ];
    let collected = collect_state(&log);
    assert_eq!(collected.history.len(), 4);
    assert!(collected.history[1].is_tool_call());
    assert!(collected.history[2].is_tool_result());
}
/// `CacheLocked` sets the locked prefix length and `CacheUnlocked` clears it.
#[test]
fn replay_cache_lock_unlock() {
    let log = [
        LogEntry::SessionStart {
            ts: 1000,
            system_prompt: None,
            config: RequestConfig::default(),
            history: vec![Item::user_message("a"), Item::assistant_message("b")],
        },
        LogEntry::CacheLocked {
            ts: 2000,
            locked_prefix_len: 2,
        },
        LogEntry::CacheUnlocked { ts: 3000 },
    ];

    // Full log: the final unlock wins.
    let after_unlock = collect_state(&log);
    assert_eq!(after_unlock.locked_prefix_len, 0);

    // Log truncated just before the unlock: still locked.
    let while_locked = collect_state(&log[..2]);
    assert_eq!(while_locked.locked_prefix_len, 2);
}
/// A `ConfigChanged` entry replaces the config from `SessionStart`.
#[test]
fn replay_config_changed() {
    let log = [
        LogEntry::SessionStart {
            ts: 1000,
            system_prompt: None,
            config: RequestConfig::default(),
            history: Vec::new(),
        },
        LogEntry::ConfigChanged {
            ts: 2000,
            config: RequestConfig::default().with_temperature(0.5),
        },
    ];
    let collected = collect_state(&log);
    assert_eq!(collected.config.temperature, Some(0.5));
}
}

View File

@ -1,68 +0,0 @@
//! Persistence backend abstraction.
//!
//! [`Store`] defines the async interface for reading and writing session logs.
//! Implementations handle the physical storage (filesystem, database, etc.).
use crate::event_trace::TraceEntry;
use crate::session_log::LogEntry;
use crate::SessionId;
use std::future::Future;
/// Errors from the persistence store.
#[derive(Debug, thiserror::Error)]
pub enum StoreError {
/// An underlying I/O operation failed; converted from `std::io::Error` via `?`.
#[error("I/O error: {0}")]
Io(#[from] std::io::Error),
/// JSON (de)serialization failed; converted from `serde_json::Error` via `?`.
#[error("serialization error: {0}")]
Serde(#[from] serde_json::Error),
/// No session with the given ID exists in the store.
#[error("session not found: {0}")]
NotFound(SessionId),
/// A stored log is corrupted. `line` identifies the offending line and
/// `message` describes the problem.
/// NOTE(review): whether `line` is 0- or 1-based is decided by the
/// implementation that produces it — confirm there.
#[error("log corrupted at line {line}: {message}")]
Corrupt { line: usize, message: String },
}
/// Async persistence backend for session logs.
///
/// All methods take `&self` — implementations should use interior mutability
/// (e.g., append-mode file handles) when needed.
///
/// Implementors must be `Send + Sync`, and every returned future must be
/// `Send`. All methods report failures as [`StoreError`].
pub trait Store: Send + Sync {
/// Append a single log entry to the session.
fn append(
&self,
id: SessionId,
entry: &LogEntry,
) -> impl Future<Output = Result<(), StoreError>> + Send;
/// Read all log entries for a session, in order.
fn read_all(
&self,
id: SessionId,
) -> impl Future<Output = Result<Vec<LogEntry>, StoreError>> + Send;
/// List all session IDs, most recent first.
fn list_sessions(&self)
-> impl Future<Output = Result<Vec<SessionId>, StoreError>> + Send;
/// Create a new session with initial entries.
///
/// `entries` are the entries the new session's log starts with.
fn create_session(
&self,
id: SessionId,
entries: &[LogEntry],
) -> impl Future<Output = Result<(), StoreError>> + Send;
/// Check if a session exists.
fn exists(
&self,
id: SessionId,
) -> impl Future<Output = Result<bool, StoreError>> + Send;
/// Append a trace entry to the debug event trace file.
///
/// Trace entries are kept apart from the session log entries written by
/// `append`.
fn append_trace(
&self,
id: SessionId,
entry: &TraceEntry,
) -> impl Future<Output = Result<(), StoreError>> + Send;
}

View File

@ -1,176 +0,0 @@
use llm_worker::llm_client::types::{Item, RequestConfig};
use llm_worker_persistence::{
FsStore, LogEntry, Outcome, Store, TraceEntry, new_session_id, collect_state,
};
/// Entries appended one by one can be read back and replayed into the
/// expected state.
#[tokio::test]
async fn round_trip_write_and_read() {
    let tmp = tempfile::tempdir().unwrap();
    let store = FsStore::new(tmp.path()).await.unwrap();
    let sid = new_session_id();

    let written = [
        LogEntry::SessionStart {
            ts: 1000,
            system_prompt: Some(String::from("You are helpful.")),
            config: RequestConfig::default().with_max_tokens(1024),
            history: Vec::new(),
        },
        LogEntry::UserInput {
            ts: 2000,
            item: Item::user_message("Hello"),
        },
        LogEntry::AssistantItems {
            ts: 3000,
            items: vec![Item::assistant_message("Hi there!")],
        },
        LogEntry::TurnEnd {
            ts: 3100,
            turn_count: 1,
        },
        LogEntry::RunOutcome {
            ts: 3200,
            outcome: Outcome::Finished,
            interrupted: false,
        },
    ];

    // Append each entry individually, in order.
    for entry in written.iter() {
        store.append(sid, entry).await.unwrap();
    }

    // Everything written must come back.
    let loaded = store.read_all(sid).await.unwrap();
    assert_eq!(loaded.len(), written.len());

    // Replaying the loaded log reproduces the expected state.
    let collected = collect_state(&loaded);
    assert_eq!(collected.system_prompt.as_deref(), Some("You are helpful."));
    assert_eq!(collected.config.max_tokens, Some(1024));
    assert_eq!(collected.history.len(), 2);
    assert_eq!(collected.turn_count, 1);
    assert!(!collected.last_run_interrupted);
}
/// `create_session` persists the initial entries it is given.
#[tokio::test]
async fn create_session_writes_all_entries() {
    let tmp = tempfile::tempdir().unwrap();
    let store = FsStore::new(tmp.path()).await.unwrap();
    let sid = new_session_id();

    let seed = [LogEntry::SessionStart {
        ts: 1000,
        system_prompt: None,
        config: RequestConfig::default(),
        history: vec![Item::user_message("seed"), Item::assistant_message("ok")],
    }];
    store.create_session(sid, &seed).await.unwrap();

    let loaded = store.read_all(sid).await.unwrap();
    assert_eq!(loaded.len(), 1);
    let collected = collect_state(&loaded);
    assert_eq!(collected.history.len(), 2);
}
/// `list_sessions` orders sessions newest first.
#[tokio::test]
async fn list_sessions_returns_newest_first() {
    let tmp = tempfile::tempdir().unwrap();
    let store = FsStore::new(tmp.path()).await.unwrap();

    let older = new_session_id();
    // Short pause so the two UUID v7 ids get different timestamps.
    tokio::time::sleep(std::time::Duration::from_millis(2)).await;
    let newer = new_session_id();

    let start = LogEntry::SessionStart {
        ts: 1000,
        system_prompt: None,
        config: RequestConfig::default(),
        history: Vec::new(),
    };
    store.append(older, &start).await.unwrap();
    store.append(newer, &start).await.unwrap();

    let listed = store.list_sessions().await.unwrap();
    // Exactly two sessions, newest first.
    assert_eq!(listed, vec![newer, older]);
}
/// `exists` is false before the first append and true afterwards.
#[tokio::test]
async fn exists_returns_correct_state() {
    let tmp = tempfile::tempdir().unwrap();
    let store = FsStore::new(tmp.path()).await.unwrap();
    let sid = new_session_id();

    let present_before = store.exists(sid).await.unwrap();
    assert!(!present_before);

    let start = LogEntry::SessionStart {
        ts: 1000,
        system_prompt: None,
        config: RequestConfig::default(),
        history: Vec::new(),
    };
    store.append(sid, &start).await.unwrap();

    let present_after = store.exists(sid).await.unwrap();
    assert!(present_after);
}
/// Reading a session that was never written yields an error.
#[tokio::test]
async fn not_found_error_for_missing_session() {
    let tmp = tempfile::tempdir().unwrap();
    let store = FsStore::new(tmp.path()).await.unwrap();
    let unknown = new_session_id();

    let outcome = store.read_all(unknown).await;
    assert!(outcome.is_err());
}
/// Trace entries go to `<id>.trace.jsonl` and do not show up in the
/// session log.
#[tokio::test]
async fn trace_entries_in_separate_file() {
    let tmp = tempfile::tempdir().unwrap();
    let store = FsStore::new(tmp.path()).await.unwrap();
    let sid = new_session_id();

    // One regular log entry.
    let start = LogEntry::SessionStart {
        ts: 1000,
        system_prompt: None,
        config: RequestConfig::default(),
        history: Vec::new(),
    };
    store.append(sid, &start).await.unwrap();

    // One trace entry.
    let ping = llm_worker::llm_client::event::Event::Ping(
        llm_worker::llm_client::event::PingEvent { timestamp: None },
    );
    let trace = TraceEntry {
        ts: 1500,
        turn: 0,
        event: ping,
    };
    store.append_trace(sid, &trace).await.unwrap();

    // The session log still holds only the single log entry.
    let loaded = store.read_all(sid).await.unwrap();
    assert_eq!(loaded.len(), 1);

    // The trace file exists next to it.
    let trace_file = tmp.path().join(format!("{sid}.trace.jsonl"));
    assert!(trace_file.exists());
}

View File

@ -1,335 +0,0 @@
mod common;
use std::sync::Arc;
use async_trait::async_trait;
use common::MockLlmClient;
use llm_worker::hook::{Hook, HookError, OnTurnEnd, OnTurnEndResult};
use llm_worker::llm_client::event::{Event, ResponseStatus, StatusEvent};
use llm_worker::llm_client::types::{Item, RequestConfig};
use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta};
use llm_worker::Worker;
use llm_worker_persistence::{
FsStore, LogEntry, Outcome, Session, SessionConfig, Store, collect_state,
};
// =============================================================================
// Helpers
// =============================================================================
/// Events for one response: a single text block ("Hello!") followed by a
/// `Completed` status.
fn simple_text_events() -> Vec<Event> {
    Vec::from([
        Event::text_block_start(0),
        Event::text_delta(0, "Hello!"),
        Event::text_block_stop(0, None),
        Event::Status(StatusEvent {
            status: ResponseStatus::Completed,
        }),
    ])
}
/// Events for two consecutive responses: first a `get_weather` tool call,
/// then the final text answer.
fn tool_call_events() -> Vec<Vec<Event>> {
    // Response 1: the model asks for the tool.
    let tool_call_response = vec![
        Event::tool_use_start(0, "call_1", "get_weather"),
        Event::tool_input_delta(0, r#"{"city":"Tokyo"}"#),
        Event::tool_use_stop(0),
        Event::Status(StatusEvent {
            status: ResponseStatus::Completed,
        }),
    ];
    // Response 2: the model answers in plain text.
    let final_text_response = vec![
        Event::text_block_start(0),
        Event::text_delta(0, "It's sunny in Tokyo!"),
        Event::text_block_stop(0, None),
        Event::Status(StatusEvent {
            status: ResponseStatus::Completed,
        }),
    ];
    vec![tool_call_response, final_text_response]
}
/// Test tool that ignores its input and always reports the same weather.
#[derive(Clone)]
struct MockWeatherTool;

#[async_trait]
impl Tool for MockWeatherTool {
    /// Always succeeds with the fixed text `Sunny, 25C`.
    async fn execute(&self, _input_json: &str) -> Result<String, ToolError> {
        Ok(String::from("Sunny, 25C"))
    }
}
/// Tool definition for `get_weather`: a factory that returns the tool's
/// metadata (one required string parameter, `city`) together with a
/// `MockWeatherTool` instance.
fn weather_tool_definition() -> ToolDefinition {
    Arc::new(|| {
        let schema = serde_json::json!({
            "type": "object",
            "properties": {
                "city": { "type": "string" }
            },
            "required": ["city"]
        });
        let meta = ToolMeta::new("get_weather")
            .description("Get weather")
            .input_schema(schema);
        let tool: Arc<dyn Tool> = Arc::new(MockWeatherTool);
        (meta, tool)
    })
}
/// Hook that forces a pause at turn end.
///
/// Despite the name, the hook keeps no state: it returns `Paused` on every
/// invocation, not only the first.
struct PauseOnFirstTurnEnd;
#[async_trait]
impl Hook<OnTurnEnd> for PauseOnFirstTurnEnd {
/// Ignores the input items and unconditionally requests a pause.
async fn call(&self, _input: &mut Vec<Item>) -> Result<OnTurnEndResult, HookError> {
Ok(OnTurnEndResult::Paused)
}
}
/// Create an `FsStore` rooted in a fresh temporary directory.
///
/// The `TempDir` is returned alongside the store so the caller can keep it
/// alive for as long as the store is used.
async fn make_store() -> (tempfile::TempDir, FsStore) {
    let tmp = tempfile::tempdir().unwrap();
    let fs_store = FsStore::new(tmp.path()).await.unwrap();
    (tmp, fs_store)
}
// =============================================================================
// Tests
// =============================================================================
/// A simple run writes a log that starts with `SessionStart` and contains
/// a `Finished` outcome.
#[tokio::test]
async fn session_run_logs_entries() {
    let (_tmp, store) = make_store().await;
    let worker = Worker::new(MockLlmClient::new(simple_text_events()));
    let mut session = Session::new(worker, store.clone(), SessionConfig::default())
        .await
        .unwrap();
    let sid = session.session_id();

    session.run("Hi").await.unwrap();

    let log = store.read_all(sid).await.unwrap();
    // Besides SessionStart, the run is expected to add UserInput,
    // AssistantItems, TurnEnd and RunOutcome; this check requires four.
    let count = log.len();
    assert!(count >= 4, "expected at least 4 entries, got {}", count);

    // The log opens with SessionStart.
    assert!(matches!(log.first(), Some(LogEntry::SessionStart { .. })));

    // Some entry records a Finished outcome.
    let finished = log.iter().any(|entry| {
        matches!(
            entry,
            LogEntry::RunOutcome {
                outcome: Outcome::Finished,
                ..
            }
        )
    });
    assert!(finished, "should have a Finished outcome");
}
/// Restoring a session reproduces the history length, turn count and
/// system prompt.
#[tokio::test]
async fn session_restore_round_trip() {
    let (_tmp, store) = make_store().await;
    let mut worker = Worker::new(MockLlmClient::new(simple_text_events()));
    worker.set_system_prompt("You are helpful.");
    let mut session = Session::new(worker, store.clone(), SessionConfig::default())
        .await
        .unwrap();
    let sid = session.session_id();

    session.run("Hi").await.unwrap();
    let expected_history_len = session.worker.history().to_vec().len();
    let expected_turn_count = session.worker.turn_count();

    // The restore client has no scripted responses; it is not expected to be called.
    let idle_client = MockLlmClient::new(vec![]);
    let restored = Session::restore(idle_client, store.clone(), sid, SessionConfig::default())
        .await
        .unwrap();

    assert_eq!(restored.worker.history().len(), expected_history_len);
    assert_eq!(restored.worker.turn_count(), expected_turn_count);
    let restored_prompt = restored.worker.get_system_prompt().map(String::from);
    assert_eq!(restored_prompt.as_deref(), Some("You are helpful."));
}
/// A run that includes a tool call logs both `ToolResults` and
/// `AssistantItems` entries.
#[tokio::test]
async fn session_run_with_tool_call() {
    let (_tmp, store) = make_store().await;
    let mut worker = Worker::new(MockLlmClient::with_responses(tool_call_events()));
    worker.register_tool(weather_tool_definition()).unwrap();
    let mut session = Session::new(worker, store.clone(), SessionConfig::default())
        .await
        .unwrap();
    let sid = session.session_id();

    session.run("What's the weather?").await.unwrap();

    let log = store.read_all(sid).await.unwrap();
    let saw_tool_results = log
        .iter()
        .any(|entry| matches!(entry, LogEntry::ToolResults { .. }));
    assert!(saw_tool_results, "should have ToolResults entry");
    let saw_assistant = log
        .iter()
        .any(|entry| matches!(entry, LogEntry::AssistantItems { .. }));
    assert!(saw_assistant, "should have AssistantItems entry");
}
/// A paused run logs a `Paused` outcome, and the session can be restored
/// from the store afterwards.
#[tokio::test]
async fn session_resume_after_pause() {
    let (_tmp, store) = make_store().await;

    // First run: tool call plus the pause hook, so the run ends Paused.
    let mut worker = Worker::new(MockLlmClient::with_responses(tool_call_events()));
    worker.register_tool(weather_tool_definition()).unwrap();
    worker.add_on_turn_end_hook(PauseOnFirstTurnEnd);
    let mut session = Session::new(worker, store.clone(), SessionConfig::default())
        .await
        .unwrap();
    let sid = session.session_id();

    let first_result = session.run("Weather?").await.unwrap();
    assert!(matches!(first_result, llm_worker::WorkerResult::Paused));

    // The log must record the Paused outcome.
    let log = store.read_all(sid).await.unwrap();
    let saw_paused = log.iter().any(|entry| {
        matches!(
            entry,
            LogEntry::RunOutcome {
                outcome: Outcome::Paused,
                ..
            }
        )
    });
    assert!(saw_paused, "should have Paused outcome");

    // Restore with a client scripted for one follow-up text response.
    let follow_up = vec![
        Event::text_block_start(0),
        Event::text_delta(0, "After resume"),
        Event::text_block_stop(0, None),
        Event::Status(StatusEvent {
            status: ResponseStatus::Completed,
        }),
    ];
    let resume_client = MockLlmClient::with_responses(vec![follow_up]);
    let mut restored =
        Session::restore(resume_client, store.clone(), sid, SessionConfig::default())
            .await
            .unwrap();
    assert!(restored.worker.last_run_interrupted());

    // Whether resume succeeds depends on Worker internals; only the restore
    // itself is checked here, so the result is deliberately discarded.
    let _ = restored.resume().await;
}
/// `fork` creates a one-entry log whose `SessionStart` carries the current
/// history and system prompt.
#[tokio::test]
async fn session_fork_preserves_state() {
    let (_tmp, store) = make_store().await;
    let mut worker = Worker::new(MockLlmClient::new(simple_text_events()));
    worker.set_system_prompt("System prompt");
    let mut session = Session::new(worker, store.clone(), SessionConfig::default())
        .await
        .unwrap();

    session.run("Hello").await.unwrap();
    let expected_history_len = session.worker.history().len();

    let fork_id = session.fork().await.unwrap();

    // The fork's log is a single SessionStart.
    let fork_log = store.read_all(fork_id).await.unwrap();
    assert_eq!(fork_log.len(), 1);
    assert!(matches!(
        fork_log.first(),
        Some(LogEntry::SessionStart { .. })
    ));

    let fork_state = collect_state(&fork_log);
    assert_eq!(fork_state.history.len(), expected_history_len);
    assert_eq!(fork_state.system_prompt.as_deref(), Some("System prompt"));
}
/// `fork_at` seeds the fork with the state replayed from only the first
/// `up_to_entry` entries of the source log.
#[tokio::test]
async fn session_fork_at_truncates() {
    let (_tmp, store) = make_store().await;
    let worker = Worker::new(MockLlmClient::new(simple_text_events()));
    let mut session = Session::new(worker, store.clone(), SessionConfig::default())
        .await
        .unwrap();
    let sid = session.session_id();

    session.run("Hello").await.unwrap();
    let source_log = store.read_all(sid).await.unwrap();
    assert!(source_log.len() > 2);

    // Fork after the first two entries (SessionStart + UserInput).
    let fork_id = Session::<MockLlmClient, FsStore>::fork_at(&store, sid, 2)
        .await
        .unwrap();

    // The fork's log holds only its own SessionStart.
    let fork_log = store.read_all(fork_id).await.unwrap();
    assert_eq!(fork_log.len(), 1);

    // Its state matches a replay of the first two source entries.
    let fork_state = collect_state(&fork_log);
    let expected_state = collect_state(&source_log[..2]);
    assert_eq!(fork_state.history.len(), expected_state.history.len());
}
/// `log_config_changed` records the worker's updated request config.
#[tokio::test]
async fn session_config_changed_logged() {
    let (_tmp, store) = make_store().await;
    let worker = Worker::new(MockLlmClient::new(vec![]));
    let mut session = Session::new(worker, store.clone(), SessionConfig::default())
        .await
        .unwrap();
    let sid = session.session_id();

    // Change the config directly on the worker, then log the change.
    let updated = RequestConfig::default().with_temperature(0.7);
    session.worker.set_request_config(updated);
    session.log_config_changed().await.unwrap();

    let log = store.read_all(sid).await.unwrap();
    let saw_change = log.iter().any(|entry| {
        matches!(
            entry,
            LogEntry::ConfigChanged { config, .. } if config.temperature == Some(0.7)
        )
    });
    assert!(saw_change, "should have ConfigChanged entry");
}
/// Cache lock and unlock calls are both logged, and replaying the log
/// ends in the unlocked state.
#[tokio::test]
async fn session_cache_lock_unlock_logged() {
    let (_tmp, store) = make_store().await;
    let worker = Worker::new(MockLlmClient::new(vec![]));
    let session = Session::new(worker, store.clone(), SessionConfig::default())
        .await
        .unwrap();
    let sid = session.session_id();

    session.log_cache_locked(5).await.unwrap();
    session.log_cache_unlocked().await.unwrap();

    let log = store.read_all(sid).await.unwrap();
    let saw_locked = log.iter().any(|entry| {
        matches!(
            entry,
            LogEntry::CacheLocked {
                locked_prefix_len: 5,
                ..
            }
        )
    });
    assert!(saw_locked, "should have CacheLocked entry");
    let saw_unlocked = log
        .iter()
        .any(|entry| matches!(entry, LogEntry::CacheUnlocked { .. }));
    assert!(saw_unlocked, "should have CacheUnlocked entry");

    // After replaying every entry the cache is unlocked again.
    let collected = collect_state(&log);
    assert_eq!(collected.locked_prefix_len, 0);
}

View File

@ -6,22 +6,23 @@ edition.workspace = true
license.workspace = true
[dependencies]
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
thiserror = "2.0"
tracing = "0.1"
async-trait = "0.1"
futures = "0.3"
tokio = { version = "1.49", features = ["macros", "rt-multi-thread"] }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
thiserror = { workspace = true }
tracing = { workspace = true }
async-trait = { workspace = true }
futures = { workspace = true }
tokio = { workspace = true, features = ["macros", "rt-multi-thread"] }
tokio-util = "0.7"
reqwest = { version = "0.13.1", default-features = false, features = ["stream", "json", "native-tls", "http2"] }
reqwest = { version = "0.13", default-features = false, features = ["stream", "json", "native-tls", "http2"] }
eventsource-stream = "0.2"
llm-worker-macros = { path = "../llm-worker-macros", version = "0.2" }
llm-worker-macros = { workspace = true }
[dev-dependencies]
clap = { version = "4.5", features = ["derive", "env"] }
schemars = "1.2"
tempfile = "3.24"
schemars = { workspace = true }
tempfile = { workspace = true }
dotenv = "0.15"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
trybuild = "1.0.116"
wiremock = "0.6.5"

View File

@ -0,0 +1,23 @@
# llm-worker
LLM との対話を管理する低レベル基盤クレート。会話履歴、ツール実行、イベントストリーミング、ライフサイクルフックを統合した `Worker` 抽象を提供する。
## 公開型
### コア
- `Worker<C, S>` — LLM 対話の中央管理(ターン実行、ツール呼び出し、キャンセル)
- `WorkerConfig` / `WorkerResult` / `WorkerError` — 設定・実行結果・エラー
- `Item` / `ContentPart` / `Role` — 会話履歴の構成要素
### モジュール
- `llm_client` — プロバイダ抽象(`LlmClient` トレイト、`Request`, `RequestConfig`, Anthropic/OpenAI/Gemini/Ollama 実装)
- `tool` — ツール定義・実行(`Tool` トレイト、`ToolDefinition`, `ToolOutput`, サイズ判定による Inline/Stored 切替)
- `tool_server` — ツール登録・ルックアップ（`ToolServer`, `ToolServerHandle`）
- `hook` — 実行フローへの介入ポイント(`Hook` トレイト、`PreToolCall`, `PostToolCall`, `OnTurnEnd` など)
- クロージャベースイベント購読(`Worker::on_text_block()`, `on_tool_use_block()`, `on_usage()` 等)
- `timeline` — イベントストリームのディスパッチ(`Handler` トレイト、各ブロックコレクター)。パワーユーザー向けに `timeline_mut()` も提供
- `event` — ストリーミングイベント型(`Event`, `BlockStart`, `BlockDelta` など)
- `state` — 型状態パターンによるキャッシュ保護（`Mutable` / `CacheLocked`）
cratesの整理Add READMEsRE to all crates@@

View File

@ -33,7 +33,7 @@ llm-workerは3層構成でLLMとのインタラクションを管理する。
| `tool` / `tool_server` | ツール定義・登録・実行 | R3 |
| `timeline` | イベントストリーム処理、Handler dispatch | — |
| `handler` | Handler/Kind trait、ブロック別ハンドラ | — |
| `subscriber` | WorkerSubscriber trait、UI向けイベント配信 | — |
| `callback` | クロージャベースイベント購読(`on_text_block`, `on_usage` 等) | — |
| `llm_client` | LLMプロバイダへのHTTPリクエスト/ストリーミング | — |
| `llm_client/scheme` | プロバイダ固有ワイヤーフォーマット変換 | — |
| `llm_client/providers` | Anthropic, OpenAI, Gemini, Ollama実装 | — |

View File

@ -1,132 +0,0 @@
# ツール出力の遅延読み込み設計
## 課題
ツール実行結果(ファイル内容、検索結果等)は サイズが予測不能 で、
全量を `Item::ToolResult { output: String }` として LLM コンテキストに
載せると、トークン消費が爆発する。
## 方針
- ツール出力に **Inline / Stored** の区別を導入する
- Stored な出力は **BlobStore** に保存し、履歴には要約のみ載せる
- LLM が詳細を見たい場合は **inspect ツール** で部分取得する
## データ型
### ToolOutput（llm-worker 側）
```rust
pub enum ToolOutput {
/// 小さな結果: そのまま history に載る
Inline(String),
/// 大きな結果: summary だけ history に載り、全体は BlobStore に保存される
Stored {
summary: String,
content: Content,
},
}
pub enum Content {
Text(String),
Structured(serde_json::Value),
}
```
- `Tool::execute()` の戻り値は `Result<String, ToolError>` のまま据え置き
- `From<String> for ToolOutput` で閾値ベースの自動昇格を行う
- ツール実装者が明示的に `ToolOutput` を返したい場合は別トレイトメソッドを用意
### BlobStore（llm-worker-persistence 側）
```rust
pub type BlobId = uuid::Uuid; // UUID v7
pub trait BlobStore: Send + Sync {
fn store(&self, content: &Content) -> impl Future<Output = Result<BlobId, BlobStoreError>> + Send;
fn load(&self, id: BlobId) -> impl Future<Output = Result<Content, BlobStoreError>> + Send;
fn exists(&self, id: BlobId) -> impl Future<Output = Result<bool, BlobStoreError>> + Send;
}
```
### FsBlobStore レイアウト
```
blobs/
├── {blob_id}.txt # Content::Text
└── {blob_id}.json # Content::Structured
```
セッションとは独立したフラットなストア。セッションとの紐付けは
ログ側の参照（summary 内の `[blob:<id>]`）で行う。
## 自動サマリ
`From<String>` による自動昇格時のサマリ生成ルール:
| 項目 | 値 |
|---|---|
| Inline 閾値 | 800 bytes |
| サマリ上限 | 400 bytes |
| 先頭行数 | 5 行 |
| 末尾行数 | 3 行 |
### Text のサマリ形式
```
[blob:<id>] text | {N} lines
── head ──
{先頭5行}
── tail ──
{末尾3行}
```
### Structured (JSON Array) のサマリ形式
```
[blob:<id>] json_array | {N} entries
── schema ──
{最初の要素のキー: 型}
── head ──
{先頭2要素}
```
### Structured (JSON Object) のサマリ形式
```
[blob:<id>] json_object | {N} keys
── keys ──
{キー一覧と各値の型/サイズ}
```
## Worker への統合
```
Tool::execute() → Result<String, ToolError>
▼ From<String> for ToolOutput
ToolOutput::Inline(s) ← len ≤ 800
ToolOutput::Stored { .. } ← len > 800
▼ Worker が BlobStore に保存
Item::ToolResult { output: summary } ← history に載る
▼ LLM が詳細を見たい場合
inspect(blob_id, selector?) → 部分取得
```
Worker はオプショナルに `BlobStore` を保持する。
BlobStore が未設定の場合は従来通り全量 Inline として扱う。
## inspect ツール
Worker に BlobStore が設定されている場合、自動的に登録される組み込みツール。
```
inspect(blob_id, selector?)
```
- selector 省略: メタ情報 + 先頭部分
- `lines:20-50`: 行範囲（Text 用）
- `slice:3..8`: インデックス範囲（Array 用）
- `key:results`: キー指定（Object 用）

View File

@ -20,9 +20,16 @@ mod recorder;
mod scenarios;
use clap::{Parser, ValueEnum};
use llm_worker::llm_client::providers::anthropic::AnthropicClient;
use llm_worker::llm_client::providers::gemini::GeminiClient;
use llm_worker::llm_client::providers::openai::OpenAIClient;
use llm_worker::llm_client::scheme::{
Scheme, anthropic::AnthropicScheme, gemini::GeminiScheme, openai_chat::OpenAIScheme,
};
use llm_worker::llm_client::transport::{HttpTransport, ResolvedAuth};
/// Builds an `HttpTransport` for `scheme` using the scheme's own defaults.
///
/// The capability and the base URL both come from the scheme
/// (`default_capability()` / `default_base_url()`); the caller supplies only
/// the model name and the authentication.
fn make_transport<S: Scheme>(scheme: S, model: &str, auth: ResolvedAuth) -> HttpTransport<S> {
    // Read both defaults before `scheme` is moved into the transport.
    let capability = scheme.default_capability();
    let endpoint = scheme.default_base_url().to_string();
    let model_name = model.to_string();
    HttpTransport::new(scheme, model_name, endpoint, auth, capability)
}
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
@ -60,7 +67,7 @@ async fn run_scenario_with_anthropic(
let api_key = std::env::var("ANTHROPIC_API_KEY")
.expect("ANTHROPIC_API_KEY environment variable must be set");
let model = model.as_deref().unwrap_or("claude-sonnet-4-20250514");
let client = AnthropicClient::new(&api_key, model);
let client = make_transport(AnthropicScheme::new(), model, ResolvedAuth::ApiKey(api_key));
recorder::record_request(
&client,
@ -82,7 +89,7 @@ async fn run_scenario_with_openai(
let api_key =
std::env::var("OPENAI_API_KEY").expect("OPENAI_API_KEY environment variable must be set");
let model = model.as_deref().unwrap_or("gpt-4o");
let client = OpenAIClient::new(&api_key, model);
let client = make_transport(OpenAIScheme::new(), model, ResolvedAuth::ApiKey(api_key));
recorder::record_request(
&client,
@ -101,10 +108,15 @@ async fn run_scenario_with_ollama(
subdir: &str,
model: Option<String>,
) -> Result<(), Box<dyn std::error::Error>> {
use llm_worker::llm_client::providers::ollama::OllamaClient;
// Ollama typically runs local, no key needed or placeholder
let model = model.as_deref().unwrap_or("llama3"); // default example
let client = OllamaClient::new(model); // base_url placeholder, handled by client default
// Ollama = Anthropic scheme + base_url 差し替え + 認証なし
let model = model.as_deref().unwrap_or("llama3");
let client = HttpTransport::new(
AnthropicScheme::new(),
model.to_string(),
"http://localhost:11434".to_string(),
ResolvedAuth::None,
AnthropicScheme::new().default_capability(),
);
recorder::record_request(
&client,
@ -126,7 +138,7 @@ async fn run_scenario_with_gemini(
let api_key =
std::env::var("GEMINI_API_KEY").expect("GEMINI_API_KEY environment variable must be set");
let model = model.as_deref().unwrap_or("gemini-2.0-flash");
let client = GeminiClient::new(&api_key, model);
let client = make_transport(GeminiScheme::new(), model, ResolvedAuth::ApiKey(api_key));
recorder::record_request(
&client,

View File

@ -2,11 +2,10 @@
//!
//! Example of cancelling from another thread during streaming
use llm_worker::llm_client::providers::anthropic::AnthropicClient;
use llm_worker::llm_client::scheme::{Scheme, anthropic::AnthropicScheme};
use llm_worker::llm_client::transport::{HttpTransport, ResolvedAuth};
use llm_worker::{Worker, WorkerResult};
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::Mutex;
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
@ -24,46 +23,39 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
let api_key =
std::env::var("ANTHROPIC_API_KEY").expect("ANTHROPIC_API_KEY environment variable not set");
let client = AnthropicClient::new(&api_key, "claude-sonnet-4-20250514");
let worker = Arc::new(Mutex::new(Worker::new(client)));
let scheme = AnthropicScheme::new();
let model = "claude-sonnet-4-20250514".to_string();
let cap = scheme.default_capability();
let base_url = scheme.default_base_url().to_string();
let client = HttpTransport::new(scheme, model, base_url, ResolvedAuth::ApiKey(api_key), cap);
let worker = Worker::new(client);
println!("🚀 Starting Worker...");
println!("💡 Will cancel after 2 seconds\n");
// Get cancel sender first (without holding lock)
let cancel_tx = {
let w = worker.lock().await;
w.cancel_sender()
};
// Get cancel sender before run (Mutable state)
let cancel_tx = worker.cancel_sender();
// Task 1: Run Worker
let worker_clone = worker.clone();
let task = tokio::spawn(async move {
let mut w = worker_clone.lock().await;
println!("📡 Sending request to LLM...");
match w.run("Tell me a very long story about a brave knight. Make it as detailed as possible with many paragraphs.").await {
Ok(WorkerResult::Finished) => {
println!("✅ Task completed normally");
}
Ok(WorkerResult::Paused) => {
println!("⏸️ Task paused");
}
Err(e) => {
println!("❌ Task error: {}", e);
}
}
});
// Task 2: Cancel after 2 seconds
// Task: Cancel after 2 seconds
tokio::spawn(async move {
tokio::time::sleep(Duration::from_secs(2)).await;
println!("\n🛑 Cancelling worker...");
let _ = cancel_tx.send(()).await;
});
// Wait for task completion
task.await?;
println!("📡 Sending request to LLM...");
match worker.run("Tell me a very long story about a brave knight. Make it as detailed as possible with many paragraphs.").await {
Ok(out) => match out.result {
WorkerResult::Finished => println!("✅ Task completed normally"),
WorkerResult::Paused => println!("⏸️ Task paused"),
WorkerResult::LimitReached => println!("🔒 Turn limit reached"),
WorkerResult::Yielded => println!("↩️ Task yielded"),
},
Err(e) => {
println!("❌ Task error: {}", e);
}
}
println!("\n✨ Demo complete!");

View File

@ -41,13 +41,14 @@ use tracing_subscriber::EnvFilter;
use clap::{Parser, ValueEnum};
use llm_worker::{
Worker,
hook::{Hook, HookError, PostToolCall, PostToolCallContext, PostToolCallResult},
interceptor::{Interceptor, PostToolAction, ToolResultInfo},
llm_client::{
LlmClient,
providers::{
anthropic::AnthropicClient, gemini::GeminiClient, ollama::OllamaClient,
openai::OpenAIClient,
capability::{CacheStrategy, ModelCapability, StructuredOutput, ToolCallingSupport},
scheme::{
Scheme, anthropic::AnthropicScheme, gemini::GeminiScheme, openai_chat::OpenAIScheme,
},
transport::{HttpTransport, ResolvedAuth},
},
timeline::{Handler, TextBlockEvent, TextBlockKind, ToolUseBlockEvent, ToolUseBlockKind},
};
@ -270,34 +271,34 @@ impl Handler<ToolUseBlockKind> for ToolCallPrinter {
}
}
/// Hook that displays tool execution results
struct ToolResultPrinterHook {
/// Policy that displays tool execution results.
struct ToolResultPrinterPolicy {
call_names: Arc<Mutex<HashMap<String, String>>>,
}
impl ToolResultPrinterHook {
impl ToolResultPrinterPolicy {
fn new(call_names: Arc<Mutex<HashMap<String, String>>>) -> Self {
Self { call_names }
}
}
#[async_trait]
impl Hook<PostToolCall> for ToolResultPrinterHook {
async fn call(&self, ctx: &mut PostToolCallContext) -> Result<PostToolCallResult, HookError> {
impl Interceptor for ToolResultPrinterPolicy {
async fn post_tool_call(&self, info: &mut ToolResultInfo) -> PostToolAction {
let name = self
.call_names
.lock()
.unwrap()
.remove(&ctx.result.tool_use_id)
.unwrap_or_else(|| ctx.result.tool_use_id.clone());
.remove(&info.result.tool_use_id)
.unwrap_or_else(|| info.result.tool_use_id.clone());
if ctx.result.is_error {
println!(" Result ({}): ❌ {}", name, ctx.result.content);
if info.result.is_error {
println!(" Result ({}): ❌ {}", name, info.result.summary);
} else {
println!(" Result ({}): ✅ {}", name, ctx.result.content);
println!(" Result ({}): ✅ {}", name, info.result.summary);
}
Ok(PostToolCallResult::Continue)
PostToolAction::Continue
}
}
@ -327,6 +328,22 @@ fn get_api_key(args: &Args) -> Result<String, String> {
}
/// Hand-written capability profile: parallel tool calling, JSON-schema
/// structured output, no reasoning, no vision, automatic prompt caching.
///
/// Unlike `build_transport`, this does not ask the scheme for its defaults.
/// It is used by the Ollama branch of `create_client`.
fn default_capability() -> ModelCapability {
ModelCapability {
tool_calling: ToolCallingSupport::Parallel,
structured_output: StructuredOutput::JsonSchema,
reasoning: None,
vision: false,
prompt_caching: CacheStrategy::Auto,
}
}
/// Boxes an `HttpTransport` for `scheme`, taking both the capability and the
/// base URL from the scheme's own defaults.
fn build_transport<S: Scheme>(scheme: S, model: String, auth: ResolvedAuth) -> Box<dyn LlmClient> {
// Both defaults are read before `scheme` is moved into the transport.
let cap = scheme.default_capability();
let base_url = scheme.default_base_url().to_string();
Box::new(HttpTransport::new(scheme, model, base_url, auth, cap))
}
/// Create client based on provider
fn create_client(args: &Args) -> Result<Box<dyn LlmClient>, String> {
let model = args
.model
@ -336,21 +353,32 @@ fn create_client(args: &Args) -> Result<Box<dyn LlmClient>, String> {
let api_key = get_api_key(args)?;
match args.provider {
Provider::Anthropic => {
let client = AnthropicClient::new(&api_key, &model);
Ok(Box::new(client))
}
Provider::Gemini => {
let client = GeminiClient::new(&api_key, &model);
Ok(Box::new(client))
}
Provider::Openai => {
let client = OpenAIClient::new(&api_key, &model);
Ok(Box::new(client))
}
Provider::Anthropic => Ok(build_transport(
AnthropicScheme::new(),
model,
ResolvedAuth::ApiKey(api_key),
)),
Provider::Gemini => Ok(build_transport(
GeminiScheme::new(),
model,
ResolvedAuth::ApiKey(api_key),
)),
Provider::Openai => Ok(build_transport(
OpenAIScheme::new(),
model,
ResolvedAuth::ApiKey(api_key),
)),
Provider::Ollama => {
let client = OllamaClient::new(&model);
Ok(Box::new(client))
// Ollama = Anthropic scheme + base_url 差し替え + 認証なし
let scheme = AnthropicScheme::new();
let cap = default_capability();
Ok(Box::new(HttpTransport::new(
scheme,
model,
"http://localhost:11434".to_string(),
ResolvedAuth::None,
cap,
)))
}
}
}
@ -438,10 +466,8 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
// Register tools (unless --no-tools)
if !args.no_tools {
let app = AppContext;
worker
.register_tool(app.get_current_time_definition())
.unwrap();
worker.register_tool(app.calculate_definition()).unwrap();
worker.register_tool(app.get_current_time_definition());
worker.register_tool(app.calculate_definition());
}
// Register streaming display handlers
@ -450,7 +476,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
.on_text_block(StreamingPrinter::new())
.on_tool_use_block(ToolCallPrinter::new(tool_call_names.clone()));
worker.add_post_tool_call_hook(ToolResultPrinterHook::new(tool_call_names));
worker.set_interceptor(ToolResultPrinterPolicy::new(tool_call_names));
// One-shot mode
if let Some(prompt) = args.prompt {
@ -465,7 +491,27 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
return Ok(());
}
// Interactive loop
// Interactive loop — first input transitions Mutable → Locked
print!("\n👤 You: ");
io::stdout().flush()?;
let mut first_input = String::new();
io::stdin().read_line(&mut first_input)?;
let first_input = first_input.trim();
if first_input == "quit" || first_input == "exit" || first_input.is_empty() {
println!("\n👋 Goodbye!");
return Ok(());
}
let mut locked = match worker.run(first_input).await {
Ok(out) => out.worker,
Err(e) => {
eprintln!("\n❌ Error: {}", e);
return Ok(());
}
};
loop {
print!("\n👤 You: ");
io::stdout().flush()?;
@ -483,8 +529,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
break;
}
// Run Worker (Worker manages history)
match worker.run(input).await {
match locked.run(input).await {
Ok(_) => {}
Err(e) => {
eprintln!("\n❌ Error: {}", e);

View File

@ -0,0 +1,291 @@
//! Closure-based event callback API
//!
//! Provides a closure-based alternative to implementing `Handler<K>` directly.
//! Register callbacks on `Worker` via `on_text_block()`, `on_tool_use_block()`,
//! `on_usage()`, etc.
use std::marker::PhantomData;
use crate::handler::{
Handler, Kind, TextBlockEvent, TextBlockKind, ThinkingBlockEvent, ThinkingBlockKind,
ToolUseBlockEvent, ToolUseBlockKind, ToolUseBlockStart,
};
use crate::tool::ToolCall;
// =============================================================================
// TextBlock Closure Handler
// =============================================================================
/// Builder handed to the setup closure registered with `Worker::on_text_block()`.
///
/// The setup closure fills in the callbacks it cares about through
/// `on_delta()` and `on_stop()`. A callback that is never registered stays
/// `None`; registering one twice keeps only the most recent closure.
///
/// # Examples
///
/// ```ignore
/// worker.on_text_block(|block| {
///     block.on_delta(|text| print!("{}", text));
///     block.on_stop(|full_text| println!("\n--- {} chars ---", full_text.len()));
/// });
/// ```
pub struct TextBlockScope {
    /// Callback for each streamed text fragment, if one was registered.
    pub(crate) on_delta: Option<Box<dyn FnMut(&str) + Send + Sync>>,
    /// Callback for the completed block, if one was registered.
    pub(crate) on_stop: Option<Box<dyn FnMut(&str) + Send + Sync>>,
}

impl TextBlockScope {
    /// Creates a scope with no callbacks registered.
    fn new() -> Self {
        let (on_delta, on_stop) = (None, None);
        Self { on_delta, on_stop }
    }

    /// Register a callback for each text delta (streaming fragment).
    pub fn on_delta(&mut self, f: impl FnMut(&str) + Send + Sync + 'static) {
        let callback: Box<dyn FnMut(&str) + Send + Sync> = Box::new(f);
        self.on_delta = Some(callback);
    }

    /// Register a callback invoked when the block completes.
    ///
    /// The callback receives the full accumulated text of the block.
    pub fn on_stop(&mut self, f: impl FnMut(&str) + Send + Sync + 'static) {
        let callback: Box<dyn FnMut(&str) + Send + Sync> = Box::new(f);
        self.on_stop = Some(callback);
    }
}
/// Per-block state created by Timeline's scope lifecycle.
///
/// Holds the callbacks copied out of the `TextBlockScope` builder on
/// `Start`, plus the text accumulated from `Delta` events.
#[derive(Default)]
pub(crate) struct TextBlockClosureState {
// Per-fragment callback taken from the builder; `None` if not registered.
on_delta: Option<Box<dyn FnMut(&str) + Send + Sync>>,
// Completion callback taken from the builder; `None` if not registered.
on_stop: Option<Box<dyn FnMut(&str) + Send + Sync>>,
// Concatenation of every delta seen since the last `Start`.
buffer: String,
}
/// Closure-based `Handler<TextBlockKind>` adapter.
///
/// `setup` is invoked on every `TextBlockEvent::Start` with a fresh
/// `TextBlockScope`, so it can register callbacks for that block.
pub(crate) struct ClosureTextBlockHandler {
// User-supplied setup closure; called once per block start.
pub(crate) setup: Box<dyn FnMut(&mut TextBlockScope) + Send + Sync>,
}
impl Handler<TextBlockKind> for ClosureTextBlockHandler {
type Scope = TextBlockClosureState;
fn on_event(&mut self, scope: &mut Self::Scope, event: &TextBlockEvent) {
match event {
TextBlockEvent::Start(_) => {
scope.buffer.clear();
let mut builder = TextBlockScope::new();
(self.setup)(&mut builder);
scope.on_delta = builder.on_delta;
scope.on_stop = builder.on_stop;
}
TextBlockEvent::Delta(text) => {
scope.buffer.push_str(text);
if let Some(f) = &mut scope.on_delta {
f(text);
}
}
TextBlockEvent::Stop(_) => {
if let Some(f) = &mut scope.on_stop {
f(&scope.buffer);
}
}
}
}
}
// =============================================================================
// ThinkingBlock Closure Handler
// =============================================================================
/// Builder handed to the setup closure for a thinking block.
///
/// Same shape as `TextBlockScope`. Some providers (or some configurations)
/// emit thinking metadata without plaintext deltas; such a block fires
/// `Start` and `Stop` with no `Delta` in between, which is expected and not
/// an error.
pub struct ThinkingBlockScope {
    /// Callback for each streamed thinking fragment, if one was registered.
    pub(crate) on_delta: Option<Box<dyn FnMut(&str) + Send + Sync>>,
    /// Callback for the completed block, if one was registered.
    pub(crate) on_stop: Option<Box<dyn FnMut(&str) + Send + Sync>>,
}

impl ThinkingBlockScope {
    /// Creates a scope with no callbacks registered.
    fn new() -> Self {
        let (on_delta, on_stop) = (None, None);
        Self { on_delta, on_stop }
    }

    /// Register a callback for each thinking text delta (streaming fragment).
    pub fn on_delta(&mut self, f: impl FnMut(&str) + Send + Sync + 'static) {
        let callback: Box<dyn FnMut(&str) + Send + Sync> = Box::new(f);
        self.on_delta = Some(callback);
    }

    /// Register a callback invoked when the block completes.
    ///
    /// The callback receives the full accumulated thinking text, which may be
    /// empty when the provider emitted no plaintext deltas.
    pub fn on_stop(&mut self, f: impl FnMut(&str) + Send + Sync + 'static) {
        let callback: Box<dyn FnMut(&str) + Send + Sync> = Box::new(f);
        self.on_stop = Some(callback);
    }
}
/// Per-block state for the thinking-block closure handler.
///
/// Holds the callbacks copied out of the `ThinkingBlockScope` builder on
/// `Start`, plus the thinking text accumulated from `Delta` events.
#[derive(Default)]
pub(crate) struct ThinkingBlockClosureState {
// Per-fragment callback taken from the builder; `None` if not registered.
on_delta: Option<Box<dyn FnMut(&str) + Send + Sync>>,
// Completion callback taken from the builder; `None` if not registered.
on_stop: Option<Box<dyn FnMut(&str) + Send + Sync>>,
// Concatenation of every delta seen since the last `Start`.
buffer: String,
}
/// Closure-based `Handler<ThinkingBlockKind>` adapter.
///
/// `setup` is invoked on every `ThinkingBlockEvent::Start` with a fresh
/// `ThinkingBlockScope`, so it can register callbacks for that block.
pub(crate) struct ClosureThinkingBlockHandler {
// User-supplied setup closure; called once per block start.
pub(crate) setup: Box<dyn FnMut(&mut ThinkingBlockScope) + Send + Sync>,
}
impl Handler<ThinkingBlockKind> for ClosureThinkingBlockHandler {
type Scope = ThinkingBlockClosureState;
fn on_event(&mut self, scope: &mut Self::Scope, event: &ThinkingBlockEvent) {
match event {
ThinkingBlockEvent::Start(_) => {
scope.buffer.clear();
let mut builder = ThinkingBlockScope::new();
(self.setup)(&mut builder);
scope.on_delta = builder.on_delta;
scope.on_stop = builder.on_stop;
}
ThinkingBlockEvent::Delta(text) => {
scope.buffer.push_str(text);
if let Some(f) = &mut scope.on_delta {
f(text);
}
}
ThinkingBlockEvent::Stop(_) => {
if let Some(f) = &mut scope.on_stop {
f(&scope.buffer);
}
}
}
}
}
// =============================================================================
// ToolUseBlock Closure Handler
// =============================================================================
/// Builder handed to the setup closure registered with
/// `Worker::on_tool_use_block()`.
///
/// The setup closure additionally receives `&ToolUseBlockStart`, which
/// carries the tool call's `id` and `name`.
///
/// # Examples
///
/// ```ignore
/// worker.on_tool_use_block(|start, block| {
///     println!("Tool: {} ({})", start.name, start.id);
///     block.on_delta(|json| { /* streaming JSON fragment */ });
///     block.on_stop(|call| println!("Done: {}", call.name));
/// });
/// ```
pub struct ToolUseBlockScope {
    /// Callback for each streamed JSON input fragment, if one was registered.
    pub(crate) on_delta: Option<Box<dyn FnMut(&str) + Send + Sync>>,
    /// Callback for the assembled tool call, if one was registered.
    pub(crate) on_stop: Option<Box<dyn FnMut(&ToolCall) + Send + Sync>>,
}

impl ToolUseBlockScope {
    /// Creates a scope with no callbacks registered.
    fn new() -> Self {
        let (on_delta, on_stop) = (None, None);
        Self { on_delta, on_stop }
    }

    /// Register a callback for each JSON input delta (streaming fragment).
    pub fn on_delta(&mut self, f: impl FnMut(&str) + Send + Sync + 'static) {
        let callback: Box<dyn FnMut(&str) + Send + Sync> = Box::new(f);
        self.on_delta = Some(callback);
    }

    /// Register a callback invoked when the block completes.
    ///
    /// The callback receives the fully assembled `ToolCall` with parsed JSON
    /// input.
    pub fn on_stop(&mut self, f: impl FnMut(&ToolCall) + Send + Sync + 'static) {
        let callback: Box<dyn FnMut(&ToolCall) + Send + Sync> = Box::new(f);
        self.on_stop = Some(callback);
    }
}
/// Per-block state for tool use closure handler.
///
/// Holds the callbacks copied out of the `ToolUseBlockScope` builder on
/// `Start`, the call's id and name, and the raw JSON input accumulated from
/// `InputJsonDelta` events.
#[derive(Default)]
pub(crate) struct ToolUseBlockClosureState {
// Per-fragment callback taken from the builder; `None` if not registered.
on_delta: Option<Box<dyn FnMut(&str) + Send + Sync>>,
// Completion callback taken from the builder; `None` if not registered.
on_stop: Option<Box<dyn FnMut(&ToolCall) + Send + Sync>>,
// Copied from `ToolUseBlockStart` on `Start`; moved out (left empty) on `Stop`.
id: String,
// Copied from `ToolUseBlockStart` on `Start`; moved out (left empty) on `Stop`.
name: String,
// Concatenation of every JSON fragment seen since the last `Start`.
input_json: String,
}
/// Closure-based `Handler<ToolUseBlockKind>` adapter.
///
/// `setup` is invoked on every `ToolUseBlockEvent::Start` with the start
/// payload and a fresh `ToolUseBlockScope`, so it can register callbacks
/// for that block.
pub(crate) struct ClosureToolUseBlockHandler {
// User-supplied setup closure; called once per block start.
pub(crate) setup: Box<dyn FnMut(&ToolUseBlockStart, &mut ToolUseBlockScope) + Send + Sync>,
}
impl Handler<ToolUseBlockKind> for ClosureToolUseBlockHandler {
type Scope = ToolUseBlockClosureState;
fn on_event(&mut self, scope: &mut Self::Scope, event: &ToolUseBlockEvent) {
match event {
ToolUseBlockEvent::Start(start) => {
scope.id = start.id.clone();
scope.name = start.name.clone();
scope.input_json.clear();
let mut builder = ToolUseBlockScope::new();
(self.setup)(start, &mut builder);
scope.on_delta = builder.on_delta;
scope.on_stop = builder.on_stop;
}
ToolUseBlockEvent::InputJsonDelta(json) => {
scope.input_json.push_str(json);
if let Some(f) = &mut scope.on_delta {
f(json);
}
}
ToolUseBlockEvent::Stop(_) => {
let input: serde_json::Value =
serde_json::from_str(&scope.input_json).unwrap_or_default();
let tool_call = ToolCall {
id: std::mem::take(&mut scope.id),
name: std::mem::take(&mut scope.name),
input,
};
if let Some(f) = &mut scope.on_stop {
f(&tool_call);
}
}
}
}
}
// =============================================================================
// Generic Meta Event Closure Handler
// =============================================================================
/// Closure-based `Handler<K>` adapter for meta events (Usage, Status, Error).
///
/// Wraps a single callback that is invoked with every event of kind `K`;
/// there is no per-block state (`Scope = ()`).
pub(crate) struct ClosureMetaHandler<F, K>
where
K: Kind,
{
// The user callback; called once per event.
pub(crate) callback: F,
// Ties the handler to its event kind without storing a `K`.
pub(crate) _kind: PhantomData<K>,
}
impl<F, K> Handler<K> for ClosureMetaHandler<F, K>
where
    F: FnMut(&K::Event) + Send + Sync,
    K: Kind,
{
    type Scope = ();

    /// Forwards the event unchanged to the wrapped callback.
    fn on_event(&mut self, _scope: &mut (), event: &K::Event) {
        let Self { callback, .. } = self;
        callback(event);
    }
}

View File

@ -91,6 +91,16 @@ impl Kind for ErrorKind {
type Event = ErrorEvent;
}
/// Reasoning item Kind - used to persist completed reasoning items.
///
/// Fires exactly once per reasoning item. The Worker receives the event
/// through `ReasoningItemCollector` and appends it to the history as
/// `Item::Reasoning` at the end of the turn.
pub struct ReasoningItemKind;
impl Kind for ReasoningItemKind {
type Event = ReasoningItemEvent;
}
// =============================================================================
// Block Kind Definitions
// =============================================================================

View File

@ -1,310 +0,0 @@
//! Hook-related type definitions
//!
//! Types used for turn control and intervention in the Worker layer
use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use thiserror::Error;
// =============================================================================
// Hook Event Kinds
// =============================================================================
/// Ties a hook event marker to the value hooks receive and the value they return.
///
/// `Hook<E>::call` takes `&mut E::Input` and returns
/// `Result<E::Output, HookError>`.
pub trait HookEventKind: Send + Sync + 'static {
/// Value handed (mutably) to the hook.
type Input;
/// Decision returned by the hook on success.
type Output;
}
/// Event marker: hooks receive a `crate::Item` and return `OnPromptSubmitResult`.
pub struct OnPromptSubmit;
/// Event marker: hooks receive a `Vec<crate::Item>` and return `PreLlmRequestResult`.
pub struct PreLlmRequest;
/// Event marker: hooks receive a `ToolCallContext` and return `PreToolCallResult`.
pub struct PreToolCall;
/// Event marker: hooks receive a `PostToolCallContext` and return `PostToolCallResult`.
pub struct PostToolCall;
/// Event marker: hooks receive a `Vec<crate::Item>` and return `OnTurnEndResult`.
pub struct OnTurnEnd;
/// Event marker: hooks receive a `String` and return `()`.
pub struct OnAbort;
/// Event marker: hooks receive a `TextDeltaContext` and return `StreamHookResult`.
pub struct OnTextDelta;
/// Event marker: hooks receive a `ToolCallDeltaContext` and return `StreamHookResult`.
pub struct OnToolCallDelta;
/// Event marker: hooks receive a `StreamChunkContext` and return `StreamHookResult`.
pub struct OnStreamChunk;
/// Event marker: hooks receive a `StreamCompleteContext` and return `StreamHookResult`.
pub struct OnStreamComplete;
/// Decision returned by `Hook<OnPromptSubmit>` implementations.
///
/// NOTE(review): how the Worker reacts to each variant is not visible in this
/// file; the `String` in `Cancel` is presumably a human-readable reason — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum OnPromptSubmitResult {
Continue,
Cancel(String),
}
/// Decision returned by `Hook<PreLlmRequest>` implementations.
///
/// NOTE(review): how the Worker reacts to each variant is not visible in this
/// file; the `String` in `Cancel` is presumably a human-readable reason — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PreLlmRequestResult {
Continue,
Cancel(String),
}
/// Decision returned by `Hook<PreToolCall>` implementations.
///
/// NOTE(review): how the Worker reacts to each variant is not visible in this
/// file; the `String` in `Abort` is presumably a human-readable reason — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PreToolCallResult {
Continue,
Skip,
Abort(String),
Pause,
}
/// Decision returned by `Hook<PostToolCall>` implementations.
///
/// NOTE(review): how the Worker reacts to each variant is not visible in this
/// file; the `String` in `Abort` is presumably a human-readable reason — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PostToolCallResult {
Continue,
Abort(String),
}
/// Decision returned by `Hook<OnTurnEnd>` implementations.
///
/// Derives only `Debug` and `Clone`, so values cannot be compared with `==`.
///
/// NOTE(review): how the Worker reacts to each variant is not visible in this
/// file; `ContinueWithMessages` presumably injects the items into history — confirm.
#[derive(Debug, Clone)]
pub enum OnTurnEndResult {
Finish,
ContinueWithMessages(Vec<crate::Item>),
Paused,
}
/// Decision shared by the streaming hooks (`OnTextDelta`, `OnToolCallDelta`,
/// `OnStreamChunk`, `OnStreamComplete`).
///
/// NOTE(review): how the Worker reacts to each variant is not visible in this
/// file; the `String` in `Abort` is presumably a human-readable reason — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum StreamHookResult {
Continue,
Abort(String),
Pause,
}
use std::sync::Arc;
use crate::tool::{Tool, ToolMeta};
/// Input context for PreToolCall
///
/// Passed as `&mut` to `Hook<PreToolCall>::call`.
pub struct ToolCallContext {
/// Tool call information (modifiable)
pub call: ToolCall,
/// Tool meta information (immutable)
pub meta: ToolMeta,
/// Tool instance (for state access)
pub tool: Arc<dyn Tool>,
}
/// Input context for PostToolCall
///
/// Passed as `&mut` to `Hook<PostToolCall>::call`.
pub struct PostToolCallContext {
/// Tool call information
pub call: ToolCall,
/// Tool execution result (modifiable)
pub result: ToolResult,
/// Tool meta information (immutable)
pub meta: ToolMeta,
/// Tool instance (for state access)
pub tool: Arc<dyn Tool>,
}
/// Input context for OnTextDelta
///
/// Passed as `&mut` to `Hook<OnTextDelta>::call`.
#[derive(Debug, Clone)]
pub struct TextDeltaContext {
/// Block index
pub index: usize,
/// Text delta content
pub delta: String,
}
/// Input context for OnToolCallDelta
///
/// Passed as `&mut` to `Hook<OnToolCallDelta>::call`.
#[derive(Debug, Clone)]
pub struct ToolCallDeltaContext {
/// Block index
pub index: usize,
/// Partial JSON fragment
pub delta_json_fragment: String,
}
/// Input context for OnStreamChunk
///
/// Passed as `&mut` to `Hook<OnStreamChunk>::call`.
#[derive(Debug, Clone)]
pub struct StreamChunkContext {
/// Public worker-level event
pub event: crate::event::Event,
}
/// Input context for OnStreamComplete
///
/// Passed as `&mut` to `Hook<OnStreamComplete>::call`.
#[derive(Debug, Clone)]
pub struct StreamCompleteContext {
/// Current turn number
pub turn: usize,
/// Number of streamed events in this request
pub event_count: usize,
}
// Input/output wiring for every hook event marker. `Hook<E>::call` receives
// `&mut E::Input` and returns `Result<E::Output, HookError>`.

// Prompt submission: one item in, continue/cancel decision out.
impl HookEventKind for OnPromptSubmit {
type Input = crate::Item;
type Output = OnPromptSubmitResult;
}
// Before an LLM request: a list of items in, continue/cancel decision out.
impl HookEventKind for PreLlmRequest {
type Input = Vec<crate::Item>;
type Output = PreLlmRequestResult;
}
// Before a tool call.
impl HookEventKind for PreToolCall {
type Input = ToolCallContext;
type Output = PreToolCallResult;
}
// After a tool call.
impl HookEventKind for PostToolCall {
type Input = PostToolCallContext;
type Output = PostToolCallResult;
}
// End of a turn: a list of items in, finish/continue/pause decision out.
impl HookEventKind for OnTurnEnd {
type Input = Vec<crate::Item>;
type Output = OnTurnEndResult;
}
// Abort notification: a `String` in, nothing to decide.
impl HookEventKind for OnAbort {
type Input = String;
type Output = ();
}
// The four streaming events all share `StreamHookResult` as their decision type.
impl HookEventKind for OnTextDelta {
type Input = TextDeltaContext;
type Output = StreamHookResult;
}
impl HookEventKind for OnToolCallDelta {
type Input = ToolCallDeltaContext;
type Output = StreamHookResult;
}
impl HookEventKind for OnStreamChunk {
type Input = StreamChunkContext;
type Output = StreamHookResult;
}
impl HookEventKind for OnStreamComplete {
type Input = StreamCompleteContext;
type Output = StreamHookResult;
}
// =============================================================================
// Tool Call / Result Types
// =============================================================================
/// Tool call information
///
/// Represents a ToolUse block from LLM, modifiable in Hook processing.
/// Serializable and deserializable through serde.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCall {
/// Tool call ID (used for linking with response)
pub id: String,
/// Tool name
pub name: String,
/// Input arguments (JSON)
pub input: Value,
}
/// Tool execution result
///
/// Represents the result after tool execution, modifiable in Hook processing.
/// Use `ToolResult::success` / `ToolResult::error` to build one.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolResult {
/// Corresponding tool call ID
pub tool_use_id: String,
/// Result content
pub content: String,
/// Whether this is an error; deserializes to `false` when the field is absent
#[serde(default)]
pub is_error: bool,
}
impl ToolResult {
    /// Builds a result with `is_error` set to `false`.
    pub fn success(tool_use_id: impl Into<String>, content: impl Into<String>) -> Self {
        let (tool_use_id, content) = (tool_use_id.into(), content.into());
        Self {
            tool_use_id,
            content,
            is_error: false,
        }
    }

    /// Builds a result with `is_error` set to `true`.
    pub fn error(tool_use_id: impl Into<String>, content: impl Into<String>) -> Self {
        let (tool_use_id, content) = (tool_use_id.into(), content.into());
        Self {
            tool_use_id,
            content,
            is_error: true,
        }
    }
}
// =============================================================================
// Hook Error
// =============================================================================
/// Hook error
///
/// Error half of the `Result` returned by `Hook::call`. The `Display` text
/// of each variant comes from its `#[error(...)]` attribute.
#[derive(Debug, Error)]
pub enum HookError {
/// Processing was aborted; displays as `Aborted: <message>`
#[error("Aborted: {0}")]
Aborted(String),
/// Internal error; displays as `Hook error: <message>`
#[error("Hook error: {0}")]
Internal(String),
}
// =============================================================================
// Hook Trait
// =============================================================================
/// Trait for handling Hook events
///
/// Each event type has a different return type, constrained via `HookEventKind`.
/// Implementations must be `Send + Sync`.
#[async_trait]
pub trait Hook<E: HookEventKind>: Send + Sync {
/// Handles one event. `input` is passed mutably, so a hook may modify it
/// in place; the returned `E::Output` is the hook's decision.
async fn call(&self, input: &mut E::Input) -> Result<E::Output, HookError>;
}
// =============================================================================
// Hook Registry
// =============================================================================
/// Registry holding all Hooks
///
/// Used internally by Worker to manage all Hook types. Each field is a list
/// of boxed hooks for one event kind; `HookRegistry::new` starts with every
/// list empty.
pub struct HookRegistry {
/// on_prompt_submit Hook
pub(crate) on_prompt_submit: Vec<Box<dyn Hook<OnPromptSubmit>>>,
/// pre_llm_request Hook
pub(crate) pre_llm_request: Vec<Box<dyn Hook<PreLlmRequest>>>,
/// pre_tool_call Hook
pub(crate) pre_tool_call: Vec<Box<dyn Hook<PreToolCall>>>,
/// post_tool_call Hook
pub(crate) post_tool_call: Vec<Box<dyn Hook<PostToolCall>>>,
/// on_turn_end Hook
pub(crate) on_turn_end: Vec<Box<dyn Hook<OnTurnEnd>>>,
/// on_abort Hook
pub(crate) on_abort: Vec<Box<dyn Hook<OnAbort>>>,
/// on_text_delta Hook
pub(crate) on_text_delta: Vec<Box<dyn Hook<OnTextDelta>>>,
/// on_tool_call_delta Hook
pub(crate) on_tool_call_delta: Vec<Box<dyn Hook<OnToolCallDelta>>>,
/// on_stream_chunk Hook
pub(crate) on_stream_chunk: Vec<Box<dyn Hook<OnStreamChunk>>>,
/// on_stream_complete Hook
pub(crate) on_stream_complete: Vec<Box<dyn Hook<OnStreamComplete>>>,
}
impl Default for HookRegistry {
fn default() -> Self {
Self::new()
}
}
impl HookRegistry {
    /// Builds a registry in which every hook list is empty.
    pub fn new() -> Self {
        Self {
            on_prompt_submit: Vec::default(),
            pre_llm_request: Vec::default(),
            pre_tool_call: Vec::default(),
            post_tool_call: Vec::default(),
            on_turn_end: Vec::default(),
            on_abort: Vec::default(),
            on_text_delta: Vec::default(),
            on_tool_call_delta: Vec::default(),
            on_stream_chunk: Vec::default(),
            on_stream_complete: Vec::default(),
        }
    }
}

View File

@ -0,0 +1,185 @@
//! Interceptor - control flow delegation for the Worker execution loop
//!
//! Defines the [`Interceptor`] trait that upper layers (e.g. Pod) implement
//! to inject orchestration decisions (approval, skip, pause, abort)
//! into the Worker's turn loop without the Worker knowing about
//! higher-level concepts.
use std::sync::Arc;
use async_trait::async_trait;
use crate::Item;
use crate::tool::{Tool, ToolCall, ToolMeta, ToolResult};
// =============================================================================
// Action Enums
// =============================================================================
/// Action after prompt submission.
///
/// Derives `PartialEq` but not `Eq`.
/// NOTE(review): presumably because `Item` is not `Eq` — confirm.
#[derive(Debug, Clone, PartialEq)]
pub enum PromptAction {
/// Proceed normally.
Continue,
/// Cancel with a reason.
Cancel(String),
/// Proceed, and append these items to history right after the user
/// message. Mirrors [`TurnEndAction::ContinueWithMessages`] for the
/// submit edge: lets the upper layer attach resolver-produced
/// system messages (e.g. `@<path>` file content) so they sit
/// adjacent to the user message that referenced them.
ContinueWith(Vec<Item>),
}
/// Decision made immediately before an LLM request is sent.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PreRequestAction {
    /// Send the request as usual.
    Continue,
    /// Do not send the request; the carried reason is treated as an error.
    Cancel(String),
    /// Hand control back to the caller for external processing.
    ///
    /// The Worker leaves the turn loop cleanly with `WorkerResult::Yielded`,
    /// and the caller is expected to resume execution later.
    Yield,
}
/// Action before a tool call.
///
/// Returned by [`Interceptor::pre_tool_call`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PreToolAction {
/// Proceed with execution.
Continue,
/// Skip this tool call (do not execute).
Skip,
/// Do not execute the tool call; commit this synthetic result instead.
///
/// This preserves provider-visible `tool_use` / `tool_result` pairing
/// without aborting the whole turn.
SyntheticResult(ToolResult),
/// Abort the entire run. The string carries the reason.
Abort(String),
/// Pause execution (can be resumed later).
Pause,
}
/// Decision made once a tool call has completed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PostToolAction {
    /// Carry on with the run.
    Continue,
    /// Stop the entire run; the string carries the reason.
    Abort(String),
}
/// Action at the end of a turn (when LLM produces no tool calls).
///
/// Returned by [`Interceptor::on_turn_end`]. Unlike the other action enums
/// in this module, this one derives only `Debug` and `Clone`, so values
/// cannot be compared with `==`.
#[derive(Debug, Clone)]
pub enum TurnEndAction {
/// Turn is finished, return to caller.
Finish,
/// Continue with additional messages injected into history.
ContinueWithMessages(Vec<Item>),
/// Pause execution (can be resumed later).
Pause,
}
// =============================================================================
// Context Types
// =============================================================================
/// Context for pre-tool-call decisions.
///
/// Passed to [`Interceptor::pre_tool_call`] by mutable reference, so the
/// interceptor may rewrite `call` in place.
pub struct ToolCallInfo {
/// Tool call information (modifiable).
pub call: ToolCall,
/// Tool meta information.
pub meta: ToolMeta,
/// Tool instance (for state access).
pub tool: Arc<dyn Tool>,
}
/// Context for post-tool-call decisions.
///
/// Passed to [`Interceptor::post_tool_call`] by mutable reference, so the
/// interceptor may rewrite `result` in place.
pub struct ToolResultInfo {
/// Original tool call.
pub call: ToolCall,
/// Tool execution result (modifiable).
pub result: ToolResult,
/// Tool meta information.
pub meta: ToolMeta,
/// Tool instance (for state access).
pub tool: Arc<dyn Tool>,
}
// =============================================================================
// Interceptor Trait
// =============================================================================
/// Intercepts the Worker execution loop at key decision points.
///
/// All methods have default implementations that let the Worker
/// proceed without intervention. Upper layers (e.g. Pod) provide
/// richer implementations for approval flows, permission checks, etc.
#[async_trait]
pub trait Interceptor: Send + Sync {
/// Called after receiving user input, before adding to history.
///
/// The item is passed mutably. The default implementation leaves it
/// untouched and returns [`PromptAction::Continue`].
async fn on_prompt_submit(&self, _item: &mut Item) -> PromptAction {
PromptAction::Continue
}
/// Items that should be **committed to `worker.history`** just
/// before the next LLM request. Returned items are `extend`ed into
/// the persistent history (and therefore picked up by the per-turn
/// clone that backs the LLM request, plus the usual
/// history-persistence path).
///
/// Use this for inputs that arrive from outside the LLM and need
/// to be reflected in the on-disk history — notifications,
/// cross-Pod events, system reminders. Do **not** use
/// [`Self::pre_llm_request`] for that purpose: it mutates a
/// per-request clone, so any committed assistant response that
/// reacts to the injection would have no visible trigger on the
/// next turn (or after resume / compaction).
///
/// `pre_llm_request` remains the right place for purely
/// reproducible per-request transformations (pruning, content
/// trimming, cache anchors) that depend only on the existing
/// history.
///
/// The default implementation returns an empty list.
async fn pending_history_appends(&self) -> Vec<Item> {
Vec::new()
}
/// Called before each LLM request. The context starts as a clone
/// of `worker.history` (after `pending_history_appends` and the
/// Worker's own prune projection have been applied) and can be
/// further modified for that single request only — mutations here
/// are **not** persisted back to history. Use
/// [`Self::pending_history_appends`] for inputs that need to land
/// in history.
///
/// The default implementation returns [`PreRequestAction::Continue`].
async fn pre_llm_request(&self, _context: &mut Vec<Item>) -> PreRequestAction {
PreRequestAction::Continue
}
/// Called before each tool is executed.
///
/// The default implementation returns [`PreToolAction::Continue`].
async fn pre_tool_call(&self, _info: &mut ToolCallInfo) -> PreToolAction {
PreToolAction::Continue
}
/// Called after each tool completes.
///
/// The default implementation returns [`PostToolAction::Continue`].
async fn post_tool_call(&self, _info: &mut ToolResultInfo) -> PostToolAction {
PostToolAction::Continue
}
/// Called when a turn ends with no tool calls.
///
/// The default implementation returns [`TurnEndAction::Finish`].
async fn on_turn_end(&self, _history: &[Item]) -> TurnEndAction {
TurnEndAction::Finish
}
/// Called when execution is interrupted (abort or cancel).
///
/// The default implementation does nothing.
async fn on_abort(&self, _reason: &str) {}
}
/// Default interceptor: no intervention. Worker proceeds through the loop
/// without any external control flow decisions.
///
/// The impl below is empty, so every hook falls back to the trait's default
/// method body.
pub(crate) struct DefaultInterceptor;
#[async_trait]
impl Interceptor for DefaultInterceptor {}

View File

@ -6,8 +6,8 @@
//!
//! - [`Worker`] - Central component for managing LLM interactions
//! - [`tool::Tool`] - Tools that can be invoked by the LLM
//! - [`hook::Hook`] - Hooks for intercepting turn progression
//! - [`subscriber::WorkerSubscriber`] - Subscribing to streaming events
//! - [`interceptor::Interceptor`] - Control-flow delegation for the execution loop
//! - Closure-based event callbacks via `Worker::on_text_block()`, `on_tool_use_block()`, etc.
//!
//! # Quick Start
//!
@ -27,26 +27,38 @@
//!
//! # Cache Protection
//!
//! To maximize KV cache hit rate, transition to the locked state
//! with [`Worker::lock()`] before execution.
//! `run()` automatically locks the cache. To edit state between turns,
//! call `unlock_cache()` first; the next `run()` re-locks automatically.
//!
//! ```ignore
//! let mut locked = worker.lock();
//! locked.run("user input").await?;
//! worker.run("user input").await?;
//! worker.unlock_cache();
//! worker.set_system_prompt("new prompt");
//! worker.run("next input").await?;
//! ```
mod handler;
mod message;
mod worker;
pub(crate) mod callback;
pub mod event;
pub mod hook;
pub mod interceptor;
pub mod llm_client;
pub mod prune;
pub mod state;
pub mod subscriber;
pub mod timeline;
pub mod token_counter;
pub mod tool;
pub mod tool_server;
pub mod usage_record;
pub use callback::{TextBlockScope, ThinkingBlockScope, ToolUseBlockScope};
pub use handler::ToolUseBlockStart;
pub use interceptor::Interceptor;
pub use message::{ContentPart, Item, Message, Role};
pub use worker::{ToolRegistryError, Worker, WorkerConfig, WorkerError, WorkerResult};
pub use tool::{ToolCall, ToolOutputLimits, ToolResult};
pub use usage_record::UsageRecord;
pub use worker::{
LlmRetryNotice, RunOutput, ToolRegistryError, Worker, WorkerConfig, WorkerError, WorkerResult,
};

View File

@ -0,0 +1,48 @@
//! `Scheme` 実装と通信層が要求する認証要件、および動的認証プロバイダ。
//!
//! マニフェスト側の型(`ModelConfig` / `SchemeKind` / `AuthRef`)は
//! `crates/manifest` に置き、llm-worker はそれを知らずに済む。
//! `AuthRequirement` は scheme が宣言する「この scheme はどんな認証を
//! 期待するか」のランタイム記述で、manifest 側の `AuthRef` との
//! 照合(`AuthRef → ResolvedAuth` 変換の適否)は `crates/provider`
//! で行う。
//!
//! Codex OAuth のようにリクエスト毎にトークンが変わり得る認証は
//! [`AuthProvider`] trait を `crates/provider` 側で実装し、
//! [`super::transport::ResolvedAuth::Custom`] 経由で transport に渡す。
use async_trait::async_trait;
use reqwest::header::{HeaderName, HeaderValue};
use super::error::ClientError;
/// Authentication requirement reported by `Scheme::required_auth()`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AuthRequirement {
    /// No authentication is performed (e.g. Ollama).
    None,
    /// `Authorization: Bearer <token>` header (the token is the API-key equivalent).
    Bearer,
    /// `x-api-key: <token>` header (Anthropic style).
    XApiKey,
    /// Query parameter `?<name>=<token>` (Gemini style).
    QueryParam { name: &'static str },
    /// Composite headers (Codex OAuth and the like), resolved in `crates/provider`.
    Custom,
}
/// Provider that assembles authentication headers dynamically for each request.
///
/// Used in cases such as Codex OAuth, where the access_token is replaced on
/// refresh, or where several headers such as `ChatGPT-Account-Id` /
/// `X-OpenAI-Fedramp` have to be injected together. The implementation lives
/// in `crates/provider`; llm-worker only knows the trait.
///
/// The returned headers are inserted into the `HeaderMap` as-is. The scheme's
/// default authentication headers, including `Authorization`, are not sent,
/// so the implementation must set them itself when they are needed.
#[async_trait]
pub trait AuthProvider: Send + Sync + std::fmt::Debug {
/// Returns the authentication headers for one request, refreshing internally if required.
async fn headers(&self) -> Result<Vec<(HeaderName, HeaderValue)>, ClientError>;
}

View File

@ -0,0 +1,169 @@
//! モデル能力メタデータ
//!
//! `ModelCapability` はモデルが持つ機能差を表現する。scheme は同じでも
//! モデルごとに reasoning 可否や prompt caching 方式が違うため、scheme
//! から分離して保持する。
//!
//! 値の供給経路は 2 通り:
//! 1. scheme 実装側の `model_id → ModelCapability` 静的テーブル(既知モデル)
//! 2. `ModelConfig::capability` での明示 override(未知モデル、または上書き)
use serde::{Deserialize, Deserializer, Serialize, Serializer};
/// Model capability metadata.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ModelCapability {
/// Level of tool-calling support.
pub tool_calling: ToolCallingSupport,
/// Level of structured-output support.
pub structured_output: StructuredOutput,
/// Reasoning control style, if any. Falls back to `None` when the field
/// is absent during deserialization.
#[serde(default)]
pub reasoning: Option<ReasoningSupport>,
/// Vision flag. Falls back to `false` when the field is absent during
/// deserialization.
#[serde(default)]
pub vision: bool,
/// Prompt caching strategy.
pub prompt_caching: CacheStrategy,
}
impl ModelCapability {
/// Safe-side default that supports nothing. Intended as the fallback for unknown models.
///
/// Tool calling and structured output are `None`, reasoning is absent,
/// vision is off, and prompt caching is `CacheStrategy::Auto`.
pub const fn minimal() -> Self {
Self {
tool_calling: ToolCallingSupport::None,
structured_output: StructuredOutput::None,
reasoning: None,
vision: false,
prompt_caching: CacheStrategy::Auto,
}
}
}
/// Tool-calling support.
///
/// Serialized with snake_case variant names.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum ToolCallingSupport {
/// Not supported.
None,
/// Only one tool per response.
Sequential,
/// Multiple tools in parallel within one response.
Parallel,
}
/// Structured output support.
///
/// Serialized with snake_case variant names.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum StructuredOutput {
/// Not supported.
None,
/// `json_object` mode (JSON is enforced without a schema).
JsonObject,
/// Structured output driven by a JSON Schema.
JsonSchema,
}
/// Reasoning (extended thinking) support.
///
/// Serialized with snake_case variant names.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum ReasoningSupport {
/// OpenAI style: `reasoning.effort` (low/medium/high).
Effort,
/// Anthropic style: `thinking.budget_tokens`.
BudgetTokens,
/// Both styles (handled internally as the common `ReasoningControl` and projected by each scheme).
Both,
}
/// Prompt caching strategy.
///
/// Serialized as an internally tagged enum: the variant name is stored in
/// snake_case under the `kind` key.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum CacheStrategy {
/// Anthropic: `cache_control` markers are inserted explicitly.
Explicit { max_breakpoints: u8 },
/// Everything else: automatic server-side prefix caching, or unsupported.
Auto,
}
/// Reasoning control (a common type that each scheme projects onto its vendor-specific format).
///
/// A string is treated as a provider-native effort label and a number as a
/// provider-native thinking budget in tokens. The type expresses exactly one
/// of the two.
///
/// Because the enum is `untagged`, the serialized form is the bare string or
/// the bare number with no wrapper.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(untagged)]
pub enum ReasoningControl {
/// Effort label.
Effort(ReasoningEffort),
/// Thinking budget in tokens. Signed, so negative values can be represented.
BudgetTokens(i32),
}
/// Reasoning effort label.
///
/// The well-known labels get dedicated variants; any other label is kept
/// verbatim in [`ReasoningEffort::Other`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ReasoningEffort {
    Minimal,
    Low,
    Medium,
    High,
    XHigh,
    Other(String),
}

impl ReasoningEffort {
    /// Returns the textual label for this effort level.
    ///
    /// Known variants map to fixed lowercase labels; `Other` yields the
    /// stored string unchanged.
    pub fn as_str(&self) -> &str {
        match self {
            Self::Other(custom) => custom.as_str(),
            Self::XHigh => "xhigh",
            Self::High => "high",
            Self::Medium => "medium",
            Self::Low => "low",
            Self::Minimal => "minimal",
        }
    }
}

impl From<String> for ReasoningEffort {
    /// Maps an exact, case-sensitive known label to its variant and wraps
    /// everything else in `Other` without copying the string.
    fn from(value: String) -> Self {
        let recognised = match value.as_str() {
            "minimal" => Some(Self::Minimal),
            "low" => Some(Self::Low),
            "medium" => Some(Self::Medium),
            "high" => Some(Self::High),
            "xhigh" => Some(Self::XHigh),
            _ => None,
        };
        recognised.unwrap_or_else(|| Self::Other(value))
    }
}
impl Serialize for ReasoningEffort {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.serialize_str(self.as_str())
}
}
impl<'de> Deserialize<'de> for ReasoningEffort {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
String::deserialize(deserializer).map(Self::from)
}
}
#[cfg(test)]
mod tests {
use super::{ReasoningControl, ReasoningEffort};
// A JSON string becomes `ReasoningControl::Effort`: known labels map to their
// dedicated variant, unknown labels are preserved in `Other`.
#[test]
fn reasoning_control_deserializes_effort_labels() {
let known: ReasoningControl = serde_json::from_str(r#""xhigh""#).unwrap();
assert_eq!(known, ReasoningControl::Effort(ReasoningEffort::XHigh));
let unknown: ReasoningControl = serde_json::from_str(r#""provider-native""#).unwrap();
assert_eq!(
unknown,
ReasoningControl::Effort(ReasoningEffort::Other("provider-native".into()))
);
}
// A JSON number becomes `ReasoningControl::BudgetTokens`, including negative values.
#[test]
fn reasoning_control_deserializes_signed_budget() {
let dynamic: ReasoningControl = serde_json::from_str("-1").unwrap();
assert_eq!(dynamic, ReasoningControl::BudgetTokens(-1));
}
}

View File

@ -36,6 +36,8 @@ impl std::fmt::Display for ConfigWarning {
}
}
pub type ResponseStream = Pin<Box<dyn Stream<Item = Result<Event, ClientError>> + Send>>;
/// LLMクライアントのtrait
///
/// 各プロバイダはこのtraitを実装し、統一されたインターフェースを提供する。
@ -49,10 +51,13 @@ pub trait LlmClient: Send + Sync {
/// # Returns
/// * `Ok(Stream)` - イベントストリーム
/// * `Err(ClientError)` - エラー
async fn stream(
&self,
request: Request,
) -> Result<Pin<Box<dyn Stream<Item = Result<Event, ClientError>> + Send>>, ClientError>;
async fn stream(&self, request: Request) -> Result<ResponseStream, ClientError>;
/// Clone this client into a new `Box<dyn LlmClient>`.
///
/// Used when a second client instance is needed (e.g. for context
/// compaction) without access to the original construction parameters.
fn clone_boxed(&self) -> Box<dyn LlmClient>;
/// 設定をバリデーションし、未サポートの設定があれば警告を返す
///
@ -68,18 +73,25 @@ pub trait LlmClient: Send + Sync {
}
}
/// Makes `Box<dyn LlmClient>` cloneable by delegating to
/// [`LlmClient::clone_boxed`].
impl Clone for Box<dyn LlmClient> {
fn clone(&self) -> Self {
self.clone_boxed()
}
}
/// `Box<dyn LlmClient>` に対する `LlmClient` の実装
///
/// これにより、動的ディスパッチを使用するクライアントも `Worker` で利用可能になる。
#[async_trait]
impl LlmClient for Box<dyn LlmClient> {
async fn stream(
&self,
request: Request,
) -> Result<Pin<Box<dyn Stream<Item = Result<Event, ClientError>> + Send>>, ClientError> {
async fn stream(&self, request: Request) -> Result<ResponseStream, ClientError> {
(**self).stream(request).await
}
fn clone_boxed(&self) -> Box<dyn LlmClient> {
(**self).clone_boxed()
}
fn validate_config(&self, config: &RequestConfig) -> Vec<ConfigWarning> {
(**self).validate_config(config)
}

View File

@ -1,6 +1,6 @@
//! LLMクライアントエラー型
use std::fmt;
use std::{fmt, time::Duration};
/// LLMクライアントのエラー
#[derive(Debug)]
@ -16,6 +16,7 @@ pub enum ClientError {
status: Option<u16>,
code: Option<String>,
message: String,
retry_after: Option<Duration>,
},
/// 設定エラー
Config(String),
@ -31,6 +32,7 @@ impl fmt::Display for ClientError {
status,
code,
message,
..
} => {
write!(f, "API error")?;
if let Some(s) = status {
@ -67,3 +69,86 @@ impl From<serde_json::Error> for ClientError {
ClientError::Json(err)
}
}
impl ClientError {
    /// Status stored in an `Api` error; `None` for every other variant
    /// (and for an `Api` error that carries no status).
    pub fn status(&self) -> Option<u16> {
        if let ClientError::Api { status, .. } = self {
            *status
        } else {
            None
        }
    }

    /// `retry_after` duration stored in an `Api` error; `None` for every
    /// other variant (and for an `Api` error that carries none).
    pub fn retry_after(&self) -> Option<Duration> {
        if let ClientError::Api { retry_after, .. } = self {
            *retry_after
        } else {
            None
        }
    }
}
/// Decides whether an error is a transient failure that is worth retrying.
///
/// Retryable:
/// - `Api { status }` with status 408 / 425 / 429 / 500 / 502 / 503 / 504 / 529
/// - `Http(reqwest::Error)` where `is_connect()` or `is_timeout()` holds
///
/// Everything else (Json, Sse, Config, any other Api status, or an Api error
/// without a status) yields `false`. Failures that occur after SSE reading
/// has started are surfaced by the caller as `Sse`, so excluding `Sse` here
/// rejects them automatically.
pub fn is_retryable(error: &ClientError) -> bool {
    const RETRYABLE_STATUS: [u16; 8] = [408, 425, 429, 500, 502, 503, 504, 529];
    match error {
        ClientError::Api { status, .. } => {
            matches!(status, Some(code) if RETRYABLE_STATUS.contains(code))
        }
        ClientError::Http(http_err) => http_err.is_timeout() || http_err.is_connect(),
        ClientError::Json(_) | ClientError::Sse(_) | ClientError::Config(_) => false,
    }
}
#[cfg(test)]
mod tests {
use super::*;
// Builds an `Api` error with the given status and otherwise empty fields.
fn api_err(status: Option<u16>) -> ClientError {
ClientError::Api {
status,
code: None,
message: String::new(),
retry_after: None,
}
}
// Every status in the retryable set must be reported as retryable.
#[test]
fn retryable_status_codes() {
for code in [408u16, 425, 429, 500, 502, 503, 504, 529] {
assert!(
is_retryable(&api_err(Some(code))),
"status {code} should be retryable",
);
}
}
// A sample of statuses outside the retryable set must be rejected.
#[test]
fn non_retryable_status_codes() {
for code in [400u16, 401, 403, 404, 409, 410, 422, 501] {
assert!(
!is_retryable(&api_err(Some(code))),
"status {code} should not be retryable",
);
}
}
// An `Api` error without a status is never retried.
#[test]
fn api_without_status_not_retryable() {
assert!(!is_retryable(&api_err(None)));
}
// The Json, Sse and Config variants are never retried.
#[test]
fn json_sse_config_not_retryable() {
let json_err = serde_json::from_str::<serde_json::Value>("not json").unwrap_err();
assert!(!is_retryable(&ClientError::Json(json_err)));
assert!(!is_retryable(&ClientError::Sse("boom".into())));
assert!(!is_retryable(&ClientError::Config("boom".into())));
}
}

View File

@ -17,6 +17,9 @@ use serde::{Deserialize, Serialize};
///
/// - **メタイベント**: `Ping`, `Usage`, `Status`, `Error`
/// - **ブロックイベント**: `BlockStart`, `BlockDelta`, `BlockStop`, `BlockAbort`
/// - **永続化イベント**: `ReasoningItem` (history に commit すべき完成済み
/// reasoning item。streaming 表示用の Thinking BlockStart/Delta/Stop と
/// は別経路で発火する)
///
/// # ブロックのライフサイクル
///
@ -41,6 +44,18 @@ pub enum Event {
BlockStop(BlockStop),
/// ブロック中断
BlockAbort(BlockAbort),
/// Reasoning item の完成。scheme が「次の request に送り返すための
/// reasoning material が揃った」点で 1 度だけ発火する。
///
/// - Anthropic: 1 つの `thinking` content_block 完了ごと
/// - OpenAI Responses: 1 つの reasoning output_item 完了ごと
///
/// 上位層Worker / ReasoningItemCollectorはこれを `Item::Reasoning`
/// として `worker.history` に append する。streaming 表示用の
/// `BlockStart(Thinking)` / `BlockDelta(Thinking)` / `BlockStop(Thinking)`
/// は依然として並行発火するlive display と round-trip persist の責務分離)。
ReasoningItem(ReasoningItemEvent),
}
// =============================================================================
@ -54,17 +69,27 @@ pub struct PingEvent {
}
/// 使用量イベント
///
/// プロバイダから受信した 1 LLM リクエスト分のトークン会計。
/// 各 scheme で正規化され、フィールドの意味は全プロバイダ共通:
///
/// - `input_tokens` は **送信した prompt prefix 全体の占有量**(プロンプト全長)。
/// キャッシュヒット分も含まれる。Anthropic は raw API では非キャッシュ分のみを
/// `input_tokens` として返すため、`AnthropicScheme::convert_usage` で
/// `cache_read + cache_creation` を加算してこの規約に揃えている。
/// - `cache_read_input_tokens` / `cache_creation_input_tokens` は上記の内訳で、
/// 料金会計用。占有量からは差し引かない。
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
pub struct UsageEvent {
/// 入力トークン数
/// 送信した prompt prefix の総トークン数(占有量、キャッシュ込み)
pub input_tokens: Option<u64>,
/// 出力トークン数
/// このリクエストで生成された出力トークン数
pub output_tokens: Option<u64>,
/// 合計トークン数
/// `input_tokens + output_tokens`
pub total_tokens: Option<u64>,
/// キャッシュ読み込みトークン数
/// `input_tokens` のうちキャッシュから読まれた分(割引料金)
pub cache_read_input_tokens: Option<u64>,
/// キャッシュ作成トークン数
/// `input_tokens` のうちこのリクエストでキャッシュに書かれた分割増料金、Anthropic
pub cache_creation_input_tokens: Option<u64>,
}
@ -202,6 +227,31 @@ impl BlockAbort {
}
}
// =============================================================================
// Reasoning Item Event
// =============================================================================
/// A completed reasoning item. The scheme fires it exactly once, after all
/// material needed for the round trip (text, summary, encrypted_content,
/// signature, id) has been gathered.
///
/// Carries the fields of `Item::Reasoning` one-to-one.
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
pub struct ReasoningItemEvent {
/// Item id observed by the scheme (the `id` of OpenAI Responses).
pub id: Option<String>,
/// Reasoning body text. For Anthropic this is the accumulated `thinking`,
/// for OpenAI the accumulated `reasoning_text`. Empty for redacted_thinking.
pub text: String,
/// Summary (`summary_text[]` of OpenAI Responses). Empty for other schemes.
pub summary: Vec<String>,
/// Encrypted opaque blob (Anthropic `redacted_thinking.data` /
/// OpenAI Responses `encrypted_content`).
pub encrypted_content: Option<String>,
/// Anthropic extended thinking signature. Required for the round trip.
pub signature: Option<String>,
}
/// 停止理由
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum StopReason {

View File

@ -16,14 +16,19 @@
//! - `providers`: プロバイダ固有のクライアント実装
//! - `scheme`: APIスキーマリクエスト/レスポンス変換)
pub mod auth;
pub mod capability;
pub mod client;
pub mod error;
pub mod event;
pub mod types;
pub mod providers;
pub mod retry;
pub mod scheme;
pub mod transport;
pub use auth::*;
pub use capability::*;
pub use client::*;
pub use error::*;
pub use event::*;

View File

@ -1,201 +0,0 @@
//! Anthropic プロバイダ実装
//!
//! Anthropic Messages APIと通信し、Eventストリームを出力
use std::pin::Pin;
use crate::llm_client::{
ClientError, LlmClient, Request, event::Event, scheme::anthropic::AnthropicScheme,
};
use async_trait::async_trait;
use eventsource_stream::Eventsource;
use futures::{Stream, StreamExt, TryStreamExt, future::ready};
use reqwest::header::{CONTENT_TYPE, HeaderMap, HeaderValue};
/// Anthropic client.
pub struct AnthropicClient {
/// HTTP client.
http_client: reqwest::Client,
/// API key.
api_key: String,
/// Model name.
model: String,
/// Scheme.
scheme: AnthropicScheme,
/// Base URL.
base_url: String,
}
impl AnthropicClient {
    /// Creates a new Anthropic client.
    ///
    /// Starts with a fresh `reqwest::Client`, the default `AnthropicScheme`
    /// and the base URL `https://api.anthropic.com`.
    pub fn new(api_key: impl Into<String>, model: impl Into<String>) -> Self {
        Self {
            http_client: reqwest::Client::new(),
            api_key: api_key.into(),
            model: model.into(),
            scheme: AnthropicScheme::default(),
            base_url: String::from("https://api.anthropic.com"),
        }
    }

    /// Replaces the HTTP client with a custom one.
    pub fn with_http_client(self, client: reqwest::Client) -> Self {
        Self {
            http_client: client,
            ..self
        }
    }

    /// Replaces the scheme.
    pub fn with_scheme(self, scheme: AnthropicScheme) -> Self {
        Self { scheme, ..self }
    }

    /// Replaces the base URL.
    pub fn with_base_url(self, url: impl Into<String>) -> Self {
        Self {
            base_url: url.into(),
            ..self
        }
    }

    /// Builds the request headers.
    ///
    /// # Errors
    ///
    /// Returns `ClientError::Config` when the API key or the scheme's API
    /// version is not a valid header value.
    fn build_headers(&self) -> Result<HeaderMap, ClientError> {
        // Validate the key before the version, so the key error wins when both are bad.
        let api_key = HeaderValue::from_str(&self.api_key)
            .map_err(|e| ClientError::Config(format!("Invalid API key: {}", e)))?;
        let api_version = HeaderValue::from_str(&self.scheme.api_version)
            .map_err(|e| ClientError::Config(format!("Invalid API version: {}", e)))?;

        let mut headers = HeaderMap::new();
        headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
        headers.insert("x-api-key", api_key);
        headers.insert("anthropic-version", api_version);

        // Opt in to fine-grained tool streaming when the scheme enables it.
        if self.scheme.fine_grained_tool_streaming {
            headers.insert(
                "anthropic-beta",
                HeaderValue::from_static("fine-grained-tool-streaming-2025-05-14"),
            );
        }
        Ok(headers)
    }
}
#[async_trait]
impl LlmClient for AnthropicClient {
/// POSTs the request to `{base_url}/v1/messages` and returns the response
/// as a stream of `Event`s parsed from SSE.
///
/// A non-success HTTP status is returned as `ClientError::Api` carrying the
/// status plus, when the body is JSON, the error `type` and `message`.
async fn stream(
&self,
request: Request,
) -> Result<Pin<Box<dyn Stream<Item = Result<Event, ClientError>> + Send>>, ClientError> {
let url = format!("{}/v1/messages", self.base_url);
let headers = self.build_headers()?;
let body = self.scheme.build_request(&self.model, &request);
let response = self
.http_client
.post(&url)
.headers(headers)
.json(&body)
.send()
.await?;
// Check for an error response.
if !response.status().is_success() {
let status = response.status().as_u16();
let text = response.text().await.unwrap_or_default();
// Try to parse the error body as JSON.
if let Ok(json) = serde_json::from_str::<serde_json::Value>(&text) {
// Use the nested "error" object when present, otherwise the whole body.
let error = json.get("error").unwrap_or(&json);
let code = error.get("type").and_then(|v| v.as_str()).map(String::from);
let message = error
.get("message")
.and_then(|v| v.as_str())
.unwrap_or(&text)
.to_string();
return Err(ClientError::Api {
status: Some(status),
code,
message,
});
}
// Non-JSON body: report the raw text as the message.
return Err(ClientError::Api {
status: Some(status),
code: None,
message: text,
});
}
// Build the SSE stream.
let scheme = self.scheme.clone();
let byte_stream = response
.bytes_stream()
.map_err(|e| std::io::Error::other(e));
let event_stream = byte_stream.eventsource();
// Anthropic's BlockStop events do not carry the correct block_type, so
// the client tracks the state and fills it in.
let mut current_block_type = None;
let stream = event_stream.filter_map(move |result| {
ready(match result {
Ok(event) => {
// Parse the SSE event.
match scheme.parse_event(&event.event, &event.data) {
Ok(Some(mut evt)) => {
// Track and correct the block type.
match &evt {
Event::BlockStart(start) => {
current_block_type = Some(start.block_type);
}
Event::BlockStop(stop) => {
if let Some(block_type) = current_block_type.take() {
// Overwrite with the correct block type
// (replaces the contents of Event::BlockStop).
evt =
Event::BlockStop(crate::llm_client::event::BlockStop {
block_type,
..stop.clone()
});
}
}
_ => {}
}
Some(Ok(evt))
}
// The scheme produced no event for this SSE message: drop it.
Ok(None) => None,
Err(e) => Some(Err(e)),
}
}
Err(e) => Some(Err(ClientError::Sse(e.to_string()))),
})
});
Ok(Box::pin(stream))
}
}
#[cfg(test)]
mod tests {
use super::*;
// The constructor stores the model name as given.
#[test]
fn test_client_creation() {
let client = AnthropicClient::new("test-key", "claude-sonnet-4-20250514");
assert_eq!(client.model, "claude-sonnet-4-20250514");
}
// Headers built from a default client contain the key, version and beta headers.
// NOTE(review): the `anthropic-beta` assertion presumably relies on the default
// scheme enabling fine-grained tool streaming — confirm against `AnthropicScheme`.
#[test]
fn test_build_headers() {
let client = AnthropicClient::new("test-key", "claude-sonnet-4-20250514");
let headers = client.build_headers().unwrap();
assert!(headers.contains_key("x-api-key"));
assert!(headers.contains_key("anthropic-version"));
assert!(headers.contains_key("anthropic-beta"));
}
}

View File

@ -1,185 +0,0 @@
//! Gemini プロバイダ実装
//!
//! Google Gemini APIと通信し、Eventストリームを出力
use std::pin::Pin;
use crate::llm_client::{
ClientError, LlmClient, Request, event::Event, scheme::gemini::GeminiScheme,
};
use async_trait::async_trait;
use eventsource_stream::Eventsource;
use futures::{Stream, StreamExt, TryStreamExt};
use reqwest::header::{CONTENT_TYPE, HeaderMap, HeaderValue};
/// Gemini client.
pub struct GeminiClient {
/// HTTP client.
http_client: reqwest::Client,
/// API key.
api_key: String,
/// Model name.
model: String,
/// Scheme.
scheme: GeminiScheme,
/// Base URL.
base_url: String,
}
impl GeminiClient {
    /// Creates a new Gemini client.
    ///
    /// Starts with a fresh `reqwest::Client`, the default `GeminiScheme` and
    /// the base URL `https://generativelanguage.googleapis.com`.
    pub fn new(api_key: impl Into<String>, model: impl Into<String>) -> Self {
        Self {
            http_client: reqwest::Client::new(),
            api_key: api_key.into(),
            model: model.into(),
            scheme: GeminiScheme::default(),
            base_url: String::from("https://generativelanguage.googleapis.com"),
        }
    }

    /// Replaces the HTTP client with a custom one.
    pub fn with_http_client(self, client: reqwest::Client) -> Self {
        Self {
            http_client: client,
            ..self
        }
    }

    /// Replaces the scheme.
    pub fn with_scheme(self, scheme: GeminiScheme) -> Self {
        Self { scheme, ..self }
    }

    /// Replaces the base URL.
    pub fn with_base_url(self, url: impl Into<String>) -> Self {
        Self {
            base_url: url.into(),
            ..self
        }
    }

    /// Builds the request headers.
    ///
    /// Only `Content-Type: application/json` is set; this function never
    /// returns an error.
    fn build_headers(&self) -> Result<HeaderMap, ClientError> {
        let mut header_map = HeaderMap::new();
        let json_content_type = HeaderValue::from_static("application/json");
        header_map.insert(CONTENT_TYPE, json_content_type);
        Ok(header_map)
    }
}
#[async_trait]
impl LlmClient for GeminiClient {
/// POSTs the request to the model's `streamGenerateContent` endpoint and
/// returns the response as a stream of `Event`s parsed from SSE.
///
/// A non-success HTTP status is returned as `ClientError::Api` carrying the
/// status plus, when the body is JSON, the error `status` string and `message`.
async fn stream(
&self,
request: Request,
) -> Result<Pin<Box<dyn Stream<Item = Result<Event, ClientError>> + Send>>, ClientError> {
// Build the URL: base_url/v1beta/models/{model}:streamGenerateContent?alt=sse&key={api_key}
// NOTE(review): the model and API key are interpolated without URL encoding.
let url = format!(
"{}/v1beta/models/{}:streamGenerateContent?alt=sse&key={}",
self.base_url, self.model, self.api_key
);
let headers = self.build_headers()?;
let body = self.scheme.build_request(&request);
let response = self
.http_client
.post(&url)
.headers(headers)
.json(&body)
.send()
.await?;
// Check for an error response.
if !response.status().is_success() {
let status = response.status().as_u16();
let text = response.text().await.unwrap_or_default();
// Try to parse the error body as JSON.
if let Ok(json) = serde_json::from_str::<serde_json::Value>(&text) {
// Gemini error format: { "error": { "code": xxx, "message": "...", "status": "..." } }
let error = json.get("error").unwrap_or(&json);
let code = error
.get("status")
.and_then(|v| v.as_str())
.map(String::from);
let message = error
.get("message")
.and_then(|v| v.as_str())
.unwrap_or(&text)
.to_string();
return Err(ClientError::Api {
status: Some(status),
code,
message,
});
}
// Non-JSON body: report the raw text as the message.
return Err(ClientError::Api {
status: Some(status),
code: None,
message: text,
});
}
// Build the SSE stream.
let scheme = self.scheme.clone();
let byte_stream = response
.bytes_stream()
.map_err(|e| std::io::Error::other(e));
let event_stream = byte_stream.eventsource();
let stream = event_stream
.map(move |result| {
match result {
Ok(event) => {
// Parse the SSE event.
// Gemini sends them in the "data: {...}" form.
match scheme.parse_event(&event.data) {
Ok(Some(events)) => Ok(Some(events)),
Ok(None) => Ok(None),
Err(e) => Err(e),
}
}
Err(e) => Err(ClientError::Sse(e.to_string())),
}
})
// flatten Option<Vec<Event>> stream to Stream<Event>
.map(|res| {
let s: Pin<Box<dyn Stream<Item = Result<Event, ClientError>> + Send>> = match res {
Ok(Some(events)) => Box::pin(futures::stream::iter(events.into_iter().map(Ok))),
Ok(None) => Box::pin(futures::stream::empty()),
Err(e) => Box::pin(futures::stream::once(async move { Err(e) })),
};
s
})
.flatten();
Ok(Box::pin(stream))
}
}
#[cfg(test)]
mod tests {
use super::*;
// The constructor stores the model name as given.
#[test]
fn test_client_creation() {
let client = GeminiClient::new("test-key", "gemini-2.0-flash");
assert_eq!(client.model, "gemini-2.0-flash");
}
// The built headers include a content-type entry.
#[test]
fn test_build_headers() {
let client = GeminiClient::new("test-key", "gemini-2.0-flash");
let headers = client.build_headers().unwrap();
assert!(headers.contains_key("content-type"));
}
// `with_base_url` replaces the default base URL.
#[test]
fn test_custom_base_url() {
let client = GeminiClient::new("test-key", "gemini-2.0-flash")
.with_base_url("https://custom.api.example.com");
assert_eq!(client.base_url, "https://custom.api.example.com");
}
}

View File

@ -1,8 +0,0 @@
//! プロバイダ実装
//!
//! 各プロバイダ固有のHTTPクライアント実装
pub mod anthropic;
pub mod gemini;
pub mod ollama;
pub mod openai;

View File

@ -1,62 +0,0 @@
//! Ollama プロバイダ実装
//!
//! OllamaはOpenAI互換APIを提供するため、OpenAIクライアントと互換性がある。
//! デフォルトのベースURLと認証設定が異なる。
use std::pin::Pin;
use crate::llm_client::{
ClientError, LlmClient, Request, event::Event, providers::openai::OpenAIClient,
scheme::openai::OpenAIScheme,
};
use async_trait::async_trait;
use futures::Stream;
/// Ollama client.
///
/// A wrapper that uses `OpenAIClient` internally, or that has an
/// implementation equivalent to `OpenAIClient`. Here it provides a
/// customized `OpenAIClient` configuration.
pub struct OllamaClient {
/// The wrapped OpenAI-compatible client.
inner: OpenAIClient,
}
impl OllamaClient {
    /// Creates a new Ollama client for `model`.
    ///
    /// The wrapped `OpenAIClient` is set up with the API key `"ollama"`, the
    /// base URL `http://localhost:11434` and an `OpenAIScheme` that has
    /// legacy `max_tokens` enabled.
    pub fn new(model: impl Into<String>) -> Self {
        // Ollama usually runs on localhost:11434/v1; the API key is "ollama" or ignored.
        let scheme = OpenAIScheme::new().with_legacy_max_tokens(true);
        // NOTE(review): OpenAIScheme currently sets include_usage: true; this
        // assumes modern Ollama versions support usage reporting — confirm.
        let inner = OpenAIClient::new("ollama", model)
            .with_base_url("http://localhost:11434")
            .with_scheme(scheme);
        Self { inner }
    }

    /// Replaces the base URL of the wrapped client.
    pub fn with_base_url(self, url: impl Into<String>) -> Self {
        Self {
            inner: self.inner.with_base_url(url),
        }
    }

    /// Replaces the HTTP client of the wrapped client.
    pub fn with_http_client(self, client: reqwest::Client) -> Self {
        Self {
            inner: self.inner.with_http_client(client),
        }
    }
}
#[async_trait]
impl LlmClient for OllamaClient {
/// Forwards the request unchanged to the wrapped `OpenAIClient`.
async fn stream(
&self,
request: Request,
) -> Result<Pin<Box<dyn Stream<Item = Result<Event, ClientError>> + Send>>, ClientError> {
self.inner.stream(request).await
}
}

View File

@ -1,212 +0,0 @@
//! OpenAI プロバイダ実装
//!
//! OpenAI Chat Completions APIと通信し、Eventストリームを出力
use std::pin::Pin;
use crate::llm_client::{
ClientError, ConfigWarning, LlmClient, Request, RequestConfig, event::Event,
scheme::openai::OpenAIScheme,
};
use async_trait::async_trait;
use eventsource_stream::Eventsource;
use futures::{Stream, StreamExt, TryStreamExt};
use reqwest::header::{CONTENT_TYPE, HeaderMap, HeaderValue};
/// OpenAI client.
pub struct OpenAIClient {
/// HTTP client.
http_client: reqwest::Client,
/// API key.
api_key: String,
/// Model name.
model: String,
/// Scheme.
scheme: OpenAIScheme,
/// Base URL.
base_url: String,
}
impl OpenAIClient {
    /// Creates a new OpenAI client.
    ///
    /// Starts with a fresh `reqwest::Client`, the default `OpenAIScheme` and
    /// the base URL `https://api.openai.com`.
    pub fn new(api_key: impl Into<String>, model: impl Into<String>) -> Self {
        Self {
            http_client: reqwest::Client::new(),
            api_key: api_key.into(),
            model: model.into(),
            scheme: OpenAIScheme::default(),
            base_url: String::from("https://api.openai.com"),
        }
    }

    /// Replaces the HTTP client with a custom one.
    pub fn with_http_client(mut self, client: reqwest::Client) -> Self {
        self.http_client = client;
        self
    }

    /// Replaces the scheme.
    pub fn with_scheme(mut self, scheme: OpenAIScheme) -> Self {
        self.scheme = scheme;
        self
    }

    /// Replaces the base URL.
    pub fn with_base_url(mut self, url: impl Into<String>) -> Self {
        self.base_url = url.into();
        self
    }

    /// Builds the request headers.
    ///
    /// `Content-Type: application/json` is always set. An `Authorization:
    /// Bearer <key>` header, marked sensitive, is added only when the API key
    /// is non-empty; providers such as Ollama may run with an empty or dummy key.
    ///
    /// # Errors
    ///
    /// Returns `ClientError::Config` when the API key cannot form a valid
    /// header value.
    fn build_headers(&self) -> Result<HeaderMap, ClientError> {
        let mut headers = HeaderMap::new();
        headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));

        if self.api_key.is_empty() {
            // No key configured: send no Authorization header at all.
            return Ok(headers);
        }

        let mut bearer = HeaderValue::from_str(&format!("Bearer {}", self.api_key))
            .map_err(|e| ClientError::Config(format!("Invalid API key: {}", e)))?;
        bearer.set_sensitive(true);
        headers.insert("Authorization", bearer);
        Ok(headers)
    }
}
#[async_trait]
impl LlmClient for OpenAIClient {
    /// Sends `request` to the Chat Completions endpoint and returns the parsed
    /// event stream.
    ///
    /// The endpoint URL is derived from `base_url`. Trailing slashes are
    /// ignored and `/v1` is appended only when the base does not already end
    /// with it, so `https://host`, `https://host/`, `https://host/v1` and
    /// `https://host/v1/` all resolve to `https://host/v1/chat/completions`.
    ///
    /// # Errors
    ///
    /// - Header construction failures and HTTP send failures are propagated.
    /// - A non-success HTTP status yields `ClientError::Api` carrying the
    ///   status code; when the body is JSON, `code` / `message` are taken from
    ///   the `error` object (or the top-level object when `error` is absent).
    /// - Inside the returned stream, SSE transport failures surface as
    ///   `ClientError::Sse` items and scheme parse failures as the error the
    ///   scheme returned.
    async fn stream(
        &self,
        request: Request,
    ) -> Result<Pin<Box<dyn Stream<Item = Result<Event, ClientError>> + Send>>, ClientError> {
        // Normalise trailing slashes first; otherwise a base such as
        // "http://localhost:11434/v1/" would miss the "/v1" check and end up
        // as ".../v1/v1/chat/completions".
        let base = self.base_url.trim_end_matches('/');
        let url = if base.ends_with("/v1") {
            format!("{}/chat/completions", base)
        } else {
            format!("{}/v1/chat/completions", base)
        };

        let headers = self.build_headers()?;
        let body = self.scheme.build_request(&self.model, &request);
        let response = self
            .http_client
            .post(&url)
            .headers(headers)
            .json(&body)
            .send()
            .await?;

        // Turn error responses into `ClientError::Api`.
        if !response.status().is_success() {
            let status = response.status().as_u16();
            let text = response.text().await.unwrap_or_default();

            // OpenAI error format: { "error": { "message": "...", "type": "...", ... } }
            if let Ok(json) = serde_json::from_str::<serde_json::Value>(&text) {
                let error = json.get("error").unwrap_or(&json);
                let code = error.get("type").and_then(|v| v.as_str()).map(String::from);
                let message = error
                    .get("message")
                    .and_then(|v| v.as_str())
                    .unwrap_or(&text)
                    .to_string();
                return Err(ClientError::Api {
                    status: Some(status),
                    code,
                    message,
                });
            }

            // Body was not JSON: report it verbatim.
            return Err(ClientError::Api {
                status: Some(status),
                code: None,
                message: text,
            });
        }

        // Build the SSE stream. Each frame expands to zero or more events.
        let scheme = self.scheme.clone();
        let stream = response
            .bytes_stream()
            .map_err(std::io::Error::other)
            .eventsource()
            .flat_map(move |frame| {
                let parsed = match frame {
                    // "[DONE]" is the end-of-stream marker and carries no events.
                    Ok(event) if event.data == "[DONE]" => Ok(None),
                    Ok(event) => scheme.parse_event(&event.data),
                    Err(e) => Err(ClientError::Sse(e.to_string())),
                };
                let items: Vec<Result<Event, ClientError>> = match parsed {
                    Ok(Some(events)) => events.into_iter().map(Ok).collect(),
                    Ok(None) => Vec::new(),
                    Err(e) => vec![Err(e)],
                };
                futures::stream::iter(items)
            });

        Ok(Box::pin(stream))
    }

    /// Reports `RequestConfig` fields this provider cannot honour.
    ///
    /// Currently only `top_k` is flagged as unsupported.
    fn validate_config(&self, config: &RequestConfig) -> Vec<ConfigWarning> {
        let mut warnings = Vec::new();
        // OpenAI does not support top_k
        if config.top_k.is_some() {
            warnings.push(ConfigWarning::unsupported("top_k", "OpenAI"));
        }
        warnings
    }
}

View File

@ -0,0 +1,104 @@
//! LLM response stream を開く前の transient error 向けリトライポリシー。
//!
//! Worker が `LlmClient::stream` の open error に対して `is_retryable` を見て
//! retry / backoff / TUI event / cancellation をまとめて管理する。
//! SSE 読み出し開始後の失敗は対象外。
use std::time::Duration;
/// Retry policy combining exponential backoff, full jitter and a cumulative
/// timeout.
///
/// `Default` returns the fixed values used across llm-worker. Extend this if
/// overriding through the manifest ever becomes necessary (currently not
/// needed; see `tickets/llm-worker-transient-retry.md`).
#[derive(Debug, Clone)]
pub struct RetryPolicy {
    /// Base of the exponential. The upper bound of a wait is
    /// `base * 2^attempt` clamped to `cap`; the actual wait is drawn from
    /// that bound with full jitter.
    pub base: Duration,
    /// Upper bound of a single wait.
    pub cap: Duration,
    /// Total number of attempts (initial try + retries). `1` disables retry.
    pub max_attempts: u32,
    /// Cumulative timeout measured from the start of the first send. Waits
    /// that would exceed it are cut off.
    pub total_timeout: Duration,
}

impl Default for RetryPolicy {
    /// Returns the built-in policy: 500 ms base, 10 s cap, 4 attempts and a
    /// 30 s cumulative timeout.
    fn default() -> Self {
        Self {
            base: Duration::from_millis(500),
            cap: Duration::from_secs(10),
            max_attempts: 4,
            total_timeout: Duration::from_secs(30),
        }
    }
}

impl RetryPolicy {
    /// Returns how long to wait after the `attempt`-th failure (0-indexed).
    ///
    /// The upper bound is `base * 2^attempt` clamped to `cap`, and the result
    /// is drawn from `[0, upper]`. The exponent is limited to 20 and every
    /// intermediate value saturates at `u64::MAX` nanoseconds, so oversized
    /// `base` / `cap` values never wrap around to a tiny wait.
    ///
    /// To honour a `Retry-After` value, do not call this; use that value as is.
    pub fn backoff(&self, attempt: u32) -> Duration {
        let shift = attempt.min(20);
        let base_nanos = saturating_nanos(self.base);
        let cap_nanos = saturating_nanos(self.cap);
        let upper = base_nanos.saturating_mul(1u64 << shift).min(cap_nanos);
        Duration::from_nanos(jitter_nanos(upper))
    }
}

/// Converts a duration to whole nanoseconds, saturating at `u64::MAX`
/// instead of silently dropping the high bits.
fn saturating_nanos(duration: Duration) -> u64 {
    u64::try_from(duration.as_nanos()).unwrap_or(u64::MAX)
}

/// Picks one pseudo-random value from `[0, max_nanos]`.
///
/// The low bits of `SystemTime` are scrambled with splitmix64. This is a
/// lightweight implementation without cryptographic quality, which is
/// sufficient for de-synchronising retries with full jitter.
fn jitter_nanos(max_nanos: u64) -> u64 {
    if max_nanos == 0 {
        return 0;
    }
    let seed = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        // Truncation is intentional: only the fast-changing low bits matter.
        .map(|d| d.as_nanos() as u64)
        .unwrap_or(0);
    let mut x = seed.wrapping_add(0x9E37_79B9_7F4A_7C15);
    x = (x ^ (x >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9);
    x = (x ^ (x >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB);
    x ^= x >> 31;
    match max_nanos.checked_add(1) {
        Some(modulus) => x % modulus,
        // `max_nanos == u64::MAX`: every `u64` is already within range, and
        // `max_nanos + 1` would overflow.
        None => x,
    }
}
#[cfg(test)]
mod tests {
use super::*;
// Pins the built-in default values so accidental changes are caught.
#[test]
fn default_policy_values() {
let p = RetryPolicy::default();
assert_eq!(p.base, Duration::from_millis(500));
assert_eq!(p.cap, Duration::from_secs(10));
assert_eq!(p.max_attempts, 4);
assert_eq!(p.total_timeout, Duration::from_secs(30));
}
// The jittered wait must never exceed `cap`, including for attempt numbers
// beyond the internal exponent limit.
#[test]
fn backoff_respects_cap() {
let p = RetryPolicy::default();
for attempt in 0..30u32 {
assert!(
p.backoff(attempt) <= p.cap,
"attempt {attempt} exceeded cap",
);
}
}
// A zero base collapses the upper bound to zero, so no wait is produced.
#[test]
fn backoff_zero_when_base_zero() {
let p = RetryPolicy {
base: Duration::ZERO,
cap: Duration::from_secs(10),
max_attempts: 4,
total_timeout: Duration::from_secs(30),
};
for attempt in 0..5 {
assert_eq!(p.backoff(attempt), Duration::ZERO);
}
}
}

View File

@ -0,0 +1,23 @@
//! Anthropic scheme の wire-level 既定 capability。
//!
//! モデル ID 固有のテーブル(`claude-*` など)は高レベル構築層
//! (`provider::capability`)の責務。ここでは未知モデルでも「この wire で
//! 安全に送れる最小共通項」を返すだけに留める。
use crate::llm_client::capability::{
CacheStrategy, ModelCapability, StructuredOutput, ToolCallingSupport,
};
/// Scheme-level default capability.
///
/// Uses `CacheStrategy::Auto`, which does not send `cache_control`, with
/// Ollama's reuse of `/v1/messages` in mind.
pub(crate) fn default_capability() -> ModelCapability {
ModelCapability {
tool_calling: ToolCallingSupport::Parallel,
structured_output: StructuredOutput::JsonSchema,
reasoning: None,
vision: false,
prompt_caching: CacheStrategy::Auto,
}
}

View File

@ -12,6 +12,7 @@ use crate::llm_client::{
use serde::Deserialize;
use super::AnthropicScheme;
use super::scheme_impl::{AnthropicState, PendingThinking};
/// Anthropic SSEイベントタイプ
#[derive(Debug, Clone, PartialEq, Eq)]
@ -75,7 +76,21 @@ pub(crate) enum ContentBlock {
#[serde(rename = "text")]
Text { text: String },
#[serde(rename = "thinking")]
Thinking { thinking: String },
Thinking {
#[serde(default)]
thinking: String,
/// 非ストリーミングレスポンス由来の初期 signature(通常はストリームでは
/// 空 → `signature_delta` で埋まる)。
#[serde(default)]
signature: Option<String>,
},
#[serde(rename = "redacted_thinking")]
RedactedThinking {
/// 暗号化された opaque blob。signature ではなく、まるごと
/// `redacted_thinking.data` として送り返す必要がある。
#[serde(default)]
data: String,
},
#[serde(rename = "tool_use")]
ToolUse {
id: String,
@ -228,7 +243,9 @@ impl AnthropicScheme {
fn convert_block_start(&self, event: &ContentBlockStartEvent) -> Event {
let (block_type, metadata) = match &event.content_block {
ContentBlock::Text { .. } => (BlockType::Text, BlockMetadata::Text),
ContentBlock::Thinking { .. } => (BlockType::Thinking, BlockMetadata::Thinking),
ContentBlock::Thinking { .. } | ContentBlock::RedactedThinking { .. } => {
(BlockType::Thinking, BlockMetadata::Thinking)
}
ContentBlock::ToolUse { id, name, .. } => (
BlockType::ToolUse,
BlockMetadata::ToolUse {
@ -264,13 +281,139 @@ impl AnthropicScheme {
}))
}
/// Stateful, higher-level SSE parse that threads `state` across frames.
///
/// On top of the single events produced by `parse_event`, this:
/// - fills in the `block_type` of `content_block_stop` with the value
///   recorded at the preceding start event
/// - accumulates the body, signature and data of `thinking` /
///   `redacted_thinking` blocks in `state.pending_thinking`, and additionally
///   emits `Event::ReasoningItem` on `content_block_stop`
/// - accumulates `signature_delta` (it is not forwarded as a stream event and
///   is only reflected in the reasoning event)
///
/// Returns an empty `Vec` when `AnthropicEventType::parse` does not recognise
/// `event_type`. JSON deserialization failures are propagated with `?`.
pub(crate) fn parse_with_state(
&self,
event_type: &str,
data: &str,
state: &mut AnthropicState,
) -> Result<Vec<Event>, ClientError> {
let Some(parsed_event_type) = AnthropicEventType::parse(event_type) else {
return Ok(Vec::new());
};
// `signature_delta` is not shown on the stream; it is only accumulated in
// `state`. Everything else is turned into standard events.
let mut emitted: Vec<Event> = Vec::new();
match parsed_event_type {
AnthropicEventType::ContentBlockStart => {
let raw: ContentBlockStartEvent = serde_json::from_str(data)?;
// Remember the block type so the matching stop event can report it.
state.current_block_type = Some(match &raw.content_block {
ContentBlock::Text { .. } => BlockType::Text,
ContentBlock::Thinking { .. } | ContentBlock::RedactedThinking { .. } => {
BlockType::Thinking
}
ContentBlock::ToolUse { .. } => BlockType::ToolUse,
});
// Start a fresh accumulation buffer for (redacted) thinking blocks.
match &raw.content_block {
ContentBlock::Thinking {
thinking,
signature,
} => {
state.pending_thinking = Some(PendingThinking {
text: thinking.clone(),
signature: signature.clone(),
redacted_data: None,
});
}
ContentBlock::RedactedThinking { data: blob } => {
state.pending_thinking = Some(PendingThinking {
text: String::new(),
signature: None,
redacted_data: Some(blob.clone()),
});
}
_ => {}
}
emitted.push(self.convert_block_start(&raw));
}
AnthropicEventType::ContentBlockDelta => {
let raw: ContentBlockDeltaEvent = serde_json::from_str(data)?;
match &raw.delta {
DeltaBlock::ThinkingDelta { thinking } => {
// Buffer the text for the final reasoning item and still
// forward the delta to the stream.
if let Some(pending) = state.pending_thinking.as_mut() {
pending.text.push_str(thinking);
}
emitted.push(Event::BlockDelta(BlockDelta {
index: raw.index,
delta: DeltaContent::Thinking(thinking.clone()),
}));
}
DeltaBlock::SignatureDelta { signature } => {
if let Some(pending) = state.pending_thinking.as_mut() {
// Normally arrives only once, but concatenate in case
// several fragments are sent.
match &mut pending.signature {
Some(acc) => acc.push_str(signature),
None => pending.signature = Some(signature.clone()),
}
}
}
DeltaBlock::TextDelta { text } => {
emitted.push(Event::BlockDelta(BlockDelta {
index: raw.index,
delta: DeltaContent::Text(text.clone()),
}));
}
DeltaBlock::InputJsonDelta { partial_json } => {
emitted.push(Event::BlockDelta(BlockDelta {
index: raw.index,
delta: DeltaContent::InputJson(partial_json.clone()),
}));
}
}
}
AnthropicEventType::ContentBlockStop => {
let raw: ContentBlockStopEvent = serde_json::from_str(data)?;
// Falls back to `Text` when no start event was recorded.
let block_type = state.current_block_type.take().unwrap_or(BlockType::Text);
emitted.push(Event::BlockStop(BlockStop {
index: raw.index,
block_type,
stop_reason: None,
}));
// A finished thinking block additionally yields its reasoning item.
if matches!(block_type, BlockType::Thinking) {
if let Some(pending) = state.pending_thinking.take() {
emitted.push(Event::ReasoningItem(pending.into_event()));
}
}
}
// The remaining event types need no state; delegate to `parse_event`.
_ => {
if let Some(event) = self.parse_event(event_type, data)? {
emitted.push(event);
}
}
}
Ok(emitted)
}
fn convert_usage(&self, usage: &UsageData) -> UsageEvent {
let input = usage.input_tokens.unwrap_or(0);
// Anthropic の `input_tokens` は **キャッシュ外** の入力トークンのみで、
// プロンプト全長は input_tokens + cache_read + cache_creation。
// UsageEvent の `input_tokens` には「占有量(プロンプト全長)」を載せる
// 規約に合わせて、ここでキャッシュ分を足し込む。
// cache_read_input_tokens / cache_creation_input_tokens は内訳として
// 別フィールドに残るので、料金計算側で `input - cache_read - cache_creation`
// により非キャッシュ入力分は逆算可能。
let raw_input = usage.input_tokens.unwrap_or(0);
let cache_read = usage.cache_read_input_tokens.unwrap_or(0);
let cache_creation = usage.cache_creation_input_tokens.unwrap_or(0);
let input_total = raw_input + cache_read + cache_creation;
let output = usage.output_tokens.unwrap_or(0);
UsageEvent {
input_tokens: usage.input_tokens,
input_tokens: usage.input_tokens.map(|_| input_total),
output_tokens: usage.output_tokens,
total_tokens: Some(input + output),
total_tokens: Some(input_total + output),
cache_read_input_tokens: usage.cache_read_input_tokens,
cache_creation_input_tokens: usage.cache_creation_input_tokens,
}
@ -289,12 +432,33 @@ mod tests {
let event = scheme.parse_event("message_start", data).unwrap().unwrap();
match event {
Event::Usage(u) => {
// キャッシュなしなので input_total = raw_input = 10
assert_eq!(u.input_tokens, Some(10));
}
_ => panic!("Expected Usage event"),
}
}
#[test]
fn test_convert_usage_includes_cache_in_input_total() {
// Anthropic's `input_tokens` covers only the non-cached part; the occupied
// prompt size is input + cache_read + cache_creation.
// `UsageEvent.input_tokens` is normalised to that occupied size.
let scheme = AnthropicScheme::new();
let usage = UsageData {
input_tokens: Some(100),
output_tokens: Some(50),
cache_read_input_tokens: Some(800),
cache_creation_input_tokens: Some(200),
};
let event = scheme.convert_usage(&usage);
// 100 + 800 + 200 = 1100
assert_eq!(event.input_tokens, Some(1100));
assert_eq!(event.cache_read_input_tokens, Some(800));
assert_eq!(event.cache_creation_input_tokens, Some(200));
// 1100 input + 50 output
assert_eq!(event.total_tokens, Some(1150));
}
#[test]
fn test_parse_content_block_start_text() {
let scheme = AnthropicScheme::new();
@ -359,6 +523,117 @@ mod tests {
}
}
#[test]
fn thinking_block_emits_reasoning_item_with_signature() {
// Once a thinking block completes, the ReasoningItem must carry both the
// accumulated text and the signature.
let scheme = AnthropicScheme::new();
let mut state = AnthropicState::default();
let evs = scheme
.parse_with_state(
"content_block_start",
r#"{"type":"content_block_start","index":0,"content_block":{"type":"thinking","thinking":""}}"#,
&mut state,
)
.unwrap();
assert!(matches!(evs[0], Event::BlockStart(_)));
// Two text fragments followed by the signature.
scheme
.parse_with_state(
"content_block_delta",
r#"{"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":"hello "}}"#,
&mut state,
)
.unwrap();
scheme
.parse_with_state(
"content_block_delta",
r#"{"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":"world"}}"#,
&mut state,
)
.unwrap();
scheme
.parse_with_state(
"content_block_delta",
r#"{"type":"content_block_delta","index":0,"delta":{"type":"signature_delta","signature":"SIG-XYZ"}}"#,
&mut state,
)
.unwrap();
let stop_evs = scheme
.parse_with_state(
"content_block_stop",
r#"{"type":"content_block_stop","index":0}"#,
&mut state,
)
.unwrap();
// Two events are expected in order: BlockStop, then ReasoningItem.
assert!(matches!(stop_evs[0], Event::BlockStop(_)));
let Event::ReasoningItem(reasoning) = &stop_evs[1] else {
panic!("expected ReasoningItem, got {:?}", stop_evs[1]);
};
assert_eq!(reasoning.text, "hello world");
assert_eq!(reasoning.signature.as_deref(), Some("SIG-XYZ"));
assert!(reasoning.encrypted_content.is_none());
}
#[test]
fn redacted_thinking_emits_reasoning_item_with_data() {
// A redacted_thinking block has no text or signature; its opaque `data`
// must come back as `encrypted_content` on the ReasoningItem.
let scheme = AnthropicScheme::new();
let mut state = AnthropicState::default();
scheme
.parse_with_state(
"content_block_start",
r#"{"type":"content_block_start","index":0,"content_block":{"type":"redacted_thinking","data":"opaque-blob"}}"#,
&mut state,
)
.unwrap();
let stop_evs = scheme
.parse_with_state(
"content_block_stop",
r#"{"type":"content_block_stop","index":0}"#,
&mut state,
)
.unwrap();
// Index 0 is the BlockStop; the reasoning item follows it.
let Event::ReasoningItem(reasoning) = &stop_evs[1] else {
panic!("expected ReasoningItem");
};
assert!(reasoning.text.is_empty());
assert!(reasoning.signature.is_none());
assert_eq!(reasoning.encrypted_content.as_deref(), Some("opaque-blob"));
}
#[test]
fn text_block_does_not_emit_reasoning_item() {
// A plain text block must end with a single BlockStop and no ReasoningItem.
let scheme = AnthropicScheme::new();
let mut state = AnthropicState::default();
scheme
.parse_with_state(
"content_block_start",
r#"{"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}"#,
&mut state,
)
.unwrap();
scheme
.parse_with_state(
"content_block_delta",
r#"{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"hi"}}"#,
&mut state,
)
.unwrap();
let stop_evs = scheme
.parse_with_state(
"content_block_stop",
r#"{"type":"content_block_stop","index":0}"#,
&mut state,
)
.unwrap();
assert_eq!(stop_evs.len(), 1);
assert!(matches!(stop_evs[0], Event::BlockStop(_)));
}
#[test]
fn test_parse_ping() {
let scheme = AnthropicScheme::new();

View File

@ -3,8 +3,12 @@
//! - リクエストJSON生成
//! - SSEイベントパース → Event変換
mod capability;
mod events;
mod request;
mod scheme_impl;
pub use scheme_impl::AnthropicState;
/// Anthropicスキーマ
///

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,107 @@
//! `impl Scheme for AnthropicScheme`
//!
//! Anthropic Messages API の wire 表現に必要な URL・ヘッダ・SSE パース・
//! リクエスト body 生成を共通 `Scheme` trait にぶら下げる。
use serde_json::Value;
use crate::llm_client::{
ClientError,
auth::AuthRequirement,
capability::ModelCapability,
event::{BlockType, Event, ReasoningItemEvent},
scheme::Scheme,
types::Request,
};
use super::AnthropicScheme;
/// Anthropic の SSE パースで必要な状態。
///
/// 1. `content_block_stop` イベントは `block_type` を持たない仕様なので、
/// 直前の `content_block_start` で観測した `block_type` を保持して
/// `BlockStop` に書き戻す。
/// 2. `thinking` ブロック中の `thinking_delta` テキストと `signature_delta`
/// 署名、および `redacted_thinking` ブロックの `data` を蓄積し、
/// `content_block_stop` で `Event::ReasoningItem` を発火する
/// (round-trip 永続化のため)。
#[derive(Debug, Default)]
pub struct AnthropicState {
// Block type recorded for the block currently open; consumed when the
// matching stop event is converted.
pub(crate) current_block_type: Option<BlockType>,
// Accumulation buffer for the `thinking` / `redacted_thinking` block
// currently open, if any.
pub(crate) pending_thinking: Option<PendingThinking>,
}
/// Accumulation buffer for a single `thinking` or `redacted_thinking`
/// content block.
#[derive(Debug, Default)]
pub(crate) struct PendingThinking {
/// Thinking text collected so far.
pub(crate) text: String,
/// Signature collected so far, if any.
pub(crate) signature: Option<String>,
/// Opaque data of a `redacted_thinking` block, if any.
pub(crate) redacted_data: Option<String>,
}
impl PendingThinking {
/// Converts the buffer into a `ReasoningItemEvent`.
///
/// `redacted_data` becomes `encrypted_content`; `id` is left unset and
/// `summary` is empty.
pub(crate) fn into_event(self) -> ReasoningItemEvent {
ReasoningItemEvent {
id: None,
text: self.text,
summary: Vec::new(),
encrypted_content: self.redacted_data,
signature: self.signature,
}
}
}
impl Scheme for AnthropicScheme {
    /// Per-stream parse state (block type tracking and thinking accumulation).
    type State = AnthropicState;

    /// Base URL used when none is configured.
    fn default_base_url(&self) -> &'static str {
        "https://api.anthropic.com"
    }

    /// The Messages endpoint; the model ID is not part of the path.
    fn path(&self, _model_id: &str) -> String {
        String::from("/v1/messages")
    }

    fn required_auth(&self) -> AuthRequirement {
        // The Ollama-compatible `/v1/messages` endpoint needs no
        // authentication, but that is absorbed by `AuthRef::None` plus the
        // "do nothing when `ResolvedAuth::None`" branch on the
        // `build_headers` side (the current strategy is to require XApiKey
        // while letting the None side pass as well, so that the `accepts`
        // check does not reject it).
        AuthRequirement::XApiKey
    }

    /// Always sends `anthropic-version`; adds the fine-grained tool streaming
    /// beta header when that option is enabled.
    fn additional_headers(&self) -> Vec<(&'static str, String)> {
        let beta = self.fine_grained_tool_streaming.then(|| {
            (
                "anthropic-beta",
                "fine-grained-tool-streaming-2025-05-14".to_string(),
            )
        });
        std::iter::once(("anthropic-version", self.api_version.clone()))
            .chain(beta)
            .collect()
    }

    /// Builds the typed request and serialises it to a JSON value.
    fn build_request_body(
        &self,
        model_id: &str,
        request: &Request,
        capability: &ModelCapability,
    ) -> Value {
        let typed = self.build_request(model_id, request, capability);
        serde_json::to_value(&typed).expect("AnthropicRequest is always serialisable")
    }

    /// Delegates to the stateful parser.
    fn parse_sse(
        &self,
        event_type: &str,
        data: &str,
        state: &mut Self::State,
    ) -> Result<Vec<Event>, ClientError> {
        self.parse_with_state(event_type, data, state)
    }

    /// Returns the scheme-level default capability.
    fn default_capability(&self) -> ModelCapability {
        super::capability::default_capability()
    }
}

View File

@ -0,0 +1,20 @@
//! Gemini scheme の wire-level 既定 capability。
//!
//! モデル ID 固有のテーブル(`gemini-*` バージョン別の reasoning 有無)は
//! 高レベル構築層(`provider::capability`)の責務。ここでは wire の
//! 保守的 default のみ。
use crate::llm_client::capability::{
CacheStrategy, ModelCapability, StructuredOutput, ToolCallingSupport,
};
/// Scheme-level default capability (for unknown models or models without an
/// explicit entry).
pub(crate) fn default_capability() -> ModelCapability {
ModelCapability {
tool_calling: ToolCallingSupport::Parallel,
structured_output: StructuredOutput::JsonSchema,
reasoning: None,
vision: true,
prompt_caching: CacheStrategy::Auto,
}
}

View File

@ -131,6 +131,7 @@ impl GeminiScheme {
status: None,
code: Some("parse_error".to_string()),
message: format!("Failed to parse Gemini SSE data: {} -> {}", e, data),
retry_after: None,
})?;
let mut events = Vec::new();

View File

@ -3,8 +3,10 @@
//! - リクエストJSON生成
//! - SSEイベントパース → Event変換
mod capability;
mod events;
mod request;
mod scheme_impl;
/// Geminiスキーマ
///

View File

@ -7,7 +7,8 @@ use serde_json::Value;
use crate::llm_client::{
Request,
types::{Item, Role, ToolDefinition},
capability::{ModelCapability, ReasoningControl, ReasoningSupport},
types::{Item, Role, ToolDefinition, parse_tool_arguments},
};
use super::GeminiScheme;
@ -139,11 +140,26 @@ pub(crate) struct GeminiGenerationConfig {
/// Stop sequences
#[serde(skip_serializing_if = "Vec::is_empty")]
pub stop_sequences: Vec<String>,
/// Thinking / reasoning 設定(Gemini 2.5 以降)。
#[serde(skip_serializing_if = "Option::is_none")]
pub thinking_config: Option<GeminiThinkingConfig>,
}
/// Gemini thinking config (gemini-2.5 and later).
///
/// Serialised with camelCase field names, i.e. `thinkingBudget`.
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct GeminiThinkingConfig {
/// Token budget for thinking. `-1` means dynamic.
pub thinking_budget: i32,
}
impl GeminiScheme {
/// Build Gemini request from Request
pub(crate) fn build_request(&self, request: &Request) -> GeminiRequest {
pub(crate) fn build_request(
&self,
request: &Request,
capability: &ModelCapability,
) -> GeminiRequest {
let contents = self.convert_items_to_contents(&request.items);
// System prompt
@ -177,6 +193,24 @@ impl GeminiScheme {
None
};
// Reasoning の投影: capability が BudgetTokens / Both をサポートし、
// request 側で budget_tokens が指定されているときだけ thinking_config を付ける。
let supports_budget = matches!(
capability.reasoning,
Some(ReasoningSupport::BudgetTokens | ReasoningSupport::Both),
);
let thinking_config = request
.config
.reasoning
.as_ref()
.filter(|_| supports_budget)
.and_then(|rc| match rc {
ReasoningControl::BudgetTokens(budget) => Some(GeminiThinkingConfig {
thinking_budget: *budget,
}),
ReasoningControl::Effort(_) => None,
});
// Generation config
let generation_config = Some(GeminiGenerationConfig {
max_output_tokens: request.config.max_tokens,
@ -184,6 +218,7 @@ impl GeminiScheme {
top_p: request.config.top_p,
top_k: request.config.top_k,
stop_sequences: request.config.stop_sequences.clone(),
thinking_config,
});
GeminiRequest {
@ -216,9 +251,8 @@ impl GeminiScheme {
);
let gemini_role = match role {
Role::User => "user",
Role::User | Role::System => "user",
Role::Assistant => "model",
Role::System => continue, // Skip system role items
};
let parts: Vec<GeminiPart> = content
@ -245,9 +279,8 @@ impl GeminiScheme {
});
}
// Parse arguments
let args = serde_json::from_str(arguments)
.unwrap_or_else(|_| Value::Object(serde_json::Map::new()));
// Parse arguments (normalize non-object / legacy "null" payloads to {})
let args = parse_tool_arguments(arguments);
pending_model_parts.push(GeminiPart::FunctionCall {
function_call: GeminiFunctionCall {
@ -258,7 +291,10 @@ impl GeminiScheme {
}
Item::ToolResult {
call_id, output, ..
call_id,
summary,
content,
..
} => {
// Flush pending model parts first
if !pending_model_parts.is_empty() {
@ -268,12 +304,16 @@ impl GeminiScheme {
});
}
let text = match content {
Some(c) => format!("{summary}\n{c}"),
None => summary.clone(),
};
pending_user_parts.push(GeminiPart::FunctionResponse {
function_response: GeminiFunctionResponse {
name: call_id.clone(),
response: GeminiFunctionResponseContent {
name: call_id.clone(),
content: Value::String(output.clone()),
content: Value::String(text),
},
},
});
@ -336,6 +376,26 @@ impl GeminiScheme {
#[cfg(test)]
mod tests {
use super::*;
use crate::llm_client::capability::{
CacheStrategy, ReasoningEffort, StructuredOutput, ToolCallingSupport,
};
// Test fixture: a capability without reasoning support.
fn cap() -> ModelCapability {
ModelCapability {
tool_calling: ToolCallingSupport::Parallel,
structured_output: StructuredOutput::JsonSchema,
reasoning: None,
vision: true,
prompt_caching: CacheStrategy::Auto,
}
}
// Test fixture: same as `cap()` but advertising budget-token reasoning.
fn cap_budget_reasoning() -> ModelCapability {
ModelCapability {
reasoning: Some(ReasoningSupport::BudgetTokens),
..cap()
}
}
#[test]
fn test_build_simple_request() {
@ -344,7 +404,7 @@ mod tests {
.system("You are a helpful assistant.")
.user("Hello!");
let gemini_req = scheme.build_request(&request);
let gemini_req = scheme.build_request(&request, &cap());
assert!(gemini_req.system_instruction.is_some());
assert_eq!(gemini_req.contents.len(), 1);
@ -366,7 +426,7 @@ mod tests {
})),
);
let gemini_req = scheme.build_request(&request);
let gemini_req = scheme.build_request(&request, &cap());
assert_eq!(gemini_req.tools.len(), 1);
assert_eq!(gemini_req.tools[0].function_declarations.len(), 1);
@ -382,7 +442,7 @@ mod tests {
let scheme = GeminiScheme::new();
let request = Request::new().user("Hello").assistant("Hi there!");
let gemini_req = scheme.build_request(&request);
let gemini_req = scheme.build_request(&request, &cap());
assert_eq!(gemini_req.contents.len(), 2);
assert_eq!(gemini_req.contents[0].role, "user");
@ -401,11 +461,36 @@ mod tests {
))
.item(Item::tool_result("call_123", "Sunny, 25°C"));
let gemini_req = scheme.build_request(&request);
let gemini_req = scheme.build_request(&request, &cap());
assert_eq!(gemini_req.contents.len(), 3);
assert_eq!(gemini_req.contents[0].role, "user");
assert_eq!(gemini_req.contents[1].role, "model");
assert_eq!(gemini_req.contents[2].role, "user");
}
#[test]
fn thinking_budget_projected_when_supported() {
// With a budget-capable model, a requested token budget must reach
// `generation_config.thinking_config` unchanged.
let scheme = GeminiScheme::new();
let mut request = Request::new().user("think");
request.config.reasoning = Some(ReasoningControl::BudgetTokens(-1));
let gemini_req = scheme.build_request(&request, &cap_budget_reasoning());
let config = gemini_req.generation_config.expect("generation config");
let thinking = config.thinking_config.expect("thinking config");
assert_eq!(thinking.thinking_budget, -1);
}
#[test]
fn effort_reasoning_not_projected_to_gemini() {
// Effort-style reasoning has no Gemini wire representation here, so no
// thinking config may be emitted even for a budget-capable model.
let scheme = GeminiScheme::new();
let mut request = Request::new().user("think");
request.config.reasoning = Some(ReasoningControl::Effort(ReasoningEffort::Medium));
let gemini_req = scheme.build_request(&request, &cap_budget_reasoning());
let config = gemini_req.generation_config.expect("generation config");
assert!(config.thinking_config.is_none());
}
}

View File

@ -0,0 +1,49 @@
//! `impl Scheme for GeminiScheme`
use serde_json::Value;
use crate::llm_client::{
ClientError, auth::AuthRequirement, capability::ModelCapability, event::Event, scheme::Scheme,
types::Request,
};
use super::GeminiScheme;
impl Scheme for GeminiScheme {
    /// Gemini parsing keeps no state between SSE frames.
    type State = ();

    /// Base URL used when none is configured.
    fn default_base_url(&self) -> &'static str {
        "https://generativelanguage.googleapis.com"
    }

    /// The streaming endpoint embeds the model ID in the path and requests
    /// SSE output through `alt=sse`.
    fn path(&self, model_id: &str) -> String {
        format!(
            "/v1beta/models/{}:streamGenerateContent?alt=sse",
            model_id
        )
    }

    /// The API key travels as the `key` query parameter.
    fn required_auth(&self) -> AuthRequirement {
        AuthRequirement::QueryParam { name: "key" }
    }

    /// Builds the typed request and serialises it to a JSON value. The model
    /// ID is unused here because it is carried by the path.
    fn build_request_body(
        &self,
        _model_id: &str,
        request: &Request,
        capability: &ModelCapability,
    ) -> Value {
        let typed = self.build_request(request, capability);
        serde_json::to_value(&typed).expect("GeminiRequest is always serialisable")
    }

    /// Parses one SSE payload; a payload that yields nothing becomes an
    /// empty `Vec`.
    fn parse_sse(
        &self,
        _event_type: &str,
        data: &str,
        _state: &mut Self::State,
    ) -> Result<Vec<Event>, ClientError> {
        let parsed = self.parse_event(data)?;
        Ok(parsed.unwrap_or_default())
    }

    /// Returns the scheme-level default capability.
    fn default_capability(&self) -> ModelCapability {
        super::capability::default_capability()
    }
}

View File

@ -3,7 +3,90 @@
//! 各APIスキーマごとの変換ロジック
//! - リクエスト変換: Request → プロバイダ固有JSON
//! - レスポンス変換: SSEイベント → Event
//!
//! [`Scheme`] trait により `HttpTransport<S>` から scheme 固有の差分
//! (パス、ヘッダ、認証要件、body 生成、SSE パース)をすべて委譲する。
pub mod anthropic;
pub mod gemini;
pub mod openai;
pub mod openai_chat;
pub mod openai_responses;
use serde_json::Value;
use super::auth::AuthRequirement;
use super::capability::ModelCapability;
use super::client::ConfigWarning;
use super::error::ClientError;
use super::event::Event;
use super::types::{Request, RequestConfig};
/// Abstraction over a wire scheme. Implement it once per provider API
/// specification.
///
/// `HttpTransport<S: Scheme>` owns URL assembly, auth header insertion and the
/// SSE parse loop; a `Scheme` implementation supplies only the parts that
/// differ between specifications.
///
/// # State
///
/// A scheme that must keep state between SSE frames (for example to
/// compensate for Anthropic's `BlockStop` not carrying a `block_type`) puts a
/// type describing that intermediate state in [`Scheme::State`].
/// A stateless scheme uses `type State = ()`.
pub trait Scheme: Clone + Send + Sync + 'static {
/// State shared between SSE frames while parsing. `HttpTransport` creates
/// it once with `Default::default()` when a stream starts and passes it as
/// `&mut` for every frame.
type State: Default + Send + 'static;
/// Base URL of the scheme (the default when `ModelConfig::base_url` is not
/// specified).
fn default_base_url(&self) -> &'static str;
/// Relative path of the request target. It receives the model ID because
/// some providers, such as Gemini, embed the model name in the path.
fn path(&self, model_id: &str) -> String;
/// Authentication form this scheme requires. Checked against
/// `manifest::AuthRef` at `build_client` time.
fn required_auth(&self) -> AuthRequirement;
/// Extra headers besides `Content-Type`, e.g. `anthropic-version` /
/// `anthropic-beta`. Defaults to none.
fn additional_headers(&self) -> Vec<(&'static str, String)> {
Vec::new()
}
/// Builds the request body. `capability` is passed so the scheme can branch
/// its behaviour on things like `CacheStrategy` and `ReasoningSupport`.
fn build_request_body(
&self,
model_id: &str,
request: &Request,
capability: &ModelCapability,
) -> Value;
/// Converts one SSE event into zero or more [`Event`]s.
///
/// `event_type` is the `event:` field of the SSE frame and `data` is its
/// `data:` field. Terminal markers such as `[DONE]` are detected by the
/// implementation. `state` is mutable state shared for the whole stream.
fn parse_sse(
&self,
event_type: &str,
data: &str,
state: &mut Self::State,
) -> Result<Vec<Event>, ClientError>;
/// Scheme-level default capability. Independent of the model ID, it
/// returns the lowest common denominator that is safe to send on this
/// wire. The capability table for known model IDs is the responsibility of
/// `provider::capability::lookup` (the high-level construction layer); the
/// scheme is not involved there.
fn default_capability(&self) -> ModelCapability;
/// Returns warnings for `RequestConfig` fields the scheme does not support
/// (e.g. OpenAI Chat does not support `top_k`).
/// The default implementation returns an empty `Vec`.
fn validate_config(&self, config: &RequestConfig) -> Vec<ConfigWarning> {
// Intentionally unused in the default implementation.
let _ = config;
Vec::new()
}
}

View File

@ -0,0 +1,20 @@
//! OpenAI Chat Completions scheme の wire-level 既定 capability。
//!
//! モデル ID 固有のテーブル(`gpt-5` 系など)は高レベル構築層
//! (`provider::capability`)の責務。ここでは wire の保守的 default のみ。
use crate::llm_client::capability::{
CacheStrategy, ModelCapability, StructuredOutput, ToolCallingSupport,
};
/// Scheme-level default capability. Used as the fallback when an
/// OpenAI-compatible router (xAI / Groq / OpenRouter, etc.) receives an
/// unknown model ID.
pub(crate) fn default_capability() -> ModelCapability {
ModelCapability {
tool_calling: ToolCallingSupport::Parallel,
structured_output: StructuredOutput::JsonSchema,
reasoning: None,
vision: false,
prompt_caching: CacheStrategy::Auto,
}
}

View File

@ -75,6 +75,7 @@ impl OpenAIScheme {
status: None,
code: Some("parse_error".to_string()),
message: format!("Failed to parse SSE data: {} -> {}", e, data),
retry_after: None,
})?;
let mut events = Vec::new();

View File

@ -3,8 +3,10 @@
//! - リクエストJSON生成
//! - SSEイベントパース → Event変換
pub(crate) mod capability;
mod events;
mod request;
mod scheme_impl;
/// OpenAIスキーマ
///

View File

@ -7,7 +7,8 @@ use serde_json::Value;
use crate::llm_client::{
Request,
types::{Item, Role, ToolDefinition},
capability::{ModelCapability, ReasoningControl, ReasoningSupport},
types::{Item, Role, ToolDefinition, parse_tool_arguments},
};
use super::OpenAIScheme;
@ -34,6 +35,9 @@ pub(crate) struct OpenAIRequest {
pub tools: Vec<OpenAITool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub tool_choice: Option<String>,
/// Reasoning effort(o1 / o3 / o4 / gpt-5 系で有効)。
#[serde(skip_serializing_if = "Option::is_none")]
pub reasoning_effort: Option<String>,
}
#[derive(Debug, Serialize)]
@ -110,7 +114,12 @@ pub(crate) struct OpenAIToolCallFunction {
impl OpenAIScheme {
/// Build OpenAI request from Request
pub(crate) fn build_request(&self, model: &str, request: &Request) -> OpenAIRequest {
pub(crate) fn build_request(
&self,
model: &str,
request: &Request,
capability: &ModelCapability,
) -> OpenAIRequest {
let mut messages = Vec::new();
// Add system message if present
@ -135,6 +144,22 @@ impl OpenAIScheme {
(None, request.config.max_tokens)
};
// Reasoning の投影: capability が Effort / Both をサポートし、
// request 側で effort が指定されているときだけ reasoning_effort を付ける。
let supports_effort = matches!(
capability.reasoning,
Some(ReasoningSupport::Effort | ReasoningSupport::Both),
);
let reasoning_effort = request
.config
.reasoning
.as_ref()
.filter(|_| supports_effort)
.and_then(|rc| match rc {
ReasoningControl::Effort(effort) => Some(effort.as_str().to_string()),
ReasoningControl::BudgetTokens(_) => None,
});
OpenAIRequest {
model: model.to_string(),
max_completion_tokens,
@ -149,6 +174,7 @@ impl OpenAIScheme {
messages,
tools,
tool_choice: None,
reasoning_effort,
}
}
@ -201,18 +227,24 @@ impl OpenAIScheme {
arguments,
..
} => {
// Normalize non-object / legacy "null" payloads to "{}" so
// OpenAI gets a valid JSON object string.
let normalized_args = parse_tool_arguments(arguments).to_string();
pending_tool_calls.push(OpenAIToolCall {
id: call_id.clone(),
r#type: "function".to_string(),
function: OpenAIToolCallFunction {
name: name.clone(),
arguments: arguments.clone(),
arguments: normalized_args,
},
});
}
Item::ToolResult {
call_id, output, ..
call_id,
summary,
content,
..
} => {
// Flush pending tool calls before tool result
self.flush_pending_assistant(
@ -221,9 +253,13 @@ impl OpenAIScheme {
&mut pending_assistant_text,
);
let text = match content {
Some(c) => format!("{summary}\n{c}"),
None => summary.clone(),
};
messages.push(OpenAIMessage {
role: "tool".to_string(),
content: Some(OpenAIContent::Text(output.clone())),
content: Some(OpenAIContent::Text(text)),
tool_calls: vec![],
tool_call_id: Some(call_id.clone()),
name: None,
@ -284,13 +320,26 @@ impl OpenAIScheme {
#[cfg(test)]
mod tests {
use super::*;
use crate::llm_client::capability::{
CacheStrategy, ReasoningEffort, StructuredOutput, ToolCallingSupport,
};
fn cap() -> ModelCapability {
ModelCapability {
tool_calling: ToolCallingSupport::Parallel,
structured_output: StructuredOutput::JsonSchema,
reasoning: None,
vision: false,
prompt_caching: CacheStrategy::Auto,
}
}
#[test]
fn test_build_simple_request() {
let scheme = OpenAIScheme::new();
let request = Request::new().system("System prompt").user("Hello");
let body = scheme.build_request("gpt-4o", &request);
let body = scheme.build_request("gpt-4o", &request, &cap());
assert_eq!(body.model, "gpt-4o");
assert_eq!(body.messages.len(), 2);
@ -311,7 +360,7 @@ mod tests {
.user("Check weather")
.tool(ToolDefinition::new("weather").description("Get weather"));
let body = scheme.build_request("gpt-4o", &request);
let body = scheme.build_request("gpt-4o", &request, &cap());
assert_eq!(body.tools.len(), 1);
assert_eq!(body.tools[0].function.name, "weather");
}
@ -321,7 +370,7 @@ mod tests {
let scheme = OpenAIScheme::new().with_legacy_max_tokens(true);
let request = Request::new().user("Hello").max_tokens(100);
let body = scheme.build_request("llama3", &request);
let body = scheme.build_request("llama3", &request, &cap());
assert_eq!(body.max_tokens, Some(100));
assert!(body.max_completion_tokens.is_none());
@ -332,12 +381,44 @@ mod tests {
let scheme = OpenAIScheme::new();
let request = Request::new().user("Hello").max_tokens(100);
let body = scheme.build_request("gpt-4o", &request);
let body = scheme.build_request("gpt-4o", &request, &cap());
assert_eq!(body.max_completion_tokens, Some(100));
assert!(body.max_tokens.is_none());
}
#[test]
fn reasoning_effort_projected_when_supported() {
let scheme = OpenAIScheme::new();
let mut request = Request::new().user("Hello");
request.config.reasoning = Some(ReasoningControl::Effort(ReasoningEffort::Other(
"provider-native".into(),
)));
let capability = ModelCapability {
reasoning: Some(ReasoningSupport::Effort),
..cap()
};
let body = scheme.build_request("gpt-5", &request, &capability);
assert_eq!(body.reasoning_effort.as_deref(), Some("provider-native"));
}
#[test]
fn budget_reasoning_not_projected_to_openai_chat() {
let scheme = OpenAIScheme::new();
let mut request = Request::new().user("Hello");
request.config.reasoning = Some(ReasoningControl::BudgetTokens(4096));
let capability = ModelCapability {
reasoning: Some(ReasoningSupport::Both),
..cap()
};
let body = scheme.build_request("gpt-5", &request, &capability);
assert!(body.reasoning_effort.is_none());
}
#[test]
fn test_tool_call_and_result() {
let scheme = OpenAIScheme::new();
@ -350,7 +431,7 @@ mod tests {
))
.item(Item::tool_result("call_123", "Sunny, 25°C"));
let body = scheme.build_request("gpt-4o", &request);
let body = scheme.build_request("gpt-4o", &request, &cap());
assert_eq!(body.messages.len(), 3);
assert_eq!(body.messages[0].role, "user");

View File

@ -0,0 +1,67 @@
//! `impl Scheme for OpenAIScheme`
use serde_json::Value;
use crate::llm_client::{
ClientError,
auth::AuthRequirement,
capability::ModelCapability,
client::ConfigWarning,
event::Event,
scheme::Scheme,
types::{Request, RequestConfig},
};
use super::OpenAIScheme;
/// `Scheme` implementation for the OpenAI Chat Completions wire format.
impl Scheme for OpenAIScheme {
    /// No per-stream parser state is kept.
    type State = ();

    /// Default base URL for this scheme.
    fn default_base_url(&self) -> &'static str {
        "https://api.openai.com"
    }

    /// Request path; it does not depend on the model ID.
    fn path(&self, _model_id: &str) -> String {
        String::from("/v1/chat/completions")
    }

    /// This scheme requires bearer authentication.
    fn required_auth(&self) -> AuthRequirement {
        AuthRequirement::Bearer
    }

    /// Builds the request through `build_request` and serialises it to JSON.
    ///
    /// # Panics
    ///
    /// Panics if the request cannot be serialised to a JSON value.
    fn build_request_body(
        &self,
        model_id: &str,
        request: &Request,
        capability: &ModelCapability,
    ) -> Value {
        let body = self.build_request(model_id, request, capability);
        serde_json::to_value(&body).expect("OpenAIRequest is always serialisable")
    }

    /// Parses one SSE `data:` payload into events.
    ///
    /// The SSE event type and the state are unused.
    fn parse_sse(
        &self,
        _event_type: &str,
        data: &str,
        _state: &mut Self::State,
    ) -> Result<Vec<Event>, ClientError> {
        match data.trim() {
            // `data: [DONE]` is the end-of-stream marker and yields no events.
            "[DONE]" => Ok(Vec::new()),
            _ => {
                let parsed = self.parse_event(data)?;
                Ok(parsed.unwrap_or_default())
            }
        }
    }

    /// Delegates to the scheme-level default in the `capability` module.
    fn default_capability(&self) -> ModelCapability {
        super::capability::default_capability()
    }

    /// Warns about `top_k`, which the OpenAI Chat Completions API does not accept.
    fn validate_config(&self, config: &RequestConfig) -> Vec<ConfigWarning> {
        if config.top_k.is_none() {
            return Vec::new();
        }
        vec![ConfigWarning::unsupported("top_k", "OpenAI Chat")]
    }
}

View File

@ -0,0 +1,18 @@
//! OpenAI Responses scheme の wire-level 既定 capability。
//!
//! モデル ID 固有のテーブル(`gpt-5` / `codex-` 系など)は高レベル構築層
//! (`provider::capability`)の責務。ここでは wire の保守的 default のみ。
use crate::llm_client::capability::{
CacheStrategy, ModelCapability, StructuredOutput, ToolCallingSupport,
};
/// Conservative wire-level default capability for the OpenAI Responses scheme.
///
/// Model-ID-specific tables are not handled here.
pub(crate) fn default_capability() -> ModelCapability {
    let tool_calling = ToolCallingSupport::Parallel;
    let structured_output = StructuredOutput::JsonSchema;
    let prompt_caching = CacheStrategy::Auto;

    ModelCapability {
        tool_calling,
        structured_output,
        prompt_caching,
        // No reasoning control and no vision by default.
        reasoning: None,
        vision: false,
    }
}

View File

@ -0,0 +1,973 @@
//! OpenAI Responses API の SSE イベントパース
//!
//! `response.*` 名前空間の SSE を共通の [`Event`](crate::llm_client::event::Event)
//! に変換する。Responses の (output_index, content_index) 2 次元座標と
//! insomnia 側 1 次元 `BlockStart/Delta/Stop::index` のマッピングは
//! [`OpenAIResponsesState`] が保持する。
use std::collections::HashMap;
use serde::Deserialize;
use crate::llm_client::{
ClientError,
event::{
BlockDelta, BlockMetadata, BlockStart, BlockStop, BlockType, DeltaContent, ErrorEvent,
Event, ReasoningItemEvent, ResponseStatus, StatusEvent, UsageEvent,
},
};
/// Maps Responses API stream coordinates seen during SSE parsing to flat
/// block indices.
#[derive(Debug, Default)]
pub struct OpenAIResponsesState {
/// Currently open slots, keyed by stream coordinate.
slots: HashMap<SlotKey, SlotInfo>,
/// Flat index that the next allocation will receive; incremented on every
/// allocation.
next_index: usize,
/// Reasoning output_items that are still being accumulated, keyed by
/// `output_index`. An entry is created on `output_item.added` (Reasoning),
/// filled by `reasoning_text.delta` / `reasoning_summary_text.delta`, and
/// removed on `output_item.done` (Reasoning), at which point
/// `Event::ReasoningItem` is emitted.
pending_reasoning: HashMap<usize, PendingReasoning>,
}
/// Accumulation buffer for a single reasoning output_item.
#[derive(Debug, Default)]
struct PendingReasoning {
/// Item id, when the server has supplied one.
id: Option<String>,
/// Accumulated `reasoning_text.delta` text. If there are several
/// content_parts they are concatenated in arrival order.
text: String,
/// Accumulated `reasoning_summary_text.delta` text, indexed by
/// `summary_index`.
summary: Vec<String>,
}
impl OpenAIResponsesState {
fn allocate(&mut self, key: SlotKey, block_type: BlockType) -> SlotInfo {
let info = SlotInfo {
flat_index: self.next_index,
block_type,
};
self.next_index += 1;
self.slots.insert(key, info);
info
}
/// 既存 slot を取得。無ければ `block_type` で暗黙に確保し、
/// 新規確保したかを併せて返す。delta 先行 / content_part.added が
/// 抜けたときの防御。
fn get_or_allocate(&mut self, key: SlotKey, block_type: BlockType) -> (SlotInfo, bool) {
if let Some(info) = self.slots.get(&key).copied() {
(info, false)
} else {
(self.allocate(key, block_type), true)
}
}
fn ensure_reasoning(&mut self, output_index: usize) -> &mut PendingReasoning {
self.pending_reasoning.entry(output_index).or_default()
}
fn extend_reasoning_summary(&mut self, output_index: usize, summary_index: usize, text: &str) {
let entry = self.ensure_reasoning(output_index);
if entry.summary.len() <= summary_index {
entry.summary.resize(summary_index + 1, String::new());
}
entry.summary[summary_index].push_str(text);
}
}
/// Stream coordinate that identifies one open block.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum SlotKey {
/// tool_use (function_call / custom_tool_call): one block per output_item.
OutputItem(usize),
/// output_text of a message / reasoning_text of a reasoning item.
ContentPart { output: usize, content: usize },
/// summary_text of a reasoning item (keyed by summary_index).
Summary { output: usize, summary: usize },
}
/// Flat block index and block type recorded for one open slot.
#[derive(Debug, Clone, Copy)]
struct SlotInfo {
/// Index used in the emitted `BlockStart` / `BlockDelta` / `BlockStop` events.
flat_index: usize,
/// Block type the slot was allocated with; echoed in the `BlockStop` event.
block_type: BlockType,
}
// ============================================================================
// SSE イベントの JSON 構造
// ============================================================================
/// Payload of the `response.output_item.added` event.
#[derive(Debug, Deserialize)]
struct OutputItemAdded {
output_index: usize,
item: OutputItem,
}
/// Payload of the `response.output_item.done` event.
#[derive(Debug, Deserialize)]
struct OutputItemDone {
output_index: usize,
#[allow(dead_code)]
item: OutputItem,
}
/// The `item` of `response.output_item.added/done`, discriminated by its
/// `type` field (snake_case). Unknown types deserialize to `Other`.
#[derive(Debug, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
enum OutputItem {
Message {
#[allow(dead_code)]
id: Option<String>,
},
Reasoning {
#[serde(default)]
id: Option<String>,
/// Only populated on `output_item.done`. Carries an opaque blob when
/// `include=["reasoning.encrypted_content"]` was requested.
#[serde(default)]
encrypted_content: Option<String>,
},
FunctionCall {
#[allow(dead_code)]
#[serde(default)]
id: Option<String>,
call_id: String,
name: String,
},
CustomToolCall {
#[allow(dead_code)]
#[serde(default)]
id: Option<String>,
call_id: String,
name: String,
},
/// Any item type not listed above.
#[serde(other)]
Other,
}
/// Payload of the `response.content_part.added` event.
#[derive(Debug, Deserialize)]
struct ContentPartAdded {
output_index: usize,
content_index: usize,
part: ContentPart,
}
/// Payload of the `response.content_part.done` event.
#[derive(Debug, Deserialize)]
struct ContentPartDone {
output_index: usize,
content_index: usize,
#[allow(dead_code)]
part: ContentPart,
}
/// The `part` of `response.content_part.added/done`, discriminated by its
/// `type` field (snake_case). Unknown types deserialize to `Other`.
#[derive(Debug, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
enum ContentPart {
OutputText {
#[allow(dead_code)]
#[serde(default)]
text: String,
},
ReasoningText {
#[allow(dead_code)]
#[serde(default)]
text: String,
},
/// Any part type not listed above.
#[serde(other)]
Other,
}
/// Payload of the `response.output_text.delta` event.
#[derive(Debug, Deserialize)]
struct OutputTextDelta {
output_index: usize,
content_index: usize,
delta: String,
}
/// Payload of the `response.reasoning_text.delta` event.
#[derive(Debug, Deserialize)]
struct ReasoningTextDelta {
output_index: usize,
content_index: usize,
delta: String,
}
/// Payload of the `response.reasoning_summary_part.added` event.
#[derive(Debug, Deserialize)]
struct ReasoningSummaryPartAdded {
output_index: usize,
summary_index: usize,
#[allow(dead_code)]
#[serde(default)]
part: Option<serde_json::Value>,
}
/// Payload of the `response.reasoning_summary_text.delta` event.
#[derive(Debug, Deserialize)]
struct ReasoningSummaryTextDelta {
output_index: usize,
summary_index: usize,
delta: String,
}
/// Payload of the `response.reasoning_summary_part.done` event.
#[derive(Debug, Deserialize)]
struct ReasoningSummaryPartDone {
output_index: usize,
summary_index: usize,
}
/// Payload of the `response.function_call_arguments.delta` event.
#[derive(Debug, Deserialize)]
struct FunctionCallArgumentsDelta {
output_index: usize,
delta: String,
}
/// Payload of the `response.custom_tool_call_input.delta` event.
#[derive(Debug, Deserialize)]
struct CustomToolCallInputDelta {
output_index: usize,
delta: String,
}
/// Payload of the `response.completed` event.
#[derive(Debug, Deserialize)]
struct ResponseCompleted {
response: CompletedResponse,
}
/// The `response` object of a `response.completed` event; only `usage` is read.
#[derive(Debug, Deserialize)]
struct CompletedResponse {
#[serde(default)]
usage: Option<ResponsesUsage>,
}
/// Token usage reported with a completed response. Every field is optional.
#[derive(Debug, Deserialize)]
struct ResponsesUsage {
#[serde(default)]
input_tokens: Option<u64>,
#[serde(default)]
output_tokens: Option<u64>,
#[serde(default)]
total_tokens: Option<u64>,
/// Breakdown of `input_tokens`. `cached_tokens` is the portion served from
/// the prompt cache.
#[serde(default)]
input_tokens_details: Option<InputTokensDetails>,
}
/// Breakdown of `input_tokens`.
#[derive(Debug, Deserialize)]
struct InputTokensDetails {
#[serde(default)]
cached_tokens: Option<u64>,
}
/// Payload of the `response.failed` and `response.incomplete` events.
#[derive(Debug, Deserialize)]
struct ResponseFailed {
response: FailedResponse,
}
/// The `response` object of a failed / incomplete response; only `error` is read.
#[derive(Debug, Deserialize)]
struct FailedResponse {
#[serde(default)]
error: Option<ErrorDetail>,
}
/// The `error` object nested inside a failed / incomplete response.
#[derive(Debug, Deserialize)]
struct ErrorDetail {
/// Read from the JSON `type` field.
#[serde(rename = "type", default)]
error_type: Option<String>,
#[serde(default)]
message: Option<String>,
}
/// Payload of the top-level `error` event. Every field is optional.
#[derive(Debug, Deserialize)]
struct TopLevelError {
#[serde(default)]
message: Option<String>,
/// Read from the JSON `type` field.
#[serde(rename = "type", default)]
error_type: Option<String>,
#[serde(default)]
code: Option<String>,
}
// ============================================================================
// parse entry point
// ============================================================================
/// SSE フレーム 1 件をパースし、0 個以上の [`Event`] に変換する。
///
/// `event_type` は SSE の `event:` フィールド。未対応の event は
/// 静かに無視する。`data` が JSON でない / 必要なフィールドが抜けて
/// いる等は [`ClientError::Api`] で返す。
pub(crate) fn parse_sse(
event_type: &str,
data: &str,
state: &mut OpenAIResponsesState,
) -> Result<Vec<Event>, ClientError> {
match event_type {
"response.created" => Ok(vec![Event::Status(StatusEvent {
status: ResponseStatus::Started,
})]),
"response.completed" => {
let ev: ResponseCompleted = from_json(data)?;
let mut out = Vec::new();
if let Some(usage) = ev.response.usage {
out.push(Event::Usage(UsageEvent {
input_tokens: usage.input_tokens,
output_tokens: usage.output_tokens,
total_tokens: usage.total_tokens.or_else(|| {
Some(usage.input_tokens.unwrap_or(0) + usage.output_tokens.unwrap_or(0))
}),
cache_read_input_tokens: usage
.input_tokens_details
.and_then(|d| d.cached_tokens),
// Responses API は cache 書き込みを別計上しないinput_tokens に含まれる)
cache_creation_input_tokens: None,
}));
}
out.push(Event::Status(StatusEvent {
status: ResponseStatus::Completed,
}));
Ok(out)
}
"response.failed" | "response.incomplete" => {
let ev: ResponseFailed = from_json(data)?;
let (code, message) = match ev.response.error {
Some(err) => (err.error_type, err.message.unwrap_or_default()),
None => (None, format!("response {event_type}")),
};
Ok(vec![
Event::Error(ErrorEvent { code, message }),
Event::Status(StatusEvent {
status: ResponseStatus::Failed,
}),
])
}
"response.output_item.added" => {
let ev: OutputItemAdded = from_json(data)?;
match ev.item {
OutputItem::FunctionCall { call_id, name, .. }
| OutputItem::CustomToolCall { call_id, name, .. } => {
let info =
state.allocate(SlotKey::OutputItem(ev.output_index), BlockType::ToolUse);
Ok(vec![Event::BlockStart(BlockStart {
index: info.flat_index,
block_type: BlockType::ToolUse,
metadata: BlockMetadata::ToolUse { id: call_id, name },
})])
}
OutputItem::Reasoning { id, .. } => {
// wrapper を確保。中身の content_part / summary_part は
// 別 SlotKey で扱われ続けるStreaming 表示は維持)。
let entry = state.ensure_reasoning(ev.output_index);
if id.is_some() {
entry.id = id;
}
Ok(Vec::new())
}
_ => Ok(Vec::new()),
}
}
"response.output_item.done" => {
let ev: OutputItemDone = from_json(data)?;
// Reasoning wrapper の done で蓄積分を ReasoningItem として発火。
// これは `slots` の OutputItem slot とは独立している
// (FunctionCall は slots、Reasoning は pending_reasoning)。
if let OutputItem::Reasoning {
id,
encrypted_content,
..
} = ev.item
{
let mut pending = state
.pending_reasoning
.remove(&ev.output_index)
.unwrap_or_default();
if pending.id.is_none() {
pending.id = id;
}
return Ok(vec![Event::ReasoningItem(ReasoningItemEvent {
id: pending.id,
text: pending.text,
summary: pending
.summary
.into_iter()
.filter(|s| !s.is_empty())
.collect(),
encrypted_content,
signature: None,
})]);
}
if let Some(info) = state.slots.remove(&SlotKey::OutputItem(ev.output_index)) {
Ok(vec![Event::BlockStop(BlockStop {
index: info.flat_index,
block_type: info.block_type,
stop_reason: None,
})])
} else {
Ok(Vec::new())
}
}
"response.content_part.added" => {
let ev: ContentPartAdded = from_json(data)?;
let (block_type, metadata) = match ev.part {
ContentPart::OutputText { .. } => (BlockType::Text, BlockMetadata::Text),
ContentPart::ReasoningText { .. } => (BlockType::Thinking, BlockMetadata::Thinking),
ContentPart::Other => return Ok(Vec::new()),
};
let info = state.allocate(
SlotKey::ContentPart {
output: ev.output_index,
content: ev.content_index,
},
block_type,
);
Ok(vec![Event::BlockStart(BlockStart {
index: info.flat_index,
block_type,
metadata,
})])
}
"response.content_part.done" => {
let ev: ContentPartDone = from_json(data)?;
if let Some(info) = state.slots.remove(&SlotKey::ContentPart {
output: ev.output_index,
content: ev.content_index,
}) {
Ok(vec![Event::BlockStop(BlockStop {
index: info.flat_index,
block_type: info.block_type,
stop_reason: None,
})])
} else {
Ok(Vec::new())
}
}
"response.output_text.delta" => {
let ev: OutputTextDelta = from_json(data)?;
Ok(ensure_and_delta(
state,
SlotKey::ContentPart {
output: ev.output_index,
content: ev.content_index,
},
BlockType::Text,
BlockMetadata::Text,
DeltaContent::Text(ev.delta),
))
}
"response.reasoning_text.delta" => {
let ev: ReasoningTextDelta = from_json(data)?;
// round-trip 用に蓄積
state
.ensure_reasoning(ev.output_index)
.text
.push_str(&ev.delta);
Ok(ensure_and_delta(
state,
SlotKey::ContentPart {
output: ev.output_index,
content: ev.content_index,
},
BlockType::Thinking,
BlockMetadata::Thinking,
DeltaContent::Thinking(ev.delta),
))
}
"response.reasoning_summary_part.added" => {
let ev: ReasoningSummaryPartAdded = from_json(data)?;
let info = state.allocate(
SlotKey::Summary {
output: ev.output_index,
summary: ev.summary_index,
},
BlockType::Thinking,
);
Ok(vec![Event::BlockStart(BlockStart {
index: info.flat_index,
block_type: BlockType::Thinking,
metadata: BlockMetadata::Thinking,
})])
}
"response.reasoning_summary_text.delta" => {
let ev: ReasoningSummaryTextDelta = from_json(data)?;
// round-trip 用に蓄積
state.extend_reasoning_summary(ev.output_index, ev.summary_index, &ev.delta);
Ok(ensure_and_delta(
state,
SlotKey::Summary {
output: ev.output_index,
summary: ev.summary_index,
},
BlockType::Thinking,
BlockMetadata::Thinking,
DeltaContent::Thinking(ev.delta),
))
}
"response.reasoning_summary_part.done" => {
let ev: ReasoningSummaryPartDone = from_json(data)?;
if let Some(info) = state.slots.remove(&SlotKey::Summary {
output: ev.output_index,
summary: ev.summary_index,
}) {
Ok(vec![Event::BlockStop(BlockStop {
index: info.flat_index,
block_type: info.block_type,
stop_reason: None,
})])
} else {
Ok(Vec::new())
}
}
"response.function_call_arguments.delta" => {
let ev: FunctionCallArgumentsDelta = from_json(data)?;
Ok(ensure_and_delta(
state,
SlotKey::OutputItem(ev.output_index),
BlockType::ToolUse,
BlockMetadata::ToolUse {
id: String::new(),
name: String::new(),
},
DeltaContent::InputJson(ev.delta),
))
}
"response.custom_tool_call_input.delta" => {
let ev: CustomToolCallInputDelta = from_json(data)?;
Ok(ensure_and_delta(
state,
SlotKey::OutputItem(ev.output_index),
BlockType::ToolUse,
BlockMetadata::ToolUse {
id: String::new(),
name: String::new(),
},
DeltaContent::InputJson(ev.delta),
))
}
"error" => {
let ev: TopLevelError = from_json(data).unwrap_or(TopLevelError {
message: Some(data.to_string()),
error_type: None,
code: None,
});
Ok(vec![Event::Error(ErrorEvent {
code: ev.error_type.or(ev.code),
message: ev.message.unwrap_or_default(),
})])
}
// 未対応 / 情報系イベントは無視
_ => Ok(Vec::new()),
}
}
/// Emits the delta for `key`, preceded by a `BlockStart` when no slot for
/// `key` exists yet.
///
/// This lets the stream recover from a dropped `content_part.added` using the
/// delta alone. `metadata` is only used when the `BlockStart` is emitted.
fn ensure_and_delta(
    state: &mut OpenAIResponsesState,
    key: SlotKey,
    block_type: BlockType,
    metadata: BlockMetadata,
    delta: DeltaContent,
) -> Vec<Event> {
    let (slot, is_new) = state.get_or_allocate(key, block_type);
    let delta_event = Event::BlockDelta(BlockDelta {
        index: slot.flat_index,
        delta,
    });

    if !is_new {
        return vec![delta_event];
    }

    let start_event = Event::BlockStart(BlockStart {
        index: slot.flat_index,
        block_type,
        metadata,
    });
    vec![start_event, delta_event]
}
/// Deserializes `data` as JSON into `T`.
///
/// # Errors
///
/// Any deserialization failure is converted into [`ClientError::Api`] with
/// code `parse_error`, no status and no retry hint.
fn from_json<T>(data: &str) -> Result<T, ClientError>
where
    T: for<'de> Deserialize<'de>,
{
    match serde_json::from_str(data) {
        Ok(value) => Ok(value),
        Err(e) => Err(ClientError::Api {
            status: None,
            code: Some("parse_error".to_string()),
            message: format!("Failed to parse SSE data: {e}"),
            retry_after: None,
        }),
    }
}
#[cfg(test)]
mod tests {
use super::*;
fn run(event_type: &str, data: &str) -> (Vec<Event>, OpenAIResponsesState) {
let mut state = OpenAIResponsesState::default();
let events = parse_sse(event_type, data, &mut state).unwrap();
(events, state)
}
fn with(state: &mut OpenAIResponsesState, event_type: &str, data: &str) -> Vec<Event> {
parse_sse(event_type, data, state).unwrap()
}
#[test]
fn created_emits_status_started() {
let (events, _) = run("response.created", r#"{"response":{}}"#);
assert!(matches!(
events[0],
Event::Status(StatusEvent {
status: ResponseStatus::Started
})
));
}
#[test]
fn completed_emits_usage_and_status() {
let data =
r#"{"response":{"usage":{"input_tokens":10,"output_tokens":20,"total_tokens":30}}}"#;
let (events, _) = run("response.completed", data);
assert!(matches!(events[0], Event::Usage(_)));
assert!(matches!(
events[1],
Event::Status(StatusEvent {
status: ResponseStatus::Completed
})
));
if let Event::Usage(u) = &events[0] {
assert_eq!(u.input_tokens, Some(10));
assert_eq!(u.output_tokens, Some(20));
assert_eq!(u.total_tokens, Some(30));
assert_eq!(u.cache_read_input_tokens, None);
assert_eq!(u.cache_creation_input_tokens, None);
}
}
#[test]
fn completed_extracts_cached_tokens_from_input_tokens_details() {
let data = r#"{"response":{"usage":{
"input_tokens":12345,
"input_tokens_details":{"cached_tokens":11000},
"output_tokens":50,
"total_tokens":12395
}}}"#;
let (events, _) = run("response.completed", data);
let Event::Usage(u) = &events[0] else {
panic!("expected usage")
};
assert_eq!(u.input_tokens, Some(12345));
assert_eq!(u.output_tokens, Some(50));
assert_eq!(u.total_tokens, Some(12395));
assert_eq!(u.cache_read_input_tokens, Some(11000));
// OpenAI Responses は cache 書き込みを別計上しない
assert_eq!(u.cache_creation_input_tokens, None);
}
#[test]
fn text_stream_start_delta_stop() {
let mut state = OpenAIResponsesState::default();
// output_item.added (message) → 無視
with(
&mut state,
"response.output_item.added",
r#"{"output_index":0,"item":{"type":"message","id":"m1"}}"#,
);
// content_part.added (output_text) → BlockStart(Text)
let ev = with(
&mut state,
"response.content_part.added",
r#"{"output_index":0,"content_index":0,"item_id":"m1","part":{"type":"output_text","text":""}}"#,
);
assert_eq!(ev.len(), 1);
assert!(matches!(ev[0], Event::BlockStart(_)));
// delta
let ev = with(
&mut state,
"response.output_text.delta",
r#"{"output_index":0,"content_index":0,"item_id":"m1","delta":"hi"}"#,
);
assert_eq!(ev.len(), 1);
if let Event::BlockDelta(d) = &ev[0] {
assert!(matches!(&d.delta, DeltaContent::Text(t) if t == "hi"));
} else {
panic!("expected delta");
}
// content_part.done → BlockStop
let ev = with(
&mut state,
"response.content_part.done",
r#"{"output_index":0,"content_index":0,"item_id":"m1","part":{"type":"output_text","text":"hi"}}"#,
);
assert_eq!(ev.len(), 1);
if let Event::BlockStop(s) = &ev[0] {
assert_eq!(s.block_type, BlockType::Text);
} else {
panic!("expected stop");
}
}
#[test]
fn function_call_start_delta_stop() {
let mut state = OpenAIResponsesState::default();
// output_item.added (function_call) → BlockStart(ToolUse, id, name)
let ev = with(
&mut state,
"response.output_item.added",
r#"{"output_index":1,"item":{"type":"function_call","id":"fc1","call_id":"call_abc","name":"get_weather"}}"#,
);
assert_eq!(ev.len(), 1);
if let Event::BlockStart(s) = &ev[0] {
assert_eq!(s.block_type, BlockType::ToolUse);
if let BlockMetadata::ToolUse { id, name } = &s.metadata {
assert_eq!(id, "call_abc");
assert_eq!(name, "get_weather");
} else {
panic!("expected ToolUse metadata");
}
} else {
panic!("expected BlockStart");
}
// arguments delta
let ev = with(
&mut state,
"response.function_call_arguments.delta",
r#"{"output_index":1,"item_id":"fc1","delta":"{\"x\":"}"#,
);
assert_eq!(ev.len(), 1);
if let Event::BlockDelta(d) = &ev[0] {
assert!(matches!(&d.delta, DeltaContent::InputJson(j) if j == "{\"x\":"));
}
// output_item.done → BlockStop
let ev = with(
&mut state,
"response.output_item.done",
r#"{"output_index":1,"item":{"type":"function_call","call_id":"call_abc","name":"get_weather","arguments":"{\"x\":1}"}}"#,
);
assert_eq!(ev.len(), 1);
assert!(matches!(ev[0], Event::BlockStop(_)));
}
#[test]
fn custom_tool_call_input_delta_parsed() {
let mut state = OpenAIResponsesState::default();
with(
&mut state,
"response.output_item.added",
r#"{"output_index":0,"item":{"type":"custom_tool_call","id":"ct1","call_id":"call_xyz","name":"custom"}}"#,
);
let ev = with(
&mut state,
"response.custom_tool_call_input.delta",
r#"{"output_index":0,"item_id":"ct1","delta":"raw"}"#,
);
assert_eq!(ev.len(), 1);
if let Event::BlockDelta(d) = &ev[0] {
assert!(matches!(&d.delta, DeltaContent::InputJson(j) if j == "raw"));
} else {
panic!("expected delta");
}
}
#[test]
fn reasoning_text_delta_emits_thinking() {
let mut state = OpenAIResponsesState::default();
with(
&mut state,
"response.content_part.added",
r#"{"output_index":0,"content_index":0,"item_id":"r1","part":{"type":"reasoning_text","text":""}}"#,
);
let ev = with(
&mut state,
"response.reasoning_text.delta",
r#"{"output_index":0,"content_index":0,"item_id":"r1","delta":"think"}"#,
);
if let Event::BlockDelta(d) = &ev[0] {
assert!(matches!(&d.delta, DeltaContent::Thinking(t) if t == "think"));
} else {
panic!("expected thinking delta");
}
}
#[test]
fn reasoning_summary_start_delta_stop() {
let mut state = OpenAIResponsesState::default();
let ev = with(
&mut state,
"response.reasoning_summary_part.added",
r#"{"output_index":0,"summary_index":0,"item_id":"r1","part":{"type":"summary_text","text":""}}"#,
);
assert!(matches!(ev[0], Event::BlockStart(_)));
let ev = with(
&mut state,
"response.reasoning_summary_text.delta",
r#"{"output_index":0,"summary_index":0,"item_id":"r1","delta":"sum"}"#,
);
if let Event::BlockDelta(d) = &ev[0] {
assert!(matches!(&d.delta, DeltaContent::Thinking(t) if t == "sum"));
}
let ev = with(
&mut state,
"response.reasoning_summary_part.done",
r#"{"output_index":0,"summary_index":0,"item_id":"r1"}"#,
);
assert!(matches!(ev[0], Event::BlockStop(_)));
}
#[test]
fn delta_without_prior_start_recovers() {
// 防御: content_part.added が落ちても delta 単独で BlockStart+Delta を発行
let mut state = OpenAIResponsesState::default();
let ev = with(
&mut state,
"response.output_text.delta",
r#"{"output_index":0,"content_index":0,"item_id":"m1","delta":"hi"}"#,
);
assert_eq!(ev.len(), 2);
assert!(matches!(ev[0], Event::BlockStart(_)));
assert!(matches!(ev[1], Event::BlockDelta(_)));
}
#[test]
fn parallel_output_items_get_distinct_indices() {
// 2 つの function_call が並列で output_item.added される場合、
// flat index が別々になる(Parallel tool calling の基本)。
let mut state = OpenAIResponsesState::default();
let ev1 = with(
&mut state,
"response.output_item.added",
r#"{"output_index":0,"item":{"type":"function_call","id":"a","call_id":"c1","name":"t1"}}"#,
);
let ev2 = with(
&mut state,
"response.output_item.added",
r#"{"output_index":1,"item":{"type":"function_call","id":"b","call_id":"c2","name":"t2"}}"#,
);
let i1 = if let Event::BlockStart(s) = &ev1[0] {
s.index
} else {
panic!()
};
let i2 = if let Event::BlockStart(s) = &ev2[0] {
s.index
} else {
panic!()
};
assert_ne!(i1, i2);
}
#[test]
fn failed_response_emits_error_and_status() {
let data = r#"{"response":{"error":{"type":"invalid_request_error","message":"bad"}}}"#;
let (events, _) = run("response.failed", data);
assert_eq!(events.len(), 2);
assert!(matches!(events[0], Event::Error(_)));
assert!(matches!(
events[1],
Event::Status(StatusEvent {
status: ResponseStatus::Failed
})
));
}
#[test]
fn reasoning_output_item_emits_reasoning_item_with_text_summary_encrypted() {
// 完成済み reasoning wrapper が text + summary[] + encrypted_content を持って
// ReasoningItem として届くこと。
let mut state = OpenAIResponsesState::default();
// wrapper added (id だけ持つ)
with(
&mut state,
"response.output_item.added",
r#"{"output_index":0,"item":{"type":"reasoning","id":"r1"}}"#,
);
// 内側の reasoning_text 用 content_part
with(
&mut state,
"response.content_part.added",
r#"{"output_index":0,"content_index":0,"item_id":"r1","part":{"type":"reasoning_text","text":""}}"#,
);
with(
&mut state,
"response.reasoning_text.delta",
r#"{"output_index":0,"content_index":0,"item_id":"r1","delta":"hello "}"#,
);
with(
&mut state,
"response.reasoning_text.delta",
r#"{"output_index":0,"content_index":0,"item_id":"r1","delta":"world"}"#,
);
with(
&mut state,
"response.content_part.done",
r#"{"output_index":0,"content_index":0,"item_id":"r1","part":{"type":"reasoning_text","text":"hello world"}}"#,
);
// summary 1 件
with(
&mut state,
"response.reasoning_summary_part.added",
r#"{"output_index":0,"summary_index":0,"item_id":"r1","part":{"type":"summary_text","text":""}}"#,
);
with(
&mut state,
"response.reasoning_summary_text.delta",
r#"{"output_index":0,"summary_index":0,"item_id":"r1","delta":"sum-A"}"#,
);
with(
&mut state,
"response.reasoning_summary_part.done",
r#"{"output_index":0,"summary_index":0,"item_id":"r1"}"#,
);
// wrapper done (encrypted_content が乗る)
let evs = with(
&mut state,
"response.output_item.done",
r#"{"output_index":0,"item":{"type":"reasoning","id":"r1","encrypted_content":"ENC-XYZ"}}"#,
);
assert_eq!(evs.len(), 1);
let Event::ReasoningItem(reasoning) = &evs[0] else {
panic!("expected ReasoningItem, got {:?}", evs[0]);
};
assert_eq!(reasoning.id.as_deref(), Some("r1"));
assert_eq!(reasoning.text, "hello world");
assert_eq!(reasoning.summary, vec!["sum-A".to_string()]);
assert_eq!(reasoning.encrypted_content.as_deref(), Some("ENC-XYZ"));
assert!(reasoning.signature.is_none());
// pending_reasoning は drain されていること
assert!(state.pending_reasoning.is_empty());
}
#[test]
fn reasoning_wrapper_without_inner_content_emits_empty_text() {
// encrypted_content だけ届く(reasoning_text 無し)ケースでも
// ReasoningItem は発火する。
let mut state = OpenAIResponsesState::default();
with(
&mut state,
"response.output_item.added",
r#"{"output_index":2,"item":{"type":"reasoning","id":"r9"}}"#,
);
let evs = with(
&mut state,
"response.output_item.done",
r#"{"output_index":2,"item":{"type":"reasoning","id":"r9","encrypted_content":"BLOB"}}"#,
);
let Event::ReasoningItem(r) = &evs[0] else {
panic!()
};
assert!(r.text.is_empty());
assert!(r.summary.is_empty());
assert_eq!(r.encrypted_content.as_deref(), Some("BLOB"));
}
#[test]
fn unknown_event_is_ignored() {
let (events, _) = run("response.in_progress", "{}");
assert!(events.is_empty());
}
}

View File

@ -0,0 +1,84 @@
//! OpenAI Responses API スキーマ (`/v1/responses`)
//!
//! Chat Completions とは別物の item-based wire format。reasoning item と
//! function_call item が first-class で、SSE イベントも `response.*` 名前空間で
//! 流れる。ChatGPT OAuth 経路 (codex) は本 scheme 必須。
//!
//! - リクエスト JSON 生成: [`request`]
//! - SSE イベントパース → [`Event`](crate::llm_client::event::Event) 変換: [`events`]
mod capability;
mod events;
mod request;
mod scheme_impl;
pub use scheme_impl::OpenAIResponsesState;
/// The OpenAI Responses scheme.
///
/// `store` / `include_encrypted_content` / `send_max_output_tokens` /
/// `send_sampling_params` are wire settings fixed per scheme. The defaults
/// target the official OpenAI Responses API (stateless + ZDR, and
/// `max_output_tokens` / `temperature` / `top_p` may be sent). For routes that
/// accept only a subset of parameters, such as the ChatGPT backend
/// (codex-oauth), the provider layer overrides them to
/// `send_max_output_tokens=false` / `send_sampling_params=false`. These
/// settings are not part of `ModelCapability` (they are wire policy, not a
/// model capability).
#[derive(Debug, Clone)]
pub struct OpenAIResponsesScheme {
    /// Whether the server should store the response. `false` for
    /// ZDR / stateless operation.
    pub store: bool,
    /// Whether to send `include: ["reasoning.encrypted_content"]`.
    /// Required when reasoning is used with `store=false`.
    pub include_encrypted_content: bool,
    /// Whether to put `max_output_tokens` in the body. The official OpenAI
    /// Responses API accepts it, but the ChatGPT backend (codex-oauth) answers
    /// 400 `Unsupported parameter`, so that route sets this to `false`.
    pub send_max_output_tokens: bool,
    /// Whether to put `temperature` / `top_p` in the body. The official OpenAI
    /// Responses API accepts them, but the ChatGPT backend (codex-oauth)
    /// answers 400 `Unsupported parameter`, so that route sets this to `false`.
    pub send_sampling_params: bool,
}

impl Default for OpenAIResponsesScheme {
    /// Settings for the official OpenAI Responses API: nothing stored
    /// server-side, encrypted reasoning content included, and all optional
    /// parameters sent.
    fn default() -> Self {
        Self {
            send_sampling_params: true,
            send_max_output_tokens: true,
            include_encrypted_content: true,
            store: false,
        }
    }
}

impl OpenAIResponsesScheme {
    /// Creates a scheme with the default settings (`store=false`,
    /// `include=["reasoning.encrypted_content"]`,
    /// `send_max_output_tokens=true`, `send_sampling_params=true`).
    pub fn new() -> Self {
        <Self as Default>::default()
    }

    /// Overrides `store`.
    pub fn with_store(self, store: bool) -> Self {
        Self { store, ..self }
    }

    /// Overrides whether `include: ["reasoning.encrypted_content"]` is sent.
    pub fn with_include_encrypted_content(self, include: bool) -> Self {
        Self {
            include_encrypted_content: include,
            ..self
        }
    }

    /// Overrides whether `max_output_tokens` is put in the body.
    pub fn with_send_max_output_tokens(self, send: bool) -> Self {
        Self {
            send_max_output_tokens: send,
            ..self
        }
    }

    /// Overrides whether `temperature` / `top_p` are put in the body.
    pub fn with_send_sampling_params(self, send: bool) -> Self {
        Self {
            send_sampling_params: send,
            ..self
        }
    }
}

View File

@ -0,0 +1,650 @@
//! OpenAI Responses API リクエスト body 生成
//!
//! Chat Completions の `messages` と違い、Responses は `input[]` の
//! item 配列で reasoning / function_call / function_call_output が
//! first-class。`Item` を素に近い形で `input[]` に投影できる。
use serde::{Serialize, Serializer};
use serde_json::Value;
use crate::llm_client::{
Request,
capability::{ModelCapability, ReasoningControl, ReasoningSupport},
types::{ContentPart, Item, Role, ToolDefinition, parse_tool_arguments},
};
use super::OpenAIResponsesScheme;
/// Request body for `/v1/responses`.
#[derive(Debug, Serialize)]
pub(crate) struct ResponsesRequest {
pub model: String,
/// System-prompt equivalent. A separate field from `input[]`.
#[serde(skip_serializing_if = "Option::is_none")]
pub instructions: Option<String>,
pub input: Vec<InputItem>,
#[serde(skip_serializing_if = "Vec::is_empty")]
pub tools: Vec<ResponseTool>,
/// Always sent as `"auto"`. Fixed by the scheme.
pub tool_choice: &'static str,
/// Always sent as `true`. Fixed by the scheme.
pub parallel_tool_calls: bool,
#[serde(skip_serializing_if = "Option::is_none")]
pub reasoning: Option<ReasoningConfig>,
/// `false` for ZDR / stateless operation.
pub store: bool,
/// Always `true`.
pub stream: bool,
/// For example `["reasoning.encrypted_content"]`.
#[serde(skip_serializing_if = "Vec::is_empty")]
pub include: Vec<&'static str>,
/// Accepted by the official OpenAI Responses API, but rejected with 400
/// by the ChatGPT backend (codex-oauth). When the scheme's
/// `send_max_output_tokens` is `false` this stays `None` and is left out
/// of the body via `skip_serializing_if`.
#[serde(skip_serializing_if = "Option::is_none")]
pub max_output_tokens: Option<u32>,
/// Accepted by the official OpenAI Responses API, but the ChatGPT backend
/// (codex-oauth) rejects `temperature` / `top_p` with 400. When the
/// scheme's `send_sampling_params` is `false` these stay `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub temperature: Option<f32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub top_p: Option<f32>,
/// Stable per-conversation key. Without an explicit key the ChatGPT
/// backend (codex-oauth) gets almost no prompt-cache hits. The pod side
/// passes a `SegmentId`. When `Request::cache_key` is `None` the key is
/// not sent at all.
#[serde(skip_serializing_if = "Option::is_none")]
pub prompt_cache_key: Option<String>,
}
/// Reasoning control block of the request body.
#[derive(Debug, Serialize)]
pub(crate) struct ReasoningConfig {
/// Reasoning effort level; omitted from the body when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub effort: Option<String>,
/// Summary output control. Fixed to `"auto"` so that summary_text is
/// received.
pub summary: &'static str,
}
/// One element of `input[]`.
///
/// Projects the Responses API item types in a nearly raw form. Serialized
/// with an internal `type` tag in snake_case.
/// NOTE(review): the original note says unsupported types are ignored and
/// that a reasoning item with `content: []` is rejected as `None`; the
/// visible code instead just omits an empty `content` through
/// `skip_serializing_if` — confirm which is intended.
#[derive(Debug, Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub(crate) enum InputItem {
/// Conversation message: one of user / assistant / developer.
/// `Role::System` items are projected as `developer` (the ChatGPT
/// backend rejects `role: "system"`; Codex CLI likewise uses
/// DeveloperInstructions = `role: "developer"` for system-like
/// insertions).
Message {
role: &'static str,
content: Vec<InputContent>,
},
/// A past function tool call (assistant side).
FunctionCall {
call_id: String,
name: String,
/// JSON string (already normalised even when the stored arguments
/// were not an object).
arguments: String,
},
/// Result of a function tool (user side).
FunctionCallOutput {
call_id: String,
/// Responses allows either a string or a structured output; here the
/// `summary` and `content` are sent as one newline-joined string.
output: String,
},
/// Reasoning item. `encrypted_content` is always attached when present.
Reasoning {
#[serde(skip_serializing_if = "Option::is_none")]
id: Option<String>,
/// The Responses API requires the `summary` field on reasoning items
/// (it must be sent as `[]` even when empty). With models such as
/// GPT-5 that return no summary, on turns without a reasoning effort,
/// no summary text is attached at all, so skipping this field gets
/// the request rejected with 400
/// "Missing required parameter: 'input[N].summary'".
summary: Vec<ReasoningSummaryPart>,
#[serde(skip_serializing_if = "Vec::is_empty")]
content: Vec<ReasoningContentPart>,
#[serde(skip_serializing_if = "Option::is_none")]
encrypted_content: Option<String>,
},
}
/// Content part of a message. The role decides whether the input or the
/// output variant is used.
#[derive(Debug, Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub(crate) enum InputContent {
/// Text on the user / developer side.
InputText { text: String },
/// Text on the assistant side.
OutputText { text: String },
}
/// One entry of a reasoning item's `summary` array; serialized with a
/// snake_case `type` tag (`summary_text`).
#[derive(Debug, Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub(crate) enum ReasoningSummaryPart {
SummaryText { text: String },
}
/// One entry of a reasoning item's `content` array; serialized with a
/// snake_case `type` tag (`reasoning_text`).
#[derive(Debug, Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub(crate) enum ReasoningContentPart {
ReasoningText { text: String },
}
/// Tool definition for Responses. Unlike Chat, `name` / `parameters` sit at
/// the top level instead of being nested under a `function` key.
#[derive(Debug, Serialize)]
pub(crate) struct ResponseTool {
#[serde(rename = "type")]
pub r#type: &'static str,
pub name: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub description: Option<String>,
/// The OpenAI Responses API requires `properties` to exist on a
/// `type:"object"` parameter schema. schemars emits a minimal schema
/// without `properties` for argument-less structs, so an empty object is
/// filled in at serialization time.
#[serde(serialize_with = "serialize_parameters")]
pub parameters: Value,
/// Structured-output mode control. Defaults to false.
pub strict: bool,
}
/// Serializes a tool parameter schema, adding an empty `properties` object
/// when the schema is a JSON object with `"type": "object"` and no
/// `properties` key. Any other value is serialized unchanged.
fn serialize_parameters<S: Serializer>(value: &Value, s: S) -> Result<S::Ok, S::Error> {
    match value {
        Value::Object(schema)
            if schema.get("type").and_then(Value::as_str) == Some("object")
                && !schema.contains_key("properties") =>
        {
            // Patch a copy so the caller's schema is left untouched.
            let mut patched = schema.clone();
            patched.insert("properties".to_string(), Value::Object(Default::default()));
            Value::Object(patched).serialize(s)
        }
        _ => value.serialize(s),
    }
}
impl OpenAIResponsesScheme {
    /// Assembles the wire-format body from a `Request`.
    ///
    /// `model` is copied into the body, `request` supplies items, tools,
    /// system prompt, sampling config and cache key, and `capability`
    /// decides whether a reasoning block may be attached.
    pub(crate) fn build_request(
        &self,
        model: &str,
        request: &Request,
        capability: &ModelCapability,
    ) -> ResponsesRequest {
        // Reasoning is attached only when the capability supports
        // Effort / Both and the request carries an effort level; a token
        // budget has no projection here and yields no reasoning block.
        let supports_effort = matches!(
            capability.reasoning,
            Some(ReasoningSupport::Effort | ReasoningSupport::Both),
        );
        let reasoning = if supports_effort {
            match request.config.reasoning.as_ref() {
                Some(ReasoningControl::Effort(level)) => Some(ReasoningConfig {
                    effort: Some(level.as_str().to_string()),
                    summary: "auto",
                }),
                Some(ReasoningControl::BudgetTokens(_)) | None => None,
            }
        } else {
            None
        };

        let mut include = Vec::new();
        if self.include_encrypted_content {
            include.push("reasoning.encrypted_content");
        }

        // Parameters the route does not accept are dropped from the body
        // by leaving them as `None`.
        let max_output_tokens = request
            .config
            .max_tokens
            .filter(|_| self.send_max_output_tokens);
        let temperature = request
            .config
            .temperature
            .filter(|_| self.send_sampling_params);
        let top_p = request.config.top_p.filter(|_| self.send_sampling_params);

        ResponsesRequest {
            model: model.to_string(),
            instructions: request.system_prompt.clone(),
            input: convert_items_to_input(&request.items),
            tools: request.tools.iter().map(convert_tool).collect(),
            tool_choice: "auto",
            parallel_tool_calls: true,
            reasoning,
            store: self.store,
            stream: true,
            include,
            max_output_tokens,
            temperature,
            top_p,
            prompt_cache_key: request.cache_key.clone(),
        }
    }
}
/// Converts a sequence of `Item`s into `input[]`, one entry per item and in
/// the same order.
fn convert_items_to_input(items: &[Item]) -> Vec<InputItem> {
    items.iter().map(project_item).collect()
}

/// Projects a single conversation `Item` onto its `input[]` representation.
fn project_item(item: &Item) -> InputItem {
    match item {
        Item::Message { role, content, .. } => {
            // System items go out as `developer`; only assistant text uses
            // the output-text variant.
            let (wire_role, is_output) = match role {
                Role::User => ("user", false),
                Role::Assistant => ("assistant", true),
                Role::System => ("developer", false),
            };
            let content = content
                .iter()
                .map(|part| {
                    // Refusals are sent as plain text of the same variant.
                    let text = match part {
                        ContentPart::Text { text } => text.clone(),
                        ContentPart::Refusal { refusal } => refusal.clone(),
                    };
                    if is_output {
                        InputContent::OutputText { text }
                    } else {
                        InputContent::InputText { text }
                    }
                })
                .collect();
            InputItem::Message {
                role: wire_role,
                content,
            }
        }
        Item::ToolCall {
            call_id,
            name,
            arguments,
            ..
        } => InputItem::FunctionCall {
            call_id: call_id.clone(),
            name: name.clone(),
            // Normalise non-object / legacy "null" arguments to "{}".
            arguments: parse_tool_arguments(arguments).to_string(),
        },
        Item::ToolResult {
            call_id,
            summary,
            content,
            ..
        } => InputItem::FunctionCallOutput {
            call_id: call_id.clone(),
            // Summary alone, or summary and detail joined by a newline.
            output: content
                .as_ref()
                .map_or_else(|| summary.clone(), |detail| format!("{summary}\n{detail}")),
        },
        Item::Reasoning {
            id,
            text,
            summary,
            encrypted_content,
            ..
        } => {
            let mut body = Vec::new();
            if !text.is_empty() {
                body.push(ReasoningContentPart::ReasoningText { text: text.clone() });
            }
            InputItem::Reasoning {
                id: id.clone(),
                // Empty summary strings are dropped; the field itself is
                // always present on the wire.
                summary: summary
                    .iter()
                    .filter(|entry| !entry.is_empty())
                    .map(|entry| ReasoningSummaryPart::SummaryText {
                        text: entry.clone(),
                    })
                    .collect(),
                content: body,
                encrypted_content: encrypted_content.clone(),
            }
        }
    }
}
/// Maps a `ToolDefinition` to a Responses `function` tool with
/// `strict` disabled.
fn convert_tool(tool: &ToolDefinition) -> ResponseTool {
    let name = tool.name.clone();
    let description = tool.description.clone();
    let parameters = tool.input_schema.clone();
    ResponseTool {
        r#type: "function",
        name,
        description,
        parameters,
        strict: false,
    }
}
#[cfg(test)]
mod tests {
use super::*;
use crate::llm_client::capability::{
CacheStrategy, ModelCapability, ReasoningControl, ReasoningEffort, ReasoningSupport,
StructuredOutput, ToolCallingSupport,
};
// Capability fixture with effort-based reasoning support.
fn cap_with_reasoning() -> ModelCapability {
ModelCapability {
tool_calling: ToolCallingSupport::Parallel,
structured_output: StructuredOutput::JsonSchema,
reasoning: Some(ReasoningSupport::Effort),
vision: true,
prompt_caching: CacheStrategy::Auto,
}
}
// Same fixture, but without any reasoning support.
fn cap_no_reasoning() -> ModelCapability {
ModelCapability {
reasoning: None,
..cap_with_reasoning()
}
}
#[test]
fn scheme_defaults_to_stateless_zdr() {
let s = OpenAIResponsesScheme::new();
assert!(!s.store);
assert!(s.include_encrypted_content);
}
#[test]
fn includes_encrypted_content_when_enabled() {
let scheme = OpenAIResponsesScheme::new();
let req = Request::new().user("hi");
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
assert_eq!(body.include, vec!["reasoning.encrypted_content"]);
assert!(!body.store);
assert!(body.stream);
}
#[test]
fn instructions_from_system_prompt() {
let scheme = OpenAIResponsesScheme::new();
let req = Request::new().system("be terse").user("hi");
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
assert_eq!(body.instructions.as_deref(), Some("be terse"));
assert_eq!(body.input.len(), 1);
}
#[test]
fn tool_choice_and_parallel_are_fixed() {
let scheme = OpenAIResponsesScheme::new();
let req = Request::new().user("hi");
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
assert_eq!(body.tool_choice, "auto");
assert!(body.parallel_tool_calls);
}
#[test]
fn user_message_uses_input_text() {
let scheme = OpenAIResponsesScheme::new();
let req = Request::new().user("hi");
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
match &body.input[0] {
InputItem::Message { role, content } => {
assert_eq!(*role, "user");
assert_eq!(content.len(), 1);
assert!(matches!(&content[0], InputContent::InputText { text } if text == "hi"));
}
_ => panic!("expected message"),
}
}
#[test]
fn system_role_item_is_projected_as_developer() {
// The ChatGPT backend (codex-oauth) rejects `role: "system"` inside
// input[] with 400 "System messages are not allowed". In-conversation
// system notes (notify / fs_view auto-read / compaction summary) are
// projected as `role: "developer"` so that both backends accept them.
let scheme = OpenAIResponsesScheme::new();
let req = Request::new()
.user("hi")
.item(Item::system_message("[notify] hello"));
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
match &body.input[1] {
InputItem::Message { role, content } => {
assert_eq!(*role, "developer");
assert!(
matches!(&content[0], InputContent::InputText { text } if text == "[notify] hello"),
);
}
_ => panic!("expected message"),
}
}
#[test]
fn assistant_message_uses_output_text() {
let scheme = OpenAIResponsesScheme::new();
let req = Request::new().user("hi").assistant("hello");
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
match &body.input[1] {
InputItem::Message { role, content } => {
assert_eq!(*role, "assistant");
assert!(
matches!(&content[0], InputContent::OutputText { text } if text == "hello")
);
}
_ => panic!("expected message"),
}
}
#[test]
fn tool_call_and_result_become_function_items() {
let scheme = OpenAIResponsesScheme::new();
let req = Request::new()
.user("run")
.item(Item::tool_call("c1", "t", r#"{"a":1}"#))
.item(Item::tool_result("c1", "ok"));
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
assert!(matches!(body.input[1], InputItem::FunctionCall { .. }));
assert!(matches!(
body.input[2],
InputItem::FunctionCallOutput { .. }
));
}
#[test]
fn reasoning_item_round_trips_encrypted_content() {
let scheme = OpenAIResponsesScheme::new();
let item = Item::reasoning("inner")
.with_reasoning_summary(vec!["s1".into()])
.with_encrypted_content("ENC");
let req = Request::new().user("hi").item(item);
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
match &body.input[1] {
InputItem::Reasoning {
summary,
content,
encrypted_content,
..
} => {
assert_eq!(summary.len(), 1);
assert_eq!(content.len(), 1);
assert_eq!(encrypted_content.as_deref(), Some("ENC"));
}
_ => panic!("expected reasoning"),
}
}
#[test]
fn reasoning_summary_field_is_always_serialized() {
// The Responses API requires `summary` on reasoning items.
// Unless `summary: []` stays on the wire even when empty,
// the ChatGPT backend (codex-oauth) rejects the request with
// 400 invalid_request_error: Missing required parameter:
// 'input[N].summary'.
// On GPT-5 turns without a reasoning effort the summary text may be
// absent, so the field must not be skipped even when empty.
let scheme = OpenAIResponsesScheme::new();
let item = Item::reasoning("").with_encrypted_content("ENC");
let req = Request::new().user("hi").item(item);
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
let json = serde_json::to_value(&body).unwrap();
let reasoning_item = &json["input"][1];
assert_eq!(reasoning_item["type"], "reasoning");
assert!(
reasoning_item.get("summary").is_some(),
"summary key must be present even when empty, got: {reasoning_item}"
);
assert_eq!(reasoning_item["summary"], serde_json::json!([]));
}
#[test]
fn reasoning_effort_projected_when_supported() {
let scheme = OpenAIResponsesScheme::new();
let mut req = Request::new().user("hi");
req.config.reasoning = Some(ReasoningControl::Effort(ReasoningEffort::High));
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
let reasoning = body.reasoning.expect("reasoning should be set");
assert_eq!(reasoning.effort.as_deref(), Some("high"));
assert_eq!(reasoning.summary, "auto");
}
#[test]
fn reasoning_omitted_when_unsupported() {
let scheme = OpenAIResponsesScheme::new();
let mut req = Request::new().user("hi");
req.config.reasoning = Some(ReasoningControl::Effort(ReasoningEffort::High));
let body = scheme.build_request("gpt-4o", &req, &cap_no_reasoning());
assert!(body.reasoning.is_none());
}
#[test]
fn max_output_tokens_passed_through_by_default() {
let scheme = OpenAIResponsesScheme::new();
let req = Request::new().user("hi").max_tokens(100);
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
assert_eq!(body.max_output_tokens, Some(100));
}
#[test]
fn max_output_tokens_dropped_when_send_disabled() {
let scheme = OpenAIResponsesScheme::new().with_send_max_output_tokens(false);
let req = Request::new().user("hi").max_tokens(100);
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
assert_eq!(body.max_output_tokens, None);
let json = serde_json::to_value(&body).unwrap();
assert!(
json.get("max_output_tokens").is_none(),
"max_output_tokens key must not appear in serialised body, got: {json}"
);
}
#[test]
fn sampling_params_passed_through_by_default() {
let scheme = OpenAIResponsesScheme::new();
let req = Request::new().user("hi").temperature(0.4).top_p(0.9);
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
assert_eq!(body.temperature, Some(0.4));
assert_eq!(body.top_p, Some(0.9));
}
#[test]
fn sampling_params_dropped_when_send_disabled() {
let scheme = OpenAIResponsesScheme::new().with_send_sampling_params(false);
let req = Request::new().user("hi").temperature(0.4).top_p(0.9);
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
assert_eq!(body.temperature, None);
assert_eq!(body.top_p, None);
let json = serde_json::to_value(&body).unwrap();
assert!(
json.get("temperature").is_none() && json.get("top_p").is_none(),
"temperature/top_p keys must not appear in serialised body, got: {json}"
);
}
#[test]
fn prompt_cache_key_passed_through_when_set() {
let scheme = OpenAIResponsesScheme::new();
let req = Request::new().user("hi").cache_key("session-abc");
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
assert_eq!(body.prompt_cache_key.as_deref(), Some("session-abc"));
let json = serde_json::to_value(&body).unwrap();
assert_eq!(json["prompt_cache_key"], "session-abc");
}
#[test]
fn prompt_cache_key_omitted_when_none() {
let scheme = OpenAIResponsesScheme::new();
let req = Request::new().user("hi");
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
assert!(body.prompt_cache_key.is_none());
let json = serde_json::to_value(&body).unwrap();
assert!(
json.get("prompt_cache_key").is_none(),
"prompt_cache_key key must not appear in serialised body, got: {json}"
);
}
#[test]
fn tool_schema_without_properties_is_normalized() {
// schemars emits a bare `type:"object"` schema for argument-less
// structs. OpenAI Responses rejects a missing `properties` with 400,
// so check that an empty object is filled in right before sending.
let scheme = OpenAIResponsesScheme::new();
let raw_schema = serde_json::json!({ "type": "object" });
let req = Request::new().tool(
ToolDefinition::new("empty")
.description("no args")
.input_schema(raw_schema),
);
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
let json = serde_json::to_value(&body).unwrap();
assert_eq!(json["tools"][0]["parameters"]["type"], "object");
assert!(
json["tools"][0]["parameters"]["properties"].is_object(),
"properties must be present as an object, got: {}",
json["tools"][0]["parameters"]
);
}
#[test]
fn tool_schema_with_properties_is_untouched() {
let scheme = OpenAIResponsesScheme::new();
let raw_schema = serde_json::json!({
"type": "object",
"properties": { "path": { "type": "string" } },
"required": ["path"]
});
let req = Request::new().tool(
ToolDefinition::new("t")
.description("d")
.input_schema(raw_schema.clone()),
);
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
let json = serde_json::to_value(&body).unwrap();
assert_eq!(json["tools"][0]["parameters"], raw_schema);
}
#[test]
fn serialized_body_has_expected_shape() {
// Smoke test that the wire format has not been broken.
let scheme = OpenAIResponsesScheme::new();
let req = Request::new()
.system("sys")
.user("hi")
.tool(ToolDefinition::new("t").description("d"));
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
let json = serde_json::to_value(&body).unwrap();
assert_eq!(json["model"], "gpt-5");
assert_eq!(json["instructions"], "sys");
assert_eq!(json["tool_choice"], "auto");
assert_eq!(json["parallel_tool_calls"], true);
assert_eq!(json["store"], false);
assert_eq!(json["stream"], true);
assert_eq!(json["include"][0], "reasoning.encrypted_content");
assert_eq!(json["tools"][0]["type"], "function");
assert_eq!(json["tools"][0]["name"], "t");
}
}

View File

@ -0,0 +1,88 @@
//! `impl Scheme for OpenAIResponsesScheme`
use serde_json::Value;
use crate::llm_client::{
ClientError,
auth::AuthRequirement,
capability::ModelCapability,
client::ConfigWarning,
event::Event,
scheme::Scheme,
types::{Request, RequestConfig},
};
use super::OpenAIResponsesScheme;
pub use super::events::OpenAIResponsesState;
impl Scheme for OpenAIResponsesScheme {
    type State = OpenAIResponsesState;

    /// Base URL of the official API.
    fn default_base_url(&self) -> &'static str {
        // `/v1` lives in the base URL. For the ChatGPT OAuth route, using
        // `https://chatgpt.com/backend-api/codex` as the base lets the same
        // `/responses` path serve both routes (following Codex CLI).
        "https://api.openai.com/v1"
    }

    /// Request path; the model id does not affect it.
    fn path(&self, _model_id: &str) -> String {
        String::from("/responses")
    }

    /// This scheme asks for bearer authentication.
    fn required_auth(&self) -> AuthRequirement {
        AuthRequirement::Bearer
    }

    /// Builds the request body and converts it to a JSON value.
    fn build_request_body(
        &self,
        model_id: &str,
        request: &Request,
        capability: &ModelCapability,
    ) -> Value {
        let wire_body = self.build_request(model_id, request, capability);
        serde_json::to_value(&wire_body).expect("ResponsesRequest is always serialisable")
    }

    /// Delegates SSE frame parsing to the `events` module.
    fn parse_sse(
        &self,
        event_type: &str,
        data: &str,
        state: &mut Self::State,
    ) -> Result<Vec<Event>, ClientError> {
        super::events::parse_sse(event_type, data, state)
    }

    /// Delegates to the `capability` module's default.
    fn default_capability(&self) -> ModelCapability {
        super::capability::default_capability()
    }

    /// Reports config fields that are set by the caller but will not be sent.
    ///
    /// The ChatGPT backend (codex-oauth) rejects `max_output_tokens`,
    /// `temperature` and `top_p` with 400. When the scheme was built with
    /// the matching `send_*` flag off, body projection is already
    /// suppressed, so this only tells the user that their setting is
    /// dropped. Warnings come out in the order `max_tokens`, `temperature`,
    /// `top_p`.
    fn validate_config(&self, config: &RequestConfig) -> Vec<ConfigWarning> {
        let sampling_off = !self.send_sampling_params;
        let dropped = [
            (
                "max_tokens",
                !self.send_max_output_tokens && config.max_tokens.is_some(),
            ),
            ("temperature", sampling_off && config.temperature.is_some()),
            ("top_p", sampling_off && config.top_p.is_some()),
        ];
        dropped
            .into_iter()
            .filter(|(_, is_dropped)| *is_dropped)
            .map(|(field, _)| {
                ConfigWarning::unsupported(field, "OpenAI Responses (ChatGPT backend)")
            })
            .collect()
    }
}

View File

@ -0,0 +1,257 @@
//! `HttpTransport<S: Scheme>`: すべての LLM wire scheme を共通の 1 本の
//! HTTP クライアントで扱う。
//!
//! 旧 `providers/{anthropic,openai,gemini,ollama}.rs` を置き換える。
//! scheme 固有の差分は [`Scheme`] trait 実装に委譲する。
use std::pin::Pin;
use std::sync::Arc;
use std::time::Duration;
use async_trait::async_trait;
use eventsource_stream::Eventsource;
use futures::{Stream, StreamExt, TryStreamExt};
use reqwest::header::{CONTENT_TYPE, HeaderMap, HeaderValue, RETRY_AFTER};
use super::auth::{AuthProvider, AuthRequirement};
use super::capability::ModelCapability;
use super::client::{ConfigWarning, LlmClient, ResponseStream};
use super::error::ClientError;
use super::event::Event;
use super::scheme::Scheme;
use super::types::{Request, RequestConfig};
/// Runtime representation of a resolved `AuthRef`. Built by
/// `crates/provider`.
///
/// - `None`: send no authentication header (opt-out, e.g. Ollama)
/// - `ApiKey`: a static API key string
/// - `Custom`: headers are assembled dynamically per request (e.g. Codex
///   OAuth)
#[derive(Debug, Clone)]
pub enum ResolvedAuth {
None,
ApiKey(String),
Custom(Arc<dyn AuthProvider>),
}
impl ResolvedAuth {
    /// Checks whether the resolved value is compatible with the scheme's
    /// authentication requirement. Intended for construction-time validation.
    ///
    /// - `ResolvedAuth::None` declares that no authentication is attached,
    ///   so it is accepted for every `AuthRequirement` (Ollama reusing the
    ///   Anthropic scheme has `required_auth = XApiKey` yet works without an
    ///   auth header).
    /// - `ResolvedAuth::Custom` declares that header assembly is done
    ///   entirely by the provider, so it is accepted regardless of the form
    ///   the scheme asks for.
    /// - `ResolvedAuth::ApiKey` is accepted only for `Bearer`, `XApiKey`
    ///   and `QueryParam`.
    pub fn matches(&self, req: AuthRequirement) -> bool {
        match self {
            Self::None | Self::Custom(_) => true,
            Self::ApiKey(_) => matches!(
                req,
                AuthRequirement::Bearer
                    | AuthRequirement::XApiKey
                    | AuthRequirement::QueryParam { .. }
            ),
        }
    }
}
/// HTTP communication layer shared by all schemes.
pub struct HttpTransport<S: Scheme> {
// Client used to send the POST request.
http_client: reqwest::Client,
// Wire scheme that supplies path, auth requirement, body and SSE parsing.
scheme: S,
// Model id handed to the scheme when building the path and body.
model_id: String,
// Base URL, stored without trailing slashes.
base_url: String,
// Resolved authentication used for headers or the query parameter.
auth: ResolvedAuth,
// Capability handed to the scheme when building the body.
capability: ModelCapability,
}
impl<S: Scheme> HttpTransport<S> {
    /// Creates a new transport. `base_url` is accepted with or without
    /// trailing slashes; they are stripped before it is stored.
    pub fn new(
        scheme: S,
        model_id: impl Into<String>,
        base_url: impl Into<String>,
        auth: ResolvedAuth,
        capability: ModelCapability,
    ) -> Self {
        let base_url = base_url.into();
        let base_url = base_url.trim_end_matches('/').to_string();
        Self {
            http_client: reqwest::Client::new(),
            scheme,
            model_id: model_id.into(),
            base_url,
            auth,
            capability,
        }
    }

    /// Swaps in a custom HTTP client (for tests and similar).
    pub fn with_http_client(mut self, client: reqwest::Client) -> Self {
        self.http_client = client;
        self
    }

    /// Builds the request URL from the base URL and the scheme's path.
    ///
    /// When the scheme authenticates through a query parameter (as Gemini
    /// does) and an API key is configured, the key is appended as
    /// `name=key`. The key is percent-encoded so that characters such as
    /// `&`, `#`, `+`, `%` or spaces cannot corrupt the query string or be
    /// decoded into a different key by the server.
    fn build_url(&self) -> String {
        let path = self.scheme.path(&self.model_id);
        let url = format!("{}{}", self.base_url, path);
        if let (AuthRequirement::QueryParam { name }, ResolvedAuth::ApiKey(key)) =
            (self.scheme.required_auth(), &self.auth)
        {
            let sep = if url.contains('?') { '&' } else { '?' };
            let key = encode_query_value(key);
            format!("{url}{sep}{name}={key}")
        } else {
            url
        }
    }

    /// Assembles the request headers: content type, authentication, then
    /// any additional headers the scheme declares.
    ///
    /// # Errors
    ///
    /// Returns `ClientError::Config` when the API key or an additional
    /// header value is not a valid header value, and propagates any error
    /// from a custom auth provider.
    async fn build_headers(&self) -> Result<HeaderMap, ClientError> {
        let mut headers = HeaderMap::new();
        headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
        match (&self.auth, self.scheme.required_auth()) {
            (ResolvedAuth::None, _) | (_, AuthRequirement::None) => {}
            (ResolvedAuth::Custom(provider), _) => {
                for (name, mut value) in provider.headers().await? {
                    // Keep credentials out of debug output.
                    value.set_sensitive(true);
                    headers.insert(name, value);
                }
            }
            (ResolvedAuth::ApiKey(key), AuthRequirement::Bearer) => {
                let mut val = HeaderValue::from_str(&format!("Bearer {key}"))
                    .map_err(|e| ClientError::Config(format!("invalid api key: {e}")))?;
                val.set_sensitive(true);
                headers.insert("Authorization", val);
            }
            (ResolvedAuth::ApiKey(key), AuthRequirement::XApiKey) => {
                let mut val = HeaderValue::from_str(key.as_str())
                    .map_err(|e| ClientError::Config(format!("invalid api key: {e}")))?;
                val.set_sensitive(true);
                headers.insert("x-api-key", val);
            }
            (_, AuthRequirement::QueryParam { .. }) => {
                // The query parameter is already attached by `build_url`.
            }
            (ResolvedAuth::ApiKey(_), AuthRequirement::Custom) => {
                // An ApiKey is not expected for a scheme that requires
                // Custom (`matches()` rejects it). Do nothing, to be safe.
            }
        }
        for (name, value) in self.scheme.additional_headers() {
            let hv = HeaderValue::from_str(&value)
                .map_err(|e| ClientError::Config(format!("invalid header {name}: {e}")))?;
            headers.insert(name, hv);
        }
        Ok(headers)
    }
}

/// Percent-encodes `value` for use as a URL query parameter value.
///
/// RFC 3986 unreserved characters (`A-Z a-z 0-9 - . _ ~`) pass through
/// unchanged; every other byte is written as `%XX` with uppercase hex.
fn encode_query_value(value: &str) -> String {
    const HEX: &[u8; 16] = b"0123456789ABCDEF";
    let mut encoded = String::with_capacity(value.len());
    for byte in value.bytes() {
        match byte {
            b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'.' | b'_' | b'~' => {
                encoded.push(char::from(byte));
            }
            other => {
                encoded.push('%');
                encoded.push(char::from(HEX[usize::from(other >> 4)]));
                encoded.push(char::from(HEX[usize::from(other & 0x0f)]));
            }
        }
    }
    encoded
}
impl<S: Scheme + Clone> Clone for HttpTransport<S> {
    /// Clones every field; available only when the scheme itself is `Clone`.
    fn clone(&self) -> Self {
        let Self {
            http_client,
            scheme,
            model_id,
            base_url,
            auth,
            capability,
        } = self;
        Self {
            http_client: http_client.clone(),
            scheme: scheme.clone(),
            model_id: model_id.clone(),
            base_url: base_url.clone(),
            auth: auth.clone(),
            capability: capability.clone(),
        }
    }
}
/// Converts an error response into `ClientError::Api`.
///
/// The status code and an integer-seconds `Retry-After` header are captured
/// first. If the body is JSON, the `error` object (or the top-level value
/// when there is none) supplies `type` as the code and `message` as the
/// message; otherwise, or when `message` is missing, the raw body text is
/// used as the message.
async fn classify_error_response(resp: reqwest::Response) -> ClientError {
    let status = resp.status().as_u16();
    let retry_after = resp
        .headers()
        .get(RETRY_AFTER)
        .and_then(|v| v.to_str().ok())
        .and_then(|s| s.trim().parse::<u64>().ok())
        .map(Duration::from_secs);
    // An unreadable body is treated as empty.
    let text = resp.text().await.unwrap_or_default();

    let (code, parsed_message) = match serde_json::from_str::<serde_json::Value>(&text) {
        Ok(json) => {
            let error = json.get("error").unwrap_or(&json);
            let code = error.get("type").and_then(|v| v.as_str()).map(String::from);
            let message = error
                .get("message")
                .and_then(|v| v.as_str())
                .map(str::to_string);
            (code, message)
        }
        Err(_) => (None, None),
    };

    ClientError::Api {
        status: Some(status),
        code,
        message: parsed_message.unwrap_or(text),
        retry_after,
    }
}
#[async_trait]
impl<S: Scheme + Clone + 'static> LlmClient for HttpTransport<S> {
    /// Returns a boxed clone of this transport.
    fn clone_boxed(&self) -> Box<dyn LlmClient> {
        Box::new(self.clone())
    }

    /// Delegates config validation to the scheme.
    fn validate_config(&self, config: &RequestConfig) -> Vec<ConfigWarning> {
        self.scheme.validate_config(config)
    }

    /// Sends the request and returns a stream of parsed events.
    ///
    /// # Errors
    ///
    /// Fails up front when headers cannot be built, when the HTTP request
    /// fails (`ClientError::Http`), or when the response status is not a
    /// success (classified by `classify_error_response`). SSE and parse
    /// errors surface later as `Err` items of the returned stream.
    async fn stream(&self, request: Request) -> Result<ResponseStream, ClientError> {
        let url = self.build_url();
        let headers = self.build_headers().await?;
        let body = self
            .scheme
            .build_request_body(&self.model_id, &request, &self.capability);

        let response = self
            .http_client
            .post(&url)
            .headers(headers)
            .json(&body)
            .send()
            .await
            .map_err(ClientError::Http)?;
        if !response.status().is_success() {
            return Err(classify_error_response(response).await);
        }

        let scheme = self.scheme.clone();
        // Scheme-specific parse state, kept per stream.
        let mut state = <S::State as Default>::default();
        let frames = response
            .bytes_stream()
            .map_err(std::io::Error::other)
            .eventsource();

        let stream = frames
            .map(move |frame| {
                let parsed = frame
                    .map_err(|e| ClientError::Sse(e.to_string()))
                    .and_then(|frame| scheme.parse_sse(&frame.event, &frame.data, &mut state));
                // One frame may yield several events, or a single error.
                let batch: Pin<Box<dyn Stream<Item = Result<Event, ClientError>> + Send>> =
                    match parsed {
                        Ok(events) => Box::pin(futures::stream::iter(events.into_iter().map(Ok))),
                        Err(err) => Box::pin(futures::stream::once(async move { Err(err) })),
                    };
                batch
            })
            .flatten();
        Ok(Box::pin(stream))
    }
}

View File

@ -9,6 +9,10 @@
use serde::{Deserialize, Serialize};
/// Serde `skip_serializing_if` predicate: returns `true` when the flag is
/// `false`, so that a false flag is left out of the serialized output.
fn is_false(value: &bool) -> bool {
    matches!(*value, false)
}
// ============================================================================
// Item - The core unit of conversation
// ============================================================================
@ -74,8 +78,14 @@ pub enum Item {
id: Option<ItemId>,
/// Call ID linking to the tool call
call_id: CallId,
/// Output content
output: String,
/// Short summary (always kept in history, survives pruning)
summary: String,
/// Detailed output (removed by pruning when old enough)
#[serde(default, skip_serializing_if = "Option::is_none")]
content: Option<String>,
/// Whether the tool result represents an execution error.
#[serde(default, skip_serializing_if = "is_false")]
is_error: bool,
},
/// Reasoning/thinking item
@ -83,8 +93,23 @@ pub enum Item {
/// Optional item ID
#[serde(skip_serializing_if = "Option::is_none")]
id: Option<ItemId>,
/// Reasoning text
/// Reasoning textreasoning body, `reasoning_text.delta` の累積)
text: String,
/// Reasoning summaryOpenAI Responses の `summary_text[]` を格納。
/// 他 scheme は空)
#[serde(default, skip_serializing_if = "Vec::is_empty")]
summary: Vec<String>,
/// サーバから返された暗号化済み reasoning blob。ZDR / `store=false`
/// 運用で stateless に再送するときそのまま添える必要がある。
/// Anthropic の `redacted_thinking.data` もここに格納する。
#[serde(default, skip_serializing_if = "Option::is_none")]
encrypted_content: Option<String>,
/// Anthropic extended thinking の `signature`。新世代 Claude
/// (Opus 4.5+/Sonnet 4.6+) では同一論理ターン内の `thinking`
/// ブロックを送り返す際に必須。改ざん検知に使われる。他 scheme
/// では `None`。
#[serde(default, skip_serializing_if = "Option::is_none")]
signature: Option<String>,
/// Item status
#[serde(skip_serializing_if = "Option::is_none")]
status: Option<ItemStatus>,
@ -96,6 +121,20 @@ impl Item {
// Message constructors
// ========================================================================
/// Create a system message item with text content.
///
/// The item carries `Role::System`; how it is sent depends on the scheme.
/// The OpenAI Responses scheme projects it as `role: "developer"`.
/// Per the original note, OpenAI Chat Completions sends `role: "system"`
/// and Anthropic/Gemini send `role: "user"` (they lack a system role in
/// conversation items) — NOTE(review): those schemes are not visible
/// here; confirm.
pub fn system_message(text: impl Into<String>) -> Self {
Self::Message {
id: None,
role: Role::System,
content: vec![ContentPart::Text { text: text.into() }],
status: None,
}
}
/// Create a user message item with text content
pub fn user_message(text: impl Into<String>) -> Self {
Self::Message {
@ -164,15 +203,41 @@ impl Item {
Self::tool_call(call_id, name, arguments.to_string())
}
/// Create a tool result item
pub fn tool_result(call_id: impl Into<String>, output: impl Into<String>) -> Self {
/// Create a non-error tool result item that carries only a summary.
///
/// Delegates to `tool_result_item` with no detailed content (`None`) and
/// `is_error = false`.
pub fn tool_result(call_id: impl Into<String>, summary: impl Into<String>) -> Self {
    Self::tool_result_item(call_id, summary, None, false)
}
/// Create an error tool result item that carries only a summary.
///
/// Delegates to `tool_result_item` with no detailed content (`None`) and
/// `is_error = true`.
pub fn tool_result_error(call_id: impl Into<String>, summary: impl Into<String>) -> Self {
    Self::tool_result_item(call_id, summary, None, true)
}
/// Create a tool result item from all of its parts.
///
/// * `call_id` - ID linking this result to its tool call.
/// * `summary` - short summary stored on the item.
/// * `content` - optional detailed output; `None` stores no detail.
/// * `is_error` - whether the result represents an execution error.
///
/// The item is created without an item ID (`id: None`).
pub fn tool_result_item(
    call_id: impl Into<String>,
    summary: impl Into<String>,
    content: Option<String>,
    is_error: bool,
) -> Self {
    Self::ToolResult {
        id: None,
        call_id: call_id.into(),
        // NOTE(review): `output` is not a parameter of this function; this line
        // looks like a leftover from the pre-`summary`/`content` version —
        // confirm and remove.
        output: output.into(),
        summary: summary.into(),
        content,
        is_error,
    }
}
/// Create a non-error tool result item with both a summary and detailed content.
///
/// Delegates to `tool_result_item` with `Some(content)` and
/// `is_error = false`.
pub fn tool_result_with_content(
    call_id: impl Into<String>,
    summary: impl Into<String>,
    content: impl Into<String>,
) -> Self {
    Self::tool_result_item(call_id, summary, Some(content.into()), false)
}
// ========================================================================
// Reasoning constructors
// ========================================================================
@ -182,10 +247,40 @@ impl Item {
Self::Reasoning {
id: None,
text: text.into(),
summary: Vec::new(),
encrypted_content: None,
signature: None,
status: None,
}
}
/// Replace the reasoning summary of a `Reasoning` item.
///
/// Any other variant is returned unchanged.
pub fn with_reasoning_summary(mut self, new_summary: Vec<String>) -> Self {
    let Self::Reasoning { summary, .. } = &mut self else {
        return self;
    };
    *summary = new_summary;
    self
}
/// Store `content` as the `encrypted_content` of a `Reasoning` item.
///
/// Any other variant is returned unchanged.
pub fn with_encrypted_content(mut self, content: impl Into<String>) -> Self {
    let Self::Reasoning {
        encrypted_content: slot,
        ..
    } = &mut self
    else {
        return self;
    };
    *slot = Some(content.into());
    self
}
/// Store the Anthropic `signature` on a `Reasoning` item.
///
/// Any other variant is returned unchanged.
pub fn with_signature(mut self, sig: impl Into<String>) -> Self {
    let Self::Reasoning { signature, .. } = &mut self else {
        return self;
    };
    *signature = Some(sig.into());
    self
}
// ========================================================================
// Builder methods
// ========================================================================
@ -285,6 +380,19 @@ impl Item {
}
}
/// Parse a ToolCall `arguments` string into a JSON object.
///
/// Tool call arguments must be a JSON object at the provider API level
/// (Anthropic rejects non-object `tool_use.input`). This helper normalizes
/// anything that is not a JSON object — empty string, the literal `"null"`,
/// arrays, scalars, or parse failures — to an empty object `{}`.
pub fn parse_tool_arguments(arguments: &str) -> serde_json::Value {
match serde_json::from_str::<serde_json::Value>(arguments) {
Ok(value) if value.is_object() => value,
_ => serde_json::Value::Object(serde_json::Map::new()),
}
}
// ============================================================================
// Content Parts - Components within message items
// ============================================================================
@ -374,6 +482,21 @@ pub struct Request {
pub tools: Vec<ToolDefinition>,
/// Request configuration
pub config: RequestConfig,
/// Index into `items` marking the end of a stable, cacheable prefix.
///
/// Higher layers that know about durable prefix boundaries (e.g. a
/// post-compaction summary) set this so that caching-aware providers
/// (Anthropic today) can place a long-lived cache breakpoint there.
/// Providers without prompt caching ignore the field.
pub cache_anchor: Option<usize>,
/// 会話単位の安定キー。`prompt_cache_key` として送られる
/// (OpenAI Responses)。ChatGPT backend (codex-oauth) は明示キーが
/// 無いと org/project ハッシュ衝突でプロンプトキャッシュが
/// ほぼヒットしないため、pod 側で `SegmentId` を渡す運用を想定。
/// `cache_anchor` と違い名前空間キーであり、`prefix anchor` とは
/// 別の概念。`cache_anchor` を読まない provider と同じく、
/// `prompt_cache_key` を持たない provider は無視する。
pub cache_key: Option<String>,
}
impl Request {
@ -453,6 +576,14 @@ impl Request {
self.config.stop_sequences.push(sequence.into());
self
}
/// Set the conversation cache key.
///
/// Stores `key` in the `cache_key` field and returns the request for
/// chaining. See the `cache_key` field's documentation on `Request` for
/// details.
pub fn cache_key(mut self, key: impl Into<String>) -> Self {
    self.cache_key = Some(key.into());
    self
}
}
// ============================================================================
@ -513,6 +644,12 @@ pub struct RequestConfig {
pub top_k: Option<u32>,
/// Stop sequences
pub stop_sequences: Vec<String>,
/// Reasoning / extended-thinking 制御 (共通型、scheme 側で各社形式に投影)。
///
/// `None` のときは何も送らない。`Some` でも scheme の
/// `ModelCapability::reasoning` が `None` なら無視される。
#[serde(default)]
pub reasoning: Option<crate::llm_client::capability::ReasoningControl>,
}
impl RequestConfig {
@ -551,3 +688,54 @@ impl RequestConfig {
self
}
}
/// Tests pinning the normalization rules of `parse_tool_arguments`:
/// JSON objects pass through, everything else becomes `{}`.
#[cfg(test)]
mod parse_tool_arguments_tests {
    use super::parse_tool_arguments;
    use serde_json::{Value, json};

    /// The empty JSON object `{}` that non-object input normalizes to.
    fn empty_object() -> Value {
        Value::Object(serde_json::Map::new())
    }

    #[test]
    fn empty_string_normalizes_to_object() {
        assert_eq!(parse_tool_arguments(""), empty_object());
    }

    #[test]
    fn literal_null_normalizes_to_object() {
        // A literal "null" left over in existing sessions must be recoverable on resume.
        assert_eq!(parse_tool_arguments("null"), empty_object());
    }

    #[test]
    fn array_normalizes_to_object() {
        assert_eq!(parse_tool_arguments("[1, 2, 3]"), empty_object());
    }

    #[test]
    fn scalar_normalizes_to_object() {
        assert_eq!(parse_tool_arguments("42"), empty_object());
        assert_eq!(parse_tool_arguments("\"str\""), empty_object());
        assert_eq!(parse_tool_arguments("true"), empty_object());
    }

    #[test]
    fn invalid_json_normalizes_to_object() {
        assert_eq!(parse_tool_arguments("{not json"), empty_object());
    }

    #[test]
    fn valid_object_passes_through() {
        assert_eq!(
            parse_tool_arguments(r#"{"city":"Tokyo","days":3}"#),
            json!({"city": "Tokyo", "days": 3}),
        );
    }

    #[test]
    fn empty_object_passes_through() {
        assert_eq!(parse_tool_arguments("{}"), empty_object());
    }
}

View File

@ -0,0 +1,451 @@
//! Prune — context projection for old tool-result content.
//!
//! LLM 送信時のコンテキストから古い [`Item::ToolResult`] の `content` を
//! 省略して、コンテキスト窓のトークンを回収する。`summary` は残すので
//! 「何が起きたか」の痕跡は保たれる。
//!
//! # 設計方針
//!
//! Prune は **コンテキスト射影** であり、history の変換ではない。
//! この crate が提供するのは pure な候補抽出 [`prunable_indices`] のみで、
//! 射影の適用は上位層(`pod::prune_hook` 等)が LLM に送る一時コンテキスト
//! に対してだけ行う。Worker の永続履歴は決して変更されない。
//!
//! 保護境界は末尾 token budget で決めるが、この crate は usage 履歴を
//! 所有しない。prefix ごとの token 推定値と savings 推定は上位層から
//! callback で注入される。
use serde::{Deserialize, Serialize};
use crate::llm_client::types::Item;
use crate::token_counter::{EstimateSource, TokenEstimate};
/// Callback that returns token estimates for every prefix boundary of the
/// supplied request history.
///
/// The returned vector must have `history.len() + 1` entries, where entry `i`
/// estimates the token count of `history[..i]`. A vector of any other length,
/// or estimates whose source is [`EstimateSource::NoData`], makes prune treat
/// the request as having no candidates.
pub type TokenEstimator = Box<dyn Fn(&[Item]) -> Vec<TokenEstimate> + Send + Sync>;
/// Callback that estimates the token savings for projecting the
/// `ToolResult.content` out of `history[i]` for each `i` in `indices`.
///
/// Injected into [`crate::Worker`] via `set_savings_estimator` so the
/// Worker can make `min_savings` decisions without knowing about usage
/// measurement sources. Return `0` to signal "no data / refuse to prune".
///
/// Note that the quantity being estimated is the difference made by setting
/// `content` to `None`, not the size of the whole affected range. The item
/// itself (summary etc.) stays in place, so this callback must return savings
/// that match the actual projection.
pub type SavingsEstimator = Box<dyn Fn(&[Item], &[usize]) -> u64 + Send + Sync>;
/// Result of one prune evaluation pass, surfaced to the optional
/// [`PruneObserver`] for instrumentation.
///
/// The Worker evaluates prune once per LLM request and, if an observer is
/// registered, reports the outcome through this value. It carries the
/// fire/skip decision together with the inputs to that decision: candidate
/// count, estimated savings, and the start index of the protected region.
#[derive(Debug, Clone)]
pub struct PruneEvaluation {
    /// Length of the `prunable_indices` result. `0` when the decision is
    /// `SkippedNoCandidates`.
    pub candidate_count: usize,
    /// Estimated savings in tokens. `0` when the decision is
    /// `SkippedNoCandidates`.
    pub estimated_savings: u64,
    /// Index of the first item of the suffix protected by the token budget.
    /// `None` when the usage estimate is `NoData` and the boundary cannot be
    /// determined.
    pub protected_start_index: Option<usize>,
    /// The decision that was reached.
    pub decision: PruneDecision,
}
/// Outcome of one prune evaluation. Each variant is one branch of the
/// "fire vs skip" decision tree the Worker walks before each LLM request.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PruneDecision {
    /// `prunable_indices` was empty, so nothing was done.
    SkippedNoCandidates,
    /// There were candidates, but the estimated savings were below
    /// `min_savings`, so nothing was done.
    SkippedBelowMinSavings,
    /// There were candidates and savings >= min_savings, so the projection
    /// was applied. `pruned_count` is the number of items `project()`
    /// actually rewrote (candidates whose content was already `None`
    /// contribute 0).
    Fired { pruned_count: usize },
}
/// Optional observer invoked after each prune evaluation, regardless of
/// branch. Intended to be installed by a higher layer (such as the Pod) to
/// emit metrics.
pub type PruneObserver = Box<dyn Fn(&PruneEvaluation) + Send + Sync>;
/// Configuration for the Prune algorithm.
///
/// Both fields fall back to their default functions when absent from the
/// serialized form (`#[serde(default = ...)]`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PruneConfig {
    /// Token budget at the history tail protected from pruning.
    /// Defaults to the value of `default_protected_tokens()`.
    #[serde(default = "default_protected_tokens")]
    pub protected_tokens: u64,
    /// Minimum token savings required to actually prune. If the prunable
    /// content is smaller than this, the caller should skip to avoid
    /// pointless KV-cache invalidation. The unit is tokens; the caller
    /// is responsible for measuring savings via a usage-history-aware
    /// estimator and comparing against this threshold.
    /// Defaults to the value of `default_min_savings()`.
    #[serde(default = "default_min_savings")]
    pub min_savings: u64,
}
/// Default for `PruneConfig::protected_tokens`: 8000 tokens.
fn default_protected_tokens() -> u64 {
    8_000
}
/// Default for `PruneConfig::min_savings`: 4096 tokens.
fn default_min_savings() -> u64 {
    4_096
}
/// Builds the configuration from the same default functions serde uses, so
/// `PruneConfig::default()` and deserializing an empty config agree.
impl Default for PruneConfig {
    fn default() -> Self {
        let protected_tokens = default_protected_tokens();
        let min_savings = default_min_savings();
        Self {
            protected_tokens,
            min_savings,
        }
    }
}
/// Clear the `content` of every `Item::ToolResult` found at `indices`.
///
/// Returns how many items were actually changed: an index that points at a
/// tool result whose content is already `None`, or at a non-`ToolResult`
/// item, contributes nothing. Meant to be applied to a request-context
/// clone, never to a persistent history.
///
/// # Panics
///
/// Panics if any index is out of bounds for `items`.
pub fn project(items: &mut [Item], indices: &[usize]) -> usize {
    let mut modified = 0;
    for &idx in indices {
        if let Item::ToolResult { content, .. } = &mut items[idx] {
            // `take()` leaves `None` behind and reports whether there was
            // anything to drop.
            if content.take().is_some() {
                modified += 1;
            }
        }
    }
    modified
}
/// Indices of `Item::ToolResult { content: Some(_), .. }` that lie before
/// the suffix protected by `protected_tokens`. Pure: `items` is not mutated.
///
/// This is the candidate half of [`evaluate_candidates`]; the result is
/// empty when the protected boundary cannot be determined or when no
/// prunable tool result precedes it.
pub fn prunable_indices(
    items: &[Item],
    protected_tokens: u64,
    token_estimates: &[TokenEstimate],
) -> Vec<usize> {
    let (candidates, _protected_start) =
        evaluate_candidates(items, protected_tokens, token_estimates);
    candidates
}
/// Compute the prune candidates together with the start index of the
/// protected suffix.
///
/// Returns `(candidates, Some(start))` where `candidates` are the indices
/// below `start` holding an `Item::ToolResult` with `content: Some(_)`.
/// Returns `(vec![], None)` when the boundary cannot be determined
/// (estimates malformed or `NoData`).
pub fn evaluate_candidates(
    items: &[Item],
    protected_tokens: u64,
    token_estimates: &[TokenEstimate],
) -> (Vec<usize>, Option<usize>) {
    match protected_start_index(items, protected_tokens, token_estimates) {
        None => (Vec::new(), None),
        Some(boundary) => {
            // Only items strictly before the protected suffix are eligible.
            let candidates = items[..boundary]
                .iter()
                .enumerate()
                .filter(|(_, item)| {
                    matches!(
                        item,
                        Item::ToolResult {
                            content: Some(_),
                            ..
                        }
                    )
                })
                .map(|(idx, _)| idx)
                .collect();
            (candidates, Some(boundary))
        }
    }
}
/// Find the index of the first item of the protected history suffix.
///
/// `token_estimates[i]` is read as the estimate for `items[..i]`, so the
/// slice must have `items.len() + 1` entries.
///
/// Returns:
/// * `None` if the estimate slice has the wrong length, the total estimate
///   is `NoData`, or a prefix estimate visited during the scan is `NoData`.
/// * `Some(items.len())` if `protected_tokens` is zero (nothing protected).
/// * Otherwise the largest index whose tail (`total - prefix`) reaches
///   `protected_tokens`; the item that crosses the budget is itself part of
///   the protected suffix. If the budget is never reached the scan ends at
///   the front and the whole history is protected.
fn protected_start_index(
    items: &[Item],
    protected_tokens: u64,
    token_estimates: &[TokenEstimate],
) -> Option<usize> {
    let len = items.len();
    if token_estimates.len() != len + 1 {
        return None;
    }

    let total = token_estimates[len];
    if total.source == EstimateSource::NoData {
        return None;
    }
    if protected_tokens == 0 {
        return Some(len);
    }

    // Walk the prefix boundaries from the tail towards the front, growing
    // the protected suffix one item at a time.
    let mut start = len;
    for (idx, prefix) in token_estimates[..len].iter().enumerate().rev() {
        if prefix.source == EstimateSource::NoData {
            return None;
        }
        start = idx;
        if total.tokens.saturating_sub(prefix.tokens) >= protected_tokens {
            break;
        }
    }
    Some(start)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Helper: build a history with interleaved user messages and tool results.
    ///
    /// Each turn contributes a user message, an assistant message, and then a
    /// tool call + tool result pair per `(summary, content)` entry. A `None`
    /// content produces a summary-only tool result.
    fn make_history(turns: &[(&str, Vec<(&str, Option<&str>)>)]) -> Vec<Item> {
        let mut items = Vec::new();
        for (user_msg, tool_results) in turns {
            items.push(Item::user_message(*user_msg));
            items.push(Item::assistant_message("ok"));
            for (i, (summary, content)) in tool_results.iter().enumerate() {
                let call_id = format!("call_{}", items.len() + i);
                items.push(Item::tool_call(&call_id, "some_tool", "{}"));
                match content {
                    Some(c) => items.push(Item::tool_result_with_content(&call_id, *summary, *c)),
                    None => items.push(Item::tool_result(&call_id, *summary)),
                }
            }
        }
        items
    }

    /// Wrap raw prefix token counts as `Measured` estimates.
    fn measured_prefix(tokens: &[u64]) -> Vec<TokenEstimate> {
        tokens
            .iter()
            .copied()
            .map(|tokens| TokenEstimate {
                tokens,
                source: EstimateSource::Measured,
            })
            .collect()
    }

    /// Prefix estimates where every item costs `item_tokens` tokens.
    fn uniform_estimates(items: &[Item], item_tokens: u64) -> Vec<TokenEstimate> {
        let mut tokens = Vec::with_capacity(items.len() + 1);
        for i in 0..=items.len() {
            tokens.push(i as u64 * item_tokens);
        }
        measured_prefix(&tokens)
    }

    /// Prefix estimates built as the running sum of per-item token counts.
    fn estimates_from_item_tokens(item_tokens: &[u64]) -> Vec<TokenEstimate> {
        let mut prefix = Vec::with_capacity(item_tokens.len() + 1);
        let mut acc = 0;
        prefix.push(acc);
        for tokens in item_tokens {
            acc += tokens;
            prefix.push(acc);
        }
        measured_prefix(&prefix)
    }

    /// Correctly sized estimates where only the empty-prefix entry is
    /// `Measured`; every other boundary is `NoData`.
    fn no_data_estimates(items: &[Item]) -> Vec<TokenEstimate> {
        (0..=items.len())
            .map(|i| TokenEstimate {
                tokens: i as u64,
                source: if i == 0 {
                    EstimateSource::Measured
                } else {
                    EstimateSource::NoData
                },
            })
            .collect()
    }

    #[test]
    fn no_candidates_when_estimate_has_no_data() {
        let items = make_history(&[("turn1", vec![("summary1", Some("big content here"))])]);
        let estimates = no_data_estimates(&items);
        let (candidates, protected_start) = evaluate_candidates(&items, 10, &estimates);
        assert!(candidates.is_empty());
        assert_eq!(protected_start, None);
    }

    #[test]
    fn no_candidates_when_history_fits_in_protected_tokens() {
        let items = make_history(&[
            ("turn1", vec![("summary1", Some("big content here"))]),
            ("turn2", vec![("summary2", Some("more content"))]),
        ]);
        let estimates = uniform_estimates(&items, 10);
        assert!(prunable_indices(&items, 10_000, &estimates).is_empty());
    }

    #[test]
    fn candidates_before_token_protected_suffix() {
        let big = "x".repeat(4096 * 4);
        let items = make_history(&[
            ("turn1", vec![("s1", Some(&big))]),
            ("turn2", vec![("s2", Some(&big))]),
            ("turn3", vec![("s3", Some("keep me"))]),
            ("turn4", vec![("s4", Some("keep me too"))]),
        ]);
        let estimates = uniform_estimates(&items, 10);
        let candidates = prunable_indices(&items, 80, &estimates);
        assert_eq!(candidates.len(), 2);
        // suffix budget 80 tokens protects turn3+turn4 (8 items), so only s1/s2 are candidates.
        for &i in &candidates {
            if let Item::ToolResult { summary, .. } = &items[i] {
                assert!(summary == "s1" || summary == "s2");
            } else {
                panic!("non tool-result selected");
            }
        }
    }

    #[test]
    fn single_long_task_gets_candidates_without_multiple_user_turns() {
        let big = "x".repeat(4096 * 8);
        let items = make_history(&[(
            "one long task",
            vec![
                ("s1", Some(&big)),
                ("s2", Some(&big)),
                ("s3", Some(&big)),
                ("s4", Some(&big)),
            ],
        )]);
        // user + assistant are cheap; every ToolCall is cheap; every ToolResult is heavy.
        let item_tokens = vec![1, 1, 1, 5_000, 1, 5_000, 1, 5_000, 1, 5_000];
        let estimates = estimates_from_item_tokens(&item_tokens);
        let (candidates, protected_start) = evaluate_candidates(&items, 8_000, &estimates);
        // The tail reaches 8_000 tokens only once item 7 (the third tool
        // result) is included, so the protected suffix starts there.
        assert_eq!(protected_start, Some(7));
        assert_eq!(candidates.len(), 2);
        for &i in &candidates {
            if let Item::ToolResult { summary, .. } = &items[i] {
                assert!(summary == "s1" || summary == "s2");
            } else {
                panic!("non tool-result selected");
            }
        }
    }

    #[test]
    fn already_pruned_items_excluded_from_candidates() {
        let items = make_history(&[
            ("turn1", vec![("s1", None)]), // already pruned (content=None)
            ("turn2", vec![]),
            ("turn3", vec![]),
            ("turn4", vec![]),
        ]);
        let estimates = uniform_estimates(&items, 10);
        assert!(prunable_indices(&items, 20, &estimates).is_empty());
    }

    #[test]
    fn project_drops_content_and_counts_modifications() {
        let big = "x".repeat(64);
        let mut items = make_history(&[
            ("turn1", vec![("s1", Some(&big))]),
            ("turn2", vec![("s2", Some(&big))]),
            ("turn3", vec![("s3", Some("keep me"))]),
            ("turn4", vec![("s4", Some("keep me too"))]),
        ]);
        let estimates = uniform_estimates(&items, 10);
        let candidates = prunable_indices(&items, 80, &estimates);
        let count = project(&mut items, &candidates);
        assert_eq!(count, 2);
        for item in &items {
            if let Item::ToolResult {
                summary, content, ..
            } = item
            {
                if summary == "s1" || summary == "s2" {
                    assert!(content.is_none(), "old content should be projected out");
                } else {
                    assert!(content.is_some(), "protected content should remain");
                }
            }
        }
    }

    #[test]
    fn project_skips_already_pruned_items() {
        // indices points at an item whose content is already None.
        // project() should count it as 0 modifications.
        let mut items = make_history(&[
            ("turn1", vec![("s1", None)]),
            ("turn2", vec![("s2", Some("hello"))]),
        ]);
        // Manually target s1 even though it's already None.
        let target = items
            .iter()
            .position(|it| matches!(it, Item::ToolResult { summary, .. } if summary == "s1"))
            .unwrap();
        let count = project(&mut items, &[target]);
        assert_eq!(count, 0);
    }

    #[test]
    fn project_is_idempotent() {
        let big = "x".repeat(64);
        let mut items = make_history(&[
            ("turn1", vec![("s1", Some(&big))]),
            ("turn2", vec![]),
            ("turn3", vec![]),
            ("turn4", vec![]),
        ]);
        let estimates = uniform_estimates(&items, 10);
        let candidates = prunable_indices(&items, 20, &estimates);
        assert_eq!(project(&mut items, &candidates), 1);
        // Second pass: reusing the same prunable_indices result modifies nothing.
        assert_eq!(project(&mut items, &candidates), 0);
    }

    #[test]
    fn evaluate_candidates_returns_protected_start_index() {
        let big = "x".repeat(64);
        let items = make_history(&[
            ("turn1", vec![("s1", Some(&big))]),
            ("turn2", vec![("s2", Some(&big))]),
            ("turn3", vec![("s3", Some("keep"))]),
            ("turn4", vec![("s4", Some("keep too"))]),
        ]);
        let estimates = uniform_estimates(&items, 10);
        let (candidates, protected_start) = evaluate_candidates(&items, 80, &estimates);
        assert_eq!(candidates.len(), 2);
        // protected_tokens=80 → protected suffix is turn3+turn4, starting at index 8.
        assert_eq!(protected_start, Some(8));
    }

    #[test]
    fn evaluate_candidates_reports_zero_start_when_everything_is_protected() {
        let items = make_history(&[("only", vec![("s", Some("x"))])]);
        let estimates = uniform_estimates(&items, 10);
        let (candidates, protected_start) = evaluate_candidates(&items, 10_000, &estimates);
        assert!(candidates.is_empty());
        assert_eq!(protected_start, Some(0));
    }

    #[test]
    fn zero_protected_tokens_allows_all_tool_results_as_candidates() {
        let big = "x".repeat(64);
        let items = make_history(&[("turn1", vec![("s1", Some(&big)), ("s2", Some(&big))])]);
        let estimates = uniform_estimates(&items, 10);
        let (candidates, protected_start) = evaluate_candidates(&items, 0, &estimates);
        assert_eq!(protected_start, Some(items.len()));
        assert_eq!(candidates.len(), 2);
    }

    #[test]
    fn malformed_estimate_vector_is_treated_as_no_boundary() {
        let items = make_history(&[("turn1", vec![("s1", Some("x"))])]);
        // An empty estimate slice fails the `items.len() + 1` length check.
        let (candidates, protected_start) = evaluate_candidates(&items, 10, &[]);
        assert!(candidates.is_empty());
        assert_eq!(protected_start, None);
    }
}

View File

@ -1,7 +1,7 @@
//! Worker State
//!
//! State marker types for cache protection using the Type-state pattern.
//! Worker has state transitions from `Mutable` → `CacheLocked`.
//! Worker has state transitions from `Mutable` → `Locked`.
/// Marker trait representing Worker state
///
@ -19,7 +19,7 @@ mod private {
/// - Editing message history (add, delete, clear)
/// - Registering tools and hooks
///
/// Can transition to [`CacheLocked`] state via `Worker::lock()`.
/// Can transition to [`Locked`] state via `Worker::lock()`.
///
/// # Examples
///
@ -54,7 +54,7 @@ impl WorkerState for Mutable {}
/// Can return to [`Mutable`] state via `Worker::unlock()`,
/// but note that cache protection will be released.
#[derive(Debug, Clone, Copy, Default)]
pub struct CacheLocked;
pub struct Locked;
impl private::Sealed for CacheLocked {}
impl WorkerState for CacheLocked {}
impl private::Sealed for Locked {}
impl WorkerState for Locked {}

View File

@ -1,371 +0,0 @@
//! Event Subscription
//!
//! Trait for receiving streaming events from LLM in real-time.
//! Used for stream display to UI and progress display.
use std::sync::{Arc, Mutex};
use crate::{
handler::{
ErrorKind, Handler, StatusKind, TextBlockEvent, TextBlockKind, ToolUseBlockEvent,
ToolUseBlockKind, UsageKind,
},
hook::ToolCall,
timeline::event::{ErrorEvent, StatusEvent, UsageEvent},
};
// =============================================================================
// WorkerSubscriber Trait
// =============================================================================
/// Trait for subscribing to streaming events from LLM
///
/// When registered with Worker, you can receive events from text generation
/// and tool calls in real-time. Ideal for stream display to UI.
///
/// # Available Events
///
/// - **Block events**: Text, tool use (with scope)
/// - **Meta events**: Usage, status, error
/// - **Completion events**: Text complete, tool call complete
/// - **Turn control**: Turn start, turn end
///
/// # Examples
///
/// ```ignore
/// use llm_worker::subscriber::WorkerSubscriber;
/// use llm_worker::timeline::TextBlockEvent;
///
/// struct StreamPrinter;
///
/// impl WorkerSubscriber for StreamPrinter {
/// type TextBlockScope = ();
/// type ToolUseBlockScope = ();
///
/// fn on_text_block(&mut self, _: &mut (), event: &TextBlockEvent) {
/// if let TextBlockEvent::Delta(text) = event {
/// print!("{}", text); // Real-time output
/// }
/// }
///
/// fn on_text_complete(&mut self, text: &str) {
/// println!("\n--- Complete: {} chars ---", text.len());
/// }
/// }
///
/// // Register with Worker
/// worker.subscribe(StreamPrinter);
/// ```
pub trait WorkerSubscriber: Send {
    // =========================================================================
    // Scope Types (for block events)
    // =========================================================================

    /// Scope type for text block processing
    ///
    /// Generated with Default::default() at block start,
    /// destroyed at block end.
    type TextBlockScope: Default + Send + Sync;

    /// Scope type for tool use block processing
    ///
    /// Must be `Default`, like [`Self::TextBlockScope`].
    type ToolUseBlockScope: Default + Send + Sync;

    // =========================================================================
    // Block Events (with scope management)
    // =========================================================================

    /// Text block event
    ///
    /// Has Start/Delta/Stop lifecycle.
    /// Scope is generated at block start and destroyed at end.
    /// The default implementation does nothing.
    #[allow(unused_variables)]
    fn on_text_block(&mut self, scope: &mut Self::TextBlockScope, event: &TextBlockEvent) {}

    /// Tool use block event
    ///
    /// Has Start/InputJsonDelta/Stop lifecycle.
    /// The default implementation does nothing.
    #[allow(unused_variables)]
    fn on_tool_use_block(
        &mut self,
        scope: &mut Self::ToolUseBlockScope,
        event: &ToolUseBlockEvent,
    ) {
    }

    // =========================================================================
    // Single Events (no scope needed)
    // =========================================================================

    /// Usage event. The default implementation does nothing.
    #[allow(unused_variables)]
    fn on_usage(&mut self, event: &UsageEvent) {}

    /// Status event. The default implementation does nothing.
    #[allow(unused_variables)]
    fn on_status(&mut self, event: &StatusEvent) {}

    /// Error event. The default implementation does nothing.
    #[allow(unused_variables)]
    fn on_error(&mut self, event: &ErrorEvent) {}

    // =========================================================================
    // Accumulated Events (added in Worker layer)
    // =========================================================================

    /// Text complete event
    ///
    /// When a text block completes, the entire accumulated text is passed.
    /// Convenient for receiving the final result after block processing.
    /// The adapter in this file calls it right after forwarding the block's
    /// `Stop` event to `on_text_block`.
    #[allow(unused_variables)]
    fn on_text_complete(&mut self, text: &str) {}

    /// Tool call complete event
    ///
    /// When a tool use block completes, the complete ToolCall is passed.
    /// The adapter in this file builds it from the `Start` metadata and the
    /// accumulated `InputJsonDelta` text; if that text does not parse as
    /// JSON, `input` is the default `serde_json::Value`.
    #[allow(unused_variables)]
    fn on_tool_call_complete(&mut self, call: &ToolCall) {}

    // =========================================================================
    // Turn Control
    // =========================================================================

    /// On turn start
    ///
    /// `turn` is a 0-based turn number.
    #[allow(unused_variables)]
    fn on_turn_start(&mut self, turn: usize) {}

    /// On turn end
    ///
    /// NOTE(review): `turn` is presumably the same 0-based number passed to
    /// `on_turn_start` — confirm against the caller.
    #[allow(unused_variables)]
    fn on_turn_end(&mut self, turn: usize) {}
}
// =============================================================================
// SubscriberAdapter - Bridge WorkerSubscriber to Timeline handlers
// =============================================================================
// =============================================================================
// TextBlock Handler Adapter
// =============================================================================
/// Adapter that exposes a shared [`WorkerSubscriber`] as a `TextBlockKind`
/// handler.
pub(crate) struct TextBlockSubscriberAdapter<S: WorkerSubscriber> {
    /// Subscriber handle shared behind a mutex.
    subscriber: Arc<Mutex<S>>,
}

impl<S: WorkerSubscriber> TextBlockSubscriberAdapter<S> {
    /// Wrap an already-shared subscriber.
    pub fn new(subscriber: Arc<Mutex<S>>) -> Self {
        Self { subscriber }
    }
}

// Implemented by hand so that cloning does not require `S: Clone`; a clone
// shares the same subscriber.
impl<S: WorkerSubscriber> Clone for TextBlockSubscriberAdapter<S> {
    fn clone(&self) -> Self {
        Self::new(Arc::clone(&self.subscriber))
    }
}
/// Per-block scope for text blocks: the subscriber's own scope plus the
/// text accumulated so far.
pub struct TextBlockScopeWrapper<S: WorkerSubscriber> {
    /// Scope value owned by the subscriber.
    inner: S::TextBlockScope,
    /// Concatenated `Delta` text, handed to `on_text_complete`.
    buffer: String,
}

// Implemented by hand so that `S` itself does not have to be `Default`.
impl<S: WorkerSubscriber> Default for TextBlockScopeWrapper<S> {
    fn default() -> Self {
        let inner = S::TextBlockScope::default();
        let buffer = String::new();
        Self { inner, buffer }
    }
}
impl<S: WorkerSubscriber + 'static> Handler<TextBlockKind> for TextBlockSubscriberAdapter<S> {
    type Scope = TextBlockScopeWrapper<S>;

    /// Forward a text block event to the subscriber.
    ///
    /// `Delta` text is appended to the scope buffer first. If the subscriber
    /// mutex cannot be locked the event is dropped (the buffer update still
    /// happens). On `Stop`, `on_text_complete` is called with the buffered
    /// text after `on_text_block`.
    fn on_event(&mut self, scope: &mut Self::Scope, event: &TextBlockEvent) {
        if let TextBlockEvent::Delta(text) = event {
            scope.buffer.push_str(text);
        }

        let Ok(mut guard) = self.subscriber.lock() else {
            return;
        };
        guard.on_text_block(&mut scope.inner, event);
        if let TextBlockEvent::Stop(_) = event {
            guard.on_text_complete(&scope.buffer);
        }
    }
}
// =============================================================================
// ToolUseBlock Handler Adapter
// =============================================================================
/// Adapter that exposes a shared [`WorkerSubscriber`] as a
/// `ToolUseBlockKind` handler.
pub(crate) struct ToolUseBlockSubscriberAdapter<S: WorkerSubscriber> {
    /// Subscriber handle shared behind a mutex.
    subscriber: Arc<Mutex<S>>,
}

impl<S: WorkerSubscriber> ToolUseBlockSubscriberAdapter<S> {
    /// Wrap an already-shared subscriber.
    pub fn new(subscriber: Arc<Mutex<S>>) -> Self {
        Self { subscriber }
    }
}

// Implemented by hand so that cloning does not require `S: Clone`; a clone
// shares the same subscriber.
impl<S: WorkerSubscriber> Clone for ToolUseBlockSubscriberAdapter<S> {
    fn clone(&self) -> Self {
        Self::new(Arc::clone(&self.subscriber))
    }
}
/// Per-block scope for tool use blocks: the subscriber's own scope plus the
/// metadata and JSON text needed to assemble the final `ToolCall`.
pub struct ToolUseBlockScopeWrapper<S: WorkerSubscriber> {
    /// Scope value owned by the subscriber.
    inner: S::ToolUseBlockScope,
    /// Tool call ID captured from the `Start` event.
    id: String,
    /// Tool name captured from the `Start` event.
    name: String,
    /// Concatenated `InputJsonDelta` fragments.
    input_json: String,
}

// Implemented by hand so that `S` itself does not have to be `Default`.
impl<S: WorkerSubscriber> Default for ToolUseBlockScopeWrapper<S> {
    fn default() -> Self {
        let inner = S::ToolUseBlockScope::default();
        Self {
            inner,
            id: String::new(),
            name: String::new(),
            input_json: String::new(),
        }
    }
}
impl<S: WorkerSubscriber + 'static> Handler<ToolUseBlockKind> for ToolUseBlockSubscriberAdapter<S> {
    type Scope = ToolUseBlockScopeWrapper<S>;

    /// Forward a tool use block event to the subscriber.
    ///
    /// `Start` records the call ID and name in the scope and
    /// `InputJsonDelta` appends to the JSON buffer. If the subscriber mutex
    /// cannot be locked the event is dropped (the scope update still
    /// happens). On `Stop`, the buffered JSON is parsed — falling back to
    /// the default `serde_json::Value` on failure — and passed to
    /// `on_tool_call_complete` after `on_tool_use_block`.
    fn on_event(&mut self, scope: &mut Self::Scope, event: &ToolUseBlockEvent) {
        match event {
            ToolUseBlockEvent::Start(start) => {
                scope.id = start.id.clone();
                scope.name = start.name.clone();
            }
            ToolUseBlockEvent::InputJsonDelta(json) => scope.input_json.push_str(json),
            _ => {}
        }

        let Ok(mut guard) = self.subscriber.lock() else {
            return;
        };
        guard.on_tool_use_block(&mut scope.inner, event);

        if matches!(event, ToolUseBlockEvent::Stop(_)) {
            let input: serde_json::Value =
                serde_json::from_str(&scope.input_json).unwrap_or_default();
            let completed = ToolCall {
                id: scope.id.clone(),
                name: scope.name.clone(),
                input,
            };
            guard.on_tool_call_complete(&completed);
        }
    }
}
// =============================================================================
// Meta Event Handler Adapters
// =============================================================================
/// Adapter that forwards `UsageKind` events to
/// [`WorkerSubscriber::on_usage`].
pub(crate) struct UsageSubscriberAdapter<S: WorkerSubscriber> {
    /// Subscriber handle shared behind a mutex.
    subscriber: Arc<Mutex<S>>,
}

impl<S: WorkerSubscriber> UsageSubscriberAdapter<S> {
    /// Wrap an already-shared subscriber.
    pub fn new(subscriber: Arc<Mutex<S>>) -> Self {
        Self { subscriber }
    }
}

// Implemented by hand so that cloning does not require `S: Clone`.
impl<S: WorkerSubscriber> Clone for UsageSubscriberAdapter<S> {
    fn clone(&self) -> Self {
        Self::new(Arc::clone(&self.subscriber))
    }
}

impl<S: WorkerSubscriber + 'static> Handler<UsageKind> for UsageSubscriberAdapter<S> {
    type Scope = ();

    /// Pass the event to the subscriber; it is dropped if the mutex cannot
    /// be locked.
    fn on_event(&mut self, _scope: &mut Self::Scope, event: &UsageEvent) {
        let Ok(mut guard) = self.subscriber.lock() else {
            return;
        };
        guard.on_usage(event);
    }
}
/// Adapter that forwards `StatusKind` events to
/// [`WorkerSubscriber::on_status`].
pub(crate) struct StatusSubscriberAdapter<S: WorkerSubscriber> {
    /// Subscriber handle shared behind a mutex.
    subscriber: Arc<Mutex<S>>,
}

impl<S: WorkerSubscriber> StatusSubscriberAdapter<S> {
    /// Wrap an already-shared subscriber.
    pub fn new(subscriber: Arc<Mutex<S>>) -> Self {
        Self { subscriber }
    }
}

// Implemented by hand so that cloning does not require `S: Clone`.
impl<S: WorkerSubscriber> Clone for StatusSubscriberAdapter<S> {
    fn clone(&self) -> Self {
        Self::new(Arc::clone(&self.subscriber))
    }
}

impl<S: WorkerSubscriber + 'static> Handler<StatusKind> for StatusSubscriberAdapter<S> {
    type Scope = ();

    /// Pass the event to the subscriber; it is dropped if the mutex cannot
    /// be locked.
    fn on_event(&mut self, _scope: &mut Self::Scope, event: &StatusEvent) {
        let Ok(mut guard) = self.subscriber.lock() else {
            return;
        };
        guard.on_status(event);
    }
}
/// Adapter that forwards `ErrorKind` events to
/// [`WorkerSubscriber::on_error`].
pub(crate) struct ErrorSubscriberAdapter<S: WorkerSubscriber> {
    /// Subscriber handle shared behind a mutex.
    subscriber: Arc<Mutex<S>>,
}

impl<S: WorkerSubscriber> ErrorSubscriberAdapter<S> {
    /// Wrap an already-shared subscriber.
    pub fn new(subscriber: Arc<Mutex<S>>) -> Self {
        Self { subscriber }
    }
}

// Implemented by hand so that cloning does not require `S: Clone`.
impl<S: WorkerSubscriber> Clone for ErrorSubscriberAdapter<S> {
    fn clone(&self) -> Self {
        Self::new(Arc::clone(&self.subscriber))
    }
}

impl<S: WorkerSubscriber + 'static> Handler<ErrorKind> for ErrorSubscriberAdapter<S> {
    type Scope = ();

    /// Pass the event to the subscriber; it is dropped if the mutex cannot
    /// be locked.
    fn on_event(&mut self, _scope: &mut Self::Scope, event: &ErrorEvent) {
        let Ok(mut guard) = self.subscriber.lock() else {
            return;
        };
        guard.on_error(event);
    }
}

View File

@ -10,14 +10,16 @@
//! - [`ToolCallCollector`] - ツール呼び出しを収集するHandler
pub mod event;
mod reasoning_item_collector;
mod text_block_collector;
mod timeline;
mod tool_call_collector;
// 公開API
pub use event::*;
pub use reasoning_item_collector::ReasoningItemCollector;
pub use text_block_collector::TextBlockCollector;
pub use timeline::{ErasedHandler, HandlerWrapper, Timeline};
pub use timeline::Timeline;
pub use tool_call_collector::ToolCallCollector;
// 型定義からのre-export
@ -28,6 +30,7 @@ pub use crate::handler::{
Handler,
Kind,
PingKind,
ReasoningItemKind,
StatusKind,
// Block Events
TextBlockEvent,

View File

@ -0,0 +1,77 @@
//! `ReasoningItemCollector` - 完成済み reasoning item を収集する Handler
//!
//! Timeline の `ReasoningItemKind` Handler として登録し、scheme 側が
//! `Event::ReasoningItem` を発火するたびに 1 件ずつバッファに溜める。
//! Worker はターン終了時に `take_collected()` でドレインして
//! `Item::Reasoning` として `worker.history` に append する。
use std::sync::{Arc, Mutex};
use crate::handler::{Handler, ReasoningItemKind};
use crate::llm_client::event::ReasoningItemEvent;
/// Buffer of collected reasoning items.
///
/// Clones share the same underlying buffer, so a clone handed to a timeline
/// and the copy kept by the caller observe the same items.
#[derive(Clone, Default)]
pub struct ReasoningItemCollector {
    /// Shared, mutex-guarded list of the items received so far.
    collected: Arc<Mutex<Vec<ReasoningItemEvent>>>,
}

impl ReasoningItemCollector {
    /// Creates a collector with an empty buffer.
    pub fn new() -> Self {
        Self {
            collected: Arc::new(Mutex::new(Vec::new())),
        }
    }

    /// Removes and returns every collected item, leaving the buffer empty.
    ///
    /// # Panics
    ///
    /// Panics if the buffer mutex is poisoned.
    pub fn take_collected(&self) -> Vec<ReasoningItemEvent> {
        let mut buffer = self.collected.lock().unwrap();
        std::mem::take(&mut *buffer)
    }

    /// Discards every collected item.
    ///
    /// # Panics
    ///
    /// Panics if the buffer mutex is poisoned.
    pub fn clear(&self) {
        let mut buffer = self.collected.lock().unwrap();
        buffer.clear();
    }
}
impl Handler<ReasoningItemKind> for ReasoningItemCollector {
    type Scope = ();

    /// Appends a clone of `event` to the end of the shared buffer.
    ///
    /// # Panics
    ///
    /// Panics if the buffer mutex is poisoned.
    fn on_event(&mut self, _scope: &mut Self::Scope, event: &ReasoningItemEvent) {
        let mut buffer = self.collected.lock().unwrap();
        buffer.push(event.clone());
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::llm_client::event::Event;
    use crate::timeline::Timeline;

    /// Items dispatched through a `Timeline` are buffered in arrival order,
    /// and `take_collected` drains the buffer.
    #[test]
    fn collects_in_order() {
        let collector = ReasoningItemCollector::new();
        let mut timeline = Timeline::new();
        timeline.on_reasoning_item(collector.clone());

        let first = Event::ReasoningItem(ReasoningItemEvent {
            id: Some("r1".into()),
            text: "first".into(),
            signature: Some("sig1".into()),
            ..Default::default()
        });
        let second = Event::ReasoningItem(ReasoningItemEvent {
            id: Some("r2".into()),
            text: "second".into(),
            ..Default::default()
        });
        timeline.dispatch(&first);
        timeline.dispatch(&second);

        let drained = collector.take_collected();
        assert_eq!(drained.len(), 2);
        assert_eq!(drained[0].text, "first");
        assert_eq!(drained[0].signature.as_deref(), Some("sig1"));
        assert_eq!(drained[1].text, "second");

        // `take_collected` drains, so a second call yields nothing.
        let leftover = collector.take_collected();
        assert!(leftover.is_empty());
    }
}

View File

@ -8,6 +8,33 @@ use std::marker::PhantomData;
use super::event::*;
use crate::handler::*;
// =============================================================================
// Helpers
// =============================================================================
/// Folds one more `UsageEvent` of the same request into the accumulator.
///
/// Each field is handled independently: a `Some` value in `new` replaces the
/// accumulated one, while `None` leaves the accumulated value untouched.
/// Some providers report the input/cache counters only on the first event and
/// keep updating `output_tokens` on later ones, so keeping just the last
/// event would lose data; the latest non-`None` value is kept per field.
fn merge_usage(acc: &mut UsageEvent, new: &UsageEvent) {
    acc.input_tokens = new.input_tokens.or(acc.input_tokens);
    acc.output_tokens = new.output_tokens.or(acc.output_tokens);
    acc.total_tokens = new.total_tokens.or(acc.total_tokens);
    acc.cache_read_input_tokens = new
        .cache_read_input_tokens
        .or(acc.cache_read_input_tokens);
    acc.cache_creation_input_tokens = new
        .cache_creation_input_tokens
        .or(acc.cache_creation_input_tokens);
}
// =============================================================================
// Type-erased Handler
// =============================================================================
@ -354,6 +381,7 @@ pub struct Timeline {
ping_handlers: Vec<Box<dyn ErasedHandler<PingKind>>>,
status_handlers: Vec<Box<dyn ErasedHandler<StatusKind>>>,
error_handlers: Vec<Box<dyn ErasedHandler<ErrorKind>>>,
reasoning_item_handlers: Vec<Box<dyn ErasedHandler<ReasoningItemKind>>>,
// Block系ハンドラーBlockTypeごとにグループ化
text_block_handlers: Vec<Box<dyn ErasedBlockHandler>>,
@ -362,6 +390,12 @@ pub struct Timeline {
// 現在アクティブなブロック
current_block: Option<BlockType>,
// 1リクエスト内で受信した Usage event の集約バッファ。
// Anthropic は message_start と message_delta、Gemini は各チャンクと、
// 多くのプロバイダが複数 Usage を発行するため、リクエスト境界で
// 1度だけ発火するためにここでマージする。flush_usage() で発火する。
pending_usage: Option<UsageEvent>,
}
impl Default for Timeline {
@ -377,10 +411,12 @@ impl Timeline {
ping_handlers: Vec::new(),
status_handlers: Vec::new(),
error_handlers: Vec::new(),
reasoning_item_handlers: Vec::new(),
text_block_handlers: Vec::new(),
thinking_block_handlers: Vec::new(),
tool_use_block_handlers: Vec::new(),
current_block: None,
pending_usage: None,
}
}
@ -437,6 +473,18 @@ impl Timeline {
self
}
/// `ReasoningItemKind` 用 Handler を登録
pub fn on_reasoning_item<H>(&mut self, handler: H) -> &mut Self
where
H: Handler<ReasoningItemKind> + Send + Sync + 'static,
H::Scope: Send + Sync,
{
let mut wrapper = HandlerWrapper::new(handler);
wrapper.start_scope();
self.reasoning_item_handlers.push(Box::new(wrapper));
self
}
/// TextBlockKind用のHandlerを登録
pub fn on_text_block<H>(&mut self, handler: H) -> &mut Self
where
@ -488,12 +536,30 @@ impl Timeline {
Event::BlockDelta(d) => self.handle_block_delta(d),
Event::BlockStop(s) => self.handle_block_stop(s),
Event::BlockAbort(a) => self.handle_block_abort(a),
// 完成済み reasoning item: 即時ディスパッチ
Event::ReasoningItem(r) => self.dispatch_reasoning_item(r),
}
}
/// Usage event を即時には dispatch せず、pending_usage にマージする。
/// 1リクエスト内で複数の Usage event が来ても、ハンドラには 1 度だけ
/// 最終値を渡したいため。flush_usage() で発火する。
fn dispatch_usage(&mut self, event: &UsageEvent) {
for handler in &mut self.usage_handlers {
handler.dispatch(event);
match &mut self.pending_usage {
Some(acc) => merge_usage(acc, event),
None => self.pending_usage = Some(event.clone()),
}
}
/// Delivers the buffered usage value to every usage handler and clears the buffer.
///
/// Intended to be called once when the stream of a single request ends.
/// Does nothing when no usage event has been buffered.
pub fn flush_usage(&mut self) {
    let Some(merged) = self.pending_usage.take() else {
        return;
    };
    for handler in self.usage_handlers.iter_mut() {
        handler.dispatch(&merged);
    }
}
@ -515,6 +581,12 @@ impl Timeline {
}
}
/// Passes a reasoning item to every registered reasoning-item handler,
/// in registration order.
fn dispatch_reasoning_item(&mut self, event: &ReasoningItemEvent) {
    self.reasoning_item_handlers.iter_mut().for_each(|handler| {
        handler.dispatch(event);
    });
}
fn handle_block_start(&mut self, start: &BlockStart) {
self.current_block = Some(start.block_type);
@ -629,9 +701,63 @@ mod tests {
timeline.on_usage(handler);
timeline.dispatch(&Event::usage(100, 50));
// pending_usage に積まれているだけなのでまだ未発火
assert_eq!(calls.lock().unwrap().len(), 0);
// flush で 1 度だけ発火
timeline.flush_usage();
let recorded = calls.lock().unwrap();
assert_eq!(recorded.len(), 1);
assert_eq!(recorded[0].input_tokens, Some(100));
}
/// Several usage events of one request reach handlers as a single merged
/// event, and only when `flush_usage` is called.
#[test]
fn test_usage_aggregation_and_flush() {
    /// Records every usage event that reaches the handler.
    struct TestUsageHandler {
        calls: Arc<Mutex<Vec<UsageEvent>>>,
    }

    impl Handler<UsageKind> for TestUsageHandler {
        type Scope = ();

        fn on_event(&mut self, _scope: &mut (), event: &UsageEvent) {
            let mut log = self.calls.lock().unwrap();
            log.push(event.clone());
        }
    }

    let calls = Arc::new(Mutex::new(Vec::new()));
    let mut timeline = Timeline::new();
    timeline.on_usage(TestUsageHandler {
        calls: Arc::clone(&calls),
    });

    // Anthropic-style: message_start carries the input count plus a provisional output count.
    let initial_report = Event::Usage(UsageEvent {
        input_tokens: Some(409),
        output_tokens: Some(1),
        total_tokens: Some(410),
        cache_read_input_tokens: Some(0),
        cache_creation_input_tokens: Some(0),
    });
    // message_delta carries the final output count.
    let final_report = Event::Usage(UsageEvent {
        input_tokens: Some(409),
        output_tokens: Some(71),
        total_tokens: Some(480),
        cache_read_input_tokens: Some(0),
        cache_creation_input_tokens: Some(0),
    });
    timeline.dispatch(&initial_report);
    timeline.dispatch(&final_report);

    // Nothing is delivered before the flush.
    assert_eq!(calls.lock().unwrap().len(), 0);

    timeline.flush_usage();
    {
        let recorded = calls.lock().unwrap();
        assert_eq!(recorded.len(), 1);
        assert_eq!(recorded[0].input_tokens, Some(409));
        assert_eq!(recorded[0].output_tokens, Some(71));
    }

    // A second flush has nothing left to deliver.
    timeline.flush_usage();
    assert_eq!(calls.lock().unwrap().len(), 1);
}
}

Some files were not shown because too many files have changed in this diff Show More