Compare commits

...

496 Commits

Author SHA1 Message Date
b0c3b79e11
ticket: close installed binary rename 2026-05-29 09:39:09 +09:00
fa4130ab68
merge: rename installed binaries 2026-05-29 09:39:08 +09:00
372490707a
chore: use static crate fetch for nix vendor 2026-05-29 09:28:49 +09:00
1e8c11c564
fix: rename installed binaries 2026-05-29 09:28:31 +09:00
e7b89a169c
ticket: add installed binary rename 2026-05-29 09:14:07 +09:00
aab7af7e9c
ticket: close memory tool guidance prompt 2026-05-29 08:59:07 +09:00
6e89d6017b
merge: memory tool guidance prompt 2026-05-29 08:58:47 +09:00
b8609c35b6
ticket: close multi-pod open return 2026-05-29 08:57:50 +09:00
d79b5d5cc4
merge: multi-pod open return 2026-05-29 08:57:24 +09:00
775a4605cd
prompt: add memory tool usage guidance 2026-05-29 08:49:24 +09:00
96407899f7
tui: return to multi dashboard after opening pod 2026-05-29 08:45:15 +09:00
f73cfdc6e7
ticket: add multi-pod open return 2026-05-29 08:36:02 +09:00
7880672737
ticket: close multi-pod layout polish 2026-05-29 01:49:26 +09:00
c6d9b7f405
merge: multi-pod view section layout 2026-05-29 01:49:06 +09:00
2bb69ae7f6
tui: section multi-pod list layout 2026-05-29 01:46:48 +09:00
cbb4c4dec4
ticket: close nix packaging 2026-05-29 01:42:09 +09:00
420e83bea3
merge: nix packaging 2026-05-29 01:41:54 +09:00
7ecd58814c
nix: exclude local worktrees from package source 2026-05-29 01:41:09 +09:00
a8e9a091f8
ticket: add multi-pod layout polish 2026-05-29 01:33:35 +09:00
9df9dc1863
nix: add installable package 2026-05-29 01:32:04 +09:00
3c086b7497
ticket: close multi-pod TUI view 2026-05-29 01:09:02 +09:00
20f55a3c61
merge: multi-pod TUI view 2026-05-29 01:08:42 +09:00
7cfa5503df
feat: add multi pod tui dashboard 2026-05-29 01:04:56 +09:00
32be379f54
ticket: specify nix package file 2026-05-29 00:56:04 +09:00
8b2f16e009
ticket: specify multi-pod TUI entrypoint 2026-05-29 00:53:33 +09:00
3784cc8bbf
ticket: close TUI pod list abstraction 2026-05-29 00:40:32 +09:00
3457167931
merge: TUI pod list abstraction 2026-05-29 00:39:57 +09:00
d9984f33c2
tui: drain initial pod status events 2026-05-29 00:39:00 +09:00
35b13a98df
tui: add pod list model 2026-05-29 00:33:57 +09:00
74f792da1a
ticket: add web tools and nix packaging 2026-05-29 00:31:09 +09:00
26d8a5d9be
ticket: refine TUI pod list abstraction 2026-05-29 00:25:03 +09:00
41ce27f038
ticket: define multi-pod TUI view 2026-05-28 23:48:39 +09:00
f7d4b12e7f
ticket: add TUI pod list abstraction 2026-05-28 23:17:16 +09:00
6083121574
audit: record crate boundary findings 2026-05-28 22:25:54 +09:00
92a9c1416c
ticket: close spawnpod initial run confirmation 2026-05-28 22:25:28 +09:00
3cb1138e84
merge: spawnpod initial run confirmation 2026-05-28 22:24:14 +09:00
2b4bdda89c
fix: confirm initial SpawnPod run delivery 2026-05-28 22:14:28 +09:00
834d21723b
ticket: add crate boundary audit 2026-05-28 22:13:45 +09:00
3658242bbc
ticket: refine spawnpod socket delivery 2026-05-28 22:06:47 +09:00
eda4c4ce47
ticket: close compact session-log exploration 2026-05-28 18:53:52 +09:00
7265e83e44
test: fix runtime dir expectation 2026-05-28 18:53:52 +09:00
22df14a66c
merge: compact session-log exploration 2026-05-28 12:40:37 +09:00
0e4ab1d496
style: format manifest paths test 2026-05-28 12:38:53 +09:00
4450e1da9d
style: revert unrelated manifest path formatting 2026-05-28 12:36:32 +09:00
5d104a1cc6
merge: main trace diagnostics 2026-05-28 12:32:24 +09:00
b2efd2906f
feat: add compact session exploration tools 2026-05-28 12:31:44 +09:00
98522911a4
trace: llm stream open diagnostics 2026-05-28 12:26:14 +09:00
01c41ae86c
feat: bound compact worker context 2026-05-28 11:59:41 +09:00
9fe2799732
ticket: add compact work item metadata 2026-05-28 10:01:28 +09:00
2c3eddd218
ticket: compact session-log exploration 2026-05-28 10:01:03 +09:00
365b8c34fd sanitize: neutralize provider notes and remove claude knowledge 2026-05-28 07:45:49 +09:00
4361385946 sanitize: remove local path references from current tree 2026-05-28 06:26:34 +09:00
9ccbdda27c chore: record spawnpod hang report and local manifest 2026-05-28 06:21:01 +09:00
9a0ef7c799 work-items: close openai unhandled sse observability 2026-05-28 05:44:20 +09:00
732d6a57b7 merge: openai unhandled sse observability 2026-05-28 05:44:14 +09:00
3b90f26dea fix: trace unhandled openai responses sse 2026-05-28 05:18:57 +09:00
6877447616 work-items: add openai unhandled sse observability 2026-05-28 05:13:41 +09:00
b808811843 work-items: add pod orchestration guidance item 2026-05-28 04:45:03 +09:00
838ccbb65f work-items: close tickets sh mvp 2026-05-28 04:29:35 +09:00
4d080ca985 merge: tickets work item thread mvp 2026-05-28 04:27:56 +09:00
b1d8f7f181 fix: repair migrated work item encoding 2026-05-28 04:09:47 +09:00
134d0ce2a1 feat: add tickets work item mvp 2026-05-28 03:59:05 +09:00
2820cbbe53 ticket: clarify workitem migration scope 2026-05-28 03:49:21 +09:00
5c6df298aa ticket: complete openai responses diagnostics 2026-05-28 03:23:54 +09:00
ae196c2a87 ticket: record openai responses diagnostics fix 2026-05-28 03:23:25 +09:00
f56793589f fix: preserve openai responses incomplete diagnostics 2026-05-28 03:22:53 +09:00
33884bd0ce ticket: complete memory consolidation skip observability 2026-05-28 03:09:42 +09:00
2ba35cca23 merge: memory consolidation skip observability 2026-05-28 03:09:14 +09:00
860767a143 ticket: complete llm request timeout fix 2026-05-28 02:44:00 +09:00
d65cfe146d ticket: record llm request timeout fix 2026-05-28 02:43:23 +09:00
1babd021b0 fix: add llm request lifecycle timeouts 2026-05-28 02:42:31 +09:00
bdabe789e3 ticket: openai responses incomplete observability 2026-05-28 02:40:30 +09:00
48c4c9b56b ticket: llm client request timeouts 2026-05-28 02:07:01 +09:00
b1fb3ec0fa ticket: complete codex oauth wire compatibility 2026-05-28 02:05:49 +09:00
b3c739867e fix: align codex oauth wire behavior 2026-05-28 01:57:04 +09:00
5ae886ea99 ticket: codex oauth wire compatibility 2026-05-28 01:44:30 +09:00
2c67f99054 fix: suppress memory idle skip notices 2026-05-27 18:55:58 +09:00
67b5d6354c ticket: complete compact retained split fix 2026-05-26 21:40:18 +09:00
cdc42e5a86 ticket: record compact retained split fix 2026-05-26 21:39:57 +09:00
e49817c2d5 feat: trace pre-stream lifecycle 2026-05-26 21:05:45 +09:00
9405ffc633 feat: add session stream event trace flag 2026-05-26 19:57:47 +09:00
77e2ad0c40 fix: compact retained split uses raw tail size 2026-05-26 17:52:09 +09:00
a2771180cc ticket: compact retained split usage records 2026-05-26 17:04:29 +09:00
2cfc3b63c2 ticket: pod scope persistence authority 2026-05-26 16:50:01 +09:00
b2e53f2f61 chore: complete memory summary resident injection ticket 2026-05-26 13:29:03 +09:00
b22040ac84 chore: complete tui user manifest env overlay ticket 2026-05-26 10:10:00 +09:00
80a4f90004 fix: align spawn user manifest env overlay 2026-05-26 10:09:17 +09:00
0b582faebc merge: memory summary resident injection 2026-05-26 09:55:24 +09:00
d084923878 fix: split resident injection gates 2026-05-26 09:44:24 +09:00
df3373c3f2 docs: add tickets.sh workitem mvp ticket 2026-05-26 09:33:30 +09:00
a3e852c6b3 docs: add memory tool guidance ticket 2026-05-26 09:21:57 +09:00
b25f4c7468 feat: inject memory summary into resident prompt 2026-05-26 09:21:10 +09:00
39d40d391b chore: tune project memory thresholds 2026-05-26 09:05:14 +09:00
f87cf5bd00 docs: add memory summary resident injection ticket 2026-05-26 08:50:58 +09:00
9f5e27f3fd merge: memory consolidation skip observability 2026-05-26 08:37:32 +09:00
c101b42619 fix: confirm SpawnPod initial run delivery 2026-05-26 08:37:24 +09:00
f56ef010a8 chore: ignore generated insomnia memory 2026-05-26 08:14:46 +09:00
8095c86be2 fix: suppress memory idle skip notices 2026-05-26 08:03:17 +09:00
99797b9e40 docs: refine memory consolidation skip ticket 2026-05-26 07:53:37 +09:00
1ac197fc6c chore: complete llm retry continuation ticket 2026-05-26 07:22:45 +09:00
3ff78c03af feat: surface llm retry and continuation state 2026-05-26 07:13:59 +09:00
156a55d1d1 docs: refine llm retry continuation ticket 2026-05-26 05:20:43 +09:00
597c6fc3e9 docs: note spawnpod delivery race precedent 2026-05-25 07:03:00 +09:00
a70fe65ed5 docs: add spawnpod run delivery ticket 2026-05-25 06:37:38 +09:00
fa225eb01d docs: add live pending pod picker ticket 2026-05-25 06:29:13 +09:00
8e21e2f3f2 docs: add memory consolidation skip ticket 2026-05-25 05:43:06 +09:00
f51f17cf93 docs: specify stream continuation policy 2026-05-25 04:48:07 +09:00
7e4d90fc1b chore: complete memory audit log ticket 2026-05-25 03:38:18 +09:00
235ddba9c5 merge: memory-audit-log 2026-05-25 03:38:03 +09:00
fe6f5eb326 memory: add audit log events 2026-05-25 03:24:04 +09:00
06da8c5b00 docs: add actionbar notice api ticket 2026-05-25 02:40:59 +09:00
87b2e8eb16 docs: expand memory audit log ticket 2026-05-25 02:06:42 +09:00
2f3adc3d14 fix: refine command mode footer 2026-05-25 01:08:41 +09:00
21ec057de0 chore: complete tui-system-command-compact ticket 2026-05-24 09:40:41 +09:00
dd571a963e merge: tui-system-command-compact 2026-05-24 09:40:25 +09:00
afd65442c5 test: clean up compact event assertion 2026-05-24 09:39:57 +09:00
a4358eed14 feat: add manual compact command 2026-05-24 08:59:44 +09:00
9685bfffba chore: complete tui-command-mode ticket 2026-05-24 08:39:25 +09:00
3a734c30bf merge: tui-command-mode 2026-05-24 08:38:39 +09:00
6e8aa92e38 feat: add TUI command mode 2026-05-24 08:32:21 +09:00
811a449c28 docs: replace gui mvp with tui spawned pod panel 2026-05-24 08:10:21 +09:00
0fd995c85e docs: split tui command and navigation tickets 2026-05-24 07:59:51 +09:00
e0d7468ebb chore: complete worker-history-append-contract ticket 2026-05-24 07:37:29 +09:00
07dc185032 merge: worker-history-append-contract 2026-05-24 07:37:05 +09:00
efb0ac7da3 docs: split maintainer workflows by role 2026-05-24 07:34:30 +09:00
e7b0a0b20f fix: route worker history appends through callbacks 2026-05-24 06:44:19 +09:00
5508299e76 chore: drop stale tui spawn error todo 2026-05-24 06:29:15 +09:00
59e4aac7f7 chore: complete tui-input-queue ticket 2026-05-23 13:58:09 +09:00
6485632a4c merge: tui-input-queue 2026-05-23 13:57:32 +09:00
8d6b47bef1 feat: queue tui input during runs 2026-05-23 13:57:22 +09:00
6046842242 docs: add manual turn rollback ticket 2026-05-23 13:35:03 +09:00
1c8b349e01 chore: complete tui-empty-turn-restore ticket 2026-05-23 13:30:01 +09:00
d70c10b782 merge: tui-empty-turn-restore 2026-05-23 13:29:07 +09:00
70c0548190 feat: restore rolled back tui input 2026-05-23 13:28:56 +09:00
6acaccccf7 chore: complete pod-empty-turn-rollback ticket 2026-05-23 12:52:42 +09:00
b9dd0ba0d0 merge: pod-empty-turn-rollback 2026-05-23 12:52:12 +09:00
23e218abaa chore: handle rolled back run result clients 2026-05-23 12:51:40 +09:00
55dedd173c feat: rollback empty interrupted turns 2026-05-23 12:50:46 +09:00
df629b4dc6 fix: make visible pod list schema object 2026-05-23 12:29:37 +09:00
7c573f36e2 chore: complete pod-discovery-restore-tools ticket 2026-05-23 12:05:30 +09:00
ca869195dc merge: pod-discovery-restore-tools 2026-05-23 12:04:59 +09:00
e6fa660a5f feat: add visible pod discovery tools 2026-05-23 12:04:45 +09:00
f7a3b0adf1 chore: complete memory-extract-remove-input-cap ticket 2026-05-23 09:14:37 +09:00
ea9e924d35 merge: memory-extract-remove-input-cap 2026-05-23 09:14:15 +09:00
3b582a4f73 fix: remove memory extract input cap 2026-05-23 09:14:07 +09:00
2a721e3776 chore: complete tui-pod-restore-picker ticket 2026-05-23 09:13:57 +09:00
61347362d1 merge: tui-pod-restore-picker 2026-05-23 09:13:19 +09:00
e2688da828 feat: restore tui sessions by pod 2026-05-23 09:13:06 +09:00
a0e544e3e4 chore: complete spawned-delegation-scope-reclaim ticket 2026-05-23 08:39:04 +09:00
96fd9574a2 merge: spawned-delegation-scope-reclaim 2026-05-23 08:38:50 +09:00
ab4611001e fix: reclaim delegated scope from stopped children 2026-05-23 08:38:42 +09:00
5b20d21ea0 docs: refine pod visibility and tui restore flow 2026-05-23 08:33:00 +09:00
48625f5077 update: tui -rの際のリストの時系列ソート 2026-05-23 08:02:05 +09:00
fbe8846393 chore: complete tui-streaming-input-loss ticket 2026-05-23 07:16:08 +09:00
8ae3849cc8 merge: tui-streaming-input-loss 2026-05-23 07:15:55 +09:00
e29861f787 fix: preserve tui input during streaming 2026-05-23 07:15:39 +09:00
fa00c1f188 chore: complete tui-context-usage-indicator ticket 2026-05-23 07:15:30 +09:00
879434e240 merge: tui-context-usage-indicator 2026-05-23 07:15:17 +09:00
4b263f8743 feat: show context usage in tui status 2026-05-23 07:15:03 +09:00
7315114b20 docs: identify tui streaming input loss race 2026-05-23 05:47:59 +09:00
f14c8cb614 Create tui-parts.md 2026-05-23 05:41:48 +09:00
da5d789897 fix: tighten task tool usage guidance 2026-05-23 05:11:48 +09:00
802cbf2f45 chore: complete prune-token-budget ticket 2026-05-23 05:00:30 +09:00
18c30c5f90 merge: prune-token-budget 2026-05-23 05:00:15 +09:00
dfec60438e feat: protect prune tail by token budget 2026-05-23 05:00:06 +09:00
6a5b8ed152 chore: complete pod-event-callback-delivery ticket 2026-05-23 04:57:26 +09:00
baaec0c77f merge: pod-event-callback-delivery 2026-05-23 04:57:10 +09:00
fdd2f16df0 fix: drain snapshots before pod callbacks 2026-05-23 04:57:03 +09:00
3e7a15a2b5 docs: add memory extract input cap ticket 2026-05-23 04:42:38 +09:00
b5219dc862 docs: add pod event callback delivery ticket 2026-05-23 03:29:01 +09:00
c1173dd8a1 docs: add spawned delegation scope reclaim ticket 2026-05-23 03:02:48 +09:00
f03e84a62a refactor: remove legacy plural log entries 2026-05-23 02:03:42 +09:00
e80a3fbf8e docs: track read pod output log entry bug 2026-05-23 00:53:47 +09:00
8947a89e7b docs: add pod discovery restore tools ticket 2026-05-23 00:09:34 +09:00
f46cdd6dbc chore: complete spawned-registry-persist ticket 2026-05-22 23:30:16 +09:00
1a5b5331d6 merge: spawned-registry-persist 2026-05-22 23:30:06 +09:00
530027c62b feat: persist spawned pod registry 2026-05-22 23:30:02 +09:00
8e7126d177 chore: complete pod-name-resume ticket 2026-05-22 22:57:31 +09:00
3fe4a6bc14 merge: pod-name-resume 2026-05-22 22:57:23 +09:00
12a4ba5edf feat: resume pods by name 2026-05-22 22:57:16 +09:00
d3b78234c2 chore: complete pod-state-write-points ticket 2026-05-22 22:29:23 +09:00
baf7403c8c merge: pod-state-write-points 2026-05-22 22:29:12 +09:00
5955695db8 feat: wire pod metadata lifecycle writes 2026-05-22 22:29:08 +09:00
bacba69d31 chore: complete pod-state-backend ticket 2026-05-22 22:03:36 +09:00
08dc6b29f8 style: run cargo fmt 2026-05-22 22:03:27 +09:00
d08ea1734e merge: pod-state-backend 2026-05-22 22:03:17 +09:00
ec5b891fec feat: add pod metadata store backend 2026-05-22 22:03:11 +09:00
5830bb9c85 Merge: live-fork-marker 2026-05-20 06:45:49 +09:00
16ef135f1f chore: 空になった Storage 親見出しを TODO から削除 2026-05-20 06:45:43 +09:00
15f514dfe2 ticket: live-fork-marker 完了 2026-05-20 06:45:19 +09:00
fbd97c3546 chore: auto-fork ロジック二重実装を KNOWN_ISSUES に登録 2026-05-20 06:45:14 +09:00
bb4205b531 ticket: live-fork-marker レビュー (Approve) 2026-05-20 06:44:54 +09:00
077efee13b feat: live auto-fork の marker 形式を確定(seq 比較 + forked_from 記録)
方針: 末尾 entry-count 比較で検知し、元 Segment は immutable のまま
(terminal marker を書き戻さない)。fork lineage は新 Segment の
SegmentStart.forked_from に前向きに記録するため、log だけから辿れる。
過去 fork と対称で、nested fork も marker 位置の調停が不要。

- session-store ensure_head_or_fork に at_turn_index 引数を追加し
  新 Segment へ forked_from を記録
- pod ensure_segment_head の auto-fork も同様に forked_from を記録
  (at_turn_index = writer の現 turn_count)
- fork_at の doc に「元 Segment を mutate しない」invariant を明記
- test: nested past-fork が祖先を不変に保つ / Pod 並行 writer drift で
  auto-fork し forked_from を記録 / 元 Segment に marker が書かれない
2026-05-20 06:42:09 +09:00
b5d5c03412 Merge: session-grouping-introduce 2026-05-20 06:29:48 +09:00
bee41379fa ticket: session-grouping-introduce 完了 2026-05-20 06:29:43 +09:00
842e7a3c58 update: session-grouping review follow-up
- PickerOutcome::Picked から未使用の session_id を除去(pod-cli が lookup_session_of で再解決)
- picker preview が singular AssistantItem も拾うように
- fs_store layout doc に migration(後方互換なし、旧 flat sessions は破棄)を明記
- TaskStore は Session-lifetime、ScopedFs/Tracker は Pod-process lifetime と用語整理
- Pod::session_id / from_manifest_spawned のコメント補強
2026-05-20 06:29:37 +09:00
e8c16be475 feat: Session(Segment 群の grouping)を導入
- SessionId 型を新設、各 SegmentStart に session_id を持たせる
- compaction / 内部 fork は同 SessionId を継承、fork() は新 Session を発行
- Store API を (SessionId, SegmentId) ベースに、FsStore layout は
  <root>/<session_id>/<segment_id>.jsonl に
- Store::list_sessions / list_segments(session_id) / lookup_session_of を追加
- restore_by_segment shim を session-store に提供(pod-cli --session で使用)
- SegmentState に SegmentLocation (session_id, segment_id) を保持し ArcSwap で更新
- RestoredState に session_id: Option<SessionId> を追加
- Picker は Session 単位に列挙、leaf segment を解決して resume
2026-05-20 06:17:56 +09:00
58f54b99f3 Merge: segment-rename 2026-05-20 05:18:11 +09:00
5aea9730c6 ticket: segment-rename 完了 2026-05-20 05:18:04 +09:00
a63f076856 update: 残存 Session 識別子の Segment 化(review follow-up)
レビュー指摘の通り、次の session-grouping-introduce で新 SessionId が
入る前に名称衝突を避けるため取り残しを掃除。

- PodError::Session{Empty,ScopeMissing} → Segment{Empty,ScopeMissing}
- ScopeLockError::SessionConflict → SegmentConflict
- Pod.session_state / SegmentState.set_session_id 系
- source_session_id / prev_session_id / ensure_session_head / short_session
- pod_cli の "Session ID:" 表示
- fs_store の sessions ローカル変数
2026-05-20 05:17:49 +09:00
ac1d8b1c7d update: Session-lifetime/scoped を Pod-lifetime に修正
タスクストア/ファイルトラッカーは compaction を跨いで Pod プロセス寿命まで生きる。
旧 SessionId = Segment の時代の表現を Pod-lifetime に正す。pod_cli の表示も Segment: に。
2026-05-20 05:06:38 +09:00
d5fcbc2125 update: SessionId / SessionStart / SessionOrigin 等を Segment 系名称へ
- Type/Function/Variantを Segment* 系へ統一
  - SessionId/SessionStart/SessionOrigin/SessionStartState/SessionState/SessionLogSink/SessionLockInfo
  - new_session_id / session_id / create_session* / list_sessions / lookup_session / update_session / find_by_session
  - protocol Event::SessionRotated → SegmentRotated、CompactDone.new_session_id → new_segment_id
- Module: session_log → segment_log / session → segment (file mv 含む)
  pod 側の session_log_sink → segment_log_sink も同様
- crate 名 (session-store)、CLI flag (--session)、ResumeWithSession (CLI tied) は据え置き
- session-tests/session_metrics_test 等の Store impl も追従
2026-05-20 05:06:04 +09:00
45db480b0b Merge: entry-hash-abolish 2026-05-20 04:53:52 +09:00
4b8aee909b ticket: entry-hash-abolish 完了 2026-05-20 04:53:47 +09:00
903cfa3060 update: 旧用語コメントの掃除と KNOWN_ISSUES 追記
- 残存していた head_hash / SessionHead 言及コメントを 3 箇所更新
- FsStore::read_entry_count の O(n) 計測コストを KNOWN_ISSUES に登録
2026-05-20 04:53:33 +09:00
27a1d07e98 ticket: entry-hash-abolish レビュー (Approve) 2026-05-20 04:49:17 +09:00
9bfbb2fb4c update: entry hash chain と session_head mutex を撤廃
- HashedEntry / EntryHash / compute_hash / build_chain 撤去、JSONL は 1 行 1 LogEntry
- SessionOrigin.at_hash → at_turn_index (TurnEnd 由来) に置換
- Pod 側 SessionHead mutex を ArcSwap<SessionId> + AtomicUsize の SessionState に置換
- ensure_head_or_fork は store の entry count と writer の append tally で判定
- session-store から sha2 / hex 依存、pod から parking_lot 依存を削除
2026-05-20 04:31:37 +09:00
1a9bb30824 ticket: 永続化整理を 8 個に分割
persistence-semantics と pod-persistent-state を実装可能な粒度に分割。
Storage 層 (Phase 1) を entry-hash-abolish / segment-rename /
session-grouping-introduce / live-fork-marker に、Pod 単位永続化
(Phase 2) を pod-state-backend / pod-state-write-points /
pod-name-resume / spawned-registry-persist に切り出した。
2026-05-20 04:07:44 +09:00
0440d5c6dc Merge: invoke-turn-llmcall-semantics
# Conflicts:
#	crates/pod/src/controller.rs
2026-05-15 22:08:41 +09:00
01200a0d33 ticket: invoke-turn-llmcall-semantics 完了 2026-05-15 21:54:40 +09:00
d3b7663d41 ticket: worker-history-append-contract 作成 2026-05-15 21:53:24 +09:00
b204909c4c chore: KNOWN_ISSUES に controller_test::double_run_returns_error の flakiness を追記 2026-05-15 21:52:40 +09:00
9a89d2419a ticket: pod-interrupt-prep-internalize 完了 2026-05-15 21:52:24 +09:00
af6427ff67 ticket: pod-interrupt-prep-internalize レビュー (Approve with follow-up) 2026-05-15 21:51:57 +09:00
4c8596db38 update: Paused→Run の interrupt 前処理を Pod::run に内包 2026-05-15 21:51:57 +09:00
c779768b6e ticket: invoke-turn-llmcall-semantics review (Approve) 2026-05-15 21:42:43 +09:00
49b78612d6 feat: Invoke marker と LlmCall callback を導入し AgentTurn セマンティクスを明確化
- protocol: InvokeKind enum、Event::InvokeStart / LlmCallStart / LlmCallEnd 追加
- llm-worker: Worker.llm_call_count と on_llm_call_start/end callback、turn_count を AgentTurn 数として doc 更新
- session-store: LogEntry::Invoke { ts, trigger } 追加 (replay は marker のみで state 不変)
- pod: run/run_for_notification 開始時に Invoke marker commit、PendingRun::RunForNotification(InvokeKind) で kind を伝搬
- pod ipc: sink + server で Invoke エントリーを Event::InvokeStart として broadcast
- tui: 新 Event 3種を no-op で受理 (UI 設計はチケット範囲外)
2026-05-15 07:04:26 +09:00
ce6085b5f4 ticket: invoke/turn/llmcall 決定事項と実装範囲を明文化 2026-05-15 06:48:57 +09:00
1b83b2c40a ticket: Exchange語撤廃、Invoke/Turn/LlmCall でセマンティクスを再整理 2026-05-15 05:41:13 +09:00
9d04008123 ticket: pod-input-validate-internlize完了 2026-05-15 05:38:27 +09:00
4ebc2c96b3 update: Controllerで入力のValidationを行っていた部分をPod側に移す 2026-05-15 05:33:33 +09:00
bb6f7e2022 ticket: PodとControllerの責務の抱え違いを修正するチケット 2026-05-15 04:52:39 +09:00
86b48a9fdf ticket: pod-parent-turn-callback完了 2026-05-15 04:43:12 +09:00
59067bd115 ticket: pod-parent-turn-callbackレビュー 2026-05-15 04:42:29 +09:00
6116d72570 ticket: 消し忘れ 2026-05-15 04:39:30 +09:00
8e8c0887de update: 親にターン完了を通達する経路の整理 2026-05-15 04:38:53 +09:00
3143353ddc update: エントリの単数化のフォローアップ 2026-05-14 19:42:23 +09:00
f35d99900f update: 書き込みの不要なasyncを削除 2026-05-14 19:16:48 +09:00
6e7494553b ticket: 書き込みのsync化を計画 2026-05-14 16:45:58 +09:00
904ea6e326 update: SystemItem1本化 2026-05-14 14:36:29 +09:00
b6b158a244 ticket: イベントプロトコルと永続化におけるシステムイベントの統合 2026-05-14 04:12:40 +09:00
e32b210d50 chore: cargo fmt 2026-05-14 03:36:08 +09:00
a02f34437c fix: 実態にそぐわないEvent::Entryを実装した構造を訂正 2026-05-14 03:35:52 +09:00
1ef094f039 refactor: Podのメインループのリファクタリング 2026-05-14 03:27:49 +09:00
e57e23b999 ticket: 追加:Podのメインループとソケット通信周りのリファクタリング 2026-05-13 22:16:25 +09:00
13feb36518 ticket: add tui manual compact command 2026-05-13 06:50:27 +09:00
9e4bdf315f docs: update pod cli manifest flags 2026-05-13 06:44:48 +09:00
068a975488 ticket: note tui user manifest overlay mismatch 2026-05-13 06:41:23 +09:00
3d23c4ed40 close: complete pod manifest and file ref tickets 2026-05-13 06:30:45 +09:00
d2149d11d3 merge: file-ref-directory 2026-05-13 06:30:45 +09:00
ada2988105 merge: pod-cli-manifest-flags 2026-05-13 06:30:45 +09:00
d6cfea463a review: file-ref-directory 2026-05-13 06:30:45 +09:00
43330cf624 review: pod-cli-manifest-flags 2026-05-13 06:30:45 +09:00
21a78fb19e refactor: PodControllerの構造のリファクタリング 2026-05-13 06:07:38 +09:00
0ae6592032 docs(tickets): PodControllerの構造調整チケット作成 2026-05-13 05:43:23 +09:00
0e1539fefa chore: planの更新 2026-05-13 05:42:55 +09:00
dff72e291b feat: handle directory file refs 2026-05-13 02:57:58 +09:00
c6a9007b58 feat: organize pod manifest cli flags 2026-05-13 02:57:50 +09:00
d1c7297f87 feat: Languageインストラクションの追加 2026-05-13 02:27:30 +09:00
3c4a34b13b update: fmt + memoryに用いる言語の構成 2026-05-13 01:57:04 +09:00
076cf9af18 fix: compact時にToolCallとOutputの間でCutしてしまう問題 2026-05-13 00:59:02 +09:00
2f5f5b8a26 chore: workflowの調整・knowledgeの追加テスト 2026-05-13 00:06:33 +09:00
a363546a14 merge: lint common crate 2026-05-12 21:56:49 +09:00
599b24fa9e chore: complete lint common crate ticket 2026-05-12 21:56:39 +09:00
4bdbac6597 refactor: extract shared lint record primitives 2026-05-12 21:56:25 +09:00
20a6748cdd docs(tickets): submit時FileRefでディレクトリを参照した時の挙動 2026-05-12 17:39:40 +09:00
1271d13f26 docs(tickets): mainfest-output-upload-limits完了 2026-05-12 17:27:47 +09:00
5882341b21 feat: add manifest output upload limits 2026-05-12 16:20:15 +09:00
19730ba7c0 Merge branch 'tui-knowledge-completion' into develop 2026-05-12 15:43:29 +09:00
7a76276539 docs(memory): fix knowledge dir path in collect_resident_knowledge doc 2026-05-12 15:07:39 +09:00
64d12f2a6f docs(tickets): review tui knowledge completion (approve) 2026-05-12 14:56:30 +09:00
668bde46f4 feat(pod): wire knowledge slugs into # completion 2026-05-12 14:45:46 +09:00
3647614ab0 docs(tickets): tui knowledge completion unimplemented fix 2026-05-12 14:40:37 +09:00
5a2e69b2bf docs(tickets): define work item query strategy 2026-05-12 02:32:32 +09:00
1d53929250 docs(tickets): use timestamp work item ids 2026-05-12 02:07:29 +09:00
91a0a935b0 docs: add ai maintainer work item plan 2026-05-12 01:53:52 +09:00
bd46491b04 docs(tickets): add lint-common crate ticket 2026-05-12 00:06:06 +09:00
18b0f8b19f merge: workflow crate extraction 2026-05-11 22:50:19 +09:00
f5d69504b5 docs(tickets): complete workflow crate extraction 2026-05-11 22:50:06 +09:00
76e1287cbe review: workflow crate extraction 2026-05-11 22:49:50 +09:00
eb791f9e80 refactor: extract workflow crate 2026-05-11 22:49:07 +09:00
a1b9c865df merge: anthropic assistant burst bundling 2026-05-11 22:24:36 +09:00
985931d6fa docs(tickets): complete anthropic assistant burst bundling 2026-05-11 22:23:53 +09:00
d35c9f40a7 review: anthropic assistant burst bundling 2026-05-11 22:23:38 +09:00
4d6d5b631c fix: bundle anthropic assistant bursts 2026-05-11 22:22:36 +09:00
3354c41e66 merge: memory usage metrics 2026-05-11 21:46:24 +09:00
73d1c05edc docs(tickets): complete memory usage metrics 2026-05-11 21:46:19 +09:00
9e615a41f0 review: memory usage metrics 2026-05-11 21:46:19 +09:00
da4f4cc954 feat: add memory usage event metrics 2026-05-11 21:29:48 +09:00
01d38f042c docs(tickets): complete memory phase naming cleanup 2026-05-11 17:16:36 +09:00
9b99f50264 docs(tickets): simplify memory usage metrics 2026-05-11 16:54:23 +09:00
646b47b40f fix: remove remaining memory phase wording 2026-05-11 01:57:39 +09:00
5cf8eb94c7 docs(tickets): compact-worker-occupancy-cap完了 2026-05-11 01:56:20 +09:00
4d61d044ec update: memoryシステムの"Phase"表記を撤廃 2026-05-11 01:55:28 +09:00
967e57c933 docs(tickets): memory-extract-occupancy-cap 完了 2026-05-11 01:32:45 +09:00
acfe073b29 review: memory-extract-occupancy-cap (approve) 2026-05-11 01:25:20 +09:00
0b79e0ed65 feat: extract worker サーキットブレーカーを占有量ベースに統一 2026-05-11 01:20:37 +09:00
c8871ec4fe docs(tickets): add memory-extract-occupancy-cap ticket 2026-05-11 01:14:59 +09:00
3fece8749b Merge branch 'compact-worker-occupancy-cap' into develop 2026-05-11 01:12:32 +09:00
cac1f4d4fe review: compact-worker-occupancy-cap (set_max_turns 分岐削除) 2026-05-11 00:56:41 +09:00
e664def920 feat: compact worker サーキットブレーカーを占有量ベースに統一 2026-05-11 00:43:16 +09:00
f0a1f98912 docs(tickets): add memory audit log ticket 2026-05-11 00:06:42 +09:00
5ca771ded4 docs(tickets): completed tickets cleanup 2026-05-10 17:31:34 +09:00
9b15135416 merge: memory prompt record policy 2026-05-10 14:40:58 +09:00
b6f99b7651 docs: generalize memory prompt record policy 2026-05-10 14:40:52 +09:00
13c05b1083 docs: memory effectiveness plan 2026-05-10 01:25:10 +09:00
05da79f966 docs: memory prompt ticket policy ticket 2026-05-10 01:13:57 +09:00
92cee690f8 feat: client-crateの実装 2026-05-10 00:57:50 +09:00
6f0ec92f91 chore: E2Eの計画とgit運用の話 2026-05-09 05:04:57 +09:00
32ed5a812c docs(tickets): file-ref-symlink-diagnostics完了 2026-05-09 04:22:27 +09:00
856a0a2432 docs(tickets): file-ref-symlink-diagnosticsレビュー 2026-05-09 04:21:56 +09:00
ced26b952e feat: Toolsのシンボリックリンク対応 2026-05-09 04:21:56 +09:00
e451b07783 docs(tickets): tui-assistant-markdown完了 2026-05-09 03:31:49 +09:00
f6600feab5 docs(tickets): permission既定policy整理チケット追加 2026-05-09 03:27:22 +09:00
553d67a910 docs(tickets): permission-extension-point完了 2026-05-09 03:20:17 +09:00
805be47128 feat: パターンベースのツール権限制御を追加 2026-05-09 03:20:02 +09:00
aa9409869e chore: tui compact progress ticket完了 2026-05-09 03:14:23 +09:00
8ebdd47fbb feat: compactのプログレス表示 2026-05-09 03:11:53 +09:00
ec1eccd10d chore: git方針の変更とセマンティクス変更の計画の帳尻合わせ 2026-05-08 20:17:11 +09:00
42127554d4 docs(tickets): 自己改善workflowの設計 2026-05-08 01:50:55 +09:00
9dbfd15687 docs(tickets): workflow-directory-layout完了 2026-05-08 01:08:25 +09:00
6c31264377 update: Workflowディレクトリ修正のフォローアップ 2026-05-08 00:59:08 +09:00
b6b4168503 feat: Workflowの読み取り位置変更の実装 2026-05-08 00:15:50 +09:00
40cde699a8 docs(tickets): reportの運用・Workflowのディレクトリ位置修正 2026-05-07 23:34:00 +09:00
1ed45032be feat: TUIのmarkdown対応 2026-05-05 18:30:25 +09:00
64814c2e15 docs(tickets): PermissionのチケットとTUIのmd表示 2026-05-05 17:16:03 +09:00
96daebff30 docs(tickets): agent-skills完了 2026-05-05 16:00:40 +09:00
85fe1a094c update: Agent skills実装のレビュー・対応 2026-05-05 13:54:02 +09:00
68249b8072 feat: writingに対する基本的な指示promptを追加 2026-05-05 13:42:34 +09:00
98018972aa feat: agent skillsの互換実装 2026-05-05 13:16:10 +09:00
5b1324a630 fix: Reasoningの永続化のスキーマのミスを修正 2026-05-05 12:30:29 +09:00
4e352bb9ff docs(tickets): turnのセマンティクスを変える計画 2026-05-05 12:29:52 +09:00
5c8d00e49b docs(tickets): reasoning-history-perisit完了 2026-05-04 23:06:21 +09:00
94bb8804f4 update: Reasoningコンテキスト管理のレビュー・対応 2026-05-04 23:05:08 +09:00
30023349b9 feat: Reasoningのコンテキスト管理の対応 2026-05-04 21:31:44 +09:00
b0e6ab16b1 docs(tickets): Reasoningのコンテキスト管理とPruneの調整チケット追加 2026-05-04 21:16:31 +09:00
6e6be6f3ff docs(tickets): tui-task-display完了 2026-05-04 20:43:21 +09:00
eb9bd84b05 feat: Task表示のレビュー・修正 2026-05-04 17:28:39 +09:00
17a7744da1 feat: TUI上に進行中のTaskを表示する実装 2026-05-04 17:06:02 +09:00
a3082072d7 docs(tickets): Compaction進行中のライブ表示 2026-05-04 17:03:51 +09:00
04a471b669 docs(tickets): post-run memory detach 完了 2026-05-04 16:11:38 +09:00
3266ddb2d4 feat: Pos処理の非同期化・Busy状態の削除 2026-05-04 15:52:27 +09:00
7527b55de4 docs(tickets): 追加:タスクリストの表示とコンテキスト長インジケータ 2026-05-04 15:32:40 +09:00
c57d4be413 docs(tickets): Busyの切り離し 2026-05-04 13:20:25 +09:00
344dca6ffa Merge branch 'llm-worker-transient-retry' into develop 2026-05-04 13:16:26 +09:00
93fe2eb0ff docs(tickets): pod状態のTUI同期完了 2026-05-04 13:08:44 +09:00
09e465d583 feat: Podのステータス同期の修正 2026-05-04 12:55:29 +09:00
4eb73fa552 feat: Podのステータスを厳密にし、同期漏れを防ぐ 2026-05-04 12:55:11 +09:00
2d59ddd228 docs(tickets): llm-worker-transient-retry完了 2026-05-04 12:51:41 +09:00
39882263d3 docs(tickets): llm-worker-transient-retry レビュー追記
7183847 のレビュー結果を Approve として記録する。チケット要件
(リトライ対象 / バックオフ / Retry-After 上書き / mid-stream 温存 /
完了条件) はすべて満たしており、コードベースの層構造を歪める変更も
ない。Retry-After テストの方針差 (実時間 1s vs 仮想時間 5s) と
connect refused テストの試行回数未検証は non-blocking として
review.md に記録。
2026-05-04 12:49:13 +09:00
c2caaa21a0 feat(llm-worker): HTTP transient エラーへのリトライを追加
`transport.rs` の HTTP 送信〜ステータスチェック区間に指数バックオフ
+ フルジッターのリトライループを追加する。SSE 読み出し開始後 (
`bytes_stream()` 以降) のエラーは従来どおりそのまま流す。

- `is_retryable(&ClientError)`: 408/425/429/500/502/503/504/529 と
  reqwest の connect/timeout のみ true
- `RetryPolicy` (default: base 500ms / cap 10s / max_attempts 4 /
  total_timeout 30s)
- `Retry-After` ヘッダ (秒数) があればバックオフを上書き
- リトライ発火ごとに warn! でステータス・attempt・wait を出す

ref: tickets/llm-worker-transient-retry.md
2026-05-04 12:45:33 +09:00
20097e8296 Merge branch 'tui-system-message-render' into develop 2026-05-04 12:10:17 +09:00
185db7f8cd docs(tickets): tui-system-message-render完了 2026-05-04 12:05:50 +09:00
8870af800f feat: システムメッセージをTUIで表示させる 2026-05-04 12:04:09 +09:00
56f9bab7b7 update: Taskツールの説明を更新 2026-05-04 11:32:04 +09:00
194d29723e docs(tickets): tuiトークン表示完了 2026-05-04 00:07:59 +09:00
a22cb479f4 docs(tickets): tuiトークン表示レビュー 2026-05-04 00:05:59 +09:00
5efe0e4910 feat: tuiのトークン集計表示の修正 2026-05-04 00:01:37 +09:00
6168e3f924 docs(tickets): TUI表示トークンの集計の修正 2026-05-03 23:28:31 +09:00
9b676238a2 docs(tickets): チケット追加:システムメッセージのTUI表示とセッションのロールバック・フォーク 2026-05-03 22:43:21 +09:00
8df34a1d64 docs(tickets): tui-pod-event-render 完了 (消し忘れ片付け) 2026-05-03 22:14:24 +09:00
45ef661651 update: Taskツール群の説明を更新 2026-05-03 22:09:45 +09:00
2d8767f940 docs(tickets): notify-history-persist 完了 (消し忘れ片付け) 2026-05-03 22:07:18 +09:00
8f7a023897 docs(tickets): session-todo-reminder spec を pending_history_appends に改訂 (AGENTS.md 揮発禁止に整合) 2026-05-03 21:53:20 +09:00
302a1a7f58 Merge branch 'session-todo-tools' into develop
# Conflicts:
#	tickets/session-todo.md
2026-05-03 21:50:30 +09:00
284d07b569 docs(tickets): session-todo (本体) 完了 2026-05-03 21:48:44 +09:00
5fbb9c47dd update: tuiからspawnする際にエラー詳細が落ちていた問題を修正 2026-05-03 21:47:54 +09:00
f18cf7c172 docs(tickets): notify-history-persist完了 2026-05-03 21:37:13 +09:00
cae0c1ea2f docs(tickets): session-todo レビュー反映 (Approve) + reminder spec 段階レビュー 2026-05-03 21:34:54 +09:00
ada1fe6c63 fix: TaskStore snapshot を JSON ブロック化 + 構造ラウンドトリップテスト追加 2026-05-03 21:33:50 +09:00
fde55c96d4 fix: TaskStore snapshot を compact 後 history の末尾に置いて retained 中の TaskCreate 重複を防ぐ 2026-05-03 21:26:49 +09:00
05c2605aae feat: notify-history-persist実装 2026-05-03 19:27:22 +09:00
d1a9b622d4 feat: セッション内 Task ツール (TaskCreate/List/Get/Update + 履歴 replay + compact 跨ぎ) 2026-05-03 19:03:52 +09:00
a87be4cbc2 docs(tickets): セッション内 Task ツールを本体と注意機構に分割 2026-05-03 19:03:48 +09:00
30bb096513 Merge branch 'resume-scope-claim' into develop
# Conflicts:
#	TODO.md
2026-05-03 18:59:01 +09:00
e0261591b6 docs(tickets): resume-scope-claim 完了 2026-05-03 18:56:39 +09:00
eb054b3e88 fix: resume-scope-claim レビュー指摘対応 (deny セマンティクス doc・破損 snapshot の警告ログ) 2026-05-03 18:56:21 +09:00
1be6d34010 docs(tickets): resume-scope-claim レビュー (Approve) 2026-05-03 18:46:15 +09:00
eb0d0433a1 docs(tickets): Notifyが永続化されいない問題についてのチケット 2026-05-03 18:45:10 +09:00
557d5da391 feat: resume時のscope claimを過去の有効scopeに揃える 2026-05-03 17:12:36 +09:00
3f987e9885 feat: session-metrics完了 2026-05-03 15:56:06 +09:00
a86f69fd8d feat: session-metrics実装 2026-05-03 15:10:43 +09:00
cae18a4339 feat: TUIに他Podからの通知を表示する 2026-05-03 12:45:05 +09:00
69a6f63023 docs(tickets): 消し忘れチケットども 2026-05-03 01:16:22 +09:00
1236c68073 chore: TODOから[ ]を削除 2026-05-03 01:08:43 +09:00
d64d1b2ae8 Update AGENTS.md 2026-05-03 01:06:23 +09:00
159ffb0c6d docs(tickets): tuiでPodEventを表示する・セッション中でメトリクスを取るチケットを追加 2026-05-03 01:01:09 +09:00
97a1c10ef7 update: tuiの文字入力のCtrlブロックを追加 2026-05-03 00:44:38 +09:00
eeb570c71f update: memoryシステム周りのプロンプトの整理 2026-05-03 00:27:10 +09:00
9be7caae99 docs(tickets): memory-consolidation-drop-input-cap完了 2026-05-02 23:57:36 +09:00
0e7be01807 update: Consolidationの不要なToken上限の削除 2026-05-02 23:48:33 +09:00
35c8ee3a73 docs(tickets): セッション内TODOツールと注意機構のチケット 2026-05-02 23:48:01 +09:00
c79c54ba9d update: codexのキャッシュ利用が出来てなかった問題 2026-05-02 03:23:44 +09:00
f1d8f42fd5 fix: tuiからのPod作成の挙動を修正・開発時にcargo runでpodを起動する経路を実装 2026-05-02 02:13:30 +09:00
14862fbc37 Merge branch 'workflow-impl' into develop
# Conflicts:
#	crates/pod/src/controller.rs
#	crates/pod/src/pod.rs
2026-05-02 01:47:49 +09:00
ef3f0a8a78 docs(tickets): workflow完了 2026-05-02 01:40:06 +09:00
2ef397b562 update: workflowの実装修正 2026-05-02 01:38:50 +09:00
bebe1169c8 docs(tickets): 消し忘れチケット 2026-05-02 01:36:19 +09:00
ba5b8db9cf feat: dynamic-scopeの実装修正 2026-05-02 01:33:32 +09:00
189ee43a0c feat: dynamic-scopeの実装 2026-05-02 01:26:17 +09:00
6bf1f9a110 fix: SpawnPodの起動経路の問題・を修正 2026-05-02 01:09:57 +09:00
8307ca965c Implement workflow MVP 2026-05-02 00:46:47 +09:00
e97f803104 update: manifestで一部値のzeroの扱いを変更 2026-05-02 00:08:46 +09:00
c4bc994cab fix(llm-worker): openai_responsesのroleの最新の投影を反映 2026-05-01 23:55:26 +09:00
6d84d4df19 chore: dev-depsの整理 2026-05-01 23:50:14 +09:00
ac4133ddf9 docs(tickets): workflowのプロパティ名の修正 2026-05-01 23:40:47 +09:00
6d15d1e2b6 chore: 依存パッケージの集約 2026-05-01 23:35:46 +09:00
ffda357218 Merge branch 'tui-mouse-scroll' into develop 2026-05-01 23:22:58 +09:00
09eb29b0b7 feat: memory P2の修正 2026-05-01 23:22:49 +09:00
300234df57 feat(tui): マウスホイールスクロール完了 2026-05-01 23:16:02 +09:00
7e938b2d3b スキルの整理 2026-05-01 23:14:37 +09:00
0e98d67a5f feat(tui): マウスホイールでスクロールする実装 2026-05-01 23:14:16 +09:00
31eeded4a6 メモリPhase2の実装 2026-05-01 23:00:55 +09:00
ca27d88869 docs: memoryシステムの仕様変更と、動的Tool・VCSの話 2026-05-01 18:47:52 +09:00
38efe82544 bashツール一旦完了 2026-05-01 18:47:09 +09:00
31a1c1d879 bashツール実装 2026-05-01 18:14:13 +09:00
e21f43c70a ClaudeによるTool出力メタ認知 2026-05-01 02:47:44 +09:00
e058dc576d ファイル参照を与えた際に自動的に読ませる実装 2026-04-30 21:58:10 +09:00
a05d7533b0 TUI補完の細かい挙動修正 2026-04-30 14:38:03 +09:00
621acbe224 tuiの補完の実装 2026-04-30 12:46:48 +09:00
e259ab7bd3 claudeの動的ツールの調査レポート 2026-04-30 01:35:42 +09:00
1f3ad13c83 fix: セッション復元時にhistoryが表示されない問題 2026-04-30 00:02:26 +09:00
2c9db5a27b cargo fmt 2026-04-29 23:20:25 +09:00
dcc71e3a14 templatureがcodexエンドポイントで使えない件の修正 2026-04-29 23:20:16 +09:00
426d477584 session-log関連完了 2026-04-29 23:00:55 +09:00
09d56272d8 session-logリファクタのレビュー・修正 2026-04-29 22:55:36 +09:00
de6b8faf55 session-log-segments実装 2026-04-29 22:42:10 +09:00
bb2a6013fa session-log-decouple-item実装 2026-04-29 22:24:18 +09:00
709b17d309 session-storeの永続化形式からllm-workerの内部型を削除 2026-04-29 22:09:30 +09:00
f74716c2e4 tui-input-word-motion完了 2026-04-29 21:45:49 +09:00
f6fe978db4 tui-input-word-motionレビュー・半角カナに関する修正 2026-04-29 21:41:24 +09:00
99d6a4cf4b tuiの単語単位Backspace 2026-04-29 21:31:19 +09:00
9782323885 tuiの単語境界カーソル移動実装 2026-04-29 21:23:29 +09:00
28c2b0eb1c workflowのチケットとtuiの単語境界カーソル移動のチケット 2026-04-29 21:22:49 +09:00
437fe9fe85 pod-registry-rename完了 2026-04-29 21:05:09 +09:00
c647cac983 pod-registry-rename修正 2026-04-29 21:04:47 +09:00
e2d6f00d6d pod-registryのモジュール分割 2026-04-29 20:14:34 +09:00
40d19ca702 scope-lock -> pod-registry 2026-04-29 20:01:32 +09:00
e304b17a7e scope.lockの意味変更に伴うクレート名変更チケット作成 2026-04-29 19:54:08 +09:00
ca0b772242 memory-phase1-extract完了消し忘れ 2026-04-29 19:53:37 +09:00
3962db4d37 tui-session-restore完了 2026-04-29 19:52:24 +09:00
5ea99673fc tuiからセッションを復帰する経路の実装 2026-04-29 19:03:03 +09:00
dad75b592e 不要なforkの削除 2026-04-28 20:19:50 +09:00
d1be97fbc2 resumeの実装 2026-04-28 18:52:58 +09:00
f2b364ec0d max_tokenとreasoning_tokenに関するdocs修正 2026-04-28 18:01:17 +09:00
f1ba5b5686 max_tokensのスキーマ不整合に関する修正 2026-04-28 17:58:24 +09:00
ce7153f6e8 tui-thinking-display完了 2026-04-28 16:23:09 +09:00
04ad20e760 tui-thinking-display修正 2026-04-28 16:22:45 +09:00
fc2c6bc81c TUIにThinkingを表示する実装 2026-04-28 16:10:48 +09:00
31d5de1a37 ThinkingのTUI表示のチケット作成 2026-04-28 16:07:41 +09:00
cfd1879f7e session-store-llm-worker-type-ownership完了 2026-04-28 15:44:16 +09:00
eed3f13e51 セッション関連の責務の分離 2026-04-28 15:43:34 +09:00
a9d30e1c37 memory-phase1の、トークンカウントの実装位置が悪い件 2026-04-28 14:24:38 +09:00
11bd486740 memory-phase1-extract修正 2026-04-28 13:12:21 +09:00
fd88c72e2e memoryを抽出する仕組みの実装 2026-04-28 12:58:33 +09:00
2ef4f26a8f session-restoreの設計更新 2026-04-28 12:42:49 +09:00
cb3642d12c session復帰経路を作るチケット・テスト用のファイルの削除 2026-04-28 12:31:38 +09:00
e4d7cc1924 memoryが.insomnia配下ではなくworkspace root直下を想定していた問題の修正 2026-04-28 11:53:08 +09:00
c4e1a969c1 memoryのクエリと動作のテスト 2026-04-28 11:37:41 +09:00
2e38a24ac2 worker-generation-settings完了 2026-04-28 09:38:23 +09:00
8114d3c4fd 生成設定のmanifest化の実装 2026-04-28 09:37:22 +09:00
cabf9c967c cargo fmt 2026-04-27 22:51:07 +09:00
1c98938b6f model-reasoning-control完了 2026-04-27 22:49:56 +09:00
5fa3d140ab model-reasoning-contolレビュー 2026-04-27 22:41:51 +09:00
7d23cff0a9 model-reasoning-control実装 2026-04-27 22:25:27 +09:00
5246b3ce92 home-dir-layout完了 2026-04-27 22:11:15 +09:00
45ede7a6fc home-dir-layout修正 2026-04-27 22:10:36 +09:00
f8fe6f83aa home-dirの整理 2026-04-27 21:45:30 +09:00
9998539e71 reasoningを利用可能にするチケット 2026-04-27 20:21:22 +09:00
29ea180b18 memory-resident-injection完了 2026-04-27 18:30:21 +09:00
ee60758138 メモリー内容のシステムプロンプトへの埋め込みの実装 2026-04-27 18:25:47 +09:00
db9faa0fad 環境変数に関するチケットの修正 2026-04-27 18:11:40 +09:00
325ae6fa27 pod-spawn-ui完了・設定UI関連のチケット作成 2026-04-27 17:38:32 +09:00
d0a1eaeb57 memory-search-tool完了 2026-04-27 17:26:07 +09:00
56c6758da5 memoryサーチツールを実装 2026-04-27 17:24:08 +09:00
30abefe747 manifest読み込み経路の整理チケット作成 2026-04-27 17:17:00 +09:00
2ed4bd007b manifest側で設定ファイルの収集を行うようにした 2026-04-27 16:52:23 +09:00
5ebdeff76d tuiからSpawnする仮UI 2026-04-27 16:22:06 +09:00
d80d06ff2e memory-file-format完了 2026-04-27 13:59:04 +09:00
f43d8fba3b メモリーに関するクレート作成・ファイル構造の実装 2026-04-27 13:33:31 +09:00
0a676524ae セグメントのセッション永続化チケット 2026-04-27 13:25:16 +09:00
fd89c754f1 submit-segment-protocol完了 2026-04-27 11:42:42 +09:00
2722e0b7ba submitをvec segmentを受け付ける形に変更 2026-04-27 11:03:58 +09:00
e0c4dbdc73 notification-naming完了 2026-04-26 23:30:46 +09:00
e44d49e80f Method::NotifyとEvent::Notificationが紛らわしい問題 2026-04-26 23:25:50 +09:00
123fc3b0ad memory実装チケット 2026-04-26 17:00:38 +09:00
89c2c701fd カタログの実装完了、ドキュメント整理 2026-04-24 13:33:56 +09:00
ce6198102f podのモジュール分割完了 2026-04-24 11:58:11 +09:00
c75d777cec podのモジュール分割 2026-04-24 11:48:27 +09:00
1b1dc73d7f modelsとprovidersをカタログ化 2026-04-24 10:45:03 +09:00
a730717fc7 モデルとプロバイダーをカタログ化するチケット 2026-04-23 16:18:30 +09:00
45b1e7b6de llm-provider-catalog実装 2026-04-23 15:37:51 +09:00
a86c22e6f5 Agents.mdを一定閾値でturncateする仕様を削除 2026-04-23 01:34:25 +09:00
6146b2806f pod-prompt-catalog完了 2026-04-22 17:43:42 +09:00
c68cd64882 Promptを一元管理するファイルから参照する実装 2026-04-22 17:43:05 +09:00
c492765d1a Memoryシステムの整理・Promptカタログチケット 2026-04-22 13:21:15 +09:00
7ce77f0ad5 TUIのEditツール周りの表示とカラー 2026-04-22 01:17:58 +09:00
3717569533 複数クライアント間でのRunメソッドの同期漏れ 2026-04-21 23:59:49 +09:00
676137c246 改行テキストの行計算・Padding設定 2026-04-21 23:26:34 +09:00
84fedd8048 TUIのオーバーホール実装 2026-04-21 23:12:35 +09:00
9bf6378041 protocol-tool-result-shape完了 2026-04-21 20:52:19 +09:00
d4055fb19d TUIに向けたprotocolの詳細調整 2026-04-21 20:50:59 +09:00
3b2bdcb19a TUIオーバーホールチケット 2026-04-21 19:37:14 +09:00
ee694b310f メモリシステムの設計 2026-04-21 19:23:07 +09:00
e513825da9 モデル性能のハードコードを消し飛し、Codexのフォーマットの修正 2026-04-21 18:35:56 +09:00
d37347fe68 Docsのアップロード 2026-04-21 17:39:43 +09:00
443 changed files with 69908 additions and 9148 deletions

View File

@ -1,3 +0,0 @@
- [Event broadcast pattern](project_event_broadcast_pattern.md) — Pod は event_tx: Option<broadcast::Sender<Event>> を保持、Controller が attach_notifier と同タイミングで attach
- [Test-path omission precedent](feedback_test_path_omission.md) — 要件に挙がったテストを「共通ヘルパ経由だから省略」した場合は Approve with follow-up が相場
- [cargo add workspace pitfall](feedback_cargo_add_workspace_pitfall.md) — ルート Cargo.toml に [workspace.dependencies] が未定義、workspace = true 指定は現状使えない

View File

@ -1,18 +0,0 @@
---
name: cargo add workspace pitfall
description: ルート Cargo.toml に [workspace.dependencies] が未定義なので workspace = true は使えない
type: feedback
---
ルート `Cargo.toml` は `[workspace.package]` のみを持ち `[workspace.dependencies]` を
定義していない。したがってチケットや PR に
`foo = { workspace = true, features = [...] }` と書かれていても、そのままでは解決しない。
**Why:** プロジェクトの現状流儀として、各クレートは直接バージョン指定する
(例: `crates/session-store/Cargo.toml` の `uuid = { version = "1", features = [...] }`)。
protocol-design (2026-04-21) レビュー時に発見。
**How to apply:** チケットに `workspace = true` の文言を見たら、
- 実装が直接バージョン指定にしていれば「コードベース流儀に整合」として Follow-up 扱い、
- `workspace = true` のまま書かれていたら「ビルドが通らないはず」として Request changes、
- もしくは `[workspace.dependencies]` を整備する方向の提案を添える。

View File

@ -1,19 +0,0 @@
---
name: Test-path omission precedent
description: 要件で列挙されたテストパスを「共通ヘルパなので省略」した場合の判断相場
type: feedback
---
チケット要件にテストパス (例: 成功/失敗/mid-turn の 3 本) が明示列挙されている場合、
そのうち 1 本を「共通ヘルパ経由だから inspection で担保」として省略する実装が来たら、
**Approve with follow-up** が相場。Blocking にはしない。
**Why:** 共通化されたインスツルメント (例: `send_event`) 1 点だけが共通で、
呼び出し側の制御フロー (async 再帰・フラグ管理・エラー伝播) は個別なのが通例。
ただしビルドと主要パスが動いており、後続チケットでテストを足すだけの差分で済むケースが多い。
protocol-design (2026-04-21) で先例。
**How to apply:** 要件とテストコードを 1:1 で突き合わせ、欠けたパスがあれば
- 制御フローが共通化されていれば Follow-up
- 制御フローが別物 (別関数 / 別状態遷移) なら Request changes
と切り分ける。`send_event` 型のヘルパ共通化は Follow-up 側の判断。

View File

@ -1,20 +0,0 @@
---
name: Event broadcast pattern
description: Pod が protocol::Event を broadcast する公式パターン (Notifier と別経路)
type: project
---
Pod 内部から `protocol::Event` を broadcast する正規ルートは、`Pod` に
`event_tx: Option<broadcast::Sender<Event>>` を持たせて `attach_event_tx` で
Controller 側から注入する方式。`Notifier` は `Event::Notification` の
replay バッファ専用で、他イベントは通さない。
**Why:** `Notifier` は Notification 型の Warn/Error レベル情報 + late subscriber への
snapshot replay を責務にしており、Event 一般を乗せると意味が噛み合わない。
protocol-design チケットの決定事項 6/7 で確定 (2026-04-21)。
**How to apply:** 新しい Pod 発の Event を追加するときは、
1) `Pod::send_event(&self, event)` ヘルパ (`pod.rs:370-374`) を使う、
2) Controller は `pod.attach_notifier` の直後に `pod.attach_event_tx` を呼ぶ、
3) late subscriber への届きは期待しない (buffer 化が必要なら別チケット化)。
Notifier 経由で新種 Event を流す PR が来たら差し戻し対象。

View File

@ -1,9 +1,8 @@
---
name: "ticket-reviewer"
description: "Use this agent when a ticket implementation is submitted for review in this project (insomnia). The agent reviews the ticket's premises/requirements and the actual implementation, creates `tickets/<ticket>.review.md` with findings, and updates the original `tickets/<ticket>.md` with review status. Do NOT use this agent for general code review unrelated to a ticket. Examples:\\n<example>\\nContext: User has just finished implementing a feature described in tickets/foo.md and wants it reviewed.\\nuser: \"tickets/foo.md の実装が終わったのでレビューして\"\\nassistant: \"I'll use the Agent tool to launch the ticket-reviewer agent to review the implementation against tickets/foo.md's requirements and produce tickets/foo.review.md.\"\\n<commentary>\\nThe user is explicitly requesting a ticket-scoped review with the project's .review.md workflow, which is this agent's purpose.\\n</commentary>\\n</example>\\n<example>\\nContext: User finishes a chunk of work and mentions the ticket name.\\nuser: \"scopedfs-scripting のチケット、一通り実装出来た\"\\nassistant: \"Let me use the Agent tool to launch the ticket-reviewer agent to review the implementation and produce the review artifacts.\"\\n<commentary>\\nCompletion of a ticket implementation is the trigger for the ticket-reviewer agent per project's lifecycle (c. レビュー).\\n</commentary>\\n</example>\\n<example>\\nContext: User requests re-review after addressing feedback.\\nuser: \"指摘を反映したので再レビューお願い\"\\nassistant: \"I'll use the Agent tool to launch the ticket-reviewer agent to re-review and update the .review.md accordingly.\"\\n<commentary>\\nRe-review updates the existing .review.md and ticket status; this agent handles that workflow.\\n</commentary>\\n</example>"
description: "Use this agent when a ticket implementation is submitted for review in this project (insomnia). The agent reviews the ticket's premises/requirements and the actual implementation, creates `tickets/<ticket>.review.md` with findings, and updates the original `tickets/<ticket>.md` with review status. Do NOT use this agent for general code review unrelated to a ticket. "
model: opus
color: purple
memory: project
---
You are a senior reviewer specialized in the `insomnia` project. You are an expert at evaluating ticket-scoped implementations against their stated premises and requirements, and at safeguarding the codebase from unnecessary complexity or architectural drift. You operate strictly within the project's ticket lifecycle conventions defined in `CLAUDE.md`.
@ -118,153 +117,3 @@ Do not modify the ticket's 背景・要件 sections unless the user explicitly a
4. Did I avoid making git writes?
5. Did I update both `<name>.review.md` and `<name>.md`?
6. Is my judgment line unambiguous?
## Agent Memory
**Update your agent memory** as you review tickets in this project. This builds up institutional knowledge across review sessions. Write concise notes about what you found and where.
Examples of what to record:
- Recurring architectural patterns and anti-patterns observed across tickets
- Layer boundary conventions (e.g., what belongs in llm-worker vs. upper layers) as they become clearer
- Common requirement-miss patterns (e.g., tests omitted, build-through invariant violated)
- Crate/module organization conventions confirmed during reviews
- Reviewer judgment precedents — when a similar issue was Approve-with-follow-up vs. Request-changes
- Ticket authoring patterns that correlate with smooth vs. troubled reviews
- Project-specific policies reinforced during review (provider policy, ScopedFs scripting direction, cargo add discipline, etc.)
Keep entries short and link-friendly so they can be referenced in future reviews.
# Persistent Agent Memory
You have a persistent, file-based memory system at `<repo>/.claude/agent-memory/ticket-reviewer/`. This directory already exists — write to it directly with the Write tool (do not run mkdir or check for its existence).
You should build up this memory system over time so that future conversations can have a complete picture of who the user is, how they'd like to collaborate with you, what behaviors to avoid or repeat, and the context behind the work the user gives you.
If the user explicitly asks you to remember something, save it immediately as whichever type fits best. If they ask you to forget something, find and remove the relevant entry.
## Types of memory
There are several discrete types of memory that you can store in your memory system:
<types>
<type>
<name>user</name>
<description>Contain information about the user's role, goals, responsibilities, and knowledge. Great user memories help you tailor your future behavior to the user's preferences and perspective. Your goal in reading and writing these memories is to build up an understanding of who the user is and how you can be most helpful to them specifically. For example, you should collaborate with a senior software engineer differently than a student who is coding for the very first time. Keep in mind, that the aim here is to be helpful to the user. Avoid writing memories about the user that could be viewed as a negative judgement or that are not relevant to the work you're trying to accomplish together.</description>
<when_to_save>When you learn any details about the user's role, preferences, responsibilities, or knowledge</when_to_save>
<how_to_use>When your work should be informed by the user's profile or perspective. For example, if the user is asking you to explain a part of the code, you should answer that question in a way that is tailored to the specific details that they will find most valuable or that helps them build their mental model in relation to domain knowledge they already have.</how_to_use>
<examples>
user: I'm a data scientist investigating what logging we have in place
assistant: [saves user memory: user is a data scientist, currently focused on observability/logging]
user: I've been writing Go for ten years but this is my first time touching the React side of this repo
assistant: [saves user memory: deep Go expertise, new to React and this project's frontend — frame frontend explanations in terms of backend analogues]
</examples>
</type>
<type>
<name>feedback</name>
<description>Guidance the user has given you about how to approach work — both what to avoid and what to keep doing. These are a very important type of memory to read and write as they allow you to remain coherent and responsive to the way you should approach work in the project. Record from failure AND success: if you only save corrections, you will avoid past mistakes but drift away from approaches the user has already validated, and may grow overly cautious.</description>
<when_to_save>Any time the user corrects your approach ("no not that", "don't", "stop doing X") OR confirms a non-obvious approach worked ("yes exactly", "perfect, keep doing that", accepting an unusual choice without pushback). Corrections are easy to notice; confirmations are quieter — watch for them. In both cases, save what is applicable to future conversations, especially if surprising or not obvious from the code. Include *why* so you can judge edge cases later.</when_to_save>
<how_to_use>Let these memories guide your behavior so that the user does not need to offer the same guidance twice.</how_to_use>
<body_structure>Lead with the rule itself, then a **Why:** line (the reason the user gave — often a past incident or strong preference) and a **How to apply:** line (when/where this guidance kicks in). Knowing *why* lets you judge edge cases instead of blindly following the rule.</body_structure>
<examples>
user: don't mock the database in these tests — we got burned last quarter when mocked tests passed but the prod migration failed
assistant: [saves feedback memory: integration tests must hit a real database, not mocks. Reason: prior incident where mock/prod divergence masked a broken migration]
user: stop summarizing what you just did at the end of every response, I can read the diff
assistant: [saves feedback memory: this user wants terse responses with no trailing summaries]
user: yeah the single bundled PR was the right call here, splitting this one would've just been churn
assistant: [saves feedback memory: for refactors in this area, user prefers one bundled PR over many small ones. Confirmed after I chose this approach — a validated judgment call, not a correction]
</examples>
</type>
<type>
<name>project</name>
<description>Information that you learn about ongoing work, goals, initiatives, bugs, or incidents within the project that is not otherwise derivable from the code or git history. Project memories help you understand the broader context and motivation behind the work the user is doing within this working directory.</description>
<when_to_save>When you learn who is doing what, why, or by when. These states change relatively quickly so try to keep your understanding of this up to date. Always convert relative dates in user messages to absolute dates when saving (e.g., "Thursday" → "2026-03-05"), so the memory remains interpretable after time passes.</when_to_save>
<how_to_use>Use these memories to more fully understand the details and nuance behind the user's request and make better informed suggestions.</how_to_use>
<body_structure>Lead with the fact or decision, then a **Why:** line (the motivation — often a constraint, deadline, or stakeholder ask) and a **How to apply:** line (how this should shape your suggestions). Project memories decay fast, so the why helps future-you judge whether the memory is still load-bearing.</body_structure>
<examples>
user: we're freezing all non-critical merges after Thursday — mobile team is cutting a release branch
assistant: [saves project memory: merge freeze begins 2026-03-05 for mobile release cut. Flag any non-critical PR work scheduled after that date]
user: the reason we're ripping out the old auth middleware is that legal flagged it for storing session tokens in a way that doesn't meet the new compliance requirements
assistant: [saves project memory: auth middleware rewrite is driven by legal/compliance requirements around session token storage, not tech-debt cleanup — scope decisions should favor compliance over ergonomics]
</examples>
</type>
<type>
<name>reference</name>
<description>Stores pointers to where information can be found in external systems. These memories allow you to remember where to look to find up-to-date information outside of the project directory.</description>
<when_to_save>When you learn about resources in external systems and their purpose. For example, that bugs are tracked in a specific project in Linear or that feedback can be found in a specific Slack channel.</when_to_save>
<how_to_use>When the user references an external system or information that may be in an external system.</how_to_use>
<examples>
user: check the Linear project "INGEST" if you want context on these tickets, that's where we track all pipeline bugs
assistant: [saves reference memory: pipeline bugs are tracked in Linear project "INGEST"]
user: the Grafana board at grafana.internal/d/api-latency is what oncall watches — if you're touching request handling, that's the thing that'll page someone
assistant: [saves reference memory: grafana.internal/d/api-latency is the oncall latency dashboard — check it when editing request-path code]
</examples>
</type>
</types>
## What NOT to save in memory
- Code patterns, conventions, architecture, file paths, or project structure — these can be derived by reading the current project state.
- Git history, recent changes, or who-changed-what — `git log` / `git blame` are authoritative.
- Debugging solutions or fix recipes — the fix is in the code; the commit message has the context.
- Anything already documented in CLAUDE.md files.
- Ephemeral task details: in-progress work, temporary state, current conversation context.
These exclusions apply even when the user explicitly asks you to save. If they ask you to save a PR list or activity summary, ask what was *surprising* or *non-obvious* about it — that is the part worth keeping.
## How to save memories
Saving a memory is a two-step process:
**Step 1** — write the memory to its own file (e.g., `user_role.md`, `feedback_testing.md`) using this frontmatter format:
```markdown
---
name: {{memory name}}
description: {{one-line description — used to decide relevance in future conversations, so be specific}}
type: {{user, feedback, project, reference}}
---
{{memory content — for feedback/project types, structure as: rule/fact, then **Why:** and **How to apply:** lines}}
```
**Step 2** — add a pointer to that file in `MEMORY.md`. `MEMORY.md` is an index, not a memory — each entry should be one line, under ~150 characters: `- [Title](file.md) — one-line hook`. It has no frontmatter. Never write memory content directly into `MEMORY.md`.
- `MEMORY.md` is always loaded into your conversation context — lines after 200 will be truncated, so keep the index concise
- Keep the name, description, and type fields in memory files up-to-date with the content
- Organize memory semantically by topic, not chronologically
- Update or remove memories that turn out to be wrong or outdated
- Do not write duplicate memories. First check if there is an existing memory you can update before writing a new one.
## When to access memories
- When memories seem relevant, or the user references prior-conversation work.
- You MUST access memory when the user explicitly asks you to check, recall, or remember.
- If the user says to *ignore* or *not use* memory: Do not apply remembered facts, cite, compare against, or mention memory content.
- Memory records can become stale over time. Use memory as context for what was true at a given point in time. Before answering the user or building assumptions based solely on information in memory records, verify that the memory is still correct and up-to-date by reading the current state of the files or resources. If a recalled memory conflicts with current information, trust what you observe now — and update or remove the stale memory rather than acting on it.
## Before recommending from memory
A memory that names a specific function, file, or flag is a claim that it existed *when the memory was written*. It may have been renamed, removed, or never merged. Before recommending it:
- If the memory names a file path: check the file exists.
- If the memory names a function or flag: grep for it.
- If the user is about to act on your recommendation (not just asking about history), verify first.
"The memory says X exists" is not the same as "X exists now."
A memory that summarizes repo state (activity logs, architecture snapshots) is frozen in time. If the user asks about *recent* or *current* state, prefer `git log` or reading the code over recalling the snapshot.
## Memory and other forms of persistence
Memory is one of several persistence mechanisms available to you as you assist the user in a given conversation. The distinction is often that memory can be recalled in future conversations and should not be used for persisting information that is only useful within the scope of the current conversation.
- When to use or update a plan instead of memory: If you are about to start a non-trivial implementation task and would like to reach alignment with the user on your approach you should use a Plan rather than saving this information to memory. Similarly, if you already have a plan within the conversation and you have changed your approach persist that change by updating the plan rather than saving a memory.
- When to use or update tasks instead of memory: When you need to break your work in current conversation into discrete steps or keep track of your progress use tasks instead of saving to memory. Tasks are great for persisting information about the work that needs to be done in the current conversation, but memory should be reserved for information that will be useful in future conversations.
- Since this memory is project-scope and shared with your team via version control, tailor your memories to this project
## MEMORY.md
Your MEMORY.md is currently empty. When you save new memories, they will appear here.

View File

@ -0,0 +1,26 @@
---
name: worktree-workflow
description: "Worktreeを用いた開発フローを進める。git上の開発におけるミクロな指示で、プロジェクトの管理に関する指示は提供されていない。"
allowed-tools: "Bash(cd *), Bash(git worktree *), Bash(mkdir *), Bash(cp *), Bash(ln *), Bash(ls *), Bash(find *)"
---
# Worktreeを用いた開発
Goal: 実装を完了させ、ブランチをマージ待ちの状態にする。
`./.worktree`にworktreeを作成します。
エージェントの1セッション=1ワークツリーとしており、ブランチ/イシュー/チケット単位で切ります。
このワークフローにおいては、ブランチはローカルで並行開発するためのマージ後削除の運用とし、Worktreeと同名のbranchを同時に作って進めます。メインのディレクトリのブランチから切るものとして扱います。
```
git worktree add .worktree/<task-name> -b <task-name>
```
## flake.nixの無効化
基本的に、CWDを変更できない場合、.envrcによる自動アクティベートは効かないので無視で構わない。
## 完了時
マージウィンドウからこのスキルがinvokeされた際は、ブランチのマージ・worktreeの削除まで行う。対して、実装者がマージしてクローズしてはならない。

1
.gitignore vendored
View File

@ -2,3 +2,4 @@
.direnv
*.local*
.env
.worktree

1
.insomnia/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/memory/

13
.insomnia/manifest.toml Normal file
View File

@ -0,0 +1,13 @@
[scope]
allow = [
{ target = ".", permission = "write", recursive = true },
]
[session]
record_event_trace = true
[memory]
extract_threshold = 50000
consolidation_threshold_files = 5
consolidation_threshold_bytes = 50000

View File

@ -0,0 +1,143 @@
---
description: TODO / tickets / docs / git history から次の作業候補を見繕い、課題発見や方針決定を半自動でイテレーションする WIP maintainer workflow
model_invokation: false
user_invocable: true
requires: []
---
# Auto Maintain Workflow (WIP)
insomnia を AI maintainer として運用するための半自動 loop。TODO / tickets から「今進められそうな作業」を選ぶだけでなく、課題の発見、設計判断の切り分け、次に人間へ戻すべき問いの整理までを扱う。
これは unattended 自動開発ではない。実装の並列委譲は `multi-agent-workflow`、worktree の機械的作成は `worktree-workflow` に任せる。本 Workflow はその前段として、何を進めるべきか、何をまだ決めるべきかを整理する。
参照:
- `docs/plan/ai-maintainer.md`
- `tickets/auto-maintain-workflow.md`
## 位置づけ
AI maintainer の目的は、コードを書くこと自体ではなく、プロジェクト状態を前に進めることである。
この Workflow は WIP として、以下を行う。
- TODO / tickets / docs / git history を読んで現在地を把握する。
- 実装可能な ticket と、方針決定が必要な ticket を分ける。
- 小さく実装できる候補を提案する。
- 設計相談が必要な論点を人間に戻す。
- 運用上の問題や繰り返し発生する詰まりを report / ticket / workflow 改訂候補として整理する。
## 非目標
現時点では以下をしない。
- 常駐 scheduler として自動実行する。
- 人間の合意なしに新規 ticket を作る。
- 人間の合意なしに既存 ticket を大幅変更する。
- 人間の合意なしに ticket 完了削除を行う。
- push する。
- Workflow を自律生成・自律改訂する。
- scope / permission / history persistence / prompt context 加工原則に関わる判断を勝手に決める。
## 入力として読むもの
必要に応じて以下を読む。
1. `TODO.md`
2. `tickets/*.md`
3. `docs/plan/`
4. `docs/report/`
5. `git log --oneline` / ticket file の git history
6. 既存 worktree / branch 状態
7. 最近の失敗や通知、ユーザーからの観測
TODO と ticket の不整合を見つけたら、勝手に修正せず、まず報告する。ただしユーザーが明示的に「直して」と言った場合は Mode 1 として整理してよい。
## 分類
候補を以下に分ける。
### A. 実装委譲可能
- 要件と完了条件が具体的。
- 影響範囲が限定的。
- test / build で確認できる。
- 大きな設計判断が不要。
- scope を狭く切れる。
この場合は、人間に候補として提示する。人間が実行を許可したら `$user/multi-agent-workflow` に進む。
### B. 方針決定が必要
- 複数の設計方針が自然に導ける。
- protocol / permission / scope / persistence / prompt context に触れる。
- UX の仕様が未確定。
- 既存 ticket の要件が古い。
この場合は、実装せず、決めるべき問いを短く提示する。
### C. ticket 整理が必要
- TODO にあるが ticket がない。
- ticket があるが TODO にない。
- 完了済みに見えるが残っている。
- ticket の前提が変わっている。
この場合は、不整合と修正案を提示する。修正は人間の許可後に行う。
### D. report / workflow 改善候補
- 同じ tool 問題が繰り返し出る。
- Workflow の指示が曖昧で実装 Pod が迷った。
- AI が過剰に Task tool を使うなど、運用上の癖が出た。
- 通知や Pod completion tracking など、開発基盤の不足が観測された。
この場合は、すぐ ticket 化するか、`docs/report/` に観測として残すか、人間に確認する。
## 半自動 iteration
1. 状態把握
- TODO / tickets / git status を読む。
- 最近完了した流れや未完了 branch を確認する。
2. 候補抽出
- 実装可能そうな ticket を 2〜5 件挙げる。
- correctness / developer experience / user-visible UX / cleanup で分類する。
3. 推奨順位
- blocking correctness を最優先。
- 実害が出ている運用問題を次点。
- 小さく完了できる UX / cleanup を次点。
- 大きな設計変更は方針相談に回す。
4. 人間への提示
- 「次に進めるなら X」を1つ推奨する。
- 理由を短く述べる。
- 実装委譲する場合の scope / test 方針を添える。
5. 実行への接続
- 人間が「進めて」と言ったら `$user/multi-agent-workflow` に接続する。
- worktree 作成は `$user/worktree-workflow` に従う。
## エスカレーション基準
以下では実装に進まず、人間へ戻す。
- ticket の要件から複数の設計方針が自然に導ける。
- 長期構造、crate boundary、protocol、permission、scope、history persistence に触れる。
- prompt context 加工原則に関わる。
- 新 ticket の作成、既存 ticket の大幅変更、ticket 完了削除について合意がない。
- test 不能、再現不能、または作業範囲外の不具合に遭遇した。
- WorkItem / Thread / Lease / maintainer state など、まだ設計中の概念が必要になる。
## まだ固定しないもの
以下は `docs/plan/ai-maintainer.md` の上位設計に残し、本 Workflow では詳細を固定しない。
- WorkItemStore / LeaseStore。
- operation inbox / trial log。
- QA feedback を ticket / review / report のどれに落とすか。
- AI 自身の feedback を Knowledge / report / ticket / workflow 改訂のどれにするか。
- maintainer doctor。
- reviewer Pod の評価基準の機械化。

View File

@ -0,0 +1,150 @@
---
description: worktree と子 Pod を使って複数 ticket の実装・レビュー・修正・完了処理を並列に進める orchestration フロー
model_invokation: true
user_invocable: true
requires: []
---
# Multi-agent Worktree Workflow
insomnia を insomnia で開発する際の、worktree + 実装 Pod + 親 Pod review の標準フロー。これは **実装を並列に進めるためのフロー** であり、worktree の機械的作成手順は `$user/worktree-workflow`、ticket 候補選定や方針探索の半自動 loop は `$user/auto-maintain` に分ける。
## 目的
- 実装差分を ticket ごとの child worktree に隔離する。
- 実装 Pod に narrow write scope を渡して並列実装させる。
- 親 Pod が diff / test / ticket 要件を review し、必要なら修正依頼する。
- approve 後に merge / ticket 完了処理 / main workspace での再検証を行う。
## 開始条件
以下が揃っている時に使う。
- 対象 ticket が決まっている。
- ticket の背景・要件・完了条件から実装方針が概ね導ける。
- worktree 作成と git 書き込み操作について、人間の許可がある。
- main workspace の unrelated dirty changes を把握している。
設計方針が複数自然に導ける場合、protocol / scope / permission / history persistence に触れる場合、ticket 自体の再定義が必要な場合は、実装委譲前に人間へ戻す。
## 親 Pod / orchestrator の責務
1. 状態確認
- `git status --short --branch`
- 対象 ticket
- 関連 TODO / docs / 既存 worktree
2. worktree 作成
- `$user/worktree-workflow` に従い `./.worktree/<task-name>` を作る。
- `.insomnia` を sparse checkout で除外する。
3. 実装 Pod spawn
- read scope: main workspace 全体。
- write scope: child worktree、または必要最小 directory。
- task には以下を明示する。
- child worktree path / branch
- 対象 ticket path
- Bash は必ず child worktree に `cd` すること
- main workspace の `TODO.md` / `tickets/` / `docs/report/` / `.insomnia` は編集しないこと
- 範囲外事項
- 実行すべき build / test / format
- 完了報告項目
4. 監督
- `ReadPodOutput` で報告を読む。
- 通知が来ない場合でも、worktree の `git status` / `git diff` / test で完了状態を確認する。
- 必要なら `SendToPod` で修正依頼する。
5. review
- ticket の背景・要件・完了条件・範囲外に照らして diff を確認する。
- build / test / `git diff --check` を確認する。
- 必要なら reviewer Pod を read-only で立てる。
6. merge / lifecycle
- approve 後に main workspace へ merge する。
- `TODO.md` から該当行を削除し、`tickets/foo.md` を削除して完了 commit を作る。
- main workspace で必要な test / `cargo check --workspace` / `cargo fmt --check` を再実行する。
## 実装 Pod の責務
- child worktree 内でのみ実装する。
- main workspace の管理ファイルを書かない。
- 指定された build / test / format を実行する。
- ticket 要件外の設計変更、依存関係追加、scope / permission / history persistence / prompt context 加工原則に触れる変更が必要なら止めて報告する。
- 完了時に以下を報告する。
- worktree path / branch
- commit hash(commit した場合)
- 変更ファイル
- 実装概要
- 実行した build / test / format
- 未解決事項
- review に回せるか
## 実装 Pod の commit 方針
実装 Pod には child worktree 内での commit を許可してよい。
- commit は ticket 内で意味のある粒度にする。
- 例: `feat: ...`、`fix: ...`、`test: ...`、`docs: ...`
- 実装 Pod は merge / push / branch deletion / worktree remove をしない。
- 実装 Pod は `TODO.md` / `tickets/` の完了処理 commit をしない。
- 親 Pod は review 時に commit 粒度も確認する。
- 必要な修正は、原則追加 commit として積む。履歴改変や squash は人間の明示指示がある時だけ行う。
## Review → 修正 → 完了の標準形
### Approve
1. 実装 Pod を停止し、scope を回収する。
2. 親 Pod が main workspace で `git merge --no-ff <branch>` する。
3. 親 Pod が `TODO.md` / `tickets/foo.md` を完了処理して commit する。
4. main workspace で検証コマンドを再実行する。
5. 変更内容・commit・検証結果・残 dirty changes を報告する。
### Request changes
1. blocking finding をファイル / 行 / 理由 / 修正方針つきで整理する。
2. 実装 Pod が生きていれば `SendToPod` で修正依頼する。
3. 停止済みなら、同じ worktree / branch / scope で再 spawn するか、親 Pod が最小修正する。
4. 修正後に focused test と必要な broader test を再実行する。
5. 再 review する。
### Non-blocking comments
- ticket 要件外の改善はその場で混ぜない。
- 必要なら後続 ticket / docs/report にする。
- non-blocking を理由に completion を遅らせない。
## 並列実装時の注意
- 1 ticket = 1 worktree = 1 branch を基本にする。
- 複数 Pod に同じ write scope を渡さない。
- parent は child の write scope 配下を直接編集しない。
- 依存関係がある ticket は、土台 branch を merge してから次 worktree を切る。
- parallel に走らせた Pod の完了通知は取りこぼしうるため、`ReadPodOutput` と worktree 状態で確認する。
## 完了報告の標準形
```text
完了:
- ticket: <path>
- branch: <name>
- commits:
- <hash> <subject>
- 変更概要: ...
- 検証:
- cargo fmt --check
- cargo check --workspace
- cargo test ...
- review: approve / approve with comments / request changes
- 未解決事項: ...
- 残 dirty changes: ...
```
## この Workflow で扱わないもの
以下は `$user/auto-maintain` または別の設計相談で扱う。
- ticket 候補を見繕うこと。
- 新規 ticket 作成判断。
- QA feedback / AI feedback を ticket / report / workflow に落とす判断。
- 長期 maintainer loop / WorkItemStore / LeaseStore の設計。

View File

@ -0,0 +1,98 @@
---
description: insomnia プロジェクトで child git worktree を作成・管理するための機械的手順。実装 Pod に作らせず、親 Pod が main workspace で実行する。
model_invokation: false
user_invocable: true
requires: []
---
# Worktree Workflow
insomnia プロジェクトで実装差分を main workspace から分離するため、`./.worktree/<task-name>` に child git worktree を作る。これは **worktree の扱い方だけ** を定める Workflow であり、ticket 選定、実装委譲、review、merge の運用は `$user/multi-agent-workflow` 側で扱う。
insomnia では Pod の write scope が排他的に委譲されるため、child worktree に `.insomnia` を置かない。main workspace は orchestration / ticket / docs / memory / workflow 管理の場所として残し、child worktree はコード差分専用の作業面として扱う。
## 適用範囲
この Workflow は親 Pod / orchestrator が main workspace で実行する。
- 実装 Pod にこの Workflow を渡して worktree を作らせない。
- 実装 Pod は、親 Pod が作成済みの child worktree を受け取り、その中で実装・build・test・報告を行う。
- ticket 作成、TODO 更新、review artifact、docs/report は main workspace 側で扱う。
## 原則
- 1 ticket / 1 実装 task につき 1 worktree を作る。
- worktree path は `./.worktree/<task-name>`
- branch 名は原則 `<task-name>` と同じ kebab-case。
- child worktree には `.insomnia` を出さない。
- child worktree は実装差分用。`TODO.md` / `tickets/` / `docs/report/` / workflow / memory は原則 main workspace 側で扱う。
- push はしない。
## 事前確認
作成前に以下を確認する。
1. 対象 ticket / task が決まっているか。
2. `<task-name>` が branch / path 名に使える kebab-case か。
3. `git worktree add` を実行してよい許可があるか。
4. main workspace に混ぜてはいけない未保存差分がないか。
5. 同名 branch / worktree が既に存在しないか。
同名 branch がある場合は、既存 branch を使うか、人間に確認する。`git worktree add -b` で上書きしない。
## 作成手順
main workspace で実行する。
```bash
git worktree add .worktree/<task-name> -b <task-name>
git -C .worktree/<task-name> sparse-checkout init --no-cone
git -C .worktree/<task-name> sparse-checkout set --no-cone \
'/*' \
'!/.insomnia/' \
'!/.insomnia/**'
```
確認する。
```bash
git -C .worktree/<task-name> status --short --branch
test ! -e .worktree/<task-name>/.insomnia
```
失敗した場合は、worktree / branch / lock の状態を確認し、勝手に cleanup せず人間へ報告する。
## 子 Pod へ渡す scope
子 Pod を使う場合、子 Pod の cwd は main workspace のままになる。必ず作業対象が child worktree であることを明示し、Bash 実行時は毎回 `cd <repo>/.worktree/<task-name> && ...` させる。
推奨 scope:
```text
read: <repo>
write: <repo>/.worktree/<task-name>
```
より狭く切れる場合は、write scope を変更対象 crate / directory まで狭めてよい。ただし build / test に必要な生成物を書けることを確認する。
## child worktree 内の禁止事項
- `.insomnia` を作らない / コピーしない。
- main workspace の `TODO.md` / `tickets/` / `docs/report/` を編集しない。
- merge / push / branch deletion / worktree remove をしない。
- scope / permission / history persistence / prompt context 加工原則に関わる設計変更を無断で行わない。
## 完了時の扱い
worktree 作成 Workflow としては、完了時に merge しない。merge、ticket 完了、TODO 削除は `$user/multi-agent-workflow` または人間の明示指示で行う。
実装 Pod へ渡す完了報告項目の標準形:
- worktree path
- branch 名
- commit hash(実装 Pod に commit を許可した場合)
- 変更ファイル
- 実装概要
- 実行した build / test / format
- 未解決事項
- review に回せるか

View File

@ -1,11 +1,43 @@
全体設計が概ね固まり、随所の細かい仕様を詰めながら実装を進めている。
---
## このシステムにおける設計要旨
Gitは基本的にすべてユーザーが操作している。書き込みが必要な操作は明示的に許可されない限り行わないこと
- プロンプトはすべて resources/promptsに集約している。管理効率の向上と同時に、ユーザーがオーバーライドする形式でもある。
- E2E(実プロセスをスポーンさせてのテスト)は未設計。
- 変更量を最小にするために設計を歪めたり、設計問題に対して不必要な後方互換性を作らない。長期的なメンテナンスと型安全性を追求すること。
### LLM コンテキストの加工原則
LLM に投げる context への割り込みは、大きく2種類に分かれる。**前者は許されるが、後者は禁止**。
Podの状態から純粋に再現可能で、且つ揮発性の無い操作であることが望ましい(pruning、tool result の content 切り詰め、prompt cache anchor の付与等)。
原則として、コンテキストは積み重ねるものであり、一時的にメッセージを差し込むことや、過去のメッセージを改ざんすることはKVキャッシュのヒット率を下げる。
**禁止**: ターンを跨ぐことができない情報に基づいて、history に記録せずに context だけにコンテンツを差し込むこと。これをやると LLM はそれに反応して生成を行う一方、次以降のターンでhistoryに残らないため、「自分がなぜその発言/tool call をしたか」の根拠が消えるうえ、prompt cache のヒット率も低下させることになる。
新しい input を context に乗せたいなら、必ず先に `worker.history` に append して commit すること。`history.json` への永続化はそこから自動的についてくる。Notify / PodEvent / `<system-reminder>` 系はこの原則で扱う(→ `tickets/notify-history-persist.md`)。
また、キャッシュを破壊するタイミングは正確にコントロールされる必要があり、キャッシュ破壊とトークン消費のトレードオフに基づいて慎重に設計されるべきである。
---
## 実際のセッションを読んでデバッグする
`~/.insomnia/sessions`にすべてのセッションがある。jsonlなので、いい感じにBashで読むこと。
---
## Git操作
workflowで明示されない限り、読み取り以外の操作は控えること。
基本はworktree上の一時的なブランチでコミットを重ね、メインブランチに取り込む運用をしている。
コミットメッセージは適当に`<prefix>: *簡潔な1行*`で書いている。
外部の参考プロジェクトは必要に応じてローカルの外部 checkout からReadすること。
---
## Ticketの運用について
`TODO.md`、`tickets/`はgitで管理されていて、時系列の管理はgitを参照して把握すること。
### TODO.md
@ -31,7 +63,13 @@ b. 詳細化や前提の変化: `tickets/foo.md` を更新してcommit
c. レビュー: `tickets/foo.md` にレビュー状態を追記 + `tickets/foo.review.md` を作成してcommit
d. 完了: `tickets/foo.md` と `tickets/foo.review.md` を両方削除してcommit
worktreeと併用して作業を進める場合、必ずブランチを切る前に対象のチケットをコミットしてから切ること。
TODO.mdのリンクは完了後に切れるが、そのリンクを元にgitで消されたファイルを読み、内容を把握できる。
`.review.md` にはレビューの指摘事項と判断結果を記載する。
レビューはdiffの確認だけでなく、チケットはどのような前提・要件であり、それが達成されたかの確認まで含めて行う。
常に、提出された実装で良いのか、コードベースを歪めていないか、不必要な実装ではないかを確認すること。
---
insomniaでinsomniaを開発している際、AI自身のフィードバックを元に改善を回すために `docs/report/`ディレクトリに感じた障壁や改善案等を書き残す形にした。 明確に力不足な点/ツールの問題があった場合や、ユーザーからの指示があった際に作ること。

View File

@ -1,11 +1,43 @@
全体設計が概ね固まり、随所の細かい仕様を詰めながら実装を進めている。
---
## このシステムにおける設計要旨
Gitは基本的にすべてユーザーが操作している。書き込みが必要な操作は明示的に許可されない限り行わないこと
- プロンプトはすべて resources/promptsに集約している。管理効率の向上と同時に、ユーザーがオーバーライドする形式でもある。
- E2E(実プロセスをスポーンさせてのテスト)は未設計。
- 変更量を最小にするために設計を歪めたり、設計問題に対して不必要な後方互換性を作らない。長期的なメンテナンスと型安全性を追求すること。
### LLM コンテキストの加工原則
LLM に投げる context への割り込みは、大きく2種類に分かれる。**前者は許されるが、後者は禁止**。
Podの状態から純粋に再現可能で、且つ揮発性の無い操作であることが望ましい(pruning、tool result の content 切り詰め、prompt cache anchor の付与等)。
原則として、コンテキストは積み重ねるものであり、一時的にメッセージを差し込むことや、過去のメッセージを改ざんすることはKVキャッシュのヒット率を下げる。
**禁止**: ターンを跨ぐことができない情報に基づいて、history に記録せずに context だけにコンテンツを差し込むこと。これをやると LLM はそれに反応して生成を行う一方、次以降のターンでhistoryに残らないため、「自分がなぜその発言/tool call をしたか」の根拠が消えるうえ、prompt cache のヒット率も低下させることになる。
新しい input を context に乗せたいなら、必ず先に `worker.history` に append して commit すること。`history.json` への永続化はそこから自動的についてくる。Notify / PodEvent / `<system-reminder>` 系はこの原則で扱う(→ `tickets/notify-history-persist.md`)。
また、キャッシュを破壊するタイミングは正確にコントロールされる必要があり、キャッシュ破壊とトークン消費のトレードオフに基づいて慎重に設計されるべきである。
---
## 実際のセッションを読んでデバッグする
`~/.insomnia/sessions`にすべてのセッションがある。jsonlなので、いい感じにBashで読むこと。
---
## Git操作
workflowで明示されない限り、読み取り以外の操作は控えること。
基本はworktree上の一時的なブランチでコミットを重ね、メインブランチに取り込む運用をしている。
コミットメッセージは適当に`<prefix>: *簡潔な1行*`で書いている。
外部の参考プロジェクトは必要に応じてローカルの外部 checkout からReadすること。
---
## Ticketの運用について
`TODO.md`、`tickets/`はgitで管理されていて、時系列の管理はgitを参照して把握すること。
### TODO.md
@ -31,7 +63,13 @@ b. 詳細化や前提の変化: `tickets/foo.md` を更新してcommit
c. レビュー: `tickets/foo.md` にレビュー状態を追記 + `tickets/foo.review.md` を作成してcommit
d. 完了: `tickets/foo.md` と `tickets/foo.review.md` を両方削除してcommit
worktreeと併用して作業を進める場合、必ずブランチを切る前に対象のチケットをコミットしてから切ること。
TODO.mdのリンクは完了後に切れるが、そのリンクを元にgitで消されたファイルを読み、内容を把握できる。
`.review.md` にはレビューの指摘事項と判断結果を記載する。
レビューはdiffの確認だけでなく、チケットはどのような前提・要件であり、それが達成されたかの確認まで含めて行う。
常に、提出された実装で良いのか、コードベースを歪めていないか、不必要な実装ではないかを確認すること。
---
insomniaでinsomniaを開発している際、AI自身のフィードバックを元に改善を回すために `docs/report/`ディレクトリに感じた障壁や改善案等を書き残す形にした。 明確に力不足な点/ツールの問題があった場合や、ユーザーからの指示があった際に作ること。

182
Cargo.lock generated
View File

@ -82,6 +82,15 @@ version = "1.0.102"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"
[[package]]
name = "arc-swap"
version = "1.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a3a1fd6f75306b68087b831f025c712524bcb19aad54e557b1129cfa0a2b207"
dependencies = [
"rustversion",
]
[[package]]
name = "assert-json-diff"
version = "2.0.2"
@ -319,6 +328,16 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9"
[[package]]
name = "client"
version = "0.1.0"
dependencies = [
"manifest",
"protocol",
"tokio",
"uuid",
]
[[package]]
name = "cmake"
version = "0.1.57"
@ -1571,9 +1590,9 @@ checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2"
[[package]]
name = "libc"
version = "0.2.185"
version = "0.2.186"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "52ff2c0fe9bc6cb6b14a0592c2ff4fa9ceb83eea9db979b0487cd054946a2b8f"
checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66"
[[package]]
name = "libredox"
@ -1596,6 +1615,16 @@ dependencies = [
"bitflags 2.11.0",
]
[[package]]
name = "lint-common"
version = "0.1.0"
dependencies = [
"chrono",
"serde",
"serde_json",
"thiserror 2.0.18",
]
[[package]]
name = "linux-raw-sys"
version = "0.4.15"
@ -1641,6 +1670,8 @@ dependencies = [
"tracing",
"tracing-subscriber",
"trybuild",
"wiremock",
"zstd",
]
[[package]]
@ -1696,6 +1727,7 @@ dependencies = [
name = "manifest"
version = "0.1.0"
dependencies = [
"arc-swap",
"llm-worker",
"protocol",
"serde",
@ -1751,6 +1783,28 @@ dependencies = [
"autocfg",
]
[[package]]
name = "memory"
version = "0.1.0"
dependencies = [
"async-trait",
"chrono",
"libc",
"lint-common",
"llm-worker",
"manifest",
"schemars",
"serde",
"serde_json",
"serde_yaml",
"sha2 0.11.0",
"tempfile",
"thiserror 2.0.18",
"tokio",
"tracing",
"uuid",
]
[[package]]
name = "mime"
version = "0.3.17"
@ -2096,6 +2150,7 @@ checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6"
name = "pod"
version = "0.1.0"
dependencies = [
"arc-swap",
"async-trait",
"chrono",
"clap",
@ -2106,12 +2161,15 @@ dependencies = [
"libc",
"llm-worker",
"manifest",
"memory",
"minijinja",
"pod-registry",
"protocol",
"provider",
"schemars",
"serde",
"serde_json",
"session-metrics",
"session-store",
"tempfile",
"thiserror 2.0.18",
@ -2119,6 +2177,22 @@ dependencies = [
"toml",
"tools",
"tracing",
"uuid",
"workflow",
]
[[package]]
name = "pod-registry"
version = "0.1.0"
dependencies = [
"fs4",
"libc",
"manifest",
"serde",
"serde_json",
"session-store",
"tempfile",
"thiserror 2.0.18",
]
[[package]]
@ -2201,6 +2275,17 @@ dependencies = [
"wiremock",
]
[[package]]
name = "pulldown-cmark"
version = "0.13.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c3a14896dfa883796f1cb410461aef38810ea05f2b2c33c5aded3649095fdad"
dependencies = [
"bitflags 2.11.0",
"memchr",
"unicase",
]
[[package]]
name = "quinn"
version = "0.11.9"
@ -2840,6 +2925,19 @@ dependencies = [
"serde_core",
]
[[package]]
name = "serde_yaml"
version = "0.9.34+deprecated"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47"
dependencies = [
"indexmap",
"itoa",
"ryu",
"serde",
"unsafe-libyaml",
]
[[package]]
name = "serial_test"
version = "3.4.0"
@ -2866,20 +2964,29 @@ dependencies = [
"syn 2.0.117",
]
[[package]]
name = "session-metrics"
version = "0.1.0"
dependencies = [
"serde",
"serde_json",
"session-store",
]
[[package]]
name = "session-store"
version = "0.1.0"
dependencies = [
"async-trait",
"futures",
"hex",
"llm-worker",
"protocol",
"serde",
"serde_json",
"sha2 0.11.0",
"tempfile",
"thiserror 2.0.18",
"tokio",
"tracing",
"uuid",
]
@ -3537,12 +3644,23 @@ dependencies = [
name = "tui"
version = "0.1.0"
dependencies = [
"client",
"crossterm 0.28.1",
"llm-worker",
"manifest",
"pod-registry",
"protocol",
"pulldown-cmark",
"ratatui",
"serde",
"serde_json",
"session-store",
"tempfile",
"tokio",
"toml",
"tools",
"unicode-width",
"uuid",
]
[[package]]
@ -3557,6 +3675,12 @@ version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971"
[[package]]
name = "unicase"
version = "2.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142"
[[package]]
name = "unicode-ident"
version = "1.0.24"
@ -3592,6 +3716,12 @@ version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
[[package]]
name = "unsafe-libyaml"
version = "0.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861"
[[package]]
name = "untrusted"
version = "0.9.0"
@ -4283,6 +4413,22 @@ dependencies = [
"wasmparser",
]
[[package]]
name = "workflow"
version = "0.1.0"
dependencies = [
"chrono",
"lint-common",
"manifest",
"memory",
"serde",
"serde_json",
"serde_yaml",
"tempfile",
"thiserror 2.0.18",
"tracing",
]
[[package]]
name = "writeable"
version = "0.6.3"
@ -4397,3 +4543,31 @@ name = "zmij"
version = "1.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa"
[[package]]
name = "zstd"
version = "0.13.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a"
dependencies = [
"zstd-safe",
]
[[package]]
name = "zstd-safe"
version = "7.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d"
dependencies = [
"zstd-sys",
]
[[package]]
name = "zstd-sys"
version = "2.0.16+zstd.1.5.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748"
dependencies = [
"cc",
"pkg-config",
]

View File

@ -1,6 +1,7 @@
[workspace]
resolver = "2"
members = [
"crates/client",
"crates/daemon",
"crates/llm-worker",
"crates/llm-worker-macros",
@ -9,10 +10,48 @@ members = [
"crates/pod",
"crates/protocol",
"crates/provider",
"crates/pod-registry",
"crates/session-metrics",
"crates/lint-common",
"crates/tools",
"crates/tui",
"crates/memory",
"crates/workflow",
]
[workspace.package]
edition = "2024"
license = "MIT"
[workspace.dependencies]
# Internal crates
client = { path = "crates/client" }
llm-worker = { path = "crates/llm-worker", version = "0.2" }
llm-worker-macros = { path = "crates/llm-worker-macros", version = "0.2" }
manifest = { path = "crates/manifest" }
lint-common = { path = "crates/lint-common" }
memory = { path = "crates/memory" }
pod-registry = { path = "crates/pod-registry" }
protocol = { path = "crates/protocol" }
provider = { path = "crates/provider" }
session-metrics = { path = "crates/session-metrics" }
session-store = { path = "crates/session-store" }
tools = { path = "crates/tools" }
# External
# Note: `reqwest` and `chrono` are not aggregated here because some crates
# need `default-features = false`, which workspace inheritance cannot override.
async-trait = "0.1"
fs4 = "0.13"
futures = "0.3"
libc = "0.2"
schemars = "1.2"
serde = "1.0"
serde_json = "1.0"
sha2 = "0.11"
tempfile = "3.27"
thiserror = "2.0"
tokio = "1.52"
toml = "1.1"
tracing = "0.1"
uuid = "1.23"

18
KNOWN_ISSUES.md Normal file
View File

@ -0,0 +1,18 @@
# Known Issues
Ticket を切るほどではないが、次に近所を触るときに合わせて拾いたい小粒な所見の置き場。
## 運用
- 1 項目 = 出典 (file:line) + 症状 (一文) + トリガー (いつ拾うか、一文)
- 関連 ticket があれば `→ [tickets/foo.md]` でリンク
- 修正したら同じコミットで該当エントリを削除する (履歴は git)
- ここに溜める基準: 「ticket は重い」「だが忘れたら次の触り手が踏む」もの。明確に作業すべきものは ticket 化する
## エントリ
- `crates/tui/src/app.rs:478-485` — bad workflow slug を含む `Method::Run` 送信時、`Event::UserMessage` の早期 broadcast で `turn_index += 1` されターンヘッダだけ残る ("ghost turn header")。次に TUI のターンヘッダ / エラー表示周りを触るときに整理。→ [tickets/pod-input-validate-internalize.md] の review 由来。
- `crates/pod/src/controller.rs:944` — `worker_error_code` の `PodError::WorkflowResolve(_) => InvalidRequest` が post-commit な resolve エラー (`KnowledgeNotFound` 等) にも適用される。意味論的には妥当方向だが、resolve 系のエラー粒度を分けたくなったタイミングで再評価。
- `crates/pod/tests/controller_test.rs` — `double_run_returns_error` がたまに失敗する flakiness を観測。`pod-interrupt-prep-internalize` 以前から存在する別件。次に controller_test の Run 連投系のタイミングを触るときに併せて原因を切り分け。
- `crates/session-store/src/fs_store.rs:117-122` — `FsStore::read_entry_count` が `fs::read_to_string` で全文ロードしてから行数カウントするため O(n)。`ensure_head_or_fork` は run-start でしか呼ばれず現状は許容範囲だが、長期セッションが普通になった時点で `\n` バイト数の cheap count か末尾 seek に置き換える。
- `crates/session-store/src/segment.rs:121` `ensure_head_or_fork` (free fn, test 専用・本番 caller ゼロ) と `crates/pod/src/pod.rs` `Pod::ensure_segment_head` (本番 inline) に live auto-fork の検知 + forked_from 記録が二重実装されている。entry-hash-abolish 以前からの重複で、両方独立にテスト済みだが drift 必至。session-store 側を本番から呼ぶ形に寄せるか free fn を畳むかは要設計判断。Pod state / fork 周辺を次に触るときに統合を検討。

15
TODO.md
View File

@ -1,10 +1,5 @@
- [ ] テスト設計 → [tickets/test-design.md](tickets/test-design.md)
- [ ] ツール設計
- [ ] Bash ツール (Permission 層と統合) → [tickets/bash-tool.md](tickets/bash-tool.md)
- [ ] パーミッション: パターンベースのツール実行制御 → [tickets/permission-extension-point.md](tickets/permission-extension-point.md)
- [ ] Pod オーケストレーション
- [ ] 動的 Scope 変更 → [tickets/dynamic-scope.md](tickets/dynamic-scope.md)
- [ ] ネイティブ GUI クライアント MVP → [tickets/native-gui-mvp.md](tickets/native-gui-mvp.md)
- [ ] TUI 拡充
- [ ] 新しい Pod を spawn する UI の設計 → [tickets/tui-pod-spawn-ui.md](tickets/tui-pod-spawn-ui.md)
- [ ] ツール呼び出しのフレーム更新型表示 → [tickets/tui-tool-call-ui.md](tickets/tui-tool-call-ui.md)
# TODO legacy notice
Active repository work items have been migrated to `work-items/`.
Use `./tickets.sh list --status all` for the generated/current view and `./tickets.sh doctor` to validate the migration state.

11
crates/client/Cargo.toml Normal file
View File

@ -0,0 +1,11 @@
[package]
name = "client"
version = "0.1.0"
edition.workspace = true
license.workspace = true
[dependencies]
protocol = { workspace = true }
manifest = { workspace = true }
tokio = { workspace = true, features = ["rt", "macros", "net", "io-util", "sync", "time", "process", "fs"] }
uuid = { workspace = true }

15
crates/client/src/lib.rs Normal file
View File

@ -0,0 +1,15 @@
//! Pod プロトコルを喋るクライアント。
//!
//! - [`PodClient`]: 既存 pod の Unix ソケットへ接続して `Method` を送り、
//! `Event` を受け取る低レベル接続。
//! - [`spawn`]: pod バイナリをサブプロセスとして起動し、`INSOMNIA-READY`
//! ハンドシェイクが終わるまで待つフロー。subprocess を立ち上げる必要が
//! ない呼び出し側 (=既存 pod に attach する場合) は使わなくてよい。
//!
//! TUI / GUI / E2E ハーネスはこの crate に依存して protocol を喋る。
mod pod_client;
pub mod spawn;
pub use pod_client::PodClient;
pub use spawn::{SpawnConfig, SpawnError, SpawnReady, spawn_pod};

View File

@ -35,6 +35,10 @@ impl PodClient {
self.writer.write(method).await
}
/// Returns the next already-queued [`Event`] without waiting.
///
/// Any failure reported by `event_rx.try_recv()` is collapsed into `None`
/// by `.ok()`, so this method does not let the caller distinguish the
/// individual receive errors from one another.
pub fn try_next_event(&mut self) -> Option<Event> {
    self.event_rx.try_recv().ok()
}
/// Awaits the next [`Event`] delivered through `event_rx`.
///
/// NOTE(review): `None` presumably means the event channel has been
/// closed — confirm against the receiver type used for `event_rx`.
pub async fn next_event(&mut self) -> Option<Event> {
    self.event_rx.recv().await
}

299
crates/client/src/spawn.rs Normal file
View File

@ -0,0 +1,299 @@
//! `insomnia-pod` バイナリをサブプロセスとして立ち上げ、`INSOMNIA-READY` を待つ
//! ハンドシェイク。
//!
//! - 親プロセス (TUI / GUI / E2E) は overlay TOML を組み立ててこの関数に
//! 渡す。pod はそれを受けて socket を bind し、stderr に
//! `INSOMNIA-READY\t<name>\t<socket>` を吐く。
//! - 待機中の stderr 行は `progress` コールバック越しに呼び出し側へ流す。
//! UI の進捗表示や E2E のログ収集はここで賄う。
//! - `kill_on_drop = false` + `process_group(0)` により、親プロセス
//! ライフサイクルから切り離した detached pod を作る。ready 後の lifecycle
//! 管理は runtime ディレクトリ / socket を介して行う。
use std::io;
use std::path::{Path, PathBuf};
use std::process::Stdio;
use std::time::Duration;
use tokio::process::Command;
use uuid::Uuid;
const READY_PREFIX: &str = "INSOMNIA-READY\t";
const READY_TIMEOUT: Duration = Duration::from_secs(20);
/// Input to `spawn_pod`.
pub struct SpawnConfig {
    /// Identifier used as `pod.name`. It resolves the runtime directory
    /// (`manifest::paths::pod_runtime_dir`) and is meant to be matched
    /// against the name carried on the ready line.
    ///
    /// NOTE(review): the spawn code in this file returns the name parsed
    /// from the ready line as-is and does not compare it with this field —
    /// confirm whether callers perform that check.
    pub pod_name: String,
    /// TOML string handed to the pod via `--overlay`.
    pub overlay_toml: String,
    /// Working directory (`current_dir`) of the pod process.
    pub cwd: PathBuf,
    /// When `Some(id)`, `--session <id>` is appended so the pod resumes
    /// from that session.
    pub resume_from: Option<Uuid>,
    /// When true, `--pod <pod_name>` is appended; the pod then resumes from
    /// name-keyed state if it exists, or otherwise starts as a new Pod with
    /// the same name.
    pub resume_by_pod_name: bool,
}
/// Result of a successful spawn: the two fields parsed from the
/// `INSOMNIA-READY\t<name>\t<socket>` line the pod wrote to stderr.
pub struct SpawnReady {
    /// Pod name as reported on the ready line.
    pub pod_name: String,
    /// Socket path as reported on the ready line.
    pub socket_path: PathBuf,
}
/// Failure modes of spawning a pod and waiting for its ready handshake.
#[derive(Debug)]
pub enum SpawnError {
    /// Generic I/O failure (also the target of `From<io::Error>`).
    Io(io::Error),
    /// The runtime directory could not be resolved (e.g. the relevant
    /// environment variables are unset).
    RuntimeDirUnavailable,
    /// Starting the pod process itself failed.
    PodLaunchFailed(io::Error),
    /// The pod did not reach the ready state; `stderr_tail` carries the
    /// diagnostic text shown to the user (it may be empty).
    PodExitedEarly {
        stderr_tail: String,
    },
    /// The ready handshake did not complete within `READY_TIMEOUT`.
    Timeout,
}
impl std::fmt::Display for SpawnError {
    /// Writes the user-facing message for each failure mode.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Io(source) => write!(f, "io error: {source}"),
            Self::RuntimeDirUnavailable => f.write_str(
                "could not resolve runtime directory (set INSOMNIA_HOME, INSOMNIA_RUNTIME_DIR, XDG_RUNTIME_DIR, or HOME)",
            ),
            Self::PodLaunchFailed(source) => write!(f, "failed to launch pod: {source}"),
            // Without any captured stderr there is nothing to append after
            // the colon, so the bare message is used.
            Self::PodExitedEarly { stderr_tail } if stderr_tail.is_empty() => {
                f.write_str("pod exited before becoming ready")
            }
            Self::PodExitedEarly { stderr_tail } => {
                write!(f, "pod exited before becoming ready: {stderr_tail}")
            }
            Self::Timeout => {
                let secs = READY_TIMEOUT.as_secs();
                write!(f, "pod did not become ready within {secs}s")
            }
        }
    }
}
impl std::error::Error for SpawnError {}
impl From<io::Error> for SpawnError {
fn from(e: io::Error) -> Self {
Self::Io(e)
}
}
/// Spawns a pod and waits until its `INSOMNIA-READY` handshake completes.
///
/// `progress` is invoked for every stderr line observed before the ready
/// line is found (the ready line itself is not forwarded). Use it for UI
/// progress updates or for collecting logs in E2E runs.
///
/// # Errors
///
/// - [`SpawnError::RuntimeDirUnavailable`] when the pod runtime directory
///   cannot be resolved.
/// - [`SpawnError::Io`] when the runtime directory or `stderr.log` cannot be
///   created.
/// - [`SpawnError::PodLaunchFailed`] when the process cannot be started.
/// - Whatever `wait_for_ready_file` reports; in that case the child is
///   killed and awaited before the error is returned.
pub async fn spawn_pod<F>(config: SpawnConfig, mut progress: F) -> Result<SpawnReady, SpawnError>
where
    F: FnMut(&str),
{
    let program = resolve_pod_command();
    let runtime_dir = manifest::paths::pod_runtime_dir(&config.pod_name)
        .ok_or(SpawnError::RuntimeDirUnavailable)?;
    std::fs::create_dir_all(&runtime_dir)?;

    // The pod's stderr is redirected to a file; the handshake is read back
    // from that file rather than from a pipe.
    let stderr_path = runtime_dir.join("stderr.log");
    let stderr_sink = std::fs::File::create(&stderr_path)?;

    let mut command = Command::new(&program);
    command.arg("--overlay").arg(&config.overlay_toml);
    if config.resume_by_pod_name {
        command.arg("--pod").arg(&config.pod_name);
    }
    if let Some(session_id) = config.resume_from {
        command.arg("--session").arg(session_id.to_string());
    }
    command
        .current_dir(&config.cwd)
        .stdin(Stdio::null())
        .stdout(Stdio::null())
        .stderr(Stdio::from(stderr_sink))
        .process_group(0);

    let mut child = command.spawn().map_err(SpawnError::PodLaunchFailed)?;

    // Default `kill_on_drop = false` plus `process_group(0)` makes this
    // a detached Pod once startup succeeds: dropping the handle does not
    // terminate it, and terminal-generated signals for the parent's
    // process group do not hit the Pod. Runtime state/socket files are
    // the source of truth after that point.
    match wait_for_ready_file(&mut progress, &stderr_path, &mut child).await {
        Ok(ready) => {
            // Keep awaiting the child in the background so its exit status
            // is collected whenever it eventually terminates.
            tokio::spawn(async move {
                let _ = child.wait().await;
            });
            Ok(ready)
        }
        Err(err) => {
            // Startup failed: best-effort kill, then wait for the exit.
            let _ = child.start_kill();
            let _ = child.wait().await;
            Err(err)
        }
    }
}
/// Polls the pod's stderr log until the `INSOMNIA-READY` line appears, then
/// waits for the advertised socket to accept connections.
///
/// Every complete non-ready line is recorded in a small tail buffer and
/// forwarded to `progress`.
///
/// Only newline-terminated lines are consumed. The log is re-read while the
/// pod may still be writing it, so the last line can be caught mid-write;
/// an unterminated fragment is left for the next poll instead of being
/// parsed as a (possibly truncated) ready line or reported twice. If the
/// child exits, the remaining fragment is still picked up by
/// `drain_stderr_into_tail`.
///
/// `offset` is advanced line by line, so when control passes to
/// `wait_for_socket` it already points past the ready line and a later
/// drain does not push lines into the tail a second time.
///
/// # Errors
///
/// - [`SpawnError::Io`] when the log cannot be read (a missing file is
///   treated as empty).
/// - [`SpawnError::PodExitedEarly`] when the child exits first, or when the
///   ready line lacks a name or socket path.
/// - [`SpawnError::Timeout`] once `READY_TIMEOUT` has elapsed.
async fn wait_for_ready_file<F>(
    progress: &mut F,
    stderr_path: &Path,
    child: &mut tokio::process::Child,
) -> Result<SpawnReady, SpawnError>
where
    F: FnMut(&str),
{
    let mut tail = StderrTail::new();
    let deadline = tokio::time::Instant::now() + READY_TIMEOUT;
    let mut offset = 0usize;
    loop {
        let content = match tokio::fs::read_to_string(stderr_path).await {
            Ok(content) => content,
            Err(e) if e.kind() == io::ErrorKind::NotFound => String::new(),
            Err(e) => return Err(SpawnError::Io(e)),
        };

        // `get` yields nothing if the file shrank below `offset`.
        let unread = content.get(offset..).unwrap_or("");
        // Length of the prefix made of complete (newline-terminated) lines.
        let complete_len = unread.rfind('\n').map_or(0, |idx| idx + 1);

        for raw in unread[..complete_len].split_inclusive('\n') {
            offset += raw.len();
            // Same trimming as `str::lines`: drop `\n`, then one `\r`.
            let line = raw.strip_suffix('\n').unwrap_or(raw);
            let line = line.strip_suffix('\r').unwrap_or(line);

            let Some(rest) = line.strip_prefix(READY_PREFIX) else {
                tail.push(line);
                progress(line);
                continue;
            };

            // `<name>\t<socket>`; the socket part keeps any further tabs.
            let (pod_name, socket_str) = rest.split_once('\t').unwrap_or((rest, ""));
            if pod_name.is_empty() || socket_str.is_empty() {
                return Err(SpawnError::PodExitedEarly {
                    stderr_tail: format!("malformed ready line: {line}"),
                });
            }
            let pod_name = pod_name.to_string();
            let socket_path = PathBuf::from(socket_str);
            wait_for_socket(
                &socket_path,
                deadline,
                child,
                stderr_path,
                &mut tail,
                &mut offset,
            )
            .await?;
            return Ok(SpawnReady {
                pod_name,
                socket_path,
            });
        }

        if tokio::time::Instant::now() >= deadline {
            return Err(SpawnError::Timeout);
        }
        tokio::select! {
            status = child.wait() => {
                let _ = status;
                // The pod may flush its final stderr lines right before
                // exiting. Re-read after `child.wait()` resolves so the
                // line explaining the failure ends up in PodExitedEarly.
                drain_stderr_into_tail(stderr_path, &mut tail, &mut offset).await;
                return Err(SpawnError::PodExitedEarly {
                    stderr_tail: tail.into_string(),
                });
            }
            _ = tokio::time::sleep(Duration::from_millis(100)) => {}
        }
    }
}
async fn wait_for_socket(
socket_path: &Path,
deadline: tokio::time::Instant,
child: &mut tokio::process::Child,
stderr_path: &Path,
tail: &mut StderrTail,
offset: &mut usize,
) -> Result<(), SpawnError> {
loop {
match tokio::net::UnixStream::connect(socket_path).await {
Ok(_) => return Ok(()),
Err(e)
if e.kind() == io::ErrorKind::NotFound
|| e.kind() == io::ErrorKind::ConnectionRefused => {}
Err(e) => return Err(SpawnError::Io(e)),
}
if tokio::time::Instant::now() >= deadline {
return Err(SpawnError::Timeout);
}
tokio::select! {
status = child.wait() => {
let _ = status;
drain_stderr_into_tail(stderr_path, tail, offset).await;
return Err(SpawnError::PodExitedEarly {
stderr_tail: tail.as_string(),
});
}
_ = tokio::time::sleep(Duration::from_millis(50)) => {}
}
}
}
/// Re-reads the stderr log and appends every line found after `*offset` to
/// `tail`, skipping lines that start with the ready prefix.
///
/// Best effort: a read failure, or a file no longer than `*offset`, leaves
/// both `tail` and `offset` untouched. Otherwise `*offset` is moved to the
/// end of the content that was read.
async fn drain_stderr_into_tail(stderr_path: &Path, tail: &mut StderrTail, offset: &mut usize) {
    let content = match tokio::fs::read_to_string(stderr_path).await {
        Ok(text) => text,
        Err(_) => return,
    };
    let start = *offset;
    if start >= content.len() {
        return;
    }
    content[start..]
        .lines()
        .filter(|line| !line.starts_with(READY_PREFIX))
        .for_each(|line| tail.push(line));
    *offset = content.len();
}
/// Picks the executable used to launch a child Pod.
///
/// The result must be an `insomnia-pod`-compatible binary: the parent scans
/// the child's stderr for `INSOMNIA-READY`, so a wrapper that prints extra
/// stderr lines pollutes that handshake.
///
/// A non-empty `INSOMNIA_POD_COMMAND` takes precedence (tests use it to
/// inject a mock binary). Otherwise the bare name `insomnia-pod` is
/// returned and resolved through `PATH` at spawn time, so a missing binary
/// shows up as the spawn `io::Error`.
fn resolve_pod_command() -> PathBuf {
    match std::env::var("INSOMNIA_POD_COMMAND") {
        Ok(overridden) if !overridden.is_empty() => PathBuf::from(overridden),
        // Unset, not valid Unicode, or empty: use the default name.
        _ => PathBuf::from("insomnia-pod"),
    }
}
/// Rolling window over the most recent stderr lines, used to build the
/// diagnostic text attached to `PodExitedEarly`.
struct StderrTail {
    lines: std::collections::VecDeque<String>,
}

impl StderrTail {
    /// Maximum number of lines retained.
    const CAPACITY: usize = 8;
    /// Separator placed between lines when rendering.
    const SEPARATOR: &'static str = " | ";

    /// Creates an empty window.
    fn new() -> Self {
        let lines = std::collections::VecDeque::with_capacity(Self::CAPACITY);
        Self { lines }
    }

    /// Appends `line`, evicting the oldest entry when the window is full.
    fn push(&mut self, line: &str) {
        if self.lines.len() == Self::CAPACITY {
            self.lines.pop_front();
        }
        self.lines.push_back(line.to_owned());
    }

    /// Renders the retained lines, oldest first, without consuming `self`.
    fn as_string(&self) -> String {
        let mut joined = String::new();
        for (idx, line) in self.lines.iter().enumerate() {
            if idx > 0 {
                joined.push_str(Self::SEPARATOR);
            }
            joined.push_str(line);
        }
        joined
    }

    /// Renders the retained lines, oldest first, consuming `self`.
    fn into_string(self) -> String {
        Vec::from(self.lines).join(Self::SEPARATOR)
    }
}

View File

@ -5,6 +5,6 @@ edition.workspace = true
license.workspace = true
[dependencies]
manifest = { path = "../manifest" }
protocol = { path = "../protocol" }
tokio = { version = "1.49", features = ["full"] }
manifest = { workspace = true }
protocol = { workspace = true }
tokio = { workspace = true, features = ["full"] }

View File

@ -0,0 +1,13 @@
[package]
name = "lint-common"
version = "0.1.0"
edition.workspace = true
license.workspace = true
[dependencies]
chrono = { version = "0.4", features = ["serde"] }
serde = { workspace = true, features = ["derive"] }
thiserror = { workspace = true }
[dev-dependencies]
serde_json = { workspace = true }

View File

@ -0,0 +1,81 @@
//! Common frontmatter helpers.
use chrono::{DateTime, Utc};
use crate::RecordLintError;
/// Trait record frontmatter types implement so linters can drive them uniformly.
pub trait Frontmatter: Sized {
    /// Hard upper bound on body chars (excluding the frontmatter block).
    const BODY_LIMIT: usize;

    /// Value of the record's `created_at` timestamp in UTC, if it has one.
    ///
    /// NOTE(review): `None` presumably means the field is absent from the
    /// frontmatter — confirm against the implementors.
    fn created_at(&self) -> Option<DateTime<Utc>>;

    /// Value of the record's `updated_at` timestamp in UTC, if it has one.
    ///
    /// NOTE(review): `None` presumably means the field is absent from the
    /// frontmatter — confirm against the implementors.
    fn updated_at(&self) -> Option<DateTime<Utc>>;
}
const FRONTMATTER_DELIM: &str = "---";
/// Split a markdown document into `(yaml_frontmatter, body)`.
///
/// The first line of the document must be exactly `---` (an LF or CRLF
/// line ending is accepted), and a closing `---` line (or `---` at EOF)
/// must follow somewhere downstream. Neither delimiter line is part of the
/// returned slices; the line ending after the closing delimiter is
/// consumed.
///
/// # Errors
///
/// - [`RecordLintError::MissingFrontmatter`] when the document is empty or
///   its first line is not exactly `---` (for example `---title` or plain
///   text).
/// - [`RecordLintError::MalformedFrontmatter`] when no closing `---` line
///   is found.
pub fn split_frontmatter(content: &str) -> Result<(&str, &str), RecordLintError> {
    // Delimiter lines are compared with their line ending stripped, the
    // same way for the opening and the closing line.
    fn is_delimiter(line: &str) -> bool {
        line.trim_end_matches('\n').trim_end_matches('\r') == FRONTMATTER_DELIM
    }

    // The opening delimiter must be the very first line, on its own.
    let first_line = content
        .split_inclusive('\n')
        .next()
        .filter(|line| is_delimiter(line))
        .ok_or(RecordLintError::MissingFrontmatter)?;
    let after_open = &content[first_line.len()..];

    // Look for the closing `---` on its own line, tracking byte offsets so
    // the returned slices borrow directly from `content`.
    let mut byte_offset = 0usize;
    for line in after_open.split_inclusive('\n') {
        if is_delimiter(line) {
            let yaml = &after_open[..byte_offset];
            let body = &after_open[byte_offset + line.len()..];
            return Ok((yaml, body));
        }
        byte_offset += line.len();
    }

    Err(RecordLintError::MalformedFrontmatter(
        "missing closing `---` line".to_string(),
    ))
}
// Unit tests for `split_frontmatter`.
#[cfg(test)]
mod tests {
use super::*;
// Happy path: YAML and body are returned without the delimiter lines.
#[test]
fn splits_simple() {
let doc = "---\nfoo: 1\n---\nbody here\n";
let (y, b) = split_frontmatter(doc).unwrap();
assert_eq!(y, "foo: 1\n");
assert_eq!(b, "body here\n");
}
// A document that does not start with `---` has no frontmatter at all.
#[test]
fn no_leading_delim_errors() {
let err = split_frontmatter("hello").unwrap_err();
assert!(matches!(err, RecordLintError::MissingFrontmatter));
}
// An opening delimiter without a closing one is reported as malformed.
#[test]
fn no_closing_delim_errors() {
let err = split_frontmatter("---\nfoo: 1\nno close\n").unwrap_err();
assert!(matches!(err, RecordLintError::MalformedFrontmatter(_)));
}
// Nothing after the closing delimiter yields an empty body slice.
#[test]
fn handles_empty_body() {
let doc = "---\nfoo: 1\n---\n";
let (_, b) = split_frontmatter(doc).unwrap();
assert_eq!(b, "");
}
}

View File

@ -0,0 +1,20 @@
//! Shared record lint primitives for memory and workflow files.
mod frontmatter;
mod slug;
pub use frontmatter::{Frontmatter, split_frontmatter};
pub use slug::{Slug, is_valid_slug};
/// Common lint errors for Markdown record syntax shared by memory and workflow.
#[derive(Debug, Clone, thiserror::Error, PartialEq, Eq)]
pub enum RecordLintError {
#[error("invalid slug `{0}`: must match ^[a-z0-9](?:[a-z0-9-]{{0,62}}[a-z0-9])?$")]
InvalidSlug(String),
#[error("malformed frontmatter: {0}")]
MalformedFrontmatter(String),
#[error("frontmatter is missing or document is empty")]
MissingFrontmatter,
}

View File

@ -0,0 +1,146 @@
//! Slug type and validation.
//!
//! Syntax (agent-skills compatible):
//! ^[a-z0-9](?:[a-z0-9-]{0,62}[a-z0-9])?$
//! - 1-64 chars
//! - lowercase ASCII alphanumerics and `-`
//! - cannot start or end with `-`
//! - no consecutive `--`
use std::fmt;
use std::str::FromStr;
use serde::{Deserialize, Deserializer, Serialize};
use crate::RecordLintError;
const MIN_LEN: usize = 1;
const MAX_LEN: usize = 64;
/// Validated slug. Constructible only via [`Slug::parse`].
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
#[serde(transparent)]
pub struct Slug(String);

impl Slug {
    /// Validate `s` and wrap it.
    ///
    /// # Errors
    ///
    /// Returns [`RecordLintError::InvalidSlug`] (carrying the rejected input)
    /// when `is_valid_slug` refuses the string.
    pub fn parse(s: impl Into<String>) -> Result<Self, RecordLintError> {
        let candidate = s.into();
        if !is_valid_slug(&candidate) {
            return Err(RecordLintError::InvalidSlug(candidate));
        }
        Ok(Self(candidate))
    }

    /// Borrow the slug as a string slice.
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }

    /// Consume the slug and return the owned string.
    pub fn into_string(self) -> String {
        let Self(inner) = self;
        inner
    }
}
/// Writes the slug text verbatim (formatter width/padding flags are not applied).
impl fmt::Display for Slug {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text: &str = self.0.as_str();
        f.write_str(text)
    }
}
/// Lets a `Slug` be passed wherever a borrowed `str` view is accepted.
impl AsRef<str> for Slug {
    fn as_ref(&self) -> &str {
        self.0.as_str()
    }
}
impl FromStr for Slug {
type Err = RecordLintError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Self::parse(s)
}
}
impl<'de> Deserialize<'de> for Slug {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let raw = String::deserialize(deserializer)?;
Self::parse(raw).map_err(serde::de::Error::custom)
}
}
/// Pure-fn slug predicate implemented without the `regex` crate.
///
/// Accepts strings of `MIN_LEN..=MAX_LEN` bytes made only of lowercase ASCII
/// letters, ASCII digits and `-`, that neither start nor end with `-` and
/// contain no two dashes in a row.
pub fn is_valid_slug(s: &str) -> bool {
    let raw = s.as_bytes();
    if !(MIN_LEN..=MAX_LEN).contains(&raw.len()) {
        return false;
    }

    // Both ends must be alphanumeric, which rules out leading/trailing dashes.
    let (Some(&head), Some(&tail)) = (raw.first(), raw.last()) else {
        return false;
    };
    if !(is_alnum_lower(head) && is_alnum_lower(tail)) {
        return false;
    }

    let charset_ok = raw.iter().all(|&b| b == b'-' || is_alnum_lower(b));
    let has_double_dash = raw.windows(2).any(|pair| matches!(pair, [b'-', b'-']));
    charset_ok && !has_double_dash
}
/// True for ASCII digits and lowercase ASCII letters only.
fn is_alnum_lower(b: u8) -> bool {
    matches!(b, b'0'..=b'9' | b'a'..=b'z')
}
// Unit tests for `is_valid_slug`, `Slug::parse` and the serde integration.
#[cfg(test)]
mod tests {
use super::*;
// Representative valid slugs must pass both the predicate and the constructor.
#[test]
fn accepts_basic_slugs() {
for s in ["a", "ab", "abc-def", "x9", "a-b-c", "123", "a-1"] {
assert!(is_valid_slug(s), "expected `{s}` valid");
assert!(Slug::parse(s).is_ok());
}
}
// Empty, dash-edged, uppercase, non-ASCII, double-dash and otherwise
// out-of-alphabet inputs must all be rejected.
#[test]
fn rejects_bad_slugs() {
for s in [
"", "-", "-foo", "foo-", "Foo", "foo_bar", "foo bar", "foo--bar", "foo.bar", "ä",
] {
assert!(!is_valid_slug(s), "expected `{s}` invalid");
assert!(Slug::parse(s).is_err());
}
}
// Exactly MAX_LEN bytes is accepted; one more is not.
#[test]
fn enforces_length_bounds() {
let too_long = "a".repeat(MAX_LEN + 1);
assert!(!is_valid_slug(&too_long));
let max = "a".repeat(MAX_LEN);
assert!(is_valid_slug(&max));
}
// Deserialization validates: a good string round-trips, a bad one errors.
#[test]
fn deserializes_via_serde() {
let json = "\"valid-slug\"";
let slug: Slug = serde_json::from_str(json).unwrap();
assert_eq!(slug.as_str(), "valid-slug");
let bad = "\"BAD\"";
let err: Result<Slug, _> = serde_json::from_str(bad);
assert!(err.is_err());
}
}

View File

@ -6,22 +6,24 @@ edition.workspace = true
license.workspace = true
[dependencies]
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
thiserror = "2.0"
tracing = "0.1"
async-trait = "0.1"
futures = "0.3"
tokio = { version = "1.49", features = ["macros", "rt-multi-thread"] }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
thiserror = { workspace = true }
tracing = { workspace = true }
async-trait = { workspace = true }
futures = { workspace = true }
tokio = { workspace = true, features = ["macros", "rt-multi-thread", "time"] }
tokio-util = "0.7"
reqwest = { version = "0.13.1", default-features = false, features = ["stream", "json", "native-tls", "http2"] }
reqwest = { version = "0.13", default-features = false, features = ["stream", "json", "native-tls", "http2"] }
eventsource-stream = "0.2"
llm-worker-macros = { path = "../llm-worker-macros", version = "0.2" }
zstd = "0.13"
llm-worker-macros = { workspace = true }
[dev-dependencies]
clap = { version = "4.5", features = ["derive", "env"] }
schemars = "1.2"
tempfile = "3.24"
schemars = { workspace = true }
tempfile = { workspace = true }
dotenv = "0.15"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
trybuild = "1.0.116"
wiremock = "0.6.5"

View File

@ -20,32 +20,13 @@ mod recorder;
mod scenarios;
use clap::{Parser, ValueEnum};
use llm_worker::llm_client::capability::{
CacheStrategy, ModelCapability, StructuredOutput, ToolCallingSupport,
};
use llm_worker::llm_client::scheme::{
Scheme, anthropic::AnthropicScheme, gemini::GeminiScheme, openai_chat::OpenAIScheme,
};
use llm_worker::llm_client::transport::{HttpTransport, ResolvedAuth};
/// 既定の capability: fixture 記録には cache_control を付けない
/// (既知モデルの静的テーブルを経由すると scheme 毎に自動設定される)。
fn fallback_capability() -> ModelCapability {
ModelCapability {
tool_calling: ToolCallingSupport::Parallel,
structured_output: StructuredOutput::JsonSchema,
reasoning: None,
vision: false,
prompt_caching: CacheStrategy::Auto,
}
}
fn make_transport<S: Scheme>(
scheme: S,
model: &str,
auth: ResolvedAuth,
) -> HttpTransport<S> {
let cap = scheme.capability_for(model).unwrap_or_else(fallback_capability);
fn make_transport<S: Scheme>(scheme: S, model: &str, auth: ResolvedAuth) -> HttpTransport<S> {
let cap = scheme.default_capability();
let base_url = scheme.default_base_url().to_string();
HttpTransport::new(scheme, model.to_string(), base_url, auth, cap)
}
@ -86,11 +67,7 @@ async fn run_scenario_with_anthropic(
let api_key = std::env::var("ANTHROPIC_API_KEY")
.expect("ANTHROPIC_API_KEY environment variable must be set");
let model = model.as_deref().unwrap_or("claude-sonnet-4-20250514");
let client = make_transport(
AnthropicScheme::new(),
model,
ResolvedAuth::ApiKey(api_key),
);
let client = make_transport(AnthropicScheme::new(), model, ResolvedAuth::ApiKey(api_key));
recorder::record_request(
&client,
@ -138,7 +115,7 @@ async fn run_scenario_with_ollama(
model.to_string(),
"http://localhost:11434".to_string(),
ResolvedAuth::None,
fallback_capability(),
AnthropicScheme::new().default_capability(),
);
recorder::record_request(

View File

@ -2,9 +2,6 @@
//!
//! Example of cancelling from another thread during streaming
use llm_worker::llm_client::capability::{
CacheStrategy, ModelCapability, StructuredOutput, ToolCallingSupport,
};
use llm_worker::llm_client::scheme::{Scheme, anthropic::AnthropicScheme};
use llm_worker::llm_client::transport::{HttpTransport, ResolvedAuth};
use llm_worker::{Worker, WorkerResult};
@ -28,13 +25,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
let scheme = AnthropicScheme::new();
let model = "claude-sonnet-4-20250514".to_string();
let cap = scheme.capability_for(&model).unwrap_or(ModelCapability {
tool_calling: ToolCallingSupport::Parallel,
structured_output: StructuredOutput::JsonSchema,
reasoning: None,
vision: false,
prompt_caching: CacheStrategy::Auto,
});
let cap = scheme.default_capability();
let base_url = scheme.default_base_url().to_string();
let client = HttpTransport::new(scheme, model, base_url, ResolvedAuth::ApiKey(api_key), cap);
let worker = Worker::new(client);

View File

@ -338,14 +338,8 @@ fn default_capability() -> ModelCapability {
}
}
fn build_transport<S: Scheme>(
scheme: S,
model: String,
auth: ResolvedAuth,
) -> Box<dyn LlmClient> {
let cap = scheme
.capability_for(&model)
.unwrap_or_else(default_capability);
fn build_transport<S: Scheme>(scheme: S, model: String, auth: ResolvedAuth) -> Box<dyn LlmClient> {
let cap = scheme.default_capability();
let base_url = scheme.default_base_url().to_string();
Box::new(HttpTransport::new(scheme, model, base_url, auth, cap))
}

View File

@ -7,8 +7,8 @@
use std::marker::PhantomData;
use crate::handler::{
Handler, Kind, TextBlockEvent, TextBlockKind, ToolUseBlockEvent, ToolUseBlockKind,
ToolUseBlockStart,
Handler, Kind, TextBlockEvent, TextBlockKind, ThinkingBlockEvent, ThinkingBlockKind,
ToolUseBlockEvent, ToolUseBlockKind, ToolUseBlockStart,
};
use crate::tool::ToolCall;
@ -95,6 +95,81 @@ impl Handler<TextBlockKind> for ClosureTextBlockHandler {
}
}
// =============================================================================
// ThinkingBlock Closure Handler
// =============================================================================
/// Callback scope for a thinking block.
///
/// Mirrors `TextBlockScope`. Some providers (or some configurations)
/// emit thinking metadata without plaintext deltas — in that case the
/// block fires `Start` and `Stop` with no `Delta` in between, which is
/// expected and not an error.
pub struct ThinkingBlockScope {
    pub(crate) on_delta: Option<Box<dyn FnMut(&str) + Send + Sync>>,
    pub(crate) on_stop: Option<Box<dyn FnMut(&str) + Send + Sync>>,
}

impl ThinkingBlockScope {
    /// Creates a scope with no callbacks registered.
    fn new() -> Self {
        Self {
            on_stop: None,
            on_delta: None,
        }
    }

    /// Register a callback for each thinking text delta (streaming fragment).
    ///
    /// A later registration replaces an earlier one.
    pub fn on_delta(&mut self, f: impl FnMut(&str) + Send + Sync + 'static) {
        let callback: Box<dyn FnMut(&str) + Send + Sync> = Box::new(f);
        self.on_delta = Some(callback);
    }

    /// Register a callback invoked when the block completes.
    ///
    /// Receives the full accumulated thinking text. May be empty when
    /// the provider didn't emit any plaintext deltas. A later registration
    /// replaces an earlier one.
    pub fn on_stop(&mut self, f: impl FnMut(&str) + Send + Sync + 'static) {
        let callback: Box<dyn FnMut(&str) + Send + Sync> = Box::new(f);
        self.on_stop = Some(callback);
    }
}
/// Per-block state driven by `ClosureThinkingBlockHandler`.
///
/// Holds the callbacks copied out of the `ThinkingBlockScope` on `Start`
/// and the text accumulated from `Delta` events.
#[derive(Default)]
pub(crate) struct ThinkingBlockClosureState {
// Invoked with each delta fragment, when registered.
on_delta: Option<Box<dyn FnMut(&str) + Send + Sync>>,
// Invoked on `Stop` with the whole accumulated buffer, when registered.
on_stop: Option<Box<dyn FnMut(&str) + Send + Sync>>,
// Concatenation of every delta seen since the last `Start`.
buffer: String,
}
/// Closure-based handler for thinking blocks.
///
/// `setup` is called on every `Start` event with a fresh
/// `ThinkingBlockScope` so the caller can register its callbacks.
pub(crate) struct ClosureThinkingBlockHandler {
pub(crate) setup: Box<dyn FnMut(&mut ThinkingBlockScope) + Send + Sync>,
}
impl Handler<ThinkingBlockKind> for ClosureThinkingBlockHandler {
type Scope = ThinkingBlockClosureState;
fn on_event(&mut self, scope: &mut Self::Scope, event: &ThinkingBlockEvent) {
match event {
ThinkingBlockEvent::Start(_) => {
scope.buffer.clear();
let mut builder = ThinkingBlockScope::new();
(self.setup)(&mut builder);
scope.on_delta = builder.on_delta;
scope.on_stop = builder.on_stop;
}
ThinkingBlockEvent::Delta(text) => {
scope.buffer.push_str(text);
if let Some(f) = &mut scope.on_delta {
f(text);
}
}
ThinkingBlockEvent::Stop(_) => {
if let Some(f) = &mut scope.on_stop {
f(&scope.buffer);
}
}
}
}
}
// =============================================================================
// ToolUseBlock Closure Handler
// =============================================================================

View File

@ -91,6 +91,16 @@ impl Kind for ErrorKind {
type Event = ErrorEvent;
}
/// Reasoning item Kind - used to persist completed reasoning items.
///
/// Fires exactly once per reasoning item. The Worker receives it through
/// `ReasoningItemCollector` and, when the turn ends, appends it to history
/// as `Item::Reasoning`.
pub struct ReasoningItemKind;
impl Kind for ReasoningItemKind {
type Event = ReasoningItemEvent;
}
// =============================================================================
// Block Kind Definitions
// =============================================================================

View File

@ -17,19 +17,31 @@ use crate::tool::{Tool, ToolCall, ToolMeta, ToolResult};
// =============================================================================
/// Action after prompt submission.
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq)]
pub enum PromptAction {
/// Proceed normally.
Continue,
/// Cancel with a reason.
Cancel(String),
/// Proceed, and append these items to history right after the user
/// message. Mirrors [`TurnEndAction::ContinueWithMessages`] for the
/// submit edge: lets the upper layer attach resolver-produced
/// system messages (e.g. `@<path>` file content) so they sit
/// adjacent to the user message that referenced them.
ContinueWith(Vec<Item>),
}
/// Action before an LLM request.
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq)]
pub enum PreRequestAction {
/// Proceed normally.
Continue,
/// Proceed after appending these items to durable worker history.
///
/// This is for upper-layer budget/status nudges that the model may react
/// to: the items are committed before the request so later turns can see
/// why the worker changed course.
ContinueWith(Vec<Item>),
/// Cancel with a reason (treated as an error).
Cancel(String),
/// Yield control to the caller for external processing.
@ -46,6 +58,11 @@ pub enum PreToolAction {
Continue,
/// Skip this tool call (do not execute).
Skip,
/// Do not execute the tool call; commit this synthetic result instead.
///
/// This preserves provider-visible `tool_use` / `tool_result` pairing
/// without aborting the whole turn.
SyntheticResult(ToolResult),
/// Abort the entire run.
Abort(String),
/// Pause execution (can be resumed later).
@ -114,8 +131,36 @@ pub trait Interceptor: Send + Sync {
PromptAction::Continue
}
/// Called before each LLM request. The context can be modified
/// (e.g. for context compaction).
/// Items that should be **committed to `worker.history`** just
/// before the next LLM request. Returned items are `extend`ed into
/// the persistent history (and therefore picked up by the per-turn
/// clone that backs the LLM request, plus the usual
/// history-persistence path).
///
/// Use this for inputs that arrive from outside the LLM and need
/// to be reflected in the on-disk history — notifications,
/// cross-Pod events, system reminders. Do **not** use
/// [`Self::pre_llm_request`] for that purpose: it mutates a
/// per-request clone, so any committed assistant response that
/// reacts to the injection would have no visible trigger on the
/// next turn (or after resume / compaction).
///
/// `pre_llm_request` remains the right place for purely
/// reproducible per-request transformations (pruning, content
/// trimming, cache anchors) that depend only on the existing
/// history.
async fn pending_history_appends(&self) -> Vec<Item> {
Vec::new()
}
/// Called before each LLM request. The context starts as a clone
/// of `worker.history` (after `pending_history_appends` and the
/// Worker's own prune projection have been applied).
///
/// Direct mutations to `context` remain request-local and are not persisted.
/// If an interceptor derives a human/model-visible nudge from the current
/// request context, return [`PreRequestAction::ContinueWith`] so the Worker
/// commits it to history before the request is sent.
async fn pre_llm_request(&self, _context: &mut Vec<Item>) -> PreRequestAction {
PreRequestAction::Continue
}

View File

@ -48,12 +48,17 @@ pub mod llm_client;
pub mod prune;
pub mod state;
pub mod timeline;
pub mod token_counter;
pub mod tool;
pub mod tool_server;
pub mod usage_record;
pub use callback::{TextBlockScope, ToolUseBlockScope};
pub use callback::{TextBlockScope, ThinkingBlockScope, ToolUseBlockScope};
pub use handler::ToolUseBlockStart;
pub use interceptor::Interceptor;
pub use message::{ContentPart, Item, Message, Role};
pub use tool::{ToolCall, ToolOutputLimits, ToolResult};
pub use worker::{RunOutput, ToolRegistryError, Worker, WorkerConfig, WorkerError, WorkerResult};
pub use usage_record::UsageRecord;
pub use worker::{
LlmRetryNotice, RunOutput, ToolRegistryError, Worker, WorkerConfig, WorkerError, WorkerResult,
};

View File

@ -45,4 +45,13 @@ pub enum AuthRequirement {
pub trait AuthProvider: Send + Sync + std::fmt::Debug {
/// Returns the authentication headers for a single request, performing a
/// refresh internally if one is needed.
async fn headers(&self) -> Result<Vec<(HeaderName, HeaderValue)>, ClientError>;
/// Whether this provider is the composite authentication used for the
/// ChatGPT Codex backend.
///
/// The transport does not know the provider crate's concrete types, so this
/// hook alone is what switches on the Codex-CLI-compatible wire behavior
/// (conversation header, request compression, etc.). Defaults to `false`.
fn is_codex_backend(&self) -> bool {
false
}
}

View File

@ -8,7 +8,7 @@
//! 1. scheme 実装側の `model_id → ModelCapability` 静的テーブル(既知モデル)
//! 2. `ModelConfig::capability` での明示 override（未知モデル、または上書き）
use serde::{Deserialize, Serialize};
use serde::{Deserialize, Deserializer, Serialize, Serializer};
/// モデル能力メタデータ
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
@ -80,23 +80,90 @@ pub enum CacheStrategy {
Auto,
}
/// Reasoning 制御共通型、scheme 側で各社形式に投影)
/// Reasoning 制御共通型、scheme 側で各社形式に投影)
///
/// `effort` / `budget_tokens` はユーザー設定から任意で渡される。Scheme
/// 側は自身の `ReasoningSupport` に応じて片方だけ使う。両方が宣言
/// されている場合の優先順位は scheme 実装が決める。
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
pub struct ReasoningControl {
#[serde(default)]
pub effort: Option<ReasoningEffort>,
#[serde(default)]
pub budget_tokens: Option<u32>,
/// 文字列は provider-native な effort label、数値は provider-native な
/// thinking budget token として扱う。どちらか一方だけを型で表現する。
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(untagged)]
pub enum ReasoningControl {
Effort(ReasoningEffort),
BudgetTokens(i32),
}
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ReasoningEffort {
Minimal,
Low,
Medium,
High,
XHigh,
Other(String),
}
impl ReasoningEffort {
pub fn as_str(&self) -> &str {
match self {
Self::Minimal => "minimal",
Self::Low => "low",
Self::Medium => "medium",
Self::High => "high",
Self::XHigh => "xhigh",
Self::Other(label) => label.as_str(),
}
}
}
/// Maps the known lowercase labels to their variants; any other string is
/// preserved verbatim in [`ReasoningEffort::Other`]. Matching is exact
/// (case-sensitive, no trimming).
impl From<String> for ReasoningEffort {
    fn from(value: String) -> Self {
        let known = match value.as_str() {
            "minimal" => Some(Self::Minimal),
            "low" => Some(Self::Low),
            "medium" => Some(Self::Medium),
            "high" => Some(Self::High),
            "xhigh" => Some(Self::XHigh),
            _ => None,
        };
        match known {
            Some(effort) => effort,
            None => Self::Other(value),
        }
    }
}
/// Serializes as the plain string label returned by `as_str`.
impl Serialize for ReasoningEffort {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let label: &str = self.as_str();
        serializer.serialize_str(label)
    }
}
/// Deserializes from a string via `From<String>`, so unknown labels become
/// `ReasoningEffort::Other` rather than an error.
impl<'de> Deserialize<'de> for ReasoningEffort {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        let label = String::deserialize(deserializer)?;
        Ok(Self::from(label))
    }
}
// Unit tests for the untagged `ReasoningControl` deserialization.
#[cfg(test)]
mod tests {
use super::{ReasoningControl, ReasoningEffort};
// A JSON string becomes `Effort`; unknown labels are kept in `Other`.
#[test]
fn reasoning_control_deserializes_effort_labels() {
let known: ReasoningControl = serde_json::from_str(r#""xhigh""#).unwrap();
assert_eq!(known, ReasoningControl::Effort(ReasoningEffort::XHigh));
let unknown: ReasoningControl = serde_json::from_str(r#""provider-native""#).unwrap();
assert_eq!(
unknown,
ReasoningControl::Effort(ReasoningEffort::Other("provider-native".into()))
);
}
// A JSON number becomes `BudgetTokens`; negative values are accepted.
#[test]
fn reasoning_control_deserializes_signed_budget() {
let dynamic: ReasoningControl = serde_json::from_str("-1").unwrap();
assert_eq!(dynamic, ReasoningControl::BudgetTokens(-1));
}
}

View File

@ -36,6 +36,8 @@ impl std::fmt::Display for ConfigWarning {
}
}
pub type ResponseStream = Pin<Box<dyn Stream<Item = Result<Event, ClientError>> + Send>>;
/// LLMクライアントのtrait
///
/// 各プロバイダはこのtraitを実装し、統一されたインターフェースを提供する。
@ -49,10 +51,7 @@ pub trait LlmClient: Send + Sync {
/// # Returns
/// * `Ok(Stream)` - イベントストリーム
/// * `Err(ClientError)` - エラー
async fn stream(
&self,
request: Request,
) -> Result<Pin<Box<dyn Stream<Item = Result<Event, ClientError>> + Send>>, ClientError>;
async fn stream(&self, request: Request) -> Result<ResponseStream, ClientError>;
/// Clone this client into a new `Box<dyn LlmClient>`.
///
@ -74,15 +73,18 @@ pub trait LlmClient: Send + Sync {
}
}
impl Clone for Box<dyn LlmClient> {
fn clone(&self) -> Self {
self.clone_boxed()
}
}
/// `Box<dyn LlmClient>` に対する `LlmClient` の実装
///
/// これにより、動的ディスパッチを使用するクライアントも `Worker` で利用可能になる。
#[async_trait]
impl LlmClient for Box<dyn LlmClient> {
async fn stream(
&self,
request: Request,
) -> Result<Pin<Box<dyn Stream<Item = Result<Event, ClientError>> + Send>>, ClientError> {
async fn stream(&self, request: Request) -> Result<ResponseStream, ClientError> {
(**self).stream(request).await
}

View File

@ -1,6 +1,6 @@
//! LLMクライアントエラー型
use std::fmt;
use std::{fmt, time::Duration};
/// LLMクライアントのエラー
#[derive(Debug)]
@ -16,6 +16,12 @@ pub enum ClientError {
status: Option<u16>,
code: Option<String>,
message: String,
retry_after: Option<Duration>,
},
/// A request lifecycle phase exceeded its hard timeout.
Timeout {
phase: &'static str,
timeout: Duration,
},
/// 設定エラー
Config(String),
@ -31,6 +37,7 @@ impl fmt::Display for ClientError {
status,
code,
message,
..
} => {
write!(f, "API error")?;
if let Some(s) = status {
@ -41,6 +48,9 @@ impl fmt::Display for ClientError {
}
write!(f, ": {}", message)
}
ClientError::Timeout { phase, timeout } => {
write!(f, "{phase} timed out after {}s", timeout.as_secs())
}
ClientError::Config(msg) => write!(f, "Config error: {}", msg),
}
}
@ -67,3 +77,96 @@ impl From<serde_json::Error> for ClientError {
ClientError::Json(err)
}
}
impl ClientError {
    /// HTTP status carried by an `Api` error; `None` for every other variant
    /// (and for an `Api` error without a status).
    pub fn status(&self) -> Option<u16> {
        if let ClientError::Api { status, .. } = self {
            *status
        } else {
            None
        }
    }

    /// `retry_after` value carried by an `Api` error; `None` for every other
    /// variant (and for an `Api` error without one).
    pub fn retry_after(&self) -> Option<Duration> {
        if let ClientError::Api { retry_after, .. } = self {
            *retry_after
        } else {
            None
        }
    }
}
/// Decides whether an error is a transient failure worth retrying.
///
/// Retryable:
/// - `Api { status }` with 408 / 425 / 429 / 500 / 502 / 503 / 504 / 529
/// - `Http(reqwest::Error)` when `is_connect()` or `is_timeout()` holds
/// - `Timeout { .. }` (lifecycle hard timeout)
///
/// Everything else (Json, Sse, Config, any other Api status, Api without a
/// status) is not. Failures after SSE reading has started are surfaced by the
/// caller as `Sse`, so excluding that variant here filters them out
/// automatically.
pub fn is_retryable(error: &ClientError) -> bool {
    match error {
        ClientError::Timeout { .. } => true,
        ClientError::Http(cause) => cause.is_connect() || cause.is_timeout(),
        ClientError::Api { status, .. } => {
            matches!(status, Some(408 | 425 | 429 | 500 | 502 | 503 | 504 | 529))
        }
        ClientError::Json(_) | ClientError::Sse(_) | ClientError::Config(_) => false,
    }
}
// Unit tests for `is_retryable`.
#[cfg(test)]
mod tests {
use super::*;
// Builds an `Api` error that carries only the given status.
fn api_err(status: Option<u16>) -> ClientError {
ClientError::Api {
status,
code: None,
message: String::new(),
retry_after: None,
}
}
// Every status on the retry list is classified as retryable.
#[test]
fn retryable_status_codes() {
for code in [408u16, 425, 429, 500, 502, 503, 504, 529] {
assert!(
is_retryable(&api_err(Some(code))),
"status {code} should be retryable",
);
}
}
// A sample of statuses outside the list is not retried.
#[test]
fn non_retryable_status_codes() {
for code in [400u16, 401, 403, 404, 409, 410, 422, 501] {
assert!(
!is_retryable(&api_err(Some(code))),
"status {code} should not be retryable",
);
}
}
// An API error with no status at all is not retried.
#[test]
fn api_without_status_not_retryable() {
assert!(!is_retryable(&api_err(None)));
}
// Lifecycle hard timeouts are always retried.
#[test]
fn lifecycle_timeout_is_retryable() {
assert!(is_retryable(&ClientError::Timeout {
phase: "stream_open",
timeout: Duration::from_secs(30),
}));
}
// Parse, SSE and configuration errors are never retried.
#[test]
fn json_sse_config_not_retryable() {
let json_err = serde_json::from_str::<serde_json::Value>("not json").unwrap_err();
assert!(!is_retryable(&ClientError::Json(json_err)));
assert!(!is_retryable(&ClientError::Sse("boom".into())));
assert!(!is_retryable(&ClientError::Config("boom".into())));
}
}

View File

@ -15,8 +15,11 @@ use serde::{Deserialize, Serialize};
///
/// # イベントの種類
///
/// - **メタイベント**: `Ping`, `Usage`, `Status`, `Error`
/// - **メタイベント**: `Ping`, `Usage`, `Status`, `Error`, `UnhandledSse`
/// - **ブロックイベント**: `BlockStart`, `BlockDelta`, `BlockStop`, `BlockAbort`
/// - **永続化イベント**: `ReasoningItem` (history に commit すべき完成済み
/// reasoning item。streaming 表示用の Thinking BlockStart/Delta/Stop と
/// は別経路で発火する)
///
/// # ブロックのライフサイクル
///
@ -32,6 +35,10 @@ pub enum Event {
Status(StatusEvent),
/// エラー発生
Error(ErrorEvent),
/// Scheme が生成内容として解釈しない未対応 SSE イベント。
///
/// stream trace 用の観測イベントであり、timeline / history には反映しない。
UnhandledSse(UnhandledSseEvent),
/// ブロック開始(テキスト、ツール使用等)
BlockStart(BlockStart),
@ -41,6 +48,18 @@ pub enum Event {
BlockStop(BlockStop),
/// ブロック中断
BlockAbort(BlockAbort),
/// Reasoning item の完成。scheme が「次の request に送り返すための
/// reasoning material が揃った」点で 1 度だけ発火する。
///
/// - Anthropic: 1 つの `thinking` content_block 完了ごと
/// - OpenAI Responses: 1 つの reasoning output_item 完了ごと
///
/// 上位層Worker / ReasoningItemCollectorはこれを `Item::Reasoning`
/// として `worker.history` に append する。streaming 表示用の
/// `BlockStart(Thinking)` / `BlockDelta(Thinking)` / `BlockStop(Thinking)`
/// は依然として並行発火するlive display と round-trip persist の責務分離)。
ReasoningItem(ReasoningItemEvent),
}
// =============================================================================
@ -104,6 +123,18 @@ pub struct ErrorEvent {
pub message: String,
}
/// Observational meta event for an SSE event the scheme does not handle.
///
/// `data_preview` is a bounded preview of the raw SSE data received from the
/// provider; `data_len` is the byte length of the raw data before it was cut
/// down to the preview.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct UnhandledSseEvent {
pub provider: String,
pub event_type: String,
pub data_preview: String,
pub data_len: usize,
}
// =============================================================================
// Block Types
// =============================================================================
@ -212,6 +243,31 @@ impl BlockAbort {
}
}
// =============================================================================
// Reasoning Item Event
// =============================================================================
/// A completed reasoning item. The scheme fires it exactly once, after it has
/// gathered all the material needed for the round trip (text, summary,
/// encrypted_content, signature, id).
///
/// Mirrors the fields of `Item::Reasoning` one-to-one.
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
pub struct ReasoningItemEvent {
/// Item id observed on the scheme side (the `id` of OpenAI Responses).
pub id: Option<String>,
/// Reasoning body text. For Anthropic the accumulated `thinking`, for
/// OpenAI the accumulated `reasoning_text`. Empty for redacted_thinking.
pub text: String,
/// Summary (`summary_text[]` of OpenAI Responses). Empty for other schemes.
pub summary: Vec<String>,
/// Encrypted opaque blob (Anthropic `redacted_thinking.data` /
/// OpenAI Responses `encrypted_content`).
pub encrypted_content: Option<String>,
/// Anthropic extended thinking signature. Required for the round trip.
pub signature: Option<String>,
}
/// 停止理由
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum StopReason {

View File

@ -23,6 +23,7 @@ pub mod error;
pub mod event;
pub mod types;
pub mod retry;
pub mod scheme;
pub mod transport;

View File

@ -0,0 +1,104 @@
//! LLM response stream を開く前の transient error 向けリトライポリシー。
//!
//! Worker が `LlmClient::stream` の open error に対して `is_retryable` を見て
//! retry / backoff / TUI event / cancellation をまとめて管理する。
//! SSE 読み出し開始後の失敗は対象外。
use std::time::Duration;
/// Policy describing exponential backoff + jitter + a cumulative timeout.
///
/// `Default` returns the fixed values used across llm-worker. Extend this if
/// overriding through the manifest ever becomes necessary (currently not
/// needed → `tickets/llm-worker-transient-retry.md`).
#[derive(Debug, Clone)]
pub struct RetryPolicy {
/// Base of the exponential. The actual wait is drawn with full jitter from
/// an upper bound of `base * 2^attempt`, clamped at `cap`.
pub base: Duration,
/// Upper bound on a single wait.
pub cap: Duration,
/// Total number of attempts (initial try + retries). `1` means no retry.
pub max_attempts: u32,
/// Cumulative timeout measured from the start of the first send. Waits
/// that would exceed it are cut off.
pub total_timeout: Duration,
}
/// Fixed defaults: 500 ms base, 10 s per-wait cap, 4 attempts in total and a
/// 40 s cumulative timeout.
impl Default for RetryPolicy {
    fn default() -> Self {
        let base = Duration::from_millis(500);
        let cap = Duration::from_secs(10);
        let total_timeout = Duration::from_secs(40);
        Self {
            base,
            cap,
            max_attempts: 4,
            total_timeout,
        }
    }
}
impl RetryPolicy {
    /// Returns how long to wait after the `attempt`-th failure (0-indexed).
    ///
    /// The wait is picked pseudo-randomly from `[0, min(base * 2^attempt, cap)]`.
    /// Durations too large to express in `u64` nanoseconds saturate instead of
    /// wrapping, so a huge `base` or `cap` (e.g. `Duration::MAX`) can never
    /// shrink the bound or panic. To honour a server-provided `Retry-After`,
    /// skip this method and use that value directly.
    pub fn backoff(&self, attempt: u32) -> Duration {
        // Clamp the exponent so the shift below always stays within u64 range.
        let shift = attempt.min(20);
        let upper = saturating_nanos(self.base)
            .saturating_mul(1u64 << shift)
            .min(saturating_nanos(self.cap));
        Duration::from_nanos(jitter_nanos(upper))
    }
}

/// Converts a duration to whole nanoseconds, saturating at `u64::MAX`.
fn saturating_nanos(d: Duration) -> u64 {
    // The value is clamped to the u64 range first, so the cast is lossless.
    d.as_nanos().min(u128::from(u64::MAX)) as u64
}

/// Picks one value pseudo-randomly from `[0, max_nanos]`.
///
/// A lightweight implementation that only stirs the low bits of `SystemTime`
/// with splitmix64. It has no cryptographic randomness, which is sufficient
/// for spreading full-jitter retries apart.
fn jitter_nanos(max_nanos: u64) -> u64 {
    if max_nanos == 0 {
        return 0;
    }
    let seed = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        // Truncation is intentional: only the fast-changing low bits matter.
        .map(|d| d.as_nanos() as u64)
        .unwrap_or(0);
    let mut x = seed.wrapping_add(0x9E37_79B9_7F4A_7C15);
    x = (x ^ (x >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9);
    x = (x ^ (x >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB);
    x ^= x >> 31;
    // `max_nanos + 1` overflows when the bound is `u64::MAX`; in that case
    // every u64 is already inside the requested range.
    match max_nanos.checked_add(1) {
        Some(modulus) => x % modulus,
        None => x,
    }
}
// Unit tests for `RetryPolicy`.
#[cfg(test)]
mod tests {
use super::*;
// Pins the fixed default values.
#[test]
fn default_policy_values() {
let p = RetryPolicy::default();
assert_eq!(p.base, Duration::from_millis(500));
assert_eq!(p.cap, Duration::from_secs(10));
assert_eq!(p.max_attempts, 4);
assert_eq!(p.total_timeout, Duration::from_secs(40));
}
// No attempt number, however large, may produce a wait above `cap`.
#[test]
fn backoff_respects_cap() {
let p = RetryPolicy::default();
for attempt in 0..30u32 {
assert!(
p.backoff(attempt) <= p.cap,
"attempt {attempt} exceeded cap",
);
}
}
// A zero base makes the upper bound zero, so the wait is always zero.
#[test]
fn backoff_zero_when_base_zero() {
let p = RetryPolicy {
base: Duration::ZERO,
cap: Duration::from_secs(10),
max_attempts: 4,
total_timeout: Duration::from_secs(30),
};
for attempt in 0..5 {
assert_eq!(p.backoff(attempt), Duration::ZERO);
}
}
}

View File

@ -1,34 +1,17 @@
//! `model_id → ModelCapability` 静的テーブル
//! Anthropic scheme の wire-level 既定 capability
//!
//! 既知モデルのみ網羅する。未知モデルは `None` を返し、呼び出し側
//! `HttpTransport` 構築時)に scheme 既定へフォールバックさせる。
//! モデル ID 固有のテーブル(`claude-*` など)は高レベル構築層
//! (`provider::capability`)の責務。ここでは未知モデルでも「この wire で
//! 安全に送れる最小共通項」を返すだけに留める。
use crate::llm_client::capability::{
CacheStrategy, ModelCapability, ReasoningSupport, StructuredOutput, ToolCallingSupport,
CacheStrategy, ModelCapability, StructuredOutput, ToolCallingSupport,
};
/// Anthropic 公式モデルの既定 capability。
///
/// `claude-sonnet-*` / `claude-opus-*` / `claude-haiku-*` に対応する。
/// `cache_control` は公式のみ有効で、最大 4 breakpoint公式仕様
pub(crate) fn lookup(model_id: &str) -> Option<ModelCapability> {
if !model_id.starts_with("claude-") {
return None;
}
Some(ModelCapability {
tool_calling: ToolCallingSupport::Parallel,
structured_output: StructuredOutput::JsonSchema,
reasoning: Some(ReasoningSupport::BudgetTokens),
vision: true,
prompt_caching: CacheStrategy::Explicit { max_breakpoints: 4 },
})
}
/// Scheme 既定の capability。
///
/// Ollama の `/v1/messages` 流用を想定して `cache_control` を送らない
/// `CacheStrategy::Auto` にする。Anthropic 本家の未知モデル(新 Claude
/// も tool_calling / vision を備える想定で Parallel / true を返す。
/// `CacheStrategy::Auto` にする。
pub(crate) fn default_capability() -> ModelCapability {
ModelCapability {
tool_calling: ToolCallingSupport::Parallel,

View File

@ -12,6 +12,7 @@ use crate::llm_client::{
use serde::Deserialize;
use super::AnthropicScheme;
use super::scheme_impl::{AnthropicState, PendingThinking};
/// Anthropic SSEイベントタイプ
#[derive(Debug, Clone, PartialEq, Eq)]
@ -75,7 +76,21 @@ pub(crate) enum ContentBlock {
#[serde(rename = "text")]
Text { text: String },
#[serde(rename = "thinking")]
Thinking { thinking: String },
Thinking {
#[serde(default)]
thinking: String,
/// 非ストリーミングレスポンス由来の初期 signature通常はストリームでは
/// 空 → `signature_delta` で埋まる)。
#[serde(default)]
signature: Option<String>,
},
#[serde(rename = "redacted_thinking")]
RedactedThinking {
/// 暗号化された opaque blob。signature ではなく、まるごと
/// `redacted_thinking.data` として送り返す必要がある。
#[serde(default)]
data: String,
},
#[serde(rename = "tool_use")]
ToolUse {
id: String,
@ -228,7 +243,9 @@ impl AnthropicScheme {
fn convert_block_start(&self, event: &ContentBlockStartEvent) -> Event {
let (block_type, metadata) = match &event.content_block {
ContentBlock::Text { .. } => (BlockType::Text, BlockMetadata::Text),
ContentBlock::Thinking { .. } => (BlockType::Thinking, BlockMetadata::Thinking),
ContentBlock::Thinking { .. } | ContentBlock::RedactedThinking { .. } => {
(BlockType::Thinking, BlockMetadata::Thinking)
}
ContentBlock::ToolUse { id, name, .. } => (
BlockType::ToolUse,
BlockMetadata::ToolUse {
@ -264,6 +281,121 @@ impl AnthropicScheme {
}))
}
/// Stateful, higher-level SSE parse.
///
/// On top of the single-event conversion done by `parse_event`, this:
/// - writes the `block_type` remembered from the preceding start event back
///   into the `BlockStop` emitted for `content_block_stop`;
/// - accumulates the text, signature and redacted data of `thinking` /
///   `redacted_thinking` blocks in `state.pending_thinking`, and emits an
///   extra `Event::ReasoningItem` when such a block stops;
/// - swallows `signature_delta` (it is not forwarded as a stream event and
///   only shows up in the reasoning item).
///
/// Returns an empty vector for event types `AnthropicEventType::parse` does
/// not recognise; JSON decoding failures are propagated via `?`.
pub(crate) fn parse_with_state(
    &self,
    event_type: &str,
    data: &str,
    state: &mut AnthropicState,
) -> Result<Vec<Event>, ClientError> {
    let kind = match AnthropicEventType::parse(event_type) {
        Some(kind) => kind,
        None => return Ok(Vec::new()),
    };

    let mut out: Vec<Event> = Vec::new();
    match kind {
        AnthropicEventType::ContentBlockStart => {
            let start: ContentBlockStartEvent = serde_json::from_str(data)?;
            // Decide the block type and, for thinking-like blocks, the
            // freshly seeded accumulation buffer in one pass.
            let (block_type, seeded) = match &start.content_block {
                ContentBlock::Text { .. } => (BlockType::Text, None),
                ContentBlock::ToolUse { .. } => (BlockType::ToolUse, None),
                ContentBlock::Thinking {
                    thinking,
                    signature,
                } => (
                    BlockType::Thinking,
                    Some(PendingThinking {
                        text: thinking.clone(),
                        signature: signature.clone(),
                        redacted_data: None,
                    }),
                ),
                ContentBlock::RedactedThinking { data: blob } => (
                    BlockType::Thinking,
                    Some(PendingThinking {
                        text: String::new(),
                        signature: None,
                        redacted_data: Some(blob.clone()),
                    }),
                ),
            };
            state.current_block_type = Some(block_type);
            // Non-thinking blocks leave any existing buffer untouched.
            if let Some(buffer) = seeded {
                state.pending_thinking = Some(buffer);
            }
            out.push(self.convert_block_start(&start));
        }
        AnthropicEventType::ContentBlockDelta => {
            let delta_event: ContentBlockDeltaEvent = serde_json::from_str(data)?;
            // `None` means "accumulate only, do not forward to the stream".
            let forwarded = match &delta_event.delta {
                DeltaBlock::ThinkingDelta { thinking } => {
                    if let Some(buffer) = state.pending_thinking.as_mut() {
                        buffer.text.push_str(thinking);
                    }
                    Some(DeltaContent::Thinking(thinking.clone()))
                }
                DeltaBlock::SignatureDelta { signature } => {
                    if let Some(buffer) = state.pending_thinking.as_mut() {
                        // Usually arrives once, but concatenate if several
                        // fragments show up.
                        buffer
                            .signature
                            .get_or_insert_with(String::new)
                            .push_str(signature);
                    }
                    None
                }
                DeltaBlock::TextDelta { text } => Some(DeltaContent::Text(text.clone())),
                DeltaBlock::InputJsonDelta { partial_json } => {
                    Some(DeltaContent::InputJson(partial_json.clone()))
                }
            };
            if let Some(delta) = forwarded {
                out.push(Event::BlockDelta(BlockDelta {
                    index: delta_event.index,
                    delta,
                }));
            }
        }
        AnthropicEventType::ContentBlockStop => {
            let stop: ContentBlockStopEvent = serde_json::from_str(data)?;
            // A stop without a remembered start falls back to `Text`.
            let block_type = state.current_block_type.take().unwrap_or(BlockType::Text);
            let closes_thinking = matches!(block_type, BlockType::Thinking);
            out.push(Event::BlockStop(BlockStop {
                index: stop.index,
                block_type,
                stop_reason: None,
            }));
            if closes_thinking {
                if let Some(buffer) = state.pending_thinking.take() {
                    out.push(Event::ReasoningItem(buffer.into_event()));
                }
            }
        }
        // Everything else needs no state; delegate to `parse_event`.
        _ => out.extend(self.parse_event(event_type, data)?),
    }
    Ok(out)
}
fn convert_usage(&self, usage: &UsageData) -> UsageEvent {
// Anthropic の `input_tokens` は **キャッシュ外** の入力トークンのみで、
// プロンプト全長は input_tokens + cache_read + cache_creation。
@ -391,6 +523,117 @@ mod tests {
}
}
#[test]
fn thinking_block_emits_reasoning_item_with_signature() {
// When a thinking block completes, the emitted ReasoningItem must carry
// both the accumulated text and the signature.
let scheme = AnthropicScheme::new();
let mut state = AnthropicState::default();
// Block start: opens an (initially empty) thinking block.
let evs = scheme
.parse_with_state(
"content_block_start",
r#"{"type":"content_block_start","index":0,"content_block":{"type":"thinking","thinking":""}}"#,
&mut state,
)
.unwrap();
assert!(matches!(evs[0], Event::BlockStart(_)));
// Two thinking deltas whose text should be concatenated.
scheme
.parse_with_state(
"content_block_delta",
r#"{"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":"hello "}}"#,
&mut state,
)
.unwrap();
scheme
.parse_with_state(
"content_block_delta",
r#"{"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":"world"}}"#,
&mut state,
)
.unwrap();
// The signature arrives as its own delta and is only stored in state.
scheme
.parse_with_state(
"content_block_delta",
r#"{"type":"content_block_delta","index":0,"delta":{"type":"signature_delta","signature":"SIG-XYZ"}}"#,
&mut state,
)
.unwrap();
let stop_evs = scheme
.parse_with_state(
"content_block_stop",
r#"{"type":"content_block_stop","index":0}"#,
&mut state,
)
.unwrap();
// Two events are expected in order: BlockStop, then ReasoningItem.
assert!(matches!(stop_evs[0], Event::BlockStop(_)));
let Event::ReasoningItem(reasoning) = &stop_evs[1] else {
panic!("expected ReasoningItem, got {:?}", stop_evs[1]);
};
assert_eq!(reasoning.text, "hello world");
assert_eq!(reasoning.signature.as_deref(), Some("SIG-XYZ"));
assert!(reasoning.encrypted_content.is_none());
}
#[test]
fn redacted_thinking_emits_reasoning_item_with_data() {
    // A redacted_thinking block has no text or signature; its opaque `data`
    // must surface as `encrypted_content` on the ReasoningItem.
    let start_payload = r#"{"type":"content_block_start","index":0,"content_block":{"type":"redacted_thinking","data":"opaque-blob"}}"#;
    let stop_payload = r#"{"type":"content_block_stop","index":0}"#;

    let scheme = AnthropicScheme::new();
    let mut state = AnthropicState::default();
    scheme
        .parse_with_state("content_block_start", start_payload, &mut state)
        .unwrap();
    let events = scheme
        .parse_with_state("content_block_stop", stop_payload, &mut state)
        .unwrap();

    // The reasoning item follows the BlockStop at index 0.
    let reasoning = match &events[1] {
        Event::ReasoningItem(item) => item,
        _ => panic!("expected ReasoningItem"),
    };
    assert!(reasoning.text.is_empty());
    assert!(reasoning.signature.is_none());
    assert_eq!(reasoning.encrypted_content.as_deref(), Some("opaque-blob"));
}
#[test]
fn text_block_does_not_emit_reasoning_item() {
    // A plain text block must close with a lone BlockStop and no
    // ReasoningItem.
    let scheme = AnthropicScheme::new();
    let mut state = AnthropicState::default();

    let lead_in = [
        (
            "content_block_start",
            r#"{"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}"#,
        ),
        (
            "content_block_delta",
            r#"{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"hi"}}"#,
        ),
    ];
    for (event_type, payload) in lead_in {
        scheme
            .parse_with_state(event_type, payload, &mut state)
            .unwrap();
    }

    let closing = scheme
        .parse_with_state(
            "content_block_stop",
            r#"{"type":"content_block_stop","index":0}"#,
            &mut state,
        )
        .unwrap();
    assert_eq!(closing.len(), 1);
    assert!(matches!(closing[0], Event::BlockStop(_)));
}
#[test]
fn test_parse_ping() {
let scheme = AnthropicScheme::new();

View File

@ -8,12 +8,16 @@ use serde::Serialize;
use crate::llm_client::{
Request,
capability::{CacheStrategy, ModelCapability, ReasoningSupport},
capability::{CacheStrategy, ModelCapability, ReasoningControl, ReasoningSupport},
types::{ContentPart, Item, Role, ToolDefinition, parse_tool_arguments},
};
use super::AnthropicScheme;
/// Predicate for serde's `skip_serializing_if`: `true` exactly when the
/// referenced flag is `false`.
fn is_false(value: &bool) -> bool {
    matches!(*value, false)
}
/// Anthropic API request body
#[derive(Debug, Serialize)]
pub(crate) struct AnthropicRequest {
@ -41,7 +45,7 @@ pub(crate) struct AnthropicRequest {
#[derive(Debug, Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub(crate) enum AnthropicThinking {
Enabled { budget_tokens: u32 },
Enabled { budget_tokens: i32 },
}
/// Anthropic message
@ -77,6 +81,21 @@ pub(crate) enum AnthropicContentPart {
#[serde(skip_serializing_if = "Option::is_none")]
cache_control: Option<CacheControl>,
},
#[serde(rename = "thinking")]
Thinking {
thinking: String,
signature: String,
#[serde(skip_serializing_if = "Option::is_none")]
cache_control: Option<CacheControl>,
},
#[serde(rename = "redacted_thinking")]
RedactedThinking {
/// 暗号化済み reasoning blob。`Item::Reasoning::encrypted_content`
/// から渡る。
data: String,
#[serde(skip_serializing_if = "Option::is_none")]
cache_control: Option<CacheControl>,
},
#[serde(rename = "tool_use")]
ToolUse {
id: String,
@ -89,6 +108,8 @@ pub(crate) enum AnthropicContentPart {
ToolResult {
tool_use_id: String,
content: String,
#[serde(default, skip_serializing_if = "is_false")]
is_error: bool,
#[serde(skip_serializing_if = "Option::is_none")]
cache_control: Option<CacheControl>,
},
@ -102,6 +123,21 @@ impl AnthropicContentPart {
}
}
/// Builds a `Thinking` content part from the reasoning text and its
/// signature, with no `cache_control` marker attached.
fn thinking(thinking: String, signature: String) -> Self {
Self::Thinking {
thinking,
signature,
cache_control: None,
}
}
/// Builds a `RedactedThinking` content part wrapping the opaque `data`
/// blob, with no `cache_control` marker attached.
fn redacted_thinking(data: String) -> Self {
Self::RedactedThinking {
data,
cache_control: None,
}
}
fn tool_use(id: String, name: String, input: serde_json::Value) -> Self {
Self::ToolUse {
id,
@ -111,10 +147,11 @@ impl AnthropicContentPart {
}
}
fn tool_result(tool_use_id: String, content: String) -> Self {
fn tool_result(tool_use_id: String, content: String, is_error: bool) -> Self {
Self::ToolResult {
tool_use_id,
content,
is_error,
cache_control: None,
}
}
@ -122,6 +159,8 @@ impl AnthropicContentPart {
fn set_cache_control(&mut self, cc: CacheControl) {
match self {
Self::Text { cache_control, .. }
| Self::Thinking { cache_control, .. }
| Self::RedactedThinking { cache_control, .. }
| Self::ToolUse { cache_control, .. }
| Self::ToolResult { cache_control, .. } => {
*cache_control = Some(cc);
@ -170,9 +209,13 @@ impl AnthropicScheme {
.config
.reasoning
.as_ref()
.and_then(|rc| rc.budget_tokens)
.filter(|_| supports_budget_tokens)
.map(|budget_tokens| AnthropicThinking::Enabled { budget_tokens });
.and_then(|rc| match rc {
ReasoningControl::BudgetTokens(budget_tokens) => Some(AnthropicThinking::Enabled {
budget_tokens: *budget_tokens,
}),
ReasoningControl::Effort(_) => None,
});
AnthropicRequest {
model: model.to_string(),
@ -199,10 +242,13 @@ impl AnthropicScheme {
/// - Tool calls are content parts within assistant messages
/// - Tool results are content parts within user messages
///
/// Each non-`Message` item produces exactly one content part, so
/// "last part for the item" is always well-defined. For breakpoint
/// `Message` items the output is forced into the array form so a
/// marker has a part to attach to.
/// Assistant-side items are accumulated until a user/system message or
/// tool result boundary so one logical assistant burst becomes one
/// Anthropic assistant message content array. Pending parts carry their
/// origin item index; when flushed, the final part for each item records
/// the `(msg_idx, part_idx)` used by breakpoint attachment. User/system
/// `Message` items keep the single-text shorthand unless a breakpoint
/// needs a concrete part to live on.
fn convert_items_to_messages(
&self,
items: &[Item],
@ -218,47 +264,53 @@ impl AnthropicScheme {
for (i, item) in items.iter().enumerate() {
match item {
Item::Message { role, content, .. } => {
flush_pending(&mut messages, &mut pending_assistant, "assistant", &mut locations);
flush_pending(&mut messages, &mut pending_user, "user", &mut locations);
let anthropic_role = match role {
Role::User | Role::System => "user",
Role::Assistant => "assistant",
};
let parts: Vec<AnthropicContentPart> = content
.iter()
.map(|p| match p {
ContentPart::Text { text } => {
AnthropicContentPart::text(text.clone())
}
ContentPart::Text { text } => AnthropicContentPart::text(text.clone()),
ContentPart::Refusal { refusal } => {
AnthropicContentPart::text(refusal.clone())
}
})
.collect();
let force_parts = breakpoints.contains(&i);
let msg_idx = messages.len();
match role {
Role::Assistant => {
flush_pending(&mut messages, &mut pending_user, "user", &mut locations);
pending_assistant.extend(parts.into_iter().map(|part| (i, part)));
}
Role::User | Role::System => {
flush_pending(
&mut messages,
&mut pending_assistant,
"assistant",
&mut locations,
);
flush_pending(&mut messages, &mut pending_user, "user", &mut locations);
// Preserve the single-text shorthand unless a
// breakpoint needs a concrete part to live on.
if parts.len() == 1 && !force_parts {
if let AnthropicContentPart::Text { text, .. } = &parts[0] {
let force_parts = breakpoints.contains(&i);
let msg_idx = messages.len();
// Preserve the single-text shorthand unless a
// breakpoint needs a concrete part to live on.
if parts.len() == 1 && !force_parts {
if let AnthropicContentPart::Text { text, .. } = &parts[0] {
messages.push(AnthropicMessage {
role: "user".to_string(),
content: AnthropicContent::Text(text.clone()),
});
continue;
}
}
let last_part_idx = parts.len().saturating_sub(1);
messages.push(AnthropicMessage {
role: anthropic_role.to_string(),
content: AnthropicContent::Text(text.clone()),
role: "user".to_string(),
content: AnthropicContent::Parts(parts),
});
continue;
locations[i] = Some((msg_idx, last_part_idx));
}
}
let last_part_idx = parts.len().saturating_sub(1);
messages.push(AnthropicMessage {
role: anthropic_role.to_string(),
content: AnthropicContent::Parts(parts),
});
locations[i] = Some((msg_idx, last_part_idx));
}
Item::ToolCall {
@ -282,29 +334,59 @@ impl AnthropicScheme {
call_id,
summary,
content,
is_error,
..
} => {
flush_pending(&mut messages, &mut pending_assistant, "assistant", &mut locations);
flush_pending(
&mut messages,
&mut pending_assistant,
"assistant",
&mut locations,
);
let text = match content {
Some(c) => format!("{summary}\n{c}"),
None => summary.clone(),
};
pending_user.push((
i,
AnthropicContentPart::tool_result(call_id.clone(), text),
AnthropicContentPart::tool_result(call_id.clone(), text, *is_error),
));
}
Item::Reasoning { text, .. } => {
Item::Reasoning {
text,
encrypted_content,
signature,
..
} => {
flush_pending(&mut messages, &mut pending_user, "user", &mut locations);
// Reasoning is treated as assistant text in Anthropic
// (actual thinking blocks are handled differently in streaming).
pending_assistant.push((i, AnthropicContentPart::text(text.clone())));
// Anthropic はアシスタントターン中の `thinking` /
// `redacted_thinking` ブロックを必ず assistant role の
// content_part として送り返す必要がある。
//
// - signature あり: `thinking` content_part を投影
// - signature 無し + encrypted_content あり:
// `redacted_thinking` content_part を投影
// - どちらも無い: 他 scheme(OpenAI 等)から流入した
// 素の reasoning text。Anthropic に投げる意味も
// round-trip の根拠も無いので drop。
if let Some(sig) = signature.clone() {
pending_assistant
.push((i, AnthropicContentPart::thinking(text.clone(), sig)));
} else if let Some(data) = encrypted_content.clone() {
pending_assistant.push((i, AnthropicContentPart::redacted_thinking(data)));
}
// どちらも None なら何も pend せず、本 item は無視。
}
}
}
flush_pending(&mut messages, &mut pending_assistant, "assistant", &mut locations);
flush_pending(
&mut messages,
&mut pending_assistant,
"assistant",
&mut locations,
);
flush_pending(&mut messages, &mut pending_user, "user", &mut locations);
// Apply cache_control markers at each breakpoint item's last part.
@ -400,7 +482,7 @@ fn compute_breakpoints(items: &[Item], cache_anchor: Option<usize>) -> BTreeSet<
mod tests {
use super::*;
use crate::llm_client::capability::{
CacheStrategy, StructuredOutput, ToolCallingSupport,
CacheStrategy, ReasoningEffort, StructuredOutput, ToolCallingSupport,
};
/// cache_control が有効になる既定の capability。
@ -422,6 +504,13 @@ mod tests {
}
}
/// Test fixture: the `cap_explicit()` capability with `reasoning`
/// overridden to `ReasoningSupport::BudgetTokens`.
fn cap_budget_reasoning() -> ModelCapability {
ModelCapability {
reasoning: Some(ReasoningSupport::BudgetTokens),
..cap_explicit()
}
}
#[test]
fn test_build_simple_request() {
let scheme = AnthropicScheme::new();
@ -429,7 +518,8 @@ mod tests {
.system("You are a helpful assistant.")
.user("Hello!");
let anthropic_req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit());
let anthropic_req =
scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit());
assert_eq!(anthropic_req.model, "claude-sonnet-4-20250514");
assert_eq!(
@ -455,12 +545,45 @@ mod tests {
})),
);
let anthropic_req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit());
let anthropic_req =
scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit());
assert_eq!(anthropic_req.tools.len(), 1);
assert_eq!(anthropic_req.tools[0].name, "get_weather");
}
#[test]
fn thinking_budget_projected_when_supported() {
    // A BudgetTokens control must serialise as
    // `thinking: { type: "enabled", budget_tokens: <n> }`.
    let scheme = AnthropicScheme::new();
    let mut request = Request::new().user("think");
    request.config.reasoning = Some(ReasoningControl::BudgetTokens(4096));

    let capability = cap_budget_reasoning();
    let built = scheme.build_request("claude-sonnet-4-20250514", &request, &capability);

    let wire = serde_json::to_value(&built).unwrap();
    assert_eq!(wire["thinking"]["type"], "enabled");
    assert_eq!(wire["thinking"]["budget_tokens"], 4096);
}
#[test]
fn effort_reasoning_not_projected_to_anthropic() {
    // An Effort-style control must leave `thinking` unset, even when the
    // capability supports budget tokens.
    let scheme = AnthropicScheme::new();
    let mut request = Request::new().user("think");
    request.config.reasoning = Some(ReasoningControl::Effort(ReasoningEffort::High));

    let capability = cap_budget_reasoning();
    let built = scheme.build_request("claude-sonnet-4-20250514", &request, &capability);

    assert!(built.thinking.is_none());
}
#[test]
fn test_tool_call_and_result() {
let scheme = AnthropicScheme::new();
@ -473,7 +596,8 @@ mod tests {
))
.item(Item::tool_result("call_123", "Sunny, 25°C"));
let anthropic_req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit());
let anthropic_req =
scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit());
assert_eq!(anthropic_req.messages.len(), 3);
assert_eq!(anthropic_req.messages[0].role, "user");
@ -485,6 +609,8 @@ mod tests {
fn part_cache_control(part: &AnthropicContentPart) -> Option<CacheControl> {
match part {
AnthropicContentPart::Text { cache_control, .. }
| AnthropicContentPart::Thinking { cache_control, .. }
| AnthropicContentPart::RedactedThinking { cache_control, .. }
| AnthropicContentPart::ToolUse { cache_control, .. }
| AnthropicContentPart::ToolResult { cache_control, .. } => *cache_control,
}
@ -506,6 +632,109 @@ mod tests {
out
}
#[test]
fn assistant_burst_bundles_reasoning_text_and_tool_call() {
    // Signed reasoning, assistant text and a tool call in a row must end up
    // as three parts of one assistant message.
    let scheme = AnthropicScheme::new();
    let request = Request::new()
        .user("question?")
        .item(Item::reasoning("thinking").with_signature("SIG-A"))
        .item(Item::assistant_message("answer"))
        .item(Item::tool_call("c1", "tool_a", r#"{"x":1}"#));

    let built = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit());

    assert_eq!(built.messages.len(), 2, "messages: {:?}", built.messages);
    let (user_msg, assistant_msg) = (&built.messages[0], &built.messages[1]);
    assert_eq!(user_msg.role, "user");
    assert_eq!(assistant_msg.role, "assistant");

    let parts = match &assistant_msg.content {
        AnthropicContent::Parts(parts) => parts,
        _ => panic!("assistant burst must be emitted as content parts"),
    };
    assert_eq!(parts.len(), 3, "parts: {:?}", parts);
    assert!(matches!(parts[0], AnthropicContentPart::Thinking { .. }));
    assert!(matches!(parts[1], AnthropicContentPart::Text { .. }));
    assert!(matches!(parts[2], AnthropicContentPart::ToolUse { .. }));
}
#[test]
fn tool_result_and_user_messages_bound_assistant_bursts() {
// Tool results and user messages act as boundaries: the assistant items
// before the tool result form one message, and the assistant text after it
// forms a separate one.
let scheme = AnthropicScheme::new();
let request = Request::new()
.user("question?")
.item(Item::reasoning("thinking").with_signature("SIG-A"))
.item(Item::assistant_message("answer"))
.item(Item::tool_call("c1", "tool_a", "{}"))
.item(Item::tool_result("c1", "result"))
.item(Item::assistant_message("final"))
.user("follow up");
let req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit());
// Expected role sequence of the produced messages.
let roles: Vec<&str> = req.messages.iter().map(|msg| msg.role.as_str()).collect();
assert_eq!(
roles,
vec!["user", "assistant", "user", "assistant", "user"]
);
// First assistant burst: thinking, text, tool_use in that order.
let AnthropicContent::Parts(first_assistant) = &req.messages[1].content else {
panic!("first assistant burst must be content parts");
};
assert_eq!(first_assistant.len(), 3);
assert!(matches!(
first_assistant[0],
AnthropicContentPart::Thinking { .. }
));
assert!(matches!(
first_assistant[1],
AnthropicContentPart::Text { .. }
));
assert!(matches!(
first_assistant[2],
AnthropicContentPart::ToolUse { .. }
));
// The tool result is carried by the following user-role message.
let AnthropicContent::Parts(tool_result) = &req.messages[2].content else {
panic!("tool result must be content parts");
};
assert_eq!(tool_result.len(), 1);
assert!(matches!(
tool_result[0],
AnthropicContentPart::ToolResult { .. }
));
// Second assistant burst: only the final text, still in Parts form.
let AnthropicContent::Parts(second_assistant) = &req.messages[3].content else {
panic!("second assistant burst must be content parts");
};
assert_eq!(second_assistant.len(), 1);
assert!(matches!(
second_assistant[0],
AnthropicContentPart::Text { .. }
));
}
#[test]
fn assistant_message_breakpoint_maps_to_text_part_inside_burst() {
    // With the cache anchor on the assistant message (item 2), the marker
    // must land on that item's text part inside the bundled burst; the
    // tool_use part is expected to be marked as well.
    let scheme = AnthropicScheme::new();
    let history = vec![
        Item::user_message("question?"),
        Item::reasoning("thinking").with_signature("SIG-A"),
        Item::assistant_message("answer"),
        Item::tool_call("c1", "tool_a", "{}"),
        Item::user_message("next"),
    ];
    let mut request = Request::new().items(history);
    request.cache_anchor = Some(2);

    let built = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit());

    let parts = match &built.messages[1].content {
        AnthropicContent::Parts(parts) => parts,
        _ => panic!("assistant burst must be content parts"),
    };
    assert!(matches!(parts[0], AnthropicContentPart::Thinking { .. }));
    assert!(matches!(parts[1], AnthropicContentPart::Text { .. }));
    assert!(matches!(parts[2], AnthropicContentPart::ToolUse { .. }));
    for marked in &parts[1..=2] {
        assert_eq!(part_cache_control(marked), Some(CacheControl::Ephemeral));
    }
}
/// Convenience: a turn that ends with one assistant text, one tool
/// call/result pair, and a final assistant text. Produced at
/// `history[head..]` indices shown alongside, so tests can reason
@ -607,9 +836,7 @@ mod tests {
// so we don't bloat requests with wrapper arrays. Here the Head
// lands on items[1], leaving items[0] without a marker.
let scheme = AnthropicScheme::new();
let request = Request::new()
.user("hello")
.assistant("hi there");
let request = Request::new().user("hello").assistant("hi there");
let req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit());
assert!(
matches!(req.messages[0].content, AnthropicContent::Text(_)),
@ -628,10 +855,7 @@ mod tests {
match &req.messages[0].content {
AnthropicContent::Parts(parts) => {
assert_eq!(parts.len(), 1);
assert_eq!(
part_cache_control(&parts[0]),
Some(CacheControl::Ephemeral)
);
assert_eq!(part_cache_control(&parts[0]), Some(CacheControl::Ephemeral));
}
AnthropicContent::Text(_) => panic!("breakpoint item should use Parts form"),
}
@ -668,7 +892,8 @@ mod tests {
#[test]
fn empty_items_produce_no_breakpoints() {
let scheme = AnthropicScheme::new();
let req = scheme.build_request("claude-sonnet-4-20250514", &Request::new(), &cap_explicit());
let req =
scheme.build_request("claude-sonnet-4-20250514", &Request::new(), &cap_explicit());
assert!(req.messages.is_empty());
assert!(breakpoint_positions(&req).is_empty());
}
@ -684,6 +909,165 @@ mod tests {
assert!(breakpoint_positions(&req).is_empty());
}
/// Test helper: gathers, in message order, every `Thinking` /
/// `RedactedThinking` part found in assistant-role messages that use the
/// `Parts` content form.
fn collect_assistant_thinking_parts(req: &AnthropicRequest) -> Vec<&AnthropicContentPart> {
    req.messages
        .iter()
        .filter(|msg| msg.role == "assistant")
        .filter_map(|msg| match &msg.content {
            AnthropicContent::Parts(parts) => Some(parts),
            _ => None,
        })
        .flatten()
        .filter(|part| {
            matches!(
                **part,
                AnthropicContentPart::Thinking { .. }
                    | AnthropicContentPart::RedactedThinking { .. }
            )
        })
        .collect()
}
#[test]
fn reasoning_with_signature_projects_thinking_part() {
    // An `Item::Reasoning` that carries a signature is sent as a `thinking`
    // content part in an assistant-role message.
    let request = Request::new()
        .user("hi")
        .item(Item::reasoning("step-by-step").with_signature("SIG-A"))
        .item(Item::assistant_message("done"));
    let scheme = AnthropicScheme::new();
    let built = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit());

    let found = collect_assistant_thinking_parts(&built);
    assert_eq!(found.len(), 1);

    let other = found[0];
    let AnthropicContentPart::Thinking {
        thinking,
        signature,
        ..
    } = other
    else {
        panic!("expected Thinking part, got {other:?}");
    };
    assert_eq!(thinking, "step-by-step");
    assert_eq!(signature, "SIG-A");
}
#[test]
fn reasoning_with_only_encrypted_content_projects_redacted_thinking() {
    // No signature but encrypted content present: the item is projected as
    // a `redacted_thinking` part carrying the blob.
    let request = Request::new()
        .user("hi")
        .item(Item::reasoning("").with_encrypted_content("opaque"))
        .item(Item::assistant_message("done"));
    let scheme = AnthropicScheme::new();
    let built = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit());

    let found = collect_assistant_thinking_parts(&built);
    assert_eq!(found.len(), 1);

    let other = found[0];
    let AnthropicContentPart::RedactedThinking { data, .. } = other else {
        panic!("expected RedactedThinking, got {other:?}");
    };
    assert_eq!(data, "opaque");
}
#[test]
fn reasoning_without_signature_or_encrypted_is_dropped() {
    // Bare reasoning text that came in from another scheme is not sent to
    // Anthropic: no thinking part may appear at all.
    let request = Request::new()
        .user("hi")
        .item(Item::reasoning("plain text"))
        .item(Item::assistant_message("done"));
    let scheme = AnthropicScheme::new();
    let built = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit());

    let found = collect_assistant_thinking_parts(&built);
    assert!(found.is_empty());
}
#[test]
fn thinking_part_lands_in_assistant_role_message() {
// Wire-structure position check: thinking parts sit in the content array
// of an assistant-role message (and never in a user-role message).
let scheme = AnthropicScheme::new();
let request = Request::new()
.user("question?")
.item(Item::reasoning("thinking inside").with_signature("SIG-A"))
.item(Item::tool_call("c1", "tool_a", "{}"))
.item(Item::tool_result("c1", "result"))
.user("follow up");
let req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit());
// Every thinking part must live in an assistant-role message.
let mut thinking_msg_indices = Vec::new();
for (i, msg) in req.messages.iter().enumerate() {
if let AnthropicContent::Parts(parts) = &msg.content {
if parts
.iter()
.any(|p| matches!(p, AnthropicContentPart::Thinking { .. }))
{
assert_eq!(
msg.role, "assistant",
"thinking part must be in assistant role, got {} at msg {}",
msg.role, i,
);
thinking_msg_indices.push(i);
}
}
}
assert!(
!thinking_msg_indices.is_empty(),
"expected at least one thinking part in messages: {:?}",
req.messages,
);
// The assistant message holding the thinking part must come at or before
// the message holding the tool_use that follows it
// (per the original note, Anthropic expects thinking -> tool_use order
// within one logical turn).
let mut tool_use_msg_indices = Vec::new();
for (i, msg) in req.messages.iter().enumerate() {
if let AnthropicContent::Parts(parts) = &msg.content {
if parts
.iter()
.any(|p| matches!(p, AnthropicContentPart::ToolUse { .. }))
{
tool_use_msg_indices.push(i);
}
}
}
assert!(!tool_use_msg_indices.is_empty(), "expected tool_use part");
let first_thinking = thinking_msg_indices[0];
let first_tool_use = tool_use_msg_indices[0];
assert!(
first_thinking <= first_tool_use,
"thinking msg ({}) must precede tool_use msg ({})",
first_thinking,
first_tool_use,
);
}
#[test]
fn redacted_thinking_part_lands_in_assistant_role_message() {
    // RedactedThinking parts must likewise be placed in an assistant-role
    // message.
    let scheme = AnthropicScheme::new();
    let request = Request::new()
        .user("ask")
        .item(Item::reasoning("").with_encrypted_content("opaque"))
        .item(Item::tool_call("c1", "tool_a", "{}"))
        .item(Item::tool_result("c1", "ok"));
    let req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit());

    // Count matches so the test cannot pass vacuously when the redacted
    // part is dropped altogether.
    let mut redacted_seen = 0usize;
    for msg in &req.messages {
        if let AnthropicContent::Parts(parts) = &msg.content {
            for part in parts {
                if matches!(part, AnthropicContentPart::RedactedThinking { .. }) {
                    assert_eq!(msg.role, "assistant");
                    redacted_seen += 1;
                }
            }
        }
    }
    assert!(
        redacted_seen > 0,
        "expected at least one redacted_thinking part in messages: {:?}",
        req.messages,
    );
}
#[test]
fn tool_definitions_carry_no_cache_control() {
// Tool JSON schema must serialise unchanged — no sneak-in of

View File

@ -7,9 +7,9 @@ use serde_json::Value;
use crate::llm_client::{
ClientError,
capability::ModelCapability,
event::{BlockStop, BlockType, Event},
auth::AuthRequirement,
capability::ModelCapability,
event::{BlockType, Event, ReasoningItemEvent},
scheme::Scheme,
types::Request,
};
@ -18,12 +18,37 @@ use super::AnthropicScheme;
/// Anthropic の SSE パースで必要な状態。
///
/// `content_block_stop` イベントは `block_type` を持たない仕様なので、
/// 直前の `content_block_start` で観測した `block_type` を保持して
/// `BlockStop` に書き戻す。
/// 1. `content_block_stop` イベントは `block_type` を持たない仕様なので、
/// 直前の `content_block_start` で観測した `block_type` を保持して
/// `BlockStop` に書き戻す。
/// 2. `thinking` ブロック中の `thinking_delta` テキストと `signature_delta`
/// 署名、および `redacted_thinking` ブロックの `data` を蓄積し、
/// `content_block_stop` で `Event::ReasoningItem` を発火する
/// (round-trip 永続化のため)。
#[derive(Debug, Default)]
pub struct AnthropicState {
current_block_type: Option<BlockType>,
pub(crate) current_block_type: Option<BlockType>,
pub(crate) pending_thinking: Option<PendingThinking>,
}
/// Accumulation buffer for a single `thinking` or `redacted_thinking`
/// content block.
#[derive(Debug, Default)]
pub(crate) struct PendingThinking {
/// Thinking text gathered for the block.
pub(crate) text: String,
/// Signature for the block, if one has been seen.
pub(crate) signature: Option<String>,
/// Opaque data blob of a `redacted_thinking` block, if any.
pub(crate) redacted_data: Option<String>,
}
impl PendingThinking {
    /// Consumes the buffer and turns it into a `ReasoningItemEvent`.
    ///
    /// `redacted_data` is surfaced as `encrypted_content`; `id` is left
    /// unset and `summary` is empty.
    pub(crate) fn into_event(self) -> ReasoningItemEvent {
        let Self {
            text,
            signature,
            redacted_data,
        } = self;
        ReasoningItemEvent {
            id: None,
            text,
            summary: Vec::new(),
            encrypted_content: redacted_data,
            signature,
        }
    }
}
impl Scheme for AnthropicScheme {
@ -73,28 +98,7 @@ impl Scheme for AnthropicScheme {
data: &str,
state: &mut Self::State,
) -> Result<Vec<Event>, ClientError> {
let Some(mut event) = self.parse_event(event_type, data)? else {
return Ok(Vec::new());
};
match &event {
Event::BlockStart(start) => {
state.current_block_type = Some(start.block_type);
}
Event::BlockStop(stop) => {
if let Some(block_type) = state.current_block_type.take() {
event = Event::BlockStop(BlockStop {
block_type,
..stop.clone()
});
}
}
_ => {}
}
Ok(vec![event])
}
fn capability_for(&self, model_id: &str) -> Option<ModelCapability> {
super::capability::lookup(model_id)
self.parse_with_state(event_type, data, state)
}
fn default_capability(&self) -> ModelCapability {

View File

@ -1,10 +1,14 @@
//! `model_id → ModelCapability` 静的テーブル(Google Gemini)
//! Gemini scheme の wire-level 既定 capability。
//!
//! モデル ID 固有のテーブル(`gemini-*` バージョン別の reasoning 有無)は
//! 高レベル構築層(`provider::capability`)の責務。ここでは wire の
//! 保守的 default のみ。
use crate::llm_client::capability::{
CacheStrategy, ModelCapability, ReasoningSupport, StructuredOutput, ToolCallingSupport,
CacheStrategy, ModelCapability, StructuredOutput, ToolCallingSupport,
};
/// Scheme 既定の capability(未知モデル / 未明示モデル用)
/// Scheme 既定の capability(未知モデル / 未明示モデル用)
pub(crate) fn default_capability() -> ModelCapability {
ModelCapability {
tool_calling: ToolCallingSupport::Parallel,
@ -14,24 +18,3 @@ pub(crate) fn default_capability() -> ModelCapability {
prompt_caching: CacheStrategy::Auto,
}
}
/// Static capability lookup for Google Gemini model ids.
///
/// Returns `None` unless the id starts with `gemini-`. Ids starting with
/// `gemini-2.5` or `gemini-3` are reported with budget-token reasoning;
/// other Gemini ids are reported without reasoning support.
pub(crate) fn lookup(model_id: &str) -> Option<ModelCapability> {
    if !model_id.starts_with("gemini-") {
        return None;
    }

    // The 2.5 series and later have thinking / reasoning.
    let has_reasoning = ["gemini-2.5", "gemini-3"]
        .iter()
        .any(|prefix| model_id.starts_with(*prefix));

    Some(ModelCapability {
        tool_calling: ToolCallingSupport::Parallel,
        structured_output: StructuredOutput::JsonSchema,
        reasoning: has_reasoning.then_some(ReasoningSupport::BudgetTokens),
        vision: true,
        prompt_caching: CacheStrategy::Auto,
    })
}

View File

@ -131,6 +131,7 @@ impl GeminiScheme {
status: None,
code: Some("parse_error".to_string()),
message: format!("Failed to parse Gemini SSE data: {} -> {}", e, data),
retry_after: None,
})?;
let mut events = Vec::new();

View File

@ -7,7 +7,7 @@ use serde_json::Value;
use crate::llm_client::{
Request,
capability::{ModelCapability, ReasoningSupport},
capability::{ModelCapability, ReasoningControl, ReasoningSupport},
types::{Item, Role, ToolDefinition, parse_tool_arguments},
};
@ -203,10 +203,12 @@ impl GeminiScheme {
.config
.reasoning
.as_ref()
.and_then(|rc| rc.budget_tokens)
.filter(|_| supports_budget)
.map(|budget| GeminiThinkingConfig {
thinking_budget: budget as i32,
.and_then(|rc| match rc {
ReasoningControl::BudgetTokens(budget) => Some(GeminiThinkingConfig {
thinking_budget: *budget,
}),
ReasoningControl::Effort(_) => None,
});
// Generation config
@ -374,7 +376,9 @@ impl GeminiScheme {
#[cfg(test)]
mod tests {
use super::*;
use crate::llm_client::capability::{CacheStrategy, StructuredOutput, ToolCallingSupport};
use crate::llm_client::capability::{
CacheStrategy, ReasoningEffort, StructuredOutput, ToolCallingSupport,
};
fn cap() -> ModelCapability {
ModelCapability {
@ -386,6 +390,13 @@ mod tests {
}
}
// Test fixture: the base `cap()` capability with reasoning switched to
// `ReasoningSupport::BudgetTokens`; every other field is taken from `cap()`.
fn cap_budget_reasoning() -> ModelCapability {
ModelCapability {
reasoning: Some(ReasoningSupport::BudgetTokens),
..cap()
}
}
#[test]
fn test_build_simple_request() {
let scheme = GeminiScheme::new();
@ -457,4 +468,29 @@ mod tests {
assert_eq!(gemini_req.contents[1].role, "model");
assert_eq!(gemini_req.contents[2].role, "user");
}
// A `BudgetTokens` reasoning control must reach the Gemini request as
// `thinking_config.thinking_budget` with the value unchanged (here -1).
#[test]
fn thinking_budget_projected_when_supported() {
let scheme = GeminiScheme::new();
let mut request = Request::new().user("think");
request.config.reasoning = Some(ReasoningControl::BudgetTokens(-1));
let gemini_req = scheme.build_request(&request, &cap_budget_reasoning());
let config = gemini_req.generation_config.expect("generation config");
let thinking = config.thinking_config.expect("thinking config");
assert_eq!(thinking.thinking_budget, -1);
}
// An `Effort` reasoning control must not produce a `thinking_config` in the
// Gemini request, even when the capability advertises budget-token reasoning.
#[test]
fn effort_reasoning_not_projected_to_gemini() {
let scheme = GeminiScheme::new();
let mut request = Request::new().user("think");
request.config.reasoning = Some(ReasoningControl::Effort(ReasoningEffort::Medium));
let gemini_req = scheme.build_request(&request, &cap_budget_reasoning());
let config = gemini_req.generation_config.expect("generation config");
assert!(config.thinking_config.is_none());
}
}

View File

@ -3,11 +3,7 @@
use serde_json::Value;
use crate::llm_client::{
ClientError,
capability::ModelCapability,
event::Event,
auth::AuthRequirement,
scheme::Scheme,
ClientError, auth::AuthRequirement, capability::ModelCapability, event::Event, scheme::Scheme,
types::Request,
};
@ -47,10 +43,6 @@ impl Scheme for GeminiScheme {
Ok(self.parse_event(data)?.unwrap_or_default())
}
fn capability_for(&self, model_id: &str) -> Option<ModelCapability> {
super::capability::lookup(model_id)
}
fn default_capability(&self) -> ModelCapability {
super::capability::default_capability()
}

View File

@ -76,13 +76,10 @@ pub trait Scheme: Clone + Send + Sync + 'static {
state: &mut Self::State,
) -> Result<Vec<Event>, ClientError>;
/// 既知モデル ID の能力テーブル引き。未知なら `None` を返す
/// ので、呼び出し側は [`Scheme::default_capability`] に
/// フォールバックする。
fn capability_for(&self, model_id: &str) -> Option<ModelCapability>;
/// scheme 既定の capability。未知モデル ID や未明示モデルでの
/// フォールバックに使う。`capability_for` と違って必ず値を返す。
/// scheme 既定の capability。モデル ID に関係なく、この wire で
/// 安全に送れる最小共通項を返す。既知モデル ID の能力テーブルは
/// `provider::capability::lookup` 側(高レベル構築層)の責務で、
/// scheme はここには関与しない。
fn default_capability(&self) -> ModelCapability;
/// scheme 側でサポートしていない `RequestConfig` フィールドを
@ -93,4 +90,3 @@ pub trait Scheme: Clone + Send + Sync + 'static {
Vec::new()
}
}

View File

@ -1,76 +1,13 @@
//! `model_id → ModelCapability` 静的テーブルOpenAI Chat Completions
//! OpenAI Chat Completions scheme の wire-level 既定 capability
//!
//! OpenAI 本家の主要モデルのみ網羅する。OpenRouter / xAI / Groq 等は
//! モデル ID が各社独自なので、マニフェスト側で明示 override する
//! 前提。
//!
//! [`classify`] はモデル ID から family を判定する一次情報で、
//! `scheme/openai_responses` からも参照される。
//! モデル ID 固有のテーブル(`gpt-5` 系など)は高レベル構築層
//! (`provider::capability`)の責務。ここでは wire の保守的 default のみ。
use crate::llm_client::capability::{
CacheStrategy, ModelCapability, ReasoningSupport, StructuredOutput, ToolCallingSupport,
CacheStrategy, ModelCapability, StructuredOutput, ToolCallingSupport,
};
/// Model-family classification for first-party OpenAI models.
///
/// Primary classification shared by `openai_chat` and `openai_responses`. Each scheme
/// applies its own scheme-specific `ReasoningSupport` etc. to this classification to
/// assemble a `ModelCapability`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum OpenAiFamily {
/// GPT-5 / o1 / o3 / o4 family — supports reasoning.
Reasoning,
/// GPT-4o / GPT-4 family.
Gpt4,
/// GPT-3.5 family (legacy).
Gpt35,
}
/// Determines the model family from the prefix of `model_id`.
///
/// Prefixes are tried in table order and the first match wins; an ID that
/// matches no prefix yields `None`.
pub(crate) fn classify(model_id: &str) -> Option<OpenAiFamily> {
    const PREFIX_TABLE: [(&str, OpenAiFamily); 6] = [
        ("gpt-5", OpenAiFamily::Reasoning),
        ("o1", OpenAiFamily::Reasoning),
        ("o3", OpenAiFamily::Reasoning),
        ("o4", OpenAiFamily::Reasoning),
        ("gpt-4", OpenAiFamily::Gpt4),
        ("gpt-3.5", OpenAiFamily::Gpt35),
    ];

    PREFIX_TABLE
        .iter()
        .find(|(prefix, _)| model_id.starts_with(*prefix))
        .map(|&(_, family)| family)
}
/// Resolves the Chat Completions capability of a first-party OpenAI model.
///
/// Returns `None` when `classify` does not recognise `model_id`. All families
/// share parallel tool calling and automatic prompt caching; the structured
/// output mode, reasoning support and vision flag depend on the family.
pub(crate) fn lookup(model_id: &str) -> Option<ModelCapability> {
    let family = classify(model_id)?;

    // Only the fields that differ between families are selected here.
    let (structured_output, reasoning, vision) = match family {
        OpenAiFamily::Reasoning => (
            StructuredOutput::JsonSchema,
            Some(ReasoningSupport::Effort),
            true,
        ),
        OpenAiFamily::Gpt4 => (StructuredOutput::JsonSchema, None, true),
        OpenAiFamily::Gpt35 => (StructuredOutput::JsonObject, None, false),
    };

    Some(ModelCapability {
        tool_calling: ToolCallingSupport::Parallel,
        structured_output,
        reasoning,
        vision,
        prompt_caching: CacheStrategy::Auto,
    })
}
/// Scheme 既定の capability。OpenAI 互換ルーター系xAI / Groq / OpenRouter 等)
/// Scheme 既定の capability。OpenAI 互換ルーター系(xAI / Groq / OpenRouter 等)
/// で未知モデル ID を受けたときのフォールバックに使う。
pub(crate) fn default_capability() -> ModelCapability {
ModelCapability {

View File

@ -75,6 +75,7 @@ impl OpenAIScheme {
status: None,
code: Some("parse_error".to_string()),
message: format!("Failed to parse SSE data: {} -> {}", e, data),
retry_after: None,
})?;
let mut events = Vec::new();

View File

@ -7,7 +7,7 @@ use serde_json::Value;
use crate::llm_client::{
Request,
capability::{ModelCapability, ReasoningEffort, ReasoningSupport},
capability::{ModelCapability, ReasoningControl, ReasoningSupport},
types::{Item, Role, ToolDefinition, parse_tool_arguments},
};
@ -37,7 +37,7 @@ pub(crate) struct OpenAIRequest {
pub tool_choice: Option<String>,
/// Reasoning effort(o1 / o3 / o4 / gpt-5 系で有効)。
#[serde(skip_serializing_if = "Option::is_none")]
pub reasoning_effort: Option<&'static str>,
pub reasoning_effort: Option<String>,
}
#[derive(Debug, Serialize)]
@ -154,12 +154,10 @@ impl OpenAIScheme {
.config
.reasoning
.as_ref()
.and_then(|rc| rc.effort)
.filter(|_| supports_effort)
.map(|effort| match effort {
ReasoningEffort::Low => "low",
ReasoningEffort::Medium => "medium",
ReasoningEffort::High => "high",
.and_then(|rc| match rc {
ReasoningControl::Effort(effort) => Some(effort.as_str().to_string()),
ReasoningControl::BudgetTokens(_) => None,
});
OpenAIRequest {
@ -322,7 +320,9 @@ impl OpenAIScheme {
#[cfg(test)]
mod tests {
use super::*;
use crate::llm_client::capability::{CacheStrategy, StructuredOutput, ToolCallingSupport};
use crate::llm_client::capability::{
CacheStrategy, ReasoningEffort, StructuredOutput, ToolCallingSupport,
};
fn cap() -> ModelCapability {
ModelCapability {
@ -387,6 +387,38 @@ mod tests {
assert!(body.max_tokens.is_none());
}
// An `Effort` reasoning control must be written to `reasoning_effort` as its
// string form; `ReasoningEffort::Other` carries a provider-native label through.
#[test]
fn reasoning_effort_projected_when_supported() {
let scheme = OpenAIScheme::new();
let mut request = Request::new().user("Hello");
request.config.reasoning = Some(ReasoningControl::Effort(ReasoningEffort::Other(
"provider-native".into(),
)));
let capability = ModelCapability {
reasoning: Some(ReasoningSupport::Effort),
..cap()
};
let body = scheme.build_request("gpt-5", &request, &capability);
assert_eq!(body.reasoning_effort.as_deref(), Some("provider-native"));
}
// A `BudgetTokens` reasoning control must not set `reasoning_effort` in the
// Chat Completions body, even when the capability reports `ReasoningSupport::Both`.
#[test]
fn budget_reasoning_not_projected_to_openai_chat() {
let scheme = OpenAIScheme::new();
let mut request = Request::new().user("Hello");
request.config.reasoning = Some(ReasoningControl::BudgetTokens(4096));
let capability = ModelCapability {
reasoning: Some(ReasoningSupport::Both),
..cap()
};
let body = scheme.build_request("gpt-5", &request, &capability);
assert!(body.reasoning_effort.is_none());
}
#[test]
fn test_tool_call_and_result() {
let scheme = OpenAIScheme::new();

View File

@ -52,10 +52,6 @@ impl Scheme for OpenAIScheme {
Ok(self.parse_event(data)?.unwrap_or_default())
}
fn capability_for(&self, model_id: &str) -> Option<ModelCapability> {
super::capability::lookup(model_id)
}
fn default_capability(&self) -> ModelCapability {
super::capability::default_capability()
}

View File

@ -1,75 +1,11 @@
//! `model_id → ModelCapability` 静的テーブルOpenAI Responses API
//! OpenAI Responses scheme の wire-level 既定 capability
//!
//! モデル family 判定は `scheme/openai_chat/capability.rs::classify` を
//! 共有する。Responses 側は `ReasoningSupport::Effort` 固定で、prompt
//! caching はサーバ側自動(`CacheStrategy::Auto`)。
//!
//! `gpt-5-codex` は `gpt-5` prefix 経由で Reasoning 扱いされるが、
//! `codex-mini-latest` 等 `codex-` prefix のモデルは ChatGPT backend
//! 経由CodexOAuthでしか使えないため、このテーブルでだけ Reasoning
//! にフォールバックする。
//! モデル ID 固有のテーブル(`gpt-5` / `codex-` 系など)は高レベル構築層
//! (`provider::capability`)の責務。ここでは wire の保守的 default のみ。
use crate::llm_client::capability::{
CacheStrategy, ModelCapability, ReasoningSupport, StructuredOutput, ToolCallingSupport,
CacheStrategy, ModelCapability, StructuredOutput, ToolCallingSupport,
};
use crate::llm_client::scheme::openai_chat::capability::{OpenAiFamily, classify};
/// Resolves the Responses API capability of an OpenAI model.
///
/// The family comes from `classify`; IDs it does not recognise are still
/// treated as the reasoning family when they start with `codex-`. Any other
/// unknown ID yields `None`.
pub(crate) fn lookup(model_id: &str) -> Option<ModelCapability> {
    let family = match classify(model_id) {
        Some(family) => family,
        // `codex-` models fall back to the reasoning family in this table only.
        None if model_id.starts_with("codex-") => OpenAiFamily::Reasoning,
        None => return None,
    };

    // Only the fields that differ between families are selected here.
    let (structured_output, reasoning, vision) = match family {
        OpenAiFamily::Reasoning => (
            StructuredOutput::JsonSchema,
            Some(ReasoningSupport::Effort),
            true,
        ),
        OpenAiFamily::Gpt4 => (StructuredOutput::JsonSchema, None, true),
        OpenAiFamily::Gpt35 => (StructuredOutput::JsonObject, None, false),
    };

    Some(ModelCapability {
        tool_calling: ToolCallingSupport::Parallel,
        structured_output,
        reasoning,
        vision,
        prompt_caching: CacheStrategy::Auto,
    })
}
// Unit tests for the model-ID capability lookup.
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn gpt_5_codex_is_reasoning() {
// Classified through the `gpt-5` prefix.
let cap = lookup("gpt-5-codex").unwrap();
assert!(cap.reasoning.is_some());
}
#[test]
fn codex_mini_latest_is_reasoning() {
// Model exclusive to the ChatGPT backend; falls back to Reasoning via the `codex-` prefix.
let cap = lookup("codex-mini-latest").unwrap();
assert!(cap.reasoning.is_some());
}
#[test]
fn unknown_model_returns_none() {
// An ID matching no known prefix has no table entry.
assert!(lookup("foo-bar-3000").is_none());
}
}
pub(crate) fn default_capability() -> ModelCapability {
ModelCapability {

View File

@ -5,15 +5,16 @@
//! insomnia 側 1 次元 `BlockStart/Delta/Stop::index` のマッピングは
//! [`OpenAIResponsesState`] が保持する。
use std::collections::HashMap;
use std::collections::{BTreeMap, HashMap};
use serde::Deserialize;
use serde_json::{Map, Value};
use crate::llm_client::{
ClientError,
event::{
BlockDelta, BlockMetadata, BlockStart, BlockStop, BlockType, DeltaContent, ErrorEvent,
Event, ResponseStatus, StatusEvent, UsageEvent,
Event, ReasoningItemEvent, ResponseStatus, StatusEvent, UnhandledSseEvent, UsageEvent,
},
};
@ -22,6 +23,21 @@ use crate::llm_client::{
pub struct OpenAIResponsesState {
slots: HashMap<SlotKey, SlotInfo>,
next_index: usize,
/// 蓄積中の reasoning output_item。`output_item.added`(Reasoning) で
/// 確保し、`reasoning_text.delta` / `reasoning_summary_text.delta` で
/// 蓄積、`output_item.done`(Reasoning) で `Event::ReasoningItem` を
/// 発火してエントリを除去する。
pending_reasoning: HashMap<usize, PendingReasoning>,
}
/// Accumulation buffer for a single reasoning output_item.
#[derive(Debug, Default)]
struct PendingReasoning {
/// Item id, when one has been reported for this reasoning item.
id: Option<String>,
/// Accumulated `reasoning_text.delta` text. When there are several content
/// parts they are concatenated in arrival order.
text: String,
/// Accumulated `reasoning_summary_text.delta` text, one entry per summary_index.
summary: Vec<String>,
}
impl OpenAIResponsesState {
@ -38,17 +54,25 @@ impl OpenAIResponsesState {
/// 既存 slot を取得。無ければ `block_type` で暗黙に確保し、
/// 新規確保したかを併せて返す。delta 先行 / content_part.added が
/// 抜けたときの防御。
fn get_or_allocate(
&mut self,
key: SlotKey,
block_type: BlockType,
) -> (SlotInfo, bool) {
fn get_or_allocate(&mut self, key: SlotKey, block_type: BlockType) -> (SlotInfo, bool) {
if let Some(info) = self.slots.get(&key).copied() {
(info, false)
} else {
(self.allocate(key, block_type), true)
}
}
fn ensure_reasoning(&mut self, output_index: usize) -> &mut PendingReasoning {
self.pending_reasoning.entry(output_index).or_default()
}
/// Appends `text` to the summary part `summary_index` of the reasoning item at
/// `output_index`.
///
/// The accumulator is created on demand, and the summary list is padded with
/// empty strings so that `summary_index` is always a valid position.
fn extend_reasoning_summary(&mut self, output_index: usize, summary_index: usize, text: &str) {
    let parts = &mut self.ensure_reasoning(output_index).summary;
    let missing_slot = parts.len() <= summary_index;
    if missing_slot {
        // Grow with empty placeholders up to and including the requested index.
        parts.resize(summary_index + 1, String::new());
    }
    parts[summary_index].push_str(text);
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
@ -93,8 +117,12 @@ enum OutputItem {
id: Option<String>,
},
Reasoning {
#[allow(dead_code)]
#[serde(default)]
id: Option<String>,
/// `output_item.done` で初めて埋まる。`include=["reasoning.encrypted_content"]`
/// 指定時に opaque blob が乗る。
#[serde(default)]
encrypted_content: Option<String>,
},
FunctionCall {
#[allow(dead_code)]
@ -214,17 +242,30 @@ struct ResponsesUsage {
output_tokens: Option<u64>,
#[serde(default)]
total_tokens: Option<u64>,
/// `input_tokens` の内訳。`cached_tokens` がプロンプトキャッシュヒット分。
#[serde(default)]
input_tokens_details: Option<InputTokensDetails>,
}
/// Breakdown of the `input_tokens` usage figure.
#[derive(Debug, Deserialize)]
struct InputTokensDetails {
/// Input tokens served from the prompt cache; absent when the field is not sent.
#[serde(default)]
cached_tokens: Option<u64>,
}
/// Payload of a `response.failed` / `response.incomplete` SSE event.
#[derive(Debug, Deserialize)]
struct ResponseFailed {
/// The response object carried by the event.
response: FailedResponse,
/// Every other top-level field of the event, kept so it can be surfaced in
/// the error diagnostic instead of being dropped.
#[serde(flatten)]
extra: BTreeMap<String, Value>,
}
/// The `response` object inside a failed / incomplete event.
#[derive(Debug, Deserialize)]
struct FailedResponse {
/// Error detail; absent when the response carries no `error` field.
#[serde(default)]
error: Option<ErrorDetail>,
/// Every other field of the response object (for example `incomplete_details`),
/// kept for the error diagnostic.
#[serde(flatten)]
extra: BTreeMap<String, Value>,
}
#[derive(Debug, Deserialize)]
@ -233,6 +274,17 @@ struct ErrorDetail {
error_type: Option<String>,
#[serde(default)]
message: Option<String>,
#[serde(default)]
code: Option<String>,
#[serde(flatten)]
extra: BTreeMap<String, Value>,
}
/// Payload of a top-level `error` SSE event: an `error` object plus any sibling fields.
#[derive(Debug, Deserialize)]
struct TopLevelErrorEnvelope {
/// The error object itself.
error: TopLevelError,
/// Every other top-level field of the event, kept for the error diagnostic.
#[serde(flatten)]
extra: BTreeMap<String, Value>,
}
#[derive(Debug, Deserialize)]
@ -243,6 +295,8 @@ struct TopLevelError {
error_type: Option<String>,
#[serde(default)]
code: Option<String>,
#[serde(flatten)]
extra: BTreeMap<String, Value>,
}
// ============================================================================
@ -251,9 +305,9 @@ struct TopLevelError {
/// SSE フレーム 1 件をパースし、0 個以上の [`Event`] に変換する。
///
/// `event_type` は SSE の `event:` フィールド。未対応の event
/// 静かに無視する。`data` が JSON でない / 必要なフィールドが抜けて
/// いる等は [`ClientError::Api`] で返す。
/// `event_type` は SSE の `event:` フィールド。未対応の event type は
/// [`Event::UnhandledSse`] として観測可能にする。`data` が JSON でない /
/// 必要なフィールドが抜けている等は [`ClientError::Api`] で返す。
pub(crate) fn parse_sse(
event_type: &str,
data: &str,
@ -274,7 +328,10 @@ pub(crate) fn parse_sse(
total_tokens: usage.total_tokens.or_else(|| {
Some(usage.input_tokens.unwrap_or(0) + usage.output_tokens.unwrap_or(0))
}),
cache_read_input_tokens: None,
cache_read_input_tokens: usage
.input_tokens_details
.and_then(|d| d.cached_tokens),
// Responses API は cache 書き込みを別計上しない(input_tokens に含まれる)
cache_creation_input_tokens: None,
}));
}
@ -286,10 +343,7 @@ pub(crate) fn parse_sse(
"response.failed" | "response.incomplete" => {
let ev: ResponseFailed = from_json(data)?;
let (code, message) = match ev.response.error {
Some(err) => (err.error_type, err.message.unwrap_or_default()),
None => (None, format!("response {event_type}")),
};
let (code, message) = response_failure_diagnostic(event_type, ev);
Ok(vec![
Event::Error(ErrorEvent { code, message }),
Event::Status(StatusEvent {
@ -303,23 +357,57 @@ pub(crate) fn parse_sse(
match ev.item {
OutputItem::FunctionCall { call_id, name, .. }
| OutputItem::CustomToolCall { call_id, name, .. } => {
let info = state
.allocate(SlotKey::OutputItem(ev.output_index), BlockType::ToolUse);
let info =
state.allocate(SlotKey::OutputItem(ev.output_index), BlockType::ToolUse);
Ok(vec![Event::BlockStart(BlockStart {
index: info.flat_index,
block_type: BlockType::ToolUse,
metadata: BlockMetadata::ToolUse {
id: call_id,
name,
},
metadata: BlockMetadata::ToolUse { id: call_id, name },
})])
}
OutputItem::Reasoning { id, .. } => {
// wrapper を確保。中身の content_part / summary_part は
// 別 SlotKey で扱われ続ける(Streaming 表示は維持)。
let entry = state.ensure_reasoning(ev.output_index);
if id.is_some() {
entry.id = id;
}
Ok(Vec::new())
}
_ => Ok(Vec::new()),
}
}
"response.output_item.done" => {
let ev: OutputItemDone = from_json(data)?;
// Reasoning wrapper の done で蓄積分を ReasoningItem として発火。
// これは `slots` の OutputItem slot とは独立している
// (FunctionCall は slots、Reasoning は pending_reasoning)。
if let OutputItem::Reasoning {
id,
encrypted_content,
..
} = ev.item
{
let mut pending = state
.pending_reasoning
.remove(&ev.output_index)
.unwrap_or_default();
if pending.id.is_none() {
pending.id = id;
}
return Ok(vec![Event::ReasoningItem(ReasoningItemEvent {
id: pending.id,
text: pending.text,
summary: pending
.summary
.into_iter()
.filter(|s| !s.is_empty())
.collect(),
encrypted_content,
signature: None,
})]);
}
if let Some(info) = state.slots.remove(&SlotKey::OutputItem(ev.output_index)) {
Ok(vec![Event::BlockStop(BlockStop {
index: info.flat_index,
@ -384,6 +472,11 @@ pub(crate) fn parse_sse(
"response.reasoning_text.delta" => {
let ev: ReasoningTextDelta = from_json(data)?;
// round-trip 用に蓄積
state
.ensure_reasoning(ev.output_index)
.text
.push_str(&ev.delta);
Ok(ensure_and_delta(
state,
SlotKey::ContentPart {
@ -414,6 +507,8 @@ pub(crate) fn parse_sse(
"response.reasoning_summary_text.delta" => {
let ev: ReasoningSummaryTextDelta = from_json(data)?;
// round-trip 用に蓄積
state.extend_reasoning_summary(ev.output_index, ev.summary_index, &ev.delta);
Ok(ensure_and_delta(
state,
SlotKey::Summary {
@ -471,22 +566,167 @@ pub(crate) fn parse_sse(
}
"error" => {
let ev: TopLevelError = from_json(data).unwrap_or(TopLevelError {
message: Some(data.to_string()),
error_type: None,
code: None,
let ev = from_json::<TopLevelErrorEnvelope>(data).unwrap_or_else(|_| {
TopLevelErrorEnvelope {
error: TopLevelError {
message: Some(data.to_string()),
error_type: None,
code: None,
extra: BTreeMap::new(),
},
extra: BTreeMap::new(),
}
});
Ok(vec![Event::Error(ErrorEvent {
code: ev.error_type.or(ev.code),
message: ev.message.unwrap_or_default(),
})])
let (code, message) = top_level_error_diagnostic(ev);
Ok(vec![Event::Error(ErrorEvent { code, message })])
}
// 未対応 / 情報系イベントは無視
_ => Ok(Vec::new()),
// 未対応 / 情報系 event type は生成 semantics からは無視しつつ trace に残す。
_ => Ok(vec![unhandled_sse_event(event_type, data)]),
}
}
/// Builds the `(code, message)` pair reported for a `response.failed` /
/// `response.incomplete` event.
///
/// * `code` is the error's `code`, else its `error_type`, else the
///   `incomplete_details.reason` string found in the response's extra fields.
/// * `message` is the error message, or `OpenAI Responses {event_type}` when the
///   message is missing or blank, passed through `append_diagnostic` together with
///   the collected diagnostic fields (error type/code, extra fields, incomplete reason).
fn response_failure_diagnostic(event_type: &str, ev: ResponseFailed) -> (Option<String>, String) {
    let mut fields = Map::new();
    fields.insert("event".to_string(), Value::String(event_type.to_string()));

    let (mut code, base_message) = match ev.response.error {
        Some(detail) => {
            // Prefer the specific code over the broader error type.
            let resolved = detail.code.clone().or(detail.error_type.clone());
            if let Some(kind) = detail.error_type {
                fields.insert("error_type".to_string(), Value::String(kind));
            }
            if let Some(raw_code) = detail.code {
                fields.insert("error_code".to_string(), Value::String(raw_code));
            }
            if !detail.extra.is_empty() {
                fields.insert(
                    "error_extra".to_string(),
                    diagnostic_object(detail.extra, DIAGNOSTIC_VALUE_LIMIT),
                );
            }
            let text = match detail.message {
                Some(text) if !text.trim().is_empty() => text,
                _ => format!("OpenAI Responses {event_type}"),
            };
            (resolved, text)
        }
        None => (None, format!("OpenAI Responses {event_type}")),
    };

    let response_extra = ev.response.extra;
    let incomplete_reason = response_extra
        .get("incomplete_details")
        .and_then(|details| details.get("reason"))
        .and_then(Value::as_str);
    if let Some(reason) = incomplete_reason {
        fields.insert(
            "incomplete_reason".to_string(),
            Value::String(reason.to_string()),
        );
        // The incomplete reason only serves as the code when the error gave none.
        code = code.or_else(|| Some(reason.to_string()));
    }

    for (label, extra) in [("response_extra", response_extra), ("event_extra", ev.extra)] {
        if !extra.is_empty() {
            fields.insert(
                label.to_string(),
                diagnostic_object(extra, DIAGNOSTIC_VALUE_LIMIT),
            );
        }
    }

    (code, append_diagnostic(base_message, fields))
}
fn top_level_error_diagnostic(ev: TopLevelErrorEnvelope) -> (Option<String>, String) {
let code = ev.error.code.clone().or(ev.error.error_type.clone());
let mut diagnostic = Map::new();
diagnostic.insert("event".to_string(), Value::String("error".to_string()));
if let Some(error_type) = ev.error.error_type {
diagnostic.insert("error_type".to_string(), Value::String(error_type));
}
if let Some(error_code) = ev.error.code {
diagnostic.insert("error_code".to_string(), Value::String(error_code));
}
if !ev.error.extra.is_empty() {
diagnostic.insert(
"error_extra".to_string(),
diagnostic_object(ev.error.extra, DIAGNOSTIC_VALUE_LIMIT),
);
}
if !ev.extra.is_empty() {
diagnostic.insert(
"event_extra".to_string(),
diagnostic_object(ev.extra, DIAGNOSTIC_VALUE_LIMIT),
);
}
let message = ev
.error
.message
.filter(|message| !message.trim().is_empty())
.unwrap_or_else(|| "OpenAI Responses error".to_string());
(code, append_diagnostic(message, diagnostic))
}
/// Per-value size limit passed to `diagnostic_object` when extra fields are
/// embedded in an error diagnostic.
const DIAGNOSTIC_VALUE_LIMIT: usize = 512;
/// Maximum number of bytes of raw SSE `data` kept in an unhandled-event preview.
const UNHANDLED_SSE_DATA_PREVIEW_LIMIT: usize = 512;

/// Returns `data` shortened to at most [`UNHANDLED_SSE_DATA_PREVIEW_LIMIT`] bytes.
///
/// Input that already fits is returned whole. Longer input is cut at the last
/// character boundary that does not exceed the limit, so a multi-byte character
/// is never split.
fn capped_unhandled_sse_data_preview(data: &str) -> String {
    let mut cut = data.len().min(UNHANDLED_SSE_DATA_PREVIEW_LIMIT);
    // Offset 0 is always a boundary, so this walk-back terminates.
    while !data.is_char_boundary(cut) {
        cut -= 1;
    }
    data[..cut].to_owned()
}
/// Wraps an SSE frame of an unsupported event type in [`Event::UnhandledSse`].
///
/// The event records the provider tag `openai_responses`, the event type, a
/// size-capped preview of `data`, and the byte length of the original `data`.
fn unhandled_sse_event(event_type: &str, data: &str) -> Event {
    let data_len = data.len();
    let data_preview = capped_unhandled_sse_data_preview(data);
    let payload = UnhandledSseEvent {
        provider: String::from("openai_responses"),
        event_type: event_type.to_owned(),
        data_preview,
        data_len,
    };
    Event::UnhandledSse(payload)
}
fn diagnostic_object(extra: BTreeMap<String, Value>, value_limit: usize) -> Value {
Value::Object(
extra
.into_iter()
.map(|(key, value)| (key, cap_json_value(value, value_limit)))
.collect(),
)
}
/// Caps a single diagnostic JSON value at `limit`.
///
/// The value is rendered with its `Display` form. When that rendering is at most
/// `limit` bytes long the value is returned untouched; otherwise it is replaced by
/// a `Value::String` holding the first `limit` characters of the rendering.
fn cap_json_value(value: Value, limit: usize) -> Value {
    let rendered = value.to_string();
    if rendered.len() <= limit {
        return value;
    }
    // Truncating by `chars()` keeps the cut on a character boundary. The collected
    // `String` is used directly; wrapping it in `format!` would only copy it again.
    Value::String(rendered.chars().take(limit).collect())
}
/// Appends the diagnostic map to `message` as ` | diagnostic={json}`.
///
/// A map with at most one entry is treated as carrying nothing worth reporting
/// (the callers in this file always insert the `event` entry first), so `message`
/// is returned unchanged in that case.
fn append_diagnostic(message: String, diagnostic: Map<String, Value>) -> String {
    match diagnostic.len() {
        0 | 1 => message,
        _ => format!("{} | diagnostic={}", message, Value::Object(diagnostic)),
    }
}
/// 対応する BlockStart がまだ発行されていなければ発行しつつ、delta を流す。
/// content_part.added を取りこぼしても delta 単独で復旧できるようにする。
fn ensure_and_delta(
@ -517,6 +757,7 @@ fn from_json<T: for<'de> Deserialize<'de>>(data: &str) -> Result<T, ClientError>
status: None,
code: Some("parse_error".to_string()),
message: format!("Failed to parse SSE data: {e}"),
retry_after: None,
})
}
@ -530,11 +771,7 @@ mod tests {
(events, state)
}
fn with(
state: &mut OpenAIResponsesState,
event_type: &str,
data: &str,
) -> Vec<Event> {
fn with(state: &mut OpenAIResponsesState, event_type: &str, data: &str) -> Vec<Event> {
parse_sse(event_type, data, state).unwrap()
}
@ -551,7 +788,8 @@ mod tests {
#[test]
fn completed_emits_usage_and_status() {
let data = r#"{"response":{"usage":{"input_tokens":10,"output_tokens":20,"total_tokens":30}}}"#;
let data =
r#"{"response":{"usage":{"input_tokens":10,"output_tokens":20,"total_tokens":30}}}"#;
let (events, _) = run("response.completed", data);
assert!(matches!(events[0], Event::Usage(_)));
assert!(matches!(
@ -564,9 +802,31 @@ mod tests {
assert_eq!(u.input_tokens, Some(10));
assert_eq!(u.output_tokens, Some(20));
assert_eq!(u.total_tokens, Some(30));
assert_eq!(u.cache_read_input_tokens, None);
assert_eq!(u.cache_creation_input_tokens, None);
}
}
// `usage.input_tokens_details.cached_tokens` of a `response.completed` event must be
// reported as `cache_read_input_tokens`, alongside the regular token counts.
#[test]
fn completed_extracts_cached_tokens_from_input_tokens_details() {
let data = r#"{"response":{"usage":{
"input_tokens":12345,
"input_tokens_details":{"cached_tokens":11000},
"output_tokens":50,
"total_tokens":12395
}}}"#;
let (events, _) = run("response.completed", data);
let Event::Usage(u) = &events[0] else {
panic!("expected usage")
};
assert_eq!(u.input_tokens, Some(12345));
assert_eq!(u.output_tokens, Some(50));
assert_eq!(u.total_tokens, Some(12395));
assert_eq!(u.cache_read_input_tokens, Some(11000));
// OpenAI Responses does not account for cache writes separately.
assert_eq!(u.cache_creation_input_tokens, None);
}
#[test]
fn text_stream_start_delta_stop() {
let mut state = OpenAIResponsesState::default();
@ -761,8 +1021,7 @@ mod tests {
#[test]
fn failed_response_emits_error_and_status() {
let data =
r#"{"response":{"error":{"type":"invalid_request_error","message":"bad"}}}"#;
let data = r#"{"response":{"error":{"type":"invalid_request_error","message":"bad"}}}"#;
let (events, _) = run("response.failed", data);
assert_eq!(events.len(), 2);
assert!(matches!(events[0], Event::Error(_)));
@ -775,8 +1034,207 @@ mod tests {
}
#[test]
fn unknown_event_is_ignored() {
let (events, _) = run("response.in_progress", "{}");
assert!(events.is_empty());
fn incomplete_response_preserves_incomplete_reason_without_error() {
let data = r#"{
"response": {
"status": "incomplete",
"incomplete_details": {"reason": "max_output_tokens"}
}
}"#;
let (events, _) = run("response.incomplete", data);
let Event::Error(err) = &events[0] else {
panic!("expected error event")
};
assert_eq!(err.code.as_deref(), Some("max_output_tokens"));
assert!(err.message.contains("OpenAI Responses response.incomplete"));
assert!(err.message.contains("incomplete_reason"));
assert!(err.message.contains("max_output_tokens"));
assert!(!err.message.ends_with("response response.incomplete"));
}
// Unknown fields on both the response object and the event itself must show up
// in the error message, together with the incomplete reason.
#[test]
fn incomplete_response_preserves_unknown_response_fields() {
let data = r#"{
"response": {
"status": "incomplete",
"incomplete_details": {"reason": "content_filter"},
"mystery_field": {"nested": true}
},
"sequence_number": 42
}"#;
let (events, _) = run("response.incomplete", data);
let Event::Error(err) = &events[0] else {
panic!("expected error event")
};
assert!(err.message.contains("mystery_field"));
assert!(err.message.contains("sequence_number"));
assert!(err.message.contains("content_filter"));
}
// For `response.failed`, the error's `code` wins as the reported code, and extra
// fields of both the error object and the response object appear in the message.
#[test]
fn failed_response_preserves_error_and_response_extra_fields() {
let data = r#"{
"response": {
"error": {
"type": "server_error",
"code": "upstream_overloaded",
"message": "try later",
"param": "input"
},
"retry_hint": "short"
}
}"#;
let (events, _) = run("response.failed", data);
let Event::Error(err) = &events[0] else {
panic!("expected error event")
};
assert_eq!(err.code.as_deref(), Some("upstream_overloaded"));
assert!(err.message.contains("try later"));
assert!(err.message.contains("param"));
assert!(err.message.contains("retry_hint"));
}
// For a top-level `error` event, the error's `code` is reported as the code, and
// unknown fields inside and beside the `error` object appear in the message.
#[test]
fn top_level_error_preserves_unknown_fields() {
let data = r#"{
"error": {
"type": "rate_limit_error",
"code": "rate_limit_exceeded",
"message": "slow down",
"retry_after_ms": 1000
},
"request_id": "req_123"
}"#;
let (events, _) = run("error", data);
let Event::Error(err) = &events[0] else {
panic!("expected error event")
};
assert_eq!(err.code.as_deref(), Some("rate_limit_exceeded"));
assert!(err.message.contains("slow down"));
assert!(err.message.contains("retry_after_ms"));
assert!(err.message.contains("request_id"));
}
#[test]
fn reasoning_output_item_emits_reasoning_item_with_text_summary_encrypted() {
// A completed reasoning wrapper must arrive as a ReasoningItem carrying
// text + summary[] + encrypted_content.
let mut state = OpenAIResponsesState::default();
// wrapper added (carries only the id)
with(
&mut state,
"response.output_item.added",
r#"{"output_index":0,"item":{"type":"reasoning","id":"r1"}}"#,
);
// inner content_part for reasoning_text
with(
&mut state,
"response.content_part.added",
r#"{"output_index":0,"content_index":0,"item_id":"r1","part":{"type":"reasoning_text","text":""}}"#,
);
with(
&mut state,
"response.reasoning_text.delta",
r#"{"output_index":0,"content_index":0,"item_id":"r1","delta":"hello "}"#,
);
with(
&mut state,
"response.reasoning_text.delta",
r#"{"output_index":0,"content_index":0,"item_id":"r1","delta":"world"}"#,
);
with(
&mut state,
"response.content_part.done",
r#"{"output_index":0,"content_index":0,"item_id":"r1","part":{"type":"reasoning_text","text":"hello world"}}"#,
);
// one summary part
with(
&mut state,
"response.reasoning_summary_part.added",
r#"{"output_index":0,"summary_index":0,"item_id":"r1","part":{"type":"summary_text","text":""}}"#,
);
with(
&mut state,
"response.reasoning_summary_text.delta",
r#"{"output_index":0,"summary_index":0,"item_id":"r1","delta":"sum-A"}"#,
);
with(
&mut state,
"response.reasoning_summary_part.done",
r#"{"output_index":0,"summary_index":0,"item_id":"r1"}"#,
);
// wrapper done (this is where encrypted_content arrives)
let evs = with(
&mut state,
"response.output_item.done",
r#"{"output_index":0,"item":{"type":"reasoning","id":"r1","encrypted_content":"ENC-XYZ"}}"#,
);
assert_eq!(evs.len(), 1);
let Event::ReasoningItem(reasoning) = &evs[0] else {
panic!("expected ReasoningItem, got {:?}", evs[0]);
};
assert_eq!(reasoning.id.as_deref(), Some("r1"));
assert_eq!(reasoning.text, "hello world");
assert_eq!(reasoning.summary, vec!["sum-A".to_string()]);
assert_eq!(reasoning.encrypted_content.as_deref(), Some("ENC-XYZ"));
assert!(reasoning.signature.is_none());
// pending_reasoning must have been drained
assert!(state.pending_reasoning.is_empty());
}
#[test]
fn reasoning_wrapper_without_inner_content_emits_empty_text() {
// Even when only encrypted_content arrives (no reasoning_text), a
// ReasoningItem is still emitted.
let mut state = OpenAIResponsesState::default();
with(
&mut state,
"response.output_item.added",
r#"{"output_index":2,"item":{"type":"reasoning","id":"r9"}}"#,
);
let evs = with(
&mut state,
"response.output_item.done",
r#"{"output_index":2,"item":{"type":"reasoning","id":"r9","encrypted_content":"BLOB"}}"#,
);
let Event::ReasoningItem(r) = &evs[0] else {
panic!()
};
assert!(r.text.is_empty());
assert!(r.summary.is_empty());
assert_eq!(r.encrypted_content.as_deref(), Some("BLOB"));
}
// An unsupported event type must yield exactly one `Event::UnhandledSse` that
// records the provider, the event type, the (short) data verbatim and its length.
#[test]
fn unknown_event_emits_trace_visible_unhandled_sse() {
let data = r#"{"sequence_number":7,"note":"debug me"}"#;
let (events, _) = run("response.mystery", data);
assert_eq!(events.len(), 1);
let Event::UnhandledSse(unhandled) = &events[0] else {
panic!("expected UnhandledSse, got {:?}", events[0]);
};
assert_eq!(unhandled.provider, "openai_responses");
assert_eq!(unhandled.event_type, "response.mystery");
assert_eq!(unhandled.data_preview, data);
assert_eq!(unhandled.data_len, data.len());
}
// For oversized data the preview is cut to the preview limit (here exactly the
// leading run of ASCII `x`), while `data_len` still reports the original byte length.
#[test]
fn unknown_event_data_preview_is_bounded_and_data_len_is_original_bytes() {
let data = format!("{}終端", "x".repeat(UNHANDLED_SSE_DATA_PREVIEW_LIMIT + 32));
let (events, _) = run("response.mystery.large", &data);
assert_eq!(events.len(), 1);
let Event::UnhandledSse(unhandled) = &events[0] else {
panic!("expected UnhandledSse, got {:?}", events[0]);
};
assert_eq!(unhandled.data_len, data.len());
assert!(unhandled.data_preview.len() <= UNHANDLED_SSE_DATA_PREVIEW_LIMIT);
assert_eq!(
unhandled.data_preview,
"x".repeat(UNHANDLED_SSE_DATA_PREVIEW_LIMIT)
);
assert!(unhandled.data_preview.len() < unhandled.data_len);
}
}

View File

@ -16,11 +16,13 @@ pub use scheme_impl::OpenAIResponsesState;
/// OpenAI Responses scheme 本体。
///
/// `store` / `include_encrypted_content` は scheme 固定の wire 設定で、
/// デフォルトは stateless + ZDR 相当 (`store=false`, `include=[...]`)。
/// 将来 ZDR 非対応環境で `store=true` にしたくなった場合に限り override
/// する。`ModelCapability` には入れない(これはモデルの能力ではなく、
/// クライアントの運用方針)。
/// `store` / `include_encrypted_content` / `send_max_output_tokens` /
/// `send_sampling_params` は scheme 固定の wire 設定で、デフォルトは
/// 公式 OpenAI Responses API 向け (stateless + ZDR + `max_output_tokens`
/// / `temperature` / `top_p` 送出可)。ChatGPT backend (codex-oauth) の
/// ように受理パラメータが subset の経路では provider 層で
/// `send_max_output_tokens=false` / `send_sampling_params=false` に
/// 上書きする。`ModelCapability` には入れない(モデル能力ではなく wire policy)。
#[derive(Debug, Clone)]
pub struct OpenAIResponsesScheme {
/// サーバ側に response を保存するか。ZDR/stateless 運用では `false`。
@ -28,6 +30,14 @@ pub struct OpenAIResponsesScheme {
/// `include: ["reasoning.encrypted_content"]` を付けるか。
/// `store=false` で reasoning を使うなら必須。
pub include_encrypted_content: bool,
/// `max_output_tokens` を body に載せるか。公式 OpenAI Responses API は
/// 受理するが、ChatGPT backend (codex-oauth) は `Unsupported parameter`
/// で 400 を返すため、その経路では `false` にする。
pub send_max_output_tokens: bool,
/// `temperature` / `top_p` を body に載せるか。公式 OpenAI Responses API
/// は受理するが、ChatGPT backend (codex-oauth) は `Unsupported parameter`
/// で 400 を返すため、その経路では `false` にする。
pub send_sampling_params: bool,
}
impl Default for OpenAIResponsesScheme {
@ -35,12 +45,15 @@ impl Default for OpenAIResponsesScheme {
Self {
store: false,
include_encrypted_content: true,
send_max_output_tokens: true,
send_sampling_params: true,
}
}
}
impl OpenAIResponsesScheme {
/// デフォルト設定 (`store=false`, `include=["reasoning.encrypted_content"]`)。
/// デフォルト設定 (`store=false`, `include=["reasoning.encrypted_content"]`,
/// `send_max_output_tokens=true`, `send_sampling_params=true`)。
pub fn new() -> Self {
Self::default()
}
@ -56,4 +69,16 @@ impl OpenAIResponsesScheme {
self.include_encrypted_content = include;
self
}
/// `max_output_tokens` を body に載せるかを上書き。
pub fn with_send_max_output_tokens(mut self, send: bool) -> Self {
self.send_max_output_tokens = send;
self
}
/// `temperature` / `top_p` を body に載せるかを上書き。
pub fn with_send_sampling_params(mut self, send: bool) -> Self {
self.send_sampling_params = send;
self
}
}

View File

@ -4,12 +4,12 @@
//! item 配列で reasoning / function_call / function_call_output が
//! first-class。`Item` を素に近い形で `input[]` に投影できる。
use serde::Serialize;
use serde::{Serialize, Serializer};
use serde_json::Value;
use crate::llm_client::{
Request,
capability::{ModelCapability, ReasoningEffort, ReasoningSupport},
capability::{ModelCapability, ReasoningControl, ReasoningSupport},
types::{ContentPart, Item, Role, ToolDefinition, parse_tool_arguments},
};
@ -38,19 +38,30 @@ pub(crate) struct ResponsesRequest {
/// `["reasoning.encrypted_content"]` 等。
#[serde(skip_serializing_if = "Vec::is_empty")]
pub include: Vec<&'static str>,
/// 公式 OpenAI Responses API では受理されるが、ChatGPT backend
/// (codex-oauth) は 400 で弾く。scheme の `send_max_output_tokens`
/// が `false` のときは `None` のまま送る (skip_serializing_if で除外)。
#[serde(skip_serializing_if = "Option::is_none")]
pub max_output_tokens: Option<u32>,
/// 公式 OpenAI Responses API では受理されるが、ChatGPT backend
/// (codex-oauth) は `temperature` / `top_p` を 400 で弾く。scheme の
/// `send_sampling_params` が `false` のときは `None` のまま送る。
#[serde(skip_serializing_if = "Option::is_none")]
pub temperature: Option<f32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub top_p: Option<f32>,
/// 会話単位の安定キー。ChatGPT backend (codex-oauth) は明示キーが
/// 無いとプロンプトキャッシュがほぼ効かない。pod 側は `SegmentId`
/// を渡す。`Request::cache_key` が `None` のときはキー自体を送らない。
#[serde(skip_serializing_if = "Option::is_none")]
pub prompt_cache_key: Option<String>,
}
/// reasoning 制御。
#[derive(Debug, Serialize)]
pub(crate) struct ReasoningConfig {
#[serde(skip_serializing_if = "Option::is_none")]
pub effort: Option<&'static str>,
pub effort: Option<String>,
/// summary の出力制御。`"auto"` 固定で summary_text を受け取る。
pub summary: &'static str,
}
@ -62,7 +73,11 @@ pub(crate) struct ReasoningConfig {
#[derive(Debug, Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub(crate) enum InputItem {
/// 会話メッセージ。user / assistant / system のいずれか。
/// 会話メッセージ。user / assistant / developer のいずれか。
/// `Role::System` items は `developer` として投影する(ChatGPT
/// backend が `role: "system"` を拒否するため。Codex CLI も
/// system 相当の挿入には DeveloperInstructions = `role: "developer"`
/// を使う)。
Message {
role: &'static str,
content: Vec<InputContent>,
@ -85,7 +100,11 @@ pub(crate) enum InputItem {
Reasoning {
#[serde(skip_serializing_if = "Option::is_none")]
id: Option<String>,
#[serde(skip_serializing_if = "Vec::is_empty")]
/// Responses API は reasoning item に `summary` フィールドを必須で
/// 要求する(中身が空でも `[]` として送る必要がある)。GPT-5 など
/// summary を返さないモデル + reasoning effort 指定なしのターンでは
/// summary text が一切付かないので、ここを skip すると 400
/// "Missing required parameter: 'input[N].summary'" で弾かれる。
summary: Vec<ReasoningSummaryPart>,
#[serde(skip_serializing_if = "Vec::is_empty")]
content: Vec<ReasoningContentPart>,
@ -98,7 +117,7 @@ pub(crate) enum InputItem {
#[derive(Debug, Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub(crate) enum InputContent {
/// user / system 側のテキスト
/// user / developer 側のテキスト
InputText { text: String },
/// assistant 側のテキスト
OutputText { text: String },
@ -125,11 +144,28 @@ pub(crate) struct ResponseTool {
pub name: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub description: Option<String>,
/// OpenAI Responses API は `type:"object"` のパラメータスキーマに
/// `properties` が存在することを要求する。schemars は引数なし struct
/// から `properties` を含まない最小スキーマを出すので、serialize
/// 時に空オブジェクトを補う。
#[serde(serialize_with = "serialize_parameters")]
pub parameters: Value,
/// Structured output モード制御。デフォルト false。
pub strict: bool,
}
/// Serializes a tool parameter schema, adding an empty `properties` object
/// when the schema is a JSON object with `"type": "object"` and no
/// `properties` key. Every other value is serialized unchanged.
fn serialize_parameters<S: Serializer>(value: &Value, s: S) -> Result<S::Ok, S::Error> {
    // Only object schemas that declare `type: "object"` without `properties`
    // need patching.
    let incomplete_schema = value.as_object().filter(|schema| {
        schema.get("type").and_then(Value::as_str) == Some("object")
            && !schema.contains_key("properties")
    });

    match incomplete_schema {
        Some(schema) => {
            // Patch a copy; the caller's schema is left untouched.
            let mut patched = schema.clone();
            patched.insert("properties".to_string(), Value::Object(Default::default()));
            Value::Object(patched).serialize(s)
        }
        None => value.serialize(s),
    }
}
impl OpenAIResponsesScheme {
/// `Request` から wire 形式の body を組み立てる。
pub(crate) fn build_request(
@ -151,16 +187,15 @@ impl OpenAIResponsesScheme {
.config
.reasoning
.as_ref()
.and_then(|rc| rc.effort)
.filter(|_| supports_effort)
.map(|effort| ReasoningConfig {
effort: Some(match effort {
ReasoningEffort::Low => "low",
ReasoningEffort::Medium => "medium",
ReasoningEffort::High => "high",
}),
effort: match effort {
ReasoningControl::Effort(effort) => Some(effort.as_str().to_string()),
ReasoningControl::BudgetTokens(_) => None,
},
summary: "auto",
});
})
.filter(|reasoning| reasoning.effort.is_some());
let include: Vec<&'static str> = if self.include_encrypted_content {
vec!["reasoning.encrypted_content"]
@ -179,9 +214,22 @@ impl OpenAIResponsesScheme {
store: self.store,
stream: true,
include,
max_output_tokens: request.config.max_tokens,
temperature: request.config.temperature,
top_p: request.config.top_p,
max_output_tokens: if self.send_max_output_tokens {
request.config.max_tokens
} else {
None
},
temperature: if self.send_sampling_params {
request.config.temperature
} else {
None
},
top_p: if self.send_sampling_params {
request.config.top_p
} else {
None
},
prompt_cache_key: request.cache_key.clone(),
}
}
}
@ -192,12 +240,12 @@ fn convert_items_to_input(items: &[Item]) -> Vec<InputItem> {
for item in items {
match item {
Item::Message { role, content, .. } => {
let (role_str, text_variant): (&'static str, fn(String) -> InputContent) = match role
{
Role::User => ("user", |t| InputContent::InputText { text: t }),
Role::Assistant => ("assistant", |t| InputContent::OutputText { text: t }),
Role::System => ("system", |t| InputContent::InputText { text: t }),
};
let (role_str, text_variant): (&'static str, fn(String) -> InputContent) =
match role {
Role::User => ("user", |t| InputContent::InputText { text: t }),
Role::Assistant => ("assistant", |t| InputContent::OutputText { text: t }),
Role::System => ("developer", |t| InputContent::InputText { text: t }),
};
let parts: Vec<InputContent> = content
.iter()
.map(|p| match p {
@ -353,6 +401,28 @@ mod tests {
}
}
#[test]
fn system_role_item_is_projected_as_developer() {
    // The ChatGPT backend (codex-oauth) rejects `role: "system"` entries in
    // `input[]` with a 400 "System messages are not allowed". In-conversation
    // system notes (notify / fs_view auto-read / compaction summary) are
    // therefore projected as `role: "developer"` so both backends accept them.
    let request = Request::new()
        .user("hi")
        .item(Item::system_message("[notify] hello"));
    let body =
        OpenAIResponsesScheme::new().build_request("gpt-5", &request, &cap_with_reasoning());

    let InputItem::Message { role, content } = &body.input[1] else {
        panic!("expected message");
    };
    assert_eq!(*role, "developer");
    assert!(matches!(
        &content[0],
        InputContent::InputText { text } if text == "[notify] hello"
    ));
}
#[test]
fn assistant_message_uses_output_text() {
let scheme = OpenAIResponsesScheme::new();
@ -378,7 +448,10 @@ mod tests {
.item(Item::tool_result("c1", "ok"));
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
assert!(matches!(body.input[1], InputItem::FunctionCall { .. }));
assert!(matches!(body.input[2], InputItem::FunctionCallOutput { .. }));
assert!(matches!(
body.input[2],
InputItem::FunctionCallOutput { .. }
));
}
#[test]
@ -404,17 +477,37 @@ mod tests {
}
}
#[test]
fn reasoning_summary_field_is_always_serialized() {
    // The Responses API requires `summary` on every reasoning item. If an
    // empty summary is not kept on the wire as `summary: []`, the ChatGPT
    // backend (codex-oauth) rejects the request with
    //   400 invalid_request_error: Missing required parameter:
    //   'input[N].summary'.
    // Turns on GPT-5 without a reasoning effort may carry no summary text,
    // so the field must not be skipped even when empty.
    let request = Request::new()
        .user("hi")
        .item(Item::reasoning("").with_encrypted_content("ENC"));
    let body =
        OpenAIResponsesScheme::new().build_request("gpt-5", &request, &cap_with_reasoning());

    let json = serde_json::to_value(&body).unwrap();
    let reasoning_item = &json["input"][1];
    assert_eq!(reasoning_item["type"], "reasoning");
    assert!(
        reasoning_item.get("summary").is_some(),
        "summary key must be present even when empty, got: {reasoning_item}"
    );
    assert_eq!(reasoning_item["summary"], serde_json::json!([]));
}
#[test]
fn reasoning_effort_projected_when_supported() {
let scheme = OpenAIResponsesScheme::new();
let mut req = Request::new().user("hi");
req.config.reasoning = Some(ReasoningControl {
effort: Some(ReasoningEffort::High),
budget_tokens: None,
});
req.config.reasoning = Some(ReasoningControl::Effort(ReasoningEffort::High));
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
let reasoning = body.reasoning.expect("reasoning should be set");
assert_eq!(reasoning.effort, Some("high"));
assert_eq!(reasoning.effort.as_deref(), Some("high"));
assert_eq!(reasoning.summary, "auto");
}
@ -422,22 +515,118 @@ mod tests {
fn reasoning_omitted_when_unsupported() {
let scheme = OpenAIResponsesScheme::new();
let mut req = Request::new().user("hi");
req.config.reasoning = Some(ReasoningControl {
effort: Some(ReasoningEffort::High),
budget_tokens: None,
});
req.config.reasoning = Some(ReasoningControl::Effort(ReasoningEffort::High));
let body = scheme.build_request("gpt-4o", &req, &cap_no_reasoning());
assert!(body.reasoning.is_none());
}
#[test]
fn max_output_tokens_passed_through() {
fn max_output_tokens_passed_through_by_default() {
let scheme = OpenAIResponsesScheme::new();
let req = Request::new().user("hi").max_tokens(100);
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
assert_eq!(body.max_output_tokens, Some(100));
}
#[test]
fn max_output_tokens_dropped_when_send_disabled() {
    // With sending disabled, a requested `max_tokens` must vanish from both
    // the typed body and the serialized JSON.
    let request = Request::new().user("hi").max_tokens(100);
    let body = OpenAIResponsesScheme::new()
        .with_send_max_output_tokens(false)
        .build_request("gpt-5", &request, &cap_with_reasoning());
    assert_eq!(body.max_output_tokens, None);

    let json = serde_json::to_value(&body).unwrap();
    assert!(
        json.get("max_output_tokens").is_none(),
        "max_output_tokens key must not appear in serialised body, got: {json}"
    );
}
#[test]
fn sampling_params_passed_through_by_default() {
    // The default scheme forwards `temperature` and `top_p` unchanged.
    let request = Request::new().user("hi").temperature(0.4).top_p(0.9);
    let body =
        OpenAIResponsesScheme::new().build_request("gpt-5", &request, &cap_with_reasoning());
    assert_eq!(body.temperature, Some(0.4));
    assert_eq!(body.top_p, Some(0.9));
}
#[test]
fn sampling_params_dropped_when_send_disabled() {
    // With sending disabled, both sampling parameters must be absent from
    // the typed body and from the serialized JSON.
    let request = Request::new().user("hi").temperature(0.4).top_p(0.9);
    let body = OpenAIResponsesScheme::new()
        .with_send_sampling_params(false)
        .build_request("gpt-5", &request, &cap_with_reasoning());
    assert_eq!(body.temperature, None);
    assert_eq!(body.top_p, None);

    let json = serde_json::to_value(&body).unwrap();
    assert!(
        json.get("temperature").is_none() && json.get("top_p").is_none(),
        "temperature/top_p keys must not appear in serialised body, got: {json}"
    );
}
#[test]
fn prompt_cache_key_passed_through_when_set() {
    // A cache key set on the request must surface as `prompt_cache_key`.
    let request = Request::new().user("hi").cache_key("session-abc");
    let body =
        OpenAIResponsesScheme::new().build_request("gpt-5", &request, &cap_with_reasoning());
    assert_eq!(body.prompt_cache_key.as_deref(), Some("session-abc"));

    let json = serde_json::to_value(&body).unwrap();
    assert_eq!(json["prompt_cache_key"], "session-abc");
}
#[test]
fn prompt_cache_key_omitted_when_none() {
    // Without a cache key the field must not be serialized at all.
    let request = Request::new().user("hi");
    let body =
        OpenAIResponsesScheme::new().build_request("gpt-5", &request, &cap_with_reasoning());
    assert!(body.prompt_cache_key.is_none());

    let json = serde_json::to_value(&body).unwrap();
    assert!(
        json.get("prompt_cache_key").is_none(),
        "prompt_cache_key key must not appear in serialised body, got: {json}"
    );
}
#[test]
fn tool_schema_without_properties_is_normalized() {
    // schemars emits a bare `type: "object"` schema for an argument-less
    // struct. OpenAI Responses rejects a missing `properties` with a 400, so
    // check that an empty object is filled in just before sending.
    let tool = ToolDefinition::new("empty")
        .description("no args")
        .input_schema(serde_json::json!({ "type": "object" }));
    let request = Request::new().tool(tool);
    let body =
        OpenAIResponsesScheme::new().build_request("gpt-5", &request, &cap_with_reasoning());

    let json = serde_json::to_value(&body).unwrap();
    assert_eq!(json["tools"][0]["parameters"]["type"], "object");
    assert!(
        json["tools"][0]["parameters"]["properties"].is_object(),
        "properties must be present as an object, got: {}",
        json["tools"][0]["parameters"]
    );
}
#[test]
fn tool_schema_with_properties_is_untouched() {
    // A schema that already declares `properties` must round-trip unchanged.
    let raw_schema = serde_json::json!({
        "type": "object",
        "properties": { "path": { "type": "string" } },
        "required": ["path"]
    });
    let tool = ToolDefinition::new("t")
        .description("d")
        .input_schema(raw_schema.clone());
    let request = Request::new().tool(tool);
    let body =
        OpenAIResponsesScheme::new().build_request("gpt-5", &request, &cap_with_reasoning());

    let json = serde_json::to_value(&body).unwrap();
    assert_eq!(json["tools"][0]["parameters"], raw_schema);
}
#[test]
fn serialized_body_has_expected_shape() {
// wire 形式が崩れていないかのスモークテスト

View File

@ -6,9 +6,10 @@ use crate::llm_client::{
ClientError,
auth::AuthRequirement,
capability::ModelCapability,
client::ConfigWarning,
event::Event,
scheme::Scheme,
types::Request,
types::{Request, RequestConfig},
};
use super::OpenAIResponsesScheme;
@ -19,11 +20,14 @@ impl Scheme for OpenAIResponsesScheme {
type State = OpenAIResponsesState;
fn default_base_url(&self) -> &'static str {
"https://api.openai.com"
// `/v1` は base_url 側に寄せる。ChatGPT OAuth 経由のときは
// `https://chatgpt.com/backend-api/codex` を base にすれば同じ
// `/responses` path で両系統を吸収できる(Codex CLI 準拠)。
"https://api.openai.com/v1"
}
fn path(&self, _model_id: &str) -> String {
"/v1/responses".to_string()
"/responses".to_string()
}
fn required_auth(&self) -> AuthRequirement {
@ -49,11 +53,36 @@ impl Scheme for OpenAIResponsesScheme {
super::events::parse_sse(event_type, data, state)
}
fn capability_for(&self, model_id: &str) -> Option<ModelCapability> {
super::capability::lookup(model_id)
}
fn default_capability(&self) -> ModelCapability {
super::capability::default_capability()
}
/// Reports request settings that this scheme is configured not to send.
///
/// The ChatGPT backend (codex-oauth) rejects `max_output_tokens`,
/// `temperature` and `top_p` with a 400. When the scheme was built with the
/// matching `send_*` flag off, the body projection already omits them; this
/// only tells the caller that a value they set will be dropped.
fn validate_config(&self, config: &RequestConfig) -> Vec<ConfigWarning> {
    const BACKEND: &str = "OpenAI Responses (ChatGPT backend)";

    // (config field name, whether the user-supplied value is being dropped),
    // in the order the warnings are reported.
    let dropped_fields = [
        (
            "max_tokens",
            !self.send_max_output_tokens && config.max_tokens.is_some(),
        ),
        (
            "temperature",
            !self.send_sampling_params && config.temperature.is_some(),
        ),
        (
            "top_p",
            !self.send_sampling_params && config.top_p.is_some(),
        ),
    ];

    dropped_fields
        .into_iter()
        .filter(|&(_, dropped)| dropped)
        .map(|(field, _)| ConfigWarning::unsupported(field, BACKEND))
        .collect()
}
}

View File

@ -6,20 +6,27 @@
use std::pin::Pin;
use std::sync::Arc;
use std::time::{Duration, Instant};
use async_trait::async_trait;
use eventsource_stream::Eventsource;
use futures::{Stream, StreamExt, TryStreamExt};
use reqwest::header::{CONTENT_TYPE, HeaderMap, HeaderValue};
use reqwest::header::{
ACCEPT, CONTENT_ENCODING, CONTENT_TYPE, HeaderMap, HeaderName, HeaderValue, RETRY_AFTER,
};
use serde_json::{Value, json};
use super::auth::{AuthProvider, AuthRequirement};
use super::capability::ModelCapability;
use super::client::{ConfigWarning, LlmClient};
use super::client::{ConfigWarning, LlmClient, ResponseStream};
use super::error::ClientError;
use super::event::Event;
use super::scheme::Scheme;
use super::types::{Request, RequestConfig};
pub const DEFAULT_STREAM_OPEN_TIMEOUT: Duration = Duration::from_secs(20);
pub const DEFAULT_FIRST_STREAM_EVENT_TIMEOUT: Duration = Duration::from_secs(30);
/// `AuthRef` を解決したランタイム表現。`crates/provider` が構築する。
///
/// - `None`: 認証ヘッダを送らない(Ollama 等の opt-out)
@ -46,7 +53,9 @@ impl ResolvedAuth {
(Self::Custom(_), _) => true,
(
Self::ApiKey(_),
AuthRequirement::Bearer | AuthRequirement::XApiKey | AuthRequirement::QueryParam { .. },
AuthRequirement::Bearer
| AuthRequirement::XApiKey
| AuthRequirement::QueryParam { .. },
) => true,
_ => false,
}
@ -146,6 +155,120 @@ impl<S: Scheme> HttpTransport<S> {
Ok(headers)
}
/// Returns `true` only when auth is a custom provider that reports itself as
/// a Codex backend; every other auth variant yields `false`.
fn is_codex_backend(&self) -> bool {
    matches!(&self.auth, ResolvedAuth::Custom(provider) if provider.is_codex_backend())
}
/// Adds the headers needed for a streaming request.
///
/// `Accept: text/event-stream` is always set. For a Codex backend with a
/// cache key on the request, the key is also sent as both `session_id` and
/// `x-client-request-id`.
///
/// # Errors
/// Returns `ClientError::Config` when the cache key is not a valid header
/// value.
fn apply_stream_headers(
    &self,
    headers: &mut HeaderMap,
    request: &Request,
) -> Result<(), ClientError> {
    headers.insert(ACCEPT, HeaderValue::from_static("text/event-stream"));

    // The conversation headers are Codex-only.
    if !self.is_codex_backend() {
        return Ok(());
    }
    let Some(cache_key) = request.cache_key.as_deref() else {
        return Ok(());
    };

    let conversation_id = HeaderValue::from_str(cache_key)
        .map_err(|e| ClientError::Config(format!("invalid Codex conversation header: {e}")))?;
    headers.insert(
        HeaderName::from_static("session_id"),
        conversation_id.clone(),
    );
    headers.insert(
        HeaderName::from_static("x-client-request-id"),
        conversation_id,
    );
    Ok(())
}
/// Chooses the wire encoding for the request body.
///
/// Non-Codex backends get a clone of the JSON value unchanged. Codex
/// backends get the JSON serialized, zstd-compressed at level 3, and a
/// `Content-Encoding: zstd` header added to `headers`.
///
/// # Errors
/// Propagates JSON serialization failures and maps compression failures to
/// `ClientError::Config`.
fn encode_request_body(
    &self,
    body: &serde_json::Value,
    headers: &mut HeaderMap,
) -> Result<RequestBody, ClientError> {
    if self.is_codex_backend() {
        let serialized = serde_json::to_vec(body)?;
        // Record the uncompressed size before the buffer is consumed.
        let raw_json_bytes = serialized.len();
        let bytes = zstd::stream::encode_all(std::io::Cursor::new(serialized), 3)
            .map_err(|e| ClientError::Config(format!("failed to zstd-compress request: {e}")))?;
        headers.insert(CONTENT_ENCODING, HeaderValue::from_static("zstd"));
        Ok(RequestBody::CompressedJson {
            bytes,
            raw_json_bytes,
        })
    } else {
        Ok(RequestBody::Json(body.clone()))
    }
}
}
/// Request payload in the form it is handed to the HTTP client.
enum RequestBody {
/// Uncompressed JSON value (produced for non-Codex backends).
Json(serde_json::Value),
/// zstd-compressed JSON (produced for the Codex backend).
CompressedJson {
/// Compressed bytes that go on the wire.
bytes: Vec<u8>,
/// Length of the serialized JSON before compression.
raw_json_bytes: usize,
},
}
impl RequestBody {
    /// Length of `body` when serialized to JSON, or `None` if serialization fails.
    fn serialized_len(body: &serde_json::Value) -> Option<usize> {
        serde_json::to_vec(body).ok().map(|serialized| serialized.len())
    }

    /// Short label for the body encoding: `"json"` or `"zstd"`.
    fn encoding(&self) -> &'static str {
        match self {
            Self::Json(_) => "json",
            Self::CompressedJson { .. } => "zstd",
        }
    }

    /// Size of the JSON before any compression.
    ///
    /// For the `Json` variant this serializes the value to measure it.
    fn raw_json_bytes(&self) -> Option<usize> {
        match self {
            Self::CompressedJson { raw_json_bytes, .. } => Some(*raw_json_bytes),
            Self::Json(body) => Self::serialized_len(body),
        }
    }

    /// Size of the payload as it will be sent: the compressed length for
    /// `CompressedJson`, the serialized JSON length for `Json`.
    fn wire_bytes(&self) -> Option<usize> {
        match self {
            Self::CompressedJson { bytes, .. } => Some(bytes.len()),
            Self::Json(body) => Self::serialized_len(body),
        }
    }
}
/// Returns a short static label for the auth variant.
///
/// Used as the `auth_kind` field of transport trace payloads; it never
/// exposes the credential itself.
fn auth_kind(auth: &ResolvedAuth) -> &'static str {
match auth {
ResolvedAuth::None => "none",
ResolvedAuth::ApiKey(_) => "api_key",
ResolvedAuth::Custom(_) => "custom",
}
}
/// Forwards a transport milestone to the request's trace sink, if one is set.
///
/// Does nothing when the request carries no `transport_trace`.
fn emit_transport_trace(request: &Request, label: &str, data: Value) {
    let Some(sink) = request.transport_trace.as_ref() else {
        return;
    };
    sink.emit(label, data);
}
/// Names the JSON type of `value` as a lowercase static string.
///
/// Used as the `body_kind` field of the transport trace, so the trace can
/// describe the body without including its contents.
fn json_value_kind(value: &Value) -> &'static str {
match value {
Value::Null => "null",
Value::Bool(_) => "bool",
Value::Number(_) => "number",
Value::String(_) => "string",
Value::Array(_) => "array",
Value::Object(_) => "object",
}
}
/// Awaits an HTTP response future, giving up after `timeout`.
///
/// # Errors
/// - `ClientError::Timeout { phase, timeout }` when the deadline elapses first.
/// - `ClientError::Http` when the future itself resolves to a `reqwest` error.
async fn response_with_timeout(
    future: impl std::future::Future<Output = Result<reqwest::Response, reqwest::Error>>,
    timeout: Duration,
    phase: &'static str,
) -> Result<reqwest::Response, ClientError> {
    match tokio::time::timeout(timeout, future).await {
        Ok(Ok(response)) => Ok(response),
        Ok(Err(http_error)) => Err(ClientError::Http(http_error)),
        Err(_elapsed) => Err(ClientError::Timeout { phase, timeout }),
    }
}
impl<S: Scheme + Clone> Clone for HttpTransport<S> {
@ -161,6 +284,40 @@ impl<S: Scheme + Clone> Clone for HttpTransport<S> {
}
}
/// Converts an error response into a `ClientError::Api`.
///
/// The status code and an integer-seconds `Retry-After` header (if present
/// and parseable) are always carried over. When the body is JSON, `code` is
/// the `type` string and `message` the `message` string of the `error`
/// object (or of the top-level value when there is no `error` key); a
/// missing `message` falls back to the raw body text. A non-JSON body
/// becomes the message verbatim, with no code.
async fn classify_error_response(resp: reqwest::Response) -> ClientError {
    let status = resp.status().as_u16();
    // Headers must be read before `text()` consumes the response.
    let retry_after = resp
        .headers()
        .get(RETRY_AFTER)
        .and_then(|v| v.to_str().ok())
        .and_then(|s| s.trim().parse::<u64>().ok())
        .map(Duration::from_secs);
    let text = resp.text().await.unwrap_or_default();

    let (code, message) = match serde_json::from_str::<serde_json::Value>(&text) {
        Ok(json) => {
            let error = json.get("error").unwrap_or(&json);
            let code = error.get("type").and_then(|v| v.as_str()).map(String::from);
            let message = error
                .get("message")
                .and_then(|v| v.as_str())
                .unwrap_or(&text)
                .to_string();
            (code, message)
        }
        Err(_) => (None, text),
    };

    ClientError::Api {
        status: Some(status),
        code,
        message,
        retry_after,
    }
}
#[async_trait]
impl<S: Scheme + Clone + 'static> LlmClient for HttpTransport<S> {
fn clone_boxed(&self) -> Box<dyn LlmClient> {
@ -171,48 +328,176 @@ impl<S: Scheme + Clone + 'static> LlmClient for HttpTransport<S> {
self.scheme.validate_config(config)
}
async fn stream(
&self,
request: Request,
) -> Result<Pin<Box<dyn Stream<Item = Result<Event, ClientError>> + Send>>, ClientError> {
async fn stream(&self, request: Request) -> Result<ResponseStream, ClientError> {
let total_started = Instant::now();
let path = self.scheme.path(&self.model_id);
emit_transport_trace(
&request,
"transport_start",
json!({
"model": &self.model_id,
"path": path,
"auth_kind": auth_kind(&self.auth),
"required_auth": format!("{:?}", self.scheme.required_auth()),
"codex_backend": self.is_codex_backend(),
"cache_key_present": request.cache_key.is_some(),
"stream_open_timeout_ms": DEFAULT_STREAM_OPEN_TIMEOUT.as_millis() as u64,
}),
);
let url = self.build_url();
let headers = self.build_headers().await?;
let headers_started = Instant::now();
emit_transport_trace(
&request,
"transport_headers_start",
json!({
"auth_kind": auth_kind(&self.auth),
"required_auth": format!("{:?}", self.scheme.required_auth()),
}),
);
let mut headers = match self.build_headers().await {
Ok(headers) => {
emit_transport_trace(
&request,
"transport_headers_done",
json!({
"elapsed_ms": headers_started.elapsed().as_millis() as u64,
"headers_len": headers.len(),
}),
);
headers
}
Err(error) => {
emit_transport_trace(
&request,
"transport_headers_error",
json!({
"elapsed_ms": headers_started.elapsed().as_millis() as u64,
"error": error.to_string(),
}),
);
return Err(error);
}
};
let stream_headers_started = Instant::now();
if let Err(error) = self.apply_stream_headers(&mut headers, &request) {
emit_transport_trace(
&request,
"transport_stream_headers_error",
json!({
"elapsed_ms": stream_headers_started.elapsed().as_millis() as u64,
"error": error.to_string(),
}),
);
return Err(error);
}
emit_transport_trace(
&request,
"transport_stream_headers_done",
json!({
"elapsed_ms": stream_headers_started.elapsed().as_millis() as u64,
"headers_len": headers.len(),
}),
);
let body_started = Instant::now();
emit_transport_trace(&request, "transport_body_build_start", json!({}));
let body = self
.scheme
.build_request_body(&self.model_id, &request, &self.capability);
emit_transport_trace(
&request,
"transport_body_build_done",
json!({
"elapsed_ms": body_started.elapsed().as_millis() as u64,
"body_kind": json_value_kind(&body),
}),
);
let response = self
.http_client
.post(&url)
.headers(headers)
.json(&body)
.send()
.await?;
let encode_started = Instant::now();
let request_body = match self.encode_request_body(&body, &mut headers) {
Ok(body) => body,
Err(error) => {
emit_transport_trace(
&request,
"transport_body_encode_error",
json!({
"elapsed_ms": encode_started.elapsed().as_millis() as u64,
"error": error.to_string(),
}),
);
return Err(error);
}
};
emit_transport_trace(
&request,
"transport_body_encode_done",
json!({
"elapsed_ms": encode_started.elapsed().as_millis() as u64,
"encoding": request_body.encoding(),
"raw_json_bytes": request_body.raw_json_bytes(),
"wire_bytes": request_body.wire_bytes(),
}),
);
let builder = self.http_client.post(&url).headers(headers);
let builder = match request_body {
RequestBody::Json(body) => builder.json(&body),
RequestBody::CompressedJson { bytes, .. } => builder.body(bytes),
};
let send_started = Instant::now();
emit_transport_trace(&request, "transport_http_send_start", json!({}));
let response =
match response_with_timeout(builder.send(), DEFAULT_STREAM_OPEN_TIMEOUT, "stream_open")
.await
{
Ok(response) => {
emit_transport_trace(
&request,
"transport_http_headers_received",
json!({
"elapsed_ms": send_started.elapsed().as_millis() as u64,
"status": response.status().as_u16(),
"success": response.status().is_success(),
}),
);
response
}
Err(error) => {
emit_transport_trace(
&request,
"transport_http_send_error",
json!({
"elapsed_ms": send_started.elapsed().as_millis() as u64,
"error": error.to_string(),
}),
);
return Err(error);
}
};
if !response.status().is_success() {
let status = response.status().as_u16();
let text = response.text().await.unwrap_or_default();
if let Ok(json) = serde_json::from_str::<serde_json::Value>(&text) {
let error = json.get("error").unwrap_or(&json);
let code = error.get("type").and_then(|v| v.as_str()).map(String::from);
let message = error
.get("message")
.and_then(|v| v.as_str())
.unwrap_or(&text)
.to_string();
return Err(ClientError::Api {
status: Some(status),
code,
message,
});
}
return Err(ClientError::Api {
status: Some(status),
code: None,
message: text,
});
emit_transport_trace(
&request,
"transport_http_status_error",
json!({
"status": response.status().as_u16(),
"retry_after_present": response.headers().get(RETRY_AFTER).is_some(),
}),
);
return Err(classify_error_response(response).await);
}
emit_transport_trace(
&request,
"transport_stream_ready",
json!({
"elapsed_ms": total_started.elapsed().as_millis() as u64,
}),
);
let scheme = self.scheme.clone();
let byte_stream = response.bytes_stream().map_err(std::io::Error::other);
let event_stream = byte_stream.eventsource();
@ -240,3 +525,170 @@ impl<S: Scheme + Clone + 'static> LlmClient for HttpTransport<S> {
Ok(Box::pin(stream))
}
}
// Unit tests for the transport helpers: the timeout wrapper, the
// Codex-specific stream headers, and request-body encoding.
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
/// Auth provider stub whose `is_codex_backend` answer is set by the test.
#[derive(Debug)]
struct TestAuthProvider {
codex: bool,
}
#[async_trait]
impl AuthProvider for TestAuthProvider {
// Returns a fixed bearer token plus a ChatGPT account-id header.
async fn headers(&self) -> Result<Vec<(HeaderName, HeaderValue)>, ClientError> {
Ok(vec![
(
HeaderName::from_static("authorization"),
HeaderValue::from_static("Bearer test-token"),
),
(
HeaderName::from_static("chatgpt-account-id"),
HeaderValue::from_static("account-1"),
),
])
}
fn is_codex_backend(&self) -> bool {
self.codex
}
}
/// Minimal scheme: builds a tiny JSON body and parses no SSE events.
#[derive(Clone)]
struct TestScheme;
impl Scheme for TestScheme {
type State = ();
fn default_base_url(&self) -> &'static str {
"https://example.test"
}
fn path(&self, _model_id: &str) -> String {
"/responses".to_string()
}
fn required_auth(&self) -> AuthRequirement {
AuthRequirement::Bearer
}
// Echoes the model id, the item count and the cache key so tests can
// inspect what ended up in the encoded body.
fn build_request_body(
&self,
model_id: &str,
request: &Request,
_capability: &ModelCapability,
) -> serde_json::Value {
json!({
"model": model_id,
"input_len": request.items.len(),
"prompt_cache_key": request.cache_key,
})
}
fn parse_sse(
&self,
_event_type: &str,
_data: &str,
_state: &mut Self::State,
) -> Result<Vec<Event>, ClientError> {
Ok(Vec::new())
}
fn default_capability(&self) -> ModelCapability {
ModelCapability::minimal()
}
}
/// Builds a transport over `TestScheme` with the given auth.
fn transport(auth: ResolvedAuth) -> HttpTransport<TestScheme> {
HttpTransport::new(
TestScheme,
"gpt-test",
"https://example.test",
auth,
ModelCapability::minimal(),
)
}
// A future that never resolves must surface as a retryable
// `ClientError::Timeout` tagged with the given phase.
#[tokio::test]
async fn response_timeout_returns_retryable_lifecycle_timeout() {
let err = response_with_timeout(
std::future::pending::<Result<reqwest::Response, reqwest::Error>>(),
Duration::from_millis(5),
"stream_open",
)
.await
.unwrap_err();
assert!(crate::llm_client::error::is_retryable(&err));
assert!(matches!(
err,
ClientError::Timeout {
phase: "stream_open",
..
}
));
}
// Codex backend: the cache key is mirrored into `session_id` and
// `x-client-request-id`, and the body is zstd-compressed JSON.
#[tokio::test]
async fn codex_backend_adds_conversation_headers_and_zstd_body() {
let transport = transport(ResolvedAuth::Custom(Arc::new(TestAuthProvider {
codex: true,
})));
let request = Request::new().user("hello").cache_key("segment-123");
let mut headers = transport.build_headers().await.unwrap();
transport
.apply_stream_headers(&mut headers, &request)
.unwrap();
let body = transport.scheme.build_request_body(
&transport.model_id,
&request,
&transport.capability,
);
let encoded = transport.encode_request_body(&body, &mut headers).unwrap();
assert_eq!(headers.get(ACCEPT).unwrap(), "text/event-stream");
assert_eq!(headers.get("session_id").unwrap(), "segment-123");
assert_eq!(headers.get("x-client-request-id").unwrap(), "segment-123");
assert_eq!(headers.get(CONTENT_ENCODING).unwrap(), "zstd");
let RequestBody::CompressedJson {
bytes: compressed,
raw_json_bytes,
} = encoded
else {
panic!("Codex backend request body must be zstd-compressed");
};
assert!(raw_json_bytes > 0);
// Round-trip: decompress and confirm the JSON content is intact.
let decoded = zstd::stream::decode_all(std::io::Cursor::new(compressed)).unwrap();
let decoded: serde_json::Value = serde_json::from_slice(&decoded).unwrap();
assert_eq!(decoded["prompt_cache_key"], "segment-123");
}
// API-key auth: only `Accept` is added; no Codex headers, no compression.
#[tokio::test]
async fn non_codex_request_does_not_get_codex_only_headers_or_compression() {
let transport = transport(ResolvedAuth::ApiKey("api-key".to_string()));
let request = Request::new().user("hello").cache_key("segment-123");
let mut headers = transport.build_headers().await.unwrap();
transport
.apply_stream_headers(&mut headers, &request)
.unwrap();
let body = transport.scheme.build_request_body(
&transport.model_id,
&request,
&transport.capability,
);
let encoded = transport.encode_request_body(&body, &mut headers).unwrap();
assert_eq!(headers.get(ACCEPT).unwrap(), "text/event-stream");
assert!(headers.get("session_id").is_none());
assert!(headers.get("x-client-request-id").is_none());
assert!(headers.get(CONTENT_ENCODING).is_none());
let RequestBody::Json(decoded) = encoded else {
panic!("non-Codex request body must remain normal JSON");
};
assert_eq!(decoded["prompt_cache_key"], "segment-123");
}
}

View File

@ -7,8 +7,14 @@
//! - ToolResult items (tool results)
//! - Reasoning items (extended thinking)
use std::{fmt, sync::Arc};
use serde::{Deserialize, Serialize};
/// Returns `true` when `value` is `false`.
///
/// Takes `&bool` because it is used as a serde `skip_serializing_if`
/// predicate, which is called with a reference to the field.
fn is_false(value: &bool) -> bool {
    matches!(*value, false)
}
// ============================================================================
// Item - The core unit of conversation
// ============================================================================
@ -19,6 +25,35 @@ pub type ItemId = String;
/// Call ID type for linking function calls to their outputs
pub type CallId = String;
/// Callback sink for request-local transport lifecycle diagnostics.
///
/// This is carried on [`Request`] so generic [`crate::llm_client::LlmClient`]
/// implementations can emit fine-grained transport milestones without widening
/// the trait method signature. The callback must never receive request body
/// contents or secret header values.
#[derive(Clone)]
pub struct RequestTrace {
// Type-erased callback behind an `Arc`, so the derived `Clone` shares the
// same callback instead of requiring the closure itself to be `Clone`.
callback: Arc<dyn Fn(&str, serde_json::Value) + Send + Sync>,
}
impl RequestTrace {
    /// Wraps `callback` so every clone of this trace handle shares it.
    pub fn new(callback: impl Fn(&str, serde_json::Value) + Send + Sync + 'static) -> Self {
        let callback: Arc<dyn Fn(&str, serde_json::Value) + Send + Sync> = Arc::new(callback);
        Self { callback }
    }

    /// Invokes the callback with a milestone `label` and its `data` payload.
    pub fn emit(&self, label: &str, data: serde_json::Value) {
        let sink = &*self.callback;
        sink(label, data);
    }
}
// Manual `Debug` impl that prints only the type name and marks the struct as
// non-exhaustive; the callback field is not formatted.
impl fmt::Debug for RequestTrace {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("RequestTrace").finish_non_exhaustive()
}
}
/// Conversation item - the primary unit of conversation history
///
/// Items represent discrete elements in a conversation. Tool calls and reasoning
@ -79,6 +114,9 @@ pub enum Item {
/// Detailed output (removed by pruning when old enough)
#[serde(default, skip_serializing_if = "Option::is_none")]
content: Option<String>,
/// Whether the tool result represents an execution error.
#[serde(default, skip_serializing_if = "is_false")]
is_error: bool,
},
/// Reasoning/thinking item
@ -94,8 +132,15 @@ pub enum Item {
summary: Vec<String>,
/// サーバから返された暗号化済み reasoning blob。ZDR / `store=false`
/// 運用で stateless に再送するときそのまま添える必要がある。
/// Anthropic の `redacted_thinking.data` もここに格納する。
#[serde(default, skip_serializing_if = "Option::is_none")]
encrypted_content: Option<String>,
/// Anthropic extended thinking の `signature`。新世代 Claude
/// (Opus 4.5+/Sonnet 4.6+) では同一論理ターン内の `thinking`
/// ブロックを送り返す際に必須。改ざん検知に使われる。他 scheme
/// では `None`。
#[serde(default, skip_serializing_if = "Option::is_none")]
signature: Option<String>,
/// Item status
#[serde(skip_serializing_if = "Option::is_none")]
status: Option<ItemStatus>,
@ -191,11 +236,27 @@ impl Item {
/// Create a tool result item with summary only (no content).
///
/// Shorthand for `tool_result_item` with `content = None` and
/// `is_error = false`.
pub fn tool_result(call_id: impl Into<String>, summary: impl Into<String>) -> Self {
Self::tool_result_item(call_id, summary, None, false)
}
/// Create an error tool result item with summary only (no content).
///
/// Shorthand for `tool_result_item` with `content = None` and
/// `is_error = true`.
pub fn tool_result_error(call_id: impl Into<String>, summary: impl Into<String>) -> Self {
Self::tool_result_item(call_id, summary, None, true)
}
/// Create a tool result item with summary, optional content, and error flag.
pub fn tool_result_item(
call_id: impl Into<String>,
summary: impl Into<String>,
content: Option<String>,
is_error: bool,
) -> Self {
Self::ToolResult {
id: None,
call_id: call_id.into(),
summary: summary.into(),
content: None,
content,
is_error,
}
}
@ -205,12 +266,7 @@ impl Item {
summary: impl Into<String>,
content: impl Into<String>,
) -> Self {
Self::ToolResult {
id: None,
call_id: call_id.into(),
summary: summary.into(),
content: Some(content.into()),
}
Self::tool_result_item(call_id, summary, Some(content.into()), false)
}
// ========================================================================
@ -224,6 +280,7 @@ impl Item {
text: text.into(),
summary: Vec::new(),
encrypted_content: None,
signature: None,
status: None,
}
}
@ -247,6 +304,14 @@ impl Item {
self
}
/// Attach an Anthropic `signature` to a `Reasoning` item.
///
/// Any other variant is returned unchanged, and `sig` is not converted in
/// that case.
pub fn with_signature(mut self, sig: impl Into<String>) -> Self {
    // Guard clause: only `Reasoning` carries a signature slot.
    let Self::Reasoning { signature: slot, .. } = &mut self else {
        return self;
    };
    *slot = Some(sig.into());
    self
}
// ========================================================================
// Builder methods
// ========================================================================
@ -455,6 +520,17 @@ pub struct Request {
/// (Anthropic today) can place a long-lived cache breakpoint there.
/// Providers without prompt caching ignore the field.
pub cache_anchor: Option<usize>,
/// 会話単位の安定キー。`prompt_cache_key` として送られる
/// (OpenAI Responses)。ChatGPT backend (codex-oauth) は明示キーが
/// 無いと org/project ハッシュ衝突でプロンプトキャッシュが
/// ほぼヒットしないため、pod 側で `SegmentId` を渡す運用を想定。
/// `cache_anchor` と違い名前空間キーであり、`prefix anchor` とは
/// 別の概念。`cache_anchor` を読まない provider と同じく、
/// `prompt_cache_key` を持たない provider は無視する。
pub cache_key: Option<String>,
/// Request-local diagnostics sink for transport lifecycle tracing.
#[doc(hidden)]
pub transport_trace: Option<RequestTrace>,
}
impl Request {
@ -505,6 +581,15 @@ impl Request {
self
}
/// Attach a request-local transport trace callback.
///
/// The closure is wrapped in a [`RequestTrace`] and stored in the
/// `transport_trace` field, replacing any callback attached earlier.
/// Returns `self` so calls can be chained builder-style.
pub fn transport_trace(
mut self,
callback: impl Fn(&str, serde_json::Value) + Send + Sync + 'static,
) -> Self {
self.transport_trace = Some(RequestTrace::new(callback));
self
}
/// Set max tokens
pub fn max_tokens(mut self, max_tokens: u32) -> Self {
self.config.max_tokens = Some(max_tokens);
@ -534,6 +619,14 @@ impl Request {
self.config.stop_sequences.push(sequence.into());
self
}
/// Set the conversation cache key.
///
/// See the field documentation on [`Request::cache_key`] for details.
/// Returns `self` so calls can be chained builder-style.
pub fn cache_key(mut self, key: impl Into<String>) -> Self {
self.cache_key = Some(key.into());
self
}
}
// ============================================================================

View File

@ -11,12 +11,23 @@
//! 射影の適用は上位層(`pod::prune_hook` 等)が LLM に送る一時コンテキスト
//! に対してだけ行う。Worker の永続履歴は決して変更されない。
//!
//! `min_savings` 判定や savings 推定もこの crate には置かず、上位層が
//! usage 履歴ベースのトークン会計と組み合わせて行う。
//! 保護境界は末尾 token budget で決めるが、この crate は usage 履歴を
//! 所有しない。prefix ごとの token 推定値と savings 推定は上位層から
//! callback で注入される。
use serde::{Deserialize, Serialize};
use crate::llm_client::types::Item;
use crate::token_counter::{EstimateSource, TokenEstimate};
/// Callback that returns token estimates for every prefix boundary of the
/// supplied request history.
///
/// The returned vector must have `history.len() + 1` entries where entry `i`
/// estimates the token count of `history[..i]`. Returning a malformed vector,
/// or estimates whose source is [`EstimateSource::NoData`], makes prune treat
/// the request as having no candidates.
pub type TokenEstimator = Box<dyn Fn(&[Item]) -> Vec<TokenEstimate> + Send + Sync>;
/// Callback that estimates the token savings for projecting the
/// `ToolResult.content` out of `history[i]` for each `i` in `indices`.
@ -30,13 +41,49 @@ use crate::llm_client::types::Item;
/// 実際の projection と一致する savings を返す必要がある。
pub type SavingsEstimator = Box<dyn Fn(&[Item], &[usize]) -> u64 + Send + Sync>;
/// Result of one prune evaluation pass, surfaced to the optional
/// [`PruneObserver`] for instrumentation.
///
/// The Worker evaluates prune once per LLM request and, when an observer is
/// registered, reports the outcome with this value. It carries the fire/skip
/// decision together with the inputs to that decision: the candidate count,
/// the estimated savings, and the start index of the protected region.
#[derive(Debug, Clone)]
pub struct PruneEvaluation {
/// Length of `prunable_indices`. `0` for
/// [`PruneDecision::SkippedNoCandidates`].
pub candidate_count: usize,
/// Estimated savings in tokens. `0` when there were no candidates.
pub estimated_savings: u64,
/// Index of the first item of the suffix protected by the token budget.
/// `None` when the usage estimate is `NoData` and the boundary cannot be
/// determined.
pub protected_start_index: Option<usize>,
/// The decision that was taken.
pub decision: PruneDecision,
}
/// Outcome of one prune evaluation. Each variant is one branch of the
/// "fire vs skip" decision tree the Worker walks before each LLM request.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PruneDecision {
/// `prunable_indices` was empty, so nothing was done.
SkippedNoCandidates,
/// Candidates existed but the estimated savings were below `min_savings`,
/// so nothing was done.
SkippedBelowMinSavings,
/// Candidates existed and savings >= `min_savings`; the projection was
/// applied. `pruned_count` is the number of items `project()` actually
/// rewrote (candidates whose content was already `None` count as 0).
Fired { pruned_count: usize },
}
/// Optional observer invoked after each prune evaluation, regardless of
/// branch. Upper layers such as Pod install it to emit metrics.
pub type PruneObserver = Box<dyn Fn(&PruneEvaluation) + Send + Sync>;
/// Configuration for the Prune algorithm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PruneConfig {
/// Number of recent turns to protect from pruning.
/// A "turn" starts at each user message.
#[serde(default = "default_protected_turns")]
pub protected_turns: usize,
/// Token budget at the history tail protected from pruning.
#[serde(default = "default_protected_tokens")]
pub protected_tokens: u64,
/// Minimum token savings required to actually prune. If the prunable
/// content is smaller than this, the caller should skip to avoid
@ -47,8 +94,8 @@ pub struct PruneConfig {
pub min_savings: u64,
}
fn default_protected_turns() -> usize {
3
fn default_protected_tokens() -> u64 {
8000
}
fn default_min_savings() -> u64 {
4096
@ -57,25 +104,12 @@ fn default_min_savings() -> u64 {
impl Default for PruneConfig {
fn default() -> Self {
Self {
protected_turns: default_protected_turns(),
protected_tokens: default_protected_tokens(),
min_savings: default_min_savings(),
}
}
}
/// Find indices where each "turn" begins.
///
/// A turn starts at every user message. Returns the indices of those
/// user messages in ascending order.
fn find_turn_starts(items: &[Item]) -> Vec<usize> {
items
.iter()
.enumerate()
.filter(|(_, item)| item.is_user_message())
.map(|(i, _)| i)
.collect()
}
/// Set `content = None` on each `Item::ToolResult` at the given indices.
///
/// Returns the number of items that were actually modified — items that
@ -84,28 +118,43 @@ fn find_turn_starts(items: &[Item]) -> Vec<usize> {
pub fn project(items: &mut [Item], indices: &[usize]) -> usize {
let mut count = 0;
for &i in indices {
if let Item::ToolResult { content, .. } = &mut items[i] {
if content.is_some() {
*content = None;
count += 1;
}
if let Item::ToolResult { content, .. } = &mut items[i]
&& content.is_some()
{
*content = None;
count += 1;
}
}
count
}
/// Indices of `Item::ToolResult { content: Some(_), .. }` that lie outside
/// the last `protected_turns` turns. Pure: does not mutate `items`.
/// Indices of `Item::ToolResult { content: Some(_), .. }` that lie before
/// the suffix protected by `protected_tokens`. Pure: does not mutate `items`.
///
/// Returns an empty vector when there are too few turns or no prunable
/// candidates.
pub fn prunable_indices(items: &[Item], protected_turns: usize) -> Vec<usize> {
let turn_starts = find_turn_starts(items);
if turn_starts.len() <= protected_turns {
return Vec::new();
}
let boundary = turn_starts[turn_starts.len() - protected_turns];
items[..boundary]
/// Returns an empty vector when token estimates are unavailable (`NoData`) or
/// no prunable candidates exist.
pub fn prunable_indices(
items: &[Item],
protected_tokens: u64,
token_estimates: &[TokenEstimate],
) -> Vec<usize> {
evaluate_candidates(items, protected_tokens, token_estimates).0
}
/// Same as [`prunable_indices`] but also returns the start index of the
/// protected suffix. `None` means the token boundary could not be determined
/// (currently because usage estimates were `NoData` or malformed).
pub fn evaluate_candidates(
items: &[Item],
protected_tokens: u64,
token_estimates: &[TokenEstimate],
) -> (Vec<usize>, Option<usize>) {
let Some(protected_start) = protected_start_index(items, protected_tokens, token_estimates)
else {
return (Vec::new(), None);
};
let candidates = items[..protected_start]
.iter()
.enumerate()
.filter_map(|(i, item)| match item {
@ -114,7 +163,39 @@ pub fn prunable_indices(items: &[Item], protected_turns: usize) -> Vec<usize> {
} => Some(i),
_ => None,
})
.collect()
.collect();
(candidates, Some(protected_start))
}
/// Locates the first index of the token-protected suffix of `items`.
///
/// `token_estimates[i]` must estimate the size of `items[..i]`, so exactly
/// `items.len() + 1` entries are expected. Walking backwards from the end,
/// the suffix grows one item at a time until it holds at least
/// `protected_tokens` tokens; the item that crosses the budget is included.
///
/// Returns `None` when the estimate vector has the wrong length, or when the
/// total or any prefix inspected during the walk has source
/// [`EstimateSource::NoData`]. Returns `Some(items.len())` (nothing protected)
/// when `protected_tokens` is zero, and `Some(0)` when the whole history fits
/// inside the budget.
fn protected_start_index(
    items: &[Item],
    protected_tokens: u64,
    token_estimates: &[TokenEstimate],
) -> Option<usize> {
    let item_count = items.len();

    // The last entry is the total; the rest are the per-index prefixes.
    let (total, prefixes) = match token_estimates.split_last() {
        Some((total, prefixes)) if prefixes.len() == item_count => (total, prefixes),
        _ => return None,
    };
    if total.source == EstimateSource::NoData {
        return None;
    }
    if protected_tokens == 0 {
        return Some(item_count);
    }

    let mut boundary = item_count;
    for (idx, prefix) in prefixes.iter().enumerate().rev() {
        if prefix.source == EstimateSource::NoData {
            return None;
        }
        boundary = idx;
        // Tokens contained in `items[idx..]`.
        if total.tokens.saturating_sub(prefix.tokens) >= protected_tokens {
            break;
        }
    }
    Some(boundary)
}
#[cfg(test)]
@ -139,17 +220,70 @@ mod tests {
items
}
/// Test helper: tags every value in `tokens` as a `Measured` estimate,
/// preserving order.
fn measured_prefix(tokens: &[u64]) -> Vec<TokenEstimate> {
    let mut estimates = Vec::with_capacity(tokens.len());
    for &count in tokens {
        estimates.push(TokenEstimate {
            tokens: count,
            source: EstimateSource::Measured,
        });
    }
    estimates
}
/// Test helper: prefix estimates for a history in which every item costs
/// exactly `item_tokens` tokens (entry `i` is `i * item_tokens`).
fn uniform_estimates(items: &[Item], item_tokens: u64) -> Vec<TokenEstimate> {
    let cumulative: Vec<u64> = (0..=items.len())
        .map(|boundary| boundary as u64 * item_tokens)
        .collect();
    measured_prefix(&cumulative)
}
/// Test helper: turns per-item token costs into cumulative prefix estimates
/// (a leading `0` followed by the running sum after each item).
fn estimates_from_item_tokens(item_tokens: &[u64]) -> Vec<TokenEstimate> {
    let running_sums = item_tokens.iter().scan(0u64, |sum, &cost| {
        *sum += cost;
        Some(*sum)
    });
    let mut cumulative = Vec::with_capacity(item_tokens.len() + 1);
    cumulative.push(0);
    cumulative.extend(running_sums);
    measured_prefix(&cumulative)
}
/// Test helper: estimates where only the empty prefix (index 0) is `Measured`
/// and every other boundary is `NoData`. Entry `i` carries `i` as its token
/// value.
fn no_data_estimates(items: &[Item]) -> Vec<TokenEstimate> {
    let mut estimates = Vec::with_capacity(items.len() + 1);
    for boundary in 0..=items.len() {
        let source = match boundary {
            0 => EstimateSource::Measured,
            _ => EstimateSource::NoData,
        };
        estimates.push(TokenEstimate {
            tokens: boundary as u64,
            source,
        });
    }
    estimates
}
#[test]
fn no_candidates_when_too_few_turns() {
fn no_candidates_when_estimate_has_no_data() {
let items = make_history(&[("turn1", vec![("summary1", Some("big content here"))])]);
let estimates = no_data_estimates(&items);
let (candidates, protected_start) = evaluate_candidates(&items, 10, &estimates);
assert!(candidates.is_empty());
assert_eq!(protected_start, None);
}
#[test]
fn no_candidates_when_history_fits_in_protected_tokens() {
let items = make_history(&[
("turn1", vec![("summary1", Some("big content here"))]),
("turn2", vec![("summary2", Some("more content"))]),
]);
assert!(prunable_indices(&items, 3).is_empty());
let estimates = uniform_estimates(&items, 10);
assert!(prunable_indices(&items, 10_000, &estimates).is_empty());
}
#[test]
fn candidates_in_unprotected_turns() {
fn candidates_before_token_protected_suffix() {
let big = "x".repeat(4096 * 4);
let items = make_history(&[
("turn1", vec![("s1", Some(&big))]),
@ -157,9 +291,39 @@ mod tests {
("turn3", vec![("s3", Some("keep me"))]),
("turn4", vec![("s4", Some("keep me too"))]),
]);
let candidates = prunable_indices(&items, 2);
let estimates = uniform_estimates(&items, 10);
let candidates = prunable_indices(&items, 80, &estimates);
assert_eq!(candidates.len(), 2);
// suffix budget 80 tokens protects turn3+turn4 (8 items), so only s1/s2 are candidates.
for &i in &candidates {
if let Item::ToolResult { summary, .. } = &items[i] {
assert!(summary == "s1" || summary == "s2");
} else {
panic!("non tool-result selected");
}
}
}
#[test]
fn single_long_task_gets_candidates_without_multiple_user_turns() {
let big = "x".repeat(4096 * 8);
let items = make_history(&[(
"one long task",
vec![
("s1", Some(&big)),
("s2", Some(&big)),
("s3", Some(&big)),
("s4", Some(&big)),
],
)]);
// user + assistant are cheap; every ToolCall is cheap; every ToolResult is heavy.
let item_tokens = vec![1, 1, 1, 5_000, 1, 5_000, 1, 5_000, 1, 5_000];
let estimates = estimates_from_item_tokens(&item_tokens);
let (candidates, protected_start) = evaluate_candidates(&items, 8_000, &estimates);
assert_eq!(protected_start, Some(7));
assert_eq!(candidates.len(), 2);
// 候補は turn1 と turn2 の ToolResult のみ
for &i in &candidates {
if let Item::ToolResult { summary, .. } = &items[i] {
assert!(summary == "s1" || summary == "s2");
@ -177,7 +341,8 @@ mod tests {
("turn3", vec![]),
("turn4", vec![]),
]);
assert!(prunable_indices(&items, 2).is_empty());
let estimates = uniform_estimates(&items, 10);
assert!(prunable_indices(&items, 20, &estimates).is_empty());
}
#[test]
@ -189,7 +354,8 @@ mod tests {
("turn3", vec![("s3", Some("keep me"))]),
("turn4", vec![("s4", Some("keep me too"))]),
]);
let candidates = prunable_indices(&items, 2);
let estimates = uniform_estimates(&items, 10);
let candidates = prunable_indices(&items, 80, &estimates);
let count = project(&mut items, &candidates);
assert_eq!(count, 2);
@ -215,7 +381,7 @@ mod tests {
("turn1", vec![("s1", None)]),
("turn2", vec![("s2", Some("hello"))]),
]);
// Manually target s1 (index 3) even though it's already None.
// Manually target s1 even though it's already None.
let target = items
.iter()
.position(|it| matches!(it, Item::ToolResult { summary, .. } if summary == "s1"))
@ -233,27 +399,53 @@ mod tests {
("turn3", vec![]),
("turn4", vec![]),
]);
let candidates = prunable_indices(&items, 2);
let estimates = uniform_estimates(&items, 10);
let candidates = prunable_indices(&items, 20, &estimates);
assert_eq!(project(&mut items, &candidates), 1);
// 2 周目: 候補は一度の prunable_indices 結果を使い回しても 0 件。
assert_eq!(project(&mut items, &candidates), 0);
}
#[test]
fn protected_turns_boundary_exact() {
// 3 turns with protected_turns=2: only turn 1 is a candidate.
fn evaluate_candidates_returns_protected_start_index() {
let big = "x".repeat(64);
let items = make_history(&[
("turn1", vec![("s1", Some(&big))]),
("turn2", vec![("s2", Some("protected"))]),
("turn3", vec![("s3", Some("also protected"))]),
("turn2", vec![("s2", Some(&big))]),
("turn3", vec![("s3", Some("keep"))]),
("turn4", vec![("s4", Some("keep too"))]),
]);
let candidates = prunable_indices(&items, 2);
assert_eq!(candidates.len(), 1);
if let Item::ToolResult { summary, .. } = &items[candidates[0]] {
assert_eq!(summary, "s1");
} else {
panic!("expected ToolResult at candidate index");
}
let estimates = uniform_estimates(&items, 10);
let (candidates, protected_start) = evaluate_candidates(&items, 80, &estimates);
assert_eq!(candidates.len(), 2);
// protected_tokens=80 → protected suffix is turn3+turn4, starting at index 8.
assert_eq!(protected_start, Some(8));
}
// A budget larger than the whole history protects everything: there are no
// candidates and the protected suffix starts at index 0.
#[test]
fn evaluate_candidates_reports_zero_start_when_everything_is_protected() {
let items = make_history(&[("only", vec![("s", Some("x"))])]);
let estimates = uniform_estimates(&items, 10);
let (candidates, protected_start) = evaluate_candidates(&items, 10_000, &estimates);
assert!(candidates.is_empty());
assert_eq!(protected_start, Some(0));
}
// A zero budget protects nothing: the boundary sits at `items.len()` and both
// tool results with content become candidates.
#[test]
fn zero_protected_tokens_allows_all_tool_results_as_candidates() {
let big = "x".repeat(64);
let items = make_history(&[("turn1", vec![("s1", Some(&big)), ("s2", Some(&big))])]);
let estimates = uniform_estimates(&items, 10);
let (candidates, protected_start) = evaluate_candidates(&items, 0, &estimates);
assert_eq!(protected_start, Some(items.len()));
assert_eq!(candidates.len(), 2);
}
// An estimate vector whose length is not `items.len() + 1` (here: empty)
// yields no boundary and no candidates.
#[test]
fn malformed_estimate_vector_is_treated_as_no_boundary() {
let items = make_history(&[("turn1", vec![("s1", Some("x"))])]);
let (candidates, protected_start) = evaluate_candidates(&items, 10, &[]);
assert!(candidates.is_empty());
assert_eq!(protected_start, None);
}
}

View File

@ -10,12 +10,14 @@
//! - [`ToolCallCollector`] - ツール呼び出しを収集するHandler
pub mod event;
mod reasoning_item_collector;
mod text_block_collector;
mod timeline;
mod tool_call_collector;
// 公開API
pub use event::*;
pub use reasoning_item_collector::ReasoningItemCollector;
pub use text_block_collector::TextBlockCollector;
pub use timeline::Timeline;
pub use tool_call_collector::ToolCallCollector;
@ -28,6 +30,7 @@ pub use crate::handler::{
Handler,
Kind,
PingKind,
ReasoningItemKind,
StatusKind,
// Block Events
TextBlockEvent,

View File

@ -0,0 +1,77 @@
//! `ReasoningItemCollector` - 完成済み reasoning item を収集する Handler
//!
//! Timeline の `ReasoningItemKind` Handler として登録し、scheme 側が
//! `Event::ReasoningItem` を発火するたびに 1 件ずつバッファに溜める。
//! Worker はターン終了時に `take_collected()` でドレインして
//! `Item::Reasoning` として `worker.history` に append する。
use std::sync::{Arc, Mutex};
use crate::handler::{Handler, ReasoningItemKind};
use crate::llm_client::event::ReasoningItemEvent;
/// Buffer of collected reasoning items.
///
/// The buffer lives behind `Arc<Mutex<..>>`, so every clone of the collector
/// reads and writes the same list.
#[derive(Clone, Default)]
pub struct ReasoningItemCollector {
    collected: Arc<Mutex<Vec<ReasoningItemEvent>>>,
}

impl ReasoningItemCollector {
    /// Creates a collector with an empty buffer.
    pub fn new() -> Self {
        Self {
            collected: Arc::default(),
        }
    }

    /// Removes and returns every collected item, leaving the buffer empty.
    pub fn take_collected(&self) -> Vec<ReasoningItemEvent> {
        std::mem::take(&mut *self.collected.lock().unwrap())
    }

    /// Discards every collected item.
    pub fn clear(&self) {
        let mut buffer = self.collected.lock().unwrap();
        buffer.clear();
    }
}
// Handler glue: every reasoning item event is cloned and appended to the
// shared buffer. No per-scope state is needed, hence `Scope = ()`.
impl Handler<ReasoningItemKind> for ReasoningItemCollector {
type Scope = ();
fn on_event(&mut self, _scope: &mut Self::Scope, event: &ReasoningItemEvent) {
self.collected.lock().unwrap().push(event.clone());
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::llm_client::event::Event;
use crate::timeline::Timeline;
// Events dispatched through a `Timeline` reach the collector in dispatch
// order, and `take_collected` drains the buffer.
#[test]
fn collects_in_order() {
let collector = ReasoningItemCollector::new();
let mut timeline = Timeline::new();
timeline.on_reasoning_item(collector.clone());
timeline.dispatch(&Event::ReasoningItem(ReasoningItemEvent {
id: Some("r1".into()),
text: "first".into(),
signature: Some("sig1".into()),
..Default::default()
}));
timeline.dispatch(&Event::ReasoningItem(ReasoningItemEvent {
id: Some("r2".into()),
text: "second".into(),
..Default::default()
}));
let items = collector.take_collected();
assert_eq!(items.len(), 2);
assert_eq!(items[0].text, "first");
assert_eq!(items[0].signature.as_deref(), Some("sig1"));
assert_eq!(items[1].text, "second");
// `take_collected` drains the buffer, so a second call returns nothing.
assert!(collector.take_collected().is_empty());
}
}

View File

@ -381,6 +381,7 @@ pub struct Timeline {
ping_handlers: Vec<Box<dyn ErasedHandler<PingKind>>>,
status_handlers: Vec<Box<dyn ErasedHandler<StatusKind>>>,
error_handlers: Vec<Box<dyn ErasedHandler<ErrorKind>>>,
reasoning_item_handlers: Vec<Box<dyn ErasedHandler<ReasoningItemKind>>>,
// Block系ハンドラーBlockTypeごとにグループ化
text_block_handlers: Vec<Box<dyn ErasedBlockHandler>>,
@ -410,6 +411,7 @@ impl Timeline {
ping_handlers: Vec::new(),
status_handlers: Vec::new(),
error_handlers: Vec::new(),
reasoning_item_handlers: Vec::new(),
text_block_handlers: Vec::new(),
thinking_block_handlers: Vec::new(),
tool_use_block_handlers: Vec::new(),
@ -471,6 +473,18 @@ impl Timeline {
self
}
/// Registers a handler for `ReasoningItemKind` events.
///
/// The handler is wrapped in a `HandlerWrapper` whose scope is started at
/// registration time, then appended to the reasoning-item handler list.
/// Returns `&mut Self` so registrations can be chained.
pub fn on_reasoning_item<H>(&mut self, handler: H) -> &mut Self
where
H: Handler<ReasoningItemKind> + Send + Sync + 'static,
H::Scope: Send + Sync,
{
let mut wrapper = HandlerWrapper::new(handler);
wrapper.start_scope();
self.reasoning_item_handlers.push(Box::new(wrapper));
self
}
/// TextBlockKind用のHandlerを登録
pub fn on_text_block<H>(&mut self, handler: H) -> &mut Self
where
@ -516,12 +530,17 @@ impl Timeline {
Event::Ping(p) => self.dispatch_ping(p),
Event::Status(s) => self.dispatch_status(s),
Event::Error(e) => self.dispatch_error(e),
// Observability-only event: stream trace records it before timeline dispatch.
Event::UnhandledSse(_) => {}
// Block系: スコープ管理しながらディスパッチ
Event::BlockStart(s) => self.handle_block_start(s),
Event::BlockDelta(d) => self.handle_block_delta(d),
Event::BlockStop(s) => self.handle_block_stop(s),
Event::BlockAbort(a) => self.handle_block_abort(a),
// 完成済み reasoning item: 即時ディスパッチ
Event::ReasoningItem(r) => self.dispatch_reasoning_item(r),
}
}
@ -564,6 +583,12 @@ impl Timeline {
}
}
/// Forwards a completed reasoning item to every registered reasoning-item
/// handler, in registration order.
fn dispatch_reasoning_item(&mut self, event: &ReasoningItemEvent) {
    self.reasoning_item_handlers.iter_mut().for_each(|handler| {
        handler.dispatch(event);
    });
}
fn handle_block_start(&mut self, start: &BlockStart) {
self.current_block = Some(start.block_type);
@ -655,6 +680,36 @@ mod tests {
assert!(timeline.current_block().is_none());
}
// `Event::UnhandledSse` must not open a block or reach a registered
// text-block handler.
#[test]
fn unhandled_sse_is_ignored_by_timeline_handlers() {
// Records every text-block event it receives.
struct TestTextHandler {
calls: Arc<Mutex<Vec<TextBlockEvent>>>,
}
impl Handler<TextBlockKind> for TestTextHandler {
type Scope = ();
fn on_event(&mut self, _scope: &mut (), event: &TextBlockEvent) {
self.calls.lock().unwrap().push(event.clone());
}
}
let calls = Arc::new(Mutex::new(Vec::new()));
let mut timeline = Timeline::new();
timeline.on_text_block(TestTextHandler {
calls: calls.clone(),
});
timeline.dispatch(&Event::UnhandledSse(UnhandledSseEvent {
provider: "openai_responses".to_string(),
event_type: "response.mystery".to_string(),
data_preview: "{}".to_string(),
data_len: 2,
}));
assert!(timeline.current_block().is_none());
assert!(calls.lock().unwrap().is_empty());
}
#[test]
fn test_meta_event_dispatch() {
// シンプルなテスト用構造体

View File

@ -0,0 +1,222 @@
//! Usage 履歴ベースのトークン会計(汎用部分)。
//!
//! `UsageRecord` の列(プロバイダ実測値)と現在の history から、
//! 任意の history index 時点のプロンプト全長トークン数を pure に計算する。
//!
//! # 方針
//!
//! - ローカルトークナイザは持たない。実測値があればそれを採用し、
//! measurement 間はバイト数で按分、最新 measurement より先は最終 rate で外挿する
//! - 推定の出どころは [`EstimateSource`] で呼び出し側に明示する。
//! 課金判断には使えないが、compact / prune / memory extract trigger 等の
//! 閾値判定には十分な精度
//! - `records` は `history_len` 昇順を仮定する(呼び出し側がそのように積む)
use crate::{Item, UsageRecord};
/// Where a token estimate came from.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EstimateSource {
/// Falls exactly on a measurement boundary (the measured value itself).
Measured,
/// Computed by byte-proportional interpolation between two consecutive
/// measurements.
Interpolated,
/// Range newer than the last measurement, extrapolated with the final rate.
Extrapolated,
/// No measurement exists at all; byte-count-only fallback.
NoData,
}
/// An estimated token count.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct TokenEstimate {
/// Estimated number of tokens.
pub tokens: u64,
/// How the estimate was derived.
pub source: EstimateSource,
}
/// Returns the cumulative byte size of `items[..i]` as `prefix[i]`.
///
/// The result has `items.len() + 1` entries and starts with `0`. Sums
/// saturate at `u64::MAX` instead of overflowing.
pub fn prefix_bytes(items: &[Item]) -> Vec<u64> {
    let running_totals = items.iter().scan(0u64, |total, item| {
        *total = total.saturating_add(item_bytes(item));
        Some(*total)
    });
    let mut prefix = Vec::with_capacity(items.len() + 1);
    prefix.push(0);
    prefix.extend(running_totals);
    prefix
}
/// Size of a single item, approximated by the length of its JSON
/// serialization.
///
/// The value is only used to apportion tokens across ranges, never as an
/// absolute byte-to-token conversion, so per-provider overhead cancels out in
/// the ratio. An item that fails to serialize counts as `0` bytes.
pub fn item_bytes(item: &Item) -> u64 {
    match serde_json::to_string(item) {
        Ok(json) => json.len() as u64,
        Err(_) => 0,
    }
}
/// Scales `bytes` by the rate `tokens / span_bytes`, saturating at `u64::MAX`.
///
/// The product is computed in `u128`, so it cannot overflow. Callers must
/// pass a non-zero `span_bytes`.
fn scale_tokens(bytes: u64, tokens: u64, span_bytes: u64) -> u64 {
    let scaled = u128::from(bytes) * u128::from(tokens) / u128::from(span_bytes);
    // Clamp rather than truncate: extrapolating past the last measurement can
    // exceed `u64` when `bytes` is far larger than `span_bytes`.
    scaled.min(u128::from(u64::MAX)) as u64
}

/// Estimates the token count of `history[..index]`.
///
/// `prefix` is the cumulative byte table from [`prefix_bytes`], of length
/// `history.len() + 1`. Callers compute it once and reuse it, so repeated
/// estimates do not re-serialize the history on every call.
///
/// `records` is expected to be sorted by ascending `history_len`. Resolution
/// order:
///
/// 1. `index == 0` gives `0` tokens, [`EstimateSource::Measured`].
/// 2. No records gives `prefix[index] / 4`, [`EstimateSource::NoData`].
/// 3. A record with `history_len == index` gives its measured value (the
///    newest such record wins).
/// 4. Otherwise the value is interpolated between the neighbouring records
///    by byte proportion, or extrapolated from the last record at its
///    overall tokens-per-byte rate.
///
/// All additions and scalings saturate at `u64::MAX`.
///
/// # Panics
///
/// Panics if `index` is out of bounds for `prefix`.
pub fn tokens_at(
    history: &[Item],
    records: &[UsageRecord],
    index: usize,
    prefix: &[u64],
) -> TokenEstimate {
    debug_assert!(index <= history.len());
    debug_assert_eq!(prefix.len(), history.len() + 1);

    if index == 0 {
        return TokenEstimate {
            tokens: 0,
            source: EstimateSource::Measured,
        };
    }
    if records.is_empty() {
        return TokenEstimate {
            tokens: prefix[index] / 4,
            source: EstimateSource::NoData,
        };
    }

    // Exact match: scan from the back so the newest record wins.
    if let Some(r) = records.iter().rev().find(|r| r.history_len == index) {
        return TokenEstimate {
            tokens: r.input_total_tokens,
            source: EstimateSource::Measured,
        };
    }

    let lower = records.iter().rev().find(|r| r.history_len < index);
    let upper = records.iter().find(|r| r.history_len > index);
    // A record may refer to a history longer than the current one; clamp.
    let cap = history.len();
    let at_bytes = prefix[index];

    match (lower, upper) {
        (Some(lo), Some(up)) => {
            let lo_bytes = prefix[lo.history_len.min(cap)];
            let up_bytes = prefix[up.history_len.min(cap)];
            let span_bytes = up_bytes.saturating_sub(lo_bytes);
            let span_tokens = up.input_total_tokens.saturating_sub(lo.input_total_tokens);
            // Degenerate span: fall back to the lower measurement as-is.
            let delta_tokens = if span_bytes == 0 || span_tokens == 0 {
                0
            } else {
                scale_tokens(at_bytes.saturating_sub(lo_bytes), span_tokens, span_bytes)
            };
            TokenEstimate {
                tokens: lo.input_total_tokens.saturating_add(delta_tokens),
                source: EstimateSource::Interpolated,
            }
        }
        (Some(lo), None) => {
            let lo_bytes = prefix[lo.history_len.min(cap)];
            // No usable rate: report the last measurement unchanged.
            let delta_tokens = if lo_bytes == 0 || lo.input_total_tokens == 0 {
                0
            } else {
                scale_tokens(
                    at_bytes.saturating_sub(lo_bytes),
                    lo.input_total_tokens,
                    lo_bytes,
                )
            };
            TokenEstimate {
                tokens: lo.input_total_tokens.saturating_add(delta_tokens),
                source: EstimateSource::Extrapolated,
            }
        }
        (None, Some(up)) => {
            let up_bytes = prefix[up.history_len.min(cap)];
            let tokens = if up_bytes == 0 {
                0
            } else {
                scale_tokens(at_bytes, up.input_total_tokens, up_bytes)
            };
            TokenEstimate {
                tokens,
                source: EstimateSource::Interpolated,
            }
        }
        (None, None) => unreachable!("records non-empty but neither lower nor upper matched"),
    }
}
/// Estimated token count of the entire current history.
///
/// Builds the prefix byte table and evaluates [`tokens_at`] at
/// `history.len()`.
pub fn total_tokens(history: &[Item], records: &[UsageRecord]) -> TokenEstimate {
let prefix = prefix_bytes(history);
tokens_at(history, records, history.len(), &prefix)
}
/// Estimated full prompt length at an arbitrary history index.
///
/// Returns 0 for `history_len == 0`. Intended for delta computations (extract
/// triggers and the like) of the form
/// `total_tokens_at(now) - total_tokens_at(pointer)`.
///
/// `history_len` is clamped to `history.len()` before the lookup.
pub fn total_tokens_at(
history: &[Item],
records: &[UsageRecord],
history_len: usize,
) -> TokenEstimate {
let prefix = prefix_bytes(history);
tokens_at(history, records, history_len.min(history.len()), &prefix)
}
#[cfg(test)]
mod tests {
use super::*;
// Shorthand for a user-message item.
fn msg(text: &str) -> Item {
Item::user_message(text)
}
// Builds a usage record with only the prompt-size fields populated.
fn record(history_len: usize, tokens: u64) -> UsageRecord {
UsageRecord {
history_len,
input_total_tokens: tokens,
cache_read_tokens: 0,
cache_write_tokens: 0,
output_tokens: 0,
}
}
// Without any record the estimate falls back to the byte-based value.
#[test]
fn total_no_data_falls_back_to_byte_estimate() {
let history = vec![msg("hello world")];
let est = total_tokens(&history, &[]);
assert_eq!(est.source, EstimateSource::NoData);
assert!(est.tokens > 0);
}
// A record taken at the current history length is returned verbatim.
#[test]
fn total_measured_when_last_record_matches_history_len() {
let history = vec![msg("a"), msg("b"), msg("c")];
let records = vec![record(3, 120)];
let est = total_tokens(&history, &records);
assert_eq!(est.source, EstimateSource::Measured);
assert_eq!(est.tokens, 120);
}
// History that outgrew the last measurement is extrapolated upwards.
#[test]
fn total_extrapolated_when_history_grew_past_last_measurement() {
let history = vec![msg("a"), msg("b"), msg("c"), msg("d")];
let records = vec![record(3, 100)];
let est = total_tokens(&history, &records);
assert_eq!(est.source, EstimateSource::Extrapolated);
assert!(est.tokens > 100);
}
// An empty history always costs zero tokens.
#[test]
fn total_zero_history_is_zero() {
let est = total_tokens(&[], &[]);
assert_eq!(est.tokens, 0);
}
}

View File

@ -275,7 +275,7 @@ pub struct ToolCall {
///
/// Intermediate representation between tool execution and history.
/// Carries `summary` + optional `content` from [`ToolOutput`].
#[derive(Debug, Clone, Serialize, Deserialize)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ToolResult {
/// Corresponding tool call ID
pub tool_use_id: String,

View File

@ -0,0 +1,22 @@
//! Per-LLM-request Usage measurement snapshot.
//!
//! 1 リクエストの送信時点での「ある history prefix 長で計測した占有量」を
//! 1 件分にまとめたもの。`UsageEvent` (provider stream イベント) を
//! 受けて呼び出し側 (typically Pod) が組み立て、永続化層
//! (session-store) に流したり、token accounting (`token_counter`) で
//! 履歴として参照したりする。
/// Occupancy snapshot taken at the moment an LLM request is sent.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct UsageRecord {
/// `history.len()` at send time.
pub history_len: usize,
/// Occupancy of `history[..history_len]` (full prompt length, measured).
pub input_total_tokens: u64,
/// Portion of the above that was read from cache.
pub cache_read_tokens: u64,
/// Portion of the above that this request wrote to cache.
pub cache_write_tokens: u64,
/// Number of output tokens generated by this request.
pub output_tokens: u64,
}

File diff suppressed because it is too large Load Diff

View File

@ -4,15 +4,77 @@
mod common;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{Arc, Mutex};
use std::time::Duration;
use async_trait::async_trait;
use common::MockLlmClient;
use llm_worker::Worker;
use llm_worker::llm_client::event::{Event, ResponseStatus, StatusEvent as ClientStatusEvent};
use llm_worker::llm_client::retry::RetryPolicy;
use llm_worker::llm_client::{ClientError, LlmClient, Request, ResponseStream};
use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta, ToolOutput};
// =============================================================================
// Tests
// =============================================================================
/// Test client whose first `stream` call fails with a 504 API error and whose
/// later calls replay `events`.
///
/// The call counter is behind an `Arc`, so clones (including those made by
/// `clone_boxed`) share it.
#[derive(Clone)]
struct FailOnceClient {
calls: Arc<AtomicUsize>,
events: Vec<Event>,
}
#[async_trait]
impl LlmClient for FailOnceClient {
async fn stream(&self, _request: Request) -> Result<ResponseStream, ClientError> {
// `fetch_add` returns the previous value, so only the very first call fails.
if self.calls.fetch_add(1, Ordering::SeqCst) == 0 {
return Err(ClientError::Api {
status: Some(504),
code: None,
message: "gateway timeout".into(),
retry_after: None,
});
}
Ok(Box::pin(futures::stream::iter(
self.events.clone().into_iter().map(Ok),
)))
}
fn clone_boxed(&self) -> Box<dyn LlmClient> {
Box::new(self.clone())
}
}
/// Verify that `on_llm_retry` fires exactly once when the first attempt fails
/// with a 504 and the retry succeeds, and that the notice carries the attempt
/// numbers and status.
#[tokio::test]
async fn test_callback_llm_retry_event() {
let events = vec![Event::Status(ClientStatusEvent {
status: ResponseStatus::Completed,
})];
let client = FailOnceClient {
calls: Arc::new(AtomicUsize::new(0)),
events,
};
// Millisecond backoff keeps the test fast; two attempts allow one retry.
let mut worker = Worker::new(client).with_retry_policy(RetryPolicy {
base: Duration::from_millis(1),
cap: Duration::from_millis(1),
max_attempts: 2,
total_timeout: Duration::from_secs(1),
});
let notices = Arc::new(Mutex::new(Vec::new()));
let sink = notices.clone();
worker.on_llm_retry(move |llm_call, notice| {
sink.lock().unwrap().push((llm_call, notice.clone()));
});
let result = worker.run("retry once").await;
assert!(result.is_ok(), "worker should succeed after one retry");
let notices = notices.lock().unwrap();
assert_eq!(notices.len(), 1);
assert_eq!(notices[0].0, 0);
assert_eq!(notices[0].1.failed_attempt, 1);
assert_eq!(notices[0].1.max_attempts, 2);
assert_eq!(notices[0].1.status, Some(504));
}
/// Verify that on_text_block correctly receives delta and stop events
#[tokio::test]
@ -149,6 +211,145 @@ async fn test_callback_turn_events() {
assert_eq!(ends[0], 0);
}
/// Stub tool returning a fixed [`ToolOutput`] for result-callback tests.
///
/// The input JSON is ignored; every call yields a clone of `output`.
struct FixedOutputTool {
output: ToolOutput,
}
#[async_trait]
impl Tool for FixedOutputTool {
async fn execute(&self, _input_json: &str) -> Result<ToolOutput, ToolError> {
Ok(self.output.clone())
}
}
/// Build a [`ToolDefinition`] named `name` whose tool always returns a clone
/// of `output`.
fn fixed_tool(name: &'static str, output: ToolOutput) -> ToolDefinition {
    Arc::new(move || {
        let schema = serde_json::json!({"type":"object"});
        let meta = ToolMeta::new(name).input_schema(schema);
        let tool: Arc<dyn Tool> = Arc::new(FixedOutputTool {
            output: output.clone(),
        });
        (meta, tool)
    })
}
/// Verify that `on_tool_result` fires once per executed tool and that the
/// reported summary / content / is_error match what the tool returned.
#[tokio::test]
async fn test_callback_tool_result_events() {
    let completed = Event::Status(ClientStatusEvent {
        status: ResponseStatus::Completed,
    });
    let client = MockLlmClient::new(vec![
        Event::tool_use_start(0, "call_1", "fixed"),
        Event::tool_input_delta(0, "{}"),
        Event::tool_use_stop(0),
        completed,
    ]);

    let mut worker = Worker::new(client);
    let tool_output = ToolOutput {
        summary: "did the thing".into(),
        content: Some("full detail body".into()),
    };
    worker.register_tool(fixed_tool("fixed", tool_output));

    // (tool_use_id, summary, content, is_error) for each callback invocation.
    let captured: Arc<Mutex<Vec<(String, String, Option<String>, bool)>>> =
        Arc::new(Mutex::new(Vec::new()));
    let recorder = Arc::clone(&captured);
    worker.on_tool_result(move |tool_result| {
        let row = (
            tool_result.tool_use_id.clone(),
            tool_result.summary.clone(),
            tool_result.content.clone(),
            tool_result.is_error,
        );
        recorder.lock().unwrap().push(row);
    });

    // The run outcome is deliberately ignored; only the callback matters here.
    let _ = worker.run("call it").await;

    let observed = captured.lock().unwrap();
    assert_eq!(observed.len(), 1);
    let (tool_use_id, summary, content, is_error) = &observed[0];
    assert_eq!(tool_use_id, "call_1");
    assert_eq!(summary, "did the thing");
    assert_eq!(content.as_deref(), Some("full detail body"));
    assert!(!*is_error);
}
/// Stub tool whose `execute` always fails with
/// `ToolError::ExecutionFailed(message)`; drives the error path through
/// `on_tool_result`.
struct ErroringTool {
    /// Text carried inside the `ExecutionFailed` error.
    message: String,
}

#[async_trait]
impl Tool for ErroringTool {
    async fn execute(&self, _input_json: &str) -> Result<ToolOutput, ToolError> {
        let reason = self.message.clone();
        Err(ToolError::ExecutionFailed(reason))
    }
}
/// Build a [`ToolDefinition`] named `name` whose tool always fails with
/// `message`.
fn erroring_tool(name: &'static str, message: &'static str) -> ToolDefinition {
    Arc::new(move || {
        let schema = serde_json::json!({"type":"object"});
        let meta = ToolMeta::new(name).input_schema(schema);
        let tool: Arc<dyn Tool> = Arc::new(ErroringTool {
            message: String::from(message),
        });
        (meta, tool)
    })
}
/// Verify that `on_tool_result` also fires for a failed execution, with
/// `is_error == true`, the error text in the summary, and no content.
#[tokio::test]
async fn test_callback_tool_result_error_path() {
    let completed = Event::Status(ClientStatusEvent {
        status: ResponseStatus::Completed,
    });
    let client = MockLlmClient::new(vec![
        Event::tool_use_start(0, "call_err", "erroring"),
        Event::tool_input_delta(0, "{}"),
        Event::tool_use_stop(0),
        completed,
    ]);

    let mut worker = Worker::new(client);
    worker.register_tool(erroring_tool("erroring", "boom"));

    // (tool_use_id, summary, content, is_error) for each callback invocation.
    let captured: Arc<Mutex<Vec<(String, String, Option<String>, bool)>>> =
        Arc::new(Mutex::new(Vec::new()));
    let recorder = Arc::clone(&captured);
    worker.on_tool_result(move |tool_result| {
        let row = (
            tool_result.tool_use_id.clone(),
            tool_result.summary.clone(),
            tool_result.content.clone(),
            tool_result.is_error,
        );
        recorder.lock().unwrap().push(row);
    });

    // The run outcome is deliberately ignored; only the callback matters here.
    let _ = worker.run("fail it").await;

    let observed = captured.lock().unwrap();
    assert_eq!(observed.len(), 1);
    let (tool_use_id, summary, content, is_error) = &observed[0];
    assert_eq!(tool_use_id, "call_err");
    assert!(
        summary.contains("boom"),
        "summary should carry the error message: {}",
        summary
    );
    assert!(content.is_none());
    assert!(*is_error);
}
/// Verify that on_usage callback receives usage events
#[tokio::test]
async fn test_callback_usage_events() {

View File

@ -59,6 +59,7 @@ impl LlmClient for MockLlmClient {
status: Some(500),
code: Some("mock_error".to_string()),
message: "No more mock responses".to_string(),
retry_after: None,
});
}
let events = self.responses[count].clone();

View File

@ -12,7 +12,7 @@ use llm_worker::interceptor::{
Interceptor, PostToolAction, PreToolAction, ToolCallInfo, ToolResultInfo,
};
use llm_worker::llm_client::event::{Event, ResponseStatus, StatusEvent};
use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta, ToolOutput};
use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta, ToolOutput, ToolResult};
mod common;
use common::MockLlmClient;
@ -268,3 +268,59 @@ async fn test_post_tool_call_modification() {
"Result should be modified"
);
}
/// Hook: pre_tool_call synthetic result - skipped tool gets an error result in history.
#[tokio::test]
async fn test_before_tool_call_synthetic_result_committed() {
let events = vec![
Event::tool_use_start(0, "call_1", "blocked_tool"),
Event::tool_input_delta(0, r#"{}"#),
Event::tool_use_stop(0),
Event::Status(StatusEvent {
status: ResponseStatus::Completed,
}),
];
let client = MockLlmClient::with_responses(vec![
events,
vec![
Event::text_block_start(0),
Event::text_delta(0, "Denied."),
Event::text_block_stop(0, None),
Event::Status(StatusEvent {
status: ResponseStatus::Completed,
}),
],
]);
let mut worker = Worker::new(client);
let blocked_tool = SlowTool::new("blocked_tool", 10);
let blocked_clone = blocked_tool.clone();
worker.register_tool(blocked_tool.definition());
struct SyntheticPolicy;
#[async_trait]
impl Interceptor for SyntheticPolicy {
async fn pre_tool_call(&self, info: &mut ToolCallInfo) -> PreToolAction {
PreToolAction::SyntheticResult(ToolResult::error(
info.call.id.clone(),
"permission denied",
))
}
}
worker.set_interceptor(SyntheticPolicy);
let result = worker.run("Test synthetic result").await.unwrap();
assert_eq!(blocked_clone.call_count(), 0, "Blocked tool should not run");
assert!(result.worker.history().iter().any(|item| matches!(
item,
llm_worker::Item::ToolResult {
call_id,
summary,
is_error: true,
..
} if call_id == "call_1" && summary == "permission denied"
)));
}

View File

@ -0,0 +1,210 @@
//! Reasoning history round-trip integration tests.
//!
//! Confirms that `Item::Reasoning` is retained, never dropped, across the
//! Worker lifecycle: stream → history append → next outgoing request.
//!
//! Checks:
//! - Anthropic-style thinking + signature stays in history as
//!   `Item::Reasoning::signature`
//! - OpenAI Responses-style reasoning text + summary + encrypted_content is
//!   expanded into the corresponding `Item::Reasoning` fields
//! - The preceding reasoning is included near the front of `request.items` of
//!   the next outgoing request (head of the assistant message, per the Anthropic spec)
mod common;
use common::MockLlmClient;
use llm_worker::Item;
use llm_worker::Worker;
use llm_worker::llm_client::event::{Event, ReasoningItemEvent, ResponseStatus, StatusEvent};
/// Anthropic-style sequence: thinking block → text → completion.
/// The worker history should hold Reasoning (with signature) followed by the
/// assistant message.
#[tokio::test]
async fn anthropic_thinking_round_trips_signature_into_history() {
    let thinking = Event::ReasoningItem(ReasoningItemEvent {
        id: None,
        text: "let me think...".into(),
        summary: Vec::new(),
        encrypted_content: None,
        signature: Some("SIG-OPUS".into()),
    });
    let completed = Event::Status(StatusEvent {
        status: ResponseStatus::Completed,
    });
    let client = MockLlmClient::new(vec![
        thinking,
        Event::text_block_start(0),
        Event::text_delta(0, "Here's the answer"),
        Event::text_block_stop(0, None),
        completed,
    ]);

    let output = Worker::new(client).run("question?").await.expect("run ok");
    let worker = output.worker;
    let history = worker.history();

    // Expected layout: user / reasoning / assistant_message.
    assert_eq!(history.len(), 3, "history: {history:?}");
    assert!(matches!(history[0], Item::Message { .. }));

    let other = &history[1];
    let Item::Reasoning {
        text, signature, ..
    } = other
    else {
        panic!("expected Reasoning, got {other:?}");
    };
    assert_eq!(text, "let me think...");
    assert_eq!(signature.as_deref(), Some("SIG-OPUS"));

    assert_eq!(history[2].as_text(), Some("Here's the answer"));
}
/// OpenAI Responses-style: a reasoning item carrying encrypted_content and a
/// summary must be expanded into the matching `Item::Reasoning` fields.
#[tokio::test]
async fn openai_reasoning_round_trips_encrypted_and_summary() {
    let reasoning_event = Event::ReasoningItem(ReasoningItemEvent {
        id: Some("r1".into()),
        text: "inner reasoning".into(),
        summary: vec!["sum-A".into(), "sum-B".into()],
        encrypted_content: Some("ENC-OPAQUE".into()),
        signature: None,
    });
    let completed = Event::Status(StatusEvent {
        status: ResponseStatus::Completed,
    });
    let client = MockLlmClient::new(vec![
        reasoning_event,
        Event::text_block_start(0),
        Event::text_delta(0, "answer"),
        Event::text_block_stop(0, None),
        completed,
    ]);

    let output = Worker::new(client).run("q").await.expect("run ok");
    let worker = output.worker;
    let history = worker.history();

    // Index 1 is the item right after the user message.
    let other = &history[1];
    let Item::Reasoning {
        text,
        summary,
        encrypted_content,
        signature,
        id,
        ..
    } = other
    else {
        panic!("expected Reasoning, got {other:?}");
    };
    assert_eq!(text, "inner reasoning");
    assert_eq!(summary, &vec!["sum-A".to_string(), "sum-B".to_string()]);
    assert_eq!(encrypted_content.as_deref(), Some("ENC-OPAQUE"));
    assert!(signature.is_none());
    assert_eq!(id.as_deref(), Some("r1"));
}
/// Within an assistant turn, Reasoning must be ordered before text/tool_call
/// (Anthropic requires thinking at the head of the assistant message).
#[tokio::test]
async fn reasoning_precedes_text_in_assistant_burst() {
    // The ReasoningItem is emitted after the text block here; the test checks
    // that it is nevertheless placed ahead of the text in the history.
    let late_reasoning = Event::ReasoningItem(ReasoningItemEvent {
        text: "after text".into(),
        signature: Some("SIG".into()),
        ..Default::default()
    });
    let completed = Event::Status(StatusEvent {
        status: ResponseStatus::Completed,
    });
    let client = MockLlmClient::new(vec![
        Event::text_block_start(0),
        Event::text_delta(0, "intermediate"),
        Event::text_block_stop(0, None),
        late_reasoning,
        completed,
    ]);

    let output = Worker::new(client).run("q").await.expect("run ok");
    let worker = output.worker;
    let history = worker.history();

    // Expected layout: user / reasoning (first) / assistant_message.
    assert!(matches!(history[1], Item::Reasoning { .. }));
    assert_eq!(history[2].as_text(), Some("intermediate"));
}
/// Resume scenario: an `Item::Reasoning` (with signature) injected into the
/// Worker via `set_history` must appear unchanged in the `Request::items` of
/// the next outgoing request (the worker's contract is to not alter items).
#[tokio::test]
async fn injected_reasoning_survives_into_outgoing_request() {
    use async_trait::async_trait;
    use futures::Stream;
    use llm_worker::llm_client::{ClientError, LlmClient, Request};
    use std::pin::Pin;
    use std::sync::{Arc, Mutex};

    /// Client that stores the request it receives and answers with a stream
    /// containing only a `Completed` status event.
    #[derive(Clone)]
    struct CapturingClient {
        /// Slot holding the most recently received request.
        captured: Arc<Mutex<Option<Request>>>,
    }

    #[async_trait]
    impl LlmClient for CapturingClient {
        async fn stream(
            &self,
            request: Request,
        ) -> Result<Pin<Box<dyn Stream<Item = Result<Event, ClientError>> + Send>>, ClientError>
        {
            *self.captured.lock().unwrap() = Some(request);
            let completed = Event::Status(StatusEvent {
                status: ResponseStatus::Completed,
            });
            Ok(Box::pin(futures::stream::iter(vec![Ok(completed)])))
        }

        fn clone_boxed(&self) -> Box<dyn LlmClient> {
            Box::new(self.clone())
        }
    }

    let slot = Arc::new(Mutex::new(None));
    let mut worker = Worker::new(CapturingClient {
        captured: Arc::clone(&slot),
    });

    // Resume: feed in a pre-existing history.
    let prior_history = vec![
        Item::user_message("prior question"),
        Item::reasoning("prior thinking").with_signature("SIG-PRIOR"),
        Item::assistant_message("prior answer"),
    ];
    worker.set_history(prior_history);

    let _ = worker.run("follow up").await.expect("run ok");

    let request = slot
        .lock()
        .unwrap()
        .take()
        .expect("client should have received a request");

    // Every Reasoning item in the outgoing request must be the injected one,
    // and at least one must be present.
    let mut reasoning_seen = false;
    for item in request.items.iter() {
        let Item::Reasoning {
            text, signature, ..
        } = item
        else {
            continue;
        };
        assert_eq!(text, "prior thinking");
        assert_eq!(signature.as_deref(), Some("SIG-PRIOR"));
        reasoning_seen = true;
    }
    assert!(
        reasoning_seen,
        "Reasoning item must survive into outgoing request items: {req:?}",
        req = request.items,
    );
}

View File

@ -0,0 +1,185 @@
//! Single-shot request / error classification tests for the HTTP transport.
//!
//! Retry/backoff belongs to the Worker's lifecycle management, so the transport
//! sends the request exactly once and returns the HTTP status / Retry-After carried in a `ClientError`.
use futures::StreamExt;
use llm_worker::llm_client::LlmClient;
use llm_worker::llm_client::auth::AuthRequirement;
use llm_worker::llm_client::capability::ModelCapability;
use llm_worker::llm_client::error::ClientError;
use llm_worker::llm_client::event::Event;
use llm_worker::llm_client::scheme::Scheme;
use llm_worker::llm_client::transport::{HttpTransport, ResolvedAuth};
use llm_worker::llm_client::types::Request;
use serde_json::Value;
use std::time::Duration;
use wiremock::matchers::{method, path};
use wiremock::{Mock, MockServer, ResponseTemplate};
/// Test-only scheme that never interprets the SSE payload. With `parse_fail`
/// set, `parse_sse` returns `ClientError::Sse`, which surfaces while the
/// stream is being consumed.
#[derive(Clone)]
struct DummyScheme {
    /// When true, every `parse_sse` call fails.
    parse_fail: bool,
}

impl Scheme for DummyScheme {
    type State = ();

    fn default_base_url(&self) -> &'static str {
        ""
    }

    fn path(&self, _: &str) -> String {
        String::from("/v1/chat")
    }

    fn required_auth(&self) -> AuthRequirement {
        AuthRequirement::None
    }

    fn default_capability(&self) -> ModelCapability {
        ModelCapability::minimal()
    }

    fn build_request_body(&self, _: &str, _: &Request, _: &ModelCapability) -> Value {
        serde_json::json!({})
    }

    fn parse_sse(&self, _: &str, _: &str, _: &mut ()) -> Result<Vec<Event>, ClientError> {
        if !self.parse_fail {
            return Ok(vec![]);
        }
        Err(ClientError::Sse(
            "simulated mid-stream parse failure".into(),
        ))
    }
}
/// Create an [`HttpTransport`] over [`DummyScheme`] that targets `base_url`
/// with model id `"test-model"`, no auth and the minimal capability set.
fn build_transport(base_url: impl Into<String>, parse_fail: bool) -> HttpTransport<DummyScheme> {
    let scheme = DummyScheme { parse_fail };
    let capability = ModelCapability::minimal();
    HttpTransport::new(scheme, "test-model", base_url, ResolvedAuth::None, capability)
}
/// 200 response with an empty body declared as `text/event-stream`.
fn ok_sse() -> ResponseTemplate {
    let empty_body = b"".to_vec();
    let template = ResponseTemplate::new(200).insert_header("content-type", "text/event-stream");
    template.set_body_raw(empty_body, "text/event-stream")
}
#[tokio::test]
async fn retryable_status_returns_api_error_without_retrying() {
let server = MockServer::start().await;
Mock::given(method("POST"))
.and(path("/v1/chat"))
.respond_with(ResponseTemplate::new(503).set_body_string("upstream connect error"))
.up_to_n_times(1)
.mount(&server)
.await;
Mock::given(method("POST"))
.and(path("/v1/chat"))
.respond_with(ok_sse())
.mount(&server)
.await;
let transport = build_transport(server.uri(), false);
match transport.stream(Request::default()).await {
Err(ClientError::Api {
status: Some(503), ..
}) => {}
Err(other) => panic!("expected Api(503), got {other:?}"),
Ok(_) => panic!("transport must not retry internally"),
}
let received = server.received_requests().await.unwrap();
assert_eq!(
received.len(),
1,
"transport should send exactly one request"
);
}
/// The `retry-after: 1` header of a 503 response must be exposed through
/// `ClientError::retry_after()` as one second.
#[tokio::test]
async fn retry_after_header_is_preserved_on_api_error() {
    let server = MockServer::start().await;
    let throttled = ResponseTemplate::new(503).insert_header("retry-after", "1");
    Mock::given(method("POST"))
        .and(path("/v1/chat"))
        .respond_with(throttled)
        .mount(&server)
        .await;

    let transport = build_transport(server.uri(), false);
    let outcome = transport.stream(Request::default()).await;
    match outcome {
        Ok(_) => panic!("expected error"),
        Err(
            err @ ClientError::Api {
                status: Some(503), ..
            },
        ) => {
            let expected_delay = Duration::from_secs(1);
            assert_eq!(err.retry_after(), Some(expected_delay));
        }
        Err(other) => panic!("expected Api(503), got {other:?}"),
    }
}
/// A parse failure after the stream opened (status 200) must surface as an
/// `Err(ClientError::Sse)` stream item, without a second request being sent.
#[tokio::test]
async fn mid_stream_sse_error_is_stream_item_error() {
    let server = MockServer::start().await;
    let sse_payload = b"event: data\ndata: payload\n\n".to_vec();
    let response = ResponseTemplate::new(200)
        .insert_header("content-type", "text/event-stream")
        .set_body_raw(sse_payload, "text/event-stream");
    Mock::given(method("POST"))
        .and(path("/v1/chat"))
        .respond_with(response)
        .mount(&server)
        .await;

    // `parse_fail = true` makes the dummy scheme reject the payload.
    let transport = build_transport(server.uri(), true);
    let mut stream = transport
        .stream(Request::default())
        .await
        .expect("status 200 should open stream");

    // Drain the whole stream, remembering whether any item was an Sse error.
    let mut saw_sse_err = false;
    while let Some(item) = stream.next().await {
        saw_sse_err |= matches!(item, Err(ClientError::Sse(_)));
    }
    assert!(saw_sse_err, "expected Sse error from stream consumer");

    let request_log = server.received_requests().await.unwrap();
    assert_eq!(request_log.len(), 1, "mid-stream Sse must not reopen stream");
}
/// A 401 must come back as `ClientError::Api` after exactly one request.
#[tokio::test]
async fn non_retryable_status_returns_api_error() {
    let server = MockServer::start().await;
    let unauthorized = ResponseTemplate::new(401).set_body_string("unauthorized");
    Mock::given(method("POST"))
        .and(path("/v1/chat"))
        .respond_with(unauthorized)
        .mount(&server)
        .await;

    let transport = build_transport(server.uri(), false);
    let outcome = transport.stream(Request::default()).await;
    match outcome {
        Ok(_) => panic!("expected error"),
        Err(ClientError::Api {
            status: Some(401), ..
        }) => {}
        Err(other) => panic!("expected Api(401), got {other:?}"),
    }

    let request_log = server.received_requests().await.unwrap();
    assert_eq!(request_log.len(), 1);
}

View File

@ -5,8 +5,8 @@
mod common;
use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{Arc, Mutex};
use async_trait::async_trait;
use common::MockLlmClient;
@ -44,14 +44,12 @@ fn test_mutable_history_manipulation() {
assert!(worker.history().is_empty());
// Add to history
worker.push_item(Item::user_message("Hello"));
worker.push_item(Item::assistant_message("Hi there!"));
worker.append_history(vec![Item::user_message("Hello")]);
worker.append_history(vec![Item::assistant_message("Hi there!")]);
assert_eq!(worker.history().len(), 2);
// Mutable access to history
worker
.history_mut()
.push(Item::user_message("How are you?"));
// Append to history via the callback-aware API.
worker.append_history(vec![Item::user_message("How are you?")]);
assert_eq!(worker.history().len(), 3);
// Clear history
@ -71,34 +69,38 @@ fn test_mutable_history_manipulation() {
#[test]
fn test_mutable_builder_pattern() {
let client = MockLlmClient::new(vec![]);
let worker = Worker::new(client)
.system_prompt("System prompt")
.with_item(Item::user_message("Hello"))
.with_item(Item::assistant_message("Hi!"))
.with_items(vec![
Item::user_message("How are you?"),
Item::assistant_message("I'm fine!"),
]);
let worker = Worker::new(client).system_prompt("System prompt");
assert_eq!(worker.get_system_prompt(), Some("System prompt"));
assert_eq!(worker.history().len(), 4);
assert!(worker.history().is_empty());
}
/// Verify that multiple items can be added with extend_history
/// Verify that multiple items can be added with append_history and callbacks fire.
#[test]
fn test_mutable_extend_history() {
fn test_mutable_append_history() {
let client = MockLlmClient::new(vec![]);
let observed = Arc::new(Mutex::new(Vec::new()));
let observed_for_callback = Arc::clone(&observed);
let mut worker = Worker::new(client);
worker.on_history_append(move |item| {
if let Some(text) = item.as_text() {
observed_for_callback.lock().unwrap().push(text.to_string());
}
});
worker.push_item(Item::user_message("First"));
worker.append_history(vec![Item::user_message("First")]);
worker.extend_history(vec![
worker.append_history(vec![
Item::assistant_message("Response 1"),
Item::user_message("Second"),
Item::assistant_message("Response 2"),
]);
assert_eq!(worker.history().len(), 4);
assert_eq!(
observed.lock().unwrap().as_slice(),
["First", "Response 1", "Second", "Response 2"]
);
}
#[derive(Clone)]
@ -162,8 +164,8 @@ fn test_lock_transition() {
let mut worker = Worker::new(client);
worker.set_system_prompt("System");
worker.push_item(Item::user_message("Hello"));
worker.push_item(Item::assistant_message("Hi"));
worker.append_history(vec![Item::user_message("Hello")]);
worker.append_history(vec![Item::assistant_message("Hi")]);
// Lock
let locked_worker = worker.lock();
@ -180,14 +182,14 @@ fn test_unlock_transition() {
let client = MockLlmClient::new(vec![]);
let mut worker = Worker::new(client);
worker.push_item(Item::user_message("Hello"));
worker.append_history(vec![Item::user_message("Hello")]);
let locked_worker = worker.lock();
// Unlock
let mut worker = locked_worker.unlock();
// History operations are available again in Mutable state
worker.push_item(Item::assistant_message("Hi"));
worker.append_history(vec![Item::assistant_message("Hi")]);
worker.clear_history();
assert!(worker.history().is_empty());
}
@ -310,8 +312,8 @@ async fn test_locked_prefix_len_tracking() {
let mut worker = Worker::new(client);
// Add items beforehand
worker.push_item(Item::user_message("Pre-existing message 1"));
worker.push_item(Item::assistant_message("Pre-existing response 1"));
worker.append_history(vec![Item::user_message("Pre-existing message 1")]);
worker.append_history(vec![Item::assistant_message("Pre-existing response 1")]);
assert_eq!(worker.history().len(), 2);
@ -352,14 +354,18 @@ async fn test_turn_count_increment() -> Result<(), WorkerError> {
let worker = Worker::new(client);
assert_eq!(worker.turn_count(), 0);
assert_eq!(worker.llm_call_count(), 0);
// First run consumes Mutable, returns RunOutput
let mut worker = worker.run("First").await?.worker;
assert_eq!(worker.turn_count(), 1);
// Retry not yet implemented → AgentTurn:LlmCall is 1:1.
assert_eq!(worker.llm_call_count(), 1);
// Subsequent runs on Locked take &mut self
worker.run("Second").await?;
assert_eq!(worker.turn_count(), 2);
assert_eq!(worker.llm_call_count(), 2);
Ok(())
}
@ -376,9 +382,11 @@ async fn test_unlock_edit_relock() {
}),
]]);
let worker = Worker::new(client)
.with_item(Item::user_message("Hello"))
.with_item(Item::assistant_message("Hi"));
let mut worker = Worker::new(client);
worker.append_history(vec![
Item::user_message("Hello"),
Item::assistant_message("Hi"),
]);
// Lock -> Unlock
let locked = worker.lock();
@ -388,7 +396,7 @@ async fn test_unlock_edit_relock() {
// Edit history
unlocked.clear_history();
unlocked.push_item(Item::user_message("Fresh start"));
unlocked.append_history(vec![Item::user_message("Fresh start")]);
// Re-lock
let relocked = unlocked.lock();

View File

@ -5,13 +5,14 @@ edition.workspace = true
license.workspace = true
[dependencies]
llm-worker = { version = "0.2.1", path = "../llm-worker" }
protocol = { version = "0.1.0", path = "../protocol" }
serde = { version = "1.0.228", features = ["derive"] }
arc-swap = "1"
llm-worker = { workspace = true }
protocol = { workspace = true }
serde = { workspace = true, features = ["derive"] }
serde_ignored = "0.1.14"
thiserror = "2.0.18"
toml = "1.1.2"
tracing = "0.1.44"
thiserror = { workspace = true }
toml = { workspace = true }
tracing = { workspace = true }
[dev-dependencies]
tempfile = "3.27.0"
tempfile = { workspace = true }

View File

@ -9,6 +9,6 @@ Pod の宣言的設定を TOML マニフェストとして定義・パースす
- `ModelConfig` — LLM モデル設定(scheme、base_url、model_id、auth)
- `SchemeKind` — wire scheme 種別(`Anthropic`, `OpenaiChat`, `OpenaiResponses`, `Gemini`)
- `AuthRef` — 認証参照(`None`, `ApiKey { env, file }`, `CodexOAuth`)
- `WorkerManifest` — ワーカー設定(システムプロンプト、max_tokens、temperature)
- `WorkerManifest` — ワーカー設定(システムプロンプト、生成設定、reasoning)
- `ScopeConfig` / `ScopeRule` / `Permission` — allow / deny の宣言的スコープ設定
- `Scope` — 実行時スコープ。`from_config(&ScopeConfig, pwd)` で構築し、`is_readable` / `is_writable` / `permission_at` で問い合わせる

View File

@ -0,0 +1,125 @@
//! Cascade-layer collection helpers.
//!
//! Pod manifests are assembled from up to three on-disk layers (see
//! `pod::PodFactory` for the full cascade story):
//!
//! 1. **User manifest** — Pod CLI uses
//! [`crate::paths::user_manifest_path_with_env_override`]
//! 2. **Project manifest** at the closest `.insomnia/manifest.toml`
//! found by walking up from a starting directory (typically `cwd`)
//! 3. **Programmatic overlay** supplied at the call site
//!
//! This module owns the project-layer discovery and the parser glue.
//! User-layer path resolution lives in [`crate::paths`].
//!
//! Cascade *merging* and final validation stay outside this module —
//! that's the data layer's responsibility (`PodManifestConfig::merge`
//! and `PodManifest::try_from`). This module only handles the I/O and
//! path-discovery glue around them.
use std::path::{Path, PathBuf};
use crate::PodManifestConfig;
/// Failure modes of loading one manifest layer from disk. Both variants carry
/// the offending path so the message can name the file.
#[derive(Debug, thiserror::Error)]
pub enum LayerLoadError {
    /// The file at `path` could not be read.
    #[error("failed to read manifest {}: {source}", .path.display())]
    Io {
        /// Path that was being read.
        path: PathBuf,
        /// Underlying I/O error.
        #[source]
        source: std::io::Error,
    },

    /// The file at `path` was read but is not a valid manifest TOML.
    #[error("failed to parse manifest {}: {source}", .path.display())]
    Parse {
        /// Path whose contents failed to parse.
        path: PathBuf,
        /// Underlying TOML deserialization error.
        #[source]
        source: toml::de::Error,
    },
}
/// Search `start` and each of its ancestors, nearest first, for a
/// `.insomnia/manifest.toml` file.
///
/// `start` is canonicalized first; when that fails (for example because the
/// path does not exist) the path is used as given. Returns the first
/// candidate that is an existing file, or `None` once the ancestors are
/// exhausted.
pub fn find_project_manifest_from(start: &Path) -> Option<PathBuf> {
    let origin = match start.canonicalize() {
        Ok(resolved) => resolved,
        Err(_) => start.to_path_buf(),
    };

    // `ancestors` yields the path itself first, then each parent in turn.
    origin
        .ancestors()
        .map(|dir| dir.join(".insomnia").join("manifest.toml"))
        .find(|candidate| candidate.is_file())
}
/// Load the file at `path` and parse its contents as a partial
/// [`PodManifestConfig`].
///
/// Resolving paths against a base directory and merging with other layers
/// is left to the caller.
///
/// # Errors
///
/// * [`LayerLoadError::Io`] when the file cannot be read.
/// * [`LayerLoadError::Parse`] when the contents are rejected by
///   `PodManifestConfig::from_toml`.
pub fn load_layer(path: &Path) -> Result<PodManifestConfig, LayerLoadError> {
    let text = match std::fs::read_to_string(path) {
        Ok(text) => text,
        Err(source) => {
            return Err(LayerLoadError::Io {
                path: path.to_path_buf(),
                source,
            });
        }
    };

    match PodManifestConfig::from_toml(&text) {
        Ok(config) => Ok(config),
        Err(source) => Err(LayerLoadError::Parse {
            path: path.to_path_buf(),
            source,
        }),
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// A manifest at the temp root is found when starting two levels deeper.
    #[test]
    fn find_project_manifest_walks_up() {
        let sandbox = TempDir::new().unwrap();
        let root = sandbox.path().canonicalize().unwrap();

        let insomnia_dir = root.join(".insomnia");
        std::fs::create_dir_all(&insomnia_dir).unwrap();
        let manifest = insomnia_dir.join("manifest.toml");
        std::fs::write(&manifest, "").unwrap();

        let nested = root.join("a").join("b");
        std::fs::create_dir_all(&nested).unwrap();

        let located = find_project_manifest_from(&nested).unwrap();
        assert_eq!(located, manifest);
    }

    /// Starting from a fresh temp directory with no manifest yields `None`.
    #[test]
    fn find_project_manifest_returns_none_when_absent() {
        let sandbox = TempDir::new().unwrap();
        let located = find_project_manifest_from(sandbox.path());
        assert!(located.is_none());
    }

    /// A manifest written to disk is parsed back into a partial config.
    #[test]
    fn load_layer_round_trips_partial_config() {
        let sandbox = TempDir::new().unwrap();
        let manifest_path = sandbox.path().join("manifest.toml");
        let contents = r#"
[pod]
name = "from-disk"
"#;
        std::fs::write(&manifest_path, contents).unwrap();

        let config = load_layer(&manifest_path).unwrap();
        assert_eq!(config.pod.name.as_deref(), Some("from-disk"));
    }

    /// Reading a missing file reports `Io` and echoes the requested path.
    #[test]
    fn load_layer_io_error_carries_path() {
        let bogus = PathBuf::from("/definitely/does/not/exist/manifest.toml");
        let failure = load_layer(&bogus).unwrap_err();
        let LayerLoadError::Io { path, .. } = failure else {
            panic!("expected Io variant");
        };
        assert_eq!(path, bogus);
    }
}

File diff suppressed because it is too large Load Diff

View File

@ -8,11 +8,15 @@
/// Byte-size cap applied to any tool's `content` output when no
/// per-tool override is set. See [`crate::ToolOutputLimits`].
pub const TOOL_OUTPUT_MAX_BYTES: usize = 16 * 1024;
pub const TOOL_OUTPUT_MAX_BYTES: usize = 64 * 1024;
/// Number of most-recent turns protected from pruning. See
/// [`crate::CompactionConfig::prune_protected_turns`].
pub const PRUNE_PROTECTED_TURNS: usize = 3;
/// Byte-size cap applied to each submit-time FileRef upload / attachment.
/// See [`crate::FileUploadLimits`].
pub const FILE_UPLOAD_MAX_BYTES: usize = 256 * 1024;
/// Token budget at the history tail protected from pruning. See
/// [`crate::CompactionConfig::prune_protected_tokens`].
pub const PRUNE_PROTECTED_TOKENS: u64 = 8000;
/// Minimum estimated token savings required to trigger a prune. See
/// [`crate::CompactionConfig::prune_min_savings`].
@ -21,27 +25,78 @@ pub const PRUNE_MIN_SAVINGS: u64 = 4096;
/// Token budget retained (unchanged) at the tail of the history across
/// a compact. Items whose cumulative token count fits within this budget
/// starting from the end are kept verbatim; the rest are summarised.
/// See [`crate::CompactionConfig::compact_retained_tokens`].
/// See [`crate::CompactionConfig::retained_tokens`].
pub const COMPACT_RETAINED_TOKENS: u64 = 8000;
/// Target size for the deterministic compact overview/index fed to the
/// compact worker. Exceeding this target is tolerated.
/// See [`crate::CompactionConfig::overview_target_tokens`].
pub const COMPACT_OVERVIEW_TARGET_TOKENS: u64 = 8_000;
/// Warning threshold for compact overview/index size. Compaction continues.
/// See [`crate::CompactionConfig::overview_warning_tokens`].
pub const COMPACT_OVERVIEW_WARNING_TOKENS: u64 = 16_000;
/// Hard deterministic-overview deadline. When exceeded, overview generation
/// falls back to a coarser index before the compact worker is started.
/// See [`crate::CompactionConfig::overview_deadline_tokens`].
pub const COMPACT_OVERVIEW_DEADLINE_TOKENS: u64 = 40_000;
/// Default instruction asset reference used when `worker.instruction`
/// is omitted. See the `PromptLoader` prefix addressing scheme for the
/// `$insomnia/` / `$user/` / `$workspace/` namespaces.
pub const DEFAULT_INSTRUCTION: &str = "$insomnia/default";
/// Default language policy used by the main worker for normal prose
/// responses. See [`crate::WorkerManifest::language`].
pub const WORKER_LANGUAGE: &str =
"match the user's language unless they explicitly request another language";
/// Token budget for auto-read file contents injected into the new
/// session after compaction. Limits how much raw file text the
/// compact worker can pull into the compacted context via
/// `mark_read_required`. See
/// [`crate::CompactionConfig::compact_auto_read_budget`].
/// [`crate::CompactionConfig::auto_read_budget_tokens`].
pub const COMPACT_AUTO_READ_BUDGET: u64 = 8000;
/// Cumulative input-token cap for the compact worker's own LLM
/// Current prompt-occupancy cap for the compact worker's own LLM
/// calls. Exceeding this aborts the compact run (circuit-breaker
/// path). See
/// [`crate::CompactionConfig::compact_worker_max_input_tokens`].
/// path). See [`crate::CompactionConfig::worker_context_max_tokens`].
pub const COMPACT_WORKER_MAX_INPUT_TOKENS: u64 = 50_000;
/// Remaining compact-worker context threshold that triggers an instruction
/// to stop exploring and call `write_summary`.
/// See [`crate::CompactionConfig::finish_warning_remaining_tokens`].
pub const COMPACT_FINISH_WARNING_REMAINING_TOKENS: u64 = 8_000;
/// Context reserve preserved for final summary/tool closing turns.
/// See [`crate::CompactionConfig::final_reserve_tokens`].
pub const COMPACT_FINAL_RESERVE_TOKENS: u64 = 4_000;
/// Optional maximum compact-worker tool-loop depth. `None` means unlimited.
/// See [`crate::CompactionConfig::worker_max_turns`].
pub const COMPACT_WORKER_MAX_TURNS: Option<u32> = Some(20);
/// Target size for the `write_summary` text. Used in prompt/nudge text.
/// See [`crate::CompactionConfig::summary_target_tokens`].
pub const COMPACT_SUMMARY_TARGET_TOKENS: u64 = 2_000;
/// Hard validation cap for the final `write_summary` text.
/// See [`crate::CompactionConfig::summary_max_tokens`].
pub const COMPACT_SUMMARY_MAX_TOKENS: u64 = 4_000;
/// Dry-run cap for the compacted session's initial request context.
/// See [`crate::CompactionConfig::result_context_max_tokens`].
pub const COMPACT_RESULT_CONTEXT_MAX_TOKENS: u64 = 60_000;
/// Number of recently-touched files fed to the compact worker as
/// default references.
pub const COMPACT_DEFAULT_REFERENCE_COUNT: usize = 5;
/// Optional maximum extract-worker tool-loop depth. `None` means unlimited.
/// See [`crate::MemoryConfig::extract_worker_max_turns`].
pub const MEMORY_EXTRACT_WORKER_MAX_TURNS: Option<u32> = Some(8);
/// Default language used by memory extraction / consolidation workers for
/// durable memory and knowledge text. See [`crate::MemoryConfig::language`].
pub const MEMORY_LANGUAGE: &str = "English";

View File

@ -1,18 +1,27 @@
mod cascade;
mod config;
pub mod defaults;
mod model;
pub mod paths;
mod scope;
pub use cascade::{LayerLoadError, find_project_manifest_from, load_layer};
pub use config::{
CompactionConfigPartial, ModelConfigPartial, PodManifestConfig, PodMetaConfig, ResolveError,
ToolOutputLimitsPartial, WorkerManifestConfig,
CompactionConfigPartial, FileUploadLimitsPartial, PermissionConfigPartial, PodManifestConfig,
PodMetaConfig, ResolveError, ToolOutputLimitsPartial, WorkerManifestConfig,
};
pub use model::{
AuthRef, ModelCapability, ModelManifest, ReasoningControl, ReasoningEffort, SchemeKind,
};
pub use paths::{
user_manifest_path, user_manifest_path_from_env, user_manifest_path_with_env_override,
};
pub use model::{AuthRef, ModelConfig, SchemeKind};
pub use protocol::{Permission, ScopeRule};
pub use scope::{Scope, ScopeError};
pub use scope::{Scope, ScopeError, SharedScope};
use std::collections::HashMap;
use std::num::NonZeroU32;
use std::path::PathBuf;
use serde::{Deserialize, Serialize};
@ -25,17 +34,137 @@ use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PodManifest {
pub pod: PodMeta,
pub model: ModelConfig,
pub model: ModelManifest,
pub worker: WorkerManifest,
pub scope: ScopeConfig,
/// Session/debug persistence settings. Defaults keep extra traces off.
#[serde(default)]
pub session: SessionConfig,
/// Optional manifest-level tool permission policy. Absent means the
/// permission layer is disabled and tool calls run as before.
#[serde(default)]
pub permissions: Option<ToolPermissionConfig>,
#[serde(default)]
pub compaction: Option<CompactionConfig>,
/// Memory subsystem opt-in. Presence of `[memory]` in TOML enables
/// the memory tools (MemoryRead / MemoryWrite / MemoryEdit) and
/// causes Pod to deny generic write access to `<workspace>/memory/`
/// and `<workspace>/knowledge/`. Absent ⇒ legacy behaviour, no
/// memory tools registered.
#[serde(default)]
pub memory: Option<MemoryConfig>,
/// External Agent Skills (`SKILL.md`) directories to ingest as
/// Workflows. Each entry is a path to a skills *root* (i.e. a
/// directory whose children are individual `<name>/SKILL.md` skill
/// bundles). Paths are resolved against the manifest's base
/// directory like other path fields. Absent ⇒ no skills loaded;
/// there is no implicit `$config_dir/skills/` or builtin probe.
#[serde(default)]
pub skills: Option<SkillsConfig>,
}
/// Configuration for ingesting external Agent Skills (`SKILL.md`).
///
/// Skills are loaded *only* from the directories listed here — there is no
/// implicit `$config_dir/skills/` or builtin probe. The section is
/// cascade-merged across manifest layers, so a user-level manifest can
/// declare a shared skill root once while a project manifest adds its own
/// `.claude/skills/` / `.cursor/skills/` paths on top.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct SkillsConfig {
/// Skills *roots*. Each child of a root must be an individual
/// `<name>/SKILL.md` bundle; the root directory itself is not a skill.
/// Paths are resolved against the manifest base directory before
/// [`PodManifest`] is materialised. Defaults to an empty list when the
/// key is omitted.
#[serde(default)]
pub directories: Vec<PathBuf>,
}
/// Memory subsystem configuration. Presence in the manifest enables
/// memory; the workspace root defaults to the Pod's pwd unless an
/// explicit override is given.
///
/// All fields are `Option` and carry `#[serde(default)]`, so an empty
/// `[memory]` table parses with every field `None`. Defaults are applied at
/// the consumer (`.unwrap_or(defaults::...)`). This keeps cascade `merge`
/// simple (`upper.x.or(self.x)`) without a separate partial/resolved split.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct MemoryConfig {
/// Override for the workspace root. When `None`, the Pod's pwd
/// (resolved at construction time) is used. When set, must be an
/// absolute path.
#[serde(default)]
pub workspace_root: Option<PathBuf>,
/// Maximum number of records returned by `MemoryQuery` /
/// `KnowledgeQuery` per call. `None` ⇒ tool default (20).
#[serde(default)]
pub query_result_limit: Option<usize>,
/// Lines of context before and after each match in query excerpts.
/// Ignored when the request omits `query`. `None` ⇒ tool default (3).
#[serde(default)]
pub query_excerpt_lines: Option<usize>,
/// Whether the body of `memory/summary.md` is exposed in the resident
/// system-prompt section. `None` ⇒ enabled.
#[serde(default)]
pub inject_summary: Option<bool>,
/// Language used by memory extraction / consolidation workers for durable
/// memory and knowledge text. Free-form so workspaces can use names like
/// `English`, `Japanese`, or locale tags. `None` ⇒
/// [`defaults::MEMORY_LANGUAGE`].
#[serde(default)]
pub language: Option<String>,
/// Optional model for the extract worker. When `None`,
/// the main pod model is cloned via `clone_boxed()`. Lightweight
/// reasoning-capable models (Haiku / 4o-mini / Flash class) are
/// recommended.
#[serde(default)]
pub extract_model: Option<ModelManifest>,
/// Cumulative input-token threshold (since the last extract pointer)
/// that triggers an extract run. `None` disables the extract trigger
/// entirely; memory tools and resident injection still work, only
/// the auto-extract trigger is dormant.
#[serde(default)]
pub extract_threshold: Option<u64>,
/// Optional maximum extract-worker tool-loop depth. The default bounds
/// runaway short-context loops. Falls through to
/// [`defaults::MEMORY_EXTRACT_WORKER_MAX_TURNS`] when unset.
///
/// NOTE(review): an omitted key deserialises to `None`, which is also the
/// value described as "unlimited". If the consumer substitutes the default
/// for `None`, a manifest cannot request an unlimited worker through this
/// field — confirm against the consumer.
#[serde(default)]
pub extract_worker_max_turns: Option<u32>,
/// Optional model for the consolidation worker. When
/// `None`, the main pod model is cloned via `clone_boxed()`.
/// Reasoning-class models are recommended.
#[serde(default)]
pub consolidation_model: Option<ModelManifest>,
/// Consolidation trigger: file-count threshold of `_staging/`. The
/// consolidation run fires when the staging directory has at least
/// this many entries. Either threshold reaching its limit fires
/// consolidation (logical OR). `None` for both thresholds ⇒
/// consolidation disabled.
#[serde(default)]
pub consolidation_threshold_files: Option<usize>,
/// Consolidation trigger: byte-size threshold across all `_staging/`
/// entries. Either threshold reaching its limit fires consolidation.
/// `None` for both thresholds ⇒ consolidation disabled.
#[serde(default)]
pub consolidation_threshold_bytes: Option<u64>,
}
/// Pod metadata (the `pod` field of [`PodManifest`]).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PodMeta {
/// Name of the pod. Required: the field has no serde default.
pub name: String,
/// Optional path to a TOML override file read as the top layer of
/// `pod::PromptCatalog`. Subject to the same relative-path
/// resolution as other manifest paths (joined against the
/// manifest's base directory). `None` leaves the 4th overlay layer
/// empty; auto-discovered user and workspace packs still apply.
///
/// Note: unlike `worker.instruction`, this is a plain filesystem
/// path — not a `$prefix/` prompt reference. Pack files carry
/// structured TOML data, while `worker.instruction` points at a
/// minijinja `.md` template; the two use different addressing
/// conventions on purpose.
#[serde(default)]
pub prompt_pack: Option<PathBuf>,
}
/// Worker-level configuration embedded in the manifest.
@ -48,18 +177,39 @@ pub struct WorkerManifest {
/// unset manifests fall through to [`defaults::DEFAULT_INSTRUCTION`].
#[serde(default = "default_instruction")]
pub instruction: String,
/// Language policy used by the main worker for normal prose responses.
/// Free-form so workspaces can use names like `English`, `Japanese`,
/// locale tags, or a policy phrase. Unset manifests fall through to
/// [`defaults::WORKER_LANGUAGE`].
#[serde(default = "default_worker_language")]
pub language: String,
#[serde(default)]
pub max_tokens: Option<u32>,
#[serde(default)]
pub max_turns: Option<NonZeroU32>,
#[serde(default)]
pub temperature: Option<f32>,
#[serde(default)]
pub top_p: Option<f32>,
#[serde(default)]
pub top_k: Option<u32>,
#[serde(default)]
pub stop_sequences: Vec<String>,
#[serde(default)]
pub reasoning: Option<ReasoningControl>,
/// Byte-size caps applied to tool `content` before it reaches the
/// conversation history. The section is optional in TOML — when
/// omitted, `ToolOutputLimits::default()` (16KB default cap, no
/// omitted, `ToolOutputLimits::default()` (64 KiB default cap, no
/// per-tool overrides) is applied so truncation is on by default.
#[serde(default)]
pub tool_output: ToolOutputLimits,
/// Byte-size cap applied to submit-time FileRef uploads / attachments.
/// For file refs this caps the file body; for normal directory refs this
/// caps the rendered shallow listing body.
/// This is intentionally separate from tool-output truncation because
/// user-requested file attachments can usually tolerate a larger budget.
#[serde(default)]
pub file_upload: FileUploadLimits,
}
/// Byte-size caps applied to tool execution `content` before it enters
@ -79,14 +229,36 @@ pub struct ToolOutputLimits {
pub per_tool: HashMap<String, usize>,
}
/// Byte-size cap for submit-time FileRef uploads / attachments.
///
/// This governs the `[File: <path>]` system-message attachment produced
/// when a user explicitly submits a `@<path>` file reference, and the
/// rendered body of a shallow `[Dir: <path>]` listing for a normal directory
/// reference. It does not affect tool result truncation; see
/// [`ToolOutputLimits`] for that path.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileUploadLimits {
/// Cap applied to each resolved FileRef file body or directory-listing body.
/// When the key is omitted, the value comes from
/// `default_file_upload_max_bytes` (i.e. `defaults::FILE_UPLOAD_MAX_BYTES`).
#[serde(default = "default_file_upload_max_bytes")]
pub max_bytes: usize,
}
/// Returns [`defaults::TOOL_OUTPUT_MAX_BYTES`], the fallback byte cap for
/// tool output.
fn default_tool_output_max_bytes() -> usize {
defaults::TOOL_OUTPUT_MAX_BYTES
}
/// Returns [`defaults::FILE_UPLOAD_MAX_BYTES`]. Used both as the serde default
/// for [`FileUploadLimits::max_bytes`] and by `FileUploadLimits::default()`.
fn default_file_upload_max_bytes() -> usize {
defaults::FILE_UPLOAD_MAX_BYTES
}
/// Serde default for `worker.instruction`: an owned copy of
/// [`defaults::DEFAULT_INSTRUCTION`].
fn default_instruction() -> String {
    String::from(defaults::DEFAULT_INSTRUCTION)
}
/// Serde default for `worker.language`: an owned copy of
/// [`defaults::WORKER_LANGUAGE`].
fn default_worker_language() -> String {
    String::from(defaults::WORKER_LANGUAGE)
}
impl Default for ToolOutputLimits {
fn default() -> Self {
Self {
@ -96,6 +268,14 @@ impl Default for ToolOutputLimits {
}
}
impl Default for FileUploadLimits {
    /// Limits applied when the file-upload section is omitted entirely.
    ///
    /// Reuses the same provider as the per-field serde default so both paths
    /// agree on the cap.
    fn default() -> Self {
        let max_bytes = default_file_upload_max_bytes();
        Self { max_bytes }
    }
}
impl ToolOutputLimits {
/// Resolve the cap for a given tool name.
pub fn limit_for(&self, tool_name: &str) -> usize {
@ -123,15 +303,56 @@ pub struct ScopeConfig {
pub deny: Vec<ScopeRule>,
}
/// Session/debug persistence settings. The derived `Default` leaves every
/// extra trace switched off.
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq)]
pub struct SessionConfig {
/// Persist every provider stream event directly to `trace.jsonl` next to the
/// segment log. Intended for debugging stalls between stream requests; off
/// by default because it can be verbose.
#[serde(default)]
pub record_event_trace: bool,
}
/// Manifest-level pattern-based tool permission policy.
///
/// Presence of `[permissions]` enables this layer. Rules are evaluated
/// in declaration order; if none match, [`Self::default_action`] is used.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ToolPermissionConfig {
/// Action taken when no rule matches. Required whenever the section is
/// present: the field has no serde default.
pub default_action: ToolPermissionAction,
/// Ordered rule list. Serialised under the singular key `rule`, so entries
/// are written as `[[permissions.rule]]`; defaults to empty when omitted.
#[serde(default, rename = "rule")]
pub rules: Vec<ToolPermissionRule>,
}
/// One `[[permissions.rule]]` entry. All three fields are required.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ToolPermissionRule {
/// Tool registration name. Matching is case-insensitive at runtime so
/// manifests may use either `Bash` or `bash`.
pub tool: String,
/// Glob-like pattern matched against the tool's permission target
/// (for built-in tools, commonly `command`, `file_path`, or `pattern`).
pub pattern: String,
/// Decision applied when both `tool` and `pattern` match.
pub action: ToolPermissionAction,
}
/// Tool permission decision.
///
/// Serialised in snake case, i.e. `"allow"`, `"deny"`, `"ask"`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum ToolPermissionAction {
/// Permit the tool call.
Allow,
/// Refuse the tool call.
Deny,
/// NOTE(review): presumably defers the decision to an interactive
/// confirmation — confirm against the permission layer.
Ask,
}
/// Context compaction configuration.
///
/// Controls Prune (content removal from old tool results) and Compact
/// (full history summarisation). Omitting `[compaction]` disables both.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompactionConfig {
/// Number of recent turns protected from pruning.
#[serde(default = "default_prune_protected_turns")]
pub prune_protected_turns: usize,
/// Token budget at the history tail protected from pruning.
#[serde(default = "default_prune_protected_tokens")]
pub prune_protected_tokens: u64,
/// Minimum estimated token savings to trigger a prune.
#[serde(default = "default_prune_min_savings")]
@ -142,8 +363,8 @@ pub struct CompactionConfig {
/// Checked by the Controller after each run. When current occupancy
/// exceeds this value, compact runs before the next turn. `None`
/// disables the between-turns check.
#[serde(default)]
pub compact_threshold: Option<u64>,
#[serde(default, alias = "compact_threshold")]
pub threshold: Option<u64>,
/// Safety-net (between-requests) compaction threshold.
///
@ -152,60 +373,145 @@ pub struct CompactionConfig {
/// Controller can compact before the next LLM request. `None`
/// disables the between-requests check.
///
/// Expected relation: `compact_threshold < compact_request_threshold`
/// (proactive triggers before safety net). A reversed configuration
/// is accepted but logged as a warning.
#[serde(default)]
pub compact_request_threshold: Option<u64>,
/// Expected relation: `threshold < request_threshold` (proactive triggers
/// before safety net). A reversed configuration is accepted but logged as
/// a warning.
#[serde(default, alias = "compact_request_threshold")]
pub request_threshold: Option<u64>,
/// Token budget retained verbatim at the tail of the history after
/// compaction. Measured against the occupancy estimate from
/// `UsageRecord` history; turn boundaries are ignored.
#[serde(default = "default_compact_retained_tokens")]
pub compact_retained_tokens: u64,
#[serde(default = "default_retained_tokens", alias = "compact_retained_tokens")]
pub retained_tokens: u64,
/// Target size for the deterministic overview/index fed to the compact
/// worker. Overshooting this target is not an error.
#[serde(default = "default_overview_target_tokens")]
pub overview_target_tokens: u64,
/// Warning threshold for deterministic overview/index size.
#[serde(default = "default_overview_warning_tokens")]
pub overview_warning_tokens: u64,
/// Deadline threshold for deterministic overview/index generation.
/// Oversized overviews fall back to a coarser deterministic index.
#[serde(default = "default_overview_deadline_tokens")]
pub overview_deadline_tokens: u64,
/// Current prompt-occupancy cap for the compact worker's own LLM
/// requests. Exceeding this aborts the compact run.
#[serde(
default = "default_worker_context_max_tokens",
alias = "compact_worker_max_input_tokens"
)]
pub worker_context_max_tokens: u64,
/// Remaining compact-worker context threshold that triggers a warning and
/// an instruction to stop exploring and call `write_summary`.
#[serde(default = "default_finish_warning_remaining_tokens")]
pub finish_warning_remaining_tokens: u64,
/// Context reserve preserved for final summary/tool closing turns.
#[serde(default = "default_final_reserve_tokens")]
pub final_reserve_tokens: u64,
/// Optional maximum compact-worker tool-loop depth. `None` leaves the
/// worker unlimited; the default bounds runaway short-context loops.
#[serde(
default = "default_worker_max_turns",
alias = "compact_worker_max_turns"
)]
pub worker_max_turns: Option<u32>,
/// Target size for the `write_summary` text. Used in prompt/nudge text.
#[serde(default = "default_summary_target_tokens")]
pub summary_target_tokens: u64,
/// Hard validation cap for the final `write_summary` text.
#[serde(default = "default_summary_max_tokens")]
pub summary_max_tokens: u64,
/// Aggregate token budget for auto-read file contents injected into
/// the compacted session by the compact worker.
#[serde(default = "default_compact_auto_read_budget")]
pub compact_auto_read_budget: u64,
#[serde(
default = "default_auto_read_budget_tokens",
alias = "compact_auto_read_budget"
)]
pub auto_read_budget_tokens: u64,
/// Cumulative input-token cap for the compact worker's own LLM
/// calls. Exceeding this aborts the compact run.
#[serde(default = "default_compact_worker_max_input_tokens")]
pub compact_worker_max_input_tokens: u64,
/// Dry-run cap for the compacted session's initial request context.
#[serde(default = "default_result_context_max_tokens")]
pub result_context_max_tokens: u64,
/// Optional model for the compactor (summary) LLM.
/// If omitted, the main model is cloned via `clone_boxed()`.
#[serde(default)]
pub model: Option<ModelConfig>,
pub model: Option<ModelManifest>,
}
fn default_prune_protected_turns() -> usize {
defaults::PRUNE_PROTECTED_TURNS
fn default_prune_protected_tokens() -> u64 {
defaults::PRUNE_PROTECTED_TOKENS
}
/// Serde default for `prune_min_savings`; returns [`defaults::PRUNE_MIN_SAVINGS`].
fn default_prune_min_savings() -> u64 {
defaults::PRUNE_MIN_SAVINGS
}
fn default_compact_retained_tokens() -> u64 {
fn default_retained_tokens() -> u64 {
defaults::COMPACT_RETAINED_TOKENS
}
fn default_compact_auto_read_budget() -> u64 {
/// Serde default for `overview_target_tokens`; returns
/// [`defaults::COMPACT_OVERVIEW_TARGET_TOKENS`].
fn default_overview_target_tokens() -> u64 {
defaults::COMPACT_OVERVIEW_TARGET_TOKENS
}
/// Serde default for `overview_warning_tokens`; returns
/// [`defaults::COMPACT_OVERVIEW_WARNING_TOKENS`].
fn default_overview_warning_tokens() -> u64 {
defaults::COMPACT_OVERVIEW_WARNING_TOKENS
}
/// Serde default for `overview_deadline_tokens`; returns
/// [`defaults::COMPACT_OVERVIEW_DEADLINE_TOKENS`].
fn default_overview_deadline_tokens() -> u64 {
defaults::COMPACT_OVERVIEW_DEADLINE_TOKENS
}
/// Serde default for `worker_context_max_tokens`; returns
/// [`defaults::COMPACT_WORKER_MAX_INPUT_TOKENS`].
///
/// The constant name matches the field's serde alias
/// `compact_worker_max_input_tokens` rather than the field name.
fn default_worker_context_max_tokens() -> u64 {
defaults::COMPACT_WORKER_MAX_INPUT_TOKENS
}
/// Serde default for `finish_warning_remaining_tokens`; returns
/// [`defaults::COMPACT_FINISH_WARNING_REMAINING_TOKENS`].
fn default_finish_warning_remaining_tokens() -> u64 {
defaults::COMPACT_FINISH_WARNING_REMAINING_TOKENS
}
/// Serde default for `final_reserve_tokens`; returns
/// [`defaults::COMPACT_FINAL_RESERVE_TOKENS`].
fn default_final_reserve_tokens() -> u64 {
defaults::COMPACT_FINAL_RESERVE_TOKENS
}
/// Serde default for `worker_max_turns`; returns
/// [`defaults::COMPACT_WORKER_MAX_TURNS`], which is itself an `Option`, so an
/// omitted key takes the default bound instead of becoming `None`.
fn default_worker_max_turns() -> Option<u32> {
defaults::COMPACT_WORKER_MAX_TURNS
}
/// Serde default for `summary_target_tokens`; returns
/// [`defaults::COMPACT_SUMMARY_TARGET_TOKENS`].
fn default_summary_target_tokens() -> u64 {
defaults::COMPACT_SUMMARY_TARGET_TOKENS
}
/// Serde default for `summary_max_tokens`; returns
/// [`defaults::COMPACT_SUMMARY_MAX_TOKENS`].
fn default_summary_max_tokens() -> u64 {
defaults::COMPACT_SUMMARY_MAX_TOKENS
}
/// Serde default for `auto_read_budget_tokens`; returns
/// [`defaults::COMPACT_AUTO_READ_BUDGET`].
fn default_auto_read_budget_tokens() -> u64 {
defaults::COMPACT_AUTO_READ_BUDGET
}
fn default_compact_worker_max_input_tokens() -> u64 {
defaults::COMPACT_WORKER_MAX_INPUT_TOKENS
fn default_result_context_max_tokens() -> u64 {
defaults::COMPACT_RESULT_CONTEXT_MAX_TOKENS
}
impl Default for CompactionConfig {
fn default() -> Self {
Self {
prune_protected_turns: default_prune_protected_turns(),
prune_protected_tokens: default_prune_protected_tokens(),
prune_min_savings: default_prune_min_savings(),
compact_threshold: None,
compact_request_threshold: None,
compact_retained_tokens: default_compact_retained_tokens(),
compact_auto_read_budget: default_compact_auto_read_budget(),
compact_worker_max_input_tokens: default_compact_worker_max_input_tokens(),
threshold: None,
request_threshold: None,
retained_tokens: default_retained_tokens(),
overview_target_tokens: default_overview_target_tokens(),
overview_warning_tokens: default_overview_warning_tokens(),
overview_deadline_tokens: default_overview_deadline_tokens(),
worker_context_max_tokens: default_worker_context_max_tokens(),
finish_warning_remaining_tokens: default_finish_warning_remaining_tokens(),
final_reserve_tokens: default_final_reserve_tokens(),
worker_max_turns: default_worker_max_turns(),
summary_target_tokens: default_summary_target_tokens(),
summary_max_tokens: default_summary_max_tokens(),
auto_read_budget_tokens: default_auto_read_budget_tokens(),
result_context_max_tokens: default_result_context_max_tokens(),
model: None,
}
}
@ -214,6 +520,7 @@ impl Default for CompactionConfig {
impl PodManifest {
/// Parse a manifest from a TOML string.
///
/// The raw text is first passed to `config::reject_removed_manifest_fields`
/// and any error from that check is returned immediately; only input that
/// passes is handed to `toml::from_str`.
///
/// # Errors
/// Returns a [`toml::de::Error`] when the pre-check rejects the input or
/// when deserialisation into [`PodManifest`] fails.
pub fn from_toml(s: &str) -> Result<Self, toml::de::Error> {
// Run the removed-field check before deserialising.
config::reject_removed_manifest_fields(s)?;
toml::from_str(s)
}
}
@ -241,12 +548,18 @@ permission = "write"
fn parse_minimal_manifest() {
let manifest = PodManifest::from_toml(MINIMAL_REQUIRED).unwrap();
assert_eq!(manifest.pod.name, "test-agent");
assert_eq!(manifest.model.scheme, SchemeKind::Anthropic);
assert_eq!(manifest.model.model_id, "claude-sonnet-4-20250514");
assert_eq!(manifest.model.auth, AuthRef::None);
assert_eq!(manifest.model.scheme, Some(SchemeKind::Anthropic));
assert_eq!(
manifest.model.model_id.as_deref(),
Some("claude-sonnet-4-20250514")
);
assert!(manifest.model.auth.is_none());
assert_eq!(manifest.scope.allow.len(), 1);
assert!(manifest.scope.deny.is_empty());
assert_eq!(manifest.worker.instruction, defaults::DEFAULT_INSTRUCTION);
assert!(manifest.worker.top_p.is_none());
assert!(manifest.worker.top_k.is_none());
assert!(manifest.worker.stop_sequences.is_empty());
}
#[test]
@ -264,6 +577,10 @@ auth = { kind = "api_key", file = "/abs/keys/anthropic" }
instruction = "$user/reviewer"
max_tokens = 4096
temperature = 0.3
top_p = 0.9
top_k = 40
stop_sequences = ["\n\n", "</stop>"]
reasoning = "medium"
[[scope.allow]]
target = "/abs/project"
@ -280,14 +597,21 @@ permission = "write"
"#;
let manifest = PodManifest::from_toml(toml).unwrap();
assert_eq!(manifest.pod.name, "code-reviewer");
let file = match &manifest.model.auth {
AuthRef::ApiKey { file, .. } => file.as_deref(),
let file = match manifest.model.auth.as_ref() {
Some(AuthRef::ApiKey { file, .. }) => file.as_deref(),
_ => panic!("expected ApiKey"),
};
assert_eq!(file, Some(std::path::Path::new("/abs/keys/anthropic")));
assert_eq!(manifest.worker.instruction, "$user/reviewer");
assert_eq!(manifest.worker.max_tokens, Some(4096));
assert_eq!(manifest.worker.temperature, Some(0.3));
assert_eq!(manifest.worker.top_p, Some(0.9));
assert_eq!(manifest.worker.top_k, Some(40));
assert_eq!(manifest.worker.stop_sequences, vec!["\n\n", "</stop>"]);
assert_eq!(
manifest.worker.reasoning,
Some(ReasoningControl::Effort(ReasoningEffort::Medium))
);
let allow = &manifest.scope.allow;
assert_eq!(allow.len(), 2);
assert_eq!(allow[0].permission, Permission::Write);
@ -320,6 +644,16 @@ model_id = "claude-sonnet-4-20250514"
assert_eq!(manifest.worker.max_turns.unwrap().get(), 50);
}
#[test]
fn parse_reasoning_budget() {
    // An integer `reasoning` value must come back as a token budget.
    let source = MINIMAL_REQUIRED.replace("[worker]\n", "[worker]\nreasoning = -1\n");
    let parsed = PodManifest::from_toml(&source).unwrap();
    let expected = Some(ReasoningControl::BudgetTokens(-1));
    assert_eq!(parsed.worker.reasoning, expected);
}
#[test]
fn omitted_max_turns_is_none() {
let manifest = PodManifest::from_toml(MINIMAL_REQUIRED).unwrap();
@ -334,14 +668,37 @@ model_id = "claude-sonnet-4-20250514"
#[test]
fn parse_compaction_config() {
let toml = format!("{MINIMAL_REQUIRED}\n[compaction]\ncompact_threshold = 80000\n");
let toml = format!("{MINIMAL_REQUIRED}\n[compaction]\nthreshold = 80000\n");
let manifest = PodManifest::from_toml(&toml).unwrap();
let c = manifest.compaction.unwrap();
assert_eq!(c.prune_protected_turns, 3);
assert_eq!(c.prune_protected_tokens, 8000);
assert_eq!(c.prune_min_savings, 4096);
assert_eq!(c.compact_threshold, Some(80000));
assert_eq!(c.compact_request_threshold, None);
assert_eq!(c.compact_retained_tokens, 8000);
assert_eq!(c.threshold, Some(80000));
assert_eq!(c.request_threshold, None);
assert_eq!(c.retained_tokens, 8000);
assert_eq!(c.worker_max_turns, Some(20));
}
#[test]
fn reject_removed_prune_protected_turns_field() {
    // The removed key must be refused, and the error must name it.
    let source = format!("{MINIMAL_REQUIRED}\n[compaction]\nprune_protected_turns = 3\n");
    let err = PodManifest::from_toml(&source).unwrap_err();
    let rendered = err.to_string();
    assert!(
        rendered.contains("compaction.prune_protected_turns"),
        "unexpected error: {err}"
    );
}
#[test]
fn parse_compaction_worker_max_turns() {
    // An explicit `worker_max_turns` overrides the default bound.
    let source = format!(
        "{MINIMAL_REQUIRED}\n\
[compaction]\n\
worker_max_turns = 7\n"
    );
    let compaction = PodManifest::from_toml(&source)
        .unwrap()
        .compaction
        .unwrap();
    assert_eq!(compaction.worker_max_turns, Some(7));
}
#[test]
@ -349,13 +706,13 @@ model_id = "claude-sonnet-4-20250514"
let toml = format!(
"{MINIMAL_REQUIRED}\n\
[compaction]\n\
compact_threshold = 80000\n\
compact_request_threshold = 90000\n"
threshold = 80000\n\
request_threshold = 90000\n"
);
let manifest = PodManifest::from_toml(&toml).unwrap();
let c = manifest.compaction.unwrap();
assert_eq!(c.compact_threshold, Some(80000));
assert_eq!(c.compact_request_threshold, Some(90000));
assert_eq!(c.threshold, Some(80000));
assert_eq!(c.request_threshold, Some(90000));
}
#[test]
@ -363,12 +720,12 @@ model_id = "claude-sonnet-4-20250514"
let toml = format!(
"{MINIMAL_REQUIRED}\n\
[compaction]\n\
compact_request_threshold = 90000\n"
request_threshold = 90000\n"
);
let manifest = PodManifest::from_toml(&toml).unwrap();
let c = manifest.compaction.unwrap();
assert_eq!(c.compact_threshold, None);
assert_eq!(c.compact_request_threshold, Some(90000));
assert_eq!(c.threshold, None);
assert_eq!(c.request_threshold, Some(90000));
}
#[test]
@ -376,7 +733,7 @@ model_id = "claude-sonnet-4-20250514"
let toml = format!(
"{MINIMAL_REQUIRED}\n\
[compaction]\n\
compact_threshold = 80000\n\n\
threshold = 80000\n\n\
[compaction.model]\n\
scheme = \"gemini\"\n\
model_id = \"gemini-2.0-flash\"\n"
@ -384,8 +741,8 @@ model_id = "claude-sonnet-4-20250514"
let manifest = PodManifest::from_toml(&toml).unwrap();
let c = manifest.compaction.unwrap();
let p = c.model.unwrap();
assert_eq!(p.scheme, SchemeKind::Gemini);
assert_eq!(p.model_id, "gemini-2.0-flash");
assert_eq!(p.scheme, Some(SchemeKind::Gemini));
assert_eq!(p.model_id.as_deref(), Some("gemini-2.0-flash"));
}
#[test]
@ -394,6 +751,48 @@ model_id = "claude-sonnet-4-20250514"
assert!(manifest.compaction.is_none());
}
#[test]
fn omitted_memory_is_none() {
    // No `[memory]` table means the memory subsystem stays disabled.
    let memory = PodManifest::from_toml(MINIMAL_REQUIRED).unwrap().memory;
    assert!(memory.is_none());
}
#[test]
fn empty_memory_section_enables_with_default_root() {
    // A bare `[memory]` header enables memory with every field left unset.
    let source = format!("{MINIMAL_REQUIRED}\n[memory]\n");
    let parsed = PodManifest::from_toml(&source).unwrap();
    let memory = parsed.memory.expect("memory section parsed");
    assert_eq!(memory.workspace_root, None);
    assert!(memory.inject_summary.is_none());
}
#[test]
fn memory_section_with_inject_summary_false() {
    // An explicit `false` must survive as `Some(false)`, not collapse to `None`.
    let source = format!("{MINIMAL_REQUIRED}\n[memory]\ninject_summary = false\n");
    let memory = PodManifest::from_toml(&source).unwrap().memory.unwrap();
    assert_eq!(memory.inject_summary, Some(false));
}
#[test]
fn memory_section_with_explicit_root() {
    // `workspace_root` is taken verbatim from the manifest.
    let source = format!("{MINIMAL_REQUIRED}\n[memory]\nworkspace_root = \"/some/where\"\n");
    let memory = PodManifest::from_toml(&source).unwrap().memory.unwrap();
    let expected = std::path::PathBuf::from("/some/where");
    assert_eq!(memory.workspace_root.unwrap(), expected);
}
#[test]
fn memory_section_with_language() {
    // The free-form language string round-trips unchanged.
    let source = format!("{MINIMAL_REQUIRED}\n[memory]\nlanguage = \"Japanese\"\n");
    let memory = PodManifest::from_toml(&source).unwrap().memory.unwrap();
    assert_eq!(memory.language.as_deref(), Some("Japanese"));
}
#[test]
fn reject_unknown_scheme() {
let toml =
@ -402,15 +801,29 @@ model_id = "claude-sonnet-4-20250514"
}
#[test]
fn omitted_tool_output_falls_back_to_default_16k() {
fn omitted_limits_fall_back_to_defaults() {
let manifest = PodManifest::from_toml(MINIMAL_REQUIRED).unwrap();
let limits = &manifest.worker.tool_output;
assert_eq!(limits.default_max_bytes, 16 * 1024);
assert_eq!(limits.default_max_bytes, defaults::TOOL_OUTPUT_MAX_BYTES);
assert!(limits.per_tool.is_empty());
assert_eq!(
manifest.worker.file_upload.max_bytes,
defaults::FILE_UPLOAD_MAX_BYTES
);
}
#[test]
fn parse_tool_output_limits() {
fn worker_language_defaults_and_parses() {
let manifest = PodManifest::from_toml(MINIMAL_REQUIRED).unwrap();
assert_eq!(manifest.worker.language, defaults::WORKER_LANGUAGE);
let toml = MINIMAL_REQUIRED.replace("[worker]\n", "[worker]\nlanguage = \"Japanese\"\n");
let manifest = PodManifest::from_toml(&toml).unwrap();
assert_eq!(manifest.worker.language, "Japanese");
}
#[test]
fn parse_worker_output_limits() {
let toml = MINIMAL_REQUIRED.replace(
"[worker]\n",
"[worker]\n\
@ -418,7 +831,9 @@ model_id = "claude-sonnet-4-20250514"
default_max_bytes = 8192\n\n\
[worker.tool_output.per_tool]\n\
Read = 32768\n\
Grep = 4096\n",
Grep = 4096\n\n\
[worker.file_upload]\n\
max_bytes = 12345\n",
);
let manifest = PodManifest::from_toml(&toml).unwrap();
let limits = &manifest.worker.tool_output;
@ -426,6 +841,7 @@ model_id = "claude-sonnet-4-20250514"
assert_eq!(limits.limit_for("Read"), 32768);
assert_eq!(limits.limit_for("Grep"), 4096);
assert_eq!(limits.limit_for("Unknown"), 8192);
assert_eq!(manifest.worker.file_upload.max_bytes, 12345);
}
#[test]
@ -437,7 +853,7 @@ model_id = "claude-sonnet-4-20250514"
);
let manifest = PodManifest::from_toml(&toml).unwrap();
let limits = &manifest.worker.tool_output;
assert_eq!(limits.default_max_bytes, 16 * 1024);
assert_eq!(limits.default_max_bytes, defaults::TOOL_OUTPUT_MAX_BYTES);
assert!(limits.per_tool.is_empty());
}

View File

@ -1,8 +1,14 @@
//! LLM モデル宣言型
//!
//! Pod マニフェストの `[model]` セクションで記述する型。`scheme` と
//! `auth` を直交軸として表現し、1 つの汎用アダプタ(`crates/provider`
//! で任意の wire / 認証組合せを受け止める。
//! Pod マニフェストの `[model]` セクションで記述する型。`ref`(プロバイダ
//! とモデルを両方指し示す短縮形)と inline 指定(`scheme` / `model_id`
//! 直書き)の両方を受け入れるため、すべてのフィールドを `Option` として
//! 持つ 1 つの型 [`ModelManifest`] に統合している。実解決(ref をプロバイダ
//! カタログ / モデルカタログから引いて `scheme` や `model_id` を埋める)
//! は `crates/provider` の責務で、本モジュールはデータ表現のみを提供する。
//!
//! 同じ型を partial(カスケード層)と完成形(最終マニフェスト)の両方で
//! 使うことで、merge と最終変換の重複を避ける。
use std::path::PathBuf;
@ -10,27 +16,62 @@ use serde::{Deserialize, Serialize};
// `ModelCapability` は `llm-worker` 側に定義される runtime 構造だが、
// マニフェストで任意に override できるよう型だけ再エクスポートする。
pub use llm_worker::llm_client::capability::ModelCapability;
pub use llm_worker::llm_client::capability::{ModelCapability, ReasoningControl, ReasoningEffort};
/// Pod が使う LLM モデルの宣言。
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ModelConfig {
/// wire format
pub scheme: SchemeKind,
/// API のベース URL。未指定なら scheme の既定値にフォールバック
#[serde(default)]
/// Pod マニフェストの `[model]` セクション。
///
/// - ref だけ書く: `[model] ref = "anthropic/claude-sonnet-4-6"`
/// - ref + 一部 override: ref で基底を引き、`auth` 等だけ書き換え
/// - 完全 inline: `ref` を省略して `scheme` / `model_id` / `auth` を直書き
///
/// どの形が有効かの判定は `provider::resolve_model_manifest` が担う。
/// 本クレートは「どこから取るか」を表現するだけで、未設定かどうかを
/// 理由にした hard error は出さない。
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
pub struct ModelManifest {
/// `<provider_id>/<model_id_in_ref>` 形式のカタログ参照。`/` の
/// 最初の 1 文字目で split し provider カタログを引く。
/// OpenRouter の `anthropic/claude-sonnet-4` のように `/` を含む
/// model_id は `openrouter/anthropic/claude-sonnet-4` と書く
/// provider 側で最初の `/` のみ split するため)。
#[serde(default, rename = "ref", skip_serializing_if = "Option::is_none")]
pub ref_: Option<String>,
/// wire format の明示指定。ref 未指定時は必須、ref 指定時は override。
#[serde(default, skip_serializing_if = "Option::is_none")]
pub scheme: Option<SchemeKind>,
/// API のベース URL。scheme の既定値を override する。
#[serde(default, skip_serializing_if = "Option::is_none")]
pub base_url: Option<String>,
/// プロバイダが受け付けるモデル ID
pub model_id: String,
/// 認証方式
#[serde(default)]
pub auth: AuthRef,
/// モデル能力の明示指定。`None` のときは `crates/provider` が
/// scheme 静的テーブル → scheme 既定値の順でフォールバックする。
/// OpenAI 互換ルーターOpenRouter / xAI / Groq 等)で scheme テーブル
/// に載っていないモデル ID を使うときに指定する。
#[serde(default)]
/// プロバイダが受け付けるモデル ID。ref 未指定時は必須。
#[serde(default, skip_serializing_if = "Option::is_none")]
pub model_id: Option<String>,
/// 認証方式。ref 未指定時は必須、ref 指定時は override。
#[serde(default, skip_serializing_if = "Option::is_none")]
pub auth: Option<AuthRef>,
/// モデル能力の明示指定。未指定時はモデルカタログ → provider
/// `default_capability` → scheme 既定の順で解決される。
#[serde(default, skip_serializing_if = "Option::is_none")]
pub capability: Option<ModelCapability>,
/// モデルのコンテキストウィンドウ上限tokens。カタログ未掲載 / inline
/// モデルでもここで明示 override できる。
#[serde(default, skip_serializing_if = "Option::is_none")]
pub context_window: Option<u64>,
}
impl ModelManifest {
    /// Overlays `upper` onto `self`, field by field, for the manifest cascade
    /// (called in builtin → user → project → overlay priority order).
    ///
    /// For every field, the value from `upper` wins when it is `Some`;
    /// otherwise the value already held by `self` is kept.
    pub fn merge(self, upper: Self) -> Self {
        let Self {
            ref_,
            scheme,
            base_url,
            model_id,
            auth,
            capability,
            context_window,
        } = upper;

        Self {
            ref_: ref_.or(self.ref_),
            scheme: scheme.or(self.scheme),
            base_url: base_url.or(self.base_url),
            model_id: model_id.or(self.model_id),
            auth: auth.or(self.auth),
            capability: capability.or(self.capability),
            context_window: context_window.or(self.context_window),
        }
    }
}
/// サポートする wire scheme の種類。
@ -67,6 +108,7 @@ pub enum AuthRef {
file: Option<PathBuf>,
},
/// ChatGPT OAuth`~/.codex/auth.json`)。実装は `llm-auth-codex-oauth` チケット
#[serde(rename = "codex_oauth")]
CodexOAuth,
}

View File

@ -0,0 +1,384 @@
//! Insomnia のホームディレクトリ配下のパス解決を一元化するモジュール。
//!
//! 用途別に三つの base directory を持つ:
//!
//! - **`config_dir`** — 人が手で書く / 編集する設定。`manifest.toml`,
//! `providers.toml`, `models.toml`, `prompts/`, `prompts.toml` 等
//! - **`data_dir`** — プログラムが書く永続データ。`sessions/` 等
//! - **`runtime_dir`** — 再起動で消えてよいランタイム状態。socket,
//! `pods.json`, `pid` ファイル等
//!
//! ## 解決順 (優先順位高 → 低)
//!
//! | base | 1. `INSOMNIA_<KIND>_DIR` | 2. `INSOMNIA_HOME` | 3. `XDG_*` | 4. 既定 |
//! |---|---|---|---|---|
//! | config | `INSOMNIA_CONFIG_DIR` | `$INSOMNIA_HOME/config` | `$XDG_CONFIG_HOME/insomnia` | `$HOME/.config/insomnia` |
//! | data | `INSOMNIA_DATA_DIR` | `$INSOMNIA_HOME` | — | `$HOME/.insomnia` |
//! | runtime | `INSOMNIA_RUNTIME_DIR` | `$INSOMNIA_HOME/run` | `$XDG_RUNTIME_DIR/insomnia` | `$HOME/.insomnia/run` |
//!
//! `INSOMNIA_HOME=$X` のとき config は `$X/config`、data は `$X` 直下、
//! runtime は `$X/run` に集約される。テストや sandbox 利用ではこれ一本
//! で全部 tempdir に向けられる。
//!
//! 解決された各 base が存在するか / ディレクトリかは保証しない —
//! 呼び出し側がファイル操作の前に作成 / 検査する。
use std::ffi::OsString;
use std::path::PathBuf;
/// Name of the environment variable that points at an explicit user manifest.
///
/// Pod CLI treats a non-empty value as an explicit manifest path. Empty values
/// are treated the same as an unset variable, so callers fall back to the
/// auto-discovered user manifest path.
///
/// [`user_manifest_path_with_env_override`] reads this variable;
/// [`user_manifest_path`] deliberately does not.
pub const USER_MANIFEST_ENV: &str = "INSOMNIA_USER_MANIFEST";
/// Configuration directory: home of `manifest.toml`, `providers.toml`,
/// `models.toml`, `prompts/` and similar hand-edited files.
///
/// Resolution order (first non-empty value wins):
/// 1. `$INSOMNIA_CONFIG_DIR`
/// 2. `$INSOMNIA_HOME/config`
/// 3. `$XDG_CONFIG_HOME/insomnia`
/// 4. `$HOME/.config/insomnia`
///
/// Returns `None` when none of these variables is set.
pub fn config_dir() -> Option<PathBuf> {
    env_path("INSOMNIA_CONFIG_DIR")
        .or_else(|| env_path("INSOMNIA_HOME").map(|home| home.join("config")))
        .or_else(|| env_path("XDG_CONFIG_HOME").map(|xdg| xdg.join("insomnia")))
        .or_else(|| env_path("HOME").map(|home| home.join(".config").join("insomnia")))
}
/// Data directory: where persistent, program-written data such as
/// `sessions/` lives.
///
/// Resolution order (first non-empty value wins):
/// 1. `$INSOMNIA_DATA_DIR`
/// 2. `$INSOMNIA_HOME` (used directly, no sub-directory)
/// 3. `$HOME/.insomnia`
///
/// Returns `None` when none of these variables is set.
pub fn data_dir() -> Option<PathBuf> {
    env_path("INSOMNIA_DATA_DIR")
        .or_else(|| env_path("INSOMNIA_HOME"))
        .or_else(|| env_path("HOME").map(|home| home.join(".insomnia")))
}
/// Runtime directory: sockets, `pods.json`, per-Pod `pid` / `status.json`
/// and other state that may safely disappear on reboot.
///
/// Resolution order (first non-empty value wins):
/// 1. `$INSOMNIA_RUNTIME_DIR`
/// 2. `$INSOMNIA_HOME/run`
/// 3. `$XDG_RUNTIME_DIR/insomnia`
/// 4. `$HOME/.insomnia/run`
///
/// Returns `None` when none of these variables is set.
pub fn runtime_dir() -> Option<PathBuf> {
    env_path("INSOMNIA_RUNTIME_DIR")
        .or_else(|| env_path("INSOMNIA_HOME").map(|home| home.join("run")))
        .or_else(|| env_path("XDG_RUNTIME_DIR").map(|xdg| xdg.join("insomnia")))
        .or_else(|| env_path("HOME").map(|home| home.join(".insomnia").join("run")))
}
// ---- well-known file getters ------------------------------------------------
/// `<config_dir>/manifest.toml` — default location of the user manifest.
///
/// This deliberately ignores [`USER_MANIFEST_ENV`]. Use
/// [`user_manifest_path_with_env_override`] when mirroring the Pod CLI cascade
/// resolution rules. Returns `None` when [`config_dir`] cannot be resolved.
pub fn user_manifest_path() -> Option<PathBuf> {
    config_dir().map(|dir| dir.join("manifest.toml"))
}
/// Turn a raw env value into an explicit user manifest path.
///
/// A non-empty value is taken verbatim as a path. Both `None` and the empty
/// string mean "no override", matching the Pod CLI's
/// `INSOMNIA_USER_MANIFEST` handling.
pub fn user_manifest_path_from_env(value: Option<OsString>) -> Option<PathBuf> {
    match value {
        Some(raw) if !raw.is_empty() => Some(PathBuf::from(raw)),
        _ => None,
    }
}
/// User manifest path using the same env override rule as the Pod CLI cascade.
///
/// A non-empty [`USER_MANIFEST_ENV`] value wins. If the variable is unset or
/// empty, this falls back to [`user_manifest_path`]. The returned path is not
/// guaranteed to exist.
pub fn user_manifest_path_with_env_override() -> Option<PathBuf> {
user_manifest_path_from_env(std::env::var_os(USER_MANIFEST_ENV)).or_else(user_manifest_path)
}
/// `<config_dir>/prompts/` — the user prompts library directory.
///
/// Returns `None` when [`config_dir`] cannot be resolved.
pub fn user_prompts_dir() -> Option<PathBuf> {
    config_dir().map(|dir| dir.join("prompts"))
}
/// `<config_dir>/prompts.toml` — the user prompt pack file.
///
/// Returns `None` when [`config_dir`] cannot be resolved.
pub fn user_pack_file() -> Option<PathBuf> {
    config_dir().map(|dir| dir.join("prompts.toml"))
}
/// `<config_dir>/<file_name>` — user override file such as `providers.toml`
/// or `models.toml`.
///
/// `file_name` is joined onto the config directory as-is. Returns `None`
/// when [`config_dir`] cannot be resolved.
pub fn user_catalog_override(file_name: &str) -> Option<PathBuf> {
    config_dir().map(|dir| dir.join(file_name))
}
/// `<data_dir>/sessions/` — default location of the session store.
///
/// Returns `None` when [`data_dir`] cannot be resolved.
pub fn sessions_dir() -> Option<PathBuf> {
    data_dir().map(|dir| dir.join("sessions"))
}
/// `<runtime_dir>/pods.json` — the machine-wide Pod allocation registry.
///
/// Returns `None` when [`runtime_dir`] cannot be resolved.
pub fn pod_registry_path() -> Option<PathBuf> {
    runtime_dir().map(|dir| dir.join("pods.json"))
}
/// `<runtime_dir>/<pod_name>/` — the per-Pod runtime directory.
///
/// `pod_name` is joined onto the runtime directory as-is. Returns `None`
/// when [`runtime_dir`] cannot be resolved.
pub fn pod_runtime_dir(pod_name: &str) -> Option<PathBuf> {
    runtime_dir().map(|dir| dir.join(pod_name))
}
/// `<runtime_dir>/<pod_name>/sock` — path of a Pod's Unix socket.
///
/// Inside the Pod process the socket is actually created through the `pod`
/// crate's `RuntimeDir::socket_path()`. Outside callers that only know the
/// Pod name (for example the TUI attach flow) use this function to
/// **predict** the same path; both are expected to agree.
pub fn pod_socket_path(pod_name: &str) -> Option<PathBuf> {
    pod_runtime_dir(pod_name).map(|dir| dir.join("sock"))
}
// ---- internals --------------------------------------------------------------
/// Read environment variable `name` as a path, treating an empty value as
/// unset.
///
/// For path resolution an empty variable and a missing variable are
/// intuitively the same thing, so both yield `None`.
///
/// The value is read with [`std::env::var_os`] rather than `std::env::var`:
/// paths are not required to be valid Unicode, and `var` would silently
/// discard a non-UTF-8 value as if the variable were unset.
fn env_path(name: &str) -> Option<PathBuf> {
    std::env::var_os(name)
        .filter(|value| !value.is_empty())
        .map(PathBuf::from)
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::{Mutex, MutexGuard, OnceLock};

    /// Serialise every test that touches the process environment.
    ///
    /// Cargo's test harness runs the tests of one binary on multiple threads
    /// of a single process, so every test that reads or writes env vars takes
    /// this lock first. A poisoned lock is recovered: a failed test must not
    /// cascade into every later one.
    fn env_lock() -> MutexGuard<'static, ()> {
        static LOCK: OnceLock<Mutex<()>> = OnceLock::new();
        LOCK.get_or_init(|| Mutex::new(()))
            .lock()
            .unwrap_or_else(|e| e.into_inner())
    }

    /// RAII guard that overrides env vars for the duration of a test and
    /// restores the previous values on drop.
    ///
    /// Values are snapshotted as `OsString` so that a pre-existing non-UTF-8
    /// value (e.g. an unusual `HOME`) is restored faithfully instead of being
    /// mistaken for "unset" and removed.
    struct EnvGuard {
        vars: Vec<(&'static str, Option<OsString>)>,
        _lock: MutexGuard<'static, ()>,
    }

    impl EnvGuard {
        /// Clear every variable the resolver looks at, then apply
        /// `overrides`. An override of `None` leaves the variable unset.
        fn new(overrides: &[(&'static str, Option<&str>)]) -> Self {
            let lock = env_lock();
            let names = [
                "INSOMNIA_CONFIG_DIR",
                "INSOMNIA_DATA_DIR",
                "INSOMNIA_RUNTIME_DIR",
                "INSOMNIA_USER_MANIFEST",
                "INSOMNIA_HOME",
                "XDG_CONFIG_HOME",
                "XDG_RUNTIME_DIR",
                "HOME",
            ];
            let saved: Vec<_> = names.iter().map(|n| (*n, std::env::var_os(n))).collect();
            // SAFETY: env_lock() is held, so no other test in this binary
            // accesses the environment concurrently.
            unsafe {
                for (n, _) in &saved {
                    std::env::remove_var(n);
                }
                for (n, v) in overrides {
                    if let Some(v) = v {
                        std::env::set_var(n, v);
                    }
                }
            }
            Self {
                vars: saved,
                _lock: lock,
            }
        }
    }

    impl Drop for EnvGuard {
        fn drop(&mut self) {
            // SAFETY: the lock is still held while the values are restored.
            unsafe {
                for (n, v) in &self.vars {
                    match v {
                        Some(v) => std::env::set_var(n, v),
                        None => std::env::remove_var(n),
                    }
                }
            }
        }
    }

    #[test]
    fn config_dir_falls_back_to_home_dot_config() {
        let _g = EnvGuard::new(&[("HOME", Some("/h"))]);
        assert_eq!(config_dir().unwrap(), PathBuf::from("/h/.config/insomnia"));
    }

    #[test]
    fn config_dir_uses_xdg_when_set() {
        let _g = EnvGuard::new(&[("HOME", Some("/h")), ("XDG_CONFIG_HOME", Some("/x"))]);
        assert_eq!(config_dir().unwrap(), PathBuf::from("/x/insomnia"));
    }

    #[test]
    fn config_dir_insomnia_home_outranks_xdg() {
        let _g = EnvGuard::new(&[
            ("HOME", Some("/h")),
            ("XDG_CONFIG_HOME", Some("/x")),
            ("INSOMNIA_HOME", Some("/sand")),
        ]);
        assert_eq!(config_dir().unwrap(), PathBuf::from("/sand/config"));
    }

    #[test]
    fn config_dir_explicit_wins_over_insomnia_home() {
        let _g = EnvGuard::new(&[
            ("HOME", Some("/h")),
            ("INSOMNIA_HOME", Some("/sand")),
            ("INSOMNIA_CONFIG_DIR", Some("/explicit-cfg")),
        ]);
        assert_eq!(config_dir().unwrap(), PathBuf::from("/explicit-cfg"));
    }

    #[test]
    fn data_dir_default_is_dot_insomnia() {
        let _g = EnvGuard::new(&[("HOME", Some("/h"))]);
        assert_eq!(data_dir().unwrap(), PathBuf::from("/h/.insomnia"));
    }

    #[test]
    fn data_dir_insomnia_home_is_data_dir_itself() {
        let _g = EnvGuard::new(&[("HOME", Some("/h")), ("INSOMNIA_HOME", Some("/sand"))]);
        assert_eq!(data_dir().unwrap(), PathBuf::from("/sand"));
    }

    #[test]
    fn runtime_dir_prefers_xdg_runtime_dir() {
        let _g = EnvGuard::new(&[
            ("HOME", Some("/h")),
            ("XDG_RUNTIME_DIR", Some("/xdg-runtime")),
        ]);
        assert_eq!(
            runtime_dir().unwrap(),
            PathBuf::from("/xdg-runtime/insomnia")
        );
    }

    #[test]
    fn runtime_dir_falls_back_to_dot_insomnia_run() {
        let _g = EnvGuard::new(&[("HOME", Some("/h"))]);
        assert_eq!(runtime_dir().unwrap(), PathBuf::from("/h/.insomnia/run"));
    }

    #[test]
    fn runtime_dir_insomnia_home_is_run_subdir() {
        let _g = EnvGuard::new(&[
            ("HOME", Some("/h")),
            ("XDG_RUNTIME_DIR", Some("/run/user/1000")),
            ("INSOMNIA_HOME", Some("/sand")),
        ]);
        assert_eq!(runtime_dir().unwrap(), PathBuf::from("/sand/run"));
    }

    #[test]
    fn empty_env_treated_as_unset() {
        let _g = EnvGuard::new(&[("HOME", Some("/h")), ("XDG_CONFIG_HOME", Some(""))]);
        assert_eq!(config_dir().unwrap(), PathBuf::from("/h/.config/insomnia"));
    }

    #[test]
    fn returns_none_when_nothing_set() {
        let _g = EnvGuard::new(&[]);
        assert!(config_dir().is_none());
        assert!(data_dir().is_none());
        assert!(runtime_dir().is_none());
    }

    #[test]
    fn user_manifest_env_override_wins_when_non_empty() {
        let _g = EnvGuard::new(&[
            ("HOME", Some("/h")),
            ("INSOMNIA_USER_MANIFEST", Some("/tmp/user.toml")),
        ]);
        assert_eq!(
            user_manifest_path_with_env_override().unwrap(),
            PathBuf::from("/tmp/user.toml")
        );
    }

    #[test]
    fn empty_user_manifest_env_falls_back_to_default_path() {
        let _g = EnvGuard::new(&[("HOME", Some("/h")), ("INSOMNIA_USER_MANIFEST", Some(""))]);
        assert_eq!(
            user_manifest_path_with_env_override().unwrap(),
            PathBuf::from("/h/.config/insomnia/manifest.toml")
        );
    }

    #[test]
    fn user_manifest_path_from_env_treats_empty_as_unset() {
        assert_eq!(user_manifest_path_from_env(None), None);
        assert_eq!(user_manifest_path_from_env(Some(OsString::from(""))), None);
        assert_eq!(
            user_manifest_path_from_env(Some(OsString::from("/tmp/u.toml"))).unwrap(),
            PathBuf::from("/tmp/u.toml")
        );
    }

    #[test]
    fn well_known_files_compose_off_base_dirs() {
        let _g = EnvGuard::new(&[("INSOMNIA_HOME", Some("/sand"))]);
        assert_eq!(
            user_manifest_path().unwrap(),
            PathBuf::from("/sand/config/manifest.toml")
        );
        assert_eq!(
            user_prompts_dir().unwrap(),
            PathBuf::from("/sand/config/prompts")
        );
        assert_eq!(
            user_pack_file().unwrap(),
            PathBuf::from("/sand/config/prompts.toml")
        );
        assert_eq!(
            user_catalog_override("providers.toml").unwrap(),
            PathBuf::from("/sand/config/providers.toml")
        );
        assert_eq!(sessions_dir().unwrap(), PathBuf::from("/sand/sessions"));
        assert_eq!(
            pod_registry_path().unwrap(),
            PathBuf::from("/sand/run/pods.json")
        );
        assert_eq!(
            pod_runtime_dir("foo").unwrap(),
            PathBuf::from("/sand/run/foo")
        );
        assert_eq!(
            pod_socket_path("foo").unwrap(),
            PathBuf::from("/sand/run/foo/sock")
        );
    }
}

View File

@ -8,6 +8,9 @@
use std::ffi::OsString;
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex};
use arc_swap::{ArcSwap, Guard};
use crate::{Permission, ScopeConfig, ScopeRule};
@ -21,7 +24,7 @@ pub struct Scope {
deny: Vec<ResolvedRule>,
}
#[derive(Debug, Clone)]
#[derive(Debug, Clone, PartialEq, Eq)]
struct ResolvedRule {
/// Absolute, canonicalized-or-normalized target directory/file.
target: PathBuf,
@ -142,7 +145,7 @@ impl Scope {
/// Allow rules with their targets resolved to absolute paths.
///
/// Used by the scope-lock registry, where every Pod's allocation
/// Used by the pod-registry, where every Pod's allocation
/// must be expressed in absolute terms so prefix comparisons are
/// meaningful across processes.
pub fn allow_rules(&self) -> Vec<ScopeRule> {
@ -156,6 +159,23 @@ impl Scope {
.collect()
}
/// Deny rules, each expressed with its resolved target path.
///
/// Mirror image of [`allow_rules`](Self::allow_rules). Between them the two
/// accessors let a caller round-trip a [`Scope`] through [`ScopeConfig`],
/// which is how a scope is rebuilt after extra rules are layered on top of
/// an already-constructed one.
pub fn deny_rules(&self) -> Vec<ScopeRule> {
    let mut rules = Vec::with_capacity(self.deny.len());
    for resolved in &self.deny {
        rules.push(ScopeRule {
            target: resolved.target.clone(),
            permission: resolved.permission,
            recursive: resolved.recursive,
        });
    }
    rules
}
/// Iterate over absolute paths granted `Write` by an allow rule.
/// Subset of [`readable_paths`](Self::readable_paths).
pub fn writable_paths(&self) -> impl Iterator<Item = &Path> {
@ -165,6 +185,64 @@ impl Scope {
.map(|r| r.target.as_path())
}
/// Return a copy of this [`Scope`] whose allow set additionally contains
/// `extra_allow` (appended after the existing allow rules).
///
/// Serves the dynamic-scope *grow* paths, e.g. the controller adding the
/// bash-output Read rule, or a future external `GrantScope`.
///
/// # Errors
///
/// Propagates any [`ScopeError`] from [`Scope::from_config`] while the
/// combined rule set is rebuilt.
pub fn with_added_allow_rules(
    &self,
    extra_allow: impl IntoIterator<Item = ScopeRule>,
) -> Result<Self, ScopeError> {
    let allow: Vec<ScopeRule> = self.allow_rules().into_iter().chain(extra_allow).collect();
    let config = ScopeConfig {
        allow,
        deny: self.deny_rules(),
    };
    Self::from_config(&config)
}
/// Return a copy of this [`Scope`] whose deny set additionally contains
/// `extra_deny` (appended after the existing deny rules).
///
/// Serves the dynamic-scope *shrink* paths, e.g. SpawnPod-style delegation
/// that strips Write from the spawner without touching its allow rules.
///
/// # Errors
///
/// Propagates any [`ScopeError`] from [`Scope::from_config`] while the
/// combined rule set is rebuilt.
pub fn with_added_deny_rules(
    &self,
    extra_deny: impl IntoIterator<Item = ScopeRule>,
) -> Result<Self, ScopeError> {
    let allow = self.allow_rules();
    let deny: Vec<ScopeRule> = self.deny_rules().into_iter().chain(extra_deny).collect();
    let config = ScopeConfig { allow, deny };
    Self::from_config(&config)
}
/// Return a copy of this [`Scope`] with, for every rule in `remove_deny`,
/// at most one equal deny rule taken out.
///
/// Matching is exact — performed after the same target resolution that
/// [`Scope::from_config`] applies — and deliberately not geometric.
/// Reclaiming a delegated child must peel off only the deny layer that was
/// added for that child; an explicit base deny that merely overlaps the
/// delegated path has to stay. Rules with no match are skipped, so repeating
/// a reclaim is harmless.
///
/// # Errors
///
/// Returns the [`ScopeError`] produced when a rule in `remove_deny` fails
/// to resolve.
pub fn with_removed_deny_rules(
    &self,
    remove_deny: impl IntoIterator<Item = ScopeRule>,
) -> Result<Self, ScopeError> {
    let mut remaining = self.deny.clone();
    for rule in remove_deny {
        let wanted = resolve_rule(&rule)?;
        // Remove only the first equal entry so duplicate layers are peeled
        // off one reclaim at a time.
        let hit = remaining.iter().position(|candidate| *candidate == wanted);
        if let Some(index) = hit {
            remaining.remove(index);
        }
    }
    Ok(Self {
        allow: self.allow.clone(),
        deny: remaining,
    })
}
/// Human-readable grouping of allow rules, suitable for embedding in
/// LLM system prompts. Deny rules are intentionally omitted — they
/// only cap effective permission and surface them would mislead the
@ -213,6 +291,71 @@ impl Scope {
}
}
/// Shared, atomically-swappable view of a [`Scope`].
///
/// Built around [`ArcSwap`] so the hot path (permission checks inside
/// `ScopedFs`) reads the current scope lock-free. Mutators are
/// serialised by an internal `Mutex` so concurrent `update` calls do
/// not lose each other's contributions.
///
/// All clones share the same underlying state — a `SharedScope` cloned
/// out to multiple consumers (Pod, ScopedFs, future grant/revoke
/// callers) sees every update.
#[derive(Debug, Clone)]
pub struct SharedScope {
/// Reference-counted state; cloning the handle clones only this `Arc`.
inner: Arc<SharedScopeInner>,
}
/// State shared by every clone of a [`SharedScope`].
#[derive(Debug)]
struct SharedScopeInner {
/// The current scope; replaced wholesale by `SharedScope::update`.
scope: ArcSwap<Scope>,
/// Serialises read-modify-write update transactions so a producer
/// can read the current scope, build a derived one, and store it
/// without losing concurrent updates.
write_lock: Mutex<()>,
}
impl SharedScope {
    /// Wrap an owned [`Scope`] in a shared, atomically-swappable handle.
    pub fn new(scope: Scope) -> Self {
        Self {
            inner: Arc::new(SharedScopeInner {
                scope: ArcSwap::from_pointee(scope),
                write_lock: Mutex::new(()),
            }),
        }
    }

    /// Snapshot the current scope. Cheap and lock-free; the returned
    /// guard borrows the live scope for as long as it is held.
    pub fn load(&self) -> Guard<Arc<Scope>> {
        self.inner.scope.load()
    }

    /// Snapshot the current scope into an owned `Arc<Scope>`. Useful
    /// when the caller needs a value that outlives the load guard
    /// (e.g. cloning into another struct).
    pub fn snapshot(&self) -> Arc<Scope> {
        self.inner.scope.load_full()
    }

    /// Read-modify-write transaction. `f` is called with the current
    /// scope and returns a derived one (or an error). The internal
    /// write lock ensures that two concurrent `update` calls see each
    /// other's results — the second observes the first's output as its
    /// input.
    ///
    /// # Errors
    ///
    /// Returns whatever error `f` returns; in that case the stored scope
    /// is left untouched.
    pub fn update<F>(&self, f: F) -> Result<(), ScopeError>
    where
        F: FnOnce(&Scope) -> Result<Scope, ScopeError>,
    {
        // The mutex guards no data (`()`), and the new scope is stored only
        // after `f` has returned successfully, so a panic inside an earlier
        // `f` cannot have left shared state half-updated. Recover from
        // poisoning instead of letting one panicking closure make every
        // later `update` panic as well.
        let _guard = self
            .inner
            .write_lock
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner());
        let current = self.inner.scope.load();
        let new = f(&current)?;
        self.inner.scope.store(Arc::new(new));
        Ok(())
    }
}
impl ResolvedRule {
fn matches(&self, path: &Path) -> bool {
if self.recursive {
@ -528,4 +671,128 @@ mod tests {
let deep = dir.path().join("a/b/c/new.txt");
assert!(scope.is_writable(&deep));
}
/// Adding a Read allow rule makes a previously unreadable directory
/// readable while the original writable root stays writable.
#[test]
fn with_added_allow_rules_grows_readable_set() {
    let root = TempDir::new().unwrap();
    let outside = TempDir::new().unwrap();
    let probe = outside.path().join("x");

    let original = Scope::writable(root.path()).unwrap();
    assert!(!original.is_readable(&probe));

    let read_outside = ScopeRule {
        target: outside.path().to_path_buf(),
        permission: Permission::Read,
        recursive: true,
    };
    let grown = original.with_added_allow_rules([read_outside]).unwrap();

    assert!(grown.is_readable(&probe));
    assert!(grown.is_writable(&root.path().join("y")));
}
/// A Write deny on a sub-directory caps permission there at Read and
/// leaves the rest of the writable root untouched.
#[test]
fn with_added_deny_rules_demotes_write_to_read() {
    let root = TempDir::new().unwrap();
    let child = root.path().join("sub");
    std::fs::create_dir(&child).unwrap();

    let deny_write = ScopeRule {
        target: child.clone(),
        permission: Permission::Write,
        recursive: true,
    };
    let restricted = Scope::writable(root.path())
        .unwrap()
        .with_added_deny_rules([deny_write])
        .unwrap();

    let inside = child.join("a.txt");
    assert_eq!(restricted.permission_at(&inside), Some(Permission::Read));
    assert_eq!(
        restricted.permission_at(&root.path().join("top.txt")),
        Some(Permission::Write)
    );
}
/// Each reclaim removes exactly one matching deny layer; reclaiming a rule
/// that is no longer present is a no-op.
#[test]
fn with_removed_deny_rules_reclaims_one_matching_layer() {
    let root = TempDir::new().unwrap();
    let child = root.path().join("sub");
    std::fs::create_dir(&child).unwrap();
    let probe = child.join("a.txt");

    let deny_write = ScopeRule {
        target: child.clone(),
        permission: Permission::Write,
        recursive: true,
    };
    // Two identical deny layers stacked on a writable root.
    let doubly_denied = Scope::writable(root.path())
        .unwrap()
        .with_added_deny_rules([deny_write.clone(), deny_write.clone()])
        .unwrap();

    let after_first = doubly_denied
        .with_removed_deny_rules([deny_write.clone()])
        .unwrap();
    assert_eq!(
        after_first.permission_at(&probe),
        Some(Permission::Read),
        "one duplicate deny layer must remain"
    );

    let after_second = after_first
        .with_removed_deny_rules([deny_write.clone()])
        .unwrap();
    assert_eq!(after_second.permission_at(&probe), Some(Permission::Write));

    let after_third = after_second.with_removed_deny_rules([deny_write]).unwrap();
    assert_eq!(
        after_third.permission_at(&probe),
        Some(Permission::Write),
        "missing rules are ignored for idempotent reclaim"
    );
}
/// `load` exposes the scope the handle was constructed with.
#[test]
fn shared_scope_load_returns_current_value() {
    let root = TempDir::new().unwrap();
    let scope = Scope::writable(root.path()).unwrap();
    let handle = SharedScope::new(scope);
    let probe = root.path().join("a.txt");
    assert!(handle.load().is_writable(&probe));
}
/// After `update` stores a derived scope, `load` observes the new
/// permissions.
#[test]
fn shared_scope_update_replaces_view_atomically() {
    let root = TempDir::new().unwrap();
    let child = root.path().join("sub");
    std::fs::create_dir(&child).unwrap();
    let probe = child.join("a.txt");

    let handle = SharedScope::new(Scope::writable(root.path()).unwrap());
    assert_eq!(
        handle.load().permission_at(&probe),
        Some(Permission::Write)
    );

    let deny_write = ScopeRule {
        target: child.clone(),
        permission: Permission::Write,
        recursive: true,
    };
    handle
        .update(|current| current.with_added_deny_rules([deny_write]))
        .unwrap();

    assert_eq!(handle.load().permission_at(&probe), Some(Permission::Read));
}
/// An update made through one clone is visible through every other clone.
#[test]
fn shared_scope_clones_share_state() {
    let root = TempDir::new().unwrap();
    let outside = TempDir::new().unwrap();
    let probe = outside.path().join("x");

    let writer = SharedScope::new(Scope::writable(root.path()).unwrap());
    let reader = writer.clone();
    assert!(!reader.load().is_readable(&probe));

    let read_outside = ScopeRule {
        target: outside.path().to_path_buf(),
        permission: Permission::Read,
        recursive: true,
    };
    writer
        .update(|current| current.with_added_allow_rules([read_outside]))
        .unwrap();

    assert!(reader.load().is_readable(&probe));
}
}

25
crates/memory/Cargo.toml Normal file
View File

@ -0,0 +1,25 @@
[package]
name = "memory"
version = "0.1.0"
edition.workspace = true
license.workspace = true
[dependencies]
async-trait = { workspace = true }
chrono = { version = "0.4", features = ["serde"] }
libc = { workspace = true }
lint-common = { workspace = true }
llm-worker = { workspace = true }
manifest = { workspace = true }
schemars = { workspace = true }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
sha2 = { workspace = true }
serde_yaml = "0.9.34"
thiserror = { workspace = true }
tracing = { workspace = true }
uuid = { workspace = true, features = ["v7", "serde"] }
[dev-dependencies]
tempfile = { workspace = true }
tokio = { workspace = true, features = ["macros", "rt-multi-thread"] }

450
crates/memory/src/audit.rs Normal file
View File

@ -0,0 +1,450 @@
//! Append-only JSONL audit log for memory workers and tools.
//!
//! The log is evidence-only observability data under
//! `.insomnia/memory/_logs/current.log`. It is intentionally separate from
//! `_staging` and `_usage`, and consolidation never consumes it. Operators can
//! follow the latest stream with:
//!
//! ```text
//! tail -f .insomnia/memory/_logs/current.log
//! ```
use std::collections::BTreeMap;
use std::fs::{self, OpenOptions};
use std::io::{self, Write};
use std::path::{Path, PathBuf};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use uuid::Uuid;
use crate::workspace::WorkspaceLayout;
/// `skip_serializing_if` predicate: true when the counter is zero.
///
/// Takes `&usize` because serde passes the field by reference.
fn is_zero_usize(value: &usize) -> bool {
    matches!(*value, 0)
}
/// Which memory worker an audit event was produced by.
///
/// Serialized in `snake_case` (`memory_extract`, `memory_consolidation`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum AuditWorker {
MemoryExtract,
MemoryConsolidation,
}
impl AuditWorker {
pub fn label(self) -> &'static str {
match self {
Self::MemoryExtract => "extract",
Self::MemoryConsolidation => "consolidation",
}
}
}
/// Lifecycle state recorded for a worker run.
///
/// Serialized in `snake_case` (`started`, `completed`, `skipped`, `failed`,
/// `cancelled`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum WorkerLifecycleStatus {
Started,
Completed,
Skipped,
Failed,
Cancelled,
}
impl WorkerLifecycleStatus {
pub fn label(self) -> &'static str {
match self {
Self::Started => "running",
Self::Completed => "done",
Self::Skipped => "skipped",
Self::Failed => "failed",
Self::Cancelled => "cancelled",
}
}
}
/// What caused a worker run to be started.
///
/// Serialized in `snake_case`; [`AuditTrigger::label`] returns the same
/// strings.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum AuditTrigger {
SessionEnd,
TurnThreshold,
TokenThreshold,
StagingBacklog,
Idle,
Manual,
StartupRecovery,
Unknown,
}
impl AuditTrigger {
pub fn label(self) -> &'static str {
match self {
Self::SessionEnd => "session_end",
Self::TurnThreshold => "turn_threshold",
Self::TokenThreshold => "token_threshold",
Self::StagingBacklog => "staging_backlog",
Self::Idle => "idle",
Self::Manual => "manual",
Self::StartupRecovery => "startup_recovery",
Self::Unknown => "unknown",
}
}
}
/// Outcome recorded on record-operation and record-usage audit entries.
///
/// Serialized in `snake_case` (`success`, `failed`, `skipped`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum AuditStatus {
Success,
Failed,
Skipped,
}
/// Model identification attached to a worker lifecycle entry.
///
/// Every field is optional and omitted from the JSON when `None`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ModelAudit {
// NOTE(review): no `serde(rename)` is applied, so this is serialized under
// the literal key `ref_` — confirm that is the intended wire name.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub ref_: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub scheme: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub model_id: Option<String>,
}
/// Token usage counters attached to a worker lifecycle entry.
///
/// Every counter is optional and omitted from the JSON when `None`.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct UsageAudit {
#[serde(default, skip_serializing_if = "Option::is_none")]
pub input_tokens: Option<u64>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub output_tokens: Option<u64>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub total_tokens: Option<u64>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub cache_read_input_tokens: Option<u64>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub cache_creation_input_tokens: Option<u64>,
}
/// Extract-worker details attached to a worker lifecycle entry.
///
/// Optional fields and empty vectors are omitted from the JSON;
/// `staging_count` is always written.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct ExtractAudit {
#[serde(default, skip_serializing_if = "Option::is_none")]
pub session_id: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub segment_id: Option<String>,
// NOTE(review): presumably a `[start, end]` pair; whether the end is
// inclusive is not visible here — confirm against the producer.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub entry_range: Option<[u64; 2]>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub history_range: Option<[u64; 2]>,
#[serde(default)]
pub staging_count: usize,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub staging_ids: Vec<String>,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub staging_paths: Vec<String>,
}
/// Consolidation-worker details attached to a worker lifecycle entry.
///
/// `invalid_staging_count` is omitted from the JSON when zero and
/// `consumed_staging_ids` when empty; the remaining fields are always
/// written.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct ConsolidationAudit {
#[serde(default)]
pub staging_count: usize,
#[serde(default, skip_serializing_if = "is_zero_usize")]
pub invalid_staging_count: usize,
#[serde(default)]
pub staging_bytes: u64,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub consumed_staging_ids: Vec<String>,
#[serde(default)]
pub operations: OperationCounts,
}
/// Per-operation counters for record changes.
///
/// All counters default to zero when absent from the input.
/// `operation_counts_from_snapshots` fills only `write`, `edit` and
/// `delete`; the other counters are left at zero by that function.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct OperationCounts {
#[serde(default)]
pub write: usize,
#[serde(default)]
pub edit: usize,
#[serde(default)]
pub delete: usize,
#[serde(default)]
pub drop: usize,
#[serde(default)]
pub merge: usize,
#[serde(default)]
pub trim: usize,
}
impl OperationCounts {
    /// Sum of all six counters (`write`, `edit`, `delete`, `drop`, `merge`,
    /// `trim`).
    pub fn total_record_changes(&self) -> usize {
        let Self {
            write,
            edit,
            delete,
            drop,
            merge,
            trim,
        } = self;
        write + edit + delete + drop + merge + trim
    }
}
/// Payload of a `worker_lifecycle` audit event.
///
/// `run_id`, `worker`, `status`, `trigger` and `reason` are always written;
/// the optional detail sections are omitted from the JSON when `None`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct WorkerLifecycleAudit {
pub run_id: Uuid,
pub worker: AuditWorker,
pub status: WorkerLifecycleStatus,
pub trigger: AuditTrigger,
pub reason: String,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub model: Option<ModelAudit>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub usage: Option<UsageAudit>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub extract: Option<ExtractAudit>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub consolidation: Option<ConsolidationAudit>,
}
/// Payload of a `record_operation` audit event.
///
/// `before_hash`, `after_hash` and `reason` are omitted from the JSON when
/// `None`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct RecordOperationAudit {
pub op: String,
pub status: AuditStatus,
pub kind: String,
pub slug: String,
pub path: String,
// NOTE(review): presumably in the `sha256:<hex>` form produced by
// `hash_bytes` — confirm against callers.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub before_hash: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub after_hash: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub reason: Option<String>,
}
/// Payload of a `record_usage` audit event.
///
/// Only `op`, `status` and `kind` are mandatory; every other field is
/// omitted from the JSON when `None`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct RecordUsageAudit {
pub op: String,
pub status: AuditStatus,
pub kind: String,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub slug: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub path: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub query: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub result_count: Option<usize>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub reason: Option<String>,
}
/// The event-specific part of an [`AuditEvent`].
///
/// Internally tagged: the variant name is written in `snake_case` under the
/// `event` key (`worker_lifecycle`, `record_operation`, `record_usage`),
/// next to the variant's own fields.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "event", rename_all = "snake_case")]
pub enum AuditPayload {
WorkerLifecycle(WorkerLifecycleAudit),
RecordOperation(RecordOperationAudit),
RecordUsage(RecordUsageAudit),
}
/// One line of the audit log.
///
/// The payload is flattened, so its `event` tag and fields appear at the top
/// level of the JSON object alongside `id` and `occurred_at`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct AuditEvent {
pub id: Uuid,
pub occurred_at: DateTime<Utc>,
#[serde(flatten)]
pub payload: AuditPayload,
}
impl AuditEvent {
    /// Wrap `payload` in a new event with a fresh UUIDv7 `id` and the
    /// current UTC time as `occurred_at`.
    pub fn new(payload: AuditPayload) -> Self {
        let id = Uuid::now_v7();
        let occurred_at = Utc::now();
        Self {
            id,
            occurred_at,
            payload,
        }
    }
}
/// Content fingerprint of one record file at a point in time.
///
/// Built by `snapshot_one`, which stores it under the map key
/// `"{kind}/{slug}"` and fills `hash` from `file_hash`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RecordSnapshot {
pub kind: String,
pub slug: String,
pub path: PathBuf,
pub hash: String,
}
/// Append one audit event to `.insomnia/memory/_logs/current.log`.
///
/// The parent directory is created on demand. The event is serialized to a
/// single JSON line and handed to the file in **one** `write_all` call,
/// newline included. With an unbuffered append-mode file, `writeln!` would
/// emit the JSON text and the trailing `\n` as separate writes, letting
/// concurrent appenders interleave and corrupt the JSONL stream.
///
/// # Errors
///
/// Returns an `io::Error` when the directory cannot be created, the file
/// cannot be opened or written, or (as `InvalidData`) when the event fails
/// to serialize.
pub fn append_audit_event(layout: &WorkspaceLayout, event: &AuditEvent) -> io::Result<()> {
    let path = layout.audit_current_log_path();
    if let Some(parent) = path.parent() {
        fs::create_dir_all(parent)?;
    }
    let mut line = serde_json::to_string(event)
        .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))?;
    line.push('\n');
    let mut file = OpenOptions::new().create(true).append(true).open(path)?;
    file.write_all(line.as_bytes())
}
/// Append a `worker_lifecycle` event carrying `audit` to the audit log.
///
/// # Errors
///
/// Propagates any `io::Error` from [`append_audit_event`].
pub fn append_worker_lifecycle(
    layout: &WorkspaceLayout,
    audit: WorkerLifecycleAudit,
) -> io::Result<()> {
    let event = AuditEvent::new(AuditPayload::WorkerLifecycle(audit));
    append_audit_event(layout, &event)
}
/// Append a `record_operation` event carrying `audit` to the audit log.
///
/// # Errors
///
/// Propagates any `io::Error` from [`append_audit_event`].
pub fn append_record_operation(
    layout: &WorkspaceLayout,
    audit: RecordOperationAudit,
) -> io::Result<()> {
    let event = AuditEvent::new(AuditPayload::RecordOperation(audit));
    append_audit_event(layout, &event)
}
/// Append a `record_usage` event carrying `audit` to the audit log.
///
/// # Errors
///
/// Propagates any `io::Error` from [`append_audit_event`].
pub fn append_record_usage(layout: &WorkspaceLayout, audit: RecordUsageAudit) -> io::Result<()> {
    let event = AuditEvent::new(AuditPayload::RecordUsage(audit));
    append_audit_event(layout, &event)
}
/// Hash the contents of the file at `path`.
///
/// Returns `Ok(Some(hash))` in the format produced by [`hash_bytes`], or
/// `Ok(None)` when the file does not exist.
///
/// # Errors
///
/// Any read error other than `NotFound` is returned unchanged.
pub fn file_hash(path: &Path) -> io::Result<Option<String>> {
    let bytes = match fs::read(path) {
        Ok(bytes) => bytes,
        Err(err) => {
            return if err.kind() == io::ErrorKind::NotFound {
                Ok(None)
            } else {
                Err(err)
            };
        }
    };
    Ok(Some(hash_bytes(&bytes)))
}
/// SHA-256 of `bytes`, rendered as `sha256:` followed by the lowercase hex
/// digest.
pub fn hash_bytes(bytes: &[u8]) -> String {
    use std::fmt::Write as _;

    const PREFIX: &str = "sha256:";

    let digest = Sha256::digest(bytes);
    let mut hex = String::with_capacity(PREFIX.len() + digest.len() * 2);
    hex.push_str(PREFIX);
    for byte in digest.iter() {
        // Writing into a `String` cannot fail, so the result is ignored.
        let _ = write!(hex, "{byte:02x}");
    }
    hex
}
/// Fingerprint every record currently on disk under `layout`.
///
/// Covers the single summary file (key `summary/summary`) and every `*.md`
/// file directly inside the decisions, requests and knowledge directories.
/// Keys have the form `"{kind}/{slug}"`. Files that are missing or
/// unreadable are silently left out.
pub fn snapshot_records(layout: &WorkspaceLayout) -> BTreeMap<String, RecordSnapshot> {
    let mut records = BTreeMap::new();

    let summary = layout.summary_path();
    snapshot_one(&mut records, "summary", "summary", summary);

    let decisions = layout.decisions_dir();
    snapshot_dir(&mut records, "decision", decisions);

    let requests = layout.requests_dir();
    snapshot_dir(&mut records, "request", requests);

    let knowledge = layout.knowledge_dir();
    snapshot_dir(&mut records, "knowledge", knowledge);

    records
}
/// Derive write / edit / delete counts by diffing two record snapshots.
///
/// - key only in `after` → `write`
/// - key in both with a different hash → `edit`
/// - key only in `before` → `delete`
///
/// The remaining counters (`drop`, `merge`, `trim`) are left at zero.
pub fn operation_counts_from_snapshots(
    before: &BTreeMap<String, RecordSnapshot>,
    after: &BTreeMap<String, RecordSnapshot>,
) -> OperationCounts {
    let mut counts = OperationCounts::default();
    for (key, current) in after {
        let Some(previous) = before.get(key) else {
            counts.write += 1;
            continue;
        };
        if previous.hash != current.hash {
            counts.edit += 1;
        }
    }
    counts.delete += before
        .keys()
        .filter(|key| !after.contains_key(*key))
        .count();
    counts
}
/// Snapshot every regular `*.md` file directly inside `dir` as a record of
/// the given `kind`, using the file name without `.md` as the slug.
///
/// An unreadable directory, unreadable entries, non-files, non-UTF-8 names
/// and names without the `.md` suffix are all skipped silently.
fn snapshot_dir(out: &mut BTreeMap<String, RecordSnapshot>, kind: &str, dir: PathBuf) {
    let Ok(entries) = fs::read_dir(dir) else {
        return;
    };
    for path in entries.flatten().map(|entry| entry.path()) {
        if !path.is_file() {
            continue;
        }
        let slug = path
            .file_name()
            .and_then(|name| name.to_str())
            .and_then(|name| name.strip_suffix(".md"))
            .map(str::to_string);
        if let Some(slug) = slug {
            snapshot_one(out, kind, &slug, path);
        }
    }
}
/// Insert a snapshot of the file at `path` under the key `"{kind}/{slug}"`.
///
/// Does nothing when `path` is not a regular file or its hash cannot be
/// computed.
fn snapshot_one(out: &mut BTreeMap<String, RecordSnapshot>, kind: &str, slug: &str, path: PathBuf) {
    if !path.is_file() {
        return;
    }
    if let Ok(Some(hash)) = file_hash(&path) {
        let key = format!("{kind}/{slug}");
        let snapshot = RecordSnapshot {
            kind: kind.to_string(),
            slug: slug.to_string(),
            path,
            hash,
        };
        out.insert(key, snapshot);
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Create a workspace layout rooted in a fresh temp directory. The
    /// `TempDir` is returned so it stays alive for the test's duration.
    fn setup() -> (TempDir, WorkspaceLayout) {
        let root = TempDir::new().unwrap();
        let layout = WorkspaceLayout::new(root.path().to_path_buf());
        (root, layout)
    }

    /// A lifecycle event ends up as one flattened JSON object in the log.
    #[test]
    fn appends_jsonl_to_current_log() {
        let (_root, layout) = setup();
        let run_id = Uuid::now_v7();
        let started = WorkerLifecycleAudit {
            run_id,
            worker: AuditWorker::MemoryExtract,
            status: WorkerLifecycleStatus::Started,
            trigger: AuditTrigger::TokenThreshold,
            reason: "tokens_threshold_reached".to_string(),
            model: None,
            usage: None,
            extract: None,
            consolidation: None,
        };
        append_worker_lifecycle(&layout, started).unwrap();

        let logged = fs::read_to_string(layout.audit_current_log_path()).unwrap();
        let json: serde_json::Value = serde_json::from_str(logged.trim()).unwrap();
        assert_eq!(json["event"], "worker_lifecycle");
        assert_eq!(json["worker"], "memory_extract");
        assert_eq!(json["status"], "started");
        assert_eq!(json["run_id"], run_id.to_string());
    }

    /// Diffing two snapshots counts one created, one edited and one
    /// deleted record.
    #[test]
    fn counts_created_edited_deleted_records() {
        let (root, layout) = setup();
        let decisions = root.path().join(".insomnia/memory/decisions");
        fs::create_dir_all(&decisions).unwrap();
        fs::write(decisions.join("a.md"), "old").unwrap();
        fs::write(decisions.join("gone.md"), "old").unwrap();
        let earlier = snapshot_records(&layout);

        fs::write(decisions.join("a.md"), "new").unwrap();
        fs::remove_file(decisions.join("gone.md")).unwrap();
        fs::write(decisions.join("created.md"), "new").unwrap();
        let later = snapshot_records(&layout);

        let diff = operation_counts_from_snapshots(&earlier, &later);
        assert_eq!(diff.write, 1);
        assert_eq!(diff.edit, 1);
        assert_eq!(diff.delete, 1);
    }

    /// The hash string is the `sha256:` prefix plus 64 hex characters.
    #[test]
    fn hash_has_sha256_prefix() {
        let hashed = hash_bytes(b"abc");
        assert_eq!(hashed.len(), "sha256:".len() + 64);
        assert!(hashed.starts_with("sha256:"));
    }
}

View File

@ -0,0 +1,292 @@
//! consolidation sub-Worker への最初のユーザー入力を組み立てる。
//!
//! extract (`extract::build_extract_input`) と同じ方針で、固定 schema の
//! markdown セクション列にしてサブWorker に渡す。`docs/plan/memory.md`
//! §Consolidation 入力 / §整理材料 の項目に従い:
//!
//! 1. consumed staging エントリ全文(`source` 込み)
//! 2. 既存 `memory/*` 全文(summary / decisions / requests)
//! 3. Usage evidence report(明示使用回数 + resident exposure cost)
//! 4. 整理材料(Linter Warn ベース、hard protection 判定はしない)
//!
//! 既存 `knowledge/*` 本文は埋めず、agent に `KnowledgeQuery` 経由で引かせる
//! 設計(`docs/plan/memory.md` §retrieval 経路 / §Consolidation の Knowledge アクセス)。
use std::fmt::Write;
use crate::consolidate::staging::StagingEntry;
use crate::consolidate::tidy::TidyHints;
use crate::usage::UsageReport;
use crate::workspace::{RecordKind, WorkspaceLayout};
/// Builds the first user message handed to the consolidation sub-Worker.
///
/// The message is a fixed sequence of markdown sections, in this order:
///
/// 1. a short instruction preamble,
/// 2. `## Staging entries (consumed by this run)` rendered from `staging`,
/// 3. `## Existing memory records (full content)` read from disk via `layout`,
/// 4. `## Usage evidence report` rendered from `usage_report`,
/// 5. `## Tidy hints` rendered from `tidy`,
/// 6. a closing instruction asking for a short final assistant message.
///
/// Every section is always present; an empty source renders as a placeholder
/// produced by the corresponding `render_*` helper.
pub fn build_consolidate_input(
    layout: &WorkspaceLayout,
    staging: &[StagingEntry],
    tidy: &TidyHints,
    usage_report: &UsageReport,
) -> String {
    let mut out = String::new();
    // The two clauses of the last sentence were previously run together
    // ("every write direct file writes are denied"); they are separated here
    // so the instruction reads unambiguously.
    out.push_str(
        "consolidation input. Run the integration step first \
         (fold the staging activity logs into memory and knowledge), then the \
         tidy step (clean up existing records). Use the memory tools for \
         every write; direct file writes are denied by the pod scope.\n\n",
    );
    out.push_str("## Staging entries (consumed by this run)\n\n");
    out.push_str(&render_staging_records(staging));
    out.push('\n');
    out.push_str("## Existing memory records (full content)\n\n");
    out.push_str(&render_existing_memory_records(layout));
    out.push('\n');
    out.push_str("## Usage evidence report\n\n");
    out.push_str(&render_usage_report(usage_report));
    out.push('\n');
    out.push_str("## Tidy hints\n\n");
    out.push_str(&render_tidy_hints(tidy));
    out.push('\n');
    out.push_str(
        "When done, end the turn with a short final assistant message describing \
         what changed.",
    );
    out
}
/// Renders staging entries as a sequence of `### <id>` headings, each followed
/// by the entry's record as a pretty-printed JSON code fence.
///
/// Returns the single line `(none)` when `entries` is empty. A record that
/// fails to serialize is rendered as `{}`.
pub fn render_staging_records(entries: &[StagingEntry]) -> String {
    if entries.is_empty() {
        return String::from("(none)\n");
    }
    entries.iter().fold(String::new(), |mut rendered, item| {
        let body = match serde_json::to_string_pretty(&item.record) {
            Ok(pretty) => pretty,
            Err(_) => String::from("{}"),
        };
        // Writing into a `String` cannot fail, so the result is discarded.
        let _ = write!(rendered, "### {}\n```json\n{}\n```\n\n", item.id, body);
        rendered
    })
}
/// Renders the full content of the existing memory records: the summary file
/// first, then every decision record, then every request record.
///
/// Each record is emitted as a `###` heading followed by a `markdown` code
/// fence holding the raw file content with trailing newlines trimmed. A
/// summary file that cannot be read is skipped. Returns the single line
/// `(none)` when nothing was rendered.
pub fn render_existing_memory_records(layout: &WorkspaceLayout) -> String {
    let mut rendered = String::new();
    let summary_file = layout.summary_path();
    if let Ok(text) = std::fs::read_to_string(&summary_file) {
        rendered.push_str("### summary\n```markdown\n");
        rendered.push_str(text.trim_end_matches('\n'));
        rendered.push_str("\n```\n\n");
    }
    push_kind_records(&mut rendered, layout, RecordKind::Decision);
    push_kind_records(&mut rendered, layout, RecordKind::Request);
    if rendered.is_empty() {
        String::from("(none)\n")
    } else {
        rendered
    }
}
/// Appends every `*.md` record of `kind` to `out`, sorted by slug.
///
/// Only `Decision` and `Request` are rendered; any other kind returns without
/// writing. A missing or unreadable directory, non-file entries, names that
/// are not valid UTF-8, and files that cannot be read are all skipped.
fn push_kind_records(out: &mut String, layout: &WorkspaceLayout, kind: RecordKind) {
    let dir = match kind {
        RecordKind::Decision => layout.decisions_dir(),
        RecordKind::Request => layout.requests_dir(),
        RecordKind::Knowledge | RecordKind::Summary | RecordKind::Workflow => return,
    };
    let Ok(listing) = std::fs::read_dir(&dir) else {
        return;
    };
    let mut records: Vec<(String, std::path::PathBuf)> = listing
        .flatten()
        .map(|item| item.path())
        .filter(|candidate| candidate.is_file())
        .filter_map(|candidate| {
            let slug = candidate.file_stem().and_then(|s| s.to_str())?.to_string();
            let is_markdown = candidate.extension().and_then(|s| s.to_str()) == Some("md");
            is_markdown.then_some((slug, candidate))
        })
        .collect();
    // Sorting the (slug, path) pairs gives a stable, slug-ordered listing.
    records.sort();
    for (slug, file) in records {
        if let Ok(text) = std::fs::read_to_string(&file) {
            let _ = writeln!(out, "### {}:{}", kind.as_str(), slug);
            out.push_str("```markdown\n");
            out.push_str(text.trim_end_matches('\n'));
            out.push_str("\n```\n\n");
        }
    }
}
/// Renders the usage report section body.
///
/// A non-empty report becomes a one-line caveat followed by the report as a
/// pretty-printed JSON code fence (`{}` when serialization fails). An empty
/// report becomes a placeholder that explains the absence of evidence.
fn render_usage_report(report: &UsageReport) -> String {
    if !report.is_empty() {
        let json = match serde_json::to_string_pretty(report) {
            Ok(pretty) => pretty,
            Err(_) => "{}".to_string(),
        };
        return format!(
            "This report is evidence only. Do not make hard Knowledge-creation or tidy-protection decisions from it alone.\n\n```json\n{json}\n```\n"
        );
    }
    String::from(
        "(empty — no explicit memory/knowledge usage events recorded yet. \
         Treat this as lack of evidence, not proof that records are unused.)\n",
    )
}
/// Renders tidy hints as markdown.
///
/// Returns the single line `(none)` when `tidy` is empty. Otherwise each
/// non-empty hint category gets a bold heading and a bullet list, and a
/// closing sentence about the usage evidence report is always appended.
pub fn render_tidy_hints(tidy: &TidyHints) -> String {
    if tidy.is_empty() {
        return String::from("(none)\n");
    }
    let mut rendered = String::new();

    let replaced = &tidy.replaced_decisions;
    if !replaced.is_empty() {
        rendered.push_str(
            "**Replaced decisions still on disk** — collapse if the chain has settled:\n",
        );
        for (slug, replaced_by) in replaced {
            let _ = if let Some(target) = replaced_by {
                writeln!(rendered, "- `{slug}` → `{target}`")
            } else {
                writeln!(rendered, "- `{slug}` (no `replaced_by` set)")
            };
        }
        rendered.push('\n');
    }

    let overflowing = &tidy.sources_overflow;
    if !overflowing.is_empty() {
        rendered.push_str(
            "**Sources overflow** — consider trimming to the most recent entries (git log keeps the rest):\n",
        );
        for overflow in overflowing {
            let kind = overflow.kind.as_str();
            let _ = writeln!(
                rendered,
                "- {kind} `{}` ({} sources)",
                overflow.slug, overflow.count
            );
        }
        rendered.push('\n');
    }

    let clusters = &tidy.similar_slug_clusters;
    if !clusters.is_empty() {
        rendered.push_str("**Similar slug clusters** — evaluate for merge / rename:\n");
        for cluster in clusters {
            let _ = write!(rendered, "- {}: ", cluster.kind.as_str());
            // Backtick-quote each slug and separate them with ", ".
            for (position, slug) in cluster.slugs.iter().enumerate() {
                let separator = if position == 0 { "" } else { ", " };
                let _ = write!(rendered, "{separator}`{slug}`");
            }
            rendered.push('\n');
        }
        rendered.push('\n');
    }

    rendered.push_str(
        "Use the Usage evidence report as soft context only; \
         require an explicit reason before deleting or heavily compressing records with recent use.\n",
    );
    rendered
}
#[cfg(test)]
mod tests {
use super::*;
use crate::consolidate::tidy::{SimilarSlugCluster, SourcesOverflow};
use crate::extract::{ExtractedPayload, write_staging};
use crate::schema::SourceRef;
use chrono::Utc;
use std::path::Path;
// Current time as an RFC 3339 string, used to fill frontmatter timestamps.
fn now() -> String {
Utc::now().to_rfc3339()
}
// Writes `content` to `p`, creating parent directories first.
fn write(p: &Path, content: &str) {
if let Some(parent) = p.parent() {
std::fs::create_dir_all(parent).unwrap();
}
std::fs::write(p, content).unwrap();
}
// With a summary, a decision, one staging entry and all three tidy hint
// categories populated, every section heading and hint heading must appear.
// The usage report is left empty, so its placeholder text is expected too.
#[test]
fn build_includes_all_sections_when_populated() {
let dir = tempfile::TempDir::new().unwrap();
let layout = WorkspaceLayout::new(dir.path().to_path_buf());
write(
&dir.path().join(".insomnia/memory/summary.md"),
&format!("---\nupdated_at: {n}\n---\nstate of the world\n", n = now()),
);
write(
&dir.path().join(".insomnia/memory/decisions/dec.md"),
&format!(
"---\ncreated_at: {n}\nupdated_at: {n}\nsources: []\nstatus: open\n---\nbody\n",
n = now()
),
);
let (_id, _) = write_staging(
&layout,
SourceRef {
segment_id: "s".into(),
range: [0, 1],
},
ExtractedPayload::default(),
)
.unwrap();
let staging = crate::consolidate::staging::list_staging_entries(&layout);
let tidy = TidyHints {
replaced_decisions: [("old".to_string(), Some("new".to_string()))]
.into_iter()
.collect(),
sources_overflow: vec![SourcesOverflow {
kind: RecordKind::Decision,
slug: "dec".into(),
count: 12,
}],
similar_slug_clusters: vec![SimilarSlugCluster {
kind: RecordKind::Decision,
slugs: vec!["a".into(), "ab".into()],
}],
};
let report = UsageReport::empty();
let out = build_consolidate_input(&layout, &staging, &tidy, &report);
assert!(out.contains("Staging entries"));
assert!(out.contains("Existing memory records"));
assert!(out.contains("Usage evidence report"));
assert!(out.contains("Tidy hints"));
assert!(out.contains("state of the world"));
assert!(out.contains("decision:dec"));
assert!(out.contains("Replaced decisions"));
assert!(out.contains("Sources overflow"));
assert!(out.contains("Similar slug clusters"));
assert!(out.contains("no explicit memory/knowledge usage events"));
}
// An empty workspace with no staging and default hints still produces the
// section headings, with placeholders in place of content.
#[test]
fn empty_inputs_render_placeholders() {
let dir = tempfile::TempDir::new().unwrap();
let layout = WorkspaceLayout::new(dir.path().to_path_buf());
let out =
build_consolidate_input(&layout, &[], &TidyHints::default(), &UsageReport::empty());
// Both staging and tidy show "(none)"; existing memory records too.
assert!(out.contains("Staging entries"));
assert!(out.contains("(none)"));
}
}

View File

@ -0,0 +1,304 @@
//! `_staging/.consolidation.lock` による consolidation 占有ファイル。
//!
//! `docs/plan/memory.md` §並走防止 に従い:
//!
//! - ファイルが存在し、記録された Pod が動作している間、その Pod が排他占有
//! - クラッシュで残った stale lock は、所有者 PID が死んでいれば次回 spawn
//! 時に上書き取得できる
//! - cleanup は consumed ID の staging エントリのみ削除し、実行中に extract
//! が追加した分は残す
//!
//! 占有判定は Linux/macOS の `kill(pid, 0)` 経由で行う(`ESRCH` で死亡判定)。
//! Windows は対象外: INSOMNIA は POSIX 環境を前提にしている。
use std::fs;
use std::path::{Path, PathBuf};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use crate::workspace::WorkspaceLayout;
/// File name of the consolidation lock, joined onto the staging directory.
const LOCK_FILE: &str = ".consolidation.lock";
/// Content of the lock file. `pid` drives the stale check; `pod_name`,
/// `started_at` and `consumed_ids` are kept for diagnostics and for reference
/// during crash recovery.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LockRecord {
pub pid: u32,
pub pod_name: String,
pub started_at: DateTime<Utc>,
/// UUIDv7 ids of the staging entries this consolidation run fixed as
/// consumed in its startup snapshot. On completion only these entries are
/// deleted; entries added afterwards are left in place.
pub consumed_ids: Vec<Uuid>,
}
/// Errors raised while acquiring or releasing the consolidation lock.
#[derive(Debug, thiserror::Error)]
pub enum LockError {
/// A lock file already exists and its owner pid is alive, so this run is
/// skipped.
#[error("consolidation lock held by live pid {pid} (pod {pod_name:?})")]
InUse { pid: u32, pod_name: String },
/// A filesystem operation on `path` failed.
#[error("io error at {}: {source}", .path.display())]
Io {
path: PathBuf,
#[source]
source: std::io::Error,
},
/// The lock record could not be serialized or deserialized as JSON.
#[error("failed to (de)serialize lock record: {0}")]
Serde(#[from] serde_json::Error),
}
impl LockError {
fn io(path: impl Into<PathBuf>, source: std::io::Error) -> Self {
Self::Io {
path: path.into(),
source,
}
}
}
/// Handle held while a consolidation run owns the lock. Dropping it does
/// nothing: completion cleanup has to happen together with deleting the
/// consumed staging entries, so release is explicit via
/// [`StagingLock::release_with_cleanup`]. If the handle is dropped without an
/// explicit release, the lock file stays on disk and is overwritten as stale
/// on a later acquisition once the recorded pid is dead.
#[derive(Debug)]
pub struct StagingLock {
path: PathBuf,
record: LockRecord,
}
impl StagingLock {
/// Returns the record that was written to the lock file on acquisition.
pub fn record(&self) -> &LockRecord {
&self.record
}
/// Returns the path of the lock file this handle refers to.
pub fn path(&self) -> &Path {
&self.path
}
/// Tries to take the consolidation lock.
///
/// Returns [`LockError::InUse`] when a lock file already exists and its
/// recorded pid is alive. A lock whose owner is dead, or whose content
/// cannot be parsed, is treated as stale and overwritten. The staging
/// directory is created when it does not exist yet.
///
/// # Errors
///
/// [`LockError::Io`] when the staging directory cannot be created or the
/// lock file cannot be read or written; [`LockError::Serde`] when the new
/// record cannot be serialized.
///
/// NOTE(review): the existence check and the final write are separate
/// steps, so two callers racing through this function could both return
/// `Ok` — confirm whether callers are serialized some other way.
pub fn acquire(
layout: &WorkspaceLayout,
pid: u32,
pod_name: impl Into<String>,
consumed_ids: Vec<Uuid>,
) -> Result<Self, LockError> {
let staging_dir = layout.staging_dir();
fs::create_dir_all(&staging_dir).map_err(|e| LockError::io(&staging_dir, e))?;
let path = staging_dir.join(LOCK_FILE);
if path.exists() {
let raw = fs::read_to_string(&path).map_err(|e| LockError::io(&path, e))?;
// A corrupt lock is considered stale, so overwriting it is allowed.
if let Ok(existing) = serde_json::from_str::<LockRecord>(&raw) {
if pid_is_alive(existing.pid) {
return Err(LockError::InUse {
pid: existing.pid,
pod_name: existing.pod_name,
});
}
tracing::warn!(
stale_pid = existing.pid,
stale_pod = %existing.pod_name,
"consolidation stale lock detected, taking over"
);
} else {
tracing::warn!(path = %path.display(), "consolidation lock unparseable, treating as stale");
}
}
let record = LockRecord {
pid,
pod_name: pod_name.into(),
started_at: Utc::now(),
consumed_ids,
};
let json = serde_json::to_string_pretty(&record)?;
fs::write(&path, json).map_err(|e| LockError::io(&path, e))?;
Ok(Self { path, record })
}
/// Releases the lock and deletes the staging entries listed in
/// `consumed_ids`.
///
/// An entry that is already missing is skipped silently (for example when
/// it was removed externally). Other deletion failures are logged as
/// warnings. Removing the lock file itself is best-effort too: a failure
/// only produces a warning and is not propagated, because a later
/// acquisition overwrites the file through the stale check.
pub fn release_with_cleanup(self, layout: &WorkspaceLayout) {
let staging_dir = layout.staging_dir();
for id in &self.record.consumed_ids {
let target = staging_dir.join(format!("{id}.json"));
match fs::remove_file(&target) {
Ok(_) => {}
Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
Err(e) => {
tracing::warn!(
path = %target.display(),
error = %e,
"failed to clean up consumed staging entry"
);
}
}
}
self.unlink_lock_only();
}
/// Deletes only the lock file and leaves staging entries untouched.
///
/// Intended for the case where the consolidation sub-Worker failed midway:
/// the input staging stays in place so the next evaluation reprocesses it
/// (see `docs/plan/memory.md`, concurrency-prevention section: duplicate
/// creation converges into an update of the same slug).
pub fn release_only(self) {
self.unlink_lock_only();
}
/// Removes the lock file, logging a warning on any failure other than the
/// file already being absent.
fn unlink_lock_only(&self) {
if let Err(e) = fs::remove_file(&self.path) {
if e.kind() != std::io::ErrorKind::NotFound {
tracing::warn!(
path = %self.path.display(),
error = %e,
"failed to remove consolidation lock"
);
}
}
}
}
/// Reports whether a process with the given pid currently exists, probing
/// with `kill(pid, 0)`.
///
/// Returns `false` for `0` and for values that do not fit a positive `i32`.
#[cfg(unix)]
fn pid_is_alive(pid: u32) -> bool {
    // `kill(0, 0)` and `kill(-1, 0)` address a process group / every
    // signalable process rather than a single pid, so they would report
    // false positives. Anything that is not a positive `pid_t` — including a
    // corrupted lock carrying a u32::MAX-like value — counts as stale.
    let target = match i32::try_from(pid) {
        Ok(value) if value != 0 => value,
        _ => return false,
    };
    // SAFETY: signal 0 delivers nothing; `kill` only checks that the target
    // exists and that the caller may signal it.
    if unsafe { libc::kill(target, 0) } == 0 {
        return true;
    }
    // ESRCH is the only errno that means "no such process". EPERM means the
    // process exists but cannot be signalled, which still counts as alive; a
    // missing errno is treated like any other non-ESRCH failure.
    std::io::Error::last_os_error()
        .raw_os_error()
        .map_or(true, |code| code != libc::ESRCH)
}
/// Fallback for non-unix targets, where no liveness probe is implemented:
/// every pid is reported as alive.
#[cfg(not(unix))]
fn pid_is_alive(_pid: u32) -> bool {
// Unsupported platforms: assume the lock is live so we never overwrite
// someone else's claim. consolidation will skip and try again next post-run.
true
}
#[cfg(test)]
mod tests {
use super::*;
use crate::extract::{ExtractedPayload, write_staging};
use crate::schema::SourceRef;
// Creates a temp-dir workspace whose staging directory already exists.
fn make_layout() -> (tempfile::TempDir, WorkspaceLayout) {
let dir = tempfile::TempDir::new().unwrap();
let layout = WorkspaceLayout::new(dir.path().to_path_buf());
std::fs::create_dir_all(layout.staging_dir()).unwrap();
(dir, layout)
}
// Acquiring on a clean staging dir writes the lock file and records the
// pid / pod name that were passed in.
#[test]
fn acquire_writes_lock_file() {
let (_dir, layout) = make_layout();
let lock = StagingLock::acquire(&layout, std::process::id(), "pod", Vec::new()).unwrap();
let path = layout.staging_dir().join(LOCK_FILE);
assert!(path.exists());
assert_eq!(lock.record().pid, std::process::id());
assert_eq!(lock.record().pod_name, "pod");
}
// A second acquisition while the recorded owner is alive must fail with
// `InUse`.
#[test]
fn acquire_rejects_when_live_pid_holds_lock() {
let (_dir, layout) = make_layout();
// Use this test process's pid — it's definitely alive.
let _first =
StagingLock::acquire(&layout, std::process::id(), "pod-a", Vec::new()).unwrap();
let err = StagingLock::acquire(&layout, std::process::id(), "pod-b", Vec::new())
.expect_err("expected InUse");
assert!(matches!(err, LockError::InUse { .. }));
}
// A lock file whose recorded pid is not alive must be taken over.
#[test]
fn acquire_overwrites_stale_lock() {
let (_dir, layout) = make_layout();
// pid 1 is init on linux but for arbitrarily-large pids we'd need
// `kill(pid, 0)` to return ESRCH. Use u32::MAX which is guaranteed
// dead on every platform we target.
let stale = LockRecord {
pid: u32::MAX,
pod_name: "ghost".into(),
started_at: Utc::now(),
consumed_ids: Vec::new(),
};
std::fs::write(
layout.staging_dir().join(LOCK_FILE),
serde_json::to_string_pretty(&stale).unwrap(),
)
.unwrap();
let lock = StagingLock::acquire(&layout, std::process::id(), "pod", Vec::new())
.expect("stale lock must be overwritable");
assert_eq!(lock.record().pid, std::process::id());
}
// Release with cleanup deletes only the consumed entry (`id_a`), keeps the
// other entry (`id_b`), and removes the lock file.
#[test]
fn release_drops_consumed_entries_and_unlinks_lock() {
let (_dir, layout) = make_layout();
let (id_a, _) = write_staging(
&layout,
SourceRef {
segment_id: "s".into(),
range: [0, 0],
},
ExtractedPayload::default(),
)
.unwrap();
let (id_b, _) = write_staging(
&layout,
SourceRef {
segment_id: "s".into(),
range: [1, 1],
},
ExtractedPayload::default(),
)
.unwrap();
let lock = StagingLock::acquire(&layout, std::process::id(), "pod", vec![id_a]).unwrap();
let lock_path = lock.path().to_path_buf();
lock.release_with_cleanup(&layout);
assert!(!lock_path.exists(), "lock file must be removed");
assert!(
!layout.staging_dir().join(format!("{id_a}.json")).exists(),
"consumed entry must be deleted"
);
assert!(
layout.staging_dir().join(format!("{id_b}.json")).exists(),
"non-consumed entry must remain"
);
}
// A consumed id without a matching file must not break the release.
#[test]
fn release_is_resilient_to_missing_consumed_entries() {
let (_dir, layout) = make_layout();
let phantom = uuid::Uuid::now_v7();
let lock = StagingLock::acquire(&layout, std::process::id(), "pod", vec![phantom]).unwrap();
let lock_path = lock.path().to_path_buf();
// No file at <staging>/<phantom>.json — release must not panic.
lock.release_with_cleanup(&layout);
assert!(!lock_path.exists());
}
}

View File

@ -0,0 +1,32 @@
//! consolidation: 統合 + 整理。
//!
//! extract が staging に残した活動ログを `memory/*` / `knowledge/*` に
//! 統合し、続けて既存 record を `outdated | superseded | unused | noisy`
//! の観点で整理する disposable Worker を、Pod 側が組み立てるための
//! ヘルパー群を提供する。Pod は次の手順で sub-Worker を構築する:
//!
//! - [`build_consolidate_input`] を sub-Worker の最初の user 入力に
//! - memory 専用 Tool (read / write / edit) と Knowledge / memory 検索ツールを登録
//! - [`StagingLock::acquire`] で並走防止 + consumed ID 確定
//! - sub-Worker run 完了後、[`StagingLock::release_with_cleanup`] で
//! consumed ID 分の staging のみ削除し、占有ファイルを解放
//!
//! system prompt は Pod の `PromptCatalog`
//! (`PodPrompt::MemoryConsolidationSystem`) で管理される。Usage report は
//! 判断材料として渡すだけで、ここでは Knowledge 化や protection の hard decision はしない
//! (`docs/plan/memory.md` §Consolidation / 整理材料)。
mod input;
mod lock;
mod staging;
mod tidy;
pub use input::{
build_consolidate_input, render_existing_memory_records, render_staging_records,
render_tidy_hints,
};
pub use lock::{LockError, LockRecord, StagingLock};
pub use staging::{
StagingEntriesSnapshot, StagingEntry, list_staging_entries, list_staging_entries_snapshot,
};
pub use tidy::{TidyHints, collect_tidy_hints};

View File

@ -0,0 +1,190 @@
//! `_staging/*.json` を列挙して [`StagingRecord`] に展開する読み込みヘルパー。
//!
//! consolidation 起動時のスナップショット(consumed ID list 確定)と、整理 step
//! が終わった後の cleanup の双方で使う。`.consolidation.lock` のような
//! 占有ファイルは UUIDv7 として parse できないので自然に除外される。
//!
//! [`StagingRecord`] のスキーマは extract が書き出す側 (`crate::extract`)
//! と単一の真実源 — ここでは読み出す側だけを担当する。
use std::path::PathBuf;
use uuid::Uuid;
use crate::extract::StagingRecord;
use crate::workspace::WorkspaceLayout;
/// One entry found in the staging directory. `id` is the UUID recovered from
/// the file name `<id>.json`.
#[derive(Debug, Clone)]
pub struct StagingEntry {
pub id: Uuid,
pub path: PathBuf,
pub record: StagingRecord,
/// Byte length of this file. Used for the threshold check
/// (`consolidation_threshold_bytes`).
pub bytes: u64,
}
/// Result of inspecting the staging directory. `entries` holds only the
/// staging files that could be read with the current schema; `invalid_count`
/// is the number of `.json` files that could not be accepted as staging.
#[derive(Debug, Clone, Default)]
pub struct StagingEntriesSnapshot {
pub entries: Vec<StagingEntry>,
pub invalid_count: usize,
}
/// `<staging_dir>/*.json` を読んで UUIDv7 順に並べた [`StagingEntry`]
/// 配列を返す。staging_dir が存在しなければ空配列。読めないファイルや
/// JSON parse 失敗は `tracing::warn!` してスキップ(壊れた個別ファイルが
/// consolidation 全体を止めないように)。
pub fn list_staging_entries(layout: &WorkspaceLayout) -> Vec<StagingEntry> {
list_staging_entries_snapshot(layout).entries
}
/// Reads `<staging_dir>/*.json` and returns the valid staging entries together
/// with the number of invalid ones.
///
/// Entries are sorted by id. A staging directory that cannot be listed yields
/// an empty snapshot. Non-file entries and files without a `.json` extension
/// are ignored. A `.json` file counts as invalid when its stem is not valid
/// UTF-8, is not a UUID, or when the file cannot be read or parsed as a
/// [`StagingRecord`]. Invalid files are only counted so the caller can
/// observe them; they are not migrated, deleted or archived.
pub fn list_staging_entries_snapshot(layout: &WorkspaceLayout) -> StagingEntriesSnapshot {
    let dir = layout.staging_dir();
    let entries = match std::fs::read_dir(&dir) {
        Ok(it) => it,
        Err(_) => return StagingEntriesSnapshot::default(),
    };
    let mut out: Vec<StagingEntry> = Vec::new();
    let mut invalid_count = 0;
    for entry in entries.flatten() {
        let path = entry.path();
        if !path.is_file() {
            continue;
        }
        if path.extension().and_then(|s| s.to_str()) != Some("json") {
            continue;
        }
        let Some(stem) = path.file_stem().and_then(|s| s.to_str()) else {
            invalid_count += 1;
            continue;
        };
        let id = match Uuid::parse_str(stem) {
            Ok(u) => u,
            Err(e) => {
                invalid_count += 1;
                tracing::warn!(path = %path.display(), error = %e, "failed to parse staging entry id");
                continue;
            }
        };
        let raw = match std::fs::read_to_string(&path) {
            Ok(s) => s,
            Err(e) => {
                invalid_count += 1;
                tracing::warn!(path = %path.display(), error = %e, "failed to read staging entry");
                continue;
            }
        };
        // Take the size from the bytes actually read. The previous separate
        // `metadata` call silently reported 0 on failure, which understated
        // the staging volume, and could disagree with the content if the file
        // changed between the two calls. `usize` to `u64` is a widening
        // conversion on supported targets.
        let bytes = raw.len() as u64;
        let record = match serde_json::from_str::<StagingRecord>(&raw) {
            Ok(r) => r,
            Err(e) => {
                invalid_count += 1;
                tracing::warn!(path = %path.display(), error = %e, "failed to parse staging entry");
                continue;
            }
        };
        out.push(StagingEntry {
            id,
            path,
            record,
            bytes,
        });
    }
    out.sort_by_key(|e| e.id);
    StagingEntriesSnapshot {
        entries: out,
        invalid_count,
    }
}
#[cfg(test)]
mod tests {
use super::*;
use crate::extract::{ExtractedPayload, write_staging};
use crate::schema::SourceRef;
// Default (empty) extract payload used by every staging write below.
fn empty_payload() -> ExtractedPayload {
ExtractedPayload::default()
}
// Shorthand for building a `SourceRef`.
fn source(segment_id: &str, range: [u64; 2]) -> SourceRef {
SourceRef {
segment_id: segment_id.into(),
range,
}
}
// Entries written one after another must come back in the same order.
#[test]
fn lists_in_uuidv7_order() {
let tmp = tempfile::TempDir::new().unwrap();
let layout = WorkspaceLayout::new(tmp.path().to_path_buf());
let (id1, _) = write_staging(&layout, source("s", [0, 1]), empty_payload()).unwrap();
let (id2, _) = write_staging(&layout, source("s", [2, 3]), empty_payload()).unwrap();
let (id3, _) = write_staging(&layout, source("s", [4, 5]), empty_payload()).unwrap();
let entries = list_staging_entries(&layout);
let ids: Vec<Uuid> = entries.iter().map(|e| e.id).collect();
assert_eq!(ids, vec![id1, id2, id3]);
}
// One valid entry plus three unusable `.json` files and a lock file: the
// listing returns the single valid entry and reports three invalid files.
#[test]
fn skips_lock_file_and_counts_invalid_json() {
let tmp = tempfile::TempDir::new().unwrap();
let layout = WorkspaceLayout::new(tmp.path().to_path_buf());
let (_id, _) = write_staging(&layout, source("s", [0, 1]), empty_payload()).unwrap();
// Drop a non-UUID json file, an unparsable UUID-named json file, an
// old-schema UUID-named json file, and a bare lock file alongside.
// Lock files are not `.json`; invalid `.json` files are surfaced
// separately instead of being mistaken for an empty staging directory.
std::fs::write(layout.staging_dir().join("not-a-uuid.json"), "{}").unwrap();
let bad_id = Uuid::now_v7();
std::fs::write(layout.staging_dir().join(format!("{bad_id}.json")), "{").unwrap();
let old_schema_id = Uuid::now_v7();
std::fs::write(
layout.staging_dir().join(format!("{old_schema_id}.json")),
serde_json::json!({
"source": {
"session_id": "legacy-session",
"range": [0, 1]
},
"requests": []
})
.to_string(),
)
.unwrap();
std::fs::write(layout.staging_dir().join(".consolidation.lock"), "{}").unwrap();
let entries = list_staging_entries(&layout);
assert_eq!(entries.len(), 1);
let snapshot = list_staging_entries_snapshot(&layout);
assert_eq!(snapshot.entries.len(), 1);
assert_eq!(snapshot.invalid_count, 3);
}
// Without a staging directory the listing is empty rather than an error.
#[test]
fn missing_dir_returns_empty() {
let tmp = tempfile::TempDir::new().unwrap();
let layout = WorkspaceLayout::new(tmp.path().to_path_buf());
// No staging dir at all.
assert!(list_staging_entries(&layout).is_empty());
}
}

View File

@ -0,0 +1,356 @@
//! 整理 step が prompt 入力に乗せる「整理材料」スキャナ。
//!
//! `docs/plan/memory.md` §整理(GC 相当)の扱い と
//! `tickets/memory-consolidation.md` の整理材料リストに従い、
//! メトリクス未完の現状で機械的に拾えるヒントだけを集める:
//!
//! - `replaced` chain: `status: replaced` の Decision とその `replaced_by`
//! - sources 過多: `sources` / `last_sources` 配列が閾値超過の record
//! - 類似 slug 乱立: 同 kind の slug が Levenshtein 2 以内のクラスター
//!
//! 使用頻度メトリクスベースの保護閾値情報は `tickets/memory-usage-metrics.md`
//! の成果物が出るまで空で渡る。
use std::collections::{BTreeMap, BTreeSet};
use crate::Slug;
use crate::schema::{
DecisionFrontmatter, KnowledgeFrontmatter, RequestFrontmatter, split_frontmatter,
};
use crate::workspace::{RecordKind, WorkspaceLayout};
/// Threshold above which a `sources` list is flagged as overflowing. Follows
/// the same value (10) as `linter::warnings::SOURCES_OVERFLOW_THRESHOLD`, so
/// that the line at which the Linter warns about too many sources and the
/// line at which the tidy step recommends trimming stay aligned.
pub const SOURCES_OVERFLOW_THRESHOLD: usize = 10;
/// Distance used for similar-slug clustering. Same value as
/// `linter::warnings::SIMILAR_SLUG_DISTANCE`.
pub const SIMILAR_SLUG_DISTANCE: usize = 2;
/// Mechanically collected hints for the tidy step. An empty field means
/// "nothing to report" for that category.
#[derive(Debug, Default, Clone)]
pub struct TidyHints {
/// Map from the slug of each Decision still present with
/// `status: replaced` to its `replaced_by` value. Entries whose
/// `replaced_by` is `None` are listed as well, as stalled replacements.
pub replaced_decisions: BTreeMap<String, Option<String>>,
/// Records with an accumulated sources list, as kind / slug / count
/// triples.
pub sources_overflow: Vec<SourcesOverflow>,
/// Clusters of same-kind slugs within Levenshtein distance
/// `<= SIMILAR_SLUG_DISTANCE`. Slugs inside a cluster are sorted.
pub similar_slug_clusters: Vec<SimilarSlugCluster>,
}
/// A record whose sources list exceeds [`SOURCES_OVERFLOW_THRESHOLD`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SourcesOverflow {
pub kind: RecordKind,
pub slug: String,
// Number of entries in the record's sources list.
pub count: usize,
}
/// A group of two or more same-kind slugs that are close to each other by
/// edit distance.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SimilarSlugCluster {
pub kind: RecordKind,
pub slugs: Vec<String>,
}
impl TidyHints {
    /// Returns `true` when none of the three hint categories has any entry.
    pub fn is_empty(&self) -> bool {
        let Self {
            replaced_decisions,
            sources_overflow,
            similar_slug_clusters,
        } = self;
        replaced_decisions.is_empty()
            && sources_overflow.is_empty()
            && similar_slug_clusters.is_empty()
    }
}
/// workspace を一通りスキャンして [`TidyHints`] を組み立てる。読めない /
/// parse できない record は黙ってスキップLinter は write 経路で守って
/// いるので、ここで顕在化してもどうしようもない)。
pub fn collect_tidy_hints(layout: &WorkspaceLayout) -> TidyHints {
let mut hints = TidyHints::default();
let decisions = read_kind_records(layout, RecordKind::Decision);
let requests = read_kind_records(layout, RecordKind::Request);
let knowledge = read_kind_records(layout, RecordKind::Knowledge);
for (slug, content) in &decisions {
let fm = parse_yaml::<DecisionFrontmatter>(content);
if let Some(fm) = fm.as_ref() {
if matches!(fm.status, crate::schema::DecisionStatus::Replaced) {
hints
.replaced_decisions
.insert(slug.clone(), fm.replaced_by.as_ref().map(|s| s.to_string()));
}
if fm.sources.len() > SOURCES_OVERFLOW_THRESHOLD {
hints.sources_overflow.push(SourcesOverflow {
kind: RecordKind::Decision,
slug: slug.clone(),
count: fm.sources.len(),
});
}
}
}
for (slug, content) in &requests {
if let Some(fm) = parse_yaml::<RequestFrontmatter>(content) {
if fm.sources.len() > SOURCES_OVERFLOW_THRESHOLD {
hints.sources_overflow.push(SourcesOverflow {
kind: RecordKind::Request,
slug: slug.clone(),
count: fm.sources.len(),
});
}
}
}
for (slug, content) in &knowledge {
if let Some(fm) = parse_yaml::<KnowledgeFrontmatter>(content) {
if fm.last_sources.len() > SOURCES_OVERFLOW_THRESHOLD {
hints.sources_overflow.push(SourcesOverflow {
kind: RecordKind::Knowledge,
slug: slug.clone(),
count: fm.last_sources.len(),
});
}
}
}
hints.sources_overflow.sort_by(|a, b| {
(a.kind.as_str(), a.slug.as_str()).cmp(&(b.kind.as_str(), b.slug.as_str()))
});
let decision_slugs: Vec<&str> = decisions.keys().map(|s| s.as_str()).collect();
let request_slugs: Vec<&str> = requests.keys().map(|s| s.as_str()).collect();
let knowledge_slugs: Vec<&str> = knowledge.keys().map(|s| s.as_str()).collect();
if let Some(c) = cluster_similar(&decision_slugs, RecordKind::Decision) {
hints.similar_slug_clusters.extend(c);
}
if let Some(c) = cluster_similar(&request_slugs, RecordKind::Request) {
hints.similar_slug_clusters.extend(c);
}
if let Some(c) = cluster_similar(&knowledge_slugs, RecordKind::Knowledge) {
hints.similar_slug_clusters.extend(c);
}
hints
.similar_slug_clusters
.sort_by(|a, b| (a.kind.as_str(), &a.slugs).cmp(&(b.kind.as_str(), &b.slugs)));
hints
}
/// Reads every `*.md` record of `kind` into a `slug -> full file content` map.
///
/// Decisions, requests and knowledge are read from the directories the layout
/// reports for them; `Summary` and `Workflow` always yield an empty map. A
/// directory that cannot be listed yields an empty map. Entries are skipped
/// when they are not files, do not end in `.md`, have a stem that is not a
/// valid [`Slug`], or cannot be read.
fn read_kind_records(layout: &WorkspaceLayout, kind: RecordKind) -> BTreeMap<String, String> {
    let dir = match kind {
        RecordKind::Summary | RecordKind::Workflow => return BTreeMap::new(),
        RecordKind::Decision => layout.decisions_dir(),
        RecordKind::Request => layout.requests_dir(),
        RecordKind::Knowledge => layout.knowledge_dir(),
    };
    let Ok(listing) = std::fs::read_dir(&dir) else {
        return BTreeMap::new();
    };
    listing
        .flatten()
        .map(|item| item.path())
        .filter(|candidate| candidate.is_file())
        .filter_map(|candidate| {
            let slug = candidate.file_stem().and_then(|s| s.to_str())?;
            if candidate.extension().and_then(|s| s.to_str()) != Some("md") {
                return None;
            }
            if Slug::parse(slug).is_err() {
                return None;
            }
            let content = std::fs::read_to_string(&candidate).ok()?;
            Some((slug.to_string(), content))
        })
        .collect()
}
/// Splits the frontmatter off `content` and deserializes it as `F`.
///
/// Returns `None` when the frontmatter cannot be split off or does not
/// deserialize into `F`.
fn parse_yaml<F: serde::de::DeserializeOwned>(content: &str) -> Option<F> {
    split_frontmatter(content)
        .ok()
        .and_then(|(yaml, _body)| serde_yaml::from_str::<F>(yaml).ok())
}
/// Groups same-kind slugs into connected components of the graph whose edges
/// join slugs at Levenshtein distance `<= SIMILAR_SLUG_DISTANCE`.
///
/// Only components with at least two slugs are returned, each with its slugs
/// sorted. Returns `None` when fewer than two slugs are given or when no such
/// component exists.
fn cluster_similar(slugs: &[&str], kind: RecordKind) -> Option<Vec<SimilarSlugCluster>> {
    let count = slugs.len();
    if count < 2 {
        return None;
    }

    /// Finds the representative of `node`, compressing the path on the way.
    fn root_of(links: &mut [usize], mut node: usize) -> usize {
        let mut root = node;
        while links[root] != root {
            root = links[root];
        }
        while links[node] != root {
            let next = links[node];
            links[node] = root;
            node = next;
        }
        root
    }

    let mut links: Vec<usize> = (0..count).collect();
    for left in 0..count {
        for right in (left + 1)..count {
            if levenshtein(slugs[left], slugs[right]) > SIMILAR_SLUG_DISTANCE {
                continue;
            }
            // Merge the left component into the right one.
            let left_root = root_of(&mut links, left);
            let right_root = root_of(&mut links, right);
            if left_root != right_root {
                links[left_root] = right_root;
            }
        }
    }

    let mut components: BTreeMap<usize, Vec<String>> = BTreeMap::new();
    for (index, slug) in slugs.iter().enumerate() {
        let root = root_of(&mut links, index);
        components.entry(root).or_default().push(slug.to_string());
    }

    let mut emitted: BTreeSet<Vec<String>> = BTreeSet::new();
    let mut clusters: Vec<SimilarSlugCluster> = Vec::new();
    for mut members in components.into_values() {
        if members.len() < 2 {
            continue;
        }
        members.sort();
        // Emit each distinct member list only once.
        if emitted.insert(members.clone()) {
            clusters.push(SimilarSlugCluster {
                kind,
                slugs: members,
            });
        }
    }
    (!clusters.is_empty()).then_some(clusters)
}
/// Iterative two-row Levenshtein distance over chars (matches the Linter's
/// implementation; kept private to avoid widening that crate-internal API).
fn levenshtein(a: &str, b: &str) -> usize {
let a: Vec<char> = a.chars().collect();
let b: Vec<char> = b.chars().collect();
if a.is_empty() {
return b.len();
}
if b.is_empty() {
return a.len();
}
let mut prev: Vec<usize> = (0..=b.len()).collect();
let mut curr: Vec<usize> = vec![0; b.len() + 1];
for (i, ca) in a.iter().enumerate() {
curr[0] = i + 1;
for (j, cb) in b.iter().enumerate() {
let cost = if ca == cb { 0 } else { 1 };
curr[j + 1] = (curr[j] + 1).min(prev[j + 1] + 1).min(prev[j] + cost);
}
std::mem::swap(&mut prev, &mut curr);
}
prev[b.len()]
}
#[cfg(test)]
mod tests {
use super::*;
use chrono::Utc;
use std::path::Path;
// Current time as an RFC 3339 string, used to fill frontmatter timestamps.
fn now() -> String {
Utc::now().to_rfc3339()
}
// Writes `content` to `p`, creating parent directories first.
fn write(p: &Path, content: &str) {
if let Some(parent) = p.parent() {
std::fs::create_dir_all(parent).unwrap();
}
std::fs::write(p, content).unwrap();
}
// Fresh temp-dir workspace with nothing written into it.
fn workspace() -> (tempfile::TempDir, WorkspaceLayout) {
let dir = tempfile::TempDir::new().unwrap();
let layout = WorkspaceLayout::new(dir.path().to_path_buf());
(dir, layout)
}
// A decision with `status: replaced` is reported with its `replaced_by`
// target; the open decision it points to is not reported.
#[test]
fn collects_replaced_chain() {
let (dir, layout) = workspace();
write(
&dir.path().join(".insomnia/memory/decisions/replaced.md"),
&format!(
"---\ncreated_at: {n}\nupdated_at: {n}\nsources: []\nstatus: replaced\nreplaced_by: winner\n---\n",
n = now()
),
);
write(
&dir.path().join(".insomnia/memory/decisions/winner.md"),
&format!(
"---\ncreated_at: {n}\nupdated_at: {n}\nsources: []\nstatus: open\n---\n",
n = now()
),
);
let hints = collect_tidy_hints(&layout);
assert_eq!(
hints.replaced_decisions.get("replaced").cloned(),
Some(Some("winner".into()))
);
assert!(!hints.replaced_decisions.contains_key("winner"));
}
// A decision with 15 sources (above the threshold of 10) is flagged with
// its kind, slug and source count.
#[test]
fn flags_sources_overflow() {
let (dir, layout) = workspace();
let many_sources: String = (0..15)
.map(|i| format!(" - segment_id: s{i}\n range: [{i}, {i}]\n"))
.collect();
write(
&dir.path().join(".insomnia/memory/decisions/big.md"),
&format!(
"---\ncreated_at: {n}\nupdated_at: {n}\nstatus: open\nsources:\n{m}---\n",
n = now(),
m = many_sources
),
);
let hints = collect_tidy_hints(&layout);
assert_eq!(hints.sources_overflow.len(), 1);
assert_eq!(hints.sources_overflow[0].slug, "big");
assert_eq!(hints.sources_overflow[0].kind, RecordKind::Decision);
assert_eq!(hints.sources_overflow[0].count, 15);
}
// The three `db-po*` slugs end up in one sorted cluster; `alpha` is far
// from all of them and is left out.
#[test]
fn clusters_similar_slugs() {
let (dir, layout) = workspace();
for slug in ["db-pool", "db-pol", "db-pools", "alpha"] {
write(
&dir.path()
.join(format!(".insomnia/memory/decisions/{slug}.md")),
&format!(
"---\ncreated_at: {n}\nupdated_at: {n}\nsources: []\nstatus: open\n---\n",
n = now()
),
);
}
let hints = collect_tidy_hints(&layout);
assert_eq!(hints.similar_slug_clusters.len(), 1);
assert_eq!(
hints.similar_slug_clusters[0].slugs,
vec![
"db-pol".to_string(),
"db-pool".to_string(),
"db-pools".to_string(),
]
);
}
// A workspace without any records produces no hints at all.
#[test]
fn empty_workspace_yields_empty_hints() {
let (_dir, layout) = workspace();
let hints = collect_tidy_hints(&layout);
assert!(hints.is_empty());
}
}

121
crates/memory/src/error.rs Normal file
View File

@ -0,0 +1,121 @@
//! Errors raised by the memory subsystem.
use std::path::PathBuf;
use lint_common::RecordLintError;
use thiserror::Error;
/// Top-level error for memory operations that don't fit the lint flow.
///
/// Every variant carries the path it concerns so the rendered message can
/// show it via `Path::display`.
#[derive(Debug, Error)]
pub enum MemoryError {
/// The given path lies outside both the memory tree and the knowledge tree.
#[error("path is not under the memory or knowledge tree: {}", .0.display())]
OutsideMemoryTree(PathBuf),
/// The given path is relative where an absolute one is required.
#[error("path is not absolute: {}", .0.display())]
RelativePath(PathBuf),
/// An I/O operation on `path` failed; `source` is the underlying error and
/// is exposed through the `Error::source` chain (`#[source]`).
#[error("io error at {}: {source}", .path.display())]
Io {
path: PathBuf,
#[source]
source: std::io::Error,
},
}
impl MemoryError {
pub fn io(path: impl Into<PathBuf>, source: std::io::Error) -> Self {
Self::Io {
path: path.into(),
source,
}
}
}
/// A single Linter violation. Multiple are aggregated in a [`LintReport`].
///
/// `Display` produces a one-line message used directly in the `ToolError`
/// payload returned to the LLM.
#[derive(Debug, Clone, Error, PartialEq, Eq)]
pub enum LintError {
/// The path does not name a valid memory record location.
#[error("path is not a valid memory record location: {}", .0.display())]
InvalidPath(PathBuf),
/// The path belongs to a different record kind than the one expected there.
#[error("path is for a different record kind than expected at this location: {}", .0.display())]
WrongRecordKind(PathBuf),
/// A record-level lint error from `lint_common`, rendered with that error's
/// own message (`transparent`) and convertible via `From`.
#[error(transparent)]
Record(#[from] RecordLintError),
/// A required frontmatter field is absent; the payload is the field name.
#[error("missing required frontmatter field: `{0}`")]
MissingField(&'static str),
/// A frontmatter field is present but its value is rejected; `message`
/// explains why.
#[error("invalid value for `{field}`: {message}")]
InvalidField {
field: &'static str,
message: String,
},
/// A decision's `status` is not one of `open`, `resolved` or `replaced`;
/// the payload is the offending value.
#[error("Decisions `status` must be one of open|resolved|replaced (got `{0}`)")]
InvalidStatus(String),
/// A knowledge record's description is `actual` chars long, over `limit`.
#[error(
"Knowledge with model_invokation: true cannot have description longer than {limit} chars (got {actual})"
)]
DescriptionTooLong { actual: usize, limit: usize },
/// The record body is `actual` chars long, over the `limit` for its kind.
#[error("body exceeds the size limit for this record kind: {actual} chars > {limit}")]
BodyTooLong { actual: usize, limit: usize },
/// A record with this slug already exists, so creating a new one is refused.
#[error("slug `{0}` already exists; use the edit tool instead of creating a new record")]
SlugAlreadyExists(String),
/// `field` refers to a `slug` that does not exist among records of `kind`.
#[error("`{field}` references unknown {kind} slug `{slug}`")]
UnknownReference {
field: &'static str,
kind: &'static str,
slug: String,
},
/// Following `replaced_by` links loops back on itself; `chain` is the
/// already-rendered description of the loop.
#[error("`replaced_by` chain forms a cycle: {chain}")]
ReplacedByCycle { chain: String },
/// A record names itself in `replaced_by`.
#[error("`replaced_by` must point to a different slug than the record itself")]
ReplacedBySelf,
}
/// A non-blocking Linter warning.
///
/// Warnings are carried in `ToolOutput.summary` so the agent can deal with
/// them when convenient; they never abort the write.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LintWarning {
    /// Single-source record exceeds the importance/size threshold.
    LowImportanceLargeRecord { chars: usize },
    /// `sources` array has grown past the soft cap.
    SourcesOverflow { count: usize },
    /// Multiple slugs in the same kind are within Levenshtein distance 2.
    SimilarSlugs(Vec<String>),
}

impl std::fmt::Display for LintWarning {
    /// Renders the warning as a single human-readable line.
    ///
    /// For [`LintWarning::SimilarSlugs`] the slugs are listed after the
    /// prefix, separated by `", "`; an empty list leaves only the prefix.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            LintWarning::SimilarSlugs(slugs) => {
                f.write_str("similar slugs detected (consider merging): ")?;
                f.write_str(&slugs.join(", "))
            }
            LintWarning::SourcesOverflow { count } => write!(
                f,
                "`sources` has {count} entries — consider keeping only the most recent and relying on git log for the rest"
            ),
            LintWarning::LowImportanceLargeRecord { chars } => write!(
                f,
                "record is large ({chars} chars) but only has 1 source — consider splitting or trimming"
            ),
        }
    }
}

View File

@ -0,0 +1,87 @@
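//! Builds the input text for the extract sub-Worker.
//!
//! Follows the same policy as `crates/pod/src/pod.rs::build_summary_prompt`:
//! the `Item` sequence is flattened into plain lines (reasoning is omitted,
//! tool calls keep only their name, tool results keep only their summary).
//! The whole conversation is passed as a single Markdown section, and the
//! extraction instructions live in the system prompt.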
use llm_worker::Item;
/// Formats the given `items` into the first user input for the extract
/// sub-Worker.
///
/// The result is a fixed instruction preamble, a `## Conversation slice`
/// heading followed by the rendered items, and a closing reminder to call
/// `write_extracted` and end the turn.
pub fn build_extract_input(items: &[Item]) -> String {
    let rendered = render_items(items);
    [
        "Extract activity logs from the conversation slice below. \
         Follow the system prompt's schema strictly and call `write_extracted` once.\n\n",
        "## Conversation slice\n",
        rendered.as_str(),
        "\n\nWhen you are done, call `write_extracted` and end the turn.",
    ]
    .concat()
}
/// Flattens `items` into tagged lines separated by blank lines.
///
/// - Messages become `[User] …`, `[Assistant] …` or `[System] …`, with the
///   text of all content parts concatenated without a separator.
/// - Tool calls keep only the tool name; tool results keep only the summary.
/// - Reasoning items are dropped entirely.
fn render_items(items: &[Item]) -> String {
    items
        .iter()
        .filter_map(|item| match item {
            Item::Reasoning { .. } => None,
            Item::ToolCall { name, .. } => Some(format!("[ToolCall] {name}")),
            Item::ToolResult { summary, .. } => Some(format!("[ToolResult] {summary}")),
            Item::Message { role, content, .. } => {
                let label = match role {
                    llm_worker::Role::User => "User",
                    llm_worker::Role::Assistant => "Assistant",
                    llm_worker::Role::System => "System",
                };
                let parts = content.iter().map(|part| part.as_text()).collect::<Vec<_>>();
                let body: String = parts.join("");
                Some(format!("[{label}] {body}"))
            }
        })
        .collect::<Vec<String>>()
        .join("\n\n")
}
/// Tests for `build_extract_input` and the item rendering behind it.
#[cfg(test)]
mod tests {
use super::*;
/// Messages, tool calls and tool results each appear with their tag, and
/// reasoning text does not appear in the output at all.
#[test]
fn renders_user_assistant_pair_and_tool_calls() {
let items = vec![
Item::user_message("hello"),
Item::assistant_message("hi"),
Item::tool_call("c1", "read_file", "{}"),
Item::tool_result("c1", "ok"),
Item::reasoning("internal scratch — should be skipped"),
];
let s = build_extract_input(&items);
assert!(s.contains("[User] hello"));
assert!(s.contains("[Assistant] hi"));
assert!(s.contains("[ToolCall] read_file"));
assert!(s.contains("[ToolResult] ok"));
assert!(!s.contains("scratch"));
}
/// Only a tool result's summary is rendered; its (potentially huge) raw
/// content never reaches the extract input.
#[test]
fn tool_result_renders_summary_but_not_content() {
let huge_content = "raw-content-should-never-enter-extract-input".repeat(10_000);
let items = vec![Item::tool_result_with_content(
"c1",
"short summary kept for extraction",
huge_content.clone(),
)];
let s = build_extract_input(&items);
assert!(s.contains("[ToolResult] short summary kept for extraction"));
assert!(!s.contains("raw-content-should-never-enter-extract-input"));
assert!(!s.contains(&huge_content));
}
}

View File

@ -0,0 +1,35 @@
//! extract: activity extraction.
//!
//! Provides the disposable Worker fired from a regular Pod's post-run hook,
//! together with helpers that write its output to
//! `<workspace>/.insomnia/memory/_staging/<id>.json`. From this module the
//! Pod assembles, in this order:
//!
//! - [`build_extract_input`] as the sub-Worker's first user input,
//! - [`write_extracted_tool`] as its only tool,
//! - [`write_staging`] to write the received JSON out to staging.
//!
//! The system prompt is managed by the Pod's `PromptCatalog`
//! (`PodPrompt::MemoryExtractSystem`). Persisting the pointer
//! (session-store `LogEntry::Extension`, domain `"memory.extract"`) is the
//! Pod's responsibility.
//!
//! Wrapping the output JSON is handled by [`write_staging`], which attaches
//! `source: { segment_id, range }` mechanically so the LLM never has to infer
//! the source.
mod input;
mod payload;
mod pointer;
mod staging;
mod tool;
pub use input::build_extract_input;
pub use payload::{
AttemptEntry, DecisionEntry, DiscussionEntry, ExtractedPayload, RequestEntry, StagingRecord,
};
pub use pointer::{ExtractPointerPayload, fold_pointer};
pub use staging::{StagingError, write_staging};
pub use tool::{ExtractWorkerContext, write_extracted_tool};
/// Domain name used for session-store `LogEntry::Extension` entries.
/// The code persisting the pointer and the code reading it back must both
/// use this constant so that they agree.
pub const EXTRACT_DOMAIN: &str = "memory.extract";

View File

@ -0,0 +1,88 @@
//! Output schema for extraction.
//!
//! The LLM returns an [`ExtractedPayload`] as-is (without `source`); the
//! Pod-side wrapper assembles it into a [`StagingRecord`] and writes it to
//! staging. By contract the source is attached mechanically
//! (`docs/plan/memory.md` §Extract).
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use crate::schema::SourceRef;
// The set of activity-log candidates returned by the LLM. Every array is
// optional (`#[serde(default)]`), so a missing or empty array is accepted.
//
// NOTE(review): this type derives `JsonSchema`, and schemars copies `///`
// doc comments into the generated schema as `description` text. The schema
// for this type is what `write_extracted` advertises as its input schema, so
// the existing `///` line is left verbatim; reword it only as a deliberate
// change to what the model sees.
/// LLM が返す活動ログ候補の集合。すべて optional空配列は許容
#[derive(Debug, Clone, Default, Serialize, Deserialize, JsonSchema)]
pub struct ExtractedPayload {
// Decisions made (options, the chosen one, rationale).
#[serde(default)]
pub decisions: Vec<DecisionEntry>,
// Topics that were discussed.
#[serde(default)]
pub discussions: Vec<DiscussionEntry>,
// Things that were tried, with their outcome.
#[serde(default)]
pub attempts: Vec<AttemptEntry>,
// Structured summaries of user submissions.
#[serde(default)]
pub requests: Vec<RequestEntry>,
}
impl ExtractedPayload {
    /// Returns `true` when every array is empty.
    ///
    /// An empty payload may be treated as "Nothing to save", in which case
    /// the write to staging can be skipped.
    pub fn is_empty(&self) -> bool {
        // Destructure so that adding a field forces this check to be revisited.
        let Self {
            decisions,
            discussions,
            attempts,
            requests,
        } = self;
        let has_any = !decisions.is_empty()
            || !discussions.is_empty()
            || !attempts.is_empty()
            || !requests.is_empty();
        !has_any
    }
}
// A decision that was made: the options, the one chosen, and the rationale.
//
// NOTE(review): `///` comments on this `JsonSchema` type are kept verbatim
// because schemars emits them as schema `description` text; the English
// translations are given as plain `//` comments instead.
/// 判断したこと(選択肢 + 選んだ + 根拠)。
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct DecisionEntry {
// The options that were considered.
/// 検討された選択肢の列挙。
pub options: Vec<String>,
// The option that was adopted.
/// 採用された選択肢。
pub chosen: String,
// Why it was adopted / the supporting rationale.
/// 採用理由 / 根拠。
pub rationale: String,
}
// Something that was discussed: a topic plus the points raised. A conclusion
// is not required.
//
// NOTE(review): `///` comments on this `JsonSchema` type are kept verbatim
// because schemars emits them as schema `description` text; the English
// translations are given as plain `//` comments instead.
/// 議論したこと(トピック + 論点)。結論が出ていなくてもよい。
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct DiscussionEntry {
// Subject of the discussion.
/// 議論の主題。
pub topic: String,
// Points / viewpoints raised within that subject.
/// 主題の中で挙がった論点 / 観点。
pub points: Vec<String>,
}
// Something that was tried: the attempt, its result, and whether it worked.
//
// NOTE(review): `///` comments on this `JsonSchema` type are kept verbatim
// because schemars emits them as schema `description` text; the English
// translations are given as plain `//` comments instead.
/// 試したこと(試行 + 結果 + 成否)。
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct AttemptEntry {
// What was tried.
/// 何を試したか。
pub action: String,
// What came of it.
/// 試した結果。
pub result: String,
// Whether the attempt achieved its goal. Failure and partial success are
// also expressed through this single bool.
/// 試行が目的に対して成功したか。失敗 / 部分成功も含めて bool で表現する。
pub succeeded: bool,
}
// Structured summary of a user submission.
//
// NOTE(review): `///` comments on this `JsonSchema` type are kept verbatim
// because schemars emits them as schema `description` text; the English
// translations are given as plain `//` comments instead.
/// ユーザー submit の構造化要約。
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct RequestEntry {
// The user's intent / goal.
/// ユーザーの意図 / ゴール。
pub intent: String,
// Target file / module / feature (optional). Defaults to `None` when absent
// on input and is omitted from serialized output when `None`.
/// 対象ファイル / モジュール / 機能(任意)。
#[serde(default, skip_serializing_if = "Option::is_none")]
pub target: Option<String>,
// One-sentence summary.
/// 一文サマリ。
pub summary: String,
}
/// The record written to staging, one per file.
///
/// `source` is attached mechanically by the Pod-side wrapper from the
/// segment id and the log-entry range. The LLM neither sees nor infers this
/// field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StagingRecord {
/// Provenance of the extracted slice.
pub source: SourceRef,
/// The LLM-provided payload; `flatten` places its arrays at the same JSON
/// level as `source` rather than under a nested `payload` key.
#[serde(flatten)]
pub payload: ExtractedPayload,
}

View File

@ -0,0 +1,81 @@
//! Payload format for `LogEntry::Extension { domain: "memory.extract", payload }`
//! and the fold helper used on restore. The memory crate owns this domain, so
//! session-store and the Pod know nothing about the payload structure.
use serde::{Deserialize, Serialize};
use super::EXTRACT_DOMAIN;
/// Persisted payload marking the boundary at which an extract completed.
/// One is written to the session log as an Extension entry per extract, and
/// the most recent one is the pointer currently in effect.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ExtractPointerPayload {
/// Index of the last session-store LogEntry processed by the most recent
/// extract. The next `source.range.start` is this value + 1.
pub processed_through_entry: usize,
/// `history.len()` at the time of the most recent extract. The next input
/// is sliced as `history[processed_through_history_len..]`.
pub processed_through_history_len: usize,
/// UUIDv7 string of the staging file that was written. When the LLM
/// returned an empty payload, no staging file is created and an empty
/// string is recorded here (the pointer still advances).
pub staging_id: String,
}
/// Extracts the latest extract pointer from `RestoredState.extensions`.
///
/// Scans from the newest entry backwards and stops at the first one whose
/// domain is [`EXTRACT_DOMAIN`]. Returns `None` for a session that has never
/// been extracted, and also when that newest matching entry does not
/// deserialize into an [`ExtractPointerPayload`] (older entries are not
/// consulted as a fallback).
pub fn fold_pointer(extensions: &[(String, serde_json::Value)]) -> Option<ExtractPointerPayload> {
    for (domain, value) in extensions.iter().rev() {
        if domain == EXTRACT_DOMAIN {
            // Only the newest matching entry counts, valid or not.
            return serde_json::from_value(value.clone()).ok();
        }
    }
    None
}
/// Tests for `fold_pointer`.
#[cfg(test)]
mod tests {
use super::*;
/// With several extract entries interleaved with another domain, the last
/// extract entry in the list is the one returned.
#[test]
fn fold_returns_latest_when_multiple_present() {
let exts = vec![
(
EXTRACT_DOMAIN.to_string(),
serde_json::json!({
"processed_through_entry": 5,
"processed_through_history_len": 4,
"staging_id": "old"
}),
),
("other.domain".to_string(), serde_json::json!({ "x": 1 })),
(
EXTRACT_DOMAIN.to_string(),
serde_json::json!({
"processed_through_entry": 11,
"processed_through_history_len": 8,
"staging_id": "new"
}),
),
];
let p = fold_pointer(&exts).unwrap();
assert_eq!(p.processed_through_entry, 11);
assert_eq!(p.processed_through_history_len, 8);
assert_eq!(p.staging_id, "new");
}
/// Entries from other domains only: there is no pointer.
#[test]
fn fold_returns_none_when_absent() {
let exts = vec![("other.domain".to_string(), serde_json::json!({ "x": 1 }))];
assert!(fold_pointer(&exts).is_none());
}
/// An extract entry whose JSON has the wrong shape yields `None`.
#[test]
fn fold_skips_malformed_entries() {
let exts = vec![(
EXTRACT_DOMAIN.to_string(),
serde_json::json!({ "wrong_shape": true }),
)];
// Current behaviour: take the latest entry and return None if its JSON does
// not match. There is no fallback to an older entry, because silently
// ignoring a broken latest entry would cause unintended re-extraction.
assert!(fold_pointer(&exts).is_none());
}
}

View File

@ -0,0 +1,114 @@
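//! Helper for writing to `<workspace>/.insomnia/memory/_staging/<id>.json`.
//!
//! One record per file, named with a UUIDv7 (the files are short-lived, so the
//! id serves both collision avoidance and ordering). Records are saved in the
//! [`StagingRecord`] format, with `source` attached mechanically.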
use std::fs;
use std::path::PathBuf;
use uuid::Uuid;
use crate::extract::payload::{ExtractedPayload, StagingRecord};
use crate::schema::SourceRef;
use crate::workspace::WorkspaceLayout;
/// Errors raised while writing a record to staging.
#[derive(Debug, thiserror::Error)]
pub enum StagingError {
/// Creating the staging directory at `path` failed.
#[error("failed to create staging dir {}: {source}", .path.display())]
CreateDir {
path: PathBuf,
#[source]
source: std::io::Error,
},
/// Writing the staging file at `path` failed.
#[error("failed to write staging file {}: {source}", .path.display())]
Write {
path: PathBuf,
#[source]
source: std::io::Error,
},
/// The staging record could not be serialized to JSON. Convertible from
/// `serde_json::Error` via `From`, so `?` works on serialization calls.
#[error("failed to serialize staging record: {0}")]
Serialize(#[from] serde_json::Error),
}
/// Wraps `payload` with `source` and writes it to staging.
///
/// Returns the `(id, path)` of the staging file that was assigned. When
/// `payload` is completely empty, callers are advised to skip the call up
/// front via `is_empty()`, but this function still writes an empty payload
/// out normally (spec §Extract explicitly allows empty arrays, and the
/// decision to write or not is left to the caller).
///
/// # Errors
///
/// - [`StagingError::CreateDir`] if the staging directory cannot be created.
/// - [`StagingError::Serialize`] if the record cannot be serialized.
/// - [`StagingError::Write`] if the file cannot be written.
pub fn write_staging(
    layout: &WorkspaceLayout,
    source: SourceRef,
    payload: ExtractedPayload,
) -> Result<(Uuid, PathBuf), StagingError> {
    let dir = layout.staging_dir();
    if let Err(io_err) = fs::create_dir_all(&dir) {
        return Err(StagingError::CreateDir {
            path: dir.clone(),
            source: io_err,
        });
    }

    let id = Uuid::now_v7();
    let file_name = format!("{id}.json");
    let path = dir.join(file_name);

    let body = serde_json::to_string_pretty(&StagingRecord { source, payload })?;
    match fs::write(&path, body) {
        Ok(()) => Ok((id, path)),
        Err(io_err) => Err(StagingError::Write {
            path,
            source: io_err,
        }),
    }
}
/// Tests for `write_staging`, run against a temporary workspace.
#[cfg(test)]
mod tests {
use super::*;
use crate::extract::payload::{DecisionEntry, ExtractedPayload};
/// The file lands in the staging directory, its name contains the returned
/// id, and reading it back yields the supplied source and payload.
#[test]
fn writes_record_with_machine_attached_source() {
let tmp = tempfile::TempDir::new().unwrap();
let layout = WorkspaceLayout::new(tmp.path().to_path_buf());
let source = SourceRef {
segment_id: "sess-1".into(),
range: [3, 7],
};
let payload = ExtractedPayload {
decisions: vec![DecisionEntry {
options: vec!["a".into(), "b".into()],
chosen: "a".into(),
rationale: "shorter".into(),
}],
..Default::default()
};
let (id, path) = write_staging(&layout, source.clone(), payload).unwrap();
assert_eq!(path.parent().unwrap(), layout.staging_dir());
assert!(
path.file_name()
.unwrap()
.to_string_lossy()
.contains(&id.to_string())
);
// Round-trip the written JSON to check both halves of the record.
let written: StagingRecord =
serde_json::from_str(&fs::read_to_string(&path).unwrap()).unwrap();
assert_eq!(written.source.segment_id, "sess-1");
assert_eq!(written.source.range, [3, 7]);
assert_eq!(written.payload.decisions.len(), 1);
}
/// An empty payload is not skipped: it is written and reads back as empty.
#[test]
fn empty_payload_is_written_verbatim() {
let tmp = tempfile::TempDir::new().unwrap();
let layout = WorkspaceLayout::new(tmp.path().to_path_buf());
let source = SourceRef {
segment_id: "sess".into(),
range: [0, 0],
};
let (_, path) = write_staging(&layout, source, ExtractedPayload::default()).unwrap();
let written: StagingRecord =
serde_json::from_str(&fs::read_to_string(&path).unwrap()).unwrap();
assert!(written.payload.is_empty());
}
}

View File

@ -0,0 +1,164 @@
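//! `write_extracted` tool implementation and the context for the sub-Worker.
//!
//! On the sub-Worker side the tool merely receives the [`ExtractedPayload`]
//! produced by the extract worker and stores it in [`ExtractWorkerContext`]
//! behind a `Mutex`. After the run loop completes, the Pod retrieves it with
//! `take_payload()` and passes it to [`super::staging::write_staging`].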
use std::sync::{Arc, Mutex};
use async_trait::async_trait;
use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta, ToolOutput};
use crate::extract::payload::ExtractedPayload;
/// Description text attached to the `write_extracted` tool's metadata.
// NOTE(review): the last sentence ("... or free-form prose the wrapper
// attaches provenance mechanically.") reads as if a separator such as a dash
// was lost before "the wrapper" — confirm against the intended wording.
const WRITE_EXTRACTED_DESCRIPTION: &str = "Submit the final activity-log JSON for this slice. \
Pass an object with `decisions`, `discussions`, `attempts`, and `requests` arrays (any may be empty). \
Call this exactly once and end the turn. Do not include `source`, session metadata, or free-form prose \
the wrapper attaches provenance mechanically.";
/// Receiving end for the extract sub-Worker's output. Hosts a single
/// `ExtractedPayload`.
#[derive(Debug, Default)]
pub struct ExtractWorkerContext {
/// The most recently submitted payload; `None` until `write_extracted`
/// stores one, and again after it has been taken.
payload: Mutex<Option<ExtractedPayload>>,
/// Number of times `write_extracted` has been called (for debugging).
/// Later calls overwrite earlier ones; consult this when the Pod wants to
/// emit a warning.
call_count: Mutex<usize>,
}
impl ExtractWorkerContext {
    /// Creates an empty context: no payload stored and a call count of zero.
    pub fn new() -> Self {
        Self::default()
    }

    /// Called by the Pod after the sub-Worker finishes to take the payload out.
    ///
    /// Returns `None` if `write_extracted` was never called. The stored
    /// payload is removed, so a second call also returns `None`.
    ///
    /// # Panics
    ///
    /// Panics if the payload mutex is poisoned.
    pub fn take_payload(&self) -> Option<ExtractedPayload> {
        let mut slot = self
            .payload
            .lock()
            .expect("extract worker payload poisoned");
        slot.take()
    }

    /// Number of successful `write_extracted` calls recorded so far.
    ///
    /// # Panics
    ///
    /// Panics if the counter mutex is poisoned.
    pub fn call_count(&self) -> usize {
        let calls = self
            .call_count
            .lock()
            .expect("extract worker call_count poisoned");
        *calls
    }
}
/// The `write_extracted` tool: parses the submitted JSON and parks it in the
/// shared [`ExtractWorkerContext`].
struct WriteExtractedTool {
    ctx: Arc<ExtractWorkerContext>,
}

#[async_trait]
impl Tool for WriteExtractedTool {
    /// Parses `input_json` as an `ExtractedPayload`, stores it (replacing any
    /// earlier submission) and bumps the call counter.
    ///
    /// Returns a summary with the number of entries per category. Malformed
    /// input yields `ToolError::InvalidArgument` and leaves both the stored
    /// payload and the counter untouched.
    async fn execute(&self, input_json: &str) -> Result<ToolOutput, ToolError> {
        let payload = match serde_json::from_str::<ExtractedPayload>(input_json) {
            Ok(parsed) => parsed,
            Err(e) => {
                return Err(ToolError::InvalidArgument(format!(
                    "invalid write_extracted input: {e}"
                )));
            }
        };

        // Build the summary before the payload is moved into the context.
        let summary = format!(
            "Recorded activity log: decisions={} discussions={} attempts={} requests={}",
            payload.decisions.len(),
            payload.discussions.len(),
            payload.attempts.len(),
            payload.requests.len(),
        );

        // Each guard is a statement-scoped temporary, so neither lock is held
        // past its own line.
        *self
            .ctx
            .payload
            .lock()
            .expect("extract worker payload poisoned") = Some(payload);
        *self
            .ctx
            .call_count
            .lock()
            .expect("extract worker call_count poisoned") += 1;

        Ok(ToolOutput {
            summary,
            content: None,
        })
    }
}
/// Returns the `write_extracted` tool definition to register on the
/// sub-Worker.
///
/// Each invocation of the returned factory produces the tool metadata (name,
/// description, and an input schema generated from `ExtractedPayload`)
/// together with a tool instance sharing `ctx`. If the schema cannot be
/// converted to a JSON value, an empty object is used instead.
pub fn write_extracted_tool(ctx: Arc<ExtractWorkerContext>) -> ToolDefinition {
    Arc::new(move || {
        let input_schema = serde_json::to_value(schemars::schema_for!(ExtractedPayload))
            .unwrap_or_else(|_| serde_json::json!({}));
        let handler: Arc<dyn Tool> = Arc::new(WriteExtractedTool {
            ctx: Arc::clone(&ctx),
        });
        let meta = ToolMeta::new("write_extracted")
            .description(WRITE_EXTRACTED_DESCRIPTION)
            .input_schema(input_schema);
        (meta, handler)
    })
}
/// Tests for `WriteExtractedTool` and the `ExtractWorkerContext` it fills.
#[cfg(test)]
mod tests {
use super::*;
use llm_worker::tool::Tool;
/// A valid submission is summarized, stored in the context and counted once.
#[tokio::test]
async fn write_extracted_records_payload() {
let ctx = Arc::new(ExtractWorkerContext::new());
let tool: Arc<dyn Tool> = Arc::new(WriteExtractedTool { ctx: ctx.clone() });
let input = serde_json::json!({
"decisions": [{
"options": ["a", "b"],
"chosen": "a",
"rationale": "test"
}],
"discussions": [],
"attempts": [],
"requests": []
})
.to_string();
let out = tool.execute(&input).await.unwrap();
assert!(out.summary.contains("decisions=1"));
let payload = ctx.take_payload().unwrap();
assert_eq!(payload.decisions.len(), 1);
assert_eq!(ctx.call_count(), 1);
}
/// When the tool is called twice, the second payload replaces the first and
/// the call count reflects both calls.
#[tokio::test]
async fn last_call_wins_on_multiple_invocations() {
let ctx = Arc::new(ExtractWorkerContext::new());
let tool: Arc<dyn Tool> = Arc::new(WriteExtractedTool { ctx: ctx.clone() });
let first =
serde_json::json!({"decisions": [], "discussions": [], "attempts": [], "requests": []})
.to_string();
tool.execute(&first).await.unwrap();
let second = serde_json::json!({
"decisions": [],
"discussions": [],
"attempts": [{"action": "x", "result": "ok", "succeeded": true}],
"requests": []
})
.to_string();
tool.execute(&second).await.unwrap();
let payload = ctx.take_payload().unwrap();
assert_eq!(payload.attempts.len(), 1);
assert_eq!(ctx.call_count(), 2);
}
/// Input that is not JSON is rejected as `InvalidArgument` and nothing is
/// stored in the context.
#[tokio::test]
async fn invalid_json_returns_invalid_argument() {
let ctx = Arc::new(ExtractWorkerContext::new());
let tool: Arc<dyn Tool> = Arc::new(WriteExtractedTool { ctx: ctx.clone() });
let res = tool.execute("not json").await;
assert!(matches!(res, Err(ToolError::InvalidArgument(_))));
assert!(ctx.take_payload().is_none());
}
}

35
crates/memory/src/lib.rs Normal file
View File

@ -0,0 +1,35 @@
//! Memory subsystem: persistence layer for `memory/*` and `knowledge/*` records.
//!
//! Self-contained: provides its own Tool implementations (read/write/edit)
//! that target `<workspace>/memory/` and `<workspace>/knowledge/` only,
//! with a pre-write Linter built in. Generic CRUD tools (in the `tools`
//! crate) must not touch these directories — Pod is responsible for
//! denying them at the Scope level when memory is enabled.
pub mod audit;
pub mod consolidate;
pub mod error;
pub mod extract;
pub mod linter;
pub mod resident;
pub mod schema;
pub mod scope;
pub mod tool;
pub mod usage;
pub mod workspace;
pub use error::{LintError, LintWarning, MemoryError};
pub use extract::ExtractPointerPayload;
pub use lint_common::{RecordLintError, Slug, is_valid_slug};
pub use linter::{LintReport, Linter};
pub use resident::{
ResidentKnowledgeEntry, collect_resident_knowledge, collect_resident_summary,
list_knowledge_slugs,
};
pub use scope::deny_write_rules;
pub use usage::{
UsageEvent, UsageEventKind, UsageRecordSnapshot, UsageReport, UsageReportRecord, UsageSource,
append_resident_exposure_event, append_usage_event, append_use_event, build_usage_report,
snapshot_record_from_bytes, snapshot_record_from_layout,
};
pub use workspace::WorkspaceLayout;

Some files were not shown because too many files have changed in this diff Show More