Compare commits

...

513 Commits

Author SHA1 Message Date
36c24a4c7e ticket: close multi-pod empty enter open 2026-05-29 12:27:14 +09:00
48b569667a merge: multi-pod empty enter open 2026-05-29 12:26:43 +09:00
89393e4623 tui: open multi pod entry on empty enter 2026-05-29 12:23:31 +09:00
3c57b1690b ticket: add multi-pod empty enter open 2026-05-29 12:19:27 +09:00
0f56ca1fc0 ticket: close manual rewind control 2026-05-29 12:09:24 +09:00
4065c0a0f3 merge: manual rewind control 2026-05-29 12:08:27 +09:00
cbb59a47d0 fix: guard manual rewind application 2026-05-29 12:05:33 +09:00
f8881f7289 feat: add manual rewind control 2026-05-29 11:48:56 +09:00
85ffbaf10a ticket: refine manual rewind view 2026-05-29 11:16:13 +09:00
e43e6620b5 ticket: close TUI command completion apply 2026-05-29 11:08:57 +09:00
b904f56b4f merge: TUI command completion apply 2026-05-29 11:08:29 +09:00
3e7b81aa9c ticket: close TUI composer cursor scroll 2026-05-29 11:08:05 +09:00
003d2b584c merge: TUI composer cursor scroll 2026-05-29 11:07:39 +09:00
67c5c4a864 ticket: define manual rewind UX 2026-05-29 10:39:55 +09:00
f6a3d2c6e5 tui: apply command completions from keyboard 2026-05-29 10:15:13 +09:00
208143f01b fix: scroll tui composer around cursor 2026-05-29 10:10:49 +09:00
97f3df651a ticket: add TUI input polish tasks 2026-05-29 10:03:07 +09:00
1da2498295 ticket: close installed binary rename 2026-05-29 09:39:09 +09:00
6bb7882c3e merge: rename installed binaries 2026-05-29 09:39:08 +09:00
06e0bfc359 chore: use static crate fetch for nix vendor 2026-05-29 09:28:49 +09:00
0b23aa8191 fix: rename installed binaries 2026-05-29 09:28:31 +09:00
ee4ccba591 ticket: add installed binary rename 2026-05-29 09:14:07 +09:00
0d7244d0cc ticket: close memory tool guidance prompt 2026-05-29 08:59:07 +09:00
eea40f5095 merge: memory tool guidance prompt 2026-05-29 08:58:47 +09:00
cc5510bc60 ticket: close multi-pod open return 2026-05-29 08:57:50 +09:00
0ecc7f487d merge: multi-pod open return 2026-05-29 08:57:24 +09:00
79da9aa102 prompt: add memory tool usage guidance 2026-05-29 08:49:24 +09:00
be54cb07ea tui: return to multi dashboard after opening pod 2026-05-29 08:45:15 +09:00
eb249dae0c ticket: add multi-pod open return 2026-05-29 08:36:02 +09:00
66408d87e0 ticket: close multi-pod layout polish 2026-05-29 01:49:26 +09:00
be4b175bc5 merge: multi-pod view section layout 2026-05-29 01:49:06 +09:00
d3c54a2407 tui: section multi-pod list layout 2026-05-29 01:46:48 +09:00
258ac2b9a6 ticket: close nix packaging 2026-05-29 01:42:09 +09:00
e58d820c90 merge: nix packaging 2026-05-29 01:41:54 +09:00
401d0912b7 nix: exclude local worktrees from package source 2026-05-29 01:41:09 +09:00
d54d49531f ticket: add multi-pod layout polish 2026-05-29 01:33:35 +09:00
b5e608c597 nix: add installable package 2026-05-29 01:32:04 +09:00
f21642a5a6 ticket: close multi-pod TUI view 2026-05-29 01:09:02 +09:00
b63ff7a11e merge: multi-pod TUI view 2026-05-29 01:08:42 +09:00
6fb7b57054 feat: add multi pod tui dashboard 2026-05-29 01:04:56 +09:00
98c931563e ticket: specify nix package file 2026-05-29 00:56:04 +09:00
7aa48ee4d2 ticket: specify multi-pod TUI entrypoint 2026-05-29 00:53:33 +09:00
f1504c40fd ticket: close TUI pod list abstraction 2026-05-29 00:40:32 +09:00
601ce9f5ac merge: TUI pod list abstraction 2026-05-29 00:39:57 +09:00
7d1db97754 tui: drain initial pod status events 2026-05-29 00:39:00 +09:00
0777bcf299 tui: add pod list model 2026-05-29 00:33:57 +09:00
e4810a7411 ticket: add web tools and nix packaging 2026-05-29 00:31:09 +09:00
142322ef95 ticket: refine TUI pod list abstraction 2026-05-29 00:25:03 +09:00
58981775ec ticket: define multi-pod TUI view 2026-05-28 23:48:39 +09:00
18ba6aee38 ticket: add TUI pod list abstraction 2026-05-28 23:17:16 +09:00
ccb4cd30cd audit: record crate boundary findings 2026-05-28 22:25:54 +09:00
06ebecb329 ticket: close spawnpod initial run confirmation 2026-05-28 22:25:28 +09:00
4998813e8a merge: spawnpod initial run confirmation 2026-05-28 22:24:14 +09:00
d6fd4d1b9c fix: confirm initial SpawnPod run delivery 2026-05-28 22:14:28 +09:00
8703f14a15 ticket: add crate boundary audit 2026-05-28 22:13:45 +09:00
09c2041a88 ticket: refine spawnpod socket delivery 2026-05-28 22:06:47 +09:00
2eab301bf8 ticket: close compact session-log exploration 2026-05-28 18:53:52 +09:00
35d35ef471 test: fix runtime dir expectation 2026-05-28 18:53:52 +09:00
1e938b548c merge: compact session-log exploration 2026-05-28 12:40:37 +09:00
fcca67a9ad style: format manifest paths test 2026-05-28 12:38:53 +09:00
a85ca369ae style: revert unrelated manifest path formatting 2026-05-28 12:36:32 +09:00
11d1dcffb6 merge: main trace diagnostics 2026-05-28 12:32:24 +09:00
12dd35cfb2 feat: add compact session exploration tools 2026-05-28 12:31:44 +09:00
9a92443269 trace: llm stream open diagnostics 2026-05-28 12:26:14 +09:00
c274e4a891 feat: bound compact worker context 2026-05-28 11:59:41 +09:00
7034d02455 ticket: add compact work item metadata 2026-05-28 10:01:28 +09:00
65bbff663f ticket: compact session-log exploration 2026-05-28 10:01:03 +09:00
311d74c25d sanitize: neutralize provider notes and remove claude knowledge 2026-05-28 07:45:49 +09:00
df55af3545 sanitize: remove local path references from current tree 2026-05-28 06:26:34 +09:00
7cb1804504 chore: record spawnpod hang report and local manifest 2026-05-28 06:21:01 +09:00
47cc6234be work-items: close openai unhandled sse observability 2026-05-28 05:44:20 +09:00
dbfdf6aa6c merge: openai unhandled sse observability 2026-05-28 05:44:14 +09:00
60b9cb169a fix: trace unhandled openai responses sse 2026-05-28 05:18:57 +09:00
36f544da18 work-items: add openai unhandled sse observability 2026-05-28 05:13:41 +09:00
1f7bc518cb work-items: add pod orchestration guidance item 2026-05-28 04:45:03 +09:00
56aa241d7b work-items: close tickets sh mvp 2026-05-28 04:29:35 +09:00
1e956c7dff merge: tickets work item thread mvp 2026-05-28 04:27:56 +09:00
3345acafab fix: repair migrated work item encoding 2026-05-28 04:09:47 +09:00
23fc2cf9f4 feat: add tickets work item mvp 2026-05-28 03:59:05 +09:00
1c82058c6f ticket: clarify workitem migration scope 2026-05-28 03:49:21 +09:00
2bb0605650 ticket: complete openai responses diagnostics 2026-05-28 03:23:54 +09:00
a0394a01a6 ticket: record openai responses diagnostics fix 2026-05-28 03:23:25 +09:00
c1a724aedf fix: preserve openai responses incomplete diagnostics 2026-05-28 03:22:53 +09:00
21e48bd2c0 ticket: complete memory consolidation skip observability 2026-05-28 03:09:42 +09:00
a7f1b348de merge: memory consolidation skip observability 2026-05-28 03:09:14 +09:00
11f644fddc ticket: complete llm request timeout fix 2026-05-28 02:44:00 +09:00
23d3b9e070 ticket: record llm request timeout fix 2026-05-28 02:43:23 +09:00
9cd776eaec fix: add llm request lifecycle timeouts 2026-05-28 02:42:31 +09:00
647223eb32 ticket: openai responses incomplete observability 2026-05-28 02:40:30 +09:00
40f4e801dc ticket: llm client request timeouts 2026-05-28 02:07:01 +09:00
dac5fc516f ticket: complete codex oauth wire compatibility 2026-05-28 02:05:49 +09:00
876d75a747 fix: align codex oauth wire behavior 2026-05-28 01:57:04 +09:00
00596d3f9a ticket: codex oauth wire compatibility 2026-05-28 01:44:30 +09:00
8c6a4acf5f fix: suppress memory idle skip notices 2026-05-27 18:55:58 +09:00
ef71bb57d3 ticket: complete compact retained split fix 2026-05-26 21:40:18 +09:00
93373066e0 ticket: record compact retained split fix 2026-05-26 21:39:57 +09:00
8416533695 feat: trace pre-stream lifecycle 2026-05-26 21:05:45 +09:00
372a99bc0b feat: add session stream event trace flag 2026-05-26 19:57:47 +09:00
5ccfdea7c8 fix: compact retained split uses raw tail size 2026-05-26 17:52:09 +09:00
ded02e4c08 ticket: compact retained split usage records 2026-05-26 17:04:29 +09:00
a5b1f15632 ticket: pod scope persistence authority 2026-05-26 16:50:01 +09:00
11e86d3e6e chore: complete memory summary resident injection ticket 2026-05-26 13:29:03 +09:00
670b4b876f chore: complete tui user manifest env overlay ticket 2026-05-26 10:10:00 +09:00
405339fb04 fix: align spawn user manifest env overlay 2026-05-26 10:09:17 +09:00
9435f44d53 merge: memory summary resident injection 2026-05-26 09:55:24 +09:00
fea274cfe3 fix: split resident injection gates 2026-05-26 09:44:24 +09:00
7c42c2b110 docs: add tickets.sh workitem mvp ticket 2026-05-26 09:33:30 +09:00
b1a3b06db7 docs: add memory tool guidance ticket 2026-05-26 09:21:57 +09:00
9ec77a2a2b feat: inject memory summary into resident prompt 2026-05-26 09:21:10 +09:00
962652832d chore: tune project memory thresholds 2026-05-26 09:05:14 +09:00
4a4ff0f6c9 docs: add memory summary resident injection ticket 2026-05-26 08:50:58 +09:00
88619f36cf merge: memory consolidation skip observability 2026-05-26 08:37:32 +09:00
3f260d7d4e fix: confirm SpawnPod initial run delivery 2026-05-26 08:37:24 +09:00
bdffa5120d chore: ignore generated insomnia memory 2026-05-26 08:14:46 +09:00
bb917246ec fix: suppress memory idle skip notices 2026-05-26 08:03:17 +09:00
770173a4ef docs: refine memory consolidation skip ticket 2026-05-26 07:53:37 +09:00
66540172de chore: complete llm retry continuation ticket 2026-05-26 07:22:45 +09:00
be753099ae feat: surface llm retry and continuation state 2026-05-26 07:13:59 +09:00
41402c0951 docs: refine llm retry continuation ticket 2026-05-26 05:20:43 +09:00
fc81555129 docs: note spawnpod delivery race precedent 2026-05-25 07:03:00 +09:00
d9191c393f docs: add spawnpod run delivery ticket 2026-05-25 06:37:38 +09:00
01796f9316 docs: add live pending pod picker ticket 2026-05-25 06:29:13 +09:00
ab73051ddc docs: add memory consolidation skip ticket 2026-05-25 05:43:06 +09:00
9fb11e25a4 docs: specify stream continuation policy 2026-05-25 04:48:07 +09:00
f73055550b chore: complete memory audit log ticket 2026-05-25 03:38:18 +09:00
7003c00d45 merge: memory-audit-log 2026-05-25 03:38:03 +09:00
10d3556792 memory: add audit log events 2026-05-25 03:24:04 +09:00
5b3c579324 docs: add actionbar notice api ticket 2026-05-25 02:40:59 +09:00
08710d808d docs: expand memory audit log ticket 2026-05-25 02:06:42 +09:00
e5fda7efdf fix: refine command mode footer 2026-05-25 01:08:41 +09:00
9224951000 chore: complete tui-system-command-compact ticket 2026-05-24 09:40:41 +09:00
0172414d9a merge: tui-system-command-compact 2026-05-24 09:40:25 +09:00
891b6d91fd test: clean up compact event assertion 2026-05-24 09:39:57 +09:00
cabc556b2c feat: add manual compact command 2026-05-24 08:59:44 +09:00
95d05628e7 chore: complete tui-command-mode ticket 2026-05-24 08:39:25 +09:00
50422ee555 merge: tui-command-mode 2026-05-24 08:38:39 +09:00
14381b8ba5 feat: add TUI command mode 2026-05-24 08:32:21 +09:00
f439de6cdc docs: replace gui mvp with tui spawned pod panel 2026-05-24 08:10:21 +09:00
83cab17f1f docs: split tui command and navigation tickets 2026-05-24 07:59:51 +09:00
b7340eab4b chore: complete worker-history-append-contract ticket 2026-05-24 07:37:29 +09:00
f1c886e451 merge: worker-history-append-contract 2026-05-24 07:37:05 +09:00
a9a2b1e034 docs: split maintainer workflows by role 2026-05-24 07:34:30 +09:00
65c399e6d9 fix: route worker history appends through callbacks 2026-05-24 06:44:19 +09:00
3ae145269c chore: drop stale tui spawn error todo 2026-05-24 06:29:15 +09:00
b64e098b5b chore: complete tui-input-queue ticket 2026-05-23 13:58:09 +09:00
fed3997eb8 merge: tui-input-queue 2026-05-23 13:57:32 +09:00
45bc2265f4 feat: queue tui input during runs 2026-05-23 13:57:22 +09:00
88f755a38f docs: add manual turn rollback ticket 2026-05-23 13:35:03 +09:00
e65b62affa chore: complete tui-empty-turn-restore ticket 2026-05-23 13:30:01 +09:00
2061cea5dd merge: tui-empty-turn-restore 2026-05-23 13:29:07 +09:00
df8f91bda7 feat: restore rolled back tui input 2026-05-23 13:28:56 +09:00
565f152e13 chore: complete pod-empty-turn-rollback ticket 2026-05-23 12:52:42 +09:00
7181910806 merge: pod-empty-turn-rollback 2026-05-23 12:52:12 +09:00
9fc3653502 chore: handle rolled back run result clients 2026-05-23 12:51:40 +09:00
03e7795130 feat: rollback empty interrupted turns 2026-05-23 12:50:46 +09:00
8813d966bb fix: make visible pod list schema object 2026-05-23 12:29:37 +09:00
74ccdd6726 chore: complete pod-discovery-restore-tools ticket 2026-05-23 12:05:30 +09:00
044032ef2b merge: pod-discovery-restore-tools 2026-05-23 12:04:59 +09:00
fd1b06198e feat: add visible pod discovery tools 2026-05-23 12:04:45 +09:00
aea33efaeb chore: complete memory-extract-remove-input-cap ticket 2026-05-23 09:14:37 +09:00
cf4ecf8d70 merge: memory-extract-remove-input-cap 2026-05-23 09:14:15 +09:00
d5da95499d fix: remove memory extract input cap 2026-05-23 09:14:07 +09:00
ba92581d51 chore: complete tui-pod-restore-picker ticket 2026-05-23 09:13:57 +09:00
c4183d5ba6 merge: tui-pod-restore-picker 2026-05-23 09:13:19 +09:00
a8b311bd1f feat: restore tui sessions by pod 2026-05-23 09:13:06 +09:00
cfb5fa89f1 chore: complete spawned-delegation-scope-reclaim ticket 2026-05-23 08:39:04 +09:00
c2bcaac03d merge: spawned-delegation-scope-reclaim 2026-05-23 08:38:50 +09:00
a26f18c466 fix: reclaim delegated scope from stopped children 2026-05-23 08:38:42 +09:00
0cba8e9f5c docs: refine pod visibility and tui restore flow 2026-05-23 08:33:00 +09:00
ccc6efc0e6 update: tui -rの際のリストの時系列ソート 2026-05-23 08:02:05 +09:00
942ab0e15b chore: complete tui-streaming-input-loss ticket 2026-05-23 07:16:08 +09:00
0ff39f33bb merge: tui-streaming-input-loss 2026-05-23 07:15:55 +09:00
7480a5732f fix: preserve tui input during streaming 2026-05-23 07:15:39 +09:00
c1d4d0b65c chore: complete tui-context-usage-indicator ticket 2026-05-23 07:15:30 +09:00
aa57253e39 merge: tui-context-usage-indicator 2026-05-23 07:15:17 +09:00
a267ad8114 feat: show context usage in tui status 2026-05-23 07:15:03 +09:00
637694893a docs: identify tui streaming input loss race 2026-05-23 05:47:59 +09:00
be8b10e759 Create tui-parts.md 2026-05-23 05:41:48 +09:00
60daf64808 fix: tighten task tool usage guidance 2026-05-23 05:11:48 +09:00
01ccac7cf1 chore: complete prune-token-budget ticket 2026-05-23 05:00:30 +09:00
369a5931bc merge: prune-token-budget 2026-05-23 05:00:15 +09:00
9ee7f04805 feat: protect prune tail by token budget 2026-05-23 05:00:06 +09:00
0d30b6139c chore: complete pod-event-callback-delivery ticket 2026-05-23 04:57:26 +09:00
f4627de3ee merge: pod-event-callback-delivery 2026-05-23 04:57:10 +09:00
767400d5c2 fix: drain snapshots before pod callbacks 2026-05-23 04:57:03 +09:00
5c4b1e1ec8 docs: add memory extract input cap ticket 2026-05-23 04:42:38 +09:00
79842b212a docs: add pod event callback delivery ticket 2026-05-23 03:29:01 +09:00
4072d35f81 docs: add spawned delegation scope reclaim ticket 2026-05-23 03:02:48 +09:00
4795b6cb4a refactor: remove legacy plural log entries 2026-05-23 02:03:42 +09:00
12d33c265c docs: track read pod output log entry bug 2026-05-23 00:53:47 +09:00
d9ca5e8c41 docs: add pod discovery restore tools ticket 2026-05-23 00:09:34 +09:00
d0cac8ab89 chore: complete spawned-registry-persist ticket 2026-05-22 23:30:16 +09:00
96fda589ac merge: spawned-registry-persist 2026-05-22 23:30:06 +09:00
1e0dc6566c feat: persist spawned pod registry 2026-05-22 23:30:02 +09:00
73e1c8332d chore: complete pod-name-resume ticket 2026-05-22 22:57:31 +09:00
292b923cae merge: pod-name-resume 2026-05-22 22:57:23 +09:00
0e562dd4d9 feat: resume pods by name 2026-05-22 22:57:16 +09:00
ca2e2352f4 chore: complete pod-state-write-points ticket 2026-05-22 22:29:23 +09:00
e0e58ebbf7 merge: pod-state-write-points 2026-05-22 22:29:12 +09:00
58608c4f57 feat: wire pod metadata lifecycle writes 2026-05-22 22:29:08 +09:00
78209d5126 chore: complete pod-state-backend ticket 2026-05-22 22:03:36 +09:00
93c91e7c06 style: run cargo fmt 2026-05-22 22:03:27 +09:00
458fdbc9e0 merge: pod-state-backend 2026-05-22 22:03:17 +09:00
8a062b1a19 feat: add pod metadata store backend 2026-05-22 22:03:11 +09:00
27be927afe Merge: live-fork-marker 2026-05-20 06:45:49 +09:00
8e1c5d3bdc chore: 空になった Storage 親見出しを TODO から削除 2026-05-20 06:45:43 +09:00
3057fe6c24 ticket: live-fork-marker 完了 2026-05-20 06:45:19 +09:00
8747cc802f chore: auto-fork ロジック二重実装を KNOWN_ISSUES に登録 2026-05-20 06:45:14 +09:00
ca7c5b82d7 ticket: live-fork-marker レビュー (Approve) 2026-05-20 06:44:54 +09:00
a9340a8817 feat: live auto-fork の marker 形式を確定(seq 比較 + forked_from 記録)
方針: 末尾 entry-count 比較で検知し、元 Segment は immutable のまま
(terminal marker を書き戻さない)。fork lineage は新 Segment の
SegmentStart.forked_from に前向きに記録するため、log だけから辿れる。
過去 fork と対称で、nested fork も marker 位置の調停が不要。

- session-store ensure_head_or_fork に at_turn_index 引数を追加し
  新 Segment へ forked_from を記録
- pod ensure_segment_head の auto-fork も同様に forked_from を記録
  (at_turn_index = writer の現 turn_count)
- fork_at の doc に「元 Segment を mutate しない」invariant を明記
- test: nested past-fork が祖先を不変に保つ / Pod 並行 writer drift で
  auto-fork し forked_from を記録 / 元 Segment に marker が書かれない
2026-05-20 06:42:09 +09:00
c47a539278 Merge: session-grouping-introduce 2026-05-20 06:29:48 +09:00
7542605ec9 ticket: session-grouping-introduce 完了 2026-05-20 06:29:43 +09:00
0dfdd11921 update: session-grouping review follow-up
- PickerOutcome::Picked から未使用の session_id を除去(pod-cli が lookup_session_of で再解決)
- picker preview が singular AssistantItem も拾うように
- fs_store layout doc に migration(後方互換なし、旧 flat sessions は破棄)を明記
- TaskStore は Session-lifetime、ScopedFs/Tracker は Pod-process lifetime と用語整理
- Pod::session_id / from_manifest_spawned のコメント補強
2026-05-20 06:29:37 +09:00
5edc4d3b03 feat: Session(Segment 群の grouping)を導入
- SessionId 型を新設、各 SegmentStart に session_id を持たせる
- compaction / 内部 fork は同 SessionId を継承、fork() は新 Session を発行
- Store API を (SessionId, SegmentId) ベースに、FsStore layout は
  <root>/<session_id>/<segment_id>.jsonl に
- Store::list_sessions / list_segments(session_id) / lookup_session_of を追加
- restore_by_segment shim を session-store に提供(pod-cli --session で使用)
- SegmentState に SegmentLocation (session_id, segment_id) を保持し ArcSwap で更新
- RestoredState に session_id: Option<SessionId> を追加
- Picker は Session 単位に列挙、leaf segment を解決して resume
2026-05-20 06:17:56 +09:00
d2b3c2f53d Merge: segment-rename 2026-05-20 05:18:11 +09:00
a084324830 ticket: segment-rename 完了 2026-05-20 05:18:04 +09:00
c2b55a498b update: 残存 Session 識別子の Segment 化(review follow-up)
レビュー指摘の通り、次の session-grouping-introduce で新 SessionId が
入る前に名称衝突を避けるため取り残しを掃除。

- PodError::Session{Empty,ScopeMissing} → Segment{Empty,ScopeMissing}
- ScopeLockError::SessionConflict → SegmentConflict
- Pod.session_state / SegmentState.set_session_id 系
- source_session_id / prev_session_id / ensure_session_head / short_session
- pod_cli の "Session ID:" 表示
- fs_store の sessions ローカル変数
2026-05-20 05:17:49 +09:00
2d23673393 update: Session-lifetime/scoped を Pod-lifetime に修正
タスクストア/ファイルトラッカーは compaction を跨いで Pod プロセス寿命まで生きる。
旧 SessionId = Segment の時代の表現を Pod-lifetime に正す。pod_cli の表示も Segment: に。
2026-05-20 05:06:38 +09:00
22f5d02385 update: SessionId / SessionStart / SessionOrigin 等を Segment 系名称へ
- Type/Function/Variantを Segment* 系へ統一
  - SessionId/SessionStart/SessionOrigin/SessionStartState/SessionState/SessionLogSink/SessionLockInfo
  - new_session_id / session_id / create_session* / list_sessions / lookup_session / update_session / find_by_session
  - protocol Event::SessionRotated → SegmentRotated、CompactDone.new_session_id → new_segment_id
- Module: session_log → segment_log / session → segment (file mv 含む)
  pod 側の session_log_sink → segment_log_sink も同様
- crate 名 (session-store)、CLI flag (--session)、ResumeWithSession (CLI tied) は据え置き
- session-tests/session_metrics_test 等の Store impl も追従
2026-05-20 05:06:04 +09:00
de549812ab Merge: entry-hash-abolish 2026-05-20 04:53:52 +09:00
5362e5858c ticket: entry-hash-abolish 完了 2026-05-20 04:53:47 +09:00
55c5ac4942 update: 旧用語コメントの掃除と KNOWN_ISSUES 追記
- 残存していた head_hash / SessionHead 言及コメントを 3 箇所更新
- FsStore::read_entry_count の O(n) 計測コストを KNOWN_ISSUES に登録
2026-05-20 04:53:33 +09:00
f41c60c3ae ticket: entry-hash-abolish レビュー (Approve) 2026-05-20 04:49:17 +09:00
90e83bf2ae update: entry hash chain と session_head mutex を撤廃
- HashedEntry / EntryHash / compute_hash / build_chain 撤去、JSONL は 1 行 1 LogEntry
- SessionOrigin.at_hash → at_turn_index (TurnEnd 由来) に置換
- Pod 側 SessionHead mutex を ArcSwap<SessionId> + AtomicUsize の SessionState に置換
- ensure_head_or_fork は store の entry count と writer の append tally で判定
- session-store から sha2 / hex 依存、pod から parking_lot 依存を削除
2026-05-20 04:31:37 +09:00
3d091acacd ticket: 永続化整理を 8 個に分割
persistence-semantics と pod-persistent-state を実装可能な粒度に分割。
Storage 層 (Phase 1) を entry-hash-abolish / segment-rename /
session-grouping-introduce / live-fork-marker に、Pod 単位永続化
(Phase 2) を pod-state-backend / pod-state-write-points /
pod-name-resume / spawned-registry-persist に切り出した。
2026-05-20 04:07:44 +09:00
72c1d04cf2 Merge: invoke-turn-llmcall-semantics
# Conflicts:
#	crates/pod/src/controller.rs
2026-05-15 22:08:41 +09:00
801d7d9abb ticket: invoke-turn-llmcall-semantics 完了 2026-05-15 21:54:40 +09:00
133402dcdb ticket: worker-history-append-contract 作成 2026-05-15 21:53:24 +09:00
39a803d7e5 chore: KNOWN_ISSUES に controller_test::double_run_returns_error の flakiness を追記 2026-05-15 21:52:40 +09:00
fcc6b67f40 ticket: pod-interrupt-prep-internalize 完了 2026-05-15 21:52:24 +09:00
f90ec5ee62 ticket: pod-interrupt-prep-internalize レビュー (Approve with follow-up) 2026-05-15 21:51:57 +09:00
35988f3249 update: Paused→Run の interrupt 前処理を Pod::run に内包 2026-05-15 21:51:57 +09:00
d840add130 ticket: invoke-turn-llmcall-semantics review (Approve) 2026-05-15 21:42:43 +09:00
79b8336a14 feat: Invoke marker と LlmCall callback を導入し AgentTurn セマンティクスを明確化
- protocol: InvokeKind enum、Event::InvokeStart / LlmCallStart / LlmCallEnd 追加
- llm-worker: Worker.llm_call_count と on_llm_call_start/end callback、turn_count を AgentTurn 数として doc 更新
- session-store: LogEntry::Invoke { ts, trigger } 追加 (replay は marker のみで state 不変)
- pod: run/run_for_notification 開始時に Invoke marker commit、PendingRun::RunForNotification(InvokeKind) で kind を伝搬
- pod ipc: sink + server で Invoke エントリーを Event::InvokeStart として broadcast
- tui: 新 Event 3種を no-op で受理 (UI 設計はチケット範囲外)
2026-05-15 07:04:26 +09:00
fd8526799b ticket: invoke/turn/llmcall 決定事項と実装範囲を明文化 2026-05-15 06:48:57 +09:00
dcfffbcbde ticket: Exchange語撤廃、Invoke/Turn/LlmCall でセマンティクスを再整理 2026-05-15 05:41:13 +09:00
7b79743ea4 ticket: pod-input-validate-internlize完了 2026-05-15 05:38:27 +09:00
9f9e42ab59 update: Controllerで入力のValidationを行っていた部分をPod側に移す 2026-05-15 05:33:33 +09:00
a761372a9e ticket: PodとControllerの責務の抱え違いを修正するチケット 2026-05-15 04:52:39 +09:00
c4f81da828 ticket: pod-parent-turn-callback完了 2026-05-15 04:43:12 +09:00
beef5e5710 ticket: pod-parent-turn-callbackレビュー 2026-05-15 04:42:29 +09:00
d5a4f77420 ticket: 消し忘れ 2026-05-15 04:39:30 +09:00
d5a7cf2aab update: 親にターン完了を通達する経路の整理 2026-05-15 04:38:53 +09:00
35fec78519 update: エントリの単数化のフォローアップ 2026-05-14 19:42:23 +09:00
988495cfea update: 書き込みの不要なasyncを削除 2026-05-14 19:16:48 +09:00
34c89f8739 ticket: 書き込みのsync化を計画 2026-05-14 16:45:58 +09:00
f9def2d5bb update: SystemItem1本化 2026-05-14 14:36:29 +09:00
e4b66345aa ticket: イベントプロトコルと永続化におけるシステムイベントの統合 2026-05-14 04:12:40 +09:00
6358affd76 chore: cargo fmt 2026-05-14 03:36:08 +09:00
f73e648929 fix: 実態にそぐわないEvent::Entryを実装した構造を訂正 2026-05-14 03:35:52 +09:00
e7064878c2 refactor: Podのメインループのリファクタリング 2026-05-14 03:27:49 +09:00
dfa466c980 ticket: 追加:Podのメインループとソケット通信周りのリファクタリング 2026-05-13 22:16:25 +09:00
d04d2fc704 ticket: add tui manual compact command 2026-05-13 06:50:27 +09:00
3c2f5eb337 docs: update pod cli manifest flags 2026-05-13 06:44:48 +09:00
38c309535d ticket: note tui user manifest overlay mismatch 2026-05-13 06:41:23 +09:00
4a8dba276a close: complete pod manifest and file ref tickets 2026-05-13 06:30:45 +09:00
28443d2e04 merge: file-ref-directory 2026-05-13 06:30:45 +09:00
a44288c258 merge: pod-cli-manifest-flags 2026-05-13 06:30:45 +09:00
f3b99aca0c review: file-ref-directory 2026-05-13 06:30:45 +09:00
e001f4c3f9 review: pod-cli-manifest-flags 2026-05-13 06:30:45 +09:00
ba3655522b refactor: PodControllerの構造のリファクタリング 2026-05-13 06:07:38 +09:00
5a3e3f5994 docs(tickets): PodControllerの構造調整チケット作成 2026-05-13 05:43:23 +09:00
b7e329a1a1 chore: planの更新 2026-05-13 05:42:55 +09:00
524e3dc551 feat: handle directory file refs 2026-05-13 02:57:58 +09:00
b67023aafc feat: organize pod manifest cli flags 2026-05-13 02:57:50 +09:00
aa27f62409 feat: Languageインストラクションの追加 2026-05-13 02:27:30 +09:00
1803b0cf67 update: fmt + memoryに用いる言語の構成 2026-05-13 01:57:04 +09:00
93145afc3c fix: compact時にToolCallとOutputの間でCutしてしまう問題 2026-05-13 00:59:02 +09:00
d236521c77 chore: workflowの調整・knowledgeの追加テスト 2026-05-13 00:06:33 +09:00
fc4786628c merge: lint common crate 2026-05-12 21:56:49 +09:00
9cd76515d0 chore: complete lint common crate ticket 2026-05-12 21:56:39 +09:00
cf822dbc5c refactor: extract shared lint record primitives 2026-05-12 21:56:25 +09:00
bedaf62cb0 docs(tickets): submit時FileRefでディレクトリを参照した時の挙動 2026-05-12 17:39:40 +09:00
ea3014164e docs(tickets): mainfest-output-upload-limits完了 2026-05-12 17:27:47 +09:00
8d2ca5d530 feat: add manifest output upload limits 2026-05-12 16:20:15 +09:00
53b508abae Merge branch 'tui-knowledge-completion' into develop 2026-05-12 15:43:29 +09:00
27b84abccc docs(memory): fix knowledge dir path in collect_resident_knowledge doc 2026-05-12 15:07:39 +09:00
abbf7f8273 docs(tickets): review tui knowledge completion (approve) 2026-05-12 14:56:30 +09:00
2f84bd32ba feat(pod): wire knowledge slugs into # completion 2026-05-12 14:45:46 +09:00
df6ec428ca docs(tickets): tui knowledge completion unimplemented fix 2026-05-12 14:40:37 +09:00
0c34923ad1 docs(tickets): define work item query strategy 2026-05-12 02:32:32 +09:00
0af1c16009 docs(tickets): use timestamp work item ids 2026-05-12 02:07:29 +09:00
64506b643b docs: add ai maintainer work item plan 2026-05-12 01:53:52 +09:00
b78018b62e docs(tickets): add lint-common crate ticket 2026-05-12 00:06:06 +09:00
8026e8e319 merge: workflow crate extraction 2026-05-11 22:50:19 +09:00
7ac948afa0 docs(tickets): complete workflow crate extraction 2026-05-11 22:50:06 +09:00
96821556c6 review: workflow crate extraction 2026-05-11 22:49:50 +09:00
f70975789e refactor: extract workflow crate 2026-05-11 22:49:07 +09:00
520895f1c9 merge: anthropic assistant burst bundling 2026-05-11 22:24:36 +09:00
072c1bfbc7 docs(tickets): complete anthropic assistant burst bundling 2026-05-11 22:23:53 +09:00
fdb181e825 review: anthropic assistant burst bundling 2026-05-11 22:23:38 +09:00
cfb7f09e38 fix: bundle anthropic assistant bursts 2026-05-11 22:22:36 +09:00
1676e525e8 merge: memory usage metrics 2026-05-11 21:46:24 +09:00
01bdf04f2e docs(tickets): complete memory usage metrics 2026-05-11 21:46:19 +09:00
96d5be4337 review: memory usage metrics 2026-05-11 21:46:19 +09:00
6c93ec38df feat: add memory usage event metrics 2026-05-11 21:29:48 +09:00
1d8db0aadd docs(tickets): complete memory phase naming cleanup 2026-05-11 17:16:36 +09:00
69ac3799d6 docs(tickets): simplify memory usage metrics 2026-05-11 16:54:23 +09:00
eb9a67decc fix: remove remaining memory phase wording 2026-05-11 01:57:39 +09:00
0e906b72b7 docs(tickets): compact-worker-occupancy-cap完了 2026-05-11 01:56:20 +09:00
a2aecbf029 update: memoryシステムの"Phase"表記を撤廃 2026-05-11 01:55:28 +09:00
a8a6e049bc docs(tickets): memory-extract-occupancy-cap 完了 2026-05-11 01:32:45 +09:00
dbc96ee075 review: memory-extract-occupancy-cap (approve) 2026-05-11 01:25:20 +09:00
2c8aec5385 feat: extract worker サーキットブレーカーを占有量ベースに統一 2026-05-11 01:20:37 +09:00
eb9af32b49 docs(tickets): add memory-extract-occupancy-cap ticket 2026-05-11 01:14:59 +09:00
919602b496 Merge branch 'compact-worker-occupancy-cap' into develop 2026-05-11 01:12:32 +09:00
507b164822 review: compact-worker-occupancy-cap (set_max_turns 分岐削除) 2026-05-11 00:56:41 +09:00
8100a5dfd1 feat: compact worker サーキットブレーカーを占有量ベースに統一 2026-05-11 00:43:16 +09:00
8f3c1942fb docs(tickets): add memory audit log ticket 2026-05-11 00:06:42 +09:00
d6f27f7c45 docs(tickets): completed tickets cleanup 2026-05-10 17:31:34 +09:00
b7953b3d28 merge: memory prompt record policy 2026-05-10 14:40:58 +09:00
20c82a4bf6 docs: generalize memory prompt record policy 2026-05-10 14:40:52 +09:00
7d9b9682bd docs: memory effectiveness plan 2026-05-10 01:25:10 +09:00
75ade5750e docs: memory prompt ticket policy ticket 2026-05-10 01:13:57 +09:00
3def5edbdf feat: client-crateの実装 2026-05-10 00:57:50 +09:00
df0be1cd6b chore: E2Eの計画とgit運用の話 2026-05-09 05:04:57 +09:00
175343c612 docs(tickets): file-ref-symlink-diagnostics完了 2026-05-09 04:22:27 +09:00
4d6b548611 docs(tickets): file-ref-symlink-diagnosticsレビュー 2026-05-09 04:21:56 +09:00
f479aa5206 feat: Toolsのシンボリックリンク対応 2026-05-09 04:21:56 +09:00
94c2f3a106 docs(tickets): tui-assistant-markdown完了 2026-05-09 03:31:49 +09:00
45b9912c8f docs(tickets): permission既定policy整理チケット追加 2026-05-09 03:27:22 +09:00
7c5c1609cb docs(tickets): permission-extension-point完了 2026-05-09 03:20:17 +09:00
60144c550a feat: パターンベースのツール権限制御を追加 2026-05-09 03:20:02 +09:00
2df9de73c7 chore: tui compact progress ticket完了 2026-05-09 03:14:23 +09:00
41bce21339 feat: compactのプログレス表示 2026-05-09 03:11:53 +09:00
584cbe406a chore: git方針の変更とセマンティクス変更の計画の帳尻合わせ 2026-05-08 20:17:11 +09:00
0900e05f9e docs(tickets): 自己改善workflowの設計 2026-05-08 01:50:55 +09:00
d036d53096 docs(tickets): workflow-directory-layout完了 2026-05-08 01:08:25 +09:00
69edd29a46 update: Workflowディレクトリ修正のフォローアップ 2026-05-08 00:59:08 +09:00
902b8d759b feat: Workflowの読み取り位置変更の実装 2026-05-08 00:15:50 +09:00
22d87f8ade docs(tickets): reportの運用・Workflowのディレクトリ位置修正 2026-05-07 23:34:00 +09:00
6c95b2da56 feat: TUIのmarkdown対応 2026-05-05 18:30:25 +09:00
fc5cfefb62 docs(tickets): PermissionのチケットとTUIのmd表示 2026-05-05 17:16:03 +09:00
ba2c8ae687 docs(tickets): agent-skills完了 2026-05-05 16:00:40 +09:00
9d709c6470 update: Agent skills実装のレビュー・対応 2026-05-05 13:54:02 +09:00
dec17c9909 feat: writingに対する基本的な指示promptを追加 2026-05-05 13:42:34 +09:00
37065144da feat: agent skillsの互換実装 2026-05-05 13:16:10 +09:00
5acb0d4d85 fix: Reasoningの永続化のスキーマのミスを修正 2026-05-05 12:30:29 +09:00
d0270288de docs(tickets): turnのセマンティクスを変える計画 2026-05-05 12:29:52 +09:00
0d5ec4ff63 docs(tickets): reasoning-history-perisit完了 2026-05-04 23:06:21 +09:00
75c573fed1 update: Reasoningコンテキスト管理のレビュー・対応 2026-05-04 23:05:08 +09:00
594671edc3 feat: Reasoningのコンテキスト管理の対応 2026-05-04 21:31:44 +09:00
e31cbcb150 docs(tickets): Reasoningのコンテキスト管理とPruneの調整チケット追加 2026-05-04 21:16:31 +09:00
9f14a43a59 docs(tickets): tui-task-display完了 2026-05-04 20:43:21 +09:00
f33eba3fe6 feat: Task表示のレビュー・修正 2026-05-04 17:28:39 +09:00
cb7da11de7 feat: TUI上に進行中のTaskを表示する実装 2026-05-04 17:06:02 +09:00
006897790c docs(tickets): Compaction進行中のライブ表示 2026-05-04 17:03:51 +09:00
cb72533ab0 docs(tickets): post-run memory detach 完了 2026-05-04 16:11:38 +09:00
954cf200e2 feat: Pos処理の非同期化・Busy状態の削除 2026-05-04 15:52:27 +09:00
5b38aa6a87 docs(tickets): 追加:タスクリストの表示とコンテキスト長インジケータ 2026-05-04 15:32:40 +09:00
ef7d7bd6a1 docs(tickets): Busyの切り離し 2026-05-04 13:20:25 +09:00
a61b498564 Merge branch 'llm-worker-transient-retry' into develop 2026-05-04 13:16:26 +09:00
dcd22d4399 docs(tickets): pod状態のTUI同期完了 2026-05-04 13:08:44 +09:00
f2dda9097e feat: Podのステータス同期の修正 2026-05-04 12:55:29 +09:00
560c23bc75 feat: Podのステータスを厳密にし、同期漏れを防ぐ 2026-05-04 12:55:11 +09:00
79e85ccda6 docs(tickets): llm-worker-transient-retry完了 2026-05-04 12:51:41 +09:00
73a9efdc9a docs(tickets): llm-worker-transient-retry レビュー追記
7183847 のレビュー結果を Approve として記録する。チケット要件
(リトライ対象 / バックオフ / Retry-After 上書き / mid-stream 温存 /
完了条件) はすべて満たしており、コードベースの層構造を歪める変更も
ない。Retry-After テストの方針差 (実時間 1s vs 仮想時間 5s) と
connect refused テストの試行回数未検証は non-blocking として
review.md に記録。
2026-05-04 12:49:13 +09:00
19df6340cd feat(llm-worker): HTTP transient エラーへのリトライを追加
`transport.rs` の HTTP 送信〜ステータスチェック区間に指数バックオフ
+ フルジッターのリトライループを追加する。SSE 読み出し開始後 (
`bytes_stream()` 以降) のエラーは従来どおりそのまま流す。

- `is_retryable(&ClientError)`: 408/425/429/500/502/503/504/529 と
  reqwest の connect/timeout のみ true
- `RetryPolicy` (default: base 500ms / cap 10s / max_attempts 4 /
  total_timeout 30s)
- `Retry-After` ヘッダ (秒数) があればバックオフを上書き
- リトライ発火ごとに warn! でステータス・attempt・wait を出す

ref: tickets/llm-worker-transient-retry.md
2026-05-04 12:45:33 +09:00
a0771608b1 Merge branch 'tui-system-message-render' into develop 2026-05-04 12:10:17 +09:00
b4ca718c24 docs(tickets): tui-system-message-render完了 2026-05-04 12:05:50 +09:00
a0e1583916 feat: システムメッセージをTUIで表示させる 2026-05-04 12:04:09 +09:00
0e2521d7c1 update: Taskツールの説明を更新 2026-05-04 11:32:04 +09:00
798b95a887 docs(tickets): tuiトークン表示完了 2026-05-04 00:07:59 +09:00
59b953c139 docs(tickets): tuiトークン表示レビュー 2026-05-04 00:05:59 +09:00
e6b6df09b8 feat: tuiのトークン集計表示の修正 2026-05-04 00:01:37 +09:00
9a06494bc6 docs(tickets): TUI表示トークンの集計の修正 2026-05-03 23:28:31 +09:00
58b12d859b docs(tickets): チケット追加:システムメッセージのTUI表示とセッションのロールバック・フォーク 2026-05-03 22:43:21 +09:00
aec7a071ac docs(tickets): tui-pod-event-render 完了 (消し忘れ片付け) 2026-05-03 22:14:24 +09:00
a18f74ca24 update: Taskツール群の説明を更新 2026-05-03 22:09:45 +09:00
8d9910fd20 docs(tickets): notify-history-persist 完了 (消し忘れ片付け) 2026-05-03 22:07:18 +09:00
698175e60c docs(tickets): session-todo-reminder spec を pending_history_appends に改訂 (AGENTS.md 揮発禁止に整合) 2026-05-03 21:53:20 +09:00
9ffe91b5bc Merge branch 'session-todo-tools' into develop
# Conflicts:
#	tickets/session-todo.md
2026-05-03 21:50:30 +09:00
4dae614a55 docs(tickets): session-todo (本体) 完了 2026-05-03 21:48:44 +09:00
b22b226fea update: tuiからspawnする際にエラー詳細が落ちていた問題を修正 2026-05-03 21:47:54 +09:00
25d07976f2 docs(tickets): notify-history-persist完了 2026-05-03 21:37:13 +09:00
6021f27469 docs(tickets): session-todo レビュー反映 (Approve) + reminder spec 段階レビュー 2026-05-03 21:34:54 +09:00
2e8dd759d8 fix: TaskStore snapshot を JSON ブロック化 + 構造ラウンドトリップテスト追加 2026-05-03 21:33:50 +09:00
9679661313 fix: TaskStore snapshot を compact 後 history の末尾に置いて retained 中の TaskCreate 重複を防ぐ 2026-05-03 21:26:49 +09:00
9e58149dda feat: notify-history-persist実装 2026-05-03 19:27:22 +09:00
3cbd759397 feat: セッション内 Task ツール (TaskCreate/List/Get/Update + 履歴 replay + compact 跨ぎ) 2026-05-03 19:03:52 +09:00
b8c459549f docs(tickets): セッション内 Task ツールを本体と注意機構に分割 2026-05-03 19:03:48 +09:00
7ddfdb09b5 Merge branch 'resume-scope-claim' into develop
# Conflicts:
#	TODO.md
2026-05-03 18:59:01 +09:00
613bf07610 docs(tickets): resume-scope-claim 完了 2026-05-03 18:56:39 +09:00
b90291d5a0 fix: resume-scope-claim レビュー指摘対応 (deny セマンティクス doc・破損 snapshot の警告ログ) 2026-05-03 18:56:21 +09:00
364f936ed1 docs(tickets): resume-scope-claim レビュー (Approve) 2026-05-03 18:46:15 +09:00
3fe4f169b7 docs(tickets): Notifyが永続化されいない問題についてのチケット 2026-05-03 18:45:10 +09:00
4e48f35e55 feat: resume時のscope claimを過去の有効scopeに揃える 2026-05-03 17:12:36 +09:00
aca5bda1f2 feat: session-metrics完了 2026-05-03 15:56:06 +09:00
70c4f1930e feat: session-metrics実装 2026-05-03 15:10:43 +09:00
702ed79517 feat: TUIに他Podからの通知を表示する 2026-05-03 12:45:05 +09:00
b35bb2154f docs(tickets): 消し忘れチケットども 2026-05-03 01:16:22 +09:00
56e94911c1 chore: TODOから[ ]を削除 2026-05-03 01:08:43 +09:00
201bb6c82e Update AGENTS.md 2026-05-03 01:06:23 +09:00
4466a35d6c docs(tickets): tuiでPodEventを表示する・セッション中でメトリクスを取るチケットを追加 2026-05-03 01:01:09 +09:00
2c86e64f19 update: tuiの文字入力のCtrlブロックを追加 2026-05-03 00:44:38 +09:00
261c682e5e update: memoryシステム周りのプロンプトの整理 2026-05-03 00:27:10 +09:00
3bb8af824e docs(tickets): memory-consolidation-drop-input-cap完了 2026-05-02 23:57:36 +09:00
689a988e83 update: Consolidationの不要なToken上限の削除 2026-05-02 23:48:33 +09:00
9d0b9e9d90 docs(tickets): セッション内TODOツールと注意機構のチケット 2026-05-02 23:48:01 +09:00
81ff4c6073 update: codexのキャッシュ利用が出来てなかった問題 2026-05-02 03:23:44 +09:00
cb5cd1e3d1 fix: tuiからのPod作成の挙動を修正・開発時にcargo runでpodを起動する経路を実装 2026-05-02 02:13:30 +09:00
08da79822d Merge branch 'workflow-impl' into develop
# Conflicts:
#	crates/pod/src/controller.rs
#	crates/pod/src/pod.rs
2026-05-02 01:47:49 +09:00
1b8a9efdd5 docs(tickets): workflow完了 2026-05-02 01:40:06 +09:00
76e5e66326 update: workflowの実装修正 2026-05-02 01:38:50 +09:00
b8aab6725b docs(tickets): 消し忘れチケット 2026-05-02 01:36:19 +09:00
e9b426b825 feat: dynamic-scopeの実装修正 2026-05-02 01:33:32 +09:00
d33b1c111e feat: dynamic-scopeの実装 2026-05-02 01:26:17 +09:00
5479b14411 fix: SpawnPodの起動経路の問題・を修正 2026-05-02 01:09:57 +09:00
e2d1fa120f Implement workflow MVP 2026-05-02 00:46:47 +09:00
8e24b3c607 update: manifestで一部値のzeroの扱いを変更 2026-05-02 00:08:46 +09:00
573501e37c fix(llm-worker): openai_responsesのroleの最新の投影を反映 2026-05-01 23:55:26 +09:00
9ac127d607 chore: dev-depsの整理 2026-05-01 23:50:14 +09:00
7899ab4386 docs(tickets): workflowのプロパティ名の修正 2026-05-01 23:40:47 +09:00
99dc94416b chore: 依存パッケージの集約 2026-05-01 23:35:46 +09:00
469c5ead99 Merge branch 'tui-mouse-scroll' into develop 2026-05-01 23:22:58 +09:00
bccd60d9be feat: memory P2の修正 2026-05-01 23:22:49 +09:00
201e68f17e feat(tui): マウスホイールスクロール完了 2026-05-01 23:16:02 +09:00
b907715dd4 スキルの整理 2026-05-01 23:14:37 +09:00
06f4dc0428 feat(tui): マウスホイールでスクロールする実装 2026-05-01 23:14:16 +09:00
d8a7200ea4 メモリPhase2の実装 2026-05-01 23:00:55 +09:00
f1b7af6249 docs: memoryシステムの仕様変更と、動的Tool・VCSの話 2026-05-01 18:47:52 +09:00
57fab557d3 bashツール一旦完了 2026-05-01 18:47:09 +09:00
8773e751ec bashツール実装 2026-05-01 18:14:13 +09:00
5b27fb9e19 ClaudeによるTool出力メタ認知 2026-05-01 02:47:44 +09:00
d7bc7ab3dd ファイル参照を与えた際に自動的に読ませる実装 2026-04-30 21:58:10 +09:00
bc81dc1513 TUI補完の細かい挙動修正 2026-04-30 14:38:03 +09:00
623b54cefc tuiの補完の実装 2026-04-30 12:46:48 +09:00
f914ae235a claudeの動的ツールの調査レポート 2026-04-30 01:35:42 +09:00
0df59462ea fix: セッション復元時にhistoryが表示されない問題 2026-04-30 00:02:26 +09:00
f31c58dccd cargo fmt 2026-04-29 23:20:25 +09:00
3fc65e6f6b templatureがcodexエンドポイントで使えない件の修正 2026-04-29 23:20:16 +09:00
b0393d2fe9 session-log関連完了 2026-04-29 23:00:55 +09:00
fa68957277 session-logリファクタのレビュー・修正 2026-04-29 22:55:36 +09:00
e3b36371e9 session-log-segments実装 2026-04-29 22:42:10 +09:00
8a9e3b4fe3 session-log-decouple-item実装 2026-04-29 22:24:18 +09:00
913ee4764a session-storeの永続化形式からllm-workerの内部型を削除 2026-04-29 22:09:30 +09:00
7736baaec9 tui-input-word-motion完了 2026-04-29 21:45:49 +09:00
b0db02da6d tui-input-word-motionレビュー・半角カナに関する修正 2026-04-29 21:41:24 +09:00
b79747bd0c tuiの単語単位Backspace 2026-04-29 21:31:19 +09:00
99dbb1c6c0 tuiの単語境界カーソル移動実装 2026-04-29 21:23:29 +09:00
dfb458cff2 workflowのチケットとtuiの単語境界カーソル移動のチケット 2026-04-29 21:22:49 +09:00
81ac7ccb7b pod-registry-rename完了 2026-04-29 21:05:09 +09:00
0b322a645a pod-registry-rename修正 2026-04-29 21:04:47 +09:00
274b7df32d pod-registryのモジュール分割 2026-04-29 20:14:34 +09:00
8a8fd225bf scope-lock -> pod-registry 2026-04-29 20:01:32 +09:00
1aa06dba60 scope.lockの意味変更に伴うクレート名変更チケット作成 2026-04-29 19:54:08 +09:00
deab5c5b50 memory-phase1-extract完了消し忘れ 2026-04-29 19:53:37 +09:00
59bbfb9621 tui-session-restore完了 2026-04-29 19:52:24 +09:00
fd96a517bb tuiからセッションを復帰する経路の実装 2026-04-29 19:03:03 +09:00
87768c2e2d 不要なforkの削除 2026-04-28 20:19:50 +09:00
2b89bb6d2e resumeの実装 2026-04-28 18:52:58 +09:00
023ed09adc max_tokenとreasoning_tokenに関するdocs修正 2026-04-28 18:01:17 +09:00
ce4c0930c3 max_tokensのスキーマ不整合に関する修正 2026-04-28 17:58:24 +09:00
3d1b8a4761 tui-thinking-display完了 2026-04-28 16:23:09 +09:00
f385f06abc tui-thinking-display修正 2026-04-28 16:22:45 +09:00
cf4c454a03 TUIにThinkingを表示する実装 2026-04-28 16:10:48 +09:00
513653ce55 ThinkingのTUI表示のチケット作成 2026-04-28 16:07:41 +09:00
1e65287bf1 session-store-llm-worker-type-ownership完了 2026-04-28 15:44:16 +09:00
6fe19b84ce セッション関連の責務の分離 2026-04-28 15:43:34 +09:00
e49fb3f1a0 memory-phase1の、トークンカウントの実装位置が悪い件 2026-04-28 14:24:38 +09:00
e437028849 memory-phase1-extract修正 2026-04-28 13:12:21 +09:00
3d04f793de memoryを抽出する仕組みの実装 2026-04-28 12:58:33 +09:00
141b77b7e4 session-restoreの設計更新 2026-04-28 12:42:49 +09:00
37d35df6be session復帰経路を作るチケット・テスト用のファイルの削除 2026-04-28 12:31:38 +09:00
8cf1d6c9cf memoryが.insomnia配下ではなくworkspace root直下を想定していた問題の修正 2026-04-28 11:53:08 +09:00
dfa6213c18 memoryのクエリと動作のテスト 2026-04-28 11:37:41 +09:00
4273d2a463 worker-generation-settings完了 2026-04-28 09:38:23 +09:00
bdf2a08459 生成設定のmanifest化の実装 2026-04-28 09:37:22 +09:00
7a0ed7d744 cargo fmt 2026-04-27 22:51:07 +09:00
bcaa4645f7 model-reasoning-control完了 2026-04-27 22:49:56 +09:00
84c6c2f17f model-reasoning-contolレビュー 2026-04-27 22:41:51 +09:00
0435ec5cbd model-reasoning-control実装 2026-04-27 22:25:27 +09:00
c75efb50b9 home-dir-layout完了 2026-04-27 22:11:15 +09:00
8cf0bd6374 home-dir-layout修正 2026-04-27 22:10:36 +09:00
8658845b02 home-dirの整理 2026-04-27 21:45:30 +09:00
306b1cf942 reasoningを利用可能にするチケット 2026-04-27 20:21:22 +09:00
695cfa05a7 memory-resident-injection完了 2026-04-27 18:30:21 +09:00
e8559d4bee メモリー内容のシステムプロンプトへの埋め込みの実装 2026-04-27 18:25:47 +09:00
12c1d55127 環境変数に関するチケットの修正 2026-04-27 18:11:40 +09:00
bc9eb3aa1c pod-spawn-ui完了・設定UI関連のチケット作成 2026-04-27 17:38:32 +09:00
978d542855 memory-search-tool完了 2026-04-27 17:26:07 +09:00
c9d1d4fa5c memoryサーチツールを実装 2026-04-27 17:24:08 +09:00
c8fe95901c manifest読み込み経路の整理チケット作成 2026-04-27 17:17:00 +09:00
9f1443b027 manifest側で設定ファイルの収集を行うようにした 2026-04-27 16:52:23 +09:00
c4ca9e1d89 tuiからSpawnする仮UI 2026-04-27 16:22:06 +09:00
cd8b620e6a memory-file-format完了 2026-04-27 13:59:04 +09:00
f2e47629d0 メモリーに関するクレート作成・ファイル構造の実装 2026-04-27 13:33:31 +09:00
c7a873bcf9 セグメントのセッション永続化チケット 2026-04-27 13:25:16 +09:00
c3278bb8da submit-segment-protocol完了 2026-04-27 11:42:42 +09:00
0a3af686f7 submitをvec segmentを受け付ける形に変更 2026-04-27 11:03:58 +09:00
c9a7d652dc notification-naming完了 2026-04-26 23:30:46 +09:00
2ee536ed71 Method::NotifyとEvent::Notificationが紛らわしい問題 2026-04-26 23:25:50 +09:00
82f08b966b memory実装チケット 2026-04-26 17:00:38 +09:00
d3b729c671 カタログの実装完了、ドキュメント整理 2026-04-24 13:33:56 +09:00
0a97886005 podのモジュール分割完了 2026-04-24 11:58:11 +09:00
4763173f36 podのモジュール分割 2026-04-24 11:48:27 +09:00
30f9abacb8 modelsとprovidersをカタログ化 2026-04-24 10:45:03 +09:00
7ecb1e6fc1 モデルとプロバイダーをカタログ化するチケット 2026-04-23 16:18:30 +09:00
ccf1f2b6bf llm-provider-catalog実装 2026-04-23 15:37:51 +09:00
e21d2041ef Agents.mdを一定閾値でturncateする仕様を削除 2026-04-23 01:34:25 +09:00
26ce346a81 pod-prompt-catalog完了 2026-04-22 17:43:42 +09:00
b8d368f5e5 Promptを一元管理するファイルから参照する実装 2026-04-22 17:43:05 +09:00
0c1276b730 Memoryシステムの整理・Promptカタログチケット 2026-04-22 13:21:15 +09:00
270d7923ab TUIのEditツール周りの表示とカラー 2026-04-22 01:17:58 +09:00
ef294eeb68 複数クライアント間でのRunメソッドの同期漏れ 2026-04-21 23:59:49 +09:00
d3ba0a299a 改行テキストの行計算・Padding設定 2026-04-21 23:26:34 +09:00
72128aab9f TUIのオーバーホール実装 2026-04-21 23:12:35 +09:00
388079759c protocol-tool-result-shape完了 2026-04-21 20:52:19 +09:00
ce59c5320e TUIに向けたprotocolの詳細調整 2026-04-21 20:50:59 +09:00
de3272fdfd TUIオーバーホールチケット 2026-04-21 19:37:14 +09:00
ca5a3d1152 メモリシステムの設計 2026-04-21 19:23:07 +09:00
47da4a03cb モデル性能のハードコードを消し飛し、Codexのフォーマットの修正 2026-04-21 18:35:56 +09:00
2914800673 Docsのアップロード 2026-04-21 17:39:43 +09:00
453 changed files with 72245 additions and 9138 deletions

View File

@ -1,3 +0,0 @@
- [Event broadcast pattern](project_event_broadcast_pattern.md) — Pod は event_tx: Option<broadcast::Sender<Event>> を保持、Controller が attach_notifier と同タイミングで attach
- [Test-path omission precedent](feedback_test_path_omission.md) — 要件に挙がったテストを「共通ヘルパ経由だから省略」した場合は Approve with follow-up が相場
- [cargo add workspace pitfall](feedback_cargo_add_workspace_pitfall.md) — ルート Cargo.toml に [workspace.dependencies] が未定義、workspace = true 指定は現状使えない

View File

@ -1,18 +0,0 @@
---
name: cargo add workspace pitfall
description: ルート Cargo.toml に [workspace.dependencies] が未定義なので workspace = true は使えない
type: feedback
---
ルート `Cargo.toml` は `[workspace.package]` のみを持ち `[workspace.dependencies]` を
定義していない。したがってチケットや PR に
`foo = { workspace = true, features = [...] }` と書かれていても、そのままでは解決しない。
**Why:** プロジェクトの現状流儀として、各クレートは直接バージョン指定する
(例: `crates/session-store/Cargo.toml` の `uuid = { version = "1", features = [...] }`)。
protocol-design (2026-04-21) レビュー時に発見。
**How to apply:** チケットに `workspace = true` の文言を見たら、
- 実装が直接バージョン指定にしていれば「コードベース流儀に整合」として Follow-up 扱い、
- `workspace = true` のまま書かれていたら「ビルドが通らないはず」として Request changes、
- もしくは `[workspace.dependencies]` を整備する方向の提案を添える。

View File

@ -1,19 +0,0 @@
---
name: Test-path omission precedent
description: 要件で列挙されたテストパスを「共通ヘルパなので省略」した場合の判断相場
type: feedback
---
チケット要件にテストパス (例: 成功/失敗/mid-turn の 3 本) が明示列挙されている場合、
そのうち 1 本を「共通ヘルパ経由だから inspection で担保」として省略する実装が来たら、
**Approve with follow-up** が相場。Blocking にはしない。
**Why:** 共通化されたインスツルメント (例: `send_event`) 1 点だけが共通で、
呼び出し側の制御フロー (async 再帰・フラグ管理・エラー伝播) は個別なのが通例。
ただしビルドと主要パスが動いており、後続チケットでテストを足すだけの差分で済むケースが多い。
protocol-design (2026-04-21) で先例。
**How to apply:** 要件とテストコードを 1:1 で突き合わせ、欠けたパスがあれば
- 制御フローが共通化されていれば Follow-up
- 制御フローが別物 (別関数 / 別状態遷移) なら Request changes
と切り分ける。`send_event` 型のヘルパ共通化は Follow-up 側の判断。

View File

@ -1,20 +0,0 @@
---
name: Event broadcast pattern
description: Pod が protocol::Event を broadcast する公式パターン (Notifier と別経路)
type: project
---
Pod 内部から `protocol::Event` を broadcast する正規ルートは、`Pod` に
`event_tx: Option<broadcast::Sender<Event>>` を持たせて `attach_event_tx` で
Controller 側から注入する方式。`Notifier` は `Event::Notification` の
replay バッファ専用で、他イベントは通さない。
**Why:** `Notifier` は Notification 型の Warn/Error レベル情報 + late subscriber への
snapshot replay を責務にしており、Event 一般を乗せると意味が噛み合わない。
protocol-design チケットの決定事項 6/7 で確定 (2026-04-21)。
**How to apply:** 新しい Pod 発の Event を追加するときは、
1) `Pod::send_event(&self, event)` ヘルパ (`pod.rs:370-374`) を使う、
2) Controller は `pod.attach_notifier` の直後に `pod.attach_event_tx` を呼ぶ、
3) late subscriber への届きは期待しない (buffer 化が必要なら別チケット化)。
Notifier 経由で新種 Event を流す PR が来たら差し戻し対象。

View File

@ -1,9 +1,8 @@
---
name: "ticket-reviewer"
description: "Use this agent when a ticket implementation is submitted for review in this project (insomnia). The agent reviews the ticket's premises/requirements and the actual implementation, creates `tickets/<ticket>.review.md` with findings, and updates the original `tickets/<ticket>.md` with review status. Do NOT use this agent for general code review unrelated to a ticket. Examples:\\n<example>\\nContext: User has just finished implementing a feature described in tickets/foo.md and wants it reviewed.\\nuser: \"tickets/foo.md の実装が終わったのでレビューして\"\\nassistant: \"I'll use the Agent tool to launch the ticket-reviewer agent to review the implementation against tickets/foo.md's requirements and produce tickets/foo.review.md.\"\\n<commentary>\\nThe user is explicitly requesting a ticket-scoped review with the project's .review.md workflow, which is this agent's purpose.\\n</commentary>\\n</example>\\n<example>\\nContext: User finishes a chunk of work and mentions the ticket name.\\nuser: \"scopedfs-scripting のチケット、一通り実装出来た\"\\nassistant: \"Let me use the Agent tool to launch the ticket-reviewer agent to review the implementation and produce the review artifacts.\"\\n<commentary>\\nCompletion of a ticket implementation is the trigger for the ticket-reviewer agent per project's lifecycle (c. レビュー).\\n</commentary>\\n</example>\\n<example>\\nContext: User requests re-review after addressing feedback.\\nuser: \"指摘を反映したので再レビューお願い\"\\nassistant: \"I'll use the Agent tool to launch the ticket-reviewer agent to re-review and update the .review.md accordingly.\"\\n<commentary>\\nRe-review updates the existing .review.md and ticket status; this agent handles that workflow.\\n</commentary>\\n</example>"
description: "Use this agent when a ticket implementation is submitted for review in this project (insomnia). The agent reviews the ticket's premises/requirements and the actual implementation, creates `tickets/<ticket>.review.md` with findings, and updates the original `tickets/<ticket>.md` with review status. Do NOT use this agent for general code review unrelated to a ticket. "
model: opus
color: purple
memory: project
---
You are a senior reviewer specialized in the `insomnia` project. You are an expert at evaluating ticket-scoped implementations against their stated premises and requirements, and at safeguarding the codebase from unnecessary complexity or architectural drift. You operate strictly within the project's ticket lifecycle conventions defined in `CLAUDE.md`.
@ -118,153 +117,3 @@ Do not modify the ticket's 背景・要件 sections unless the user explicitly a
4. Did I avoid making git writes?
5. Did I update both `<name>.review.md` and `<name>.md`?
6. Is my judgment line unambiguous?
## Agent Memory
**Update your agent memory** as you review tickets in this project. This builds up institutional knowledge across review sessions. Write concise notes about what you found and where.
Examples of what to record:
- Recurring architectural patterns and anti-patterns observed across tickets
- Layer boundary conventions (e.g., what belongs in llm-worker vs. upper layers) as they become clearer
- Common requirement-miss patterns (e.g., tests omitted, build-through invariant violated)
- Crate/module organization conventions confirmed during reviews
- Reviewer judgment precedents — when a similar issue was Approve-with-follow-up vs. Request-changes
- Ticket authoring patterns that correlate with smooth vs. troubled reviews
- Project-specific policies reinforced during review (provider policy, ScopedFs scripting direction, cargo add discipline, etc.)
Keep entries short and link-friendly so they can be referenced in future reviews.
# Persistent Agent Memory
You have a persistent, file-based memory system at `<repo>/.claude/agent-memory/ticket-reviewer/`. This directory already exists — write to it directly with the Write tool (do not run mkdir or check for its existence).
You should build up this memory system over time so that future conversations can have a complete picture of who the user is, how they'd like to collaborate with you, what behaviors to avoid or repeat, and the context behind the work the user gives you.
If the user explicitly asks you to remember something, save it immediately as whichever type fits best. If they ask you to forget something, find and remove the relevant entry.
## Types of memory
There are several discrete types of memory that you can store in your memory system:
<types>
<type>
<name>user</name>
<description>Contain information about the user's role, goals, responsibilities, and knowledge. Great user memories help you tailor your future behavior to the user's preferences and perspective. Your goal in reading and writing these memories is to build up an understanding of who the user is and how you can be most helpful to them specifically. For example, you should collaborate with a senior software engineer differently than a student who is coding for the very first time. Keep in mind, that the aim here is to be helpful to the user. Avoid writing memories about the user that could be viewed as a negative judgement or that are not relevant to the work you're trying to accomplish together.</description>
<when_to_save>When you learn any details about the user's role, preferences, responsibilities, or knowledge</when_to_save>
<how_to_use>When your work should be informed by the user's profile or perspective. For example, if the user is asking you to explain a part of the code, you should answer that question in a way that is tailored to the specific details that they will find most valuable or that helps them build their mental model in relation to domain knowledge they already have.</how_to_use>
<examples>
user: I'm a data scientist investigating what logging we have in place
assistant: [saves user memory: user is a data scientist, currently focused on observability/logging]
user: I've been writing Go for ten years but this is my first time touching the React side of this repo
assistant: [saves user memory: deep Go expertise, new to React and this project's frontend — frame frontend explanations in terms of backend analogues]
</examples>
</type>
<type>
<name>feedback</name>
<description>Guidance the user has given you about how to approach work — both what to avoid and what to keep doing. These are a very important type of memory to read and write as they allow you to remain coherent and responsive to the way you should approach work in the project. Record from failure AND success: if you only save corrections, you will avoid past mistakes but drift away from approaches the user has already validated, and may grow overly cautious.</description>
<when_to_save>Any time the user corrects your approach ("no not that", "don't", "stop doing X") OR confirms a non-obvious approach worked ("yes exactly", "perfect, keep doing that", accepting an unusual choice without pushback). Corrections are easy to notice; confirmations are quieter — watch for them. In both cases, save what is applicable to future conversations, especially if surprising or not obvious from the code. Include *why* so you can judge edge cases later.</when_to_save>
<how_to_use>Let these memories guide your behavior so that the user does not need to offer the same guidance twice.</how_to_use>
<body_structure>Lead with the rule itself, then a **Why:** line (the reason the user gave — often a past incident or strong preference) and a **How to apply:** line (when/where this guidance kicks in). Knowing *why* lets you judge edge cases instead of blindly following the rule.</body_structure>
<examples>
user: don't mock the database in these tests — we got burned last quarter when mocked tests passed but the prod migration failed
assistant: [saves feedback memory: integration tests must hit a real database, not mocks. Reason: prior incident where mock/prod divergence masked a broken migration]
user: stop summarizing what you just did at the end of every response, I can read the diff
assistant: [saves feedback memory: this user wants terse responses with no trailing summaries]
user: yeah the single bundled PR was the right call here, splitting this one would've just been churn
assistant: [saves feedback memory: for refactors in this area, user prefers one bundled PR over many small ones. Confirmed after I chose this approach — a validated judgment call, not a correction]
</examples>
</type>
<type>
<name>project</name>
<description>Information that you learn about ongoing work, goals, initiatives, bugs, or incidents within the project that is not otherwise derivable from the code or git history. Project memories help you understand the broader context and motivation behind the work the user is doing within this working directory.</description>
<when_to_save>When you learn who is doing what, why, or by when. These states change relatively quickly so try to keep your understanding of this up to date. Always convert relative dates in user messages to absolute dates when saving (e.g., "Thursday" → "2026-03-05"), so the memory remains interpretable after time passes.</when_to_save>
<how_to_use>Use these memories to more fully understand the details and nuance behind the user's request and make better informed suggestions.</how_to_use>
<body_structure>Lead with the fact or decision, then a **Why:** line (the motivation — often a constraint, deadline, or stakeholder ask) and a **How to apply:** line (how this should shape your suggestions). Project memories decay fast, so the why helps future-you judge whether the memory is still load-bearing.</body_structure>
<examples>
user: we're freezing all non-critical merges after Thursday — mobile team is cutting a release branch
assistant: [saves project memory: merge freeze begins 2026-03-05 for mobile release cut. Flag any non-critical PR work scheduled after that date]
user: the reason we're ripping out the old auth middleware is that legal flagged it for storing session tokens in a way that doesn't meet the new compliance requirements
assistant: [saves project memory: auth middleware rewrite is driven by legal/compliance requirements around session token storage, not tech-debt cleanup — scope decisions should favor compliance over ergonomics]
</examples>
</type>
<type>
<name>reference</name>
<description>Stores pointers to where information can be found in external systems. These memories allow you to remember where to look to find up-to-date information outside of the project directory.</description>
<when_to_save>When you learn about resources in external systems and their purpose. For example, that bugs are tracked in a specific project in Linear or that feedback can be found in a specific Slack channel.</when_to_save>
<how_to_use>When the user references an external system or information that may be in an external system.</how_to_use>
<examples>
user: check the Linear project "INGEST" if you want context on these tickets, that's where we track all pipeline bugs
assistant: [saves reference memory: pipeline bugs are tracked in Linear project "INGEST"]
user: the Grafana board at grafana.internal/d/api-latency is what oncall watches — if you're touching request handling, that's the thing that'll page someone
assistant: [saves reference memory: grafana.internal/d/api-latency is the oncall latency dashboard — check it when editing request-path code]
</examples>
</type>
</types>
## What NOT to save in memory
- Code patterns, conventions, architecture, file paths, or project structure — these can be derived by reading the current project state.
- Git history, recent changes, or who-changed-what — `git log` / `git blame` are authoritative.
- Debugging solutions or fix recipes — the fix is in the code; the commit message has the context.
- Anything already documented in CLAUDE.md files.
- Ephemeral task details: in-progress work, temporary state, current conversation context.
These exclusions apply even when the user explicitly asks you to save. If they ask you to save a PR list or activity summary, ask what was *surprising* or *non-obvious* about it — that is the part worth keeping.
## How to save memories
Saving a memory is a two-step process:
**Step 1** — write the memory to its own file (e.g., `user_role.md`, `feedback_testing.md`) using this frontmatter format:
```markdown
---
name: {{memory name}}
description: {{one-line description — used to decide relevance in future conversations, so be specific}}
type: {{user, feedback, project, reference}}
---
{{memory content — for feedback/project types, structure as: rule/fact, then **Why:** and **How to apply:** lines}}
```
**Step 2** — add a pointer to that file in `MEMORY.md`. `MEMORY.md` is an index, not a memory — each entry should be one line, under ~150 characters: `- [Title](file.md) — one-line hook`. It has no frontmatter. Never write memory content directly into `MEMORY.md`.
- `MEMORY.md` is always loaded into your conversation context — lines after 200 will be truncated, so keep the index concise
- Keep the name, description, and type fields in memory files up-to-date with the content
- Organize memory semantically by topic, not chronologically
- Update or remove memories that turn out to be wrong or outdated
- Do not write duplicate memories. First check if there is an existing memory you can update before writing a new one.
## When to access memories
- When memories seem relevant, or the user references prior-conversation work.
- You MUST access memory when the user explicitly asks you to check, recall, or remember.
- If the user says to *ignore* or *not use* memory: Do not apply remembered facts, cite, compare against, or mention memory content.
- Memory records can become stale over time. Use memory as context for what was true at a given point in time. Before answering the user or building assumptions based solely on information in memory records, verify that the memory is still correct and up-to-date by reading the current state of the files or resources. If a recalled memory conflicts with current information, trust what you observe now — and update or remove the stale memory rather than acting on it.
## Before recommending from memory
A memory that names a specific function, file, or flag is a claim that it existed *when the memory was written*. It may have been renamed, removed, or never merged. Before recommending it:
- If the memory names a file path: check the file exists.
- If the memory names a function or flag: grep for it.
- If the user is about to act on your recommendation (not just asking about history), verify first.
"The memory says X exists" is not the same as "X exists now."
A memory that summarizes repo state (activity logs, architecture snapshots) is frozen in time. If the user asks about *recent* or *current* state, prefer `git log` or reading the code over recalling the snapshot.
## Memory and other forms of persistence
Memory is one of several persistence mechanisms available to you as you assist the user in a given conversation. The distinction is often that memory can be recalled in future conversations and should not be used for persisting information that is only useful within the scope of the current conversation.
- When to use or update a plan instead of memory: If you are about to start a non-trivial implementation task and would like to reach alignment with the user on your approach you should use a Plan rather than saving this information to memory. Similarly, if you already have a plan within the conversation and you have changed your approach persist that change by updating the plan rather than saving a memory.
- When to use or update tasks instead of memory: When you need to break your work in current conversation into discrete steps or keep track of your progress use tasks instead of saving to memory. Tasks are great for persisting information about the work that needs to be done in the current conversation, but memory should be reserved for information that will be useful in future conversations.
- Since this memory is project-scope and shared with your team via version control, tailor your memories to this project
## MEMORY.md
Your MEMORY.md is currently empty. When you save new memories, they will appear here.

View File

@ -0,0 +1,26 @@
---
name: worktree-workflow
description: "Worktreeを用いた開発フローを進める。git上の開発におけるミクロな指示で、プロジェクトの管理に関する指示は提供されていない。"
allowed-tools: "Bash(cd *), Bash(git worktree *), Bash(mkdir *), Bash(cp *), Bash(ln *), Bash(ls *), Bash(find *)"
---
# Worktreeを用いた開発
Goal: 実装を完了させ、ブランチをマージ待ちの状態にする。
`./.worktree`にworktreeを作成します。
エージェントの1セッション=1ワークツリーとしており、ブランチ/イシュー/チケット単位で切ります。
このワークフローにおいては、ブランチはローカルで並行開発するためのマージ後削除の運用とし、Worktreeと同名のbranchを同時に作って進めます。メインのディレクトリのブランチから切るものとして扱います。
```
git worktree add .worktree/<task-name> -b <task-name>
```
## flake.nixの無効化
基本的に、CWDを変更できない場合、.envrcによる自動アクティベートは効かないので無視で構わない。
## 完了時
マージウィンドウからこのスキルがinvokeされた際は、ブランチのマージ・worktreeの削除まで行う。対して、実装者がマージしてクローズしてはならない。

1
.gitignore vendored
View File

@ -2,3 +2,4 @@
.direnv
*.local*
.env
.worktree

1
.insomnia/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/memory/

13
.insomnia/manifest.toml Normal file
View File

@ -0,0 +1,13 @@
[scope]
allow = [
{ target = ".", permission = "write", recursive = true },
]
[session]
record_event_trace = true
[memory]
extract_threshold = 50000
consolidation_threshold_files = 5
consolidation_threshold_bytes = 50000

View File

@ -0,0 +1,143 @@
---
description: TODO / tickets / docs / git history から次の作業候補を見繕い、課題発見や方針決定を半自動でイテレーションする WIP maintainer workflow
model_invokation: false
user_invocable: true
requires: []
---
# Auto Maintain Workflow (WIP)
insomnia を AI maintainer として運用するための半自動 loop。TODO / tickets から「今進められそうな作業」を選ぶだけでなく、課題の発見、設計判断の切り分け、次に人間へ戻すべき問いの整理までを扱う。
これは unattended 自動開発ではない。実装の並列委譲は `multi-agent-workflow`、worktree の機械的作成は `worktree-workflow` に任せる。本 Workflow はその前段として、何を進めるべきか、何をまだ決めるべきかを整理する。
参照:
- `docs/plan/ai-maintainer.md`
- `tickets/auto-maintain-workflow.md`
## 位置づけ
AI maintainer の目的は、コードを書くこと自体ではなく、プロジェクト状態を前に進めることである。
この Workflow は WIP として、以下を行う。
- TODO / tickets / docs / git history を読んで現在地を把握する。
- 実装可能な ticket と、方針決定が必要な ticket を分ける。
- 小さく実装できる候補を提案する。
- 設計相談が必要な論点を人間に戻す。
- 運用上の問題や繰り返し発生する詰まりを report / ticket / workflow 改訂候補として整理する。
## 非目標
現時点では以下をしない。
- 常駐 scheduler として自動実行する。
- 人間の合意なしに新規 ticket を作る。
- 人間の合意なしに既存 ticket を大幅変更する。
- 人間の合意なしに ticket 完了削除を行う。
- push する。
- Workflow を自律生成・自律改訂する。
- scope / permission / history persistence / prompt context 加工原則に関わる判断を勝手に決める。
## 入力として読むもの
必要に応じて以下を読む。
1. `TODO.md`
2. `tickets/*.md`
3. `docs/plan/`
4. `docs/report/`
5. `git log --oneline` / ticket file の git history
6. 既存 worktree / branch 状態
7. 最近の失敗や通知、ユーザーからの観測
TODO と ticket の不整合を見つけたら、勝手に修正せず、まず報告する。ただしユーザーが明示的に「直して」と言った場合は Mode 1 として整理してよい。
## 分類
候補を以下に分ける。
### A. 実装委譲可能
- 要件と完了条件が具体的。
- 影響範囲が限定的。
- test / build で確認できる。
- 大きな設計判断が不要。
- scope を狭く切れる。
この場合は、人間に候補として提示する。人間が実行を許可したら `$user/multi-agent-workflow` に進む。
### B. 方針決定が必要
- 複数の設計方針が自然に導ける。
- protocol / permission / scope / persistence / prompt context に触れる。
- UX の仕様が未確定。
- 既存 ticket の要件が古い。
この場合は、実装せず、決めるべき問いを短く提示する。
### C. ticket 整理が必要
- TODO にあるが ticket がない。
- ticket があるが TODO にない。
- 完了済みに見えるが残っている。
- ticket の前提が変わっている。
この場合は、不整合と修正案を提示する。修正は人間の許可後に行う。
### D. report / workflow 改善候補
- 同じ tool 問題が繰り返し出る。
- Workflow の指示が曖昧で実装 Pod が迷った。
- AI が過剰に Task tool を使うなど、運用上の癖が出た。
- 通知や Pod completion tracking など、開発基盤の不足が観測された。
この場合は、すぐ ticket 化するか、`docs/report/` に観測として残すか、人間に確認する。
## 半自動 iteration
1. 状態把握
- TODO / tickets / git status を読む。
- 最近完了した流れや未完了 branch を確認する。
2. 候補抽出
- 実装可能そうな ticket を 2〜5 件挙げる。
- correctness / developer experience / user-visible UX / cleanup で分類する。
3. 推奨順位
- blocking correctness を最優先。
- 実害が出ている運用問題を次点。
- 小さく完了できる UX / cleanup を次点。
- 大きな設計変更は方針相談に回す。
4. 人間への提示
- 「次に進めるなら X」を1つ推奨する。
- 理由を短く述べる。
- 実装委譲する場合の scope / test 方針を添える。
5. 実行への接続
- 人間が「進めて」と言ったら `$user/multi-agent-workflow` に接続する。
- worktree 作成は `$user/worktree-workflow` に従う。
## エスカレーション基準
以下では実装に進まず、人間へ戻す。
- ticket の要件から複数の設計方針が自然に導ける。
- 長期構造、crate boundary、protocol、permission、scope、history persistence に触れる。
- prompt context 加工原則に関わる。
- 新 ticket の作成、既存 ticket の大幅変更、ticket 完了削除について合意がない。
- test 不能、再現不能、または作業範囲外の不具合に遭遇した。
- WorkItem / Thread / Lease / maintainer state など、まだ設計中の概念が必要になる。
## まだ固定しないもの
以下は `docs/plan/ai-maintainer.md` の上位設計に残し、本 Workflow では詳細を固定しない。
- WorkItemStore / LeaseStore。
- operation inbox / trial log。
- QA feedback を ticket / review / report のどれに落とすか。
- AI 自身の feedback を Knowledge / report / ticket / workflow 改訂のどれにするか。
- maintainer doctor。
- reviewer Pod の評価基準の機械化。

View File

@ -0,0 +1,150 @@
---
description: worktree と子 Pod を使って複数 ticket の実装・レビュー・修正・完了処理を並列に進める orchestration フロー
model_invokation: true
user_invocable: true
requires: []
---
# Multi-agent Worktree Workflow
insomnia を insomnia で開発する際の、worktree + 実装 Pod + 親 Pod review の標準フロー。これは **実装を並列に進めるためのフロー** であり、worktree の機械的作成手順は `$user/worktree-workflow`、ticket 候補選定や方針探索の半自動 loop は `$user/auto-maintain` に分ける。
## 目的
- 実装差分を ticket ごとの child worktree に隔離する。
- 実装 Pod に narrow write scope を渡して並列実装させる。
- 親 Pod が diff / test / ticket 要件を review し、必要なら修正依頼する。
- approve 後に merge / ticket 完了処理 / main workspace での再検証を行う。
## 開始条件
以下が揃っている時に使う。
- 対象 ticket が決まっている。
- ticket の背景・要件・完了条件から実装方針が概ね導ける。
- worktree 作成と git 書き込み操作について、人間の許可がある。
- main workspace の unrelated dirty changes を把握している。
設計方針が複数自然に導ける場合、protocol / scope / permission / history persistence に触れる場合、ticket 自体の再定義が必要な場合は、実装委譲前に人間へ戻す。
## 親 Pod / orchestrator の責務
1. 状態確認
- `git status --short --branch`
- 対象 ticket
- 関連 TODO / docs / 既存 worktree
2. worktree 作成
- `$user/worktree-workflow` に従い `./.worktree/<task-name>` を作る。
- `.insomnia` を sparse checkout で除外する。
3. 実装 Pod spawn
- read scope: main workspace 全体。
- write scope: child worktree、または必要最小 directory。
- task には以下を明示する。
- child worktree path / branch
- 対象 ticket path
- Bash は必ず child worktree に `cd` すること
- main workspace の `TODO.md` / `tickets/` / `docs/report/` / `.insomnia` は編集しないこと
- 範囲外事項
- 実行すべき build / test / format
- 完了報告項目
4. 監督
- `ReadPodOutput` で報告を読む。
- 通知が来ない場合でも、worktree の `git status` / `git diff` / test で完了状態を確認する。
- 必要なら `SendToPod` で修正依頼する。
5. review
- ticket の背景・要件・完了条件・範囲外に照らして diff を確認する。
- build / test / `git diff --check` を確認する。
- 必要なら reviewer Pod を read-only で立てる。
6. merge / lifecycle
- approve 後に main workspace へ merge する。
- `TODO.md` から該当行を削除し、`tickets/foo.md` を削除して完了 commit を作る。
- main workspace で必要な test / `cargo check --workspace` / `cargo fmt --check` を再実行する。
## 実装 Pod の責務
- child worktree 内でのみ実装する。
- main workspace の管理ファイルを書かない。
- 指定された build / test / format を実行する。
- ticket 要件外の設計変更、依存関係追加、scope / permission / history persistence / prompt context 加工原則に触れる変更が必要なら止めて報告する。
- 完了時に以下を報告する。
- worktree path / branch
- commit hash(commit した場合)
- 変更ファイル
- 実装概要
- 実行した build / test / format
- 未解決事項
- review に回せるか
## 実装 Pod の commit 方針
実装 Pod には child worktree 内での commit を許可してよい。
- commit は ticket 内で意味のある粒度にする。
- 例: `feat: ...`、`fix: ...`、`test: ...`、`docs: ...`
- 実装 Pod は merge / push / branch deletion / worktree remove をしない。
- 実装 Pod は `TODO.md` / `tickets/` の完了処理 commit をしない。
- 親 Pod は review 時に commit 粒度も確認する。
- 必要な修正は、原則追加 commit として積む。履歴改変や squash は人間の明示指示がある時だけ行う。
## Review → 修正 → 完了の標準形
### Approve
1. 実装 Pod を停止し、scope を回収する。
2. 親 Pod が main workspace で `git merge --no-ff <branch>` する。
3. 親 Pod が `TODO.md` / `tickets/foo.md` を完了処理して commit する。
4. main workspace で検証コマンドを再実行する。
5. 変更内容・commit・検証結果・残 dirty changes を報告する。
### Request changes
1. blocking finding をファイル / 行 / 理由 / 修正方針つきで整理する。
2. 実装 Pod が生きていれば `SendToPod` で修正依頼する。
3. 停止済みなら、同じ worktree / branch / scope で再 spawn するか、親 Pod が最小修正する。
4. 修正後に focused test と必要な broader test を再実行する。
5. 再 review する。
### Non-blocking comments
- ticket 要件外の改善はその場で混ぜない。
- 必要なら後続 ticket / docs/report にする。
- non-blocking を理由に completion を遅らせない。
## 並列実装時の注意
- 1 ticket = 1 worktree = 1 branch を基本にする。
- 複数 Pod に同じ write scope を渡さない。
- parent は child の write scope 配下を直接編集しない。
- 依存関係がある ticket は、土台 branch を merge してから次 worktree を切る。
- parallel に走らせた Pod の完了通知は取りこぼしうるため、`ReadPodOutput` と worktree 状態で確認する。
## 完了報告の標準形
```text
完了:
- ticket: <path>
- branch: <name>
- commits:
- <hash> <subject>
- 変更概要: ...
- 検証:
- cargo fmt --check
- cargo check --workspace
- cargo test ...
- review: approve / approve with comments / request changes
- 未解決事項: ...
- 残 dirty changes: ...
```
## この Workflow で扱わないもの
以下は `$user/auto-maintain` または別の設計相談で扱う。
- ticket 候補を見繕うこと。
- 新規 ticket 作成判断。
- QA feedback / AI feedback を ticket / report / workflow に落とす判断。
- 長期 maintainer loop / WorkItemStore / LeaseStore の設計。

View File

@ -0,0 +1,98 @@
---
description: insomnia プロジェクトで child git worktree を作成・管理するための機械的手順。実装 Pod に作らせず、親 Pod が main workspace で実行する。
model_invokation: false
user_invocable: true
requires: []
---
# Worktree Workflow
insomnia プロジェクトで実装差分を main workspace から分離するため、`./.worktree/<task-name>` に child git worktree を作る。これは **worktree の扱い方だけ** を定める Workflow であり、ticket 選定、実装委譲、review、merge の運用は `$user/multi-agent-workflow` 側で扱う。
insomnia では Pod の write scope が排他的に委譲されるため、child worktree に `.insomnia` を置かない。main workspace は orchestration / ticket / docs / memory / workflow 管理の場所として残し、child worktree はコード差分専用の作業面として扱う。
## 適用範囲
この Workflow は親 Pod / orchestrator が main workspace で実行する。
- 実装 Pod にこの Workflow を渡して worktree を作らせない。
- 実装 Pod は、親 Pod が作成済みの child worktree を受け取り、その中で実装・build・test・報告を行う。
- ticket 作成、TODO 更新、review artifact、docs/report は main workspace 側で扱う。
## 原則
- 1 ticket / 1 実装 task につき 1 worktree を作る。
- worktree path は `./.worktree/<task-name>`
- branch 名は原則 `<task-name>` と同じ kebab-case。
- child worktree には `.insomnia` を出さない。
- child worktree は実装差分用。`TODO.md` / `tickets/` / `docs/report/` / workflow / memory は原則 main workspace 側で扱う。
- push はしない。
## 事前確認
作成前に以下を確認する。
1. 対象 ticket / task が決まっているか。
2. `<task-name>` が branch / path 名に使える kebab-case か。
3. `git worktree add` を実行してよい許可があるか。
4. main workspace に混ぜてはいけない未保存差分がないか。
5. 同名 branch / worktree が既に存在しないか。
同名 branch がある場合は、既存 branch を使うか、人間に確認する。`git worktree add -B` で上書きしない。
## 作成手順
main workspace で実行する。
```bash
git worktree add .worktree/<task-name> -b <task-name>
git -C .worktree/<task-name> sparse-checkout init --no-cone
git -C .worktree/<task-name> sparse-checkout set --no-cone \
'/*' \
'!/.insomnia/' \
'!/.insomnia/**'
```
確認する。
```bash
git -C .worktree/<task-name> status --short --branch
test ! -e .worktree/<task-name>/.insomnia
```
失敗した場合は、worktree / branch / lock の状態を確認し、勝手に cleanup せず人間へ報告する。
## 子 Pod へ渡す scope
子 Pod を使う場合、子 Pod の cwd は main workspace のままになる。必ず作業対象が child worktree であることを明示し、Bash 実行時は毎回 `cd <repo>/.worktree/<task-name> && ...` させる。
推奨 scope:
```text
read: <repo>
write: <repo>/.worktree/<task-name>
```
より狭く切れる場合は、write scope を変更対象 crate / directory まで狭めてよい。ただし build / test に必要な生成物を書けることを確認する。
## child worktree 内の禁止事項
- `.insomnia` を作らない / コピーしない。
- main workspace の `TODO.md` / `tickets/` / `docs/report/` を編集しない。
- merge / push / branch deletion / worktree remove をしない。
- scope / permission / history persistence / prompt context 加工原則に関わる設計変更を無断で行わない。
## 完了時の扱い
worktree 作成 Workflow としては、完了時に merge しない。merge、ticket 完了、TODO 削除は `$user/multi-agent-workflow` または人間の明示指示で行う。
実装 Pod へ渡す完了報告項目の標準形:
- worktree path
- branch 名
- commit hash(実装 Pod に commit を許可した場合)
- 変更ファイル
- 実装概要
- 実行した build / test / format
- 未解決事項
- review に回せるか

View File

@ -1,11 +1,43 @@
全体設計が概ね固まり、随所の細かい仕様を詰めながら実装を進めている。
---
## このシステムにおける設計要旨
Gitは基本的にすべてユーザーが操作している。書き込みが必要な操作は明示的に許可されない限り行わないこと
- プロンプトはすべて resources/promptsに集約している。管理効率の向上と同時に、ユーザーがオーバーライドする形式でもある。
- E2E(実プロセスをスポーンさせてのテスト)は未設計。
- 変更量を最小にするために設計を歪めたり、設計問題に対して不必要な後方互換性を作らない。長期的なメンテナンスと型安全性を追求すること。
### LLM コンテキストの加工原則
LLM に投げる context への割り込みは、大きく2種類に分かれる。**前者は許されるが、後者は禁止**。
Podの状態から純粋に再現可能で、且つ揮発性の無い操作であることが望ましい(pruning、tool result の content 切り詰め、prompt cache anchor の付与等)。
原則として、コンテキストは積み重ねるものであり、一時的にメッセージを差し込むことや、過去のメッセージを改ざんすることはKVキャッシュのヒット率を下げる。
**禁止**: ターンを跨ぐことができない情報に基づいて、history に記録せずに context だけにコンテンツを差し込むこと。これをやると LLM はそれに反応して生成を行う一方、次以降のターンでhistoryに残らないため、「自分がなぜその発言/tool call をしたか」の根拠が消えるうえ、prompt cache のヒット率も低下させることになる。
新しい input を context に乗せたいなら、必ず先に `worker.history` に append して commit すること。`history.json` への永続化はそこから自動的についてくる。Notify / PodEvent / `<system-reminder>` 系はこの原則で扱う(→ `tickets/notify-history-persist.md`)。
また、キャッシュを破壊するタイミングは正確にコントロールされる必要があり、キャッシュ破壊とトークン消費のトレードオフに基づいて慎重に設計されるべきである。
---
## 実際のセッションを読んでデバッグする
`~/.insomnia/sessions`にすべてのセッションがある。jsonlなので、いい感じにBashで読むこと。
---
## Git操作
workflowで明示されない限り、読み取り以外の操作は控えること。
基本はworktree上の一時的なブランチでコミットを重ね、メインブランチに取り込む運用をしている。
コミットメッセージは適当に`<prefix>: *簡潔な1行*`で書いている。
外部の参考プロジェクトは必要に応じてローカルの外部 checkout からReadすること。
---
## Ticketの運用について
`TODO.md`、`tickets/`はgitで管理されていて、時系列の管理はgitを参照して把握すること。
### TODO.md
@ -31,7 +63,13 @@ b. 詳細化や前提の変化: `tickets/foo.md` を更新してcommit
c. レビュー: `tickets/foo.md` にレビュー状態を追記 + `tickets/foo.review.md` を作成してcommit
d. 完了: `tickets/foo.md` と `tickets/foo.review.md` を両方削除してcommit
worktreeと併用して作業を進める場合、必ずブランチを切る前に対象のチケットをコミットしてから切ること。
TODO.mdのリンクは完了後に切れるが、そのリンクを元にgitで消されたファイルを読み、内容を把握できる。
`.review.md` にはレビューの指摘事項と判断結果を記載する。
レビューはdiffの確認だけでなく、チケットはどのような前提・要件であり、それが達成されたかの確認まで含めて行う。
常に、提出された実装で良いのか、コードベースを歪めていないか、不必要な実装ではないかを確認すること。
---
insomniaでinsomniaを開発している際、AI自身のフィードバックを元に改善を回すために `docs/report/`ディレクトリに感じた障壁や改善案等を書き残す形にした。 明確に力不足な点/ツールの問題があった場合や、ユーザーからの指示があった際に作ること。

View File

@ -1,11 +1,43 @@
全体設計が概ね固まり、随所の細かい仕様を詰めながら実装を進めている。
---
## このシステムにおける設計要旨
Gitは基本的にすべてユーザーが操作している。書き込みが必要な操作は明示的に許可されない限り行わないこと
- プロンプトはすべて resources/promptsに集約している。管理効率の向上と同時に、ユーザーがオーバーライドする形式でもある。
- E2E(実プロセスをスポーンさせてのテスト)は未設計。
- 変更量を最小にするために設計を歪めたり、設計問題に対して不必要な後方互換性を作らない。長期的なメンテナンスと型安全性を追求すること。
### LLM コンテキストの加工原則
LLM に投げる context への割り込みは、大きく2種類に分かれる。**前者は許されるが、後者は禁止**。
Podの状態から純粋に再現可能で、且つ揮発性の無い操作であることが望ましい(pruning、tool result の content 切り詰め、prompt cache anchor の付与等)。
原則として、コンテキストは積み重ねるものであり、一時的にメッセージを差し込むことや、過去のメッセージを改ざんすることはKVキャッシュのヒット率を下げる。
**禁止**: ターンを跨ぐことができない情報に基づいて、history に記録せずに context だけにコンテンツを差し込むこと。これをやると LLM はそれに反応して生成を行う一方、次以降のターンでhistoryに残らないため、「自分がなぜその発言/tool call をしたか」の根拠が消えるうえ、prompt cache のヒット率も低下させることになる。
新しい input を context に乗せたいなら、必ず先に `worker.history` に append して commit すること。`history.json` への永続化はそこから自動的についてくる。Notify / PodEvent / `<system-reminder>` 系はこの原則で扱う(→ `tickets/notify-history-persist.md`)。
また、キャッシュを破壊するタイミングは正確にコントロールされる必要があり、キャッシュ破壊とトークン消費のトレードオフに基づいて慎重に設計されるべきである。
---
## 実際のセッションを読んでデバッグする
`~/.insomnia/sessions`にすべてのセッションがある。jsonlなので、いい感じにBashで読むこと。
---
## Git操作
workflowで明示されない限り、読み取り以外の操作は控えること。
基本はworktree上の一時的なブランチでコミットを重ね、メインブランチに取り込む運用をしている。
コミットメッセージは適当に`<prefix>: *簡潔な1行*`で書いている。
外部の参考プロジェクトは必要に応じてローカルの外部 checkout からReadすること。
---
## Ticketの運用について
`TODO.md`、`tickets/`はgitで管理されていて、時系列の管理はgitを参照して把握すること。
### TODO.md
@ -31,7 +63,13 @@ b. 詳細化や前提の変化: `tickets/foo.md` を更新してcommit
c. レビュー: `tickets/foo.md` にレビュー状態を追記 + `tickets/foo.review.md` を作成してcommit
d. 完了: `tickets/foo.md` と `tickets/foo.review.md` を両方削除してcommit
worktreeと併用して作業を進める場合、必ずブランチを切る前に対象のチケットをコミットしてから切ること。
TODO.mdのリンクは完了後に切れるが、そのリンクを元にgitで消されたファイルを読み、内容を把握できる。
`.review.md` にはレビューの指摘事項と判断結果を記載する。
レビューはdiffの確認だけでなく、チケットはどのような前提・要件であり、それが達成されたかの確認まで含めて行う。
常に、提出された実装で良いのか、コードベースを歪めていないか、不必要な実装ではないかを確認すること。
---
insomniaでinsomniaを開発している際、AI自身のフィードバックを元に改善を回すために `docs/report/`ディレクトリに感じた障壁や改善案等を書き残す形にした。 明確に力不足な点/ツールの問題があった場合や、ユーザーからの指示があった際に作ること。

182
Cargo.lock generated
View File

@ -82,6 +82,15 @@ version = "1.0.102"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"
[[package]]
name = "arc-swap"
version = "1.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a3a1fd6f75306b68087b831f025c712524bcb19aad54e557b1129cfa0a2b207"
dependencies = [
"rustversion",
]
[[package]]
name = "assert-json-diff"
version = "2.0.2"
@ -319,6 +328,16 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9"
[[package]]
name = "client"
version = "0.1.0"
dependencies = [
"manifest",
"protocol",
"tokio",
"uuid",
]
[[package]]
name = "cmake"
version = "0.1.57"
@ -1571,9 +1590,9 @@ checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2"
[[package]]
name = "libc"
version = "0.2.185"
version = "0.2.186"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "52ff2c0fe9bc6cb6b14a0592c2ff4fa9ceb83eea9db979b0487cd054946a2b8f"
checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66"
[[package]]
name = "libredox"
@ -1596,6 +1615,16 @@ dependencies = [
"bitflags 2.11.0",
]
[[package]]
name = "lint-common"
version = "0.1.0"
dependencies = [
"chrono",
"serde",
"serde_json",
"thiserror 2.0.18",
]
[[package]]
name = "linux-raw-sys"
version = "0.4.15"
@ -1641,6 +1670,8 @@ dependencies = [
"tracing",
"tracing-subscriber",
"trybuild",
"wiremock",
"zstd",
]
[[package]]
@ -1696,6 +1727,7 @@ dependencies = [
name = "manifest"
version = "0.1.0"
dependencies = [
"arc-swap",
"llm-worker",
"protocol",
"serde",
@ -1751,6 +1783,28 @@ dependencies = [
"autocfg",
]
[[package]]
name = "memory"
version = "0.1.0"
dependencies = [
"async-trait",
"chrono",
"libc",
"lint-common",
"llm-worker",
"manifest",
"schemars",
"serde",
"serde_json",
"serde_yaml",
"sha2 0.11.0",
"tempfile",
"thiserror 2.0.18",
"tokio",
"tracing",
"uuid",
]
[[package]]
name = "mime"
version = "0.3.17"
@ -2096,6 +2150,7 @@ checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6"
name = "pod"
version = "0.1.0"
dependencies = [
"arc-swap",
"async-trait",
"chrono",
"clap",
@ -2106,12 +2161,15 @@ dependencies = [
"libc",
"llm-worker",
"manifest",
"memory",
"minijinja",
"pod-registry",
"protocol",
"provider",
"schemars",
"serde",
"serde_json",
"session-metrics",
"session-store",
"tempfile",
"thiserror 2.0.18",
@ -2119,6 +2177,22 @@ dependencies = [
"toml",
"tools",
"tracing",
"uuid",
"workflow",
]
[[package]]
name = "pod-registry"
version = "0.1.0"
dependencies = [
"fs4",
"libc",
"manifest",
"serde",
"serde_json",
"session-store",
"tempfile",
"thiserror 2.0.18",
]
[[package]]
@ -2201,6 +2275,17 @@ dependencies = [
"wiremock",
]
[[package]]
name = "pulldown-cmark"
version = "0.13.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c3a14896dfa883796f1cb410461aef38810ea05f2b2c33c5aded3649095fdad"
dependencies = [
"bitflags 2.11.0",
"memchr",
"unicase",
]
[[package]]
name = "quinn"
version = "0.11.9"
@ -2840,6 +2925,19 @@ dependencies = [
"serde_core",
]
[[package]]
name = "serde_yaml"
version = "0.9.34+deprecated"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47"
dependencies = [
"indexmap",
"itoa",
"ryu",
"serde",
"unsafe-libyaml",
]
[[package]]
name = "serial_test"
version = "3.4.0"
@ -2866,20 +2964,29 @@ dependencies = [
"syn 2.0.117",
]
[[package]]
name = "session-metrics"
version = "0.1.0"
dependencies = [
"serde",
"serde_json",
"session-store",
]
[[package]]
name = "session-store"
version = "0.1.0"
dependencies = [
"async-trait",
"futures",
"hex",
"llm-worker",
"protocol",
"serde",
"serde_json",
"sha2 0.11.0",
"tempfile",
"thiserror 2.0.18",
"tokio",
"tracing",
"uuid",
]
@ -3537,12 +3644,23 @@ dependencies = [
name = "tui"
version = "0.1.0"
dependencies = [
"client",
"crossterm 0.28.1",
"llm-worker",
"manifest",
"pod-registry",
"protocol",
"pulldown-cmark",
"ratatui",
"serde",
"serde_json",
"session-store",
"tempfile",
"tokio",
"toml",
"tools",
"unicode-width",
"uuid",
]
[[package]]
@ -3557,6 +3675,12 @@ version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971"
[[package]]
name = "unicase"
version = "2.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142"
[[package]]
name = "unicode-ident"
version = "1.0.24"
@ -3592,6 +3716,12 @@ version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
[[package]]
name = "unsafe-libyaml"
version = "0.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861"
[[package]]
name = "untrusted"
version = "0.9.0"
@ -4283,6 +4413,22 @@ dependencies = [
"wasmparser",
]
[[package]]
name = "workflow"
version = "0.1.0"
dependencies = [
"chrono",
"lint-common",
"manifest",
"memory",
"serde",
"serde_json",
"serde_yaml",
"tempfile",
"thiserror 2.0.18",
"tracing",
]
[[package]]
name = "writeable"
version = "0.6.3"
@ -4397,3 +4543,31 @@ name = "zmij"
version = "1.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa"
[[package]]
name = "zstd"
version = "0.13.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a"
dependencies = [
"zstd-safe",
]
[[package]]
name = "zstd-safe"
version = "7.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d"
dependencies = [
"zstd-sys",
]
[[package]]
name = "zstd-sys"
version = "2.0.16+zstd.1.5.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748"
dependencies = [
"cc",
"pkg-config",
]

View File

@ -1,6 +1,7 @@
[workspace]
resolver = "2"
members = [
"crates/client",
"crates/daemon",
"crates/llm-worker",
"crates/llm-worker-macros",
@ -9,10 +10,48 @@ members = [
"crates/pod",
"crates/protocol",
"crates/provider",
"crates/pod-registry",
"crates/session-metrics",
"crates/lint-common",
"crates/tools",
"crates/tui",
"crates/memory",
"crates/workflow",
]
[workspace.package]
edition = "2024"
license = "MIT"
[workspace.dependencies]
# Internal crates
client = { path = "crates/client" }
llm-worker = { path = "crates/llm-worker", version = "0.2" }
llm-worker-macros = { path = "crates/llm-worker-macros", version = "0.2" }
manifest = { path = "crates/manifest" }
lint-common = { path = "crates/lint-common" }
memory = { path = "crates/memory" }
pod-registry = { path = "crates/pod-registry" }
protocol = { path = "crates/protocol" }
provider = { path = "crates/provider" }
session-metrics = { path = "crates/session-metrics" }
session-store = { path = "crates/session-store" }
tools = { path = "crates/tools" }
# External
# Note: `reqwest` and `chrono` are not aggregated here because some crates
# need `default-features = false`, which workspace inheritance cannot override.
async-trait = "0.1"
fs4 = "0.13"
futures = "0.3"
libc = "0.2"
schemars = "1.2"
serde = "1.0"
serde_json = "1.0"
sha2 = "0.11"
tempfile = "3.27"
thiserror = "2.0"
tokio = "1.52"
toml = "1.1"
tracing = "0.1"
uuid = "1.23"

18
KNOWN_ISSUES.md Normal file
View File

@ -0,0 +1,18 @@
# Known Issues
Ticket を切るほどではないが、次に近所を触るときに合わせて拾いたい小粒な所見の置き場。
## 運用
- 1 項目 = 出典 (file:line) + 症状 (一文) + トリガー (いつ拾うか、一文)
- 関連 ticket があれば `→ [tickets/foo.md]` でリンク
- 修正したら同じコミットで該当エントリを削除する (履歴は git)
- ここに溜める基準: 「ticket は重い」「だが忘れたら次の触り手が踏む」もの。明確に作業すべきものは ticket 化する
## エントリ
- `crates/tui/src/app.rs:478-485` — bad workflow slug を含む `Method::Run` 送信時、`Event::UserMessage` の早期 broadcast で `turn_index += 1` されターンヘッダだけ残る ("ghost turn header")。次に TUI のターンヘッダ / エラー表示周りを触るときに整理。→ [tickets/pod-input-validate-internalize.md] の review 由来。
- `crates/pod/src/controller.rs:944` — `worker_error_code` の `PodError::WorkflowResolve(_) => InvalidRequest` が post-commit な resolve エラー (`KnowledgeNotFound` 等) にも適用される。意味論的には妥当方向だが、resolve 系のエラー粒度を分けたくなったタイミングで再評価。
- `crates/pod/tests/controller_test.rs` — `double_run_returns_error` がたまに失敗する flakiness を観測。`pod-interrupt-prep-internalize` 以前から存在する別件。次に controller_test の Run 連投系のタイミングを触るときに併せて原因を切り分け。
- `crates/session-store/src/fs_store.rs:117-122` — `FsStore::read_entry_count` が `fs::read_to_string` で全文ロードしてから行数カウントするため O(n)。`ensure_head_or_fork` は run-start でしか呼ばれず現状は許容範囲だが、長期セッションが普通になった時点で `\n` バイト数の cheap count か末尾 seek に置き換える。
- `crates/session-store/src/segment.rs:121` `ensure_head_or_fork` (free fn, test 専用・本番 caller ゼロ) と `crates/pod/src/pod.rs` `Pod::ensure_segment_head` (本番 inline) に live auto-fork の検知 + forked_from 記録が二重実装されている。entry-hash-abolish 以前からの重複で、両方独立にテスト済みだが drift 必至。session-store 側を本番から呼ぶ形に寄せるか free fn を畳むかは要設計判断。Pod state / fork 周辺を次に触るときに統合を検討。

15
TODO.md
View File

@ -1,10 +1,5 @@
- [ ] テスト設計 → [tickets/test-design.md](tickets/test-design.md)
- [ ] ツール設計
- [ ] Bash ツール (Permission 層と統合) → [tickets/bash-tool.md](tickets/bash-tool.md)
- [ ] パーミッション: パターンベースのツール実行制御 → [tickets/permission-extension-point.md](tickets/permission-extension-point.md)
- [ ] Pod オーケストレーション
- [ ] 動的 Scope 変更 → [tickets/dynamic-scope.md](tickets/dynamic-scope.md)
- [ ] ネイティブ GUI クライアント MVP → [tickets/native-gui-mvp.md](tickets/native-gui-mvp.md)
- [ ] TUI 拡充
- [ ] 新しい Pod を spawn する UI の設計 → [tickets/tui-pod-spawn-ui.md](tickets/tui-pod-spawn-ui.md)
- [ ] ツール呼び出しのフレーム更新型表示 → [tickets/tui-tool-call-ui.md](tickets/tui-tool-call-ui.md)
# TODO legacy notice
Active repository work items have been migrated to `work-items/`.
Use `./tickets.sh list --status all` for the generated/current view and `./tickets.sh doctor` to validate the migration state.

11
crates/client/Cargo.toml Normal file
View File

@ -0,0 +1,11 @@
[package]
name = "client"
version = "0.1.0"
edition.workspace = true
license.workspace = true
[dependencies]
protocol = { workspace = true }
manifest = { workspace = true }
tokio = { workspace = true, features = ["rt", "macros", "net", "io-util", "sync", "time", "process", "fs"] }
uuid = { workspace = true }

15
crates/client/src/lib.rs Normal file
View File

@ -0,0 +1,15 @@
//! Client that speaks the Pod protocol.
//!
//! - [`PodClient`]: low-level connection that attaches to an existing pod's
//! Unix socket, sends `Method`s and receives `Event`s.
//! - [`spawn`]: flow that launches the pod binary as a subprocess and waits
//! until the `INSOMNIA-READY` handshake has completed. Callers that do not
//! need to start a subprocess (i.e. attaching to an existing pod) can skip it.
//!
//! The TUI / GUI / E2E harness depend on this crate to speak the protocol.
mod pod_client;
pub mod spawn;
pub use pod_client::PodClient;
pub use spawn::{SpawnConfig, SpawnError, SpawnReady, spawn_pod};

View File

@ -35,6 +35,10 @@ impl PodClient {
self.writer.write(method).await
}
/// Polls the event channel without waiting.
///
/// Returns `Some(event)` when `try_recv` succeeds and `None` on any
/// `try_recv` error (presumably "empty" or "closed" — the two are not
/// distinguished here).
pub fn try_next_event(&mut self) -> Option<Event> {
self.event_rx.try_recv().ok()
}
/// Awaits the next event from the event channel.
///
/// Forwards the result of `recv` unchanged, so `None` is returned exactly
/// when `recv` yields `None`.
pub async fn next_event(&mut self) -> Option<Event> {
self.event_rx.recv().await
}

299
crates/client/src/spawn.rs Normal file
View File

@ -0,0 +1,299 @@
//! `insomnia-pod` バイナリをサブプロセスとして立ち上げ、`INSOMNIA-READY` を待つ
//! ハンドシェイク。
//!
//! - 親プロセス (TUI / GUI / E2E) は overlay TOML を組み立ててこの関数に
//! 渡す。pod はそれを受けて socket を bind し、stderr に
//! `INSOMNIA-READY\t<name>\t<socket>` を吐く。
//! - 待機中の stderr 行は `progress` コールバック越しに呼び出し側へ流す。
//! UI の進捗表示や E2E のログ収集はここで賄う。
//! - `kill_on_drop = false` + `process_group(0)` により、親プロセス
//! ライフサイクルから切り離した detached pod を作る。ready 後の lifecycle
//! 管理は runtime ディレクトリ / socket を介して行う。
use std::io;
use std::path::{Path, PathBuf};
use std::process::Stdio;
use std::time::Duration;
use tokio::process::Command;
use uuid::Uuid;
/// Prefix that marks the ready line in the pod's stderr output; the rest of
/// that line is parsed as `<name>\t<socket>`.
const READY_PREFIX: &str = "INSOMNIA-READY\t";
/// Total time budget for the handshake. The same deadline covers both waiting
/// for the ready line and waiting for the socket to accept a connection.
const READY_TIMEOUT: Duration = Duration::from_secs(20);
/// Input to `spawn_pod`.
pub struct SpawnConfig {
/// Identifier used as `pod.name`. It is used to resolve the runtime
/// directory (`manifest::paths::pod_runtime_dir`) and, when
/// `resume_by_pod_name` is set, is passed to the pod via `--pod`.
pub pod_name: String,
/// TOML string handed to the pod through `--overlay`.
pub overlay_toml: String,
/// Working directory (`current_dir`) of the pod process.
pub cwd: PathBuf,
/// When `Some(id)`, `--session <id>` is appended so the pod resumes from
/// that session.
pub resume_from: Option<Uuid>,
/// When true, `--pod <pod_name>` is appended so the pod resumes from its
/// name-keyed state if present, or otherwise starts as a new pod with the
/// same name.
pub resume_by_pod_name: bool,
}
/// Outcome of a successful handshake, parsed from the pod's ready line.
pub struct SpawnReady {
/// Pod name as reported on the ready line.
pub pod_name: String,
/// Path of the socket reported on the ready line; a connection to it has
/// already succeeded once by the time this value is returned.
pub socket_path: PathBuf,
}
/// Failure modes of `spawn_pod`.
#[derive(Debug)]
pub enum SpawnError {
/// I/O failure while preparing the runtime directory, reading the stderr
/// log, or connecting to the pod socket.
Io(io::Error),
/// The runtime directory could not be resolved (e.g. the relevant
/// environment variables are not set).
RuntimeDirUnavailable,
/// Spawning the pod process itself failed.
PodLaunchFailed(io::Error),
/// The pod process exited before the handshake completed. Also used when
/// the ready line is malformed.
PodExitedEarly {
/// Most recent stderr lines joined with ` | `, or a description of the
/// malformed ready line. May be empty.
stderr_tail: String,
},
/// The handshake did not complete within `READY_TIMEOUT`.
Timeout,
}
/// Human-readable rendering of each spawn failure.
impl std::fmt::Display for SpawnError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Timeout => {
                let secs = READY_TIMEOUT.as_secs();
                write!(f, "pod did not become ready within {secs}s")
            }
            Self::RuntimeDirUnavailable => f.write_str(
                "could not resolve runtime directory (set INSOMNIA_HOME, INSOMNIA_RUNTIME_DIR, XDG_RUNTIME_DIR, or HOME)",
            ),
            Self::Io(err) => write!(f, "io error: {err}"),
            Self::PodLaunchFailed(err) => write!(f, "failed to launch pod: {err}"),
            // Without captured stderr there is nothing useful to append.
            Self::PodExitedEarly { stderr_tail } if stderr_tail.is_empty() => {
                f.write_str("pod exited before becoming ready")
            }
            Self::PodExitedEarly { stderr_tail } => {
                write!(f, "pod exited before becoming ready: {stderr_tail}")
            }
        }
    }
}
impl std::error::Error for SpawnError {}
impl From<io::Error> for SpawnError {
fn from(e: io::Error) -> Self {
Self::Io(e)
}
}
/// pod を spawn し、`INSOMNIA-READY` ハンドシェイクが終わるまで待つ。
///
/// `progress` は ready 行を見つけるまでに観測した stderr の各行で呼ばれる
/// (ready 行自体は除外される)。UI の表示更新や E2E ログ取得に使う。
pub async fn spawn_pod<F>(config: SpawnConfig, mut progress: F) -> Result<SpawnReady, SpawnError>
where
F: FnMut(&str),
{
let pod_bin = resolve_pod_command();
let pod_runtime_dir = manifest::paths::pod_runtime_dir(&config.pod_name)
.ok_or(SpawnError::RuntimeDirUnavailable)?;
std::fs::create_dir_all(&pod_runtime_dir).map_err(SpawnError::Io)?;
let stderr_path = pod_runtime_dir.join("stderr.log");
let stderr_file = std::fs::File::create(&stderr_path).map_err(SpawnError::Io)?;
let mut command = Command::new(&pod_bin);
command
.arg("--overlay")
.arg(&config.overlay_toml)
.current_dir(&config.cwd)
.stdin(Stdio::null())
.stdout(Stdio::null())
.stderr(Stdio::from(stderr_file))
.process_group(0);
if config.resume_by_pod_name {
command.arg("--pod").arg(&config.pod_name);
}
if let Some(id) = config.resume_from {
command.arg("--session").arg(id.to_string());
}
let mut child = command.spawn().map_err(SpawnError::PodLaunchFailed)?;
// Default `kill_on_drop = false` plus `process_group(0)` makes this
// a detached Pod once startup succeeds: dropping the handle does not
// terminate it, and terminal-generated signals for the parent's
// process group do not hit the Pod. Runtime state/socket files are
// the source of truth after that point.
let ready = match wait_for_ready_file(&mut progress, &stderr_path, &mut child).await {
Ok(ready) => ready,
Err(e) => {
let _ = child.start_kill();
let _ = child.wait().await;
return Err(e);
}
};
tokio::spawn(async move {
let _ = child.wait().await;
});
Ok(ready)
}
/// Polls the pod's stderr log until the `INSOMNIA-READY` line shows up, then
/// waits for the advertised socket to accept a connection.
///
/// Every other complete stderr line is recorded in a [`StderrTail`] and
/// forwarded to `progress`.
///
/// Only newline-terminated lines are consumed. The log is polled while the pod
/// may still be in the middle of writing a line, so an unterminated trailing
/// fragment is left in place for the next poll instead of being reported — or
/// parsed as a ready line — in pieces. The log is also read as raw bytes and
/// decoded lossily per line, so a poll that lands inside a multi-byte
/// character does not abort the handshake.
///
/// # Errors
///
/// - [`SpawnError::Io`] when the log cannot be read (a missing file counts as
///   empty) or the socket wait hits an unexpected I/O error.
/// - [`SpawnError::PodExitedEarly`] when the child exits first, or when the
///   ready line lacks a pod name or socket path.
/// - [`SpawnError::Timeout`] once `READY_TIMEOUT` has elapsed.
async fn wait_for_ready_file<F>(
    progress: &mut F,
    stderr_path: &Path,
    child: &mut tokio::process::Child,
) -> Result<SpawnReady, SpawnError>
where
    F: FnMut(&str),
{
    let mut tail = StderrTail::new();
    let deadline = tokio::time::Instant::now() + READY_TIMEOUT;
    // Byte offset of the first line that has not been consumed yet. It always
    // sits right after a `\n` (or at 0), i.e. on a UTF-8 character boundary.
    let mut offset = 0usize;
    loop {
        let bytes = match tokio::fs::read(stderr_path).await {
            Ok(bytes) => bytes,
            Err(e) if e.kind() == io::ErrorKind::NotFound => Vec::new(),
            Err(e) => return Err(SpawnError::Io(e)),
        };
        let pending = bytes.get(offset..).unwrap_or_default();
        // Length of the prefix of `pending` that consists of whole lines.
        let complete_len = pending
            .iter()
            .rposition(|&b| b == b'\n')
            .map_or(0, |idx| idx + 1);
        for raw in pending[..complete_len].split_inclusive(|&b| b == b'\n') {
            // Advance before handling the line so that a later drain (on early
            // exit) never records the same line twice.
            offset += raw.len();
            let text = String::from_utf8_lossy(raw);
            let line: &str = &text;
            let line = line.strip_suffix('\n').unwrap_or(line);
            let line = line.strip_suffix('\r').unwrap_or(line);

            let Some(rest) = line.strip_prefix(READY_PREFIX) else {
                tail.push(line);
                progress(line);
                continue;
            };

            // `<name>\t<socket>`; everything after the first tab is the path.
            let (pod_name, socket_str) = rest.split_once('\t').unwrap_or((rest, ""));
            if pod_name.is_empty() || socket_str.is_empty() {
                return Err(SpawnError::PodExitedEarly {
                    stderr_tail: format!("malformed ready line: {line}"),
                });
            }
            let pod_name = pod_name.to_string();
            let socket_path = PathBuf::from(socket_str);
            wait_for_socket(
                &socket_path,
                deadline,
                child,
                stderr_path,
                &mut tail,
                &mut offset,
            )
            .await?;
            return Ok(SpawnReady {
                pod_name,
                socket_path,
            });
        }
        if tokio::time::Instant::now() >= deadline {
            return Err(SpawnError::Timeout);
        }
        tokio::select! {
            status = child.wait() => {
                let _ = status;
                // The pod may flush its final stderr lines right before it
                // exits. Re-read after `child.wait()` resolves so the line
                // that explains the failure ends up in `PodExitedEarly`.
                drain_stderr_into_tail(stderr_path, &mut tail, &mut offset).await;
                return Err(SpawnError::PodExitedEarly {
                    stderr_tail: tail.into_string(),
                });
            }
            _ = tokio::time::sleep(Duration::from_millis(100)) => {}
        }
    }
}
/// Retries connecting to `socket_path` until it succeeds, the child exits, or
/// `deadline` passes.
///
/// A connection attempt failing with `NotFound` or `ConnectionRefused` is
/// retried after a short pause; any other error is returned as
/// [`SpawnError::Io`]. If the child exits while waiting, the remaining stderr
/// is drained into `tail` and [`SpawnError::PodExitedEarly`] is returned.
async fn wait_for_socket(
    socket_path: &Path,
    deadline: tokio::time::Instant,
    child: &mut tokio::process::Child,
    stderr_path: &Path,
    tail: &mut StderrTail,
    offset: &mut usize,
) -> Result<(), SpawnError> {
    loop {
        // The probe connection is dropped immediately; only reachability matters.
        if let Err(err) = tokio::net::UnixStream::connect(socket_path).await {
            let not_up_yet = matches!(
                err.kind(),
                io::ErrorKind::NotFound | io::ErrorKind::ConnectionRefused
            );
            if !not_up_yet {
                return Err(SpawnError::Io(err));
            }
        } else {
            return Ok(());
        }

        if tokio::time::Instant::now() >= deadline {
            return Err(SpawnError::Timeout);
        }

        tokio::select! {
            exit = child.wait() => {
                let _ = exit;
                drain_stderr_into_tail(stderr_path, tail, offset).await;
                let stderr_tail = tail.as_string();
                return Err(SpawnError::PodExitedEarly { stderr_tail });
            }
            _ = tokio::time::sleep(Duration::from_millis(50)) => {}
        }
    }
}
/// Appends every stderr line past `offset` to `tail`, skipping ready lines,
/// and moves `offset` to the end of the log.
///
/// Best effort: if the log cannot be read, or holds nothing beyond `offset`,
/// neither `tail` nor `offset` is touched.
async fn drain_stderr_into_tail(stderr_path: &Path, tail: &mut StderrTail, offset: &mut usize) {
    let content = match tokio::fs::read_to_string(stderr_path).await {
        Ok(text) => text,
        Err(_) => return,
    };
    if content.len() > *offset {
        content[*offset..]
            .lines()
            .filter(|line| !line.starts_with(READY_PREFIX))
            .for_each(|line| tail.push(line));
        *offset = content.len();
    }
}
/// Picks the executable used to launch a child Pod.
///
/// The result must be an `insomnia-pod`-compatible binary: the parent scans
/// the child's stderr for `INSOMNIA-READY`, so a wrapper that prints extra
/// lines on stderr would pollute that handshake.
///
/// A non-empty `INSOMNIA_POD_COMMAND` takes precedence (tests use it to inject
/// a mock binary). Otherwise the bare name `insomnia-pod` is returned and the
/// lookup is left to `PATH`; a missing binary then surfaces as the spawn
/// `io::Error`.
fn resolve_pod_command() -> PathBuf {
    match std::env::var("INSOMNIA_POD_COMMAND") {
        Ok(overridden) if !overridden.is_empty() => PathBuf::from(overridden),
        _ => PathBuf::from("insomnia-pod"),
    }
}
/// Bounded buffer holding the most recent stderr lines of a child process.
///
/// Once full, pushing a new line evicts the oldest one.
struct StderrTail {
    lines: std::collections::VecDeque<String>,
}

impl StderrTail {
    /// Maximum number of lines retained.
    const CAPACITY: usize = 8;

    /// Creates an empty tail with room for `CAPACITY` lines.
    fn new() -> Self {
        let lines = std::collections::VecDeque::with_capacity(Self::CAPACITY);
        Self { lines }
    }

    /// Appends `line`, dropping the oldest entry when the buffer is full.
    fn push(&mut self, line: &str) {
        if self.lines.len() == Self::CAPACITY {
            self.lines.pop_front();
        }
        self.lines.push_back(line.to_owned());
    }

    /// Renders the retained lines, oldest first, separated by `" | "`.
    fn as_string(&self) -> String {
        let parts: Vec<&str> = self.lines.iter().map(String::as_str).collect();
        parts.join(" | ")
    }

    /// Consuming variant of [`Self::as_string`].
    fn into_string(self) -> String {
        Vec::from(self.lines).join(" | ")
    }
}

View File

@ -5,6 +5,6 @@ edition.workspace = true
license.workspace = true
[dependencies]
manifest = { path = "../manifest" }
protocol = { path = "../protocol" }
tokio = { version = "1.49", features = ["full"] }
manifest = { workspace = true }
protocol = { workspace = true }
tokio = { workspace = true, features = ["full"] }

View File

@ -0,0 +1,13 @@
[package]
name = "lint-common"
version = "0.1.0"
edition.workspace = true
license.workspace = true
[dependencies]
chrono = { version = "0.4", features = ["serde"] }
serde = { workspace = true, features = ["derive"] }
thiserror = { workspace = true }
[dev-dependencies]
serde_json = { workspace = true }

View File

@ -0,0 +1,81 @@
//! Common frontmatter helpers.
use chrono::{DateTime, Utc};
use crate::RecordLintError;
/// Trait record frontmatter types implement so linters can drive them uniformly.
pub trait Frontmatter: Sized {
/// Hard upper bound on body chars (excluding the frontmatter block).
const BODY_LIMIT: usize;
/// Creation timestamp reported by the record, or `None` when it has none.
fn created_at(&self) -> Option<DateTime<Utc>>;
/// Last-update timestamp reported by the record, or `None` when it has none.
fn updated_at(&self) -> Option<DateTime<Utc>>;
}
const FRONTMATTER_DELIM: &str = "---";

/// Split a markdown document into `(yaml_frontmatter, body)`.
///
/// Expects the document to start with a `---` line and have a closing
/// `---` line (or `---` at EOF) somewhere downstream. Both `\n` and `\r\n`
/// line endings are accepted on the delimiter lines. The line ending after
/// the closing delimiter is consumed, so `body` starts on the next line.
///
/// # Errors
/// - [`RecordLintError::MissingFrontmatter`] when the first line is not
///   exactly `---` (for example `---junk` or an empty document).
/// - [`RecordLintError::MalformedFrontmatter`] when no closing `---` line
///   is found.
pub fn split_frontmatter(content: &str) -> Result<(&str, &str), RecordLintError> {
    // The opening delimiter must be the whole first line: `---` followed by
    // `\n`, `\r\n`, or end of input. Anything else glued onto the dashes
    // means the document does not start with a frontmatter block.
    let rest = content
        .strip_prefix(FRONTMATTER_DELIM)
        .ok_or(RecordLintError::MissingFrontmatter)?;
    let after_open = if rest.is_empty() {
        rest
    } else {
        rest.strip_prefix("\r\n")
            .or_else(|| rest.strip_prefix('\n'))
            .ok_or(RecordLintError::MissingFrontmatter)?
    };

    // Look for the closing `---` on its own line, tracking byte offsets so
    // the returned slices borrow straight from the input.
    let mut byte_offset = 0usize;
    for line in after_open.split_inclusive('\n') {
        let next_offset = byte_offset + line.len();
        let trimmed = line.trim_end_matches('\n').trim_end_matches('\r');
        if trimmed == FRONTMATTER_DELIM {
            return Ok((&after_open[..byte_offset], &after_open[next_offset..]));
        }
        byte_offset = next_offset;
    }

    Err(RecordLintError::MalformedFrontmatter(
        "missing closing `---` line".to_string(),
    ))
}
// Unit tests for `split_frontmatter`.
#[cfg(test)]
mod tests {
use super::*;
// A well-formed document yields the YAML block and the body separately.
#[test]
fn splits_simple() {
let doc = "---\nfoo: 1\n---\nbody here\n";
let (y, b) = split_frontmatter(doc).unwrap();
assert_eq!(y, "foo: 1\n");
assert_eq!(b, "body here\n");
}
// A document that does not start with `---` has no frontmatter.
#[test]
fn no_leading_delim_errors() {
let err = split_frontmatter("hello").unwrap_err();
assert!(matches!(err, RecordLintError::MissingFrontmatter));
}
// An opened block that is never closed is malformed.
#[test]
fn no_closing_delim_errors() {
let err = split_frontmatter("---\nfoo: 1\nno close\n").unwrap_err();
assert!(matches!(err, RecordLintError::MalformedFrontmatter(_)));
}
// Nothing after the closing delimiter gives an empty body.
#[test]
fn handles_empty_body() {
let doc = "---\nfoo: 1\n---\n";
let (_, b) = split_frontmatter(doc).unwrap();
assert_eq!(b, "");
}
}

View File

@ -0,0 +1,20 @@
//! Shared record lint primitives for memory and workflow files.
mod frontmatter;
mod slug;
pub use frontmatter::{Frontmatter, split_frontmatter};
pub use slug::{Slug, is_valid_slug};
/// Common lint errors for Markdown record syntax shared by memory and workflow.
#[derive(Debug, Clone, thiserror::Error, PartialEq, Eq)]
pub enum RecordLintError {
/// The string failed slug validation; carries the rejected input.
#[error("invalid slug `{0}`: must match ^[a-z0-9](?:[a-z0-9-]{{0,62}}[a-z0-9])?$")]
InvalidSlug(String),
/// A frontmatter block was opened but is not well-formed; carries a
/// human-readable description of the problem.
#[error("malformed frontmatter: {0}")]
MalformedFrontmatter(String),
/// The document does not start with a frontmatter delimiter.
#[error("frontmatter is missing or document is empty")]
MissingFrontmatter,
}

View File

@ -0,0 +1,146 @@
//! Slug type and validation.
//!
//! Syntax (agent-skills compatible):
//! ^[a-z0-9](?:[a-z0-9-]{0,62}[a-z0-9])?$
//! - 1-64 chars
//! - lowercase ASCII alphanumerics and `-`
//! - cannot start or end with `-`
//! - no consecutive `--`
use std::fmt;
use std::str::FromStr;
use serde::{Deserialize, Deserializer, Serialize};
use crate::RecordLintError;
const MIN_LEN: usize = 1;
const MAX_LEN: usize = 64;
/// Validated slug. Constructible only via [`Slug::parse`].
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
#[serde(transparent)]
pub struct Slug(String);
impl Slug {
/// Parse and validate. Returns [`RecordLintError::InvalidSlug`] on rejection.
pub fn parse(s: impl Into<String>) -> Result<Self, RecordLintError> {
let s = s.into();
if is_valid_slug(&s) {
Ok(Self(s))
} else {
Err(RecordLintError::InvalidSlug(s))
}
}
pub fn as_str(&self) -> &str {
&self.0
}
pub fn into_string(self) -> String {
self.0
}
}
/// Displays the slug text verbatim.
///
/// Uses [`fmt::Formatter::pad`] so width, fill and alignment flags such as
/// `{:>10}` are honored, exactly as they would be for a plain `&str`.
impl fmt::Display for Slug {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.pad(&self.0)
    }
}
// Lets a `Slug` be passed wherever an `AsRef<str>` is accepted.
impl AsRef<str> for Slug {
fn as_ref(&self) -> &str {
&self.0
}
}
// Enables `"text".parse::<Slug>()`; delegates to `Slug::parse`, so the same
// validation and the same `InvalidSlug` error apply.
impl FromStr for Slug {
type Err = RecordLintError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Self::parse(s)
}
}
// Hand-written so deserialization cannot bypass validation: the value is
// read as a `String` and then passed through `Slug::parse`.
impl<'de> Deserialize<'de> for Slug {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let raw = String::deserialize(deserializer)?;
// A rejected slug is reported as a custom serde error.
Self::parse(raw).map_err(serde::de::Error::custom)
}
}
/// Pure-function slug predicate implemented without the `regex` crate.
///
/// A slug is accepted when all of the following hold:
/// - its byte length lies in `MIN_LEN..=MAX_LEN`;
/// - every byte is a lowercase ASCII letter, an ASCII digit, or `-`;
/// - it neither starts nor ends with `-`;
/// - it contains no consecutive `--`.
pub fn is_valid_slug(s: &str) -> bool {
    let bytes = s.as_bytes();
    if !(MIN_LEN..=MAX_LEN).contains(&bytes.len()) {
        return false;
    }

    // Both ends must be alphanumeric, which rules out leading/trailing dashes.
    let (first, last) = match (bytes.first(), bytes.last()) {
        (Some(&first), Some(&last)) => (first, last),
        _ => return false,
    };
    if !(is_alnum_lower(first) && is_alnum_lower(last)) {
        return false;
    }

    let charset_ok = bytes.iter().all(|&b| b == b'-' || is_alnum_lower(b));
    let no_double_dash = bytes
        .windows(2)
        .all(|pair| pair[0] != b'-' || pair[1] != b'-');
    charset_ok && no_double_dash
}
/// True for ASCII digits and lowercase ASCII letters only.
fn is_alnum_lower(b: u8) -> bool {
    matches!(b, b'0'..=b'9' | b'a'..=b'z')
}
// Unit tests for slug validation and the `Slug` newtype.
#[cfg(test)]
mod tests {
use super::*;
// Representative valid slugs pass both the predicate and the constructor.
#[test]
fn accepts_basic_slugs() {
for s in ["a", "ab", "abc-def", "x9", "a-b-c", "123", "a-1"] {
assert!(is_valid_slug(s), "expected `{s}` valid");
assert!(Slug::parse(s).is_ok());
}
}
// Empty input, edge dashes, uppercase, separators other than `-`,
// doubled dashes and non-ASCII are all rejected.
#[test]
fn rejects_bad_slugs() {
for s in [
"", "-", "-foo", "foo-", "Foo", "foo_bar", "foo bar", "foo--bar", "foo.bar", "ä",
] {
assert!(!is_valid_slug(s), "expected `{s}` invalid");
assert!(Slug::parse(s).is_err());
}
}
// `MAX_LEN` bytes is accepted, one more is not.
#[test]
fn enforces_length_bounds() {
let too_long = "a".repeat(MAX_LEN + 1);
assert!(!is_valid_slug(&too_long));
let max = "a".repeat(MAX_LEN);
assert!(is_valid_slug(&max));
}
// Deserialization goes through validation as well.
#[test]
fn deserializes_via_serde() {
let json = "\"valid-slug\"";
let slug: Slug = serde_json::from_str(json).unwrap();
assert_eq!(slug.as_str(), "valid-slug");
let bad = "\"BAD\"";
let err: Result<Slug, _> = serde_json::from_str(bad);
assert!(err.is_err());
}
}

View File

@ -6,22 +6,24 @@ edition.workspace = true
license.workspace = true
[dependencies]
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
thiserror = "2.0"
tracing = "0.1"
async-trait = "0.1"
futures = "0.3"
tokio = { version = "1.49", features = ["macros", "rt-multi-thread"] }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
thiserror = { workspace = true }
tracing = { workspace = true }
async-trait = { workspace = true }
futures = { workspace = true }
tokio = { workspace = true, features = ["macros", "rt-multi-thread", "time"] }
tokio-util = "0.7"
reqwest = { version = "0.13.1", default-features = false, features = ["stream", "json", "native-tls", "http2"] }
reqwest = { version = "0.13", default-features = false, features = ["stream", "json", "native-tls", "http2"] }
eventsource-stream = "0.2"
llm-worker-macros = { path = "../llm-worker-macros", version = "0.2" }
zstd = "0.13"
llm-worker-macros = { workspace = true }
[dev-dependencies]
clap = { version = "4.5", features = ["derive", "env"] }
schemars = "1.2"
tempfile = "3.24"
schemars = { workspace = true }
tempfile = { workspace = true }
dotenv = "0.15"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
trybuild = "1.0.116"
wiremock = "0.6.5"

View File

@ -20,32 +20,13 @@ mod recorder;
mod scenarios;
use clap::{Parser, ValueEnum};
use llm_worker::llm_client::capability::{
CacheStrategy, ModelCapability, StructuredOutput, ToolCallingSupport,
};
use llm_worker::llm_client::scheme::{
Scheme, anthropic::AnthropicScheme, gemini::GeminiScheme, openai_chat::OpenAIScheme,
};
use llm_worker::llm_client::transport::{HttpTransport, ResolvedAuth};
/// 既定の capability: fixture 記録には cache_control を付けない
/// (既知モデルの静的テーブルを経由すると scheme 毎に自動設定される)。
fn fallback_capability() -> ModelCapability {
ModelCapability {
tool_calling: ToolCallingSupport::Parallel,
structured_output: StructuredOutput::JsonSchema,
reasoning: None,
vision: false,
prompt_caching: CacheStrategy::Auto,
}
}
fn make_transport<S: Scheme>(
scheme: S,
model: &str,
auth: ResolvedAuth,
) -> HttpTransport<S> {
let cap = scheme.capability_for(model).unwrap_or_else(fallback_capability);
fn make_transport<S: Scheme>(scheme: S, model: &str, auth: ResolvedAuth) -> HttpTransport<S> {
let cap = scheme.default_capability();
let base_url = scheme.default_base_url().to_string();
HttpTransport::new(scheme, model.to_string(), base_url, auth, cap)
}
@ -86,11 +67,7 @@ async fn run_scenario_with_anthropic(
let api_key = std::env::var("ANTHROPIC_API_KEY")
.expect("ANTHROPIC_API_KEY environment variable must be set");
let model = model.as_deref().unwrap_or("claude-sonnet-4-20250514");
let client = make_transport(
AnthropicScheme::new(),
model,
ResolvedAuth::ApiKey(api_key),
);
let client = make_transport(AnthropicScheme::new(), model, ResolvedAuth::ApiKey(api_key));
recorder::record_request(
&client,
@ -138,7 +115,7 @@ async fn run_scenario_with_ollama(
model.to_string(),
"http://localhost:11434".to_string(),
ResolvedAuth::None,
fallback_capability(),
AnthropicScheme::new().default_capability(),
);
recorder::record_request(

View File

@ -2,9 +2,6 @@
//!
//! Example of cancelling from another thread during streaming
use llm_worker::llm_client::capability::{
CacheStrategy, ModelCapability, StructuredOutput, ToolCallingSupport,
};
use llm_worker::llm_client::scheme::{Scheme, anthropic::AnthropicScheme};
use llm_worker::llm_client::transport::{HttpTransport, ResolvedAuth};
use llm_worker::{Worker, WorkerResult};
@ -28,13 +25,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
let scheme = AnthropicScheme::new();
let model = "claude-sonnet-4-20250514".to_string();
let cap = scheme.capability_for(&model).unwrap_or(ModelCapability {
tool_calling: ToolCallingSupport::Parallel,
structured_output: StructuredOutput::JsonSchema,
reasoning: None,
vision: false,
prompt_caching: CacheStrategy::Auto,
});
let cap = scheme.default_capability();
let base_url = scheme.default_base_url().to_string();
let client = HttpTransport::new(scheme, model, base_url, ResolvedAuth::ApiKey(api_key), cap);
let worker = Worker::new(client);

View File

@ -338,14 +338,8 @@ fn default_capability() -> ModelCapability {
}
}
fn build_transport<S: Scheme>(
scheme: S,
model: String,
auth: ResolvedAuth,
) -> Box<dyn LlmClient> {
let cap = scheme
.capability_for(&model)
.unwrap_or_else(default_capability);
fn build_transport<S: Scheme>(scheme: S, model: String, auth: ResolvedAuth) -> Box<dyn LlmClient> {
let cap = scheme.default_capability();
let base_url = scheme.default_base_url().to_string();
Box::new(HttpTransport::new(scheme, model, base_url, auth, cap))
}

View File

@ -7,8 +7,8 @@
use std::marker::PhantomData;
use crate::handler::{
Handler, Kind, TextBlockEvent, TextBlockKind, ToolUseBlockEvent, ToolUseBlockKind,
ToolUseBlockStart,
Handler, Kind, TextBlockEvent, TextBlockKind, ThinkingBlockEvent, ThinkingBlockKind,
ToolUseBlockEvent, ToolUseBlockKind, ToolUseBlockStart,
};
use crate::tool::ToolCall;
@ -95,6 +95,81 @@ impl Handler<TextBlockKind> for ClosureTextBlockHandler {
}
}
// =============================================================================
// ThinkingBlock Closure Handler
// =============================================================================
/// Callback scope for a thinking block.
///
/// Mirrors `TextBlockScope`. Some providers (or some configurations)
/// emit thinking metadata without plaintext deltas — in that case the
/// block fires `Start` and `Stop` with no `Delta` in between, which is
/// expected and not an error.
pub struct ThinkingBlockScope {
pub(crate) on_delta: Option<Box<dyn FnMut(&str) + Send + Sync>>,
pub(crate) on_stop: Option<Box<dyn FnMut(&str) + Send + Sync>>,
}
impl ThinkingBlockScope {
/// Creates a scope with no callbacks registered.
fn new() -> Self {
Self {
on_delta: None,
on_stop: None,
}
}
/// Register a callback for each thinking text delta (streaming fragment).
///
/// Registering again replaces the previously stored callback.
pub fn on_delta(&mut self, f: impl FnMut(&str) + Send + Sync + 'static) {
self.on_delta = Some(Box::new(f));
}
/// Register a callback invoked when the block completes.
///
/// Receives the full accumulated thinking text. May be empty when
/// the provider didn't emit any plaintext deltas.
pub fn on_stop(&mut self, f: impl FnMut(&str) + Send + Sync + 'static) {
self.on_stop = Some(Box::new(f));
}
}
/// State carried by `ClosureThinkingBlockHandler` for one thinking block:
/// the callbacks installed when the block starts, plus the text accumulated
/// from its deltas.
#[derive(Default)]
pub(crate) struct ThinkingBlockClosureState {
on_delta: Option<Box<dyn FnMut(&str) + Send + Sync>>,
on_stop: Option<Box<dyn FnMut(&str) + Send + Sync>>,
// Concatenation of every delta seen since the last `Start`.
buffer: String,
}
/// Handler that adapts a user-supplied setup closure to thinking-block events.
///
/// `setup` is invoked on every block start to register callbacks on a fresh
/// `ThinkingBlockScope`.
pub(crate) struct ClosureThinkingBlockHandler {
pub(crate) setup: Box<dyn FnMut(&mut ThinkingBlockScope) + Send + Sync>,
}
// Drives the registered closures from thinking-block events:
// `Start` resets the buffer and re-runs `setup`, `Delta` accumulates text and
// forwards it, `Stop` hands the accumulated text to `on_stop`.
impl Handler<ThinkingBlockKind> for ClosureThinkingBlockHandler {
type Scope = ThinkingBlockClosureState;
fn on_event(&mut self, scope: &mut Self::Scope, event: &ThinkingBlockEvent) {
match event {
ThinkingBlockEvent::Start(_) => {
// Drop text left over from a previous block and install fresh callbacks.
scope.buffer.clear();
let mut builder = ThinkingBlockScope::new();
(self.setup)(&mut builder);
scope.on_delta = builder.on_delta;
scope.on_stop = builder.on_stop;
}
ThinkingBlockEvent::Delta(text) => {
// Accumulate even when no `on_delta` is registered so `on_stop`
// still receives the full text.
scope.buffer.push_str(text);
if let Some(f) = &mut scope.on_delta {
f(text);
}
}
ThinkingBlockEvent::Stop(_) => {
// The buffer is empty when the block produced no deltas.
if let Some(f) = &mut scope.on_stop {
f(&scope.buffer);
}
}
}
}
}
// =============================================================================
// ToolUseBlock Closure Handler
// =============================================================================

View File

@ -91,6 +91,16 @@ impl Kind for ErrorKind {
type Event = ErrorEvent;
}
/// Reasoning item Kind - used to persist completed reasoning items.
///
/// Fires exactly once per reasoning item. The Worker receives it via
/// `ReasoningItemCollector` and appends it to history as
/// `Item::Reasoning` at the end of the turn.
pub struct ReasoningItemKind;
impl Kind for ReasoningItemKind {
type Event = ReasoningItemEvent;
}
// =============================================================================
// Block Kind Definitions
// =============================================================================

View File

@ -17,19 +17,31 @@ use crate::tool::{Tool, ToolCall, ToolMeta, ToolResult};
// =============================================================================
/// Action after prompt submission.
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq)]
pub enum PromptAction {
/// Proceed normally.
Continue,
/// Cancel with a reason.
Cancel(String),
/// Proceed, and append these items to history right after the user
/// message. Mirrors [`TurnEndAction::ContinueWithMessages`] for the
/// submit edge: lets the upper layer attach resolver-produced
/// system messages (e.g. `@<path>` file content) so they sit
/// adjacent to the user message that referenced them.
ContinueWith(Vec<Item>),
}
/// Action before an LLM request.
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq)]
pub enum PreRequestAction {
/// Proceed normally.
Continue,
/// Proceed after appending these items to durable worker history.
///
/// This is for upper-layer budget/status nudges that the model may react
/// to: the items are committed before the request so later turns can see
/// why the worker changed course.
ContinueWith(Vec<Item>),
/// Cancel with a reason (treated as an error).
Cancel(String),
/// Yield control to the caller for external processing.
@ -46,6 +58,11 @@ pub enum PreToolAction {
Continue,
/// Skip this tool call (do not execute).
Skip,
/// Do not execute the tool call; commit this synthetic result instead.
///
/// This preserves provider-visible `tool_use` / `tool_result` pairing
/// without aborting the whole turn.
SyntheticResult(ToolResult),
/// Abort the entire run.
Abort(String),
/// Pause execution (can be resumed later).
@ -114,8 +131,36 @@ pub trait Interceptor: Send + Sync {
PromptAction::Continue
}
/// Called before each LLM request. The context can be modified
/// (e.g. for context compaction).
/// Items that should be **committed to `worker.history`** just
/// before the next LLM request. Returned items are `extend`ed into
/// the persistent history (and therefore picked up by the per-turn
/// clone that backs the LLM request, plus the usual
/// history-persistence path).
///
/// Use this for inputs that arrive from outside the LLM and need
/// to be reflected in the on-disk history — notifications,
/// cross-Pod events, system reminders. Do **not** use
/// [`Self::pre_llm_request`] for that purpose: it mutates a
/// per-request clone, so any committed assistant response that
/// reacts to the injection would have no visible trigger on the
/// next turn (or after resume / compaction).
///
/// `pre_llm_request` remains the right place for purely
/// reproducible per-request transformations (pruning, content
/// trimming, cache anchors) that depend only on the existing
/// history.
async fn pending_history_appends(&self) -> Vec<Item> {
Vec::new()
}
/// Called before each LLM request. The context starts as a clone
/// of `worker.history` (after `pending_history_appends` and the
/// Worker's own prune projection have been applied).
///
/// Direct mutations to `context` remain request-local and are not persisted.
/// If an interceptor derives a human/model-visible nudge from the current
/// request context, return [`PreRequestAction::ContinueWith`] so the Worker
/// commits it to history before the request is sent.
async fn pre_llm_request(&self, _context: &mut Vec<Item>) -> PreRequestAction {
PreRequestAction::Continue
}

View File

@ -48,12 +48,17 @@ pub mod llm_client;
pub mod prune;
pub mod state;
pub mod timeline;
pub mod token_counter;
pub mod tool;
pub mod tool_server;
pub mod usage_record;
pub use callback::{TextBlockScope, ToolUseBlockScope};
pub use callback::{TextBlockScope, ThinkingBlockScope, ToolUseBlockScope};
pub use handler::ToolUseBlockStart;
pub use interceptor::Interceptor;
pub use message::{ContentPart, Item, Message, Role};
pub use tool::{ToolCall, ToolOutputLimits, ToolResult};
pub use worker::{RunOutput, ToolRegistryError, Worker, WorkerConfig, WorkerError, WorkerResult};
pub use usage_record::UsageRecord;
pub use worker::{
LlmRetryNotice, RunOutput, ToolRegistryError, Worker, WorkerConfig, WorkerError, WorkerResult,
};

View File

@ -45,4 +45,13 @@ pub enum AuthRequirement {
pub trait AuthProvider: Send + Sync + std::fmt::Debug {
/// 1 リクエスト分の認証ヘッダを返す。refresh が必要なら内部で行う。
async fn headers(&self) -> Result<Vec<(HeaderName, HeaderValue)>, ClientError>;
/// Whether this provider performs the composite authentication used for
/// the ChatGPT Codex backend.
///
/// The transport does not know the provider crate's concrete types, so this
/// hook alone is what switches on Codex CLI-compatible wire behavior
/// (conversation header, request compression, etc.).
///
/// Defaults to `false`.
fn is_codex_backend(&self) -> bool {
false
}
}

View File

@ -8,7 +8,7 @@
//! 1. scheme 実装側の `model_id → ModelCapability` 静的テーブル(既知モデル)
//! 2. `ModelConfig::capability` での明示 override（未知モデル、または上書き）
use serde::{Deserialize, Serialize};
use serde::{Deserialize, Deserializer, Serialize, Serializer};
/// モデル能力メタデータ
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
@ -80,23 +80,90 @@ pub enum CacheStrategy {
Auto,
}
/// Reasoning 制御（共通型、scheme 側で各社形式に投影）
/// Reasoning 制御（共通型、scheme 側で各社形式に投影）
///
/// `effort` / `budget_tokens` はユーザー設定から任意で渡される。Scheme
/// 側は自身の `ReasoningSupport` に応じて片方だけ使う。両方が宣言
/// されている場合の優先順位は scheme 実装が決める。
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
pub struct ReasoningControl {
#[serde(default)]
pub effort: Option<ReasoningEffort>,
#[serde(default)]
pub budget_tokens: Option<u32>,
/// 文字列は provider-native な effort label、数値は provider-native な
/// thinking budget token として扱う。どちらか一方だけを型で表現する。
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(untagged)]
pub enum ReasoningControl {
Effort(ReasoningEffort),
BudgetTokens(i32),
}
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ReasoningEffort {
Minimal,
Low,
Medium,
High,
XHigh,
Other(String),
}
impl ReasoningEffort {
/// Returns the wire label for this effort level.
///
/// Known levels map to fixed lowercase labels; `Other` returns the stored
/// label unchanged.
pub fn as_str(&self) -> &str {
match self {
Self::Minimal => "minimal",
Self::Low => "low",
Self::Medium => "medium",
Self::High => "high",
Self::XHigh => "xhigh",
Self::Other(label) => label.as_str(),
}
}
}
// Inverse of `as_str`: recognised labels become the matching variant, and any
// other string is kept verbatim in `Other`. Matching is exact and
// case-sensitive.
impl From<String> for ReasoningEffort {
fn from(value: String) -> Self {
match value.as_str() {
"minimal" => Self::Minimal,
"low" => Self::Low,
"medium" => Self::Medium,
"high" => Self::High,
"xhigh" => Self::XHigh,
_ => Self::Other(value),
}
}
}
// Serializes as a plain string using the label from `as_str`.
impl Serialize for ReasoningEffort {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.serialize_str(self.as_str())
}
}
// Deserializes from a string through `From<String>`, so labels that are not
// recognised are preserved as `Other` instead of being rejected.
impl<'de> Deserialize<'de> for ReasoningEffort {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
String::deserialize(deserializer).map(Self::from)
}
}
// Unit tests for the untagged `ReasoningControl` representation.
#[cfg(test)]
mod tests {
use super::{ReasoningControl, ReasoningEffort};
// A JSON string becomes `Effort`; labels that are not recognised are kept as `Other`.
#[test]
fn reasoning_control_deserializes_effort_labels() {
let known: ReasoningControl = serde_json::from_str(r#""xhigh""#).unwrap();
assert_eq!(known, ReasoningControl::Effort(ReasoningEffort::XHigh));
let unknown: ReasoningControl = serde_json::from_str(r#""provider-native""#).unwrap();
assert_eq!(
unknown,
ReasoningControl::Effort(ReasoningEffort::Other("provider-native".into()))
);
}
// A JSON number becomes `BudgetTokens`; negative values are accepted.
#[test]
fn reasoning_control_deserializes_signed_budget() {
let dynamic: ReasoningControl = serde_json::from_str("-1").unwrap();
assert_eq!(dynamic, ReasoningControl::BudgetTokens(-1));
}
}

View File

@ -36,6 +36,8 @@ impl std::fmt::Display for ConfigWarning {
}
}
pub type ResponseStream = Pin<Box<dyn Stream<Item = Result<Event, ClientError>> + Send>>;
/// LLMクライアントのtrait
///
/// 各プロバイダはこのtraitを実装し、統一されたインターフェースを提供する。
@ -49,10 +51,7 @@ pub trait LlmClient: Send + Sync {
/// # Returns
/// * `Ok(Stream)` - イベントストリーム
/// * `Err(ClientError)` - エラー
async fn stream(
&self,
request: Request,
) -> Result<Pin<Box<dyn Stream<Item = Result<Event, ClientError>> + Send>>, ClientError>;
async fn stream(&self, request: Request) -> Result<ResponseStream, ClientError>;
/// Clone this client into a new `Box<dyn LlmClient>`.
///
@ -74,15 +73,18 @@ pub trait LlmClient: Send + Sync {
}
}
impl Clone for Box<dyn LlmClient> {
fn clone(&self) -> Self {
self.clone_boxed()
}
}
/// `Box<dyn LlmClient>` に対する `LlmClient` の実装
///
/// これにより、動的ディスパッチを使用するクライアントも `Worker` で利用可能になる。
#[async_trait]
impl LlmClient for Box<dyn LlmClient> {
async fn stream(
&self,
request: Request,
) -> Result<Pin<Box<dyn Stream<Item = Result<Event, ClientError>> + Send>>, ClientError> {
async fn stream(&self, request: Request) -> Result<ResponseStream, ClientError> {
(**self).stream(request).await
}

View File

@ -1,6 +1,6 @@
//! LLMクライアントエラー型
use std::fmt;
use std::{fmt, time::Duration};
/// LLMクライアントのエラー
#[derive(Debug)]
@ -16,6 +16,12 @@ pub enum ClientError {
status: Option<u16>,
code: Option<String>,
message: String,
retry_after: Option<Duration>,
},
/// A request lifecycle phase exceeded its hard timeout.
Timeout {
phase: &'static str,
timeout: Duration,
},
/// 設定エラー
Config(String),
@ -31,6 +37,7 @@ impl fmt::Display for ClientError {
status,
code,
message,
..
} => {
write!(f, "API error")?;
if let Some(s) = status {
@ -41,6 +48,9 @@ impl fmt::Display for ClientError {
}
write!(f, ": {}", message)
}
ClientError::Timeout { phase, timeout } => {
write!(f, "{phase} timed out after {}s", timeout.as_secs())
}
ClientError::Config(msg) => write!(f, "Config error: {}", msg),
}
}
@ -67,3 +77,96 @@ impl From<serde_json::Error> for ClientError {
ClientError::Json(err)
}
}
impl ClientError {
    /// HTTP status carried by an `Api` error, if any.
    ///
    /// Every other variant yields `None`.
    pub fn status(&self) -> Option<u16> {
        if let ClientError::Api { status, .. } = self {
            *status
        } else {
            None
        }
    }

    /// Retry delay carried by an `Api` error, if any.
    ///
    /// Every other variant yields `None`.
    pub fn retry_after(&self) -> Option<Duration> {
        if let ClientError::Api { retry_after, .. } = self {
            *retry_after
        } else {
            None
        }
    }
}
/// Decides whether an error is a transient failure worth retrying.
///
/// Retryable:
/// - `Api` with status 408 / 425 / 429 / 500 / 502 / 503 / 504 / 529
/// - `Http` (`reqwest::Error`) for which `is_connect()` or `is_timeout()` holds
/// - `Timeout { .. }`, the lifecycle hard timeout
///
/// Everything else (`Json`, `Sse`, `Config`, `Api` with any other status or
/// with no status at all) is not retryable. Failures that occur after SSE
/// reading has started are surfaced by the caller as `Sse`, so excluding
/// `Sse` here filters them out automatically.
pub fn is_retryable(error: &ClientError) -> bool {
    match error {
        ClientError::Api { status, .. } => {
            matches!(
                *status,
                Some(408 | 425 | 429 | 500 | 502 | 503 | 504 | 529)
            )
        }
        ClientError::Http(http) => http.is_connect() || http.is_timeout(),
        ClientError::Timeout { .. } => true,
        ClientError::Json(_) | ClientError::Sse(_) | ClientError::Config(_) => false,
    }
}
// Unit tests for `is_retryable`.
#[cfg(test)]
mod tests {
use super::*;
// Builds an `Api` error that carries only the given status.
fn api_err(status: Option<u16>) -> ClientError {
ClientError::Api {
status,
code: None,
message: String::new(),
retry_after: None,
}
}
// Every status on the retry list is classified as retryable.
#[test]
fn retryable_status_codes() {
for code in [408u16, 425, 429, 500, 502, 503, 504, 529] {
assert!(
is_retryable(&api_err(Some(code))),
"status {code} should be retryable",
);
}
}
// A sample of client errors, plus 501, is not retried.
#[test]
fn non_retryable_status_codes() {
for code in [400u16, 401, 403, 404, 409, 410, 422, 501] {
assert!(
!is_retryable(&api_err(Some(code))),
"status {code} should not be retryable",
);
}
}
// An `Api` error without a status is not retried.
#[test]
fn api_without_status_not_retryable() {
assert!(!is_retryable(&api_err(None)));
}
// Lifecycle hard timeouts are retried.
#[test]
fn lifecycle_timeout_is_retryable() {
assert!(is_retryable(&ClientError::Timeout {
phase: "stream_open",
timeout: Duration::from_secs(30),
}));
}
// Parse, stream and configuration errors are never retried.
#[test]
fn json_sse_config_not_retryable() {
let json_err = serde_json::from_str::<serde_json::Value>("not json").unwrap_err();
assert!(!is_retryable(&ClientError::Json(json_err)));
assert!(!is_retryable(&ClientError::Sse("boom".into())));
assert!(!is_retryable(&ClientError::Config("boom".into())));
}
}

View File

@ -15,8 +15,11 @@ use serde::{Deserialize, Serialize};
///
/// # イベントの種類
///
/// - **メタイベント**: `Ping`, `Usage`, `Status`, `Error`
/// - **メタイベント**: `Ping`, `Usage`, `Status`, `Error`, `UnhandledSse`
/// - **ブロックイベント**: `BlockStart`, `BlockDelta`, `BlockStop`, `BlockAbort`
/// - **永続化イベント**: `ReasoningItem` (history に commit すべき完成済み
/// reasoning item。streaming 表示用の Thinking BlockStart/Delta/Stop と
/// は別経路で発火する)
///
/// # ブロックのライフサイクル
///
@ -32,6 +35,10 @@ pub enum Event {
Status(StatusEvent),
/// エラー発生
Error(ErrorEvent),
/// Scheme が生成内容として解釈しない未対応 SSE イベント。
///
/// stream trace 用の観測イベントであり、timeline / history には反映しない。
UnhandledSse(UnhandledSseEvent),
/// ブロック開始(テキスト、ツール使用等)
BlockStart(BlockStart),
@ -41,6 +48,18 @@ pub enum Event {
BlockStop(BlockStop),
/// ブロック中断
BlockAbort(BlockAbort),
/// Reasoning item の完成。scheme が「次の request に送り返すための
/// reasoning material が揃った」点で 1 度だけ発火する。
///
/// - Anthropic: 1 つの `thinking` content_block 完了ごと
/// - OpenAI Responses: 1 つの reasoning output_item 完了ごと
///
/// 上位層（Worker / ReasoningItemCollector）はこれを `Item::Reasoning`
/// として `worker.history` に append する。streaming 表示用の
/// `BlockStart(Thinking)` / `BlockDelta(Thinking)` / `BlockStop(Thinking)`
/// は依然として並行発火する（live display と round-trip persist の責務分離）。
ReasoningItem(ReasoningItemEvent),
}
// =============================================================================
@ -104,6 +123,18 @@ pub struct ErrorEvent {
pub message: String,
}
/// Observational meta event for an unhandled SSE event.
///
/// `data_preview` is a bounded preview of the raw SSE data received from the
/// provider; `data_len` is the byte length of the raw data before it was
/// cut down to the preview.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct UnhandledSseEvent {
pub provider: String,
pub event_type: String,
pub data_preview: String,
pub data_len: usize,
}
// =============================================================================
// Block Types
// =============================================================================
@ -212,6 +243,31 @@ impl BlockAbort {
}
}
// =============================================================================
// Reasoning Item Event
// =============================================================================
/// A completed reasoning item. The scheme fires it exactly once, after it
/// has gathered all the material needed for the round trip (text, summary,
/// encrypted_content, signature, id).
///
/// Holds the fields of `Item::Reasoning` one-to-one.
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
pub struct ReasoningItemEvent {
/// Item id observed by the scheme (the `id` of OpenAI Responses).
pub id: Option<String>,
/// Reasoning body text. For Anthropic this is the accumulated `thinking`;
/// for OpenAI the accumulated `reasoning_text`. Empty for redacted_thinking.
pub text: String,
/// Summary (`summary_text[]` of OpenAI Responses). Empty for other schemes.
pub summary: Vec<String>,
/// Encrypted opaque blob (Anthropic `redacted_thinking.data` /
/// OpenAI Responses `encrypted_content`).
pub encrypted_content: Option<String>,
/// Anthropic extended thinking signature. Required for the round trip.
pub signature: Option<String>,
}
/// 停止理由
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum StopReason {

View File

@ -23,6 +23,7 @@ pub mod error;
pub mod event;
pub mod types;
pub mod retry;
pub mod scheme;
pub mod transport;

View File

@ -0,0 +1,104 @@
//! LLM response stream を開く前の transient error 向けリトライポリシー。
//!
//! Worker が `LlmClient::stream` の open error に対して `is_retryable` を見て
//! retry / backoff / TUI event / cancellation をまとめて管理する。
//! SSE 読み出し開始後の失敗は対象外。
use std::time::Duration;
/// Policy describing exponential backoff + jitter + a cumulative timeout.
///
/// `Default` returns the fixed values used across llm-worker. Extend it if
/// overriding through the manifest ever becomes necessary (currently not
/// needed → `tickets/llm-worker-transient-retry.md`).
#[derive(Debug, Clone)]
pub struct RetryPolicy {
/// Base value of the exponential. The actual wait is drawn with full
/// jitter from an upper bound of `base * 2^attempt` clamped to `cap`.
pub base: Duration,
/// Upper bound on a single wait.
pub cap: Duration,
/// Total number of attempts (first try + retries). `1` means no retry.
pub max_attempts: u32,
/// Cumulative timeout measured from the start of the first send. Waits
/// that would exceed it are cut short.
pub total_timeout: Duration,
}
// Fixed defaults: 500 ms base, 10 s per-wait cap, 4 attempts in total,
// 40 s cumulative timeout.
impl Default for RetryPolicy {
fn default() -> Self {
Self {
base: Duration::from_millis(500),
cap: Duration::from_secs(10),
max_attempts: 4,
total_timeout: Duration::from_secs(40),
}
}
}
impl RetryPolicy {
    /// Returns how long to wait after the `attempt`-th failure (0-indexed).
    ///
    /// The wait is drawn with full jitter from `[0, min(base * 2^attempt, cap)]`.
    /// To honor a `Retry-After` value instead, do not call this and use that
    /// value as-is.
    pub fn backoff(&self, attempt: u32) -> Duration {
        // Clamp the exponent so the shift itself can never overflow; the
        // multiplication saturates and `cap` bounds the result anyway.
        let shift = attempt.min(20);
        let base_nanos = saturating_nanos(self.base);
        let cap_nanos = saturating_nanos(self.cap);
        let upper = base_nanos.saturating_mul(1u64 << shift).min(cap_nanos);
        Duration::from_nanos(jitter_nanos(upper))
    }
}

/// Converts a `Duration` to whole nanoseconds, saturating at `u64::MAX`
/// instead of wrapping when the value does not fit in 64 bits.
fn saturating_nanos(d: Duration) -> u64 {
    // Lossless after the clamp.
    d.as_nanos().min(u128::from(u64::MAX)) as u64
}

/// Picks one pseudo-random value from `[0, max_nanos]`.
///
/// A lightweight implementation that scrambles the low bits of `SystemTime`
/// with splitmix64. It has no cryptographic randomness, which is enough for
/// full jitter to keep retries from colliding.
fn jitter_nanos(max_nanos: u64) -> u64 {
    if max_nanos == 0 {
        return 0;
    }
    let seed = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        // Truncation is intentional: only the fast-changing low bits matter.
        .map(|d| d.as_nanos() as u64)
        .unwrap_or(0);
    let mut x = seed.wrapping_add(0x9E37_79B9_7F4A_7C15);
    x = (x ^ (x >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9);
    x = (x ^ (x >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB);
    x ^= x >> 31;
    match max_nanos.checked_add(1) {
        Some(modulus) => x % modulus,
        // `max_nanos == u64::MAX`: every `u64` is already within range.
        None => x,
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The default policy exposes the fixed llm-worker values.
    #[test]
    fn default_policy_values() {
        let RetryPolicy {
            base,
            cap,
            max_attempts,
            total_timeout,
        } = RetryPolicy::default();
        assert_eq!(base, Duration::from_millis(500));
        assert_eq!(cap, Duration::from_secs(10));
        assert_eq!(max_attempts, 4);
        assert_eq!(total_timeout, Duration::from_secs(40));
    }

    /// No attempt number, however large, may produce a wait above `cap`.
    #[test]
    fn backoff_respects_cap() {
        let policy = RetryPolicy::default();
        (0..30u32).for_each(|attempt| {
            assert!(
                policy.backoff(attempt) <= policy.cap,
                "attempt {attempt} exceeded cap",
            );
        });
    }

    /// A zero base collapses every wait to zero regardless of the attempt.
    #[test]
    fn backoff_zero_when_base_zero() {
        let policy = RetryPolicy {
            base: Duration::ZERO,
            cap: Duration::from_secs(10),
            max_attempts: 4,
            total_timeout: Duration::from_secs(30),
        };
        (0..5).for_each(|attempt| {
            assert_eq!(policy.backoff(attempt), Duration::ZERO);
        });
    }
}

View File

@ -1,34 +1,17 @@
//! `model_id → ModelCapability` 静的テーブル
//! Anthropic scheme の wire-level 既定 capability
//!
//! 既知モデルのみ網羅する。未知モデルは `None` を返し、呼び出し側
//! (`HttpTransport` 構築時)に scheme 既定へフォールバックさせる。
//! モデル ID 固有のテーブル(`claude-*` など)は高レベル構築層
//! (`provider::capability`)の責務。ここでは未知モデルでも「この wire で
//! 安全に送れる最小共通項」を返すだけに留める。
use crate::llm_client::capability::{
CacheStrategy, ModelCapability, ReasoningSupport, StructuredOutput, ToolCallingSupport,
CacheStrategy, ModelCapability, StructuredOutput, ToolCallingSupport,
};
/// Default capability for official Anthropic models.
///
/// Applies to any model id carrying the `claude-` prefix
/// (`claude-sonnet-*` / `claude-opus-*` / `claude-haiku-*`); every other id
/// yields `None`. `cache_control` is only effective on the official API, with
/// at most 4 breakpoints (per the official spec).
pub(crate) fn lookup(model_id: &str) -> Option<ModelCapability> {
    let is_claude = model_id.starts_with("claude-");
    is_claude.then(|| ModelCapability {
        tool_calling: ToolCallingSupport::Parallel,
        structured_output: StructuredOutput::JsonSchema,
        reasoning: Some(ReasoningSupport::BudgetTokens),
        vision: true,
        prompt_caching: CacheStrategy::Explicit { max_breakpoints: 4 },
    })
}
/// Scheme 既定の capability。
///
/// Ollama の `/v1/messages` 流用を想定して `cache_control` を送らない
/// `CacheStrategy::Auto` にする。Anthropic 本家の未知モデル(新 Claude)
/// も tool_calling / vision を備える想定で Parallel / true を返す。
/// `CacheStrategy::Auto` にする。
pub(crate) fn default_capability() -> ModelCapability {
ModelCapability {
tool_calling: ToolCallingSupport::Parallel,

View File

@ -12,6 +12,7 @@ use crate::llm_client::{
use serde::Deserialize;
use super::AnthropicScheme;
use super::scheme_impl::{AnthropicState, PendingThinking};
/// Anthropic SSEイベントタイプ
#[derive(Debug, Clone, PartialEq, Eq)]
@ -75,7 +76,21 @@ pub(crate) enum ContentBlock {
#[serde(rename = "text")]
Text { text: String },
#[serde(rename = "thinking")]
Thinking { thinking: String },
Thinking {
#[serde(default)]
thinking: String,
/// 非ストリーミングレスポンス由来の初期 signature(通常はストリームでは
/// 空 → `signature_delta` で埋まる)。
#[serde(default)]
signature: Option<String>,
},
#[serde(rename = "redacted_thinking")]
RedactedThinking {
/// 暗号化された opaque blob。signature ではなく、まるごと
/// `redacted_thinking.data` として送り返す必要がある。
#[serde(default)]
data: String,
},
#[serde(rename = "tool_use")]
ToolUse {
id: String,
@ -228,7 +243,9 @@ impl AnthropicScheme {
fn convert_block_start(&self, event: &ContentBlockStartEvent) -> Event {
let (block_type, metadata) = match &event.content_block {
ContentBlock::Text { .. } => (BlockType::Text, BlockMetadata::Text),
ContentBlock::Thinking { .. } => (BlockType::Thinking, BlockMetadata::Thinking),
ContentBlock::Thinking { .. } | ContentBlock::RedactedThinking { .. } => {
(BlockType::Thinking, BlockMetadata::Thinking)
}
ContentBlock::ToolUse { id, name, .. } => (
BlockType::ToolUse,
BlockMetadata::ToolUse {
@ -264,6 +281,121 @@ impl AnthropicScheme {
}))
}
/// Stateful, higher-level parse that threads `state` across SSE events.
///
/// On top of the single `Event` produced by `parse_event`, this:
/// - writes the `block_type` observed at the preceding start back into the
///   `content_block_stop` event (falling back to `Text` when none is stored)
/// - accumulates the body, signature, and data of `thinking` /
///   `redacted_thinking` blocks in `state.pending_thinking`, and emits an
///   additional `Event::ReasoningItem` on `content_block_stop`
/// - accumulates `signature_delta` fragments (they are not forwarded to the
///   stream channel; they only show up in the reasoning event)
///
/// Returns an empty vector for event types that `AnthropicEventType::parse`
/// does not recognise, and propagates JSON decoding failures as errors.
pub(crate) fn parse_with_state(
    &self,
    event_type: &str,
    data: &str,
    state: &mut AnthropicState,
) -> Result<Vec<Event>, ClientError> {
    let kind = match AnthropicEventType::parse(event_type) {
        Some(kind) => kind,
        None => return Ok(Vec::new()),
    };

    let mut events: Vec<Event> = Vec::new();
    match kind {
        AnthropicEventType::ContentBlockStart => {
            let start: ContentBlockStartEvent = serde_json::from_str(data)?;
            // Resolve the block type and, for reasoning blocks, the fresh
            // accumulation buffer in a single pass over the content block.
            let (block_type, buffer) = match &start.content_block {
                ContentBlock::Text { .. } => (BlockType::Text, None),
                ContentBlock::ToolUse { .. } => (BlockType::ToolUse, None),
                ContentBlock::Thinking {
                    thinking,
                    signature,
                } => (
                    BlockType::Thinking,
                    Some(PendingThinking {
                        text: thinking.clone(),
                        signature: signature.clone(),
                        redacted_data: None,
                    }),
                ),
                ContentBlock::RedactedThinking { data: blob } => (
                    BlockType::Thinking,
                    Some(PendingThinking {
                        text: String::new(),
                        signature: None,
                        redacted_data: Some(blob.clone()),
                    }),
                ),
            };
            state.current_block_type = Some(block_type);
            // Non-reasoning blocks leave any existing buffer untouched.
            if buffer.is_some() {
                state.pending_thinking = buffer;
            }
            events.push(self.convert_block_start(&start));
        }
        AnthropicEventType::ContentBlockDelta => {
            let chunk: ContentBlockDeltaEvent = serde_json::from_str(data)?;
            // `None` means the delta is absorbed into state and not streamed.
            let forwarded = match &chunk.delta {
                DeltaBlock::TextDelta { text } => Some(DeltaContent::Text(text.clone())),
                DeltaBlock::InputJsonDelta { partial_json } => {
                    Some(DeltaContent::InputJson(partial_json.clone()))
                }
                DeltaBlock::ThinkingDelta { thinking } => {
                    if let Some(buffer) = state.pending_thinking.as_mut() {
                        buffer.text.push_str(thinking);
                    }
                    Some(DeltaContent::Thinking(thinking.clone()))
                }
                DeltaBlock::SignatureDelta { signature } => {
                    if let Some(buffer) = state.pending_thinking.as_mut() {
                        // Usually arrives once, but concatenate in case the
                        // signature is split across several fragments.
                        buffer
                            .signature
                            .get_or_insert_with(String::new)
                            .push_str(signature);
                    }
                    None
                }
            };
            if let Some(delta) = forwarded {
                events.push(Event::BlockDelta(BlockDelta {
                    index: chunk.index,
                    delta,
                }));
            }
        }
        AnthropicEventType::ContentBlockStop => {
            let stop: ContentBlockStopEvent = serde_json::from_str(data)?;
            let block_type = state.current_block_type.take().unwrap_or(BlockType::Text);
            let closes_thinking = matches!(block_type, BlockType::Thinking);
            events.push(Event::BlockStop(BlockStop {
                index: stop.index,
                block_type,
                stop_reason: None,
            }));
            // The reasoning item always follows its BlockStop.
            if closes_thinking {
                events.extend(
                    state
                        .pending_thinking
                        .take()
                        .map(|buffer| Event::ReasoningItem(buffer.into_event())),
                );
            }
        }
        // Everything else needs no state; delegate to the existing parse_event.
        _ => events.extend(self.parse_event(event_type, data)?),
    }
    Ok(events)
}
fn convert_usage(&self, usage: &UsageData) -> UsageEvent {
// Anthropic の `input_tokens` は **キャッシュ外** の入力トークンのみで、
// プロンプト全長は input_tokens + cache_read + cache_creation。
@ -391,6 +523,117 @@ mod tests {
}
}
/// Streams a complete `thinking` block (start, two text deltas, one signature
/// delta, stop) and checks the reasoning item emitted at the stop.
#[test]
fn thinking_block_emits_reasoning_item_with_signature() {
// Once a thinking block completes, its ReasoningItem must carry text + signature.
let scheme = AnthropicScheme::new();
let mut state = AnthropicState::default();
// Open a thinking block whose initial text is empty.
let evs = scheme
.parse_with_state(
"content_block_start",
r#"{"type":"content_block_start","index":0,"content_block":{"type":"thinking","thinking":""}}"#,
&mut state,
)
.unwrap();
assert!(matches!(evs[0], Event::BlockStart(_)));
// Two thinking deltas; their text is expected to be concatenated in order.
scheme
.parse_with_state(
"content_block_delta",
r#"{"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":"hello "}}"#,
&mut state,
)
.unwrap();
scheme
.parse_with_state(
"content_block_delta",
r#"{"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":"world"}}"#,
&mut state,
)
.unwrap();
// The signature arrives as its own delta.
scheme
.parse_with_state(
"content_block_delta",
r#"{"type":"content_block_delta","index":0,"delta":{"type":"signature_delta","signature":"SIG-XYZ"}}"#,
&mut state,
)
.unwrap();
let stop_evs = scheme
.parse_with_state(
"content_block_stop",
r#"{"type":"content_block_stop","index":0}"#,
&mut state,
)
.unwrap();
// Two events are emitted in order: BlockStop, then ReasoningItem.
assert!(matches!(stop_evs[0], Event::BlockStop(_)));
let Event::ReasoningItem(reasoning) = &stop_evs[1] else {
panic!("expected ReasoningItem, got {:?}", stop_evs[1]);
};
assert_eq!(reasoning.text, "hello world");
assert_eq!(reasoning.signature.as_deref(), Some("SIG-XYZ"));
assert!(reasoning.encrypted_content.is_none());
}
/// A `redacted_thinking` block must surface its opaque `data` as the
/// reasoning item's `encrypted_content`, with empty text and no signature.
#[test]
fn redacted_thinking_emits_reasoning_item_with_data() {
let scheme = AnthropicScheme::new();
let mut state = AnthropicState::default();
// The blob is delivered in full on the start event; no deltas follow.
scheme
.parse_with_state(
"content_block_start",
r#"{"type":"content_block_start","index":0,"content_block":{"type":"redacted_thinking","data":"opaque-blob"}}"#,
&mut state,
)
.unwrap();
let stop_evs = scheme
.parse_with_state(
"content_block_stop",
r#"{"type":"content_block_stop","index":0}"#,
&mut state,
)
.unwrap();
// Index 1: the reasoning item is emitted after the BlockStop at index 0.
let Event::ReasoningItem(reasoning) = &stop_evs[1] else {
panic!("expected ReasoningItem");
};
assert!(reasoning.text.is_empty());
assert!(reasoning.signature.is_none());
assert_eq!(reasoning.encrypted_content.as_deref(), Some("opaque-blob"));
}
/// A plain `text` block must close with a single BlockStop and must not
/// produce any reasoning item.
#[test]
fn text_block_does_not_emit_reasoning_item() {
let scheme = AnthropicScheme::new();
let mut state = AnthropicState::default();
scheme
.parse_with_state(
"content_block_start",
r#"{"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}"#,
&mut state,
)
.unwrap();
scheme
.parse_with_state(
"content_block_delta",
r#"{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"hi"}}"#,
&mut state,
)
.unwrap();
let stop_evs = scheme
.parse_with_state(
"content_block_stop",
r#"{"type":"content_block_stop","index":0}"#,
&mut state,
)
.unwrap();
// Exactly one event: the BlockStop, with nothing appended after it.
assert_eq!(stop_evs.len(), 1);
assert!(matches!(stop_evs[0], Event::BlockStop(_)));
}
#[test]
fn test_parse_ping() {
let scheme = AnthropicScheme::new();

View File

@ -8,12 +8,16 @@ use serde::Serialize;
use crate::llm_client::{
Request,
capability::{CacheStrategy, ModelCapability, ReasoningSupport},
capability::{CacheStrategy, ModelCapability, ReasoningControl, ReasoningSupport},
types::{ContentPart, Item, Role, ToolDefinition, parse_tool_arguments},
};
use super::AnthropicScheme;
/// Returns `true` exactly when `value` is `false`.
///
/// Takes a reference so it can serve as a serde `skip_serializing_if`
/// predicate for `bool` fields.
fn is_false(value: &bool) -> bool {
    matches!(*value, false)
}
/// Anthropic API request body
#[derive(Debug, Serialize)]
pub(crate) struct AnthropicRequest {
@ -41,7 +45,7 @@ pub(crate) struct AnthropicRequest {
#[derive(Debug, Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub(crate) enum AnthropicThinking {
Enabled { budget_tokens: u32 },
Enabled { budget_tokens: i32 },
}
/// Anthropic message
@ -77,6 +81,21 @@ pub(crate) enum AnthropicContentPart {
#[serde(skip_serializing_if = "Option::is_none")]
cache_control: Option<CacheControl>,
},
#[serde(rename = "thinking")]
Thinking {
thinking: String,
signature: String,
#[serde(skip_serializing_if = "Option::is_none")]
cache_control: Option<CacheControl>,
},
#[serde(rename = "redacted_thinking")]
RedactedThinking {
/// 暗号化済み reasoning blob。`Item::Reasoning::encrypted_content`
/// から渡る。
data: String,
#[serde(skip_serializing_if = "Option::is_none")]
cache_control: Option<CacheControl>,
},
#[serde(rename = "tool_use")]
ToolUse {
id: String,
@ -89,6 +108,8 @@ pub(crate) enum AnthropicContentPart {
ToolResult {
tool_use_id: String,
content: String,
#[serde(default, skip_serializing_if = "is_false")]
is_error: bool,
#[serde(skip_serializing_if = "Option::is_none")]
cache_control: Option<CacheControl>,
},
@ -102,6 +123,21 @@ impl AnthropicContentPart {
}
}
/// Builds a `thinking` content part from the reasoning text and its
/// signature, with no cache marker attached.
fn thinking(thinking: String, signature: String) -> Self {
    let cache_control = None;
    Self::Thinking {
        thinking,
        signature,
        cache_control,
    }
}
/// Builds a `redacted_thinking` content part around an opaque `data` blob,
/// with no cache marker attached.
fn redacted_thinking(data: String) -> Self {
    let cache_control = None;
    Self::RedactedThinking {
        data,
        cache_control,
    }
}
fn tool_use(id: String, name: String, input: serde_json::Value) -> Self {
Self::ToolUse {
id,
@ -111,10 +147,11 @@ impl AnthropicContentPart {
}
}
fn tool_result(tool_use_id: String, content: String) -> Self {
fn tool_result(tool_use_id: String, content: String, is_error: bool) -> Self {
Self::ToolResult {
tool_use_id,
content,
is_error,
cache_control: None,
}
}
@ -122,6 +159,8 @@ impl AnthropicContentPart {
fn set_cache_control(&mut self, cc: CacheControl) {
match self {
Self::Text { cache_control, .. }
| Self::Thinking { cache_control, .. }
| Self::RedactedThinking { cache_control, .. }
| Self::ToolUse { cache_control, .. }
| Self::ToolResult { cache_control, .. } => {
*cache_control = Some(cc);
@ -170,9 +209,13 @@ impl AnthropicScheme {
.config
.reasoning
.as_ref()
.and_then(|rc| rc.budget_tokens)
.filter(|_| supports_budget_tokens)
.map(|budget_tokens| AnthropicThinking::Enabled { budget_tokens });
.and_then(|rc| match rc {
ReasoningControl::BudgetTokens(budget_tokens) => Some(AnthropicThinking::Enabled {
budget_tokens: *budget_tokens,
}),
ReasoningControl::Effort(_) => None,
});
AnthropicRequest {
model: model.to_string(),
@ -199,10 +242,13 @@ impl AnthropicScheme {
/// - Tool calls are content parts within assistant messages
/// - Tool results are content parts within user messages
///
/// Each non-`Message` item produces exactly one content part, so
/// "last part for the item" is always well-defined. For breakpoint
/// `Message` items the output is forced into the array form so a
/// marker has a part to attach to.
/// Assistant-side items are accumulated until a user/system message or
/// tool result boundary so one logical assistant burst becomes one
/// Anthropic assistant message content array. Pending parts carry their
/// origin item index; when flushed, the final part for each item records
/// the `(msg_idx, part_idx)` used by breakpoint attachment. User/system
/// `Message` items keep the single-text shorthand unless a breakpoint
/// needs a concrete part to live on.
fn convert_items_to_messages(
&self,
items: &[Item],
@ -218,26 +264,30 @@ impl AnthropicScheme {
for (i, item) in items.iter().enumerate() {
match item {
Item::Message { role, content, .. } => {
flush_pending(&mut messages, &mut pending_assistant, "assistant", &mut locations);
flush_pending(&mut messages, &mut pending_user, "user", &mut locations);
let anthropic_role = match role {
Role::User | Role::System => "user",
Role::Assistant => "assistant",
};
let parts: Vec<AnthropicContentPart> = content
.iter()
.map(|p| match p {
ContentPart::Text { text } => {
AnthropicContentPart::text(text.clone())
}
ContentPart::Text { text } => AnthropicContentPart::text(text.clone()),
ContentPart::Refusal { refusal } => {
AnthropicContentPart::text(refusal.clone())
}
})
.collect();
match role {
Role::Assistant => {
flush_pending(&mut messages, &mut pending_user, "user", &mut locations);
pending_assistant.extend(parts.into_iter().map(|part| (i, part)));
}
Role::User | Role::System => {
flush_pending(
&mut messages,
&mut pending_assistant,
"assistant",
&mut locations,
);
flush_pending(&mut messages, &mut pending_user, "user", &mut locations);
let force_parts = breakpoints.contains(&i);
let msg_idx = messages.len();
@ -246,7 +296,7 @@ impl AnthropicScheme {
if parts.len() == 1 && !force_parts {
if let AnthropicContentPart::Text { text, .. } = &parts[0] {
messages.push(AnthropicMessage {
role: anthropic_role.to_string(),
role: "user".to_string(),
content: AnthropicContent::Text(text.clone()),
});
continue;
@ -255,11 +305,13 @@ impl AnthropicScheme {
let last_part_idx = parts.len().saturating_sub(1);
messages.push(AnthropicMessage {
role: anthropic_role.to_string(),
role: "user".to_string(),
content: AnthropicContent::Parts(parts),
});
locations[i] = Some((msg_idx, last_part_idx));
}
}
}
Item::ToolCall {
call_id,
@ -282,29 +334,59 @@ impl AnthropicScheme {
call_id,
summary,
content,
is_error,
..
} => {
flush_pending(&mut messages, &mut pending_assistant, "assistant", &mut locations);
flush_pending(
&mut messages,
&mut pending_assistant,
"assistant",
&mut locations,
);
let text = match content {
Some(c) => format!("{summary}\n{c}"),
None => summary.clone(),
};
pending_user.push((
i,
AnthropicContentPart::tool_result(call_id.clone(), text),
AnthropicContentPart::tool_result(call_id.clone(), text, *is_error),
));
}
Item::Reasoning { text, .. } => {
Item::Reasoning {
text,
encrypted_content,
signature,
..
} => {
flush_pending(&mut messages, &mut pending_user, "user", &mut locations);
// Reasoning is treated as assistant text in Anthropic
// (actual thinking blocks are handled differently in streaming).
pending_assistant.push((i, AnthropicContentPart::text(text.clone())));
// Anthropic はアシスタントターン中の `thinking` /
// `redacted_thinking` ブロックを必ず assistant role の
// content_part として送り返す必要がある。
//
// - signature あり: `thinking` content_part を投影
// - signature 無し + encrypted_content あり:
// `redacted_thinking` content_part を投影
// - どちらも無い: 他 scheme(OpenAI 等)から流入した
// 素の reasoning text。Anthropic に投げる意味も
// round-trip の根拠も無いので drop。
if let Some(sig) = signature.clone() {
pending_assistant
.push((i, AnthropicContentPart::thinking(text.clone(), sig)));
} else if let Some(data) = encrypted_content.clone() {
pending_assistant.push((i, AnthropicContentPart::redacted_thinking(data)));
}
// どちらも None なら何も pend せず、本 item は無視。
}
}
}
flush_pending(&mut messages, &mut pending_assistant, "assistant", &mut locations);
flush_pending(
&mut messages,
&mut pending_assistant,
"assistant",
&mut locations,
);
flush_pending(&mut messages, &mut pending_user, "user", &mut locations);
// Apply cache_control markers at each breakpoint item's last part.
@ -400,7 +482,7 @@ fn compute_breakpoints(items: &[Item], cache_anchor: Option<usize>) -> BTreeSet<
mod tests {
use super::*;
use crate::llm_client::capability::{
CacheStrategy, StructuredOutput, ToolCallingSupport,
CacheStrategy, ReasoningEffort, StructuredOutput, ToolCallingSupport,
};
/// cache_control が有効になる既定の capability。
@ -422,6 +504,13 @@ mod tests {
}
}
/// Capability identical to `cap_explicit()` except that budget-token
/// reasoning support is switched on.
fn cap_budget_reasoning() -> ModelCapability {
    let mut capability = cap_explicit();
    capability.reasoning = Some(ReasoningSupport::BudgetTokens);
    capability
}
#[test]
fn test_build_simple_request() {
let scheme = AnthropicScheme::new();
@ -429,7 +518,8 @@ mod tests {
.system("You are a helpful assistant.")
.user("Hello!");
let anthropic_req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit());
let anthropic_req =
scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit());
assert_eq!(anthropic_req.model, "claude-sonnet-4-20250514");
assert_eq!(
@ -455,12 +545,45 @@ mod tests {
})),
);
let anthropic_req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit());
let anthropic_req =
scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit());
assert_eq!(anthropic_req.tools.len(), 1);
assert_eq!(anthropic_req.tools[0].name, "get_weather");
}
/// A `BudgetTokens` reasoning control must be serialised as
/// `thinking: { type: "enabled", budget_tokens: N }` in the request body.
#[test]
fn thinking_budget_projected_when_supported() {
let scheme = AnthropicScheme::new();
let mut request = Request::new().user("think");
request.config.reasoning = Some(ReasoningControl::BudgetTokens(4096));
let req = scheme.build_request(
"claude-sonnet-4-20250514",
&request,
&cap_budget_reasoning(),
);
// Assert on the serialised JSON so the wire-level field names are pinned.
let json = serde_json::to_value(&req).unwrap();
assert_eq!(json["thinking"]["type"], "enabled");
assert_eq!(json["thinking"]["budget_tokens"], 4096);
}
/// An `Effort` reasoning control must not produce a `thinking` field in the
/// Anthropic request.
#[test]
fn effort_reasoning_not_projected_to_anthropic() {
let scheme = AnthropicScheme::new();
let mut request = Request::new().user("think");
request.config.reasoning = Some(ReasoningControl::Effort(ReasoningEffort::High));
let req = scheme.build_request(
"claude-sonnet-4-20250514",
&request,
&cap_budget_reasoning(),
);
assert!(req.thinking.is_none());
}
#[test]
fn test_tool_call_and_result() {
let scheme = AnthropicScheme::new();
@ -473,7 +596,8 @@ mod tests {
))
.item(Item::tool_result("call_123", "Sunny, 25°C"));
let anthropic_req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit());
let anthropic_req =
scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit());
assert_eq!(anthropic_req.messages.len(), 3);
assert_eq!(anthropic_req.messages[0].role, "user");
@ -485,6 +609,8 @@ mod tests {
fn part_cache_control(part: &AnthropicContentPart) -> Option<CacheControl> {
match part {
AnthropicContentPart::Text { cache_control, .. }
| AnthropicContentPart::Thinking { cache_control, .. }
| AnthropicContentPart::RedactedThinking { cache_control, .. }
| AnthropicContentPart::ToolUse { cache_control, .. }
| AnthropicContentPart::ToolResult { cache_control, .. } => *cache_control,
}
@ -506,6 +632,109 @@ mod tests {
out
}
/// Consecutive assistant-side items (reasoning, text, tool call) must be
/// bundled into one assistant message whose parts keep the item order.
#[test]
fn assistant_burst_bundles_reasoning_text_and_tool_call() {
let scheme = AnthropicScheme::new();
let request = Request::new()
.user("question?")
.item(Item::reasoning("thinking").with_signature("SIG-A"))
.item(Item::assistant_message("answer"))
.item(Item::tool_call("c1", "tool_a", r#"{"x":1}"#));
let req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit());
// One user message followed by a single assistant message.
assert_eq!(req.messages.len(), 2, "messages: {:?}", req.messages);
assert_eq!(req.messages[0].role, "user");
assert_eq!(req.messages[1].role, "assistant");
let AnthropicContent::Parts(parts) = &req.messages[1].content else {
panic!("assistant burst must be emitted as content parts");
};
// Part order mirrors item order: thinking, text, tool_use.
assert_eq!(parts.len(), 3, "parts: {:?}", parts);
assert!(matches!(parts[0], AnthropicContentPart::Thinking { .. }));
assert!(matches!(parts[1], AnthropicContentPart::Text { .. }));
assert!(matches!(parts[2], AnthropicContentPart::ToolUse { .. }));
}
/// Tool results and user messages must terminate an assistant burst, so the
/// output alternates user / assistant messages at those boundaries.
#[test]
fn tool_result_and_user_messages_bound_assistant_bursts() {
let scheme = AnthropicScheme::new();
let request = Request::new()
.user("question?")
.item(Item::reasoning("thinking").with_signature("SIG-A"))
.item(Item::assistant_message("answer"))
.item(Item::tool_call("c1", "tool_a", "{}"))
.item(Item::tool_result("c1", "result"))
.item(Item::assistant_message("final"))
.user("follow up");
let req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit());
let roles: Vec<&str> = req.messages.iter().map(|msg| msg.role.as_str()).collect();
assert_eq!(
roles,
vec!["user", "assistant", "user", "assistant", "user"]
);
// First burst: thinking + text + tool_use in one assistant message.
let AnthropicContent::Parts(first_assistant) = &req.messages[1].content else {
panic!("first assistant burst must be content parts");
};
assert_eq!(first_assistant.len(), 3);
assert!(matches!(
first_assistant[0],
AnthropicContentPart::Thinking { .. }
));
assert!(matches!(
first_assistant[1],
AnthropicContentPart::Text { .. }
));
assert!(matches!(
first_assistant[2],
AnthropicContentPart::ToolUse { .. }
));
// The tool result travels alone in the user-role message that follows.
let AnthropicContent::Parts(tool_result) = &req.messages[2].content else {
panic!("tool result must be content parts");
};
assert_eq!(tool_result.len(), 1);
assert!(matches!(
tool_result[0],
AnthropicContentPart::ToolResult { .. }
));
// Second burst: only the final assistant text.
let AnthropicContent::Parts(second_assistant) = &req.messages[3].content else {
panic!("second assistant burst must be content parts");
};
assert_eq!(second_assistant.len(), 1);
assert!(matches!(
second_assistant[0],
AnthropicContentPart::Text { .. }
));
}
/// With the cache anchor on the assistant message (item index 2), the marker
/// must land on that item's text part inside the bundled assistant burst.
#[test]
fn assistant_message_breakpoint_maps_to_text_part_inside_burst() {
let scheme = AnthropicScheme::new();
let mut request = Request::new().items(vec![
Item::user_message("question?"),
Item::reasoning("thinking").with_signature("SIG-A"),
Item::assistant_message("answer"),
Item::tool_call("c1", "tool_a", "{}"),
Item::user_message("next"),
]);
request.cache_anchor = Some(2);
let req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit());
let AnthropicContent::Parts(parts) = &req.messages[1].content else {
panic!("assistant burst must be content parts");
};
assert!(matches!(parts[0], AnthropicContentPart::Thinking { .. }));
assert!(matches!(parts[1], AnthropicContentPart::Text { .. }));
assert!(matches!(parts[2], AnthropicContentPart::ToolUse { .. }));
// Both the anchored text part and the trailing tool_use part carry a marker.
assert_eq!(part_cache_control(&parts[1]), Some(CacheControl::Ephemeral));
assert_eq!(part_cache_control(&parts[2]), Some(CacheControl::Ephemeral));
}
/// Convenience: a turn that ends with one assistant text, one tool
/// call/result pair, and a final assistant text. Produced at
/// `history[head..]` indices shown alongside, so tests can reason
@ -607,9 +836,7 @@ mod tests {
// so we don't bloat requests with wrapper arrays. Here the Head
// lands on items[1], leaving items[0] without a marker.
let scheme = AnthropicScheme::new();
let request = Request::new()
.user("hello")
.assistant("hi there");
let request = Request::new().user("hello").assistant("hi there");
let req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit());
assert!(
matches!(req.messages[0].content, AnthropicContent::Text(_)),
@ -628,10 +855,7 @@ mod tests {
match &req.messages[0].content {
AnthropicContent::Parts(parts) => {
assert_eq!(parts.len(), 1);
assert_eq!(
part_cache_control(&parts[0]),
Some(CacheControl::Ephemeral)
);
assert_eq!(part_cache_control(&parts[0]), Some(CacheControl::Ephemeral));
}
AnthropicContent::Text(_) => panic!("breakpoint item should use Parts form"),
}
@ -668,7 +892,8 @@ mod tests {
#[test]
fn empty_items_produce_no_breakpoints() {
let scheme = AnthropicScheme::new();
let req = scheme.build_request("claude-sonnet-4-20250514", &Request::new(), &cap_explicit());
let req =
scheme.build_request("claude-sonnet-4-20250514", &Request::new(), &cap_explicit());
assert!(req.messages.is_empty());
assert!(breakpoint_positions(&req).is_empty());
}
@ -684,6 +909,165 @@ mod tests {
assert!(breakpoint_positions(&req).is_empty());
}
/// Collects, in message order, every `Thinking` / `RedactedThinking` part
/// found in assistant-role messages that use the parts form.
fn collect_assistant_thinking_parts(req: &AnthropicRequest) -> Vec<&AnthropicContentPart> {
    req.messages
        .iter()
        .filter(|msg| msg.role == "assistant")
        .filter_map(|msg| match &msg.content {
            AnthropicContent::Parts(parts) => Some(parts),
            _ => None,
        })
        .flatten()
        .filter(|&part| {
            matches!(
                part,
                AnthropicContentPart::Thinking { .. }
                    | AnthropicContentPart::RedactedThinking { .. }
            )
        })
        .collect()
}
#[test]
fn reasoning_with_signature_projects_thinking_part() {
// An Item::Reasoning that carries a signature is sent as a `thinking`
// content_part in an assistant-role message.
let scheme = AnthropicScheme::new();
let request = Request::new()
.user("hi")
.item(Item::reasoning("step-by-step").with_signature("SIG-A"))
.item(Item::assistant_message("done"));
let req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit());
let thinking_parts = collect_assistant_thinking_parts(&req);
assert_eq!(thinking_parts.len(), 1);
// Text and signature must round-trip unchanged.
match thinking_parts[0] {
AnthropicContentPart::Thinking {
thinking,
signature,
..
} => {
assert_eq!(thinking, "step-by-step");
assert_eq!(signature, "SIG-A");
}
other => panic!("expected Thinking part, got {other:?}"),
}
}
/// An Item::Reasoning with encrypted content but no signature must be sent
/// as a `redacted_thinking` part carrying that blob as `data`.
#[test]
fn reasoning_with_only_encrypted_content_projects_redacted_thinking() {
let scheme = AnthropicScheme::new();
let request = Request::new()
.user("hi")
.item(Item::reasoning("").with_encrypted_content("opaque"))
.item(Item::assistant_message("done"));
let req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit());
let parts = collect_assistant_thinking_parts(&req);
assert_eq!(parts.len(), 1);
match parts[0] {
AnthropicContentPart::RedactedThinking { data, .. } => {
assert_eq!(data, "opaque");
}
other => panic!("expected RedactedThinking, got {other:?}"),
}
}
#[test]
fn reasoning_without_signature_or_encrypted_is_dropped() {
// Plain reasoning that flowed in from another scheme is not sent to Anthropic.
let scheme = AnthropicScheme::new();
let request = Request::new()
.user("hi")
.item(Item::reasoning("plain text"))
.item(Item::assistant_message("done"));
let req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit());
// Not a single thinking part may be present.
assert!(collect_assistant_thinking_parts(&req).is_empty());
}
#[test]
fn thinking_part_lands_in_assistant_role_message() {
// Wire-structure position check: thinking parts are laid out in
// assistant-role messages (and never in a user-role message).
let scheme = AnthropicScheme::new();
let request = Request::new()
.user("question?")
.item(Item::reasoning("thinking inside").with_signature("SIG-A"))
.item(Item::tool_call("c1", "tool_a", "{}"))
.item(Item::tool_result("c1", "result"))
.user("follow up");
let req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit());
// Every thinking part must live in an assistant-role message.
let mut thinking_msg_indices = Vec::new();
for (i, msg) in req.messages.iter().enumerate() {
if let AnthropicContent::Parts(parts) = &msg.content {
if parts
.iter()
.any(|p| matches!(p, AnthropicContentPart::Thinking { .. }))
{
assert_eq!(
msg.role, "assistant",
"thinking part must be in assistant role, got {} at msg {}",
msg.role, i,
);
thinking_msg_indices.push(i);
}
}
}
// Guard against a vacuous pass: at least one thinking part must exist.
assert!(
!thinking_msg_indices.is_empty(),
"expected at least one thinking part in messages: {:?}",
req.messages,
);
// The assistant message holding the thinking part must not come after the
// first assistant message holding a tool_use
// (Anthropic spec: thinking → tool_use order within one logical turn).
let mut tool_use_msg_indices = Vec::new();
for (i, msg) in req.messages.iter().enumerate() {
if let AnthropicContent::Parts(parts) = &msg.content {
if parts
.iter()
.any(|p| matches!(p, AnthropicContentPart::ToolUse { .. }))
{
tool_use_msg_indices.push(i);
}
}
}
assert!(!tool_use_msg_indices.is_empty(), "expected tool_use part");
let first_thinking = thinking_msg_indices[0];
let first_tool_use = tool_use_msg_indices[0];
// `<=` because both parts may share the same bundled assistant message.
assert!(
first_thinking <= first_tool_use,
"thinking msg ({}) must precede tool_use msg ({})",
first_thinking,
first_tool_use,
);
}
/// `redacted_thinking` parts must be placed in an assistant-role message,
/// exactly like regular thinking parts.
#[test]
fn redacted_thinking_part_lands_in_assistant_role_message() {
    let scheme = AnthropicScheme::new();
    let request = Request::new()
        .user("ask")
        .item(Item::reasoning("").with_encrypted_content("opaque"))
        .item(Item::tool_call("c1", "tool_a", "{}"))
        .item(Item::tool_result("c1", "ok"));
    let req = scheme.build_request("claude-sonnet-4-20250514", &request, &cap_explicit());

    let mut redacted_seen = 0usize;
    for msg in &req.messages {
        let AnthropicContent::Parts(parts) = &msg.content else {
            continue;
        };
        for part in parts {
            if matches!(part, AnthropicContentPart::RedactedThinking { .. }) {
                assert_eq!(msg.role, "assistant");
                redacted_seen += 1;
            }
        }
    }
    // Without this guard the role check above passes vacuously whenever the
    // redacted part is dropped from the request altogether.
    assert!(
        redacted_seen > 0,
        "expected at least one redacted_thinking part in messages: {:?}",
        req.messages,
    );
}
#[test]
fn tool_definitions_carry_no_cache_control() {
// Tool JSON schema must serialise unchanged — no sneak-in of

View File

@ -7,9 +7,9 @@ use serde_json::Value;
use crate::llm_client::{
ClientError,
capability::ModelCapability,
event::{BlockStop, BlockType, Event},
auth::AuthRequirement,
capability::ModelCapability,
event::{BlockType, Event, ReasoningItemEvent},
scheme::Scheme,
types::Request,
};
@ -18,12 +18,37 @@ use super::AnthropicScheme;
/// Anthropic の SSE パースで必要な状態。
///
/// `content_block_stop` イベントは `block_type` を持たない仕様なので、
/// 1. `content_block_stop` イベントは `block_type` を持たない仕様なので、
/// 直前の `content_block_start` で観測した `block_type` を保持して
/// `BlockStop` に書き戻す。
/// 2. `thinking` ブロック中の `thinking_delta` テキストと `signature_delta`
/// 署名、および `redacted_thinking` ブロックの `data` を蓄積し、
/// `content_block_stop` で `Event::ReasoningItem` を発火する
/// (round-trip 永続化のため)。
#[derive(Debug, Default)]
pub struct AnthropicState {
current_block_type: Option<BlockType>,
pub(crate) current_block_type: Option<BlockType>,
pub(crate) pending_thinking: Option<PendingThinking>,
}
/// Accumulation buffer for a single `thinking` or `redacted_thinking`
/// content_block.
#[derive(Debug, Default)]
pub(crate) struct PendingThinking {
/// Thinking text accumulated so far.
pub(crate) text: String,
/// Signature accumulated so far; `None` until one has been observed.
pub(crate) signature: Option<String>,
/// Opaque blob of a `redacted_thinking` block; `None` for regular thinking.
pub(crate) redacted_data: Option<String>,
}
impl PendingThinking {
    /// Consumes the buffer and turns it into a `ReasoningItemEvent`.
    ///
    /// `redacted_data` becomes the event's `encrypted_content`; the event is
    /// created without an id and with an empty summary.
    pub(crate) fn into_event(self) -> ReasoningItemEvent {
        let Self {
            text,
            signature,
            redacted_data,
        } = self;
        ReasoningItemEvent {
            id: None,
            text,
            summary: Vec::new(),
            encrypted_content: redacted_data,
            signature,
        }
    }
}
impl Scheme for AnthropicScheme {
@ -73,28 +98,7 @@ impl Scheme for AnthropicScheme {
data: &str,
state: &mut Self::State,
) -> Result<Vec<Event>, ClientError> {
let Some(mut event) = self.parse_event(event_type, data)? else {
return Ok(Vec::new());
};
match &event {
Event::BlockStart(start) => {
state.current_block_type = Some(start.block_type);
}
Event::BlockStop(stop) => {
if let Some(block_type) = state.current_block_type.take() {
event = Event::BlockStop(BlockStop {
block_type,
..stop.clone()
});
}
}
_ => {}
}
Ok(vec![event])
}
fn capability_for(&self, model_id: &str) -> Option<ModelCapability> {
super::capability::lookup(model_id)
self.parse_with_state(event_type, data, state)
}
fn default_capability(&self) -> ModelCapability {

View File

@ -1,10 +1,14 @@
//! `model_id → ModelCapability` 静的テーブルGoogle Gemini
//! Gemini scheme の wire-level 既定 capability。
//!
//! モデル ID 固有のテーブル(`gemini-*` バージョン別の reasoning 有無)は
//! 高レベル構築層(`provider::capability`)の責務。ここでは wire の
//! 保守的 default のみ。
use crate::llm_client::capability::{
CacheStrategy, ModelCapability, ReasoningSupport, StructuredOutput, ToolCallingSupport,
CacheStrategy, ModelCapability, StructuredOutput, ToolCallingSupport,
};
/// Scheme 既定の capability(未知モデル / 未明示モデル用)
/// Scheme 既定の capability(未知モデル / 未明示モデル用)
pub(crate) fn default_capability() -> ModelCapability {
ModelCapability {
tool_calling: ToolCallingSupport::Parallel,
@ -14,24 +18,3 @@ pub(crate) fn default_capability() -> ModelCapability {
prompt_caching: CacheStrategy::Auto,
}
}
pub(crate) fn lookup(model_id: &str) -> Option<ModelCapability> {
if !model_id.starts_with("gemini-") {
return None;
}
// 2.5 系以降は thinking / reasoning を持つ
let reasoning = if model_id.starts_with("gemini-2.5")
|| model_id.starts_with("gemini-3")
{
Some(ReasoningSupport::BudgetTokens)
} else {
None
};
Some(ModelCapability {
tool_calling: ToolCallingSupport::Parallel,
structured_output: StructuredOutput::JsonSchema,
reasoning,
vision: true,
prompt_caching: CacheStrategy::Auto,
})
}

View File

@ -131,6 +131,7 @@ impl GeminiScheme {
status: None,
code: Some("parse_error".to_string()),
message: format!("Failed to parse Gemini SSE data: {} -> {}", e, data),
retry_after: None,
})?;
let mut events = Vec::new();

View File

@ -7,7 +7,7 @@ use serde_json::Value;
use crate::llm_client::{
Request,
capability::{ModelCapability, ReasoningSupport},
capability::{ModelCapability, ReasoningControl, ReasoningSupport},
types::{Item, Role, ToolDefinition, parse_tool_arguments},
};
@ -203,10 +203,12 @@ impl GeminiScheme {
.config
.reasoning
.as_ref()
.and_then(|rc| rc.budget_tokens)
.filter(|_| supports_budget)
.map(|budget| GeminiThinkingConfig {
thinking_budget: budget as i32,
.and_then(|rc| match rc {
ReasoningControl::BudgetTokens(budget) => Some(GeminiThinkingConfig {
thinking_budget: *budget,
}),
ReasoningControl::Effort(_) => None,
});
// Generation config
@ -374,7 +376,9 @@ impl GeminiScheme {
#[cfg(test)]
mod tests {
use super::*;
use crate::llm_client::capability::{CacheStrategy, StructuredOutput, ToolCallingSupport};
use crate::llm_client::capability::{
CacheStrategy, ReasoningEffort, StructuredOutput, ToolCallingSupport,
};
fn cap() -> ModelCapability {
ModelCapability {
@ -386,6 +390,13 @@ mod tests {
}
}
/// Test capability: same as `cap()` except that budget-token reasoning is
/// advertised.
fn cap_budget_reasoning() -> ModelCapability {
    let mut capability = cap();
    capability.reasoning = Some(ReasoningSupport::BudgetTokens);
    capability
}
#[test]
fn test_build_simple_request() {
let scheme = GeminiScheme::new();
@ -457,4 +468,29 @@ mod tests {
assert_eq!(gemini_req.contents[1].role, "model");
assert_eq!(gemini_req.contents[2].role, "user");
}
// A `BudgetTokens` reasoning control must be forwarded unchanged as
// `thinking_budget` when the capability advertises budget-token reasoning.
#[test]
fn thinking_budget_projected_when_supported() {
let scheme = GeminiScheme::new();
let mut request = Request::new().user("think");
request.config.reasoning = Some(ReasoningControl::BudgetTokens(-1));
let gemini_req = scheme.build_request(&request, &cap_budget_reasoning());
let config = gemini_req.generation_config.expect("generation config");
let thinking = config.thinking_config.expect("thinking config");
assert_eq!(thinking.thinking_budget, -1);
}
// An `Effort` reasoning control must not produce a Gemini thinking config,
// even when the capability advertises budget-token reasoning.
#[test]
fn effort_reasoning_not_projected_to_gemini() {
let scheme = GeminiScheme::new();
let mut request = Request::new().user("think");
request.config.reasoning = Some(ReasoningControl::Effort(ReasoningEffort::Medium));
let gemini_req = scheme.build_request(&request, &cap_budget_reasoning());
let config = gemini_req.generation_config.expect("generation config");
assert!(config.thinking_config.is_none());
}
}

View File

@ -3,11 +3,7 @@
use serde_json::Value;
use crate::llm_client::{
ClientError,
capability::ModelCapability,
event::Event,
auth::AuthRequirement,
scheme::Scheme,
ClientError, auth::AuthRequirement, capability::ModelCapability, event::Event, scheme::Scheme,
types::Request,
};
@ -47,10 +43,6 @@ impl Scheme for GeminiScheme {
Ok(self.parse_event(data)?.unwrap_or_default())
}
fn capability_for(&self, model_id: &str) -> Option<ModelCapability> {
super::capability::lookup(model_id)
}
fn default_capability(&self) -> ModelCapability {
super::capability::default_capability()
}

View File

@ -76,13 +76,10 @@ pub trait Scheme: Clone + Send + Sync + 'static {
state: &mut Self::State,
) -> Result<Vec<Event>, ClientError>;
/// 既知モデル ID の能力テーブル引き。未知なら `None` を返す
/// ので、呼び出し側は [`Scheme::default_capability`] に
/// フォールバックする。
fn capability_for(&self, model_id: &str) -> Option<ModelCapability>;
/// scheme 既定の capability。未知モデル ID や未明示モデルでの
/// フォールバックに使う。`capability_for` と違って必ず値を返す。
/// scheme 既定の capability。モデル ID に関係なく、この wire で
/// 安全に送れる最小共通項を返す。既知モデル ID の能力テーブルは
/// `provider::capability::lookup` 側(高レベル構築層)の責務で、
/// scheme はここには関与しない。
fn default_capability(&self) -> ModelCapability;
/// scheme 側でサポートしていない `RequestConfig` フィールドを
@ -93,4 +90,3 @@ pub trait Scheme: Clone + Send + Sync + 'static {
Vec::new()
}
}

View File

@ -1,76 +1,13 @@
//! `model_id → ModelCapability` 静的テーブルOpenAI Chat Completions
//! OpenAI Chat Completions scheme の wire-level 既定 capability
//!
//! OpenAI 本家の主要モデルのみ網羅する。OpenRouter / xAI / Groq 等は
//! モデル ID が各社独自なので、マニフェスト側で明示 override する
//! 前提。
//!
//! [`classify`] はモデル ID から family を判定する一次情報で、
//! `scheme/openai_responses` からも参照される。
//! モデル ID 固有のテーブル(`gpt-5` 系など)は高レベル構築層
//! (`provider::capability`)の責務。ここでは wire の保守的 default のみ。
use crate::llm_client::capability::{
CacheStrategy, ModelCapability, ReasoningSupport, StructuredOutput, ToolCallingSupport,
CacheStrategy, ModelCapability, StructuredOutput, ToolCallingSupport,
};
/// OpenAI 本家のモデル family 分類。
///
/// `openai_chat` と `openai_responses` で共有する一次情報。各 scheme は
/// この分類に自 scheme 固有の `ReasoningSupport` 等を当てはめて
/// `ModelCapability` を組み立てる。
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum OpenAiFamily {
/// GPT-5 / o1 / o3 / o4 系 — reasoning 対応
Reasoning,
/// GPT-4o / GPT-4 系
Gpt4,
/// GPT-3.5 系(旧式)
Gpt35,
}
/// モデル ID の prefix から family を判定する。未知は `None`。
pub(crate) fn classify(model_id: &str) -> Option<OpenAiFamily> {
if model_id.starts_with("gpt-5")
|| model_id.starts_with("o1")
|| model_id.starts_with("o3")
|| model_id.starts_with("o4")
{
return Some(OpenAiFamily::Reasoning);
}
if model_id.starts_with("gpt-4") {
return Some(OpenAiFamily::Gpt4);
}
if model_id.starts_with("gpt-3.5") {
return Some(OpenAiFamily::Gpt35);
}
None
}
pub(crate) fn lookup(model_id: &str) -> Option<ModelCapability> {
classify(model_id).map(|family| match family {
OpenAiFamily::Reasoning => ModelCapability {
tool_calling: ToolCallingSupport::Parallel,
structured_output: StructuredOutput::JsonSchema,
reasoning: Some(ReasoningSupport::Effort),
vision: true,
prompt_caching: CacheStrategy::Auto,
},
OpenAiFamily::Gpt4 => ModelCapability {
tool_calling: ToolCallingSupport::Parallel,
structured_output: StructuredOutput::JsonSchema,
reasoning: None,
vision: true,
prompt_caching: CacheStrategy::Auto,
},
OpenAiFamily::Gpt35 => ModelCapability {
tool_calling: ToolCallingSupport::Parallel,
structured_output: StructuredOutput::JsonObject,
reasoning: None,
vision: false,
prompt_caching: CacheStrategy::Auto,
},
})
}
/// Scheme 既定の capability。OpenAI 互換ルーター系xAI / Groq / OpenRouter 等)
/// Scheme 既定の capability。OpenAI 互換ルーター系(xAI / Groq / OpenRouter 等)
/// で未知モデル ID を受けたときのフォールバックに使う。
pub(crate) fn default_capability() -> ModelCapability {
ModelCapability {

View File

@ -75,6 +75,7 @@ impl OpenAIScheme {
status: None,
code: Some("parse_error".to_string()),
message: format!("Failed to parse SSE data: {} -> {}", e, data),
retry_after: None,
})?;
let mut events = Vec::new();

View File

@ -7,7 +7,7 @@ use serde_json::Value;
use crate::llm_client::{
Request,
capability::{ModelCapability, ReasoningEffort, ReasoningSupport},
capability::{ModelCapability, ReasoningControl, ReasoningSupport},
types::{Item, Role, ToolDefinition, parse_tool_arguments},
};
@ -37,7 +37,7 @@ pub(crate) struct OpenAIRequest {
pub tool_choice: Option<String>,
/// Reasoning effort(o1 / o3 / o4 / gpt-5 系で有効)。
#[serde(skip_serializing_if = "Option::is_none")]
pub reasoning_effort: Option<&'static str>,
pub reasoning_effort: Option<String>,
}
#[derive(Debug, Serialize)]
@ -154,12 +154,10 @@ impl OpenAIScheme {
.config
.reasoning
.as_ref()
.and_then(|rc| rc.effort)
.filter(|_| supports_effort)
.map(|effort| match effort {
ReasoningEffort::Low => "low",
ReasoningEffort::Medium => "medium",
ReasoningEffort::High => "high",
.and_then(|rc| match rc {
ReasoningControl::Effort(effort) => Some(effort.as_str().to_string()),
ReasoningControl::BudgetTokens(_) => None,
});
OpenAIRequest {
@ -322,7 +320,9 @@ impl OpenAIScheme {
#[cfg(test)]
mod tests {
use super::*;
use crate::llm_client::capability::{CacheStrategy, StructuredOutput, ToolCallingSupport};
use crate::llm_client::capability::{
CacheStrategy, ReasoningEffort, StructuredOutput, ToolCallingSupport,
};
fn cap() -> ModelCapability {
ModelCapability {
@ -387,6 +387,38 @@ mod tests {
assert!(body.max_tokens.is_none());
}
// An `Effort` reasoning control is written to `reasoning_effort` using its
// string form; a provider-specific `Other` value passes through unchanged.
#[test]
fn reasoning_effort_projected_when_supported() {
let scheme = OpenAIScheme::new();
let mut request = Request::new().user("Hello");
request.config.reasoning = Some(ReasoningControl::Effort(ReasoningEffort::Other(
"provider-native".into(),
)));
let capability = ModelCapability {
reasoning: Some(ReasoningSupport::Effort),
..cap()
};
let body = scheme.build_request("gpt-5", &request, &capability);
assert_eq!(body.reasoning_effort.as_deref(), Some("provider-native"));
}
// A `BudgetTokens` reasoning control leaves `reasoning_effort` unset, even
// when the capability advertises `ReasoningSupport::Both`.
#[test]
fn budget_reasoning_not_projected_to_openai_chat() {
let scheme = OpenAIScheme::new();
let mut request = Request::new().user("Hello");
request.config.reasoning = Some(ReasoningControl::BudgetTokens(4096));
let capability = ModelCapability {
reasoning: Some(ReasoningSupport::Both),
..cap()
};
let body = scheme.build_request("gpt-5", &request, &capability);
assert!(body.reasoning_effort.is_none());
}
#[test]
fn test_tool_call_and_result() {
let scheme = OpenAIScheme::new();

View File

@ -52,10 +52,6 @@ impl Scheme for OpenAIScheme {
Ok(self.parse_event(data)?.unwrap_or_default())
}
fn capability_for(&self, model_id: &str) -> Option<ModelCapability> {
super::capability::lookup(model_id)
}
fn default_capability(&self) -> ModelCapability {
super::capability::default_capability()
}

View File

@ -1,75 +1,11 @@
//! `model_id → ModelCapability` 静的テーブルOpenAI Responses API
//! OpenAI Responses scheme の wire-level 既定 capability
//!
//! モデル family 判定は `scheme/openai_chat/capability.rs::classify` を
//! 共有する。Responses 側は `ReasoningSupport::Effort` 固定で、prompt
//! caching はサーバ側自動(`CacheStrategy::Auto`)。
//!
//! `gpt-5-codex` は `gpt-5` prefix 経由で Reasoning 扱いされるが、
//! `codex-mini-latest` 等 `codex-` prefix のモデルは ChatGPT backend
//! 経由CodexOAuthでしか使えないため、このテーブルでだけ Reasoning
//! にフォールバックする。
//! モデル ID 固有のテーブル(`gpt-5` / `codex-` 系など)は高レベル構築層
//! (`provider::capability`)の責務。ここでは wire の保守的 default のみ。
use crate::llm_client::capability::{
CacheStrategy, ModelCapability, ReasoningSupport, StructuredOutput, ToolCallingSupport,
CacheStrategy, ModelCapability, StructuredOutput, ToolCallingSupport,
};
use crate::llm_client::scheme::openai_chat::capability::{OpenAiFamily, classify};
pub(crate) fn lookup(model_id: &str) -> Option<ModelCapability> {
let family = classify(model_id).or_else(|| {
if model_id.starts_with("codex-") {
Some(OpenAiFamily::Reasoning)
} else {
None
}
})?;
Some(match family {
OpenAiFamily::Reasoning => ModelCapability {
tool_calling: ToolCallingSupport::Parallel,
structured_output: StructuredOutput::JsonSchema,
reasoning: Some(ReasoningSupport::Effort),
vision: true,
prompt_caching: CacheStrategy::Auto,
},
OpenAiFamily::Gpt4 => ModelCapability {
tool_calling: ToolCallingSupport::Parallel,
structured_output: StructuredOutput::JsonSchema,
reasoning: None,
vision: true,
prompt_caching: CacheStrategy::Auto,
},
OpenAiFamily::Gpt35 => ModelCapability {
tool_calling: ToolCallingSupport::Parallel,
structured_output: StructuredOutput::JsonObject,
reasoning: None,
vision: false,
prompt_caching: CacheStrategy::Auto,
},
})
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn gpt_5_codex_is_reasoning() {
// `gpt-5` prefix で classify される
let cap = lookup("gpt-5-codex").unwrap();
assert!(cap.reasoning.is_some());
}
#[test]
fn codex_mini_latest_is_reasoning() {
// ChatGPT backend 専用モデル。`codex-` prefix で Reasoning にフォールバック
let cap = lookup("codex-mini-latest").unwrap();
assert!(cap.reasoning.is_some());
}
#[test]
fn unknown_model_returns_none() {
assert!(lookup("foo-bar-3000").is_none());
}
}
pub(crate) fn default_capability() -> ModelCapability {
ModelCapability {

View File

@ -5,15 +5,16 @@
//! insomnia 側 1 次元 `BlockStart/Delta/Stop::index` のマッピングは
//! [`OpenAIResponsesState`] が保持する。
use std::collections::HashMap;
use std::collections::{BTreeMap, HashMap};
use serde::Deserialize;
use serde_json::{Map, Value};
use crate::llm_client::{
ClientError,
event::{
BlockDelta, BlockMetadata, BlockStart, BlockStop, BlockType, DeltaContent, ErrorEvent,
Event, ResponseStatus, StatusEvent, UsageEvent,
Event, ReasoningItemEvent, ResponseStatus, StatusEvent, UnhandledSseEvent, UsageEvent,
},
};
@ -22,6 +23,21 @@ use crate::llm_client::{
pub struct OpenAIResponsesState {
slots: HashMap<SlotKey, SlotInfo>,
next_index: usize,
/// 蓄積中の reasoning output_item。`output_item.added`(Reasoning) で
/// 確保し、`reasoning_text.delta` / `reasoning_summary_text.delta` で
/// 蓄積、`output_item.done`(Reasoning) で `Event::ReasoningItem` を
/// 発火してエントリを除去する。
pending_reasoning: HashMap<usize, PendingReasoning>,
}
/// Accumulation buffer for a single reasoning output_item.
#[derive(Debug, Default)]
struct PendingReasoning {
/// Item id, taken from the `output_item.added` or `output_item.done` payload
/// when either one carries it.
id: Option<String>,
/// Running concatenation of `reasoning_text.delta` payloads. When there are
/// several content parts they are appended in arrival order.
text: String,
/// `reasoning_summary_text.delta` payloads, accumulated per `summary_index`.
summary: Vec<String>,
}
impl OpenAIResponsesState {
@ -38,17 +54,25 @@ impl OpenAIResponsesState {
/// 既存 slot を取得。無ければ `block_type` で暗黙に確保し、
/// 新規確保したかを併せて返す。delta 先行 / content_part.added が
/// 抜けたときの防御。
fn get_or_allocate(
&mut self,
key: SlotKey,
block_type: BlockType,
) -> (SlotInfo, bool) {
fn get_or_allocate(&mut self, key: SlotKey, block_type: BlockType) -> (SlotInfo, bool) {
if let Some(info) = self.slots.get(&key).copied() {
(info, false)
} else {
(self.allocate(key, block_type), true)
}
}
/// Returns the reasoning buffer for `output_index`, creating an empty one
/// the first time that index is seen.
fn ensure_reasoning(&mut self, output_index: usize) -> &mut PendingReasoning {
    let slot = self.pending_reasoning.entry(output_index);
    slot.or_default()
}
/// Appends `text` to summary part `summary_index` of the reasoning item at
/// `output_index`, growing the summary list with empty strings as needed.
// NOTE(review): `summary_index` is taken from the SSE payload by the caller;
// a very large value grows the vector to that size — confirm that is acceptable.
fn extend_reasoning_summary(&mut self, output_index: usize, summary_index: usize, text: &str) {
    let parts = &mut self.ensure_reasoning(output_index).summary;
    if summary_index >= parts.len() {
        parts.resize_with(summary_index + 1, String::new);
    }
    parts[summary_index].push_str(text);
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
@ -93,8 +117,12 @@ enum OutputItem {
id: Option<String>,
},
Reasoning {
#[allow(dead_code)]
#[serde(default)]
id: Option<String>,
/// `output_item.done` で初めて埋まる。`include=["reasoning.encrypted_content"]`
/// 指定時に opaque blob が乗る。
#[serde(default)]
encrypted_content: Option<String>,
},
FunctionCall {
#[allow(dead_code)]
@ -214,17 +242,30 @@ struct ResponsesUsage {
output_tokens: Option<u64>,
#[serde(default)]
total_tokens: Option<u64>,
/// `input_tokens` の内訳。`cached_tokens` がプロンプトキャッシュヒット分。
#[serde(default)]
input_tokens_details: Option<InputTokensDetails>,
}
/// Breakdown of `input_tokens` inside the usage object.
#[derive(Debug, Deserialize)]
struct InputTokensDetails {
/// Input tokens that were prompt-cache hits; surfaced to callers as
/// `cache_read_input_tokens` on the usage event.
#[serde(default)]
cached_tokens: Option<u64>,
}
/// Payload of the `response.failed` / `response.incomplete` SSE events.
#[derive(Debug, Deserialize)]
struct ResponseFailed {
/// The embedded response object, which may carry the error details.
response: FailedResponse,
/// All remaining top-level fields of the event, kept so that they can be
/// reported in the diagnostic message.
#[serde(flatten)]
extra: BTreeMap<String, Value>,
}
/// The `response` object of a failed or incomplete response event.
#[derive(Debug, Deserialize)]
struct FailedResponse {
/// Error details, when the response carries any.
#[serde(default)]
error: Option<ErrorDetail>,
/// All remaining fields of the response object (for example
/// `incomplete_details`), kept for the diagnostic message.
#[serde(flatten)]
extra: BTreeMap<String, Value>,
}
#[derive(Debug, Deserialize)]
@ -233,6 +274,17 @@ struct ErrorDetail {
error_type: Option<String>,
#[serde(default)]
message: Option<String>,
#[serde(default)]
code: Option<String>,
#[serde(flatten)]
extra: BTreeMap<String, Value>,
}
/// Payload of the top-level `error` SSE event: the error object wrapped under
/// an `error` key.
#[derive(Debug, Deserialize)]
struct TopLevelErrorEnvelope {
/// The wrapped error object.
error: TopLevelError,
/// All remaining fields next to `error`, kept for the diagnostic message.
#[serde(flatten)]
extra: BTreeMap<String, Value>,
}
#[derive(Debug, Deserialize)]
@ -243,6 +295,8 @@ struct TopLevelError {
error_type: Option<String>,
#[serde(default)]
code: Option<String>,
#[serde(flatten)]
extra: BTreeMap<String, Value>,
}
// ============================================================================
@ -251,9 +305,9 @@ struct TopLevelError {
/// SSE フレーム 1 件をパースし、0 個以上の [`Event`] に変換する。
///
/// `event_type` は SSE の `event:` フィールド。未対応の event
/// 静かに無視する。`data` が JSON でない / 必要なフィールドが抜けて
/// いる等は [`ClientError::Api`] で返す。
/// `event_type` は SSE の `event:` フィールド。未対応の event type
/// [`Event::UnhandledSse`] として観測可能にする。`data` が JSON でない /
/// 必要なフィールドが抜けている等は [`ClientError::Api`] で返す。
pub(crate) fn parse_sse(
event_type: &str,
data: &str,
@ -274,7 +328,10 @@ pub(crate) fn parse_sse(
total_tokens: usage.total_tokens.or_else(|| {
Some(usage.input_tokens.unwrap_or(0) + usage.output_tokens.unwrap_or(0))
}),
cache_read_input_tokens: None,
cache_read_input_tokens: usage
.input_tokens_details
.and_then(|d| d.cached_tokens),
// Responses API は cache 書き込みを別計上しない(input_tokens に含まれる)
cache_creation_input_tokens: None,
}));
}
@ -286,10 +343,7 @@ pub(crate) fn parse_sse(
"response.failed" | "response.incomplete" => {
let ev: ResponseFailed = from_json(data)?;
let (code, message) = match ev.response.error {
Some(err) => (err.error_type, err.message.unwrap_or_default()),
None => (None, format!("response {event_type}")),
};
let (code, message) = response_failure_diagnostic(event_type, ev);
Ok(vec![
Event::Error(ErrorEvent { code, message }),
Event::Status(StatusEvent {
@ -303,23 +357,57 @@ pub(crate) fn parse_sse(
match ev.item {
OutputItem::FunctionCall { call_id, name, .. }
| OutputItem::CustomToolCall { call_id, name, .. } => {
let info = state
.allocate(SlotKey::OutputItem(ev.output_index), BlockType::ToolUse);
let info =
state.allocate(SlotKey::OutputItem(ev.output_index), BlockType::ToolUse);
Ok(vec![Event::BlockStart(BlockStart {
index: info.flat_index,
block_type: BlockType::ToolUse,
metadata: BlockMetadata::ToolUse {
id: call_id,
name,
},
metadata: BlockMetadata::ToolUse { id: call_id, name },
})])
}
OutputItem::Reasoning { id, .. } => {
// wrapper を確保。中身の content_part / summary_part は
// 別 SlotKey で扱われ続けるStreaming 表示は維持)。
let entry = state.ensure_reasoning(ev.output_index);
if id.is_some() {
entry.id = id;
}
Ok(Vec::new())
}
_ => Ok(Vec::new()),
}
}
"response.output_item.done" => {
let ev: OutputItemDone = from_json(data)?;
// Reasoning wrapper の done で蓄積分を ReasoningItem として発火。
// これは `slots` の OutputItem slot とは独立している
// (FunctionCall は slots、Reasoning は pending_reasoning)。
if let OutputItem::Reasoning {
id,
encrypted_content,
..
} = ev.item
{
let mut pending = state
.pending_reasoning
.remove(&ev.output_index)
.unwrap_or_default();
if pending.id.is_none() {
pending.id = id;
}
return Ok(vec![Event::ReasoningItem(ReasoningItemEvent {
id: pending.id,
text: pending.text,
summary: pending
.summary
.into_iter()
.filter(|s| !s.is_empty())
.collect(),
encrypted_content,
signature: None,
})]);
}
if let Some(info) = state.slots.remove(&SlotKey::OutputItem(ev.output_index)) {
Ok(vec![Event::BlockStop(BlockStop {
index: info.flat_index,
@ -384,6 +472,11 @@ pub(crate) fn parse_sse(
"response.reasoning_text.delta" => {
let ev: ReasoningTextDelta = from_json(data)?;
// round-trip 用に蓄積
state
.ensure_reasoning(ev.output_index)
.text
.push_str(&ev.delta);
Ok(ensure_and_delta(
state,
SlotKey::ContentPart {
@ -414,6 +507,8 @@ pub(crate) fn parse_sse(
"response.reasoning_summary_text.delta" => {
let ev: ReasoningSummaryTextDelta = from_json(data)?;
// round-trip 用に蓄積
state.extend_reasoning_summary(ev.output_index, ev.summary_index, &ev.delta);
Ok(ensure_and_delta(
state,
SlotKey::Summary {
@ -471,22 +566,167 @@ pub(crate) fn parse_sse(
}
"error" => {
let ev: TopLevelError = from_json(data).unwrap_or(TopLevelError {
let ev = from_json::<TopLevelErrorEnvelope>(data).unwrap_or_else(|_| {
TopLevelErrorEnvelope {
error: TopLevelError {
message: Some(data.to_string()),
error_type: None,
code: None,
extra: BTreeMap::new(),
},
extra: BTreeMap::new(),
}
});
Ok(vec![Event::Error(ErrorEvent {
code: ev.error_type.or(ev.code),
message: ev.message.unwrap_or_default(),
})])
let (code, message) = top_level_error_diagnostic(ev);
Ok(vec![Event::Error(ErrorEvent { code, message })])
}
// 未対応 / 情報系イベントは無視
_ => Ok(Vec::new()),
// 未対応 / 情報系 event type は生成 semantics からは無視しつつ trace に残す。
_ => Ok(vec![unhandled_sse_event(event_type, data)]),
}
}
/// Builds the `(code, message)` pair reported for a `response.failed` /
/// `response.incomplete` event.
///
/// The code is the error's `code`, falling back to its `type`, and finally to
/// `incomplete_details.reason` of the response when the error supplied neither.
/// The message is the error's non-blank `message` (or a generic
/// `OpenAI Responses {event_type}` text) with the collected diagnostic fields
/// appended via `append_diagnostic`.
fn response_failure_diagnostic(event_type: &str, ev: ResponseFailed) -> (Option<String>, String) {
    let error = ev.response.error;
    let response_extra = ev.response.extra;
    let event_extra = ev.extra;

    let mut fields = Map::new();
    fields.insert("event".to_string(), Value::String(event_type.to_string()));

    let mut code: Option<String> = None;
    let mut error_message: Option<String> = None;
    if let Some(detail) = error {
        // Prefer the specific code over the broader error type.
        code = detail.code.clone().or_else(|| detail.error_type.clone());
        if let Some(kind) = detail.error_type {
            fields.insert("error_type".to_string(), Value::String(kind));
        }
        if let Some(raw_code) = detail.code {
            fields.insert("error_code".to_string(), Value::String(raw_code));
        }
        if !detail.extra.is_empty() {
            fields.insert(
                "error_extra".to_string(),
                diagnostic_object(detail.extra, DIAGNOSTIC_VALUE_LIMIT),
            );
        }
        // A blank message is treated the same as a missing one.
        error_message = detail.message.filter(|text| !text.trim().is_empty());
    }
    let base_message =
        error_message.unwrap_or_else(|| format!("OpenAI Responses {event_type}"));

    let incomplete_reason = response_extra
        .get("incomplete_details")
        .and_then(|details| details.get("reason"))
        .and_then(Value::as_str)
        .map(str::to_string);
    if let Some(reason) = incomplete_reason {
        fields.insert(
            "incomplete_reason".to_string(),
            Value::String(reason.clone()),
        );
        // Only used as the code when the error object did not provide one.
        code = code.or(Some(reason));
    }

    if !response_extra.is_empty() {
        fields.insert(
            "response_extra".to_string(),
            diagnostic_object(response_extra, DIAGNOSTIC_VALUE_LIMIT),
        );
    }
    if !event_extra.is_empty() {
        fields.insert(
            "event_extra".to_string(),
            diagnostic_object(event_extra, DIAGNOSTIC_VALUE_LIMIT),
        );
    }

    (code, append_diagnostic(base_message, fields))
}
fn top_level_error_diagnostic(ev: TopLevelErrorEnvelope) -> (Option<String>, String) {
let code = ev.error.code.clone().or(ev.error.error_type.clone());
let mut diagnostic = Map::new();
diagnostic.insert("event".to_string(), Value::String("error".to_string()));
if let Some(error_type) = ev.error.error_type {
diagnostic.insert("error_type".to_string(), Value::String(error_type));
}
if let Some(error_code) = ev.error.code {
diagnostic.insert("error_code".to_string(), Value::String(error_code));
}
if !ev.error.extra.is_empty() {
diagnostic.insert(
"error_extra".to_string(),
diagnostic_object(ev.error.extra, DIAGNOSTIC_VALUE_LIMIT),
);
}
if !ev.extra.is_empty() {
diagnostic.insert(
"event_extra".to_string(),
diagnostic_object(ev.extra, DIAGNOSTIC_VALUE_LIMIT),
);
}
let message = ev
.error
.message
.filter(|message| !message.trim().is_empty())
.unwrap_or_else(|| "OpenAI Responses error".to_string());
(code, append_diagnostic(message, diagnostic))
}
const DIAGNOSTIC_VALUE_LIMIT: usize = 512;
/// Maximum number of bytes of raw SSE data kept in an unhandled-event preview.
const UNHANDLED_SSE_DATA_PREVIEW_LIMIT: usize = 512;

/// Returns `data` shortened to at most `UNHANDLED_SSE_DATA_PREVIEW_LIMIT`
/// bytes.
///
/// Input that already fits is returned whole. Longer input is cut at the last
/// UTF-8 character boundary at or below the limit, so the result is always
/// valid UTF-8 and never exceeds the limit.
fn capped_unhandled_sse_data_preview(data: &str) -> String {
    if data.len() <= UNHANDLED_SSE_DATA_PREVIEW_LIMIT {
        return data.to_owned();
    }
    // `data` is longer than the limit here, so the limit is a valid byte
    // offset; walk back until it no longer falls inside a multi-byte char.
    let mut cut = UNHANDLED_SSE_DATA_PREVIEW_LIMIT;
    while !data.is_char_boundary(cut) {
        cut -= 1;
    }
    data[..cut].to_owned()
}
/// Wraps an SSE frame with an unrecognized event type into an
/// `Event::UnhandledSse`, recording the provider tag, the event type, a
/// size-capped preview of the data and the original data length in bytes.
fn unhandled_sse_event(event_type: &str, data: &str) -> Event {
    let record = UnhandledSseEvent {
        provider: String::from("openai_responses"),
        event_type: event_type.to_owned(),
        data_preview: capped_unhandled_sse_data_preview(data),
        data_len: data.len(),
    };
    Event::UnhandledSse(record)
}
fn diagnostic_object(extra: BTreeMap<String, Value>, value_limit: usize) -> Value {
Value::Object(
extra
.into_iter()
.map(|(key, value)| (key, cap_json_value(value, value_limit)))
.collect(),
)
}
/// Bounds the size of a JSON value that is embedded in a diagnostic message.
///
/// A value whose compact JSON rendering fits in `limit` bytes is returned
/// unchanged. A larger value is replaced by a JSON string holding the leading
/// part of that rendering, cut at the last UTF-8 character boundary at or
/// below `limit` bytes, so the retained text never exceeds `limit` bytes.
///
/// The size check and the truncation both work in bytes. Previously the check
/// was in bytes while the cut took `limit` *characters*, so multi-byte content
/// could exceed the cap or be stringified without being shortened at all.
// NOTE(review): no truncation marker is appended, matching the visible
// original; confirm whether one (e.g. an ellipsis) was intended.
fn cap_json_value(value: Value, limit: usize) -> Value {
    let mut rendered = value.to_string();
    if rendered.len() <= limit {
        return value;
    }
    // `rendered` is longer than `limit`, so `limit` is a valid byte offset;
    // back up until it does not fall inside a multi-byte character.
    let mut cut = limit;
    while !rendered.is_char_boundary(cut) {
        cut -= 1;
    }
    rendered.truncate(cut);
    Value::String(rendered)
}
/// Appends the diagnostic fields to `message` as ` | diagnostic={json}`.
///
/// When the map holds at most one entry the message is returned untouched;
/// the callers in this file always insert the `event` key first, so a single
/// entry means there is nothing beyond the event name to report.
fn append_diagnostic(message: String, diagnostic: Map<String, Value>) -> String {
    let has_details = diagnostic.len() > 1;
    if has_details {
        let rendered = Value::Object(diagnostic);
        format!("{} | diagnostic={}", message, rendered)
    } else {
        message
    }
}
/// 対応する BlockStart がまだ発行されていなければ発行しつつ、delta を流す。
/// content_part.added を取りこぼしても delta 単独で復旧できるようにする。
fn ensure_and_delta(
@ -517,6 +757,7 @@ fn from_json<T: for<'de> Deserialize<'de>>(data: &str) -> Result<T, ClientError>
status: None,
code: Some("parse_error".to_string()),
message: format!("Failed to parse SSE data: {e}"),
retry_after: None,
})
}
@ -530,11 +771,7 @@ mod tests {
(events, state)
}
fn with(
state: &mut OpenAIResponsesState,
event_type: &str,
data: &str,
) -> Vec<Event> {
fn with(state: &mut OpenAIResponsesState, event_type: &str, data: &str) -> Vec<Event> {
parse_sse(event_type, data, state).unwrap()
}
@ -551,7 +788,8 @@ mod tests {
#[test]
fn completed_emits_usage_and_status() {
let data = r#"{"response":{"usage":{"input_tokens":10,"output_tokens":20,"total_tokens":30}}}"#;
let data =
r#"{"response":{"usage":{"input_tokens":10,"output_tokens":20,"total_tokens":30}}}"#;
let (events, _) = run("response.completed", data);
assert!(matches!(events[0], Event::Usage(_)));
assert!(matches!(
@ -564,9 +802,31 @@ mod tests {
assert_eq!(u.input_tokens, Some(10));
assert_eq!(u.output_tokens, Some(20));
assert_eq!(u.total_tokens, Some(30));
assert_eq!(u.cache_read_input_tokens, None);
assert_eq!(u.cache_creation_input_tokens, None);
}
}
// `input_tokens_details.cached_tokens` of a `response.completed` usage object
// must be surfaced as `cache_read_input_tokens` on the usage event.
#[test]
fn completed_extracts_cached_tokens_from_input_tokens_details() {
let data = r#"{"response":{"usage":{
"input_tokens":12345,
"input_tokens_details":{"cached_tokens":11000},
"output_tokens":50,
"total_tokens":12395
}}}"#;
let (events, _) = run("response.completed", data);
let Event::Usage(u) = &events[0] else {
panic!("expected usage")
};
assert_eq!(u.input_tokens, Some(12345));
assert_eq!(u.output_tokens, Some(50));
assert_eq!(u.total_tokens, Some(12395));
assert_eq!(u.cache_read_input_tokens, Some(11000));
// OpenAI Responses does not report cache writes separately
assert_eq!(u.cache_creation_input_tokens, None);
}
#[test]
fn text_stream_start_delta_stop() {
let mut state = OpenAIResponsesState::default();
@ -761,8 +1021,7 @@ mod tests {
#[test]
fn failed_response_emits_error_and_status() {
let data =
r#"{"response":{"error":{"type":"invalid_request_error","message":"bad"}}}"#;
let data = r#"{"response":{"error":{"type":"invalid_request_error","message":"bad"}}}"#;
let (events, _) = run("response.failed", data);
assert_eq!(events.len(), 2);
assert!(matches!(events[0], Event::Error(_)));
@ -775,8 +1034,207 @@ mod tests {
}
#[test]
fn unknown_event_is_ignored() {
let (events, _) = run("response.in_progress", "{}");
assert!(events.is_empty());
fn incomplete_response_preserves_incomplete_reason_without_error() {
let data = r#"{
"response": {
"status": "incomplete",
"incomplete_details": {"reason": "max_output_tokens"}
}
}"#;
let (events, _) = run("response.incomplete", data);
let Event::Error(err) = &events[0] else {
panic!("expected error event")
};
assert_eq!(err.code.as_deref(), Some("max_output_tokens"));
assert!(err.message.contains("OpenAI Responses response.incomplete"));
assert!(err.message.contains("incomplete_reason"));
assert!(err.message.contains("max_output_tokens"));
assert!(!err.message.ends_with("response response.incomplete"));
}
// Unknown fields on the response object and on the event itself must show up
// in the error message of a `response.incomplete` event.
#[test]
fn incomplete_response_preserves_unknown_response_fields() {
let data = r#"{
"response": {
"status": "incomplete",
"incomplete_details": {"reason": "content_filter"},
"mystery_field": {"nested": true}
},
"sequence_number": 42
}"#;
let (events, _) = run("response.incomplete", data);
let Event::Error(err) = &events[0] else {
panic!("expected error event")
};
assert!(err.message.contains("mystery_field"));
assert!(err.message.contains("sequence_number"));
assert!(err.message.contains("content_filter"));
}
// For `response.failed`, the error's `code` is reported as the event code and
// extra fields of both the error object and the response are kept in the
// message.
#[test]
fn failed_response_preserves_error_and_response_extra_fields() {
let data = r#"{
"response": {
"error": {
"type": "server_error",
"code": "upstream_overloaded",
"message": "try later",
"param": "input"
},
"retry_hint": "short"
}
}"#;
let (events, _) = run("response.failed", data);
let Event::Error(err) = &events[0] else {
panic!("expected error event")
};
assert_eq!(err.code.as_deref(), Some("upstream_overloaded"));
assert!(err.message.contains("try later"));
assert!(err.message.contains("param"));
assert!(err.message.contains("retry_hint"));
}
// For a top-level `error` event, the error's `code` is reported and unknown
// fields of both the error object and its envelope are kept in the message.
#[test]
fn top_level_error_preserves_unknown_fields() {
let data = r#"{
"error": {
"type": "rate_limit_error",
"code": "rate_limit_exceeded",
"message": "slow down",
"retry_after_ms": 1000
},
"request_id": "req_123"
}"#;
let (events, _) = run("error", data);
let Event::Error(err) = &events[0] else {
panic!("expected error event")
};
assert_eq!(err.code.as_deref(), Some("rate_limit_exceeded"));
assert!(err.message.contains("slow down"));
assert!(err.message.contains("retry_after_ms"));
assert!(err.message.contains("request_id"));
}
#[test]
fn reasoning_output_item_emits_reasoning_item_with_text_summary_encrypted() {
// A completed reasoning wrapper must arrive as a ReasoningItem carrying
// text + summary[] + encrypted_content.
let mut state = OpenAIResponsesState::default();
// wrapper added (carries only the id)
with(
&mut state,
"response.output_item.added",
r#"{"output_index":0,"item":{"type":"reasoning","id":"r1"}}"#,
);
// content_part for the inner reasoning_text
with(
&mut state,
"response.content_part.added",
r#"{"output_index":0,"content_index":0,"item_id":"r1","part":{"type":"reasoning_text","text":""}}"#,
);
with(
&mut state,
"response.reasoning_text.delta",
r#"{"output_index":0,"content_index":0,"item_id":"r1","delta":"hello "}"#,
);
with(
&mut state,
"response.reasoning_text.delta",
r#"{"output_index":0,"content_index":0,"item_id":"r1","delta":"world"}"#,
);
with(
&mut state,
"response.content_part.done",
r#"{"output_index":0,"content_index":0,"item_id":"r1","part":{"type":"reasoning_text","text":"hello world"}}"#,
);
// one summary entry
with(
&mut state,
"response.reasoning_summary_part.added",
r#"{"output_index":0,"summary_index":0,"item_id":"r1","part":{"type":"summary_text","text":""}}"#,
);
with(
&mut state,
"response.reasoning_summary_text.delta",
r#"{"output_index":0,"summary_index":0,"item_id":"r1","delta":"sum-A"}"#,
);
with(
&mut state,
"response.reasoning_summary_part.done",
r#"{"output_index":0,"summary_index":0,"item_id":"r1"}"#,
);
// wrapper done (carries encrypted_content)
let evs = with(
&mut state,
"response.output_item.done",
r#"{"output_index":0,"item":{"type":"reasoning","id":"r1","encrypted_content":"ENC-XYZ"}}"#,
);
assert_eq!(evs.len(), 1);
let Event::ReasoningItem(reasoning) = &evs[0] else {
panic!("expected ReasoningItem, got {:?}", evs[0]);
};
assert_eq!(reasoning.id.as_deref(), Some("r1"));
assert_eq!(reasoning.text, "hello world");
assert_eq!(reasoning.summary, vec!["sum-A".to_string()]);
assert_eq!(reasoning.encrypted_content.as_deref(), Some("ENC-XYZ"));
assert!(reasoning.signature.is_none());
// pending_reasoning must have been drained
assert!(state.pending_reasoning.is_empty());
}
#[test]
fn reasoning_wrapper_without_inner_content_emits_empty_text() {
// Even when only encrypted_content arrives (no reasoning_text), a
// ReasoningItem is still emitted.
let mut state = OpenAIResponsesState::default();
with(
&mut state,
"response.output_item.added",
r#"{"output_index":2,"item":{"type":"reasoning","id":"r9"}}"#,
);
let evs = with(
&mut state,
"response.output_item.done",
r#"{"output_index":2,"item":{"type":"reasoning","id":"r9","encrypted_content":"BLOB"}}"#,
);
let Event::ReasoningItem(r) = &evs[0] else {
panic!()
};
assert!(r.text.is_empty());
assert!(r.summary.is_empty());
assert_eq!(r.encrypted_content.as_deref(), Some("BLOB"));
}
#[test]
fn unknown_event_emits_trace_visible_unhandled_sse() {
// An unrecognized SSE event type must surface as exactly one
// `Event::UnhandledSse` carrying the provider name, the event type, a
// preview of the payload, and the payload length.
let data = r#"{"sequence_number":7,"note":"debug me"}"#;
let (events, _) = run("response.mystery", data);
assert_eq!(events.len(), 1);
let Event::UnhandledSse(unhandled) = &events[0] else {
panic!("expected UnhandledSse, got {:?}", events[0]);
};
assert_eq!(unhandled.provider, "openai_responses");
assert_eq!(unhandled.event_type, "response.mystery");
// This payload is expected to come back in full as the preview.
assert_eq!(unhandled.data_preview, data);
assert_eq!(unhandled.data_len, data.len());
}
#[test]
fn unknown_event_data_preview_is_bounded_and_data_len_is_original_bytes() {
// Payload: LIMIT + 32 ASCII bytes followed by a multi-byte suffix, so it is
// strictly longer than the preview limit.
let data = format!("{}終端", "x".repeat(UNHANDLED_SSE_DATA_PREVIEW_LIMIT + 32));
let (events, _) = run("response.mystery.large", &data);
assert_eq!(events.len(), 1);
let Event::UnhandledSse(unhandled) = &events[0] else {
panic!("expected UnhandledSse, got {:?}", events[0]);
};
// `data_len` reports the byte length of the original payload, not of the preview.
assert_eq!(unhandled.data_len, data.len());
// The preview is capped at the limit and consists of the leading bytes only.
assert!(unhandled.data_preview.len() <= UNHANDLED_SSE_DATA_PREVIEW_LIMIT);
assert_eq!(
unhandled.data_preview,
"x".repeat(UNHANDLED_SSE_DATA_PREVIEW_LIMIT)
);
assert!(unhandled.data_preview.len() < unhandled.data_len);
}
}

View File

@ -16,11 +16,13 @@ pub use scheme_impl::OpenAIResponsesState;
/// OpenAI Responses scheme 本体。
///
/// `store` / `include_encrypted_content` は scheme 固定の wire 設定で、
/// デフォルトは stateless + ZDR 相当 (`store=false`, `include=[...]`)。
/// 将来 ZDR 非対応環境で `store=true` にしたくなった場合に限り override
/// する。`ModelCapability` には入れない(これはモデルの能力ではなく、
/// クライアントの運用方針)。
/// `store` / `include_encrypted_content` / `send_max_output_tokens` /
/// `send_sampling_params` は scheme 固定の wire 設定で、デフォルトは
/// 公式 OpenAI Responses API 向け (stateless + ZDR + `max_output_tokens`
/// / `temperature` / `top_p` 送出可)。ChatGPT backend (codex-oauth) の
/// ように受理パラメータが subset の経路では provider 層で
/// `send_max_output_tokens=false` / `send_sampling_params=false` に
/// 上書きする。`ModelCapability` には入れない(モデル能力ではなく wire policy)。
#[derive(Debug, Clone)]
pub struct OpenAIResponsesScheme {
/// サーバ側に response を保存するか。ZDR/stateless 運用では `false`。
@ -28,6 +30,14 @@ pub struct OpenAIResponsesScheme {
/// `include: ["reasoning.encrypted_content"]` を付けるか。
/// `store=false` で reasoning を使うなら必須。
pub include_encrypted_content: bool,
/// `max_output_tokens` を body に載せるか。公式 OpenAI Responses API は
/// 受理するが、ChatGPT backend (codex-oauth) は `Unsupported parameter`
/// で 400 を返すため、その経路では `false` にする。
pub send_max_output_tokens: bool,
/// `temperature` / `top_p` を body に載せるか。公式 OpenAI Responses API
/// は受理するが、ChatGPT backend (codex-oauth) は `Unsupported parameter`
/// で 400 を返すため、その経路では `false` にする。
pub send_sampling_params: bool,
}
impl Default for OpenAIResponsesScheme {
@ -35,12 +45,15 @@ impl Default for OpenAIResponsesScheme {
Self {
store: false,
include_encrypted_content: true,
send_max_output_tokens: true,
send_sampling_params: true,
}
}
}
impl OpenAIResponsesScheme {
/// デフォルト設定 (`store=false`, `include=["reasoning.encrypted_content"]`)。
/// デフォルト設定 (`store=false`, `include=["reasoning.encrypted_content"]`,
/// `send_max_output_tokens=true`, `send_sampling_params=true`)。
pub fn new() -> Self {
Self::default()
}
@ -56,4 +69,16 @@ impl OpenAIResponsesScheme {
self.include_encrypted_content = include;
self
}
/// Overrides whether `max_output_tokens` is included in the request body.
///
/// Builder-style: takes `self` by value, sets `send_max_output_tokens` to
/// `send`, and returns the updated scheme.
pub fn with_send_max_output_tokens(mut self, send: bool) -> Self {
self.send_max_output_tokens = send;
self
}
/// Overrides whether `temperature` / `top_p` are included in the request body.
///
/// Builder-style: takes `self` by value, sets `send_sampling_params` to
/// `send`, and returns the updated scheme.
pub fn with_send_sampling_params(mut self, send: bool) -> Self {
self.send_sampling_params = send;
self
}
}

View File

@ -4,12 +4,12 @@
//! item 配列で reasoning / function_call / function_call_output が
//! first-class。`Item` を素に近い形で `input[]` に投影できる。
use serde::Serialize;
use serde::{Serialize, Serializer};
use serde_json::Value;
use crate::llm_client::{
Request,
capability::{ModelCapability, ReasoningEffort, ReasoningSupport},
capability::{ModelCapability, ReasoningControl, ReasoningSupport},
types::{ContentPart, Item, Role, ToolDefinition, parse_tool_arguments},
};
@ -38,19 +38,30 @@ pub(crate) struct ResponsesRequest {
/// `["reasoning.encrypted_content"]` 等。
#[serde(skip_serializing_if = "Vec::is_empty")]
pub include: Vec<&'static str>,
/// 公式 OpenAI Responses API では受理されるが、ChatGPT backend
/// (codex-oauth) は 400 で弾く。scheme の `send_max_output_tokens`
/// が `false` のときは `None` のまま送る (skip_serializing_if で除外)。
#[serde(skip_serializing_if = "Option::is_none")]
pub max_output_tokens: Option<u32>,
/// 公式 OpenAI Responses API では受理されるが、ChatGPT backend
/// (codex-oauth) は `temperature` / `top_p` を 400 で弾く。scheme の
/// `send_sampling_params` が `false` のときは `None` のまま送る。
#[serde(skip_serializing_if = "Option::is_none")]
pub temperature: Option<f32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub top_p: Option<f32>,
/// 会話単位の安定キー。ChatGPT backend (codex-oauth) は明示キーが
/// 無いとプロンプトキャッシュがほぼ効かない。pod 側は `SegmentId`
/// を渡す。`Request::cache_key` が `None` のときはキー自体を送らない。
#[serde(skip_serializing_if = "Option::is_none")]
pub prompt_cache_key: Option<String>,
}
/// reasoning 制御。
#[derive(Debug, Serialize)]
pub(crate) struct ReasoningConfig {
#[serde(skip_serializing_if = "Option::is_none")]
pub effort: Option<&'static str>,
pub effort: Option<String>,
/// summary の出力制御。`"auto"` 固定で summary_text を受け取る。
pub summary: &'static str,
}
@ -62,7 +73,11 @@ pub(crate) struct ReasoningConfig {
#[derive(Debug, Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub(crate) enum InputItem {
/// 会話メッセージ。user / assistant / system のいずれか。
/// 会話メッセージ。user / assistant / developer のいずれか。
/// `Role::System` items は `developer` として投影する(ChatGPT
/// backend が `role: "system"` を拒否するため。Codex CLI も
/// system 相当の挿入には DeveloperInstructions = `role: "developer"`
/// を使う)。
Message {
role: &'static str,
content: Vec<InputContent>,
@ -85,7 +100,11 @@ pub(crate) enum InputItem {
Reasoning {
#[serde(skip_serializing_if = "Option::is_none")]
id: Option<String>,
#[serde(skip_serializing_if = "Vec::is_empty")]
/// Responses API は reasoning item に `summary` フィールドを必須で
/// 要求する(中身が空でも `[]` として送る必要がある)。GPT-5 など
/// summary を返さないモデル + reasoning effort 指定なしのターンでは
/// summary text が一切付かないので、ここを skip すると 400
/// "Missing required parameter: 'input[N].summary'" で弾かれる。
summary: Vec<ReasoningSummaryPart>,
#[serde(skip_serializing_if = "Vec::is_empty")]
content: Vec<ReasoningContentPart>,
@ -98,7 +117,7 @@ pub(crate) enum InputItem {
#[derive(Debug, Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub(crate) enum InputContent {
/// user / system 側のテキスト
/// user / developer 側のテキスト
InputText { text: String },
/// assistant 側のテキスト
OutputText { text: String },
@ -125,11 +144,28 @@ pub(crate) struct ResponseTool {
pub name: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub description: Option<String>,
/// OpenAI Responses API は `type:"object"` のパラメータスキーマに
/// `properties` が存在することを要求する。schemars は引数なし struct
/// から `properties` を含まない最小スキーマを出すので、serialize
/// 時に空オブジェクトを補う。
#[serde(serialize_with = "serialize_parameters")]
pub parameters: Value,
/// Structured output モード制御。デフォルト false。
pub strict: bool,
}
/// Serializes a tool parameter schema, filling in an empty `properties`
/// object when the schema is a JSON object whose `type` is `"object"` but
/// which has no `properties` key. Every other value is serialized unchanged.
fn serialize_parameters<S: Serializer>(value: &Value, s: S) -> Result<S::Ok, S::Error> {
    // Non-object schemas are passed through as-is.
    let Some(schema) = value.as_object() else {
        return value.serialize(s);
    };

    let is_object_schema = schema.get("type").and_then(Value::as_str) == Some("object");
    if !is_object_schema || schema.contains_key("properties") {
        return value.serialize(s);
    }

    // Patch a copy so the caller's schema value is left untouched.
    let mut with_properties = schema.clone();
    with_properties.insert("properties".to_string(), Value::Object(Default::default()));
    Value::Object(with_properties).serialize(s)
}
impl OpenAIResponsesScheme {
/// `Request` から wire 形式の body を組み立てる。
pub(crate) fn build_request(
@ -151,16 +187,15 @@ impl OpenAIResponsesScheme {
.config
.reasoning
.as_ref()
.and_then(|rc| rc.effort)
.filter(|_| supports_effort)
.map(|effort| ReasoningConfig {
effort: Some(match effort {
ReasoningEffort::Low => "low",
ReasoningEffort::Medium => "medium",
ReasoningEffort::High => "high",
}),
effort: match effort {
ReasoningControl::Effort(effort) => Some(effort.as_str().to_string()),
ReasoningControl::BudgetTokens(_) => None,
},
summary: "auto",
});
})
.filter(|reasoning| reasoning.effort.is_some());
let include: Vec<&'static str> = if self.include_encrypted_content {
vec!["reasoning.encrypted_content"]
@ -179,9 +214,22 @@ impl OpenAIResponsesScheme {
store: self.store,
stream: true,
include,
max_output_tokens: request.config.max_tokens,
temperature: request.config.temperature,
top_p: request.config.top_p,
max_output_tokens: if self.send_max_output_tokens {
request.config.max_tokens
} else {
None
},
temperature: if self.send_sampling_params {
request.config.temperature
} else {
None
},
top_p: if self.send_sampling_params {
request.config.top_p
} else {
None
},
prompt_cache_key: request.cache_key.clone(),
}
}
}
@ -192,11 +240,11 @@ fn convert_items_to_input(items: &[Item]) -> Vec<InputItem> {
for item in items {
match item {
Item::Message { role, content, .. } => {
let (role_str, text_variant): (&'static str, fn(String) -> InputContent) = match role
{
let (role_str, text_variant): (&'static str, fn(String) -> InputContent) =
match role {
Role::User => ("user", |t| InputContent::InputText { text: t }),
Role::Assistant => ("assistant", |t| InputContent::OutputText { text: t }),
Role::System => ("system", |t| InputContent::InputText { text: t }),
Role::System => ("developer", |t| InputContent::InputText { text: t }),
};
let parts: Vec<InputContent> = content
.iter()
@ -353,6 +401,28 @@ mod tests {
}
}
#[test]
fn system_role_item_is_projected_as_developer() {
// The ChatGPT backend (codex-oauth) rejects `role: "system"` in input[]
// with a 400 "System messages are not allowed". In-conversation system
// notes (notify / fs_view auto-read / compaction summary) are therefore
// projected as `role: "developer"` so that both backends accept them.
let scheme = OpenAIResponsesScheme::new();
let req = Request::new()
.user("hi")
.item(Item::system_message("[notify] hello"));
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
// input[0] is the user message; input[1] is the projected system note.
match &body.input[1] {
InputItem::Message { role, content } => {
assert_eq!(*role, "developer");
assert!(
matches!(&content[0], InputContent::InputText { text } if text == "[notify] hello"),
);
}
_ => panic!("expected message"),
}
}
#[test]
fn assistant_message_uses_output_text() {
let scheme = OpenAIResponsesScheme::new();
@ -378,7 +448,10 @@ mod tests {
.item(Item::tool_result("c1", "ok"));
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
assert!(matches!(body.input[1], InputItem::FunctionCall { .. }));
assert!(matches!(body.input[2], InputItem::FunctionCallOutput { .. }));
assert!(matches!(
body.input[2],
InputItem::FunctionCallOutput { .. }
));
}
#[test]
@ -404,17 +477,37 @@ mod tests {
}
}
#[test]
fn reasoning_summary_field_is_always_serialized() {
// The Responses API requires a `summary` field on every reasoning item.
// Unless `summary: []` stays on the wire even when the summary is empty,
// the ChatGPT backend (codex-oauth) rejects the request with
// 400 invalid_request_error: Missing required parameter:
// 'input[N].summary'.
// With GPT-5 and no reasoning effort specified, a turn may carry no
// summary text at all, so the field must not be skipped when empty.
let scheme = OpenAIResponsesScheme::new();
let item = Item::reasoning("").with_encrypted_content("ENC");
let req = Request::new().user("hi").item(item);
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
let json = serde_json::to_value(&body).unwrap();
// input[0] is the user message; input[1] is the reasoning item.
let reasoning_item = &json["input"][1];
assert_eq!(reasoning_item["type"], "reasoning");
assert!(
reasoning_item.get("summary").is_some(),
"summary key must be present even when empty, got: {reasoning_item}"
);
assert_eq!(reasoning_item["summary"], serde_json::json!([]));
}
#[test]
fn reasoning_effort_projected_when_supported() {
let scheme = OpenAIResponsesScheme::new();
let mut req = Request::new().user("hi");
req.config.reasoning = Some(ReasoningControl {
effort: Some(ReasoningEffort::High),
budget_tokens: None,
});
req.config.reasoning = Some(ReasoningControl::Effort(ReasoningEffort::High));
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
let reasoning = body.reasoning.expect("reasoning should be set");
assert_eq!(reasoning.effort, Some("high"));
assert_eq!(reasoning.effort.as_deref(), Some("high"));
assert_eq!(reasoning.summary, "auto");
}
@ -422,22 +515,118 @@ mod tests {
fn reasoning_omitted_when_unsupported() {
let scheme = OpenAIResponsesScheme::new();
let mut req = Request::new().user("hi");
req.config.reasoning = Some(ReasoningControl {
effort: Some(ReasoningEffort::High),
budget_tokens: None,
});
req.config.reasoning = Some(ReasoningControl::Effort(ReasoningEffort::High));
let body = scheme.build_request("gpt-4o", &req, &cap_no_reasoning());
assert!(body.reasoning.is_none());
}
#[test]
fn max_output_tokens_passed_through() {
fn max_output_tokens_passed_through_by_default() {
let scheme = OpenAIResponsesScheme::new();
let req = Request::new().user("hi").max_tokens(100);
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
assert_eq!(body.max_output_tokens, Some(100));
}
#[test]
fn max_output_tokens_dropped_when_send_disabled() {
// With `send_max_output_tokens` disabled, the request's `max_tokens` must
// be absent from both the built struct and the serialized JSON.
let scheme = OpenAIResponsesScheme::new().with_send_max_output_tokens(false);
let req = Request::new().user("hi").max_tokens(100);
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
assert_eq!(body.max_output_tokens, None);
let json = serde_json::to_value(&body).unwrap();
assert!(
json.get("max_output_tokens").is_none(),
"max_output_tokens key must not appear in serialised body, got: {json}"
);
}
#[test]
fn sampling_params_passed_through_by_default() {
// The default scheme forwards `temperature` and `top_p` unchanged.
let scheme = OpenAIResponsesScheme::new();
let req = Request::new().user("hi").temperature(0.4).top_p(0.9);
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
assert_eq!(body.temperature, Some(0.4));
assert_eq!(body.top_p, Some(0.9));
}
#[test]
fn sampling_params_dropped_when_send_disabled() {
// With `send_sampling_params` disabled, `temperature` and `top_p` must be
// absent from both the built struct and the serialized JSON.
let scheme = OpenAIResponsesScheme::new().with_send_sampling_params(false);
let req = Request::new().user("hi").temperature(0.4).top_p(0.9);
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
assert_eq!(body.temperature, None);
assert_eq!(body.top_p, None);
let json = serde_json::to_value(&body).unwrap();
assert!(
json.get("temperature").is_none() && json.get("top_p").is_none(),
"temperature/top_p keys must not appear in serialised body, got: {json}"
);
}
#[test]
fn prompt_cache_key_passed_through_when_set() {
// A cache key set on the request must reach the body as `prompt_cache_key`,
// both in the struct and in the serialized JSON.
let scheme = OpenAIResponsesScheme::new();
let req = Request::new().user("hi").cache_key("session-abc");
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
assert_eq!(body.prompt_cache_key.as_deref(), Some("session-abc"));
let json = serde_json::to_value(&body).unwrap();
assert_eq!(json["prompt_cache_key"], "session-abc");
}
#[test]
fn prompt_cache_key_omitted_when_none() {
// Without a cache key on the request, the `prompt_cache_key` key must not
// be serialized at all (not even as `null`).
let scheme = OpenAIResponsesScheme::new();
let req = Request::new().user("hi");
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
assert!(body.prompt_cache_key.is_none());
let json = serde_json::to_value(&body).unwrap();
assert!(
json.get("prompt_cache_key").is_none(),
"prompt_cache_key key must not appear in serialised body, got: {json}"
);
}
#[test]
fn tool_schema_without_properties_is_normalized() {
// schemars emits a schema containing only `type:"object"` for a struct
// with no arguments. OpenAI Responses rejects a missing `properties` with
// a 400, so verify that an empty object is filled in right before sending.
let scheme = OpenAIResponsesScheme::new();
let raw_schema = serde_json::json!({ "type": "object" });
let req = Request::new().tool(
ToolDefinition::new("empty")
.description("no args")
.input_schema(raw_schema),
);
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
let json = serde_json::to_value(&body).unwrap();
assert_eq!(json["tools"][0]["parameters"]["type"], "object");
assert!(
json["tools"][0]["parameters"]["properties"].is_object(),
"properties must be present as an object, got: {}",
json["tools"][0]["parameters"]
);
}
#[test]
fn tool_schema_with_properties_is_untouched() {
// A schema that already has `properties` must be serialized exactly as given.
let scheme = OpenAIResponsesScheme::new();
let raw_schema = serde_json::json!({
"type": "object",
"properties": { "path": { "type": "string" } },
"required": ["path"]
});
let req = Request::new().tool(
ToolDefinition::new("t")
.description("d")
.input_schema(raw_schema.clone()),
);
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
let json = serde_json::to_value(&body).unwrap();
assert_eq!(json["tools"][0]["parameters"], raw_schema);
}
#[test]
fn serialized_body_has_expected_shape() {
// wire 形式が崩れていないかのスモークテスト

View File

@ -6,9 +6,10 @@ use crate::llm_client::{
ClientError,
auth::AuthRequirement,
capability::ModelCapability,
client::ConfigWarning,
event::Event,
scheme::Scheme,
types::Request,
types::{Request, RequestConfig},
};
use super::OpenAIResponsesScheme;
@ -19,11 +20,14 @@ impl Scheme for OpenAIResponsesScheme {
type State = OpenAIResponsesState;
fn default_base_url(&self) -> &'static str {
"https://api.openai.com"
// `/v1` は base_url 側に寄せる。ChatGPT OAuth 経由のときは
// `https://chatgpt.com/backend-api/codex` を base にすれば同じ
// `/responses` path で両系統を吸収できる(Codex CLI 準拠)。
"https://api.openai.com/v1"
}
fn path(&self, _model_id: &str) -> String {
"/v1/responses".to_string()
"/responses".to_string()
}
fn required_auth(&self) -> AuthRequirement {
@ -49,11 +53,36 @@ impl Scheme for OpenAIResponsesScheme {
super::events::parse_sse(event_type, data, state)
}
fn capability_for(&self, model_id: &str) -> Option<ModelCapability> {
super::capability::lookup(model_id)
}
fn default_capability(&self) -> ModelCapability {
super::capability::default_capability()
}
/// Reports request settings that this scheme instance will silently drop.
///
/// The ChatGPT backend (codex-oauth) rejects `max_output_tokens`,
/// `temperature` and `top_p` with a 400. When the scheme was built with the
/// corresponding `send_*` flag disabled, body projection already omits them;
/// this only tells the user that their setting is not being sent.
fn validate_config(&self, config: &RequestConfig) -> Vec<ConfigWarning> {
    const BACKEND: &str = "OpenAI Responses (ChatGPT backend)";

    // (config field name, whether the value was set but will not be sent),
    // listed in the order the warnings are reported.
    let dropped_fields = [
        (
            "max_tokens",
            !self.send_max_output_tokens && config.max_tokens.is_some(),
        ),
        (
            "temperature",
            !self.send_sampling_params && config.temperature.is_some(),
        ),
        (
            "top_p",
            !self.send_sampling_params && config.top_p.is_some(),
        ),
    ];

    dropped_fields
        .into_iter()
        .filter(|(_, is_dropped)| *is_dropped)
        .map(|(field, _)| ConfigWarning::unsupported(field, BACKEND))
        .collect()
}
}

View File

@ -6,20 +6,27 @@
use std::pin::Pin;
use std::sync::Arc;
use std::time::{Duration, Instant};
use async_trait::async_trait;
use eventsource_stream::Eventsource;
use futures::{Stream, StreamExt, TryStreamExt};
use reqwest::header::{CONTENT_TYPE, HeaderMap, HeaderValue};
use reqwest::header::{
ACCEPT, CONTENT_ENCODING, CONTENT_TYPE, HeaderMap, HeaderName, HeaderValue, RETRY_AFTER,
};
use serde_json::{Value, json};
use super::auth::{AuthProvider, AuthRequirement};
use super::capability::ModelCapability;
use super::client::{ConfigWarning, LlmClient};
use super::client::{ConfigWarning, LlmClient, ResponseStream};
use super::error::ClientError;
use super::event::Event;
use super::scheme::Scheme;
use super::types::{Request, RequestConfig};
pub const DEFAULT_STREAM_OPEN_TIMEOUT: Duration = Duration::from_secs(20);
pub const DEFAULT_FIRST_STREAM_EVENT_TIMEOUT: Duration = Duration::from_secs(30);
/// `AuthRef` を解決したランタイム表現。`crates/provider` が構築する。
///
/// - `None`: 認証ヘッダを送らない(Ollama 等の opt-out)
@ -46,7 +53,9 @@ impl ResolvedAuth {
(Self::Custom(_), _) => true,
(
Self::ApiKey(_),
AuthRequirement::Bearer | AuthRequirement::XApiKey | AuthRequirement::QueryParam { .. },
AuthRequirement::Bearer
| AuthRequirement::XApiKey
| AuthRequirement::QueryParam { .. },
) => true,
_ => false,
}
@ -146,6 +155,120 @@ impl<S: Scheme> HttpTransport<S> {
Ok(headers)
}
/// Returns `true` only when the resolved auth is a custom provider that
/// reports itself as the Codex backend; every other auth kind yields `false`.
fn is_codex_backend(&self) -> bool {
    if let ResolvedAuth::Custom(provider) = &self.auth {
        provider.is_codex_backend()
    } else {
        false
    }
}
/// Adds the headers needed for a streaming request.
///
/// `Accept: text/event-stream` is always set. For the Codex backend, when the
/// request carries a cache key, that key is also sent as both `session_id`
/// and `x-client-request-id`.
///
/// # Errors
///
/// Returns [`ClientError::Config`] when the cache key is not a valid header
/// value.
fn apply_stream_headers(
    &self,
    headers: &mut HeaderMap,
    request: &Request,
) -> Result<(), ClientError> {
    headers.insert(ACCEPT, HeaderValue::from_static("text/event-stream"));

    // The conversation headers below are Codex-only.
    if !self.is_codex_backend() {
        return Ok(());
    }
    let Some(cache_key) = request.cache_key.as_deref() else {
        return Ok(());
    };

    let conversation_id = match HeaderValue::from_str(cache_key) {
        Ok(value) => value,
        Err(e) => {
            return Err(ClientError::Config(format!(
                "invalid Codex conversation header: {e}"
            )));
        }
    };
    headers.insert(
        HeaderName::from_static("session_id"),
        conversation_id.clone(),
    );
    headers.insert(
        HeaderName::from_static("x-client-request-id"),
        conversation_id,
    );
    Ok(())
}
/// Chooses the wire encoding for the request body.
///
/// Non-Codex backends get a copy of the JSON value unchanged. For the Codex
/// backend the JSON is serialized, zstd-compressed, and `Content-Encoding:
/// zstd` is added to `headers`; the uncompressed size is kept for reporting.
///
/// # Errors
///
/// Propagates JSON serialization failures, and returns
/// [`ClientError::Config`] when compression fails.
fn encode_request_body(
    &self,
    body: &serde_json::Value,
    headers: &mut HeaderMap,
) -> Result<RequestBody, ClientError> {
    // Compression level passed to zstd.
    const ZSTD_LEVEL: i32 = 3;

    if self.is_codex_backend() {
        let json_bytes = serde_json::to_vec(body)?;
        let raw_json_bytes = json_bytes.len();
        let bytes = zstd::stream::encode_all(std::io::Cursor::new(json_bytes), ZSTD_LEVEL)
            .map_err(|e| ClientError::Config(format!("failed to zstd-compress request: {e}")))?;
        headers.insert(CONTENT_ENCODING, HeaderValue::from_static("zstd"));
        Ok(RequestBody::CompressedJson {
            bytes,
            raw_json_bytes,
        })
    } else {
        Ok(RequestBody::Json(body.clone()))
    }
}
}
/// Request body in the form it will be handed to the HTTP client.
enum RequestBody {
/// Plain JSON value, sent without additional encoding.
Json(serde_json::Value),
/// zstd-compressed JSON.
CompressedJson {
/// Compressed bytes to put on the wire.
bytes: Vec<u8>,
/// Size in bytes of the JSON before compression.
raw_json_bytes: usize,
},
}
impl RequestBody {
    /// Short label for the body encoding: `"json"` or `"zstd"`.
    fn encoding(&self) -> &'static str {
        match self {
            Self::CompressedJson { .. } => "zstd",
            Self::Json(_) => "json",
        }
    }

    /// Size in bytes of the uncompressed JSON, or `None` if the plain JSON
    /// value cannot be serialized to measure it.
    fn raw_json_bytes(&self) -> Option<usize> {
        match self {
            Self::CompressedJson { raw_json_bytes, .. } => Some(*raw_json_bytes),
            Self::Json(body) => Self::serialized_len(body),
        }
    }

    /// Size in bytes of the payload as encoded for the wire, or `None` if the
    /// plain JSON value cannot be serialized to measure it.
    fn wire_bytes(&self) -> Option<usize> {
        match self {
            Self::CompressedJson { bytes, .. } => Some(bytes.len()),
            Self::Json(body) => Self::serialized_len(body),
        }
    }

    /// Serializes `body` solely to measure its length in bytes.
    fn serialized_len(body: &serde_json::Value) -> Option<usize> {
        let encoded = serde_json::to_vec(body).ok()?;
        Some(encoded.len())
    }
}
/// Maps the resolved auth variant to a static label that contains no secret
/// material; used as the `auth_kind` field of transport trace payloads.
fn auth_kind(auth: &ResolvedAuth) -> &'static str {
match auth {
ResolvedAuth::None => "none",
ResolvedAuth::ApiKey(_) => "api_key",
ResolvedAuth::Custom(_) => "custom",
}
}
/// Forwards a labelled diagnostic payload to the request's transport trace
/// sink, if one is attached; does nothing otherwise.
fn emit_transport_trace(request: &Request, label: &str, data: Value) {
if let Some(trace) = &request.transport_trace {
trace.emit(label, data);
}
}
/// Returns the name of the JSON type of `value` (`"null"`, `"bool"`,
/// `"number"`, `"string"`, `"array"` or `"object"`) without exposing its
/// contents.
fn json_value_kind(value: &Value) -> &'static str {
match value {
Value::Null => "null",
Value::Bool(_) => "bool",
Value::Number(_) => "number",
Value::String(_) => "string",
Value::Array(_) => "array",
Value::Object(_) => "object",
}
}
/// Awaits an HTTP response future, giving up after `timeout`.
///
/// # Errors
///
/// Returns [`ClientError::Timeout`] tagged with `phase` when the deadline
/// passes first, or [`ClientError::Http`] when the future itself fails.
async fn response_with_timeout(
    future: impl std::future::Future<Output = Result<reqwest::Response, reqwest::Error>>,
    timeout: Duration,
    phase: &'static str,
) -> Result<reqwest::Response, ClientError> {
    match tokio::time::timeout(timeout, future).await {
        Ok(outcome) => outcome.map_err(ClientError::Http),
        Err(_elapsed) => Err(ClientError::Timeout { phase, timeout }),
    }
}
impl<S: Scheme + Clone> Clone for HttpTransport<S> {
@ -161,6 +284,40 @@ impl<S: Scheme + Clone> Clone for HttpTransport<S> {
}
}
/// Converts an error response into `ClientError::Api`.
///
/// The HTTP status and `Retry-After` header are captured before the body is
/// consumed. When the body is JSON, `type` and `message` are read from its
/// `error` object (or from the top level if there is no `error` key); when
/// there is no string `message`, or the body is not JSON, the raw body text
/// becomes the message.
async fn classify_error_response(resp: reqwest::Response) -> ClientError {
    let status = resp.status().as_u16();
    // Only an integer number of seconds is understood here; any other
    // `Retry-After` value results in `None`.
    let retry_after = match resp.headers().get(RETRY_AFTER) {
        Some(raw) => raw
            .to_str()
            .ok()
            .and_then(|s| s.trim().parse::<u64>().ok())
            .map(Duration::from_secs),
        None => None,
    };
    // A body that cannot be read is treated as empty.
    let text = resp.text().await.unwrap_or_default();

    let parsed = serde_json::from_str::<serde_json::Value>(&text).ok();
    let (code, message) = match parsed {
        Some(json) => {
            let error = json.get("error").unwrap_or(&json);
            let code = error.get("type").and_then(|v| v.as_str()).map(String::from);
            let message = error
                .get("message")
                .and_then(|v| v.as_str())
                .map(String::from);
            (code, message.unwrap_or(text))
        }
        None => (None, text),
    };

    ClientError::Api {
        status: Some(status),
        code,
        message,
        retry_after,
    }
}
#[async_trait]
impl<S: Scheme + Clone + 'static> LlmClient for HttpTransport<S> {
fn clone_boxed(&self) -> Box<dyn LlmClient> {
@ -171,48 +328,176 @@ impl<S: Scheme + Clone + 'static> LlmClient for HttpTransport<S> {
self.scheme.validate_config(config)
}
async fn stream(
&self,
request: Request,
) -> Result<Pin<Box<dyn Stream<Item = Result<Event, ClientError>> + Send>>, ClientError> {
async fn stream(&self, request: Request) -> Result<ResponseStream, ClientError> {
let total_started = Instant::now();
let path = self.scheme.path(&self.model_id);
emit_transport_trace(
&request,
"transport_start",
json!({
"model": &self.model_id,
"path": path,
"auth_kind": auth_kind(&self.auth),
"required_auth": format!("{:?}", self.scheme.required_auth()),
"codex_backend": self.is_codex_backend(),
"cache_key_present": request.cache_key.is_some(),
"stream_open_timeout_ms": DEFAULT_STREAM_OPEN_TIMEOUT.as_millis() as u64,
}),
);
let url = self.build_url();
let headers = self.build_headers().await?;
let headers_started = Instant::now();
emit_transport_trace(
&request,
"transport_headers_start",
json!({
"auth_kind": auth_kind(&self.auth),
"required_auth": format!("{:?}", self.scheme.required_auth()),
}),
);
let mut headers = match self.build_headers().await {
Ok(headers) => {
emit_transport_trace(
&request,
"transport_headers_done",
json!({
"elapsed_ms": headers_started.elapsed().as_millis() as u64,
"headers_len": headers.len(),
}),
);
headers
}
Err(error) => {
emit_transport_trace(
&request,
"transport_headers_error",
json!({
"elapsed_ms": headers_started.elapsed().as_millis() as u64,
"error": error.to_string(),
}),
);
return Err(error);
}
};
let stream_headers_started = Instant::now();
if let Err(error) = self.apply_stream_headers(&mut headers, &request) {
emit_transport_trace(
&request,
"transport_stream_headers_error",
json!({
"elapsed_ms": stream_headers_started.elapsed().as_millis() as u64,
"error": error.to_string(),
}),
);
return Err(error);
}
emit_transport_trace(
&request,
"transport_stream_headers_done",
json!({
"elapsed_ms": stream_headers_started.elapsed().as_millis() as u64,
"headers_len": headers.len(),
}),
);
let body_started = Instant::now();
emit_transport_trace(&request, "transport_body_build_start", json!({}));
let body = self
.scheme
.build_request_body(&self.model_id, &request, &self.capability);
emit_transport_trace(
&request,
"transport_body_build_done",
json!({
"elapsed_ms": body_started.elapsed().as_millis() as u64,
"body_kind": json_value_kind(&body),
}),
);
let response = self
.http_client
.post(&url)
.headers(headers)
.json(&body)
.send()
.await?;
let encode_started = Instant::now();
let request_body = match self.encode_request_body(&body, &mut headers) {
Ok(body) => body,
Err(error) => {
emit_transport_trace(
&request,
"transport_body_encode_error",
json!({
"elapsed_ms": encode_started.elapsed().as_millis() as u64,
"error": error.to_string(),
}),
);
return Err(error);
}
};
emit_transport_trace(
&request,
"transport_body_encode_done",
json!({
"elapsed_ms": encode_started.elapsed().as_millis() as u64,
"encoding": request_body.encoding(),
"raw_json_bytes": request_body.raw_json_bytes(),
"wire_bytes": request_body.wire_bytes(),
}),
);
let builder = self.http_client.post(&url).headers(headers);
let builder = match request_body {
RequestBody::Json(body) => builder.json(&body),
RequestBody::CompressedJson { bytes, .. } => builder.body(bytes),
};
let send_started = Instant::now();
emit_transport_trace(&request, "transport_http_send_start", json!({}));
let response =
match response_with_timeout(builder.send(), DEFAULT_STREAM_OPEN_TIMEOUT, "stream_open")
.await
{
Ok(response) => {
emit_transport_trace(
&request,
"transport_http_headers_received",
json!({
"elapsed_ms": send_started.elapsed().as_millis() as u64,
"status": response.status().as_u16(),
"success": response.status().is_success(),
}),
);
response
}
Err(error) => {
emit_transport_trace(
&request,
"transport_http_send_error",
json!({
"elapsed_ms": send_started.elapsed().as_millis() as u64,
"error": error.to_string(),
}),
);
return Err(error);
}
};
if !response.status().is_success() {
let status = response.status().as_u16();
let text = response.text().await.unwrap_or_default();
if let Ok(json) = serde_json::from_str::<serde_json::Value>(&text) {
let error = json.get("error").unwrap_or(&json);
let code = error.get("type").and_then(|v| v.as_str()).map(String::from);
let message = error
.get("message")
.and_then(|v| v.as_str())
.unwrap_or(&text)
.to_string();
return Err(ClientError::Api {
status: Some(status),
code,
message,
});
}
return Err(ClientError::Api {
status: Some(status),
code: None,
message: text,
});
emit_transport_trace(
&request,
"transport_http_status_error",
json!({
"status": response.status().as_u16(),
"retry_after_present": response.headers().get(RETRY_AFTER).is_some(),
}),
);
return Err(classify_error_response(response).await);
}
emit_transport_trace(
&request,
"transport_stream_ready",
json!({
"elapsed_ms": total_started.elapsed().as_millis() as u64,
}),
);
let scheme = self.scheme.clone();
let byte_stream = response.bytes_stream().map_err(std::io::Error::other);
let event_stream = byte_stream.eventsource();
@ -240,3 +525,170 @@ impl<S: Scheme + Clone + 'static> LlmClient for HttpTransport<S> {
Ok(Box::pin(stream))
}
}
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
/// Auth provider stub whose Codex-backend flag is chosen by the test.
#[derive(Debug)]
struct TestAuthProvider {
codex: bool,
}
#[async_trait]
impl AuthProvider for TestAuthProvider {
// Always returns the same two fixed headers.
async fn headers(&self) -> Result<Vec<(HeaderName, HeaderValue)>, ClientError> {
Ok(vec![
(
HeaderName::from_static("authorization"),
HeaderValue::from_static("Bearer test-token"),
),
(
HeaderName::from_static("chatgpt-account-id"),
HeaderValue::from_static("account-1"),
),
])
}
fn is_codex_backend(&self) -> bool {
self.codex
}
}
/// Minimal scheme whose request body echoes the model id, the number of
/// items and the cache key, and which produces no events from SSE input.
#[derive(Clone)]
struct TestScheme;
impl Scheme for TestScheme {
type State = ();
fn default_base_url(&self) -> &'static str {
"https://example.test"
}
fn path(&self, _model_id: &str) -> String {
"/responses".to_string()
}
fn required_auth(&self) -> AuthRequirement {
AuthRequirement::Bearer
}
fn build_request_body(
&self,
model_id: &str,
request: &Request,
_capability: &ModelCapability,
) -> serde_json::Value {
json!({
"model": model_id,
"input_len": request.items.len(),
"prompt_cache_key": request.cache_key,
})
}
fn parse_sse(
&self,
_event_type: &str,
_data: &str,
_state: &mut Self::State,
) -> Result<Vec<Event>, ClientError> {
Ok(Vec::new())
}
fn default_capability(&self) -> ModelCapability {
ModelCapability::minimal()
}
}
/// Builds a transport over `TestScheme` with the given auth.
fn transport(auth: ResolvedAuth) -> HttpTransport<TestScheme> {
HttpTransport::new(
TestScheme,
"gpt-test",
"https://example.test",
auth,
ModelCapability::minimal(),
)
}
#[tokio::test]
async fn response_timeout_returns_retryable_lifecycle_timeout() {
// A future that never completes forces the timeout branch.
let err = response_with_timeout(
std::future::pending::<Result<reqwest::Response, reqwest::Error>>(),
Duration::from_millis(5),
"stream_open",
)
.await
.unwrap_err();
// The timeout must be classified as retryable and carry the phase label.
assert!(crate::llm_client::error::is_retryable(&err));
assert!(matches!(
err,
ClientError::Timeout {
phase: "stream_open",
..
}
));
}
#[tokio::test]
async fn codex_backend_adds_conversation_headers_and_zstd_body() {
let transport = transport(ResolvedAuth::Custom(Arc::new(TestAuthProvider {
codex: true,
})));
let request = Request::new().user("hello").cache_key("segment-123");
// Run the header and body steps directly, without sending anything.
let mut headers = transport.build_headers().await.unwrap();
transport
.apply_stream_headers(&mut headers, &request)
.unwrap();
let body = transport.scheme.build_request_body(
&transport.model_id,
&request,
&transport.capability,
);
let encoded = transport.encode_request_body(&body, &mut headers).unwrap();
// The cache key is mirrored into both conversation headers.
assert_eq!(headers.get(ACCEPT).unwrap(), "text/event-stream");
assert_eq!(headers.get("session_id").unwrap(), "segment-123");
assert_eq!(headers.get("x-client-request-id").unwrap(), "segment-123");
assert_eq!(headers.get(CONTENT_ENCODING).unwrap(), "zstd");
let RequestBody::CompressedJson {
bytes: compressed,
raw_json_bytes,
} = encoded
else {
panic!("Codex backend request body must be zstd-compressed");
};
assert!(raw_json_bytes > 0);
// Round-trip: decompressing must yield the original JSON body.
let decoded = zstd::stream::decode_all(std::io::Cursor::new(compressed)).unwrap();
let decoded: serde_json::Value = serde_json::from_slice(&decoded).unwrap();
assert_eq!(decoded["prompt_cache_key"], "segment-123");
}
#[tokio::test]
async fn non_codex_request_does_not_get_codex_only_headers_or_compression() {
let transport = transport(ResolvedAuth::ApiKey("api-key".to_string()));
let request = Request::new().user("hello").cache_key("segment-123");
let mut headers = transport.build_headers().await.unwrap();
transport
.apply_stream_headers(&mut headers, &request)
.unwrap();
let body = transport.scheme.build_request_body(
&transport.model_id,
&request,
&transport.capability,
);
let encoded = transport.encode_request_body(&body, &mut headers).unwrap();
// Only the generic streaming header is added; Codex-only headers and
// compression must be absent even though a cache key is set.
assert_eq!(headers.get(ACCEPT).unwrap(), "text/event-stream");
assert!(headers.get("session_id").is_none());
assert!(headers.get("x-client-request-id").is_none());
assert!(headers.get(CONTENT_ENCODING).is_none());
let RequestBody::Json(decoded) = encoded else {
panic!("non-Codex request body must remain normal JSON");
};
assert_eq!(decoded["prompt_cache_key"], "segment-123");
}
}

View File

@ -7,8 +7,14 @@
//! - ToolResult items (tool results)
//! - Reasoning items (extended thinking)
use std::{fmt, sync::Arc};
use serde::{Deserialize, Serialize};
/// Predicate for serde's `skip_serializing_if`: returns `true` when the flag is `false`.
fn is_false(value: &bool) -> bool {
    matches!(*value, false)
}
// ============================================================================
// Item - The core unit of conversation
// ============================================================================
@ -19,6 +25,35 @@ pub type ItemId = String;
/// Call ID type for linking function calls to their outputs
pub type CallId = String;
/// Callback sink for request-local transport lifecycle diagnostics.
///
/// This is carried on [`Request`] so generic [`crate::llm_client::LlmClient`]
/// implementations can emit fine-grained transport milestones without widening
/// the trait method signature. The callback must never receive request body
/// contents or secret header values.
#[derive(Clone)]
pub struct RequestTrace {
// Shared callback invoked by `emit` with a label and a JSON payload.
callback: Arc<dyn Fn(&str, serde_json::Value) + Send + Sync>,
}
impl RequestTrace {
    /// Wraps `callback` in an `Arc` so the trace can be cloned cheaply.
    pub fn new(callback: impl Fn(&str, serde_json::Value) + Send + Sync + 'static) -> Self {
        let callback: Arc<dyn Fn(&str, serde_json::Value) + Send + Sync> = Arc::new(callback);
        Self { callback }
    }

    /// Invokes the stored callback with `label` and `data`.
    pub fn emit(&self, label: &str, data: serde_json::Value) {
        let sink = &*self.callback;
        sink(label, data);
    }
}
/// Opaque `Debug` output: the callback itself cannot be printed, so only the
/// type name is shown as a non-exhaustive struct.
impl fmt::Debug for RequestTrace {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut builder = f.debug_struct("RequestTrace");
        builder.finish_non_exhaustive()
    }
}
/// Conversation item - the primary unit of conversation history
///
/// Items represent discrete elements in a conversation. Tool calls and reasoning
@ -79,6 +114,9 @@ pub enum Item {
/// Detailed output (removed by pruning when old enough)
#[serde(default, skip_serializing_if = "Option::is_none")]
content: Option<String>,
/// Whether the tool result represents an execution error.
#[serde(default, skip_serializing_if = "is_false")]
is_error: bool,
},
/// Reasoning/thinking item
@ -94,8 +132,15 @@ pub enum Item {
summary: Vec<String>,
/// サーバから返された暗号化済み reasoning blob。ZDR / `store=false`
/// 運用で stateless に再送するときそのまま添える必要がある。
/// Anthropic の `redacted_thinking.data` もここに格納する。
#[serde(default, skip_serializing_if = "Option::is_none")]
encrypted_content: Option<String>,
/// Anthropic extended thinking の `signature`。新世代 Claude
/// (Opus 4.5+/Sonnet 4.6+) では同一論理ターン内の `thinking`
/// ブロックを送り返す際に必須。改ざん検知に使われる。他 scheme
/// では `None`。
#[serde(default, skip_serializing_if = "Option::is_none")]
signature: Option<String>,
/// Item status
#[serde(skip_serializing_if = "Option::is_none")]
status: Option<ItemStatus>,
@ -191,11 +236,27 @@ impl Item {
/// Build a non-error tool result that carries only a summary (no detailed content).
pub fn tool_result(call_id: impl Into<String>, summary: impl Into<String>) -> Self {
    let (content, is_error) = (None, false);
    Self::tool_result_item(call_id, summary, content, is_error)
}
/// Build an error-flagged tool result that carries only a summary (no detailed content).
pub fn tool_result_error(call_id: impl Into<String>, summary: impl Into<String>) -> Self {
    let (content, is_error) = (None, true);
    Self::tool_result_item(call_id, summary, content, is_error)
}
/// Create a tool result item with summary, optional content, and error flag.
pub fn tool_result_item(
call_id: impl Into<String>,
summary: impl Into<String>,
content: Option<String>,
is_error: bool,
) -> Self {
Self::ToolResult {
id: None,
call_id: call_id.into(),
summary: summary.into(),
content: None,
content,
is_error,
}
}
@ -205,12 +266,7 @@ impl Item {
summary: impl Into<String>,
content: impl Into<String>,
) -> Self {
Self::ToolResult {
id: None,
call_id: call_id.into(),
summary: summary.into(),
content: Some(content.into()),
}
Self::tool_result_item(call_id, summary, Some(content.into()), false)
}
// ========================================================================
@ -224,6 +280,7 @@ impl Item {
text: text.into(),
summary: Vec::new(),
encrypted_content: None,
signature: None,
status: None,
}
}
@ -247,6 +304,14 @@ impl Item {
self
}
/// Attach an Anthropic `signature` to a `Reasoning` item.
///
/// Any other variant is returned unchanged.
pub fn with_signature(mut self, sig: impl Into<String>) -> Self {
    match &mut self {
        Self::Reasoning { signature, .. } => *signature = Some(sig.into()),
        _ => {}
    }
    self
}
// ========================================================================
// Builder methods
// ========================================================================
@ -455,6 +520,17 @@ pub struct Request {
/// (Anthropic today) can place a long-lived cache breakpoint there.
/// Providers without prompt caching ignore the field.
pub cache_anchor: Option<usize>,
/// 会話単位の安定キー。`prompt_cache_key` として送られる
/// (OpenAI Responses)。ChatGPT backend (codex-oauth) は明示キーが
/// 無いと org/project ハッシュ衝突でプロンプトキャッシュが
/// ほぼヒットしないため、pod 側で `SegmentId` を渡す運用を想定。
/// `cache_anchor` と違い名前空間キーであり、`prefix anchor` とは
/// 別の概念。`cache_anchor` を読まない provider と同じく、
/// `prompt_cache_key` を持たない provider は無視する。
pub cache_key: Option<String>,
/// Request-local diagnostics sink for transport lifecycle tracing.
#[doc(hidden)]
pub transport_trace: Option<RequestTrace>,
}
impl Request {
@ -505,6 +581,15 @@ impl Request {
self
}
/// Install `callback` as this request's transport trace sink (builder style).
pub fn transport_trace(
    mut self,
    callback: impl Fn(&str, serde_json::Value) + Send + Sync + 'static,
) -> Self {
    let trace = RequestTrace::new(callback);
    self.transport_trace = Some(trace);
    self
}
/// Set max tokens
pub fn max_tokens(mut self, max_tokens: u32) -> Self {
self.config.max_tokens = Some(max_tokens);
@ -534,6 +619,14 @@ impl Request {
self.config.stop_sequences.push(sequence.into());
self
}
/// Set the conversation cache key.
///
/// See the field documentation on [`Request::cache_key`] for details.
pub fn cache_key(mut self, key: impl Into<String>) -> Self {
    let key: String = key.into();
    self.cache_key = Some(key);
    self
}
}
// ============================================================================

View File

@ -11,12 +11,23 @@
//! 射影の適用は上位層(`pod::prune_hook` 等)が LLM に送る一時コンテキスト
//! に対してだけ行う。Worker の永続履歴は決して変更されない。
//!
//! `min_savings` 判定や savings 推定もこの crate には置かず、上位層が
//! usage 履歴ベースのトークン会計と組み合わせて行う。
//! 保護境界は末尾 token budget で決めるが、この crate は usage 履歴を
//! 所有しない。prefix ごとの token 推定値と savings 推定は上位層から
//! callback で注入される。
use serde::{Deserialize, Serialize};
use crate::llm_client::types::Item;
use crate::token_counter::{EstimateSource, TokenEstimate};
/// Callback that returns token estimates for every prefix boundary of the
/// supplied request history.
///
/// The returned vector must have `history.len() + 1` entries where entry `i`
/// estimates the token count of `history[..i]`. Returning a malformed vector,
/// or estimates whose source is [`EstimateSource::NoData`], makes prune treat
/// the request as having no candidates.
pub type TokenEstimator = Box<dyn Fn(&[Item]) -> Vec<TokenEstimate> + Send + Sync>;
/// Callback that estimates the token savings for projecting the
/// `ToolResult.content` out of `history[i]` for each `i` in `indices`.
@ -30,13 +41,49 @@ use crate::llm_client::types::Item;
/// 実際の projection と一致する savings を返す必要がある。
pub type SavingsEstimator = Box<dyn Fn(&[Item], &[usize]) -> u64 + Send + Sync>;
/// Result of one prune evaluation pass, surfaced to the optional
/// [`PruneObserver`] for instrumentation.
///
/// The Worker evaluates prune once per LLM request and, when an observer is
/// registered, reports the outcome with this value. It carries the fire/skip
/// decision plus the inputs to that decision: candidate count, estimated
/// savings, and the start index of the protected region.
#[derive(Debug, Clone)]
pub struct PruneEvaluation {
/// Length of `prunable_indices`. 0 for [`PruneDecision::SkippedNoCandidates`].
pub candidate_count: usize,
/// Estimated savings in tokens. 0 when there were no candidates.
pub estimated_savings: u64,
/// Index of the first item in the suffix protected by the token budget.
/// `None` when the usage estimate was `NoData` and no boundary could be determined.
pub protected_start_index: Option<usize>,
/// The decision that was taken.
pub decision: PruneDecision,
}
/// Outcome of one prune evaluation. Each variant is one branch of the
/// "fire vs skip" decision tree the Worker walks before each LLM request.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PruneDecision {
/// `prunable_indices` was empty, so nothing was done.
SkippedNoCandidates,
/// Candidates existed but the estimated savings were below `min_savings`,
/// so nothing was done.
SkippedBelowMinSavings,
/// Candidates existed and savings >= min_savings, so the projection was applied.
/// `pruned_count` is the number of items `project()` actually rewrote
/// (candidates whose content was already `None` count as 0).
Fired { pruned_count: usize },
}
/// Optional observer invoked after each prune evaluation, regardless of
/// branch. Installed by upper layers such as Pod to publish metrics.
pub type PruneObserver = Box<dyn Fn(&PruneEvaluation) + Send + Sync>;
/// Configuration for the Prune algorithm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PruneConfig {
/// Number of recent turns to protect from pruning.
/// A "turn" starts at each user message.
#[serde(default = "default_protected_turns")]
pub protected_turns: usize,
/// Token budget at the history tail protected from pruning.
#[serde(default = "default_protected_tokens")]
pub protected_tokens: u64,
/// Minimum token savings required to actually prune. If the prunable
/// content is smaller than this, the caller should skip to avoid
@ -47,8 +94,8 @@ pub struct PruneConfig {
pub min_savings: u64,
}
fn default_protected_turns() -> usize {
3
fn default_protected_tokens() -> u64 {
8000
}
fn default_min_savings() -> u64 {
4096
@ -57,25 +104,12 @@ fn default_min_savings() -> u64 {
impl Default for PruneConfig {
fn default() -> Self {
Self {
protected_turns: default_protected_turns(),
protected_tokens: default_protected_tokens(),
min_savings: default_min_savings(),
}
}
}
/// Locate the start of every "turn".
///
/// Each user message opens a new turn; the result lists the positions of
/// those user messages in ascending order.
fn find_turn_starts(items: &[Item]) -> Vec<usize> {
    let mut starts = Vec::new();
    for (position, item) in items.iter().enumerate() {
        if item.is_user_message() {
            starts.push(position);
        }
    }
    starts
}
/// Set `content = None` on each `Item::ToolResult` at the given indices.
///
/// Returns the number of items that were actually modified — items that
@ -84,28 +118,43 @@ fn find_turn_starts(items: &[Item]) -> Vec<usize> {
pub fn project(items: &mut [Item], indices: &[usize]) -> usize {
let mut count = 0;
for &i in indices {
if let Item::ToolResult { content, .. } = &mut items[i] {
if content.is_some() {
if let Item::ToolResult { content, .. } = &mut items[i]
&& content.is_some()
{
*content = None;
count += 1;
}
}
}
count
}
/// Indices of `Item::ToolResult { content: Some(_), .. }` that lie outside
/// the last `protected_turns` turns. Pure: does not mutate `items`.
/// Indices of `Item::ToolResult { content: Some(_), .. }` that lie before
/// the suffix protected by `protected_tokens`. Pure: does not mutate `items`.
///
/// Returns an empty vector when there are too few turns or no prunable
/// candidates.
pub fn prunable_indices(items: &[Item], protected_turns: usize) -> Vec<usize> {
let turn_starts = find_turn_starts(items);
if turn_starts.len() <= protected_turns {
return Vec::new();
/// Returns an empty vector when token estimates are unavailable (`NoData`) or
/// no prunable candidates exist.
pub fn prunable_indices(
items: &[Item],
protected_tokens: u64,
token_estimates: &[TokenEstimate],
) -> Vec<usize> {
evaluate_candidates(items, protected_tokens, token_estimates).0
}
let boundary = turn_starts[turn_starts.len() - protected_turns];
items[..boundary]
/// Same as [`prunable_indices`] but also returns the start index of the
/// protected suffix. `None` means the token boundary could not be determined
/// (currently because usage estimates were `NoData` or malformed).
pub fn evaluate_candidates(
items: &[Item],
protected_tokens: u64,
token_estimates: &[TokenEstimate],
) -> (Vec<usize>, Option<usize>) {
let Some(protected_start) = protected_start_index(items, protected_tokens, token_estimates)
else {
return (Vec::new(), None);
};
let candidates = items[..protected_start]
.iter()
.enumerate()
.filter_map(|(i, item)| match item {
@ -114,7 +163,39 @@ pub fn prunable_indices(items: &[Item], protected_turns: usize) -> Vec<usize> {
} => Some(i),
_ => None,
})
.collect()
.collect();
(candidates, Some(protected_start))
}
/// Computes the index of the first item in the suffix protected by `protected_tokens`.
///
/// `token_estimates[i]` is the estimated token count of `items[..i]`, so the
/// table must hold `items.len() + 1` entries. Walking backwards from the end,
/// the suffix grows one item at a time until its token count reaches
/// `protected_tokens`; the index where that happens is returned. If the whole
/// history stays below the budget the result is `Some(0)`.
///
/// Returns `None` when the table has the wrong length, or when the total or any
/// prefix visited during the walk is [`EstimateSource::NoData`]. A zero budget
/// protects nothing and yields `Some(items.len())`.
fn protected_start_index(
    items: &[Item],
    protected_tokens: u64,
    token_estimates: &[TokenEstimate],
) -> Option<usize> {
    let item_count = items.len();
    if token_estimates.len() != item_count + 1 {
        return None;
    }
    // The last entry is the total; everything before it is a strict prefix.
    let (total, prefixes) = token_estimates.split_last()?;
    if total.source == EstimateSource::NoData {
        return None;
    }
    if protected_tokens == 0 {
        return Some(item_count);
    }

    let mut boundary = item_count;
    for (idx, prefix) in prefixes.iter().enumerate().rev() {
        if prefix.source == EstimateSource::NoData {
            return None;
        }
        boundary = idx;
        // Tokens held by items[idx..]; stop once the suffix fills the budget.
        if total.tokens.saturating_sub(prefix.tokens) >= protected_tokens {
            break;
        }
    }
    Some(boundary)
}
#[cfg(test)]
@ -139,17 +220,70 @@ mod tests {
items
}
/// Wraps each raw token count in a `TokenEstimate` tagged as `Measured`.
fn measured_prefix(tokens: &[u64]) -> Vec<TokenEstimate> {
    let mut estimates = Vec::with_capacity(tokens.len());
    for &count in tokens {
        estimates.push(TokenEstimate {
            tokens: count,
            source: EstimateSource::Measured,
        });
    }
    estimates
}
/// Prefix estimates for `items` where every item costs exactly `item_tokens`.
fn uniform_estimates(items: &[Item], item_tokens: u64) -> Vec<TokenEstimate> {
    let prefix: Vec<u64> = (0..=items.len())
        .map(|count| count as u64 * item_tokens)
        .collect();
    measured_prefix(&prefix)
}
/// Turns per-item token costs into measured prefix estimates (running sum with a leading 0).
fn estimates_from_item_tokens(item_tokens: &[u64]) -> Vec<TokenEstimate> {
    let running_totals = item_tokens.iter().scan(0u64, |sum, &cost| {
        *sum += cost;
        Some(*sum)
    });
    let prefix: Vec<u64> = std::iter::once(0).chain(running_totals).collect();
    measured_prefix(&prefix)
}
/// Prefix estimates where only entry 0 is `Measured` and every later entry is `NoData`.
fn no_data_estimates(items: &[Item]) -> Vec<TokenEstimate> {
    let mut estimates = Vec::with_capacity(items.len() + 1);
    for i in 0..=items.len() {
        let source = match i {
            0 => EstimateSource::Measured,
            _ => EstimateSource::NoData,
        };
        estimates.push(TokenEstimate {
            tokens: i as u64,
            source,
        });
    }
    estimates
}
#[test]
fn no_candidates_when_too_few_turns() {
fn no_candidates_when_estimate_has_no_data() {
let items = make_history(&[("turn1", vec![("summary1", Some("big content here"))])]);
let estimates = no_data_estimates(&items);
let (candidates, protected_start) = evaluate_candidates(&items, 10, &estimates);
assert!(candidates.is_empty());
assert_eq!(protected_start, None);
}
#[test]
fn no_candidates_when_history_fits_in_protected_tokens() {
let items = make_history(&[
("turn1", vec![("summary1", Some("big content here"))]),
("turn2", vec![("summary2", Some("more content"))]),
]);
assert!(prunable_indices(&items, 3).is_empty());
let estimates = uniform_estimates(&items, 10);
assert!(prunable_indices(&items, 10_000, &estimates).is_empty());
}
#[test]
fn candidates_in_unprotected_turns() {
fn candidates_before_token_protected_suffix() {
let big = "x".repeat(4096 * 4);
let items = make_history(&[
("turn1", vec![("s1", Some(&big))]),
@ -157,9 +291,39 @@ mod tests {
("turn3", vec![("s3", Some("keep me"))]),
("turn4", vec![("s4", Some("keep me too"))]),
]);
let candidates = prunable_indices(&items, 2);
let estimates = uniform_estimates(&items, 10);
let candidates = prunable_indices(&items, 80, &estimates);
assert_eq!(candidates.len(), 2);
// suffix budget 80 tokens protects turn3+turn4 (8 items), so only s1/s2 are candidates.
for &i in &candidates {
if let Item::ToolResult { summary, .. } = &items[i] {
assert!(summary == "s1" || summary == "s2");
} else {
panic!("non tool-result selected");
}
}
}
#[test]
fn single_long_task_gets_candidates_without_multiple_user_turns() {
let big = "x".repeat(4096 * 8);
let items = make_history(&[(
"one long task",
vec![
("s1", Some(&big)),
("s2", Some(&big)),
("s3", Some(&big)),
("s4", Some(&big)),
],
)]);
// user + assistant are cheap; every ToolCall is cheap; every ToolResult is heavy.
let item_tokens = vec![1, 1, 1, 5_000, 1, 5_000, 1, 5_000, 1, 5_000];
let estimates = estimates_from_item_tokens(&item_tokens);
let (candidates, protected_start) = evaluate_candidates(&items, 8_000, &estimates);
assert_eq!(protected_start, Some(7));
assert_eq!(candidates.len(), 2);
// Only the first two ToolResults (s1 and s2) are candidates.
for &i in &candidates {
if let Item::ToolResult { summary, .. } = &items[i] {
assert!(summary == "s1" || summary == "s2");
@ -177,7 +341,8 @@ mod tests {
("turn3", vec![]),
("turn4", vec![]),
]);
assert!(prunable_indices(&items, 2).is_empty());
let estimates = uniform_estimates(&items, 10);
assert!(prunable_indices(&items, 20, &estimates).is_empty());
}
#[test]
@ -189,7 +354,8 @@ mod tests {
("turn3", vec![("s3", Some("keep me"))]),
("turn4", vec![("s4", Some("keep me too"))]),
]);
let candidates = prunable_indices(&items, 2);
let estimates = uniform_estimates(&items, 10);
let candidates = prunable_indices(&items, 80, &estimates);
let count = project(&mut items, &candidates);
assert_eq!(count, 2);
@ -215,7 +381,7 @@ mod tests {
("turn1", vec![("s1", None)]),
("turn2", vec![("s2", Some("hello"))]),
]);
// Manually target s1 (index 3) even though it's already None.
// Manually target s1 even though it's already None.
let target = items
.iter()
.position(|it| matches!(it, Item::ToolResult { summary, .. } if summary == "s1"))
@ -233,27 +399,53 @@ mod tests {
("turn3", vec![]),
("turn4", vec![]),
]);
let candidates = prunable_indices(&items, 2);
let estimates = uniform_estimates(&items, 10);
let candidates = prunable_indices(&items, 20, &estimates);
assert_eq!(project(&mut items, &candidates), 1);
// 2 周目: 候補は一度の prunable_indices 結果を使い回しても 0 件。
assert_eq!(project(&mut items, &candidates), 0);
}
#[test]
fn protected_turns_boundary_exact() {
// 3 turns with protected_turns=2: only turn 1 is a candidate.
fn evaluate_candidates_returns_protected_start_index() {
let big = "x".repeat(64);
let items = make_history(&[
("turn1", vec![("s1", Some(&big))]),
("turn2", vec![("s2", Some("protected"))]),
("turn3", vec![("s3", Some("also protected"))]),
("turn2", vec![("s2", Some(&big))]),
("turn3", vec![("s3", Some("keep"))]),
("turn4", vec![("s4", Some("keep too"))]),
]);
let candidates = prunable_indices(&items, 2);
assert_eq!(candidates.len(), 1);
if let Item::ToolResult { summary, .. } = &items[candidates[0]] {
assert_eq!(summary, "s1");
} else {
panic!("expected ToolResult at candidate index");
}
let estimates = uniform_estimates(&items, 10);
let (candidates, protected_start) = evaluate_candidates(&items, 80, &estimates);
assert_eq!(candidates.len(), 2);
// protected_tokens=80 → protected suffix is turn3+turn4, starting at index 8.
assert_eq!(protected_start, Some(8));
}
// A budget larger than the whole history protects every item: start index 0, no candidates.
#[test]
fn evaluate_candidates_reports_zero_start_when_everything_is_protected() {
    let history = make_history(&[("only", vec![("s", Some("x"))])]);
    let prefix_tokens = uniform_estimates(&history, 10);
    let outcome = evaluate_candidates(&history, 10_000, &prefix_tokens);
    assert!(outcome.0.is_empty());
    assert_eq!(outcome.1, Some(0));
}
// A zero budget protects nothing: the boundary sits at the end and both tool results qualify.
#[test]
fn zero_protected_tokens_allows_all_tool_results_as_candidates() {
    let big = "x".repeat(64);
    let history = make_history(&[("turn1", vec![("s1", Some(&big)), ("s2", Some(&big))])]);
    let prefix_tokens = uniform_estimates(&history, 10);
    let outcome = evaluate_candidates(&history, 0, &prefix_tokens);
    assert_eq!(outcome.1, Some(history.len()));
    assert_eq!(outcome.0.len(), 2);
}
// An estimate table of the wrong length yields no boundary and no candidates.
#[test]
fn malformed_estimate_vector_is_treated_as_no_boundary() {
    let history = make_history(&[("turn1", vec![("s1", Some("x"))])]);
    let outcome = evaluate_candidates(&history, 10, &[]);
    assert!(outcome.0.is_empty());
    assert_eq!(outcome.1, None);
}
}

View File

@ -10,12 +10,14 @@
//! - [`ToolCallCollector`] - ツール呼び出しを収集するHandler
pub mod event;
mod reasoning_item_collector;
mod text_block_collector;
mod timeline;
mod tool_call_collector;
// 公開API
pub use event::*;
pub use reasoning_item_collector::ReasoningItemCollector;
pub use text_block_collector::TextBlockCollector;
pub use timeline::Timeline;
pub use tool_call_collector::ToolCallCollector;
@ -28,6 +30,7 @@ pub use crate::handler::{
Handler,
Kind,
PingKind,
ReasoningItemKind,
StatusKind,
// Block Events
TextBlockEvent,

View File

@ -0,0 +1,77 @@
//! `ReasoningItemCollector` - 完成済み reasoning item を収集する Handler
//!
//! Timeline の `ReasoningItemKind` Handler として登録し、scheme 側が
//! `Event::ReasoningItem` を発火するたびに 1 件ずつバッファに溜める。
//! Worker はターン終了時に `take_collected()` でドレインして
//! `Item::Reasoning` として `worker.history` に append する。
use std::sync::{Arc, Mutex};
use crate::handler::{Handler, ReasoningItemKind};
use crate::llm_client::event::ReasoningItemEvent;
/// Ordered buffer of collected reasoning items.
///
/// Clones share the same underlying buffer (the vector sits behind an `Arc`).
#[derive(Clone, Default)]
pub struct ReasoningItemCollector {
collected: Arc<Mutex<Vec<ReasoningItemEvent>>>,
}
impl ReasoningItemCollector {
pub fn new() -> Self {
Self::default()
}
/// 収集済み item を取り出してクリア
pub fn take_collected(&self) -> Vec<ReasoningItemEvent> {
let mut guard = self.collected.lock().unwrap();
std::mem::take(&mut *guard)
}
/// 収集をクリア
pub fn clear(&self) {
self.collected.lock().unwrap().clear();
}
}
/// Timeline hook: every reasoning item event is cloned and appended to the buffer.
impl Handler<ReasoningItemKind> for ReasoningItemCollector {
    type Scope = ();

    fn on_event(&mut self, _scope: &mut Self::Scope, event: &ReasoningItemEvent) {
        let snapshot = event.clone();
        let mut buffer = self.collected.lock().unwrap();
        buffer.push(snapshot);
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::llm_client::event::Event;
    use crate::timeline::Timeline;

    // Events dispatched through a Timeline arrive in order, and taking them drains the buffer.
    #[test]
    fn collects_in_order() {
        let collector = ReasoningItemCollector::new();
        let mut timeline = Timeline::new();
        timeline.on_reasoning_item(collector.clone());

        let signed = ReasoningItemEvent {
            id: Some("r1".into()),
            text: "first".into(),
            signature: Some("sig1".into()),
            ..Default::default()
        };
        let unsigned = ReasoningItemEvent {
            id: Some("r2".into()),
            text: "second".into(),
            ..Default::default()
        };
        for event in [signed, unsigned] {
            timeline.dispatch(&Event::ReasoningItem(event));
        }

        let collected = collector.take_collected();
        assert_eq!(collected.len(), 2);
        assert_eq!(collected[0].text, "first");
        assert_eq!(collected[0].signature.as_deref(), Some("sig1"));
        assert_eq!(collected[1].text, "second");
        // `take_collected` drains, so a second call returns nothing.
        assert!(collector.take_collected().is_empty());
    }
}

View File

@ -381,6 +381,7 @@ pub struct Timeline {
ping_handlers: Vec<Box<dyn ErasedHandler<PingKind>>>,
status_handlers: Vec<Box<dyn ErasedHandler<StatusKind>>>,
error_handlers: Vec<Box<dyn ErasedHandler<ErrorKind>>>,
reasoning_item_handlers: Vec<Box<dyn ErasedHandler<ReasoningItemKind>>>,
// Block系ハンドラー(BlockTypeごとにグループ化)
text_block_handlers: Vec<Box<dyn ErasedBlockHandler>>,
@ -410,6 +411,7 @@ impl Timeline {
ping_handlers: Vec::new(),
status_handlers: Vec::new(),
error_handlers: Vec::new(),
reasoning_item_handlers: Vec::new(),
text_block_handlers: Vec::new(),
thinking_block_handlers: Vec::new(),
tool_use_block_handlers: Vec::new(),
@ -471,6 +473,18 @@ impl Timeline {
self
}
/// `ReasoningItemKind` 用 Handler を登録
pub fn on_reasoning_item<H>(&mut self, handler: H) -> &mut Self
where
H: Handler<ReasoningItemKind> + Send + Sync + 'static,
H::Scope: Send + Sync,
{
let mut wrapper = HandlerWrapper::new(handler);
wrapper.start_scope();
self.reasoning_item_handlers.push(Box::new(wrapper));
self
}
/// TextBlockKind用のHandlerを登録
pub fn on_text_block<H>(&mut self, handler: H) -> &mut Self
where
@ -516,12 +530,17 @@ impl Timeline {
Event::Ping(p) => self.dispatch_ping(p),
Event::Status(s) => self.dispatch_status(s),
Event::Error(e) => self.dispatch_error(e),
// Observability-only event: stream trace records it before timeline dispatch.
Event::UnhandledSse(_) => {}
// Block系: スコープ管理しながらディスパッチ
Event::BlockStart(s) => self.handle_block_start(s),
Event::BlockDelta(d) => self.handle_block_delta(d),
Event::BlockStop(s) => self.handle_block_stop(s),
Event::BlockAbort(a) => self.handle_block_abort(a),
// 完成済み reasoning item: 即時ディスパッチ
Event::ReasoningItem(r) => self.dispatch_reasoning_item(r),
}
}
@ -564,6 +583,12 @@ impl Timeline {
}
}
/// Forwards `event` to every registered reasoning item handler, in registration order.
fn dispatch_reasoning_item(&mut self, event: &ReasoningItemEvent) {
    self.reasoning_item_handlers.iter_mut().for_each(|handler| {
        handler.dispatch(event);
    });
}
fn handle_block_start(&mut self, start: &BlockStart) {
self.current_block = Some(start.block_type);
@ -655,6 +680,36 @@ mod tests {
assert!(timeline.current_block().is_none());
}
// Dispatching `Event::UnhandledSse` must neither open a block nor reach text-block handlers.
#[test]
fn unhandled_sse_is_ignored_by_timeline_handlers() {
    // Text-block handler that records every event it receives.
    struct RecordingTextHandler {
        seen: Arc<Mutex<Vec<TextBlockEvent>>>,
    }

    impl Handler<TextBlockKind> for RecordingTextHandler {
        type Scope = ();

        fn on_event(&mut self, _scope: &mut (), event: &TextBlockEvent) {
            let mut seen = self.seen.lock().unwrap();
            seen.push(event.clone());
        }
    }

    let seen = Arc::new(Mutex::new(Vec::new()));
    let mut timeline = Timeline::new();
    timeline.on_text_block(RecordingTextHandler {
        seen: Arc::clone(&seen),
    });

    let unknown = Event::UnhandledSse(UnhandledSseEvent {
        provider: "openai_responses".to_string(),
        event_type: "response.mystery".to_string(),
        data_preview: "{}".to_string(),
        data_len: 2,
    });
    timeline.dispatch(&unknown);

    assert!(timeline.current_block().is_none());
    assert!(seen.lock().unwrap().is_empty());
}
#[test]
fn test_meta_event_dispatch() {
// シンプルなテスト用構造体

View File

@ -0,0 +1,222 @@
//! Usage 履歴ベースのトークン会計(汎用部分)。
//!
//! `UsageRecord` の列(プロバイダ実測値)と現在の history から、
//! 任意の history index 時点のプロンプト全長トークン数を pure に計算する。
//!
//! # 方針
//!
//! - ローカルトークナイザは持たない。実測値があればそれを採用し、
//! measurement 間はバイト数で按分、最新 measurement より先は最終 rate で外挿する
//! - 推定の出どころは [`EstimateSource`] で呼び出し側に明示する。
//! 課金判断には使えないが、compact / prune / memory extract trigger 等の
//! 閾値判定には十分な精度
//! - `records` は `history_len` 昇順を仮定する(呼び出し側がそのように積む)
use crate::{Item, UsageRecord};
/// Where a token estimate came from.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EstimateSource {
/// The index sits exactly on a measurement boundary (the measured value itself).
Measured,
/// Computed by byte-proportional interpolation between two adjacent measurements.
Interpolated,
/// The span newer than the last measurement, extrapolated with the final rate.
Extrapolated,
/// No measurement exists at all; byte-count-only fallback.
NoData,
}
/// An estimated token count together with its provenance.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct TokenEstimate {
/// Estimated number of tokens.
pub tokens: u64,
/// How the estimate was derived.
pub source: EstimateSource,
}
/// Returns the cumulative byte table for `items`: entry `i` is the total size
/// of `items[..i]`. The result has `items.len() + 1` entries and starts with 0.
pub fn prefix_bytes(items: &[Item]) -> Vec<u64> {
    let mut table = Vec::with_capacity(items.len() + 1);
    table.push(0u64);
    let mut running: u64 = 0;
    table.extend(items.iter().map(|item| {
        running = running.saturating_add(item_bytes(item));
        running
    }));
    table
}
/// Size of a single item, approximated by the length of its JSON serialization.
///
/// The value is only used to apportion token counts across spans, not as an
/// absolute byte-to-token conversion, so per-provider overhead cancels out in
/// the ratio. A serialization failure counts as 0 bytes.
pub fn item_bytes(item: &Item) -> u64 {
    match serde_json::to_string(item) {
        Ok(json) => json.len() as u64,
        Err(_) => 0,
    }
}
/// Estimates the token count of `history[..index]`.
///
/// `prefix` must be the cumulative byte table of length `history.len() + 1`
/// produced by [`prefix_bytes`]. Callers compute it once and reuse it, so
/// repeated estimates do not re-serialize the history each time.
///
/// Resolution order:
/// 1. `index == 0` gives 0 tokens, `Measured`.
/// 2. No records gives `prefix[index] / 4`, `NoData`.
/// 3. A record whose `history_len == index` gives its measured value (the
///    newest such record wins), `Measured`.
/// 4. Otherwise the nearest lower and upper records are used: byte-proportional
///    interpolation between both, extrapolation at the lower record's rate when
///    there is no upper record, or scaling down from the upper record when
///    there is no lower one.
///
/// All arithmetic saturates at `u64::MAX` instead of overflowing.
///
/// # Panics
///
/// Panics if `index` is out of bounds for `prefix`. Debug builds additionally
/// assert `index <= history.len()` and `prefix.len() == history.len() + 1`.
pub fn tokens_at(
    history: &[Item],
    records: &[UsageRecord],
    index: usize,
    prefix: &[u64],
) -> TokenEstimate {
    /// `bytes * tokens / span_bytes`, computed in `u128` and clamped to `u64`.
    /// Callers guarantee `span_bytes != 0`.
    fn scale(bytes: u64, tokens: u64, span_bytes: u64) -> u64 {
        let scaled = u128::from(bytes) * u128::from(tokens) / u128::from(span_bytes);
        u64::try_from(scaled).unwrap_or(u64::MAX)
    }

    debug_assert!(index <= history.len());
    debug_assert_eq!(prefix.len(), history.len() + 1);

    if index == 0 {
        return TokenEstimate {
            tokens: 0,
            source: EstimateSource::Measured,
        };
    }
    if records.is_empty() {
        return TokenEstimate {
            tokens: prefix[index] / 4,
            source: EstimateSource::NoData,
        };
    }

    // Exact match (scan in reverse so the newest record wins).
    if let Some(r) = records.iter().rev().find(|r| r.history_len == index) {
        return TokenEstimate {
            tokens: r.input_total_tokens,
            source: EstimateSource::Measured,
        };
    }

    let lower = records.iter().rev().find(|r| r.history_len < index);
    let upper = records.iter().find(|r| r.history_len > index);
    // Records may refer to a history longer than the current one; clamp into `prefix`.
    let cap = history.len();
    match (lower, upper) {
        (Some(lo), Some(up)) => {
            let lo_bytes = prefix[lo.history_len.min(cap)];
            let up_bytes = prefix[up.history_len.min(cap)];
            let at_bytes = prefix[index];
            let span_bytes = up_bytes.saturating_sub(lo_bytes);
            let span_tokens = up.input_total_tokens.saturating_sub(lo.input_total_tokens);
            if span_bytes == 0 || span_tokens == 0 {
                // Degenerate span: nothing to apportion, fall back to the lower measurement.
                return TokenEstimate {
                    tokens: lo.input_total_tokens,
                    source: EstimateSource::Interpolated,
                };
            }
            let delta_bytes = at_bytes.saturating_sub(lo_bytes);
            let delta_tokens = scale(delta_bytes, span_tokens, span_bytes);
            TokenEstimate {
                tokens: lo.input_total_tokens.saturating_add(delta_tokens),
                source: EstimateSource::Interpolated,
            }
        }
        (Some(lo), None) => {
            let lo_bytes = prefix[lo.history_len.min(cap)];
            let at_bytes = prefix[index];
            if lo_bytes == 0 || lo.input_total_tokens == 0 {
                // No usable rate to extrapolate with.
                return TokenEstimate {
                    tokens: lo.input_total_tokens,
                    source: EstimateSource::Extrapolated,
                };
            }
            let delta_bytes = at_bytes.saturating_sub(lo_bytes);
            let delta_tokens = scale(delta_bytes, lo.input_total_tokens, lo_bytes);
            TokenEstimate {
                tokens: lo.input_total_tokens.saturating_add(delta_tokens),
                source: EstimateSource::Extrapolated,
            }
        }
        (None, Some(up)) => {
            let up_bytes = prefix[up.history_len.min(cap)];
            let at_bytes = prefix[index];
            if up_bytes == 0 {
                return TokenEstimate {
                    tokens: 0,
                    source: EstimateSource::Interpolated,
                };
            }
            TokenEstimate {
                tokens: scale(at_bytes, up.input_total_tokens, up_bytes),
                source: EstimateSource::Interpolated,
            }
        }
        // With no exact match, every record is strictly below or above `index`.
        (None, None) => unreachable!("records non-empty but neither lower nor upper matched"),
    }
}
/// Estimated token count of the entire current `history`.
pub fn total_tokens(history: &[Item], records: &[UsageRecord]) -> TokenEstimate {
    let end = history.len();
    let cumulative = prefix_bytes(history);
    tokens_at(history, records, end, &cumulative)
}
/// Estimated full prompt length at an arbitrary history index.
///
/// `history_len` is clamped to `history.len()`; `history_len == 0` yields 0.
/// Intended for delta computations (extract trigger and similar) of the form
/// `total_tokens_at(now) - total_tokens_at(pointer)`.
pub fn total_tokens_at(
    history: &[Item],
    records: &[UsageRecord],
    history_len: usize,
) -> TokenEstimate {
    let clamped = std::cmp::min(history_len, history.len());
    let cumulative = prefix_bytes(history);
    tokens_at(history, records, clamped, &cumulative)
}
#[cfg(test)]
mod tests {
    use super::*;

    // Builds a user-message item with the given text.
    fn msg(text: &str) -> Item {
        Item::user_message(text)
    }

    // Builds a usage record at `history_len` with only the input total set.
    fn record(history_len: usize, tokens: u64) -> UsageRecord {
        UsageRecord {
            history_len,
            input_total_tokens: tokens,
            cache_read_tokens: 0,
            cache_write_tokens: 0,
            output_tokens: 0,
        }
    }

    // Builds a history of user messages from the given texts.
    fn history_of(texts: &[&str]) -> Vec<Item> {
        texts.iter().map(|text| msg(text)).collect()
    }

    #[test]
    fn total_no_data_falls_back_to_byte_estimate() {
        let history = history_of(&["hello world"]);
        let estimate = total_tokens(&history, &[]);
        assert_eq!(estimate.source, EstimateSource::NoData);
        assert!(estimate.tokens > 0);
    }

    #[test]
    fn total_measured_when_last_record_matches_history_len() {
        let history = history_of(&["a", "b", "c"]);
        let estimate = total_tokens(&history, &[record(3, 120)]);
        assert_eq!(estimate.source, EstimateSource::Measured);
        assert_eq!(estimate.tokens, 120);
    }

    #[test]
    fn total_extrapolated_when_history_grew_past_last_measurement() {
        let history = history_of(&["a", "b", "c", "d"]);
        let estimate = total_tokens(&history, &[record(3, 100)]);
        assert_eq!(estimate.source, EstimateSource::Extrapolated);
        assert!(estimate.tokens > 100);
    }

    #[test]
    fn total_zero_history_is_zero() {
        let estimate = total_tokens(&[], &[]);
        assert_eq!(estimate.tokens, 0);
    }
}

View File

@ -275,7 +275,7 @@ pub struct ToolCall {
///
/// Intermediate representation between tool execution and history.
/// Carries `summary` + optional `content` from [`ToolOutput`].
#[derive(Debug, Clone, Serialize, Deserialize)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ToolResult {
/// Corresponding tool call ID
pub tool_use_id: String,

View File

@ -0,0 +1,22 @@
//! Per-LLM-request Usage measurement snapshot.
//!
//! 1 リクエストの送信時点での「ある history prefix 長で計測した占有量」を
//! 1 件分にまとめたもの。`UsageEvent` (provider stream イベント) を
//! 受けて呼び出し側 (typically Pod) が組み立て、永続化層
//! (session-store) に流したり、token accounting (`token_counter`) で
//! 履歴として参照したりする。
/// Occupancy snapshot taken at the moment an LLM request is sent.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct UsageRecord {
/// `history.len()` at send time.
pub history_len: usize,
/// Occupancy of `history[..history_len]` (full prompt length, measured).
pub input_total_tokens: u64,
/// Portion of the above that was read from cache.
pub cache_read_tokens: u64,
/// Portion of the above that this request wrote to cache.
pub cache_write_tokens: u64,
/// Number of output tokens generated by this request.
pub output_tokens: u64,
}

File diff suppressed because it is too large Load Diff

View File

@ -4,15 +4,77 @@
mod common;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{Arc, Mutex};
use std::time::Duration;
use async_trait::async_trait;
use common::MockLlmClient;
use llm_worker::Worker;
use llm_worker::llm_client::event::{Event, ResponseStatus, StatusEvent as ClientStatusEvent};
use llm_worker::llm_client::retry::RetryPolicy;
use llm_worker::llm_client::{ClientError, LlmClient, Request, ResponseStream};
use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta, ToolOutput};
// =============================================================================
// Tests
// =============================================================================
/// Test client whose first `stream` call fails with a 504 API error and
/// whose later calls replay `events`.
#[derive(Clone)]
struct FailOnceClient {
/// Number of `stream` calls made so far; shared between clones.
calls: Arc<AtomicUsize>,
/// Events replayed on every call after the first.
events: Vec<Event>,
}
#[async_trait]
impl LlmClient for FailOnceClient {
    /// Fails the very first call with a 504 `ClientError::Api`; every later
    /// call replays the configured events as an in-memory stream.
    async fn stream(&self, _request: Request) -> Result<ResponseStream, ClientError> {
        let earlier_calls = self.calls.fetch_add(1, Ordering::SeqCst);
        if earlier_calls != 0 {
            return Ok(Box::pin(futures::stream::iter(
                self.events.clone().into_iter().map(Ok),
            )));
        }

        Err(ClientError::Api {
            status: Some(504),
            code: None,
            message: "gateway timeout".into(),
            retry_after: None,
        })
    }

    /// Boxes a clone; the call counter stays shared through the `Arc`.
    fn clone_boxed(&self) -> Box<dyn LlmClient> {
        Box::new(self.clone())
    }
}
/// Verify that `on_llm_retry` fires exactly once when the first LLM call
/// fails with a 504 and the worker still finishes successfully.
#[tokio::test]
async fn test_callback_llm_retry_event() {
// Events replayed by the client once it stops failing.
let events = vec![Event::Status(ClientStatusEvent {
status: ResponseStatus::Completed,
})];
let client = FailOnceClient {
calls: Arc::new(AtomicUsize::new(0)),
events,
};
// Tiny backoff values keep the test fast; two attempts allow one retry.
let mut worker = Worker::new(client).with_retry_policy(RetryPolicy {
base: Duration::from_millis(1),
cap: Duration::from_millis(1),
max_attempts: 2,
total_timeout: Duration::from_secs(1),
});
// Collect every (llm_call, notice) pair passed to the retry callback.
let notices = Arc::new(Mutex::new(Vec::new()));
let sink = notices.clone();
worker.on_llm_retry(move |llm_call, notice| {
sink.lock().unwrap().push((llm_call, notice.clone()));
});
let result = worker.run("retry once").await;
assert!(result.is_ok(), "worker should succeed after one retry");
let notices = notices.lock().unwrap();
// One notice, reported for LLM call 0, describing failed attempt 1 of 2
// with the 504 status produced by `FailOnceClient`.
assert_eq!(notices.len(), 1);
assert_eq!(notices[0].0, 0);
assert_eq!(notices[0].1.failed_attempt, 1);
assert_eq!(notices[0].1.max_attempts, 2);
assert_eq!(notices[0].1.status, Some(504));
}
/// Verify that on_text_block correctly receives delta and stop events
#[tokio::test]
@ -149,6 +211,145 @@ async fn test_callback_turn_events() {
assert_eq!(ends[0], 0);
}
/// Stub tool returning a fixed [`ToolOutput`] for result-callback tests.
struct FixedOutputTool {
/// Output handed back (cloned) on every `execute` call.
output: ToolOutput,
}
#[async_trait]
impl Tool for FixedOutputTool {
/// Ignores the input JSON and always succeeds with a clone of `self.output`.
async fn execute(&self, _input_json: &str) -> Result<ToolOutput, ToolError> {
Ok(self.output.clone())
}
}
/// Build a [`ToolDefinition`] named `name` whose tool always succeeds with
/// a clone of `output`.
fn fixed_tool(name: &'static str, output: ToolOutput) -> ToolDefinition {
    Arc::new(move || {
        let tool: Arc<dyn Tool> = Arc::new(FixedOutputTool {
            output: output.clone(),
        });
        let meta = ToolMeta::new(name).input_schema(serde_json::json!({"type":"object"}));
        (meta, tool)
    })
}
/// Verify that on_tool_result fires once per executed tool with
/// summary/content/is_error matching what the tool returned.
#[tokio::test]
async fn test_callback_tool_result_events() {
// Scripted LLM response: one call to the `fixed` tool with empty JSON
// input, followed by a completed status.
let events = vec![
Event::tool_use_start(0, "call_1", "fixed"),
Event::tool_input_delta(0, "{}"),
Event::tool_use_stop(0),
Event::Status(ClientStatusEvent {
status: ResponseStatus::Completed,
}),
];
let client = MockLlmClient::new(events);
let mut worker = Worker::new(client);
worker.register_tool(fixed_tool(
"fixed",
ToolOutput {
summary: "did the thing".into(),
content: Some("full detail body".into()),
},
));
// Captured tuples are (tool_use_id, summary, content, is_error).
let captured: Arc<Mutex<Vec<(String, String, Option<String>, bool)>>> =
Arc::new(Mutex::new(Vec::new()));
let sink = captured.clone();
worker.on_tool_result(move |result| {
sink.lock().unwrap().push((
result.tool_use_id.clone(),
result.summary.clone(),
result.content.clone(),
result.is_error,
));
});
// The run outcome is deliberately ignored; only the callback
// observations are asserted below.
let _ = worker.run("call it").await;
let observed = captured.lock().unwrap();
assert_eq!(observed.len(), 1);
assert_eq!(observed[0].0, "call_1");
assert_eq!(observed[0].1, "did the thing");
assert_eq!(observed[0].2.as_deref(), Some("full detail body"));
assert!(!observed[0].3);
}
/// Stub tool that always fails, for exercising the error path through
/// `on_tool_result`.
struct ErroringTool {
/// Message wrapped into the `ToolError::ExecutionFailed` returned by `execute`.
message: String,
}
#[async_trait]
impl Tool for ErroringTool {
/// Ignores the input JSON and always fails with `ExecutionFailed(message)`.
async fn execute(&self, _input_json: &str) -> Result<ToolOutput, ToolError> {
Err(ToolError::ExecutionFailed(self.message.clone()))
}
}
/// Build a [`ToolDefinition`] named `name` whose tool always fails with
/// `ToolError::ExecutionFailed(message)`.
fn erroring_tool(name: &'static str, message: &'static str) -> ToolDefinition {
    Arc::new(move || {
        let tool: Arc<dyn Tool> = Arc::new(ErroringTool {
            message: message.to_string(),
        });
        let meta = ToolMeta::new(name).input_schema(serde_json::json!({"type":"object"}));
        (meta, tool)
    })
}
/// Verify on_tool_result also fires for failed executions with
/// is_error=true, and that the ToolOutput content channel stays empty.
#[tokio::test]
async fn test_callback_tool_result_error_path() {
// Scripted LLM response: one call to the always-failing `erroring` tool.
let events = vec![
Event::tool_use_start(0, "call_err", "erroring"),
Event::tool_input_delta(0, "{}"),
Event::tool_use_stop(0),
Event::Status(ClientStatusEvent {
status: ResponseStatus::Completed,
}),
];
let client = MockLlmClient::new(events);
let mut worker = Worker::new(client);
worker.register_tool(erroring_tool("erroring", "boom"));
// Captured tuples are (tool_use_id, summary, content, is_error).
let captured: Arc<Mutex<Vec<(String, String, Option<String>, bool)>>> =
Arc::new(Mutex::new(Vec::new()));
let sink = captured.clone();
worker.on_tool_result(move |result| {
sink.lock().unwrap().push((
result.tool_use_id.clone(),
result.summary.clone(),
result.content.clone(),
result.is_error,
));
});
// The run outcome is deliberately ignored; only the callback
// observations are asserted below.
let _ = worker.run("fail it").await;
let observed = captured.lock().unwrap();
assert_eq!(observed.len(), 1);
assert_eq!(observed[0].0, "call_err");
// Only containment is checked, so the summary may wrap the message in
// additional text.
assert!(
observed[0].1.contains("boom"),
"summary should carry the error message: {}",
observed[0].1
);
assert!(observed[0].2.is_none());
assert!(observed[0].3);
}
/// Verify that on_usage callback receives usage events
#[tokio::test]
async fn test_callback_usage_events() {

View File

@ -59,6 +59,7 @@ impl LlmClient for MockLlmClient {
status: Some(500),
code: Some("mock_error".to_string()),
message: "No more mock responses".to_string(),
retry_after: None,
});
}
let events = self.responses[count].clone();

View File

@ -12,7 +12,7 @@ use llm_worker::interceptor::{
Interceptor, PostToolAction, PreToolAction, ToolCallInfo, ToolResultInfo,
};
use llm_worker::llm_client::event::{Event, ResponseStatus, StatusEvent};
use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta, ToolOutput};
use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta, ToolOutput, ToolResult};
mod common;
use common::MockLlmClient;
@ -268,3 +268,59 @@ async fn test_post_tool_call_modification() {
"Result should be modified"
);
}
/// Hook: pre_tool_call synthetic result - skipped tool gets an error result in history.
#[tokio::test]
async fn test_before_tool_call_synthetic_result_committed() {
// First scripted response: the model asks to call `blocked_tool`.
let events = vec![
Event::tool_use_start(0, "call_1", "blocked_tool"),
Event::tool_input_delta(0, r#"{}"#),
Event::tool_use_stop(0),
Event::Status(StatusEvent {
status: ResponseStatus::Completed,
}),
];
// Second scripted response: plain text, returned on the follow-up LLM call.
let client = MockLlmClient::with_responses(vec![
events,
vec![
Event::text_block_start(0),
Event::text_delta(0, "Denied."),
Event::text_block_stop(0, None),
Event::Status(StatusEvent {
status: ResponseStatus::Completed,
}),
],
]);
let mut worker = Worker::new(client);
let blocked_tool = SlowTool::new("blocked_tool", 10);
// Keep a clone so the call count can be inspected after the run.
let blocked_clone = blocked_tool.clone();
worker.register_tool(blocked_tool.definition());
// Interceptor that answers every tool call with a synthetic
// "permission denied" error result.
struct SyntheticPolicy;
#[async_trait]
impl Interceptor for SyntheticPolicy {
async fn pre_tool_call(&self, info: &mut ToolCallInfo) -> PreToolAction {
PreToolAction::SyntheticResult(ToolResult::error(
info.call.id.clone(),
"permission denied",
))
}
}
worker.set_interceptor(SyntheticPolicy);
let result = worker.run("Test synthetic result").await.unwrap();
assert_eq!(blocked_clone.call_count(), 0, "Blocked tool should not run");
// The synthetic error result must have been committed to history under
// the original call id.
assert!(result.worker.history().iter().any(|item| matches!(
item,
llm_worker::Item::ToolResult {
call_id,
summary,
is_error: true,
..
} if call_id == "call_1" && summary == "permission denied"
)));
}

View File

@ -0,0 +1,210 @@
//! Reasoning history round-trip 統合テスト
//!
//! Worker のストリーム → history append → 次リクエスト送出までの
//! ライフサイクルで `Item::Reasoning` が脱落せず保持されることを確認する。
//!
//! 検証点:
//! - Anthropic 由来の thinking + signature が `Item::Reasoning::signature` として
//! history に残る
//! - OpenAI Responses 由来の reasoning text + summary + encrypted_content が
//! `Item::Reasoning` の各フィールドに展開される
//! - 直前の reasoning は次の outgoing request の `request.items` の先頭付近に
//! 含まれる(assistant メッセージの先頭、Anthropic 仕様)
mod common;
use common::MockLlmClient;
use llm_worker::Item;
use llm_worker::Worker;
use llm_worker::llm_client::event::{Event, ReasoningItemEvent, ResponseStatus, StatusEvent};
/// Anthropic-style sequence: thinking block → text → completion.
/// The worker history must hold a Reasoning item (carrying the signature)
/// followed by the assistant message.
#[tokio::test]
async fn anthropic_thinking_round_trips_signature_into_history() {
    let thinking = Event::ReasoningItem(ReasoningItemEvent {
        id: None,
        text: "let me think...".into(),
        summary: Vec::new(),
        encrypted_content: None,
        signature: Some("SIG-OPUS".into()),
    });
    let client = MockLlmClient::new(vec![
        thinking,
        Event::text_block_start(0),
        Event::text_delta(0, "Here's the answer"),
        Event::text_block_stop(0, None),
        Event::Status(StatusEvent {
            status: ResponseStatus::Completed,
        }),
    ]);

    let output = Worker::new(client).run("question?").await.expect("run ok");
    let history = output.worker.history();

    // user / reasoning / assistant_message
    assert_eq!(history.len(), 3, "history: {history:?}");
    assert!(matches!(history[0], Item::Message { .. }));

    let Item::Reasoning {
        text, signature, ..
    } = &history[1]
    else {
        panic!("expected Reasoning, got {:?}", history[1]);
    };
    assert_eq!(text, "let me think...");
    assert_eq!(signature.as_deref(), Some("SIG-OPUS"));

    assert_eq!(history[2].as_text(), Some("Here's the answer"));
}
/// OpenAI Responses style: a reasoning item carrying `encrypted_content`
/// and `summary` must be expanded into the matching `Item::Reasoning` fields.
#[tokio::test]
async fn openai_reasoning_round_trips_encrypted_and_summary() {
    let reasoning = Event::ReasoningItem(ReasoningItemEvent {
        id: Some("r1".into()),
        text: "inner reasoning".into(),
        summary: vec!["sum-A".into(), "sum-B".into()],
        encrypted_content: Some("ENC-OPAQUE".into()),
        signature: None,
    });
    let client = MockLlmClient::new(vec![
        reasoning,
        Event::text_block_start(0),
        Event::text_delta(0, "answer"),
        Event::text_block_stop(0, None),
        Event::Status(StatusEvent {
            status: ResponseStatus::Completed,
        }),
    ]);

    let output = Worker::new(client).run("q").await.expect("run ok");
    let history = output.worker.history();

    let Item::Reasoning {
        text,
        summary,
        encrypted_content,
        signature,
        id,
        ..
    } = &history[1]
    else {
        panic!("expected Reasoning, got {:?}", history[1]);
    };
    assert_eq!(text, "inner reasoning");
    assert_eq!(summary, &vec!["sum-A".to_string(), "sum-B".to_string()]);
    assert_eq!(encrypted_content.as_deref(), Some("ENC-OPAQUE"));
    assert!(signature.is_none());
    assert_eq!(id.as_deref(), Some("r1"));
}
/// Reasoning must come before text/tool_call within an assistant turn
/// (because Anthropic requires thinking at the head of the assistant message).
#[tokio::test]
async fn reasoning_precedes_text_in_assistant_burst() {
let events = vec![
// Even when the ReasoningItem fires mid-stream, independently of
// text/tool_call, it is placed at the head of the assistant items
// when appended to history.
Event::text_block_start(0),
Event::text_delta(0, "intermediate"),
Event::text_block_stop(0, None),
Event::ReasoningItem(ReasoningItemEvent {
text: "after text".into(),
signature: Some("SIG".into()),
..Default::default()
}),
Event::Status(StatusEvent {
status: ResponseStatus::Completed,
}),
];
let client = MockLlmClient::new(events);
let worker = Worker::new(client);
let out = worker.run("q").await.expect("run ok");
let worker = out.worker;
let history = worker.history();
// user / reasoning (first) / assistant_message
assert!(matches!(history[1], Item::Reasoning { .. }));
assert_eq!(history[2].as_text(), Some("intermediate"));
}
/// Resume scenario: when an Item::Reasoning (with signature) originating
/// from history.json is injected into the Worker before a run, it is carried
/// unchanged in the next outgoing request's `Request::items` and handed to
/// the LLM (the contract is that the worker does not modify items).
#[tokio::test]
async fn injected_reasoning_survives_into_outgoing_request() {
use async_trait::async_trait;
use futures::Stream;
use std::pin::Pin;
use std::sync::{Arc, Mutex};
use llm_worker::llm_client::{ClientError, LlmClient, Request};
/// Client that stores the request it receives and returns a stream
/// containing only a `Completed` status event.
#[derive(Clone)]
struct CapturingClient {
captured: Arc<Mutex<Option<Request>>>,
}
#[async_trait]
impl LlmClient for CapturingClient {
fn clone_boxed(&self) -> Box<dyn LlmClient> {
Box::new(self.clone())
}
async fn stream(
&self,
request: Request,
) -> Result<Pin<Box<dyn Stream<Item = Result<Event, ClientError>> + Send>>, ClientError>
{
// Overwrites any previously stored request.
*self.captured.lock().unwrap() = Some(request);
let stream = futures::stream::iter(vec![Ok(Event::Status(StatusEvent {
status: ResponseStatus::Completed,
}))]);
Ok(Box::pin(stream))
}
}
let captured = Arc::new(Mutex::new(None));
let client = CapturingClient {
captured: captured.clone(),
};
let mut worker = Worker::new(client);
// resume: feed in the pre-existing history
worker.set_history(vec![
Item::user_message("prior question"),
Item::reasoning("prior thinking").with_signature("SIG-PRIOR"),
Item::assistant_message("prior answer"),
]);
let _ = worker.run("follow up").await.expect("run ok");
let req = captured
.lock()
.unwrap()
.take()
.expect("client should have received a request");
// The Reasoning item must be preserved in the outgoing items.
let mut found = false;
for item in &req.items {
if let Item::Reasoning {
text, signature, ..
} = item
{
assert_eq!(text, "prior thinking");
assert_eq!(signature.as_deref(), Some("SIG-PRIOR"));
found = true;
}
}
assert!(
found,
"Reasoning item must survive into outgoing request items: {req:?}",
req = req.items,
);
}

View File

@ -0,0 +1,185 @@
//! HTTP transport の単発 request / error classification テスト。
//!
//! Retry/backoff は Worker の lifecycle 管理に属するため、transport は 1 回だけ
//! request を送り、HTTP status / Retry-After を `ClientError` に載せて返す。
use futures::StreamExt;
use llm_worker::llm_client::LlmClient;
use llm_worker::llm_client::auth::AuthRequirement;
use llm_worker::llm_client::capability::ModelCapability;
use llm_worker::llm_client::error::ClientError;
use llm_worker::llm_client::event::Event;
use llm_worker::llm_client::scheme::Scheme;
use llm_worker::llm_client::transport::{HttpTransport, ResolvedAuth};
use llm_worker::llm_client::types::Request;
use serde_json::Value;
use std::time::Duration;
use wiremock::matchers::{method, path};
use wiremock::{Mock, MockServer, ResponseTemplate};
/// Test-only scheme that never interprets a real SSE body. When `parse_fail`
/// is set, `parse_sse` returns `ClientError::Sse`, which surfaces while the
/// stream is being consumed.
#[derive(Clone)]
struct DummyScheme {
/// Whether `parse_sse` should fail instead of returning no events.
parse_fail: bool,
}
impl Scheme for DummyScheme {
    type State = ();

    /// No default base URL; tests always pass the mock server URI.
    fn default_base_url(&self) -> &'static str {
        ""
    }

    /// Fixed request path, regardless of the model id.
    fn path(&self, _: &str) -> String {
        String::from("/v1/chat")
    }

    fn required_auth(&self) -> AuthRequirement {
        AuthRequirement::None
    }

    /// The request body is always an empty JSON object.
    fn build_request_body(&self, _: &str, _: &Request, _: &ModelCapability) -> Value {
        serde_json::json!({})
    }

    /// Yields no events, or a simulated parse failure when `parse_fail` is set.
    fn parse_sse(&self, _: &str, _: &str, _: &mut ()) -> Result<Vec<Event>, ClientError> {
        if !self.parse_fail {
            return Ok(vec![]);
        }
        Err(ClientError::Sse(
            "simulated mid-stream parse failure".into(),
        ))
    }

    fn default_capability(&self) -> ModelCapability {
        ModelCapability::minimal()
    }
}
/// Build an unauthenticated `HttpTransport` over [`DummyScheme`] for the
/// model id `"test-model"`, pointed at `base_url`.
fn build_transport(base_url: impl Into<String>, parse_fail: bool) -> HttpTransport<DummyScheme> {
    let scheme = DummyScheme { parse_fail };
    let capability = ModelCapability::minimal();
    HttpTransport::new(
        scheme,
        "test-model",
        base_url,
        ResolvedAuth::None,
        capability,
    )
}
/// A 200 response declaring an SSE content type with an empty body.
fn ok_sse() -> ResponseTemplate {
    const SSE_MIME: &str = "text/event-stream";
    let empty_body: Vec<u8> = Vec::new();
    ResponseTemplate::new(200)
        .insert_header("content-type", SSE_MIME)
        .set_body_raw(empty_body, SSE_MIME)
}
/// A retryable status (503) must be returned to the caller as
/// `ClientError::Api` without the transport retrying on its own.
#[tokio::test]
async fn retryable_status_returns_api_error_without_retrying() {
let server = MockServer::start().await;
// First mock: answers 503, limited to a single match.
Mock::given(method("POST"))
.and(path("/v1/chat"))
.respond_with(ResponseTemplate::new(503).set_body_string("upstream connect error"))
.up_to_n_times(1)
.mount(&server)
.await;
// Second mock: a successful SSE response for the same route, mounted so
// that an internal retry would show up as `Ok` below.
Mock::given(method("POST"))
.and(path("/v1/chat"))
.respond_with(ok_sse())
.mount(&server)
.await;
let transport = build_transport(server.uri(), false);
match transport.stream(Request::default()).await {
Err(ClientError::Api {
status: Some(503), ..
}) => {}
Err(other) => panic!("expected Api(503), got {other:?}"),
Ok(_) => panic!("transport must not retry internally"),
}
// The server must have seen exactly one request.
let received = server.received_requests().await.unwrap();
assert_eq!(
received.len(),
1,
"transport should send exactly one request"
);
}
/// The `retry-after` response header must be readable through
/// `ClientError::retry_after()` on the returned API error.
#[tokio::test]
async fn retry_after_header_is_preserved_on_api_error() {
let server = MockServer::start().await;
// 503 response carrying `retry-after: 1`.
Mock::given(method("POST"))
.and(path("/v1/chat"))
.respond_with(ResponseTemplate::new(503).insert_header("retry-after", "1"))
.mount(&server)
.await;
let transport = build_transport(server.uri(), false);
match transport.stream(Request::default()).await {
// Bind the whole error so its `retry_after()` accessor can be checked.
Err(
err @ ClientError::Api {
status: Some(503), ..
},
) => {
assert_eq!(err.retry_after(), Some(Duration::from_secs(1)));
}
Err(other) => panic!("expected Api(503), got {other:?}"),
Ok(_) => panic!("expected error"),
}
}
/// A parse failure while consuming a 200 SSE response must arrive as an
/// `Err(ClientError::Sse)` stream item, and must not cause a second request.
#[tokio::test]
async fn mid_stream_sse_error_is_stream_item_error() {
let server = MockServer::start().await;
// 200 response whose body is a single SSE event, so `parse_sse` gets called.
Mock::given(method("POST"))
.and(path("/v1/chat"))
.respond_with(
ResponseTemplate::new(200)
.insert_header("content-type", "text/event-stream")
.set_body_raw(
b"event: data\ndata: payload\n\n".to_vec(),
"text/event-stream",
),
)
.mount(&server)
.await;
// `parse_fail = true` makes the dummy scheme reject every SSE event.
let transport = build_transport(server.uri(), true);
let mut stream = transport
.stream(Request::default())
.await
.expect("status 200 should open stream");
// Drain the whole stream, remembering whether any item was an Sse error.
let mut saw_sse_err = false;
while let Some(item) = stream.next().await {
if matches!(item, Err(ClientError::Sse(_))) {
saw_sse_err = true;
}
}
assert!(saw_sse_err, "expected Sse error from stream consumer");
let received = server.received_requests().await.unwrap();
assert_eq!(received.len(), 1, "mid-stream Sse must not reopen stream");
}
/// A non-retryable status (401) is reported as `ClientError::Api` after
/// exactly one request.
#[tokio::test]
async fn non_retryable_status_returns_api_error() {
    let server = MockServer::start().await;
    let unauthorized = ResponseTemplate::new(401).set_body_string("unauthorized");
    Mock::given(method("POST"))
        .and(path("/v1/chat"))
        .respond_with(unauthorized)
        .mount(&server)
        .await;

    let transport = build_transport(server.uri(), false);
    let failure = match transport.stream(Request::default()).await {
        Ok(_) => panic!("expected error"),
        Err(err) => err,
    };
    if !matches!(
        failure,
        ClientError::Api {
            status: Some(401),
            ..
        }
    ) {
        panic!("expected Api(401), got {failure:?}");
    }

    let request_count = server.received_requests().await.unwrap().len();
    assert_eq!(request_count, 1);
}

View File

@ -5,8 +5,8 @@
mod common;
use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{Arc, Mutex};
use async_trait::async_trait;
use common::MockLlmClient;
@ -44,14 +44,12 @@ fn test_mutable_history_manipulation() {
assert!(worker.history().is_empty());
// Add to history
worker.push_item(Item::user_message("Hello"));
worker.push_item(Item::assistant_message("Hi there!"));
worker.append_history(vec![Item::user_message("Hello")]);
worker.append_history(vec![Item::assistant_message("Hi there!")]);
assert_eq!(worker.history().len(), 2);
// Mutable access to history
worker
.history_mut()
.push(Item::user_message("How are you?"));
// Append to history via the callback-aware API.
worker.append_history(vec![Item::user_message("How are you?")]);
assert_eq!(worker.history().len(), 3);
// Clear history
@ -71,34 +69,38 @@ fn test_mutable_history_manipulation() {
#[test]
fn test_mutable_builder_pattern() {
let client = MockLlmClient::new(vec![]);
let worker = Worker::new(client)
.system_prompt("System prompt")
.with_item(Item::user_message("Hello"))
.with_item(Item::assistant_message("Hi!"))
.with_items(vec![
Item::user_message("How are you?"),
Item::assistant_message("I'm fine!"),
]);
let worker = Worker::new(client).system_prompt("System prompt");
assert_eq!(worker.get_system_prompt(), Some("System prompt"));
assert_eq!(worker.history().len(), 4);
assert!(worker.history().is_empty());
}
/// Verify that multiple items can be added with extend_history
/// Verify that multiple items can be added with append_history and callbacks fire.
#[test]
fn test_mutable_extend_history() {
fn test_mutable_append_history() {
let client = MockLlmClient::new(vec![]);
let observed = Arc::new(Mutex::new(Vec::new()));
let observed_for_callback = Arc::clone(&observed);
let mut worker = Worker::new(client);
worker.on_history_append(move |item| {
if let Some(text) = item.as_text() {
observed_for_callback.lock().unwrap().push(text.to_string());
}
});
worker.push_item(Item::user_message("First"));
worker.append_history(vec![Item::user_message("First")]);
worker.extend_history(vec![
worker.append_history(vec![
Item::assistant_message("Response 1"),
Item::user_message("Second"),
Item::assistant_message("Response 2"),
]);
assert_eq!(worker.history().len(), 4);
assert_eq!(
observed.lock().unwrap().as_slice(),
["First", "Response 1", "Second", "Response 2"]
);
}
#[derive(Clone)]
@ -162,8 +164,8 @@ fn test_lock_transition() {
let mut worker = Worker::new(client);
worker.set_system_prompt("System");
worker.push_item(Item::user_message("Hello"));
worker.push_item(Item::assistant_message("Hi"));
worker.append_history(vec![Item::user_message("Hello")]);
worker.append_history(vec![Item::assistant_message("Hi")]);
// Lock
let locked_worker = worker.lock();
@ -180,14 +182,14 @@ fn test_unlock_transition() {
let client = MockLlmClient::new(vec![]);
let mut worker = Worker::new(client);
worker.push_item(Item::user_message("Hello"));
worker.append_history(vec![Item::user_message("Hello")]);
let locked_worker = worker.lock();
// Unlock
let mut worker = locked_worker.unlock();
// History operations are available again in Mutable state
worker.push_item(Item::assistant_message("Hi"));
worker.append_history(vec![Item::assistant_message("Hi")]);
worker.clear_history();
assert!(worker.history().is_empty());
}
@ -310,8 +312,8 @@ async fn test_locked_prefix_len_tracking() {
let mut worker = Worker::new(client);
// Add items beforehand
worker.push_item(Item::user_message("Pre-existing message 1"));
worker.push_item(Item::assistant_message("Pre-existing response 1"));
worker.append_history(vec![Item::user_message("Pre-existing message 1")]);
worker.append_history(vec![Item::assistant_message("Pre-existing response 1")]);
assert_eq!(worker.history().len(), 2);
@ -352,14 +354,18 @@ async fn test_turn_count_increment() -> Result<(), WorkerError> {
let worker = Worker::new(client);
assert_eq!(worker.turn_count(), 0);
assert_eq!(worker.llm_call_count(), 0);
// First run consumes Mutable, returns RunOutput
let mut worker = worker.run("First").await?.worker;
assert_eq!(worker.turn_count(), 1);
// Retry not yet implemented → AgentTurn:LlmCall is 1:1.
assert_eq!(worker.llm_call_count(), 1);
// Subsequent runs on Locked take &mut self
worker.run("Second").await?;
assert_eq!(worker.turn_count(), 2);
assert_eq!(worker.llm_call_count(), 2);
Ok(())
}
@ -376,9 +382,11 @@ async fn test_unlock_edit_relock() {
}),
]]);
let worker = Worker::new(client)
.with_item(Item::user_message("Hello"))
.with_item(Item::assistant_message("Hi"));
let mut worker = Worker::new(client);
worker.append_history(vec![
Item::user_message("Hello"),
Item::assistant_message("Hi"),
]);
// Lock -> Unlock
let locked = worker.lock();
@ -388,7 +396,7 @@ async fn test_unlock_edit_relock() {
// Edit history
unlocked.clear_history();
unlocked.push_item(Item::user_message("Fresh start"));
unlocked.append_history(vec![Item::user_message("Fresh start")]);
// Re-lock
let relocked = unlocked.lock();

View File

@ -5,13 +5,14 @@ edition.workspace = true
license.workspace = true
[dependencies]
llm-worker = { version = "0.2.1", path = "../llm-worker" }
protocol = { version = "0.1.0", path = "../protocol" }
serde = { version = "1.0.228", features = ["derive"] }
arc-swap = "1"
llm-worker = { workspace = true }
protocol = { workspace = true }
serde = { workspace = true, features = ["derive"] }
serde_ignored = "0.1.14"
thiserror = "2.0.18"
toml = "1.1.2"
tracing = "0.1.44"
thiserror = { workspace = true }
toml = { workspace = true }
tracing = { workspace = true }
[dev-dependencies]
tempfile = "3.27.0"
tempfile = { workspace = true }

View File

@ -9,6 +9,6 @@ Pod の宣言的設定を TOML マニフェストとして定義・パースす
- `ModelConfig` — LLM モデル設定(scheme、base_url、model_id、auth)
- `SchemeKind` — wire scheme 種別(`Anthropic`, `OpenaiChat`, `OpenaiResponses`, `Gemini`)
- `AuthRef` — 認証参照(`None`, `ApiKey { env, file }`, `CodexOAuth`)
- `WorkerManifest` — ワーカー設定(システムプロンプト、max_tokens、temperature
- `WorkerManifest` — ワーカー設定(システムプロンプト、生成設定、reasoning)
- `ScopeConfig` / `ScopeRule` / `Permission` — allow / deny の宣言的スコープ設定
- `Scope` — 実行時スコープ。`from_config(&ScopeConfig, pwd)` で構築し、`is_readable` / `is_writable` / `permission_at` で問い合わせる

View File

@ -0,0 +1,125 @@
//! Cascade-layer collection helpers.
//!
//! Pod manifests are assembled from up to three on-disk layers (see
//! `pod::PodFactory` for the full cascade story):
//!
//! 1. **User manifest** — Pod CLI uses
//! [`crate::paths::user_manifest_path_with_env_override`]
//! 2. **Project manifest** at the closest `.insomnia/manifest.toml`
//! found by walking up from a starting directory (typically `cwd`)
//! 3. **Programmatic overlay** supplied at the call site
//!
//! This module owns the project-layer discovery and the parser glue.
//! User-layer path resolution lives in [`crate::paths`].
//!
//! Cascade *merging* and final validation stay outside this module —
//! that's the data layer's responsibility (`PodManifestConfig::merge`
//! and `PodManifest::try_from`). This module only handles the I/O and
//! path-discovery glue around them.
use std::path::{Path, PathBuf};
use crate::PodManifestConfig;
/// Errors returned when reading a single manifest layer from disk.
///
/// Both variants carry the path of the offending file so the rendered
/// message identifies which layer failed.
#[derive(Debug, thiserror::Error)]
pub enum LayerLoadError {
/// The manifest file could not be read.
#[error("failed to read manifest {}: {source}", .path.display())]
Io {
/// Path of the manifest that could not be read.
path: PathBuf,
/// Underlying I/O error.
#[source]
source: std::io::Error,
},
/// The manifest file was read but its contents failed to parse as TOML.
#[error("failed to parse manifest {}: {source}", .path.display())]
Parse {
/// Path of the manifest that failed to parse.
path: PathBuf,
/// Underlying TOML deserialization error.
#[source]
source: toml::de::Error,
},
}
/// Locate the nearest `.insomnia/manifest.toml` at or above `start`.
///
/// `start` is canonicalized first; if that fails (for example because the
/// path does not exist) the path is used as given. Each directory from
/// `start` upwards is then probed in turn, and the first one containing a
/// regular file at `.insomnia/manifest.toml` wins. Returns `None` when no
/// ancestor has one.
pub fn find_project_manifest_from(start: &Path) -> Option<PathBuf> {
    let origin = match start.canonicalize() {
        Ok(resolved) => resolved,
        Err(_) => start.to_path_buf(),
    };
    // `ancestors()` yields `origin` itself first, then each successive parent.
    origin
        .ancestors()
        .map(|dir| dir.join(".insomnia").join("manifest.toml"))
        .find(|candidate| candidate.is_file())
}
/// Load one manifest layer: read the file at `path` and parse it into a
/// partial [`PodManifestConfig`].
///
/// Resolving `path` against a base directory and merging the result with
/// other layers are left to the caller.
///
/// # Errors
///
/// Returns [`LayerLoadError::Io`] when the file cannot be read and
/// [`LayerLoadError::Parse`] when its contents fail to parse; both carry
/// `path`.
pub fn load_layer(path: &Path) -> Result<PodManifestConfig, LayerLoadError> {
    let raw = match std::fs::read_to_string(path) {
        Ok(text) => text,
        Err(source) => {
            return Err(LayerLoadError::Io {
                path: path.to_path_buf(),
                source,
            });
        }
    };

    match PodManifestConfig::from_toml(&raw) {
        Ok(config) => Ok(config),
        Err(source) => Err(LayerLoadError::Parse {
            path: path.to_path_buf(),
            source,
        }),
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// A manifest at the root is found when starting two directories below it.
    #[test]
    fn find_project_manifest_walks_up() {
        let scratch = TempDir::new().unwrap();
        let root = scratch.path().canonicalize().unwrap();

        let manifest = root.join(".insomnia").join("manifest.toml");
        std::fs::create_dir_all(manifest.parent().unwrap()).unwrap();
        std::fs::write(&manifest, "").unwrap();

        let nested = root.join("a").join("b");
        std::fs::create_dir_all(&nested).unwrap();

        let located = find_project_manifest_from(&nested).unwrap();
        assert_eq!(located, manifest);
    }

    /// An empty temporary directory has no manifest at or above it.
    #[test]
    fn find_project_manifest_returns_none_when_absent() {
        let scratch = TempDir::new().unwrap();
        let outcome = find_project_manifest_from(scratch.path());
        assert!(outcome.is_none());
    }

    /// A partial manifest written to disk parses back with its fields intact.
    #[test]
    fn load_layer_round_trips_partial_config() {
        let scratch = TempDir::new().unwrap();
        let manifest_path = scratch.path().join("manifest.toml");
        let body = r#"
[pod]
name = "from-disk"
"#;
        std::fs::write(&manifest_path, body).unwrap();

        let parsed = load_layer(&manifest_path).unwrap();
        assert_eq!(parsed.pod.name.as_deref(), Some("from-disk"));
    }

    /// Reading a missing file yields the `Io` variant carrying the requested path.
    #[test]
    fn load_layer_io_error_carries_path() {
        let bogus = PathBuf::from("/definitely/does/not/exist/manifest.toml");
        let LayerLoadError::Io { path, .. } = load_layer(&bogus).unwrap_err() else {
            panic!("expected Io variant");
        };
        assert_eq!(path, bogus);
    }
}

File diff suppressed because it is too large Load Diff

View File

@ -8,11 +8,15 @@
/// Byte-size cap applied to any tool's `content` output when no
/// per-tool override is set. See [`crate::ToolOutputLimits`].
pub const TOOL_OUTPUT_MAX_BYTES: usize = 16 * 1024;
pub const TOOL_OUTPUT_MAX_BYTES: usize = 64 * 1024;
/// Number of most-recent turns protected from pruning. See
/// [`crate::CompactionConfig::prune_protected_turns`].
pub const PRUNE_PROTECTED_TURNS: usize = 3;
/// Byte-size cap applied to each submit-time FileRef upload / attachment.
/// See [`crate::FileUploadLimits`].
pub const FILE_UPLOAD_MAX_BYTES: usize = 256 * 1024;
/// Token budget at the history tail protected from pruning. See
/// [`crate::CompactionConfig::prune_protected_tokens`].
pub const PRUNE_PROTECTED_TOKENS: u64 = 8000;
/// Minimum estimated token savings required to trigger a prune. See
/// [`crate::CompactionConfig::prune_min_savings`].
@ -21,27 +25,78 @@ pub const PRUNE_MIN_SAVINGS: u64 = 4096;
/// Token budget retained (unchanged) at the tail of the history across
/// a compact. Items whose cumulative token count fits within this budget
/// starting from the end are kept verbatim; the rest are summarised.
/// See [`crate::CompactionConfig::compact_retained_tokens`].
/// See [`crate::CompactionConfig::retained_tokens`].
pub const COMPACT_RETAINED_TOKENS: u64 = 8000;
/// Target size for the deterministic compact overview/index fed to the
/// compact worker. Exceeding this target is tolerated.
/// See [`crate::CompactionConfig::overview_target_tokens`].
pub const COMPACT_OVERVIEW_TARGET_TOKENS: u64 = 8_000;
/// Warning threshold for compact overview/index size. Compaction continues.
/// See [`crate::CompactionConfig::overview_warning_tokens`].
pub const COMPACT_OVERVIEW_WARNING_TOKENS: u64 = 16_000;
/// Hard deterministic-overview deadline. When exceeded, overview generation
/// falls back to a coarser index before the compact worker is started.
/// See [`crate::CompactionConfig::overview_deadline_tokens`].
pub const COMPACT_OVERVIEW_DEADLINE_TOKENS: u64 = 40_000;
/// Default instruction asset reference used when `worker.instruction`
/// is omitted. See the `PromptLoader` prefix addressing scheme for the
/// `$insomnia/` / `$user/` / `$workspace/` namespaces.
pub const DEFAULT_INSTRUCTION: &str = "$insomnia/default";
/// Default language policy used by the main worker for normal prose
/// responses. See [`crate::WorkerManifest::language`].
pub const WORKER_LANGUAGE: &str =
"match the user's language unless they explicitly request another language";
/// Token budget for auto-read file contents injected into the new
/// session after compaction. Limits how much raw file text the
/// compact worker can pull into the compacted context via
/// `mark_read_required`. See
/// [`crate::CompactionConfig::auto_read_budget_tokens`].
pub const COMPACT_AUTO_READ_BUDGET: u64 = 8000;
/// Current prompt-occupancy cap for the compact worker's own LLM
/// calls. Exceeding this aborts the compact run (circuit-breaker
/// path). See [`crate::CompactionConfig::worker_context_max_tokens`].
pub const COMPACT_WORKER_MAX_INPUT_TOKENS: u64 = 50_000;
/// Remaining compact-worker context threshold that triggers an instruction
/// to stop exploring and call `write_summary`.
/// See [`crate::CompactionConfig::finish_warning_remaining_tokens`].
pub const COMPACT_FINISH_WARNING_REMAINING_TOKENS: u64 = 8_000;
/// Context reserve preserved for final summary/tool closing turns.
/// See [`crate::CompactionConfig::final_reserve_tokens`].
pub const COMPACT_FINAL_RESERVE_TOKENS: u64 = 4_000;
/// Optional maximum compact-worker tool-loop depth. `None` means unlimited.
/// See [`crate::CompactionConfig::worker_max_turns`].
pub const COMPACT_WORKER_MAX_TURNS: Option<u32> = Some(20);
/// Target size for the `write_summary` text. Used in prompt/nudge text.
/// See [`crate::CompactionConfig::summary_target_tokens`].
pub const COMPACT_SUMMARY_TARGET_TOKENS: u64 = 2_000;
/// Hard validation cap for the final `write_summary` text.
/// See [`crate::CompactionConfig::summary_max_tokens`].
pub const COMPACT_SUMMARY_MAX_TOKENS: u64 = 4_000;
/// Dry-run cap for the compacted session's initial request context.
/// See [`crate::CompactionConfig::result_context_max_tokens`].
pub const COMPACT_RESULT_CONTEXT_MAX_TOKENS: u64 = 60_000;
/// Number of recently-touched files fed to the compact worker as
/// default references.
pub const COMPACT_DEFAULT_REFERENCE_COUNT: usize = 5;
/// Optional maximum extract-worker tool-loop depth. `None` means unlimited.
/// See [`crate::MemoryConfig::extract_worker_max_turns`].
pub const MEMORY_EXTRACT_WORKER_MAX_TURNS: Option<u32> = Some(8);
/// Default language used by memory extraction / consolidation workers for
/// durable memory and knowledge text. See [`crate::MemoryConfig::language`].
pub const MEMORY_LANGUAGE: &str = "English";

View File

@ -1,18 +1,27 @@
mod cascade;
mod config;
pub mod defaults;
mod model;
pub mod paths;
mod scope;
pub use cascade::{LayerLoadError, find_project_manifest_from, load_layer};
pub use config::{
CompactionConfigPartial, ModelConfigPartial, PodManifestConfig, PodMetaConfig, ResolveError,
ToolOutputLimitsPartial, WorkerManifestConfig,
CompactionConfigPartial, FileUploadLimitsPartial, PermissionConfigPartial, PodManifestConfig,
PodMetaConfig, ResolveError, ToolOutputLimitsPartial, WorkerManifestConfig,
};
pub use model::{
AuthRef, ModelCapability, ModelManifest, ReasoningControl, ReasoningEffort, SchemeKind,
};
pub use paths::{
user_manifest_path, user_manifest_path_from_env, user_manifest_path_with_env_override,
};
pub use model::{AuthRef, ModelConfig, SchemeKind};
pub use protocol::{Permission, ScopeRule};
pub use scope::{Scope, ScopeError};
pub use scope::{Scope, ScopeError, SharedScope};
use std::collections::HashMap;
use std::num::NonZeroU32;
use std::path::PathBuf;
use serde::{Deserialize, Serialize};
@ -25,17 +34,137 @@ use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PodManifest {
pub pod: PodMeta,
pub model: ModelConfig,
pub model: ModelManifest,
pub worker: WorkerManifest,
pub scope: ScopeConfig,
/// Session/debug persistence settings. Defaults keep extra traces off.
#[serde(default)]
pub session: SessionConfig,
/// Optional manifest-level tool permission policy. Absent means the
/// permission layer is disabled and tool calls run as before.
#[serde(default)]
pub permissions: Option<ToolPermissionConfig>,
#[serde(default)]
pub compaction: Option<CompactionConfig>,
/// Memory subsystem opt-in. Presence of `[memory]` in TOML enables
/// the memory tools (MemoryRead / MemoryWrite / MemoryEdit) and
/// causes Pod to deny generic write access to `<workspace>/memory/`
/// and `<workspace>/knowledge/`. Absent ⇒ legacy behaviour, no
/// memory tools registered.
#[serde(default)]
pub memory: Option<MemoryConfig>,
/// External Agent Skills (`SKILL.md`) directories to ingest as
/// Workflows. Each entry is a path to a skills *root* (i.e. a
/// directory whose children are individual `<name>/SKILL.md` skill
/// bundles). Paths are resolved against the manifest's base
/// directory like other path fields. Absent ⇒ no skills loaded;
/// there is no implicit `$config_dir/skills/` or builtin probe.
#[serde(default)]
pub skills: Option<SkillsConfig>,
}
/// External Agent Skills (`SKILL.md`) ingest configuration. Skills are
/// loaded *only* from the directories listed here — there is no
/// implicit `$config_dir/skills/` or builtin probe. Cascade-merged
/// across manifest layers, so a user-level manifest can declare a
/// shared skill root once while a project manifest adds its own
/// `.claude/skills/` / `.cursor/skills/` paths on top.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct SkillsConfig {
/// Skills *roots*. Children of each root must be individual
/// `<name>/SKILL.md` bundles; the directory itself is not a skill.
/// Resolved against the manifest base directory before
/// [`PodManifest`] is materialised.
///
/// Omitting the key deserialises to an empty list.
#[serde(default)]
pub directories: Vec<PathBuf>,
}
/// Memory subsystem configuration. Presence in the manifest enables
/// memory; the workspace root defaults to the Pod's pwd unless an
/// explicit override is given.
///
/// All fields are `Option`; defaults are applied at the consumer
/// (`.unwrap_or(defaults::...)`). This keeps cascade `merge` simple
/// (`upper.x.or(self.x)`) without a separate partial/resolved split.
/// An empty `[memory]` table therefore deserialises with every field `None`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct MemoryConfig {
/// Override for the workspace root. When `None`, the Pod's pwd
/// (resolved at construction time) is used. When set, must be an
/// absolute path.
#[serde(default)]
pub workspace_root: Option<PathBuf>,
/// Maximum number of records returned by `MemoryQuery` /
/// `KnowledgeQuery` per call. `None` ⇒ tool default (20).
#[serde(default)]
pub query_result_limit: Option<usize>,
/// Lines of context before and after each match in query excerpts.
/// Ignored when the request omits `query`. `None` ⇒ tool default (3).
#[serde(default)]
pub query_excerpt_lines: Option<usize>,
/// Whether the body of `memory/summary.md` is exposed in the resident
/// system-prompt section. `None` ⇒ enabled.
#[serde(default)]
pub inject_summary: Option<bool>,
/// Language used by memory extraction / consolidation workers for durable
/// memory and knowledge text. Free-form so workspaces can use names like
/// `English`, `Japanese`, or locale tags. `None` ⇒
/// [`defaults::MEMORY_LANGUAGE`].
#[serde(default)]
pub language: Option<String>,
/// Optional model for the extract worker. When `None`,
/// the main pod model is cloned via `clone_boxed()`. Lightweight
/// reasoning-capable models (Haiku / 4o-mini / Flash class) are
/// recommended.
#[serde(default)]
pub extract_model: Option<ModelManifest>,
/// Cumulative input-token threshold (since the last extract pointer)
/// that triggers an extract run. `None` disables the extract trigger
/// entirely; memory tools and resident injection still work, only
/// the auto-extract trigger is dormant.
#[serde(default)]
pub extract_threshold: Option<u64>,
/// Optional maximum extract-worker tool-loop depth. `None` leaves
/// the worker unlimited; the default bounds runaway short-context
/// loops. Falls through to
/// [`defaults::MEMORY_EXTRACT_WORKER_MAX_TURNS`] when unset.
///
/// NOTE(review): as a plain `Option<u32>` this field cannot tell
/// "unset" apart from "explicitly unlimited"; presumably an unset
/// value always resolves to the default — confirm at the consumer.
#[serde(default)]
pub extract_worker_max_turns: Option<u32>,
/// Optional model for the consolidation worker. When
/// `None`, the main pod model is cloned via `clone_boxed()`.
/// Reasoning-class models are recommended.
#[serde(default)]
pub consolidation_model: Option<ModelManifest>,
/// Consolidation trigger: file-count threshold of `_staging/`. The
/// consolidation run fires when the staging directory has at least
/// this many entries. Either threshold reaching its limit fires
/// consolidation (logical OR). `None` for both thresholds ⇒
/// consolidation disabled.
#[serde(default)]
pub consolidation_threshold_files: Option<usize>,
/// Consolidation trigger: byte-size threshold across all `_staging/`
/// entries. Either threshold reaching its limit fires consolidation.
/// `None` for both thresholds ⇒ consolidation disabled.
#[serde(default)]
pub consolidation_threshold_bytes: Option<u64>,
}
/// Pod metadata.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PodMeta {
/// Pod name as written in the manifest. Required (no serde default).
pub name: String,
/// Optional path to a TOML override file read as the top layer of
/// `pod::PromptCatalog`. Subject to the same relative-path
/// resolution as other manifest paths (joined against the
/// manifest's base directory). `None` leaves the 4th overlay layer
/// empty; auto-discovered user and workspace packs still apply.
///
/// Note: unlike `worker.instruction`, this is a plain filesystem
/// path — not a `$prefix/` prompt reference. Pack files carry
/// structured TOML data, while `worker.instruction` points at a
/// minijinja `.md` template; the two use different addressing
/// conventions on purpose.
#[serde(default)]
pub prompt_pack: Option<PathBuf>,
}
/// Worker-level configuration embedded in the manifest.
@ -48,18 +177,39 @@ pub struct WorkerManifest {
/// unset manifests fall through to [`defaults::DEFAULT_INSTRUCTION`].
#[serde(default = "default_instruction")]
pub instruction: String,
/// Language policy used by the main worker for normal prose responses.
/// Free-form so workspaces can use names like `English`, `Japanese`,
/// locale tags, or a policy phrase. Unset manifests fall through to
/// [`defaults::WORKER_LANGUAGE`].
#[serde(default = "default_worker_language")]
pub language: String,
#[serde(default)]
pub max_tokens: Option<u32>,
#[serde(default)]
pub max_turns: Option<NonZeroU32>,
#[serde(default)]
pub temperature: Option<f32>,
#[serde(default)]
pub top_p: Option<f32>,
#[serde(default)]
pub top_k: Option<u32>,
#[serde(default)]
pub stop_sequences: Vec<String>,
#[serde(default)]
pub reasoning: Option<ReasoningControl>,
/// Byte-size caps applied to tool `content` before it reaches the
/// conversation history. The section is optional in TOML — when
/// omitted, `ToolOutputLimits::default()` (16KB default cap, no
/// omitted, `ToolOutputLimits::default()` (64 KiB default cap, no
/// per-tool overrides) is applied so truncation is on by default.
#[serde(default)]
pub tool_output: ToolOutputLimits,
/// Byte-size cap applied to submit-time FileRef uploads / attachments.
/// For file refs this caps the file body; for normal directory refs this
/// caps the rendered shallow listing body.
/// This is intentionally separate from tool-output truncation because
/// user-requested file attachments can usually tolerate a larger budget.
#[serde(default)]
pub file_upload: FileUploadLimits,
}
/// Byte-size caps applied to tool execution `content` before it enters
@ -79,14 +229,36 @@ pub struct ToolOutputLimits {
pub per_tool: HashMap<String, usize>,
}
/// Byte-size cap for submit-time FileRef uploads / attachments.
///
/// This governs the `[File: <path>]` system-message attachment produced
/// when a user explicitly submits a `@<path>` file reference, and the
/// rendered body of a shallow `[Dir: <path>]` listing for a normal directory
/// reference. It does not affect tool result truncation; see
/// [`ToolOutputLimits`] for that path.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileUploadLimits {
/// Cap applied to each resolved FileRef file body or directory-listing body.
/// Falls back to [`defaults::FILE_UPLOAD_MAX_BYTES`] when the key is omitted.
#[serde(default = "default_file_upload_max_bytes")]
pub max_bytes: usize,
}
/// Returns the crate-wide tool-output byte cap, [`defaults::TOOL_OUTPUT_MAX_BYTES`].
fn default_tool_output_max_bytes() -> usize {
    let cap: usize = defaults::TOOL_OUTPUT_MAX_BYTES;
    cap
}
/// Returns the crate-wide file-upload byte cap, [`defaults::FILE_UPLOAD_MAX_BYTES`].
/// Used as the serde default for [`FileUploadLimits::max_bytes`].
fn default_file_upload_max_bytes() -> usize {
    let cap: usize = defaults::FILE_UPLOAD_MAX_BYTES;
    cap
}
/// Owned copy of [`defaults::DEFAULT_INSTRUCTION`], used as the serde
/// default for `worker.instruction`.
fn default_instruction() -> String {
    String::from(defaults::DEFAULT_INSTRUCTION)
}
/// Owned copy of [`defaults::WORKER_LANGUAGE`], used as the serde
/// default for `worker.language`.
fn default_worker_language() -> String {
    String::from(defaults::WORKER_LANGUAGE)
}
impl Default for ToolOutputLimits {
fn default() -> Self {
Self {
@ -96,6 +268,14 @@ impl Default for ToolOutputLimits {
}
}
impl Default for FileUploadLimits {
    /// Builds the limits with `max_bytes` taken from the same helper that
    /// serde uses when the key is omitted.
    fn default() -> Self {
        let max_bytes = default_file_upload_max_bytes();
        Self { max_bytes }
    }
}
impl ToolOutputLimits {
/// Resolve the cap for a given tool name.
pub fn limit_for(&self, tool_name: &str) -> usize {
@ -123,15 +303,56 @@ pub struct ScopeConfig {
pub deny: Vec<ScopeRule>,
}
/// Session/debug persistence settings.
///
/// The derived `Default` leaves every option off.
#[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq)]
pub struct SessionConfig {
/// Persist every provider stream event directly to `trace.jsonl` next to the
/// segment log. Intended for debugging stalls between stream requests; off
/// by default because it can be verbose.
#[serde(default)]
pub record_event_trace: bool,
}
/// Manifest-level pattern-based tool permission policy.
///
/// Presence of `[permissions]` enables this layer. Rules are evaluated
/// in declaration order; if none match, [`Self::default_action`] is used.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ToolPermissionConfig {
/// Action used when no rule matches. Required (no serde default).
pub default_action: ToolPermissionAction,
/// Ordered rule list. The serde name is `rule`, so entries are written
/// as `[[permissions.rule]]` tables; omitted ⇒ empty list.
#[serde(default, rename = "rule")]
pub rules: Vec<ToolPermissionRule>,
}
/// One `[[permissions.rule]]` entry.
///
/// All three fields are required; none carries a serde default.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ToolPermissionRule {
/// Tool registration name. Matching is case-insensitive at runtime so
/// manifests may use either `Bash` or `bash`.
pub tool: String,
/// Glob-like pattern matched against the tool's permission target
/// (for built-in tools, commonly `command`, `file_path`, or `pattern`).
pub pattern: String,
/// Decision attached to this rule.
pub action: ToolPermissionAction,
}
/// Tool permission decision.
///
/// Variants are (de)serialised in snake_case, i.e. `allow`, `deny`, `ask`.
/// NOTE(review): how each decision is enforced is not visible in this
/// type — confirm against the permission layer.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum ToolPermissionAction {
/// Written as `allow` in the manifest.
Allow,
/// Written as `deny` in the manifest.
Deny,
/// Written as `ask` in the manifest.
Ask,
}
/// Context compaction configuration.
///
/// Controls Prune (content removal from old tool results) and Compact
/// (full history summarisation). Omitting `[compaction]` disables both.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompactionConfig {
/// Number of recent turns protected from pruning.
#[serde(default = "default_prune_protected_turns")]
pub prune_protected_turns: usize,
/// Token budget at the history tail protected from pruning.
#[serde(default = "default_prune_protected_tokens")]
pub prune_protected_tokens: u64,
/// Minimum estimated token savings to trigger a prune.
#[serde(default = "default_prune_min_savings")]
@ -142,8 +363,8 @@ pub struct CompactionConfig {
/// Checked by the Controller after each run. When current occupancy
/// exceeds this value, compact runs before the next turn. `None`
/// disables the between-turns check.
#[serde(default)]
pub compact_threshold: Option<u64>,
#[serde(default, alias = "compact_threshold")]
pub threshold: Option<u64>,
/// Safety-net (between-requests) compaction threshold.
///
@ -152,60 +373,145 @@ pub struct CompactionConfig {
/// Controller can compact before the next LLM request. `None`
/// disables the between-requests check.
///
/// Expected relation: `compact_threshold < compact_request_threshold`
/// (proactive triggers before safety net). A reversed configuration
/// is accepted but logged as a warning.
#[serde(default)]
pub compact_request_threshold: Option<u64>,
/// Expected relation: `threshold < request_threshold` (proactive triggers
/// before safety net). A reversed configuration is accepted but logged as
/// a warning.
#[serde(default, alias = "compact_request_threshold")]
pub request_threshold: Option<u64>,
/// Token budget retained verbatim at the tail of the history after
/// compaction. Measured against the occupancy estimate from
/// `UsageRecord` history; turn boundaries are ignored.
#[serde(default = "default_compact_retained_tokens")]
pub compact_retained_tokens: u64,
#[serde(default = "default_retained_tokens", alias = "compact_retained_tokens")]
pub retained_tokens: u64,
/// Target size for the deterministic overview/index fed to the compact
/// worker. Overshooting this target is not an error.
#[serde(default = "default_overview_target_tokens")]
pub overview_target_tokens: u64,
/// Warning threshold for deterministic overview/index size.
#[serde(default = "default_overview_warning_tokens")]
pub overview_warning_tokens: u64,
/// Deadline threshold for deterministic overview/index generation.
/// Oversized overviews fall back to a coarser deterministic index.
#[serde(default = "default_overview_deadline_tokens")]
pub overview_deadline_tokens: u64,
/// Current prompt-occupancy cap for the compact worker's own LLM
/// requests. Exceeding this aborts the compact run.
#[serde(
default = "default_worker_context_max_tokens",
alias = "compact_worker_max_input_tokens"
)]
pub worker_context_max_tokens: u64,
/// Remaining compact-worker context threshold that triggers a warning and
/// an instruction to stop exploring and call `write_summary`.
#[serde(default = "default_finish_warning_remaining_tokens")]
pub finish_warning_remaining_tokens: u64,
/// Context reserve preserved for final summary/tool closing turns.
#[serde(default = "default_final_reserve_tokens")]
pub final_reserve_tokens: u64,
/// Optional maximum compact-worker tool-loop depth. `None` leaves the
/// worker unlimited; the default bounds runaway short-context loops.
#[serde(
default = "default_worker_max_turns",
alias = "compact_worker_max_turns"
)]
pub worker_max_turns: Option<u32>,
/// Target size for the `write_summary` text. Used in prompt/nudge text.
#[serde(default = "default_summary_target_tokens")]
pub summary_target_tokens: u64,
/// Hard validation cap for the final `write_summary` text.
#[serde(default = "default_summary_max_tokens")]
pub summary_max_tokens: u64,
/// Aggregate token budget for auto-read file contents injected into
/// the compacted session by the compact worker.
#[serde(default = "default_compact_auto_read_budget")]
pub compact_auto_read_budget: u64,
#[serde(
default = "default_auto_read_budget_tokens",
alias = "compact_auto_read_budget"
)]
pub auto_read_budget_tokens: u64,
/// Cumulative input-token cap for the compact worker's own LLM
/// calls. Exceeding this aborts the compact run.
#[serde(default = "default_compact_worker_max_input_tokens")]
pub compact_worker_max_input_tokens: u64,
/// Dry-run cap for the compacted session's initial request context.
#[serde(default = "default_result_context_max_tokens")]
pub result_context_max_tokens: u64,
/// Optional model for the compactor (summary) LLM.
/// If omitted, the main model is cloned via `clone_boxed()`.
#[serde(default)]
pub model: Option<ModelConfig>,
pub model: Option<ModelManifest>,
}
fn default_prune_protected_turns() -> usize {
defaults::PRUNE_PROTECTED_TURNS
fn default_prune_protected_tokens() -> u64 {
defaults::PRUNE_PROTECTED_TOKENS
}
fn default_prune_min_savings() -> u64 {
defaults::PRUNE_MIN_SAVINGS
}
fn default_compact_retained_tokens() -> u64 {
fn default_retained_tokens() -> u64 {
defaults::COMPACT_RETAINED_TOKENS
}
fn default_compact_auto_read_budget() -> u64 {
fn default_overview_target_tokens() -> u64 {
defaults::COMPACT_OVERVIEW_TARGET_TOKENS
}
fn default_overview_warning_tokens() -> u64 {
defaults::COMPACT_OVERVIEW_WARNING_TOKENS
}
fn default_overview_deadline_tokens() -> u64 {
defaults::COMPACT_OVERVIEW_DEADLINE_TOKENS
}
fn default_worker_context_max_tokens() -> u64 {
defaults::COMPACT_WORKER_MAX_INPUT_TOKENS
}
fn default_finish_warning_remaining_tokens() -> u64 {
defaults::COMPACT_FINISH_WARNING_REMAINING_TOKENS
}
fn default_final_reserve_tokens() -> u64 {
defaults::COMPACT_FINAL_RESERVE_TOKENS
}
fn default_worker_max_turns() -> Option<u32> {
defaults::COMPACT_WORKER_MAX_TURNS
}
fn default_summary_target_tokens() -> u64 {
defaults::COMPACT_SUMMARY_TARGET_TOKENS
}
fn default_summary_max_tokens() -> u64 {
defaults::COMPACT_SUMMARY_MAX_TOKENS
}
fn default_auto_read_budget_tokens() -> u64 {
defaults::COMPACT_AUTO_READ_BUDGET
}
fn default_compact_worker_max_input_tokens() -> u64 {
defaults::COMPACT_WORKER_MAX_INPUT_TOKENS
fn default_result_context_max_tokens() -> u64 {
defaults::COMPACT_RESULT_CONTEXT_MAX_TOKENS
}
impl Default for CompactionConfig {
fn default() -> Self {
Self {
prune_protected_turns: default_prune_protected_turns(),
prune_protected_tokens: default_prune_protected_tokens(),
prune_min_savings: default_prune_min_savings(),
compact_threshold: None,
compact_request_threshold: None,
compact_retained_tokens: default_compact_retained_tokens(),
compact_auto_read_budget: default_compact_auto_read_budget(),
compact_worker_max_input_tokens: default_compact_worker_max_input_tokens(),
threshold: None,
request_threshold: None,
retained_tokens: default_retained_tokens(),
overview_target_tokens: default_overview_target_tokens(),
overview_warning_tokens: default_overview_warning_tokens(),
overview_deadline_tokens: default_overview_deadline_tokens(),
worker_context_max_tokens: default_worker_context_max_tokens(),
finish_warning_remaining_tokens: default_finish_warning_remaining_tokens(),
final_reserve_tokens: default_final_reserve_tokens(),
worker_max_turns: default_worker_max_turns(),
summary_target_tokens: default_summary_target_tokens(),
summary_max_tokens: default_summary_max_tokens(),
auto_read_budget_tokens: default_auto_read_budget_tokens(),
result_context_max_tokens: default_result_context_max_tokens(),
model: None,
}
}
@ -214,6 +520,7 @@ impl Default for CompactionConfig {
impl PodManifest {
/// Parse a manifest from a TOML string.
///
/// The raw text is first handed to `config::reject_removed_manifest_fields`;
/// if that check fails, its error is propagated with `?` and
/// deserialisation is not attempted. Otherwise the text is deserialised
/// with `toml::from_str`.
///
/// # Errors
///
/// Returns a `toml::de::Error` from either step.
pub fn from_toml(s: &str) -> Result<Self, toml::de::Error> {
config::reject_removed_manifest_fields(s)?;
toml::from_str(s)
}
}
@ -241,12 +548,18 @@ permission = "write"
fn parse_minimal_manifest() {
    // The minimal fixture must parse, with every optional worker knob unset.
    let manifest = PodManifest::from_toml(MINIMAL_REQUIRED).unwrap();
    assert_eq!(manifest.pod.name, "test-agent");
    // `ModelManifest` fields are all `Option`, so compare against `Some(..)`.
    assert_eq!(manifest.model.scheme, Some(SchemeKind::Anthropic));
    assert_eq!(
        manifest.model.model_id.as_deref(),
        Some("claude-sonnet-4-20250514")
    );
    assert!(manifest.model.auth.is_none());
    assert_eq!(manifest.scope.allow.len(), 1);
    assert!(manifest.scope.deny.is_empty());
    assert_eq!(manifest.worker.instruction, defaults::DEFAULT_INSTRUCTION);
    assert!(manifest.worker.top_p.is_none());
    assert!(manifest.worker.top_k.is_none());
    assert!(manifest.worker.stop_sequences.is_empty());
}
#[test]
@ -264,6 +577,10 @@ auth = { kind = "api_key", file = "/abs/keys/anthropic" }
instruction = "$user/reviewer"
max_tokens = 4096
temperature = 0.3
top_p = 0.9
top_k = 40
stop_sequences = ["\n\n", "</stop>"]
reasoning = "medium"
[[scope.allow]]
target = "/abs/project"
@ -280,14 +597,21 @@ permission = "write"
"#;
let manifest = PodManifest::from_toml(toml).unwrap();
assert_eq!(manifest.pod.name, "code-reviewer");
let file = match &manifest.model.auth {
AuthRef::ApiKey { file, .. } => file.as_deref(),
let file = match manifest.model.auth.as_ref() {
Some(AuthRef::ApiKey { file, .. }) => file.as_deref(),
_ => panic!("expected ApiKey"),
};
assert_eq!(file, Some(std::path::Path::new("/abs/keys/anthropic")));
assert_eq!(manifest.worker.instruction, "$user/reviewer");
assert_eq!(manifest.worker.max_tokens, Some(4096));
assert_eq!(manifest.worker.temperature, Some(0.3));
assert_eq!(manifest.worker.top_p, Some(0.9));
assert_eq!(manifest.worker.top_k, Some(40));
assert_eq!(manifest.worker.stop_sequences, vec!["\n\n", "</stop>"]);
assert_eq!(
manifest.worker.reasoning,
Some(ReasoningControl::Effort(ReasoningEffort::Medium))
);
let allow = &manifest.scope.allow;
assert_eq!(allow.len(), 2);
assert_eq!(allow[0].permission, Permission::Write);
@ -320,6 +644,16 @@ model_id = "claude-sonnet-4-20250514"
assert_eq!(manifest.worker.max_turns.unwrap().get(), 50);
}
#[test]
fn parse_reasoning_budget() {
    // An integer under `worker.reasoning` is read as a token budget.
    let source = MINIMAL_REQUIRED.replace("[worker]\n", "[worker]\nreasoning = -1\n");
    let parsed = PodManifest::from_toml(&source).unwrap();
    let expected = Some(ReasoningControl::BudgetTokens(-1));
    assert_eq!(parsed.worker.reasoning, expected);
}
#[test]
fn omitted_max_turns_is_none() {
let manifest = PodManifest::from_toml(MINIMAL_REQUIRED).unwrap();
@ -334,14 +668,37 @@ model_id = "claude-sonnet-4-20250514"
#[test]
fn parse_compaction_config() {
    // Only `threshold` is given; every other field must take its default.
    let toml = format!("{MINIMAL_REQUIRED}\n[compaction]\nthreshold = 80000\n");
    let manifest = PodManifest::from_toml(&toml).unwrap();
    let c = manifest.compaction.unwrap();
    assert_eq!(c.prune_protected_tokens, 8000);
    assert_eq!(c.prune_min_savings, 4096);
    assert_eq!(c.threshold, Some(80000));
    assert_eq!(c.request_threshold, None);
    assert_eq!(c.retained_tokens, 8000);
    assert_eq!(c.worker_max_turns, Some(20));
}
#[test]
fn reject_removed_prune_protected_turns_field() {
    // The removed key must be rejected, and the error must name it.
    let source = format!("{MINIMAL_REQUIRED}\n[compaction]\nprune_protected_turns = 3\n");
    let err = PodManifest::from_toml(&source).unwrap_err();
    let rendered = err.to_string();
    assert!(
        rendered.contains("compaction.prune_protected_turns"),
        "unexpected error: {err}"
    );
}
#[test]
fn parse_compaction_worker_max_turns() {
    // An explicit `worker_max_turns` overrides the default.
    let source = format!("{MINIMAL_REQUIRED}\n[compaction]\nworker_max_turns = 7\n");
    let compaction = PodManifest::from_toml(&source)
        .unwrap()
        .compaction
        .unwrap();
    assert_eq!(compaction.worker_max_turns, Some(7));
}
#[test]
@ -349,13 +706,13 @@ model_id = "claude-sonnet-4-20250514"
let toml = format!(
"{MINIMAL_REQUIRED}\n\
[compaction]\n\
compact_threshold = 80000\n\
compact_request_threshold = 90000\n"
threshold = 80000\n\
request_threshold = 90000\n"
);
let manifest = PodManifest::from_toml(&toml).unwrap();
let c = manifest.compaction.unwrap();
assert_eq!(c.compact_threshold, Some(80000));
assert_eq!(c.compact_request_threshold, Some(90000));
assert_eq!(c.threshold, Some(80000));
assert_eq!(c.request_threshold, Some(90000));
}
#[test]
@ -363,12 +720,12 @@ model_id = "claude-sonnet-4-20250514"
let toml = format!(
"{MINIMAL_REQUIRED}\n\
[compaction]\n\
compact_request_threshold = 90000\n"
request_threshold = 90000\n"
);
let manifest = PodManifest::from_toml(&toml).unwrap();
let c = manifest.compaction.unwrap();
assert_eq!(c.compact_threshold, None);
assert_eq!(c.compact_request_threshold, Some(90000));
assert_eq!(c.threshold, None);
assert_eq!(c.request_threshold, Some(90000));
}
#[test]
@ -376,7 +733,7 @@ model_id = "claude-sonnet-4-20250514"
let toml = format!(
"{MINIMAL_REQUIRED}\n\
[compaction]\n\
compact_threshold = 80000\n\n\
threshold = 80000\n\n\
[compaction.model]\n\
scheme = \"gemini\"\n\
model_id = \"gemini-2.0-flash\"\n"
@ -384,8 +741,8 @@ model_id = "claude-sonnet-4-20250514"
let manifest = PodManifest::from_toml(&toml).unwrap();
let c = manifest.compaction.unwrap();
let p = c.model.unwrap();
assert_eq!(p.scheme, SchemeKind::Gemini);
assert_eq!(p.model_id, "gemini-2.0-flash");
assert_eq!(p.scheme, Some(SchemeKind::Gemini));
assert_eq!(p.model_id.as_deref(), Some("gemini-2.0-flash"));
}
#[test]
@ -394,6 +751,48 @@ model_id = "claude-sonnet-4-20250514"
assert!(manifest.compaction.is_none());
}
#[test]
fn omitted_memory_is_none() {
    // Without a `[memory]` table the field stays `None`.
    let memory = PodManifest::from_toml(MINIMAL_REQUIRED).unwrap().memory;
    assert!(memory.is_none());
}
#[test]
fn empty_memory_section_enables_with_default_root() {
    // A bare `[memory]` header yields `Some(MemoryConfig)` with unset fields.
    let source = format!("{MINIMAL_REQUIRED}\n[memory]\n");
    let parsed = PodManifest::from_toml(&source).unwrap();
    let memory = parsed.memory.expect("memory section parsed");
    assert!(memory.workspace_root.is_none());
    assert_eq!(memory.inject_summary, None);
}
#[test]
fn memory_section_with_inject_summary_false() {
    // An explicit `false` is preserved as `Some(false)`, not collapsed to `None`.
    let source = format!("{MINIMAL_REQUIRED}\n[memory]\ninject_summary = false\n");
    let memory = PodManifest::from_toml(&source).unwrap().memory.unwrap();
    assert_eq!(memory.inject_summary, Some(false));
}
#[test]
fn memory_section_with_explicit_root() {
    // `workspace_root` is carried through verbatim as a path.
    let source = format!("{MINIMAL_REQUIRED}\n[memory]\nworkspace_root = \"/some/where\"\n");
    let memory = PodManifest::from_toml(&source).unwrap().memory.unwrap();
    let root = memory.workspace_root.unwrap();
    let expected = std::path::PathBuf::from("/some/where");
    assert_eq!(root, expected);
}
#[test]
fn memory_section_with_language() {
    // The free-form language string is stored as written.
    let source = format!("{MINIMAL_REQUIRED}\n[memory]\nlanguage = \"Japanese\"\n");
    let memory = PodManifest::from_toml(&source).unwrap().memory.unwrap();
    let language = memory.language.as_deref();
    assert_eq!(language, Some("Japanese"));
}
#[test]
fn reject_unknown_scheme() {
let toml =
@ -402,15 +801,29 @@ model_id = "claude-sonnet-4-20250514"
}
#[test]
fn omitted_limits_fall_back_to_defaults() {
    // With no `[worker.tool_output]` / `[worker.file_upload]` tables, both
    // limits come from the `defaults` module and no per-tool override exists.
    let manifest = PodManifest::from_toml(MINIMAL_REQUIRED).unwrap();
    let limits = &manifest.worker.tool_output;
    assert_eq!(limits.default_max_bytes, defaults::TOOL_OUTPUT_MAX_BYTES);
    assert!(limits.per_tool.is_empty());
    assert_eq!(
        manifest.worker.file_upload.max_bytes,
        defaults::FILE_UPLOAD_MAX_BYTES
    );
}
#[test]
fn worker_language_defaults_and_parses() {
    // Omitted ⇒ the default language policy.
    let manifest = PodManifest::from_toml(MINIMAL_REQUIRED).unwrap();
    assert_eq!(manifest.worker.language, defaults::WORKER_LANGUAGE);

    // Explicit value ⇒ stored as written.
    let toml = MINIMAL_REQUIRED.replace("[worker]\n", "[worker]\nlanguage = \"Japanese\"\n");
    let manifest = PodManifest::from_toml(&toml).unwrap();
    assert_eq!(manifest.worker.language, "Japanese");
}
#[test]
fn parse_worker_output_limits() {
let toml = MINIMAL_REQUIRED.replace(
"[worker]\n",
"[worker]\n\
@ -418,7 +831,9 @@ model_id = "claude-sonnet-4-20250514"
default_max_bytes = 8192\n\n\
[worker.tool_output.per_tool]\n\
Read = 32768\n\
Grep = 4096\n",
Grep = 4096\n\n\
[worker.file_upload]\n\
max_bytes = 12345\n",
);
let manifest = PodManifest::from_toml(&toml).unwrap();
let limits = &manifest.worker.tool_output;
@ -426,6 +841,7 @@ model_id = "claude-sonnet-4-20250514"
assert_eq!(limits.limit_for("Read"), 32768);
assert_eq!(limits.limit_for("Grep"), 4096);
assert_eq!(limits.limit_for("Unknown"), 8192);
assert_eq!(manifest.worker.file_upload.max_bytes, 12345);
}
#[test]
@ -437,7 +853,7 @@ model_id = "claude-sonnet-4-20250514"
);
let manifest = PodManifest::from_toml(&toml).unwrap();
let limits = &manifest.worker.tool_output;
assert_eq!(limits.default_max_bytes, 16 * 1024);
assert_eq!(limits.default_max_bytes, defaults::TOOL_OUTPUT_MAX_BYTES);
assert!(limits.per_tool.is_empty());
}

View File

@ -1,8 +1,14 @@
//! LLM モデル宣言型
//!
//! Pod マニフェストの `[model]` セクションで記述する型。`scheme` と
//! `auth` を直交軸として表現し、1 つの汎用アダプタ(`crates/provider`)
//! で任意の wire / 認証組合せを受け止める。
//! Pod マニフェストの `[model]` セクションで記述する型。`ref`(プロバイダ
//! とモデルを両方指し示す短縮形)と inline 指定(`scheme` / `model_id`
//! 直書き)の両方を受け入れるため、すべてのフィールドを `Option` として
//! 持つ 1 つの型 [`ModelManifest`] に統合している。実解決(ref をプロバイダ
//! カタログ / モデルカタログから引いて `scheme` や `model_id` を埋める)
//! は `crates/provider` の責務で、本モジュールはデータ表現のみを提供する。
//!
//! 同じ型を partial(カスケード層)と完成形(最終マニフェスト)の両方で
//! 使うことで、merge と最終変換の重複を避ける。
use std::path::PathBuf;
@ -10,27 +16,62 @@ use serde::{Deserialize, Serialize};
// `ModelCapability` は `llm-worker` 側に定義される runtime 構造だが、
// マニフェストで任意に override できるよう型だけ再エクスポートする。
pub use llm_worker::llm_client::capability::ModelCapability;
pub use llm_worker::llm_client::capability::{ModelCapability, ReasoningControl, ReasoningEffort};
/// Pod が使う LLM モデルの宣言。
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ModelConfig {
/// wire format
pub scheme: SchemeKind,
/// API のベース URL。未指定なら scheme の既定値にフォールバック
#[serde(default)]
/// Pod マニフェストの `[model]` セクション。
///
/// - ref だけ書く: `[model] ref = "anthropic/claude-sonnet-4-6"`
/// - ref + 一部 override: ref で基底を引き、`auth` 等だけ書き換え
/// - 完全 inline: `ref` を省略して `scheme` / `model_id` / `auth` を直書き
///
/// どの形が有効かの判定は `provider::resolve_model_manifest` が担う。
/// 本クレートは「どこから取るか」を表現するだけで、未設定かどうかを
/// 理由にした hard error は出さない。
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
pub struct ModelManifest {
/// `<provider_id>/<model_id_in_ref>` 形式のカタログ参照。`/` の
/// 最初の 1 文字目で split し provider カタログを引く。
/// OpenRouter の `anthropic/claude-sonnet-4` のように `/` を含む
/// model_id は `openrouter/anthropic/claude-sonnet-4` と書く
/// provider 側で最初の `/` のみ split するため)。
#[serde(default, rename = "ref", skip_serializing_if = "Option::is_none")]
pub ref_: Option<String>,
/// wire format の明示指定。ref 未指定時は必須、ref 指定時は override。
#[serde(default, skip_serializing_if = "Option::is_none")]
pub scheme: Option<SchemeKind>,
/// API のベース URL。scheme の既定値を override する。
#[serde(default, skip_serializing_if = "Option::is_none")]
pub base_url: Option<String>,
/// プロバイダが受け付けるモデル ID
pub model_id: String,
/// 認証方式
#[serde(default)]
pub auth: AuthRef,
/// モデル能力の明示指定。`None` のときは `crates/provider` が
/// scheme 静的テーブル → scheme 既定値の順でフォールバックする。
/// OpenAI 互換ルーターOpenRouter / xAI / Groq 等)で scheme テーブル
/// に載っていないモデル ID を使うときに指定する。
#[serde(default)]
/// プロバイダが受け付けるモデル ID。ref 未指定時は必須。
#[serde(default, skip_serializing_if = "Option::is_none")]
pub model_id: Option<String>,
/// 認証方式。ref 未指定時は必須、ref 指定時は override。
#[serde(default, skip_serializing_if = "Option::is_none")]
pub auth: Option<AuthRef>,
/// モデル能力の明示指定。未指定時はモデルカタログ → provider
/// `default_capability` → scheme 既定の順で解決される。
#[serde(default, skip_serializing_if = "Option::is_none")]
pub capability: Option<ModelCapability>,
/// モデルのコンテキストウィンドウ上限(tokens)。カタログ未掲載 / inline
/// モデルでもここで明示 override できる。
#[serde(default, skip_serializing_if = "Option::is_none")]
pub context_window: Option<u64>,
}
impl ModelManifest {
    /// Overlays `upper` on top of `self`, one field at a time.
    ///
    /// For each field the value from `upper` wins when it is `Some`;
    /// otherwise whatever `self` already holds is kept. Meant for the
    /// manifest cascade, where layers are applied in increasing priority
    /// (builtin → user → project → overlay).
    pub fn merge(self, upper: Self) -> Self {
        /// Picks the higher-priority value, falling back to the lower one.
        fn overlay<T>(high: Option<T>, low: Option<T>) -> Option<T> {
            match high {
                Some(value) => Some(value),
                None => low,
            }
        }

        let Self {
            ref_,
            scheme,
            base_url,
            model_id,
            auth,
            capability,
            context_window,
        } = self;

        Self {
            ref_: overlay(upper.ref_, ref_),
            scheme: overlay(upper.scheme, scheme),
            base_url: overlay(upper.base_url, base_url),
            model_id: overlay(upper.model_id, model_id),
            auth: overlay(upper.auth, auth),
            capability: overlay(upper.capability, capability),
            context_window: overlay(upper.context_window, context_window),
        }
    }
}
/// サポートする wire scheme の種類。
@ -67,6 +108,7 @@ pub enum AuthRef {
file: Option<PathBuf>,
},
/// ChatGPT OAuth(`~/.codex/auth.json`)。実装は `llm-auth-codex-oauth` チケット
#[serde(rename = "codex_oauth")]
CodexOAuth,
}

View File

@ -0,0 +1,384 @@
//! Insomnia のホームディレクトリ配下のパス解決を一元化するモジュール。
//!
//! 用途別に三つの base directory を持つ:
//!
//! - **`config_dir`** — 人が手で書く / 編集する設定。`manifest.toml`,
//! `providers.toml`, `models.toml`, `prompts/`, `prompts.toml` 等
//! - **`data_dir`** — プログラムが書く永続データ。`sessions/` 等
//! - **`runtime_dir`** — 再起動で消えてよいランタイム状態。socket,
//! `pods.json`, `pid` ファイル等
//!
//! ## 解決順 (優先順位高 → 低)
//!
//! | base | 1. `INSOMNIA_<KIND>_DIR` | 2. `INSOMNIA_HOME` | 3. `XDG_*` | 4. 既定 |
//! |---|---|---|---|---|
//! | config | `INSOMNIA_CONFIG_DIR` | `$INSOMNIA_HOME/config` | `$XDG_CONFIG_HOME/insomnia` | `$HOME/.config/insomnia` |
//! | data | `INSOMNIA_DATA_DIR` | `$INSOMNIA_HOME` | — | `$HOME/.insomnia` |
//! | runtime | `INSOMNIA_RUNTIME_DIR` | `$INSOMNIA_HOME/run` | `$XDG_RUNTIME_DIR/insomnia` | `$HOME/.insomnia/run` |
//!
//! `INSOMNIA_HOME=$X` のとき config は `$X/config`、data は `$X` 直下、
//! runtime は `$X/run` に集約される。テストや sandbox 利用ではこれ一本
//! で全部 tempdir に向けられる。
//!
//! 解決された各 base が存在するか / ディレクトリかは保証しない —
//! 呼び出し側がファイル操作の前に作成 / 検査する。
use std::ffi::OsString;
use std::path::PathBuf;
/// Environment variable that points at an explicit user manifest.
///
/// Pod CLI treats a non-empty value as an explicit manifest path. Empty values
/// are treated the same as an unset variable, so callers fall back to the
/// auto-discovered user manifest path.
pub const USER_MANIFEST_ENV: &str = "INSOMNIA_USER_MANIFEST";
/// Configuration directory, home of `manifest.toml`, `providers.toml`,
/// `models.toml`, `prompts/` and similar hand-edited files.
///
/// Candidates are tried in priority order and the first one that is set
/// (and non-empty) wins:
///
/// 1. `$INSOMNIA_CONFIG_DIR`
/// 2. `$INSOMNIA_HOME/config`
/// 3. `$XDG_CONFIG_HOME/insomnia`
/// 4. `$HOME/.config/insomnia`
///
/// Returns `None` when none of these variables is available.
pub fn config_dir() -> Option<PathBuf> {
    env_path("INSOMNIA_CONFIG_DIR")
        .or_else(|| env_path("INSOMNIA_HOME").map(|home| home.join("config")))
        .or_else(|| env_path("XDG_CONFIG_HOME").map(|xdg| xdg.join("insomnia")))
        .or_else(|| env_path("HOME").map(|home| home.join(".config").join("insomnia")))
}
/// Data directory: where program-written persistent data such as
/// `sessions/` lives.
///
/// Resolution order (first set, non-empty value wins):
///
/// 1. `$INSOMNIA_DATA_DIR`
/// 2. `$INSOMNIA_HOME` itself
/// 3. `$HOME/.insomnia`
///
/// Returns `None` when none of these variables is available.
pub fn data_dir() -> Option<PathBuf> {
    env_path("INSOMNIA_DATA_DIR")
        .or_else(|| env_path("INSOMNIA_HOME"))
        .or_else(|| env_path("HOME").map(|home| home.join(".insomnia")))
}
/// Runtime directory: sockets, `pods.json`, per-Pod `pid` / `status.json`
/// and other state that may disappear across restarts.
///
/// Resolution order (first set, non-empty value wins):
///
/// 1. `$INSOMNIA_RUNTIME_DIR`
/// 2. `$INSOMNIA_HOME/run`
/// 3. `$XDG_RUNTIME_DIR/insomnia`
/// 4. `$HOME/.insomnia/run`
///
/// Returns `None` when none of these variables is available.
pub fn runtime_dir() -> Option<PathBuf> {
    env_path("INSOMNIA_RUNTIME_DIR")
        .or_else(|| env_path("INSOMNIA_HOME").map(|home| home.join("run")))
        .or_else(|| env_path("XDG_RUNTIME_DIR").map(|xdg| xdg.join("insomnia")))
        .or_else(|| env_path("HOME").map(|home| home.join(".insomnia").join("run")))
}
// ---- well-known file getters ------------------------------------------------
/// Default location of the user manifest: `<config_dir>/manifest.toml`.
///
/// [`USER_MANIFEST_ENV`] is deliberately not consulted here; call
/// [`user_manifest_path_with_env_override`] to mirror the Pod CLI cascade
/// resolution rules. Returns `None` when [`config_dir`] cannot be resolved.
pub fn user_manifest_path() -> Option<PathBuf> {
    config_dir().map(|base| base.join("manifest.toml"))
}
/// Turns a raw env value into an explicit user manifest path.
///
/// A non-empty value is taken verbatim as a path. `None` and the empty
/// string both mean "no override", matching how the Pod CLI treats
/// `INSOMNIA_USER_MANIFEST`.
pub fn user_manifest_path_from_env(value: Option<OsString>) -> Option<PathBuf> {
    match value {
        Some(raw) if !raw.as_os_str().is_empty() => Some(PathBuf::from(raw)),
        _ => None,
    }
}
/// User manifest path using the same env override rule as the Pod CLI cascade.
///
/// A non-empty [`USER_MANIFEST_ENV`] value wins. If the variable is unset or
/// empty, this falls back to [`user_manifest_path`]. The returned path is not
/// guaranteed to exist.
pub fn user_manifest_path_with_env_override() -> Option<PathBuf> {
user_manifest_path_from_env(std::env::var_os(USER_MANIFEST_ENV)).or_else(user_manifest_path)
}
/// User prompts library directory: `<config_dir>/prompts/`.
///
/// Returns `None` when [`config_dir`] cannot be resolved.
pub fn user_prompts_dir() -> Option<PathBuf> {
    config_dir().map(|base| base.join("prompts"))
}
/// User prompt pack file: `<config_dir>/prompts.toml`.
///
/// Returns `None` when [`config_dir`] cannot be resolved.
pub fn user_pack_file() -> Option<PathBuf> {
    config_dir().map(|base| base.join("prompts.toml"))
}
/// User override file for a catalog (e.g. `providers.toml`, `models.toml`):
/// `<config_dir>/<file_name>`.
///
/// `file_name` is joined onto the config directory as given. Returns `None`
/// when [`config_dir`] cannot be resolved.
pub fn user_catalog_override(file_name: &str) -> Option<PathBuf> {
    config_dir().map(|base| base.join(file_name))
}
/// Default session store location: `<data_dir>/sessions/`.
///
/// Returns `None` when [`data_dir`] cannot be resolved.
pub fn sessions_dir() -> Option<PathBuf> {
    data_dir().map(|base| base.join("sessions"))
}
/// Machine-wide Pod allocation registry: `<runtime_dir>/pods.json`.
///
/// Returns `None` when [`runtime_dir`] cannot be resolved.
pub fn pod_registry_path() -> Option<PathBuf> {
    runtime_dir().map(|base| base.join("pods.json"))
}
/// Per-Pod runtime directory: `<runtime_dir>/<pod_name>/`.
///
/// `pod_name` is joined onto the runtime directory as given (no validation
/// happens here). Returns `None` when [`runtime_dir`] cannot be resolved.
pub fn pod_runtime_dir(pod_name: &str) -> Option<PathBuf> {
    runtime_dir().map(|base| base.join(pod_name))
}
/// Unix socket path of a Pod: `<runtime_dir>/<pod_name>/sock`.
///
/// Inside the Pod process the socket is actually created through the `pod`
/// crate's `RuntimeDir::socket_path()`. External parties that only know the
/// Pod name (such as the TUI attach flow) use this function to *predict*
/// the location; both are expected to yield the same path.
pub fn pod_socket_path(pod_name: &str) -> Option<PathBuf> {
    pod_runtime_dir(pod_name).map(|pod_dir| pod_dir.join("sock"))
}
// ---- internals --------------------------------------------------------------
/// Reads the environment variable `name` as a path.
///
/// An empty value is treated exactly like an unset variable: for path
/// resolution the two cases are indistinguishable in practice, so both
/// yield `None`.
///
/// The value is read with [`std::env::var_os`] rather than `std::env::var`
/// so that a path which is not valid Unicode is still honoured instead of
/// being silently treated as unset.
fn env_path(name: &str) -> Option<PathBuf> {
    std::env::var_os(name)
        .filter(|value| !value.is_empty())
        .map(PathBuf::from)
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::{Mutex, MutexGuard, OnceLock};

    /// Serialises every test that touches the process environment.
    ///
    /// Cargo's test harness runs the tests of one binary on multiple
    /// threads rather than in separate processes, so any test that reads or
    /// writes env vars must hold this lock for its whole duration. A
    /// poisoned lock is recovered because the guarded value is `()`.
    fn env_lock() -> MutexGuard<'static, ()> {
        static LOCK: OnceLock<Mutex<()>> = OnceLock::new();
        LOCK.get_or_init(|| Mutex::new(()))
            .lock()
            .unwrap_or_else(|e| e.into_inner())
    }

    /// RAII guard that overrides env vars for one test and restores the
    /// previous values on drop.
    ///
    /// Saved values are kept as `OsString` (read through `var_os`) so that
    /// an original value that is not valid Unicode is restored faithfully
    /// instead of being lost.
    struct EnvGuard {
        vars: Vec<(&'static str, Option<OsString>)>,
        _lock: MutexGuard<'static, ()>,
    }

    impl EnvGuard {
        /// Clears every variable the path resolution looks at, then applies
        /// `overrides` (entries with `None` stay unset).
        fn new(overrides: &[(&'static str, Option<&str>)]) -> Self {
            let lock = env_lock();
            let names = [
                "INSOMNIA_CONFIG_DIR",
                "INSOMNIA_DATA_DIR",
                "INSOMNIA_RUNTIME_DIR",
                "INSOMNIA_USER_MANIFEST",
                "INSOMNIA_HOME",
                "XDG_CONFIG_HOME",
                "XDG_RUNTIME_DIR",
                "HOME",
            ];
            let saved: Vec<_> = names.iter().map(|n| (*n, std::env::var_os(n))).collect();
            // SAFETY: env_lock() is held, so no other test in this binary
            // accesses the environment concurrently.
            unsafe {
                for (n, _) in &saved {
                    std::env::remove_var(n);
                }
                for (n, v) in overrides {
                    if let Some(v) = v {
                        std::env::set_var(n, v);
                    }
                }
            }
            Self {
                vars: saved,
                _lock: lock,
            }
        }
    }

    impl Drop for EnvGuard {
        fn drop(&mut self) {
            // SAFETY: the lock is still held while the values are restored.
            unsafe {
                for (n, v) in &self.vars {
                    match v {
                        Some(v) => std::env::set_var(n, v),
                        None => std::env::remove_var(n),
                    }
                }
            }
        }
    }

    #[test]
    fn config_dir_falls_back_to_home_dot_config() {
        let _g = EnvGuard::new(&[("HOME", Some("/h"))]);
        assert_eq!(config_dir().unwrap(), PathBuf::from("/h/.config/insomnia"));
    }

    #[test]
    fn config_dir_uses_xdg_when_set() {
        let _g = EnvGuard::new(&[("HOME", Some("/h")), ("XDG_CONFIG_HOME", Some("/x"))]);
        assert_eq!(config_dir().unwrap(), PathBuf::from("/x/insomnia"));
    }

    #[test]
    fn config_dir_insomnia_home_outranks_xdg() {
        let _g = EnvGuard::new(&[
            ("HOME", Some("/h")),
            ("XDG_CONFIG_HOME", Some("/x")),
            ("INSOMNIA_HOME", Some("/sand")),
        ]);
        assert_eq!(config_dir().unwrap(), PathBuf::from("/sand/config"));
    }

    #[test]
    fn config_dir_explicit_wins_over_insomnia_home() {
        let _g = EnvGuard::new(&[
            ("HOME", Some("/h")),
            ("INSOMNIA_HOME", Some("/sand")),
            ("INSOMNIA_CONFIG_DIR", Some("/explicit-cfg")),
        ]);
        assert_eq!(config_dir().unwrap(), PathBuf::from("/explicit-cfg"));
    }

    #[test]
    fn data_dir_default_is_dot_insomnia() {
        let _g = EnvGuard::new(&[("HOME", Some("/h"))]);
        assert_eq!(data_dir().unwrap(), PathBuf::from("/h/.insomnia"));
    }

    #[test]
    fn data_dir_insomnia_home_is_data_dir_itself() {
        let _g = EnvGuard::new(&[("HOME", Some("/h")), ("INSOMNIA_HOME", Some("/sand"))]);
        assert_eq!(data_dir().unwrap(), PathBuf::from("/sand"));
    }

    #[test]
    fn runtime_dir_prefers_xdg_runtime_dir() {
        let _g = EnvGuard::new(&[
            ("HOME", Some("/h")),
            ("XDG_RUNTIME_DIR", Some("/xdg-runtime")),
        ]);
        assert_eq!(
            runtime_dir().unwrap(),
            PathBuf::from("/xdg-runtime/insomnia")
        );
    }

    #[test]
    fn runtime_dir_falls_back_to_dot_insomnia_run() {
        let _g = EnvGuard::new(&[("HOME", Some("/h"))]);
        assert_eq!(runtime_dir().unwrap(), PathBuf::from("/h/.insomnia/run"));
    }

    #[test]
    fn runtime_dir_insomnia_home_is_run_subdir() {
        let _g = EnvGuard::new(&[
            ("HOME", Some("/h")),
            ("XDG_RUNTIME_DIR", Some("/run/user/1000")),
            ("INSOMNIA_HOME", Some("/sand")),
        ]);
        assert_eq!(runtime_dir().unwrap(), PathBuf::from("/sand/run"));
    }

    #[test]
    fn empty_env_treated_as_unset() {
        let _g = EnvGuard::new(&[("HOME", Some("/h")), ("XDG_CONFIG_HOME", Some(""))]);
        assert_eq!(config_dir().unwrap(), PathBuf::from("/h/.config/insomnia"));
    }

    #[test]
    fn returns_none_when_nothing_set() {
        let _g = EnvGuard::new(&[]);
        assert!(config_dir().is_none());
        assert!(data_dir().is_none());
        assert!(runtime_dir().is_none());
    }

    #[test]
    fn user_manifest_env_override_wins_when_non_empty() {
        let _g = EnvGuard::new(&[
            ("HOME", Some("/h")),
            ("INSOMNIA_USER_MANIFEST", Some("/tmp/user.toml")),
        ]);
        assert_eq!(
            user_manifest_path_with_env_override().unwrap(),
            PathBuf::from("/tmp/user.toml")
        );
    }

    #[test]
    fn empty_user_manifest_env_falls_back_to_default_path() {
        let _g = EnvGuard::new(&[("HOME", Some("/h")), ("INSOMNIA_USER_MANIFEST", Some(""))]);
        assert_eq!(
            user_manifest_path_with_env_override().unwrap(),
            PathBuf::from("/h/.config/insomnia/manifest.toml")
        );
    }

    #[test]
    fn user_manifest_path_from_env_treats_empty_as_unset() {
        assert_eq!(user_manifest_path_from_env(None), None);
        assert_eq!(user_manifest_path_from_env(Some(OsString::from(""))), None);
        assert_eq!(
            user_manifest_path_from_env(Some(OsString::from("/tmp/u.toml"))).unwrap(),
            PathBuf::from("/tmp/u.toml")
        );
    }

    #[test]
    fn well_known_files_compose_off_base_dirs() {
        let _g = EnvGuard::new(&[("INSOMNIA_HOME", Some("/sand"))]);
        assert_eq!(
            user_manifest_path().unwrap(),
            PathBuf::from("/sand/config/manifest.toml")
        );
        assert_eq!(
            user_prompts_dir().unwrap(),
            PathBuf::from("/sand/config/prompts")
        );
        assert_eq!(
            user_pack_file().unwrap(),
            PathBuf::from("/sand/config/prompts.toml")
        );
        assert_eq!(
            user_catalog_override("providers.toml").unwrap(),
            PathBuf::from("/sand/config/providers.toml")
        );
        assert_eq!(sessions_dir().unwrap(), PathBuf::from("/sand/sessions"));
        assert_eq!(
            pod_registry_path().unwrap(),
            PathBuf::from("/sand/run/pods.json")
        );
        assert_eq!(
            pod_runtime_dir("foo").unwrap(),
            PathBuf::from("/sand/run/foo")
        );
        assert_eq!(
            pod_socket_path("foo").unwrap(),
            PathBuf::from("/sand/run/foo/sock")
        );
    }
}

View File

@ -8,6 +8,9 @@
use std::ffi::OsString;
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex};
use arc_swap::{ArcSwap, Guard};
use crate::{Permission, ScopeConfig, ScopeRule};
@ -21,7 +24,7 @@ pub struct Scope {
deny: Vec<ResolvedRule>,
}
#[derive(Debug, Clone)]
#[derive(Debug, Clone, PartialEq, Eq)]
struct ResolvedRule {
/// Absolute, canonicalized-or-normalized target directory/file.
target: PathBuf,
@ -142,7 +145,7 @@ impl Scope {
/// Allow rules with their targets resolved to absolute paths.
///
/// Used by the scope-lock registry, where every Pod's allocation
/// Used by the pod-registry, where every Pod's allocation
/// must be expressed in absolute terms so prefix comparisons are
/// meaningful across processes.
pub fn allow_rules(&self) -> Vec<ScopeRule> {
@ -156,6 +159,23 @@ impl Scope {
.collect()
}
/// Returns the deny rules, each with its target already resolved to an
/// absolute path.
///
/// This is the counterpart of [`allow_rules`](Self::allow_rules). The two
/// together round-trip through [`ScopeConfig`], which lets callers rebuild
/// a scope after layering additional rules on top of an existing
/// [`Scope`].
pub fn deny_rules(&self) -> Vec<ScopeRule> {
    let mut rules = Vec::with_capacity(self.deny.len());
    for resolved in &self.deny {
        rules.push(ScopeRule {
            target: resolved.target.clone(),
            permission: resolved.permission,
            recursive: resolved.recursive,
        });
    }
    rules
}
/// Iterate over absolute paths granted `Write` by an allow rule.
/// Subset of [`readable_paths`](Self::readable_paths).
pub fn writable_paths(&self) -> impl Iterator<Item = &Path> {
@ -165,6 +185,64 @@ impl Scope {
.map(|r| r.target.as_path())
}
/// Returns a new [`Scope`] that equals `self` plus `extra_allow` appended
/// to the allow set.
///
/// Serves the dynamic-scope "grow" paths (for example the controller adding
/// the bash-output Read rule, or a future external `GrantScope`).
///
/// # Errors
///
/// Propagates any [`ScopeError`] raised by [`Scope::from_config`] while the
/// combined rule set is rebuilt.
pub fn with_added_allow_rules(
    &self,
    extra_allow: impl IntoIterator<Item = ScopeRule>,
) -> Result<Self, ScopeError> {
    let mut allow = self.allow_rules();
    let deny = self.deny_rules();
    allow.extend(extra_allow);
    let widened = ScopeConfig { allow, deny };
    Self::from_config(&widened)
}
/// Returns a new [`Scope`] that equals `self` plus `extra_deny` appended to
/// the deny set.
///
/// Serves the dynamic-scope "shrink" paths (for example SpawnPod-style
/// delegation that strips Write from the spawner without touching its
/// allow rules).
///
/// # Errors
///
/// Propagates any [`ScopeError`] raised by [`Scope::from_config`] while the
/// combined rule set is rebuilt.
pub fn with_added_deny_rules(
    &self,
    extra_deny: impl IntoIterator<Item = ScopeRule>,
) -> Result<Self, ScopeError> {
    let allow = self.allow_rules();
    let mut deny = self.deny_rules();
    deny.extend(extra_deny);
    let narrowed = ScopeConfig { allow, deny };
    Self::from_config(&narrowed)
}
/// Returns a new [`Scope`] in which, for every rule in `remove_deny`, one
/// matching deny rule has been dropped.
///
/// Matching is deliberately exact — after the same target resolution that
/// [`Scope::from_config`] applies — rather than geometric. Reclaiming a
/// delegated child must peel off only the deny layer that was added for
/// that child, and must not widen an explicit base deny that merely
/// overlaps the delegated path. A rule with no match is skipped, so
/// calling reclaim repeatedly is harmless.
///
/// # Errors
///
/// Returns the [`ScopeError`] from `resolve_rule` if a rule in
/// `remove_deny` cannot be resolved.
pub fn with_removed_deny_rules(
    &self,
    remove_deny: impl IntoIterator<Item = ScopeRule>,
) -> Result<Self, ScopeError> {
    let mut remaining = self.deny.clone();
    for rule in remove_deny {
        let wanted = resolve_rule(&rule)?;
        // Remove at most one occurrence so duplicate layers are peeled
        // off one call at a time.
        let hit = remaining.iter().position(|candidate| *candidate == wanted);
        if let Some(index) = hit {
            remaining.remove(index);
        }
    }
    let allow = self.allow.clone();
    Ok(Self {
        allow,
        deny: remaining,
    })
}
/// Human-readable grouping of allow rules, suitable for embedding in
/// LLM system prompts. Deny rules are intentionally omitted — they
/// only cap effective permission and surface them would mislead the
@ -213,6 +291,71 @@ impl Scope {
}
}
/// Shared, atomically-swappable view of a [`Scope`].
///
/// Built around [`ArcSwap`] so the hot path (permission checks inside
/// `ScopedFs`) reads the current scope lock-free. Mutators are
/// serialised by an internal `Mutex` so concurrent `update` calls do
/// not lose each other's contributions.
///
/// All clones share the same underlying state — a `SharedScope` cloned
/// out to multiple consumers (Pod, ScopedFs, future grant/revoke
/// callers) sees every update.
#[derive(Debug, Clone)]
pub struct SharedScope {
    inner: Arc<SharedScopeInner>,
}

/// State shared by every clone of a [`SharedScope`].
#[derive(Debug)]
struct SharedScopeInner {
    /// The currently published scope.
    scope: ArcSwap<Scope>,
    /// Serialises read-modify-write update transactions so a producer
    /// can read the current scope, build a derived one, and store it
    /// without losing concurrent updates.
    write_lock: Mutex<()>,
}

impl SharedScope {
    /// Wrap an owned [`Scope`] in a shared, atomically-swappable handle.
    pub fn new(scope: Scope) -> Self {
        Self {
            inner: Arc::new(SharedScopeInner {
                scope: ArcSwap::from_pointee(scope),
                write_lock: Mutex::new(()),
            }),
        }
    }

    /// Snapshot the current scope. Cheap and lock-free; the returned
    /// guard borrows the live scope for as long as it is held.
    pub fn load(&self) -> Guard<Arc<Scope>> {
        self.inner.scope.load()
    }

    /// Snapshot the current scope into an owned `Arc<Scope>`. Useful
    /// when the caller needs a value that outlives the load guard
    /// (e.g. cloning into another struct).
    pub fn snapshot(&self) -> Arc<Scope> {
        self.inner.scope.load_full()
    }

    /// Read-modify-write transaction. `f` is called with the current
    /// scope and returns a derived one (or an error). The internal
    /// write lock ensures that two concurrent `update` calls see each
    /// other's results — the second observes the first's output as its
    /// input.
    ///
    /// # Errors
    ///
    /// Returns whatever error `f` returns; in that case the published
    /// scope is left untouched.
    pub fn update<F>(&self, f: F) -> Result<(), ScopeError>
    where
        F: FnOnce(&Scope) -> Result<Scope, ScopeError>,
    {
        // The mutex guards no data of its own (`()`), and the new scope is
        // only stored after `f` has returned successfully. A panic inside
        // an earlier `f` therefore cannot leave half-written state behind,
        // so a poisoned lock is safe to recover instead of making every
        // later update panic.
        let _guard = self
            .inner
            .write_lock
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner());
        let current = self.inner.scope.load();
        let new = f(&current)?;
        self.inner.scope.store(Arc::new(new));
        Ok(())
    }
}
impl ResolvedRule {
fn matches(&self, path: &Path) -> bool {
if self.recursive {
@ -528,4 +671,128 @@ mod tests {
let deep = dir.path().join("a/b/c/new.txt");
assert!(scope.is_writable(&deep));
}
/// Adding a Read allow rule for a second directory makes it readable while
/// the original writable root stays writable.
#[test]
fn with_added_allow_rules_grows_readable_set() {
    let root = TempDir::new().unwrap();
    let outside = TempDir::new().unwrap();
    let probe = outside.path().join("x");

    let original = Scope::writable(root.path()).unwrap();
    assert!(!original.is_readable(&probe));

    let read_outside = ScopeRule {
        target: outside.path().to_path_buf(),
        permission: Permission::Read,
        recursive: true,
    };
    let grown = original.with_added_allow_rules([read_outside]).unwrap();

    assert!(grown.is_readable(&probe));
    assert!(grown.is_writable(&root.path().join("y")));
}
/// A Write deny rule on a subdirectory caps it at Read while the rest of
/// the root keeps Write.
#[test]
fn with_added_deny_rules_demotes_write_to_read() {
    let root = TempDir::new().unwrap();
    let child = root.path().join("sub");
    std::fs::create_dir(&child).unwrap();

    let deny_child_write = ScopeRule {
        target: child.clone(),
        permission: Permission::Write,
        recursive: true,
    };
    let restricted = Scope::writable(root.path())
        .unwrap()
        .with_added_deny_rules([deny_child_write])
        .unwrap();

    let inside = child.join("a.txt");
    let beside = root.path().join("top.txt");
    assert_eq!(restricted.permission_at(&inside), Some(Permission::Read));
    assert_eq!(restricted.permission_at(&beside), Some(Permission::Write));
}
/// Each reclaim call removes exactly one matching deny layer, and removing
/// a rule that is no longer present is a no-op.
#[test]
fn with_removed_deny_rules_reclaims_one_matching_layer() {
    let root = TempDir::new().unwrap();
    let child = root.path().join("sub");
    std::fs::create_dir(&child).unwrap();
    let probe = child.join("a.txt");

    let deny_write = ScopeRule {
        target: child.clone(),
        permission: Permission::Write,
        recursive: true,
    };

    // Start with the same deny rule layered twice.
    let writable = Scope::writable(root.path()).unwrap();
    let doubled = writable
        .with_added_deny_rules([deny_write.clone(), deny_write.clone()])
        .unwrap();

    let after_first = doubled
        .with_removed_deny_rules([deny_write.clone()])
        .unwrap();
    assert_eq!(
        after_first.permission_at(&probe),
        Some(Permission::Read),
        "one duplicate deny layer must remain"
    );

    let after_second = after_first
        .with_removed_deny_rules([deny_write.clone()])
        .unwrap();
    assert_eq!(after_second.permission_at(&probe), Some(Permission::Write));

    let after_third = after_second.with_removed_deny_rules([deny_write]).unwrap();
    assert_eq!(
        after_third.permission_at(&probe),
        Some(Permission::Write),
        "missing rules are ignored for idempotent reclaim"
    );
}
/// `load` exposes the scope the handle was constructed with.
#[test]
fn shared_scope_load_returns_current_value() {
    let root = TempDir::new().unwrap();
    let scope = Scope::writable(root.path()).unwrap();
    let handle = SharedScope::new(scope);

    let probe = root.path().join("a.txt");
    assert!(handle.load().is_writable(&probe));
}
/// After `update` stores a derived scope, subsequent `load` calls observe
/// the new permissions.
#[test]
fn shared_scope_update_replaces_view_atomically() {
    let root = TempDir::new().unwrap();
    let child = root.path().join("sub");
    std::fs::create_dir(&child).unwrap();
    let probe = child.join("a.txt");

    let handle = SharedScope::new(Scope::writable(root.path()).unwrap());
    assert_eq!(
        handle.load().permission_at(&probe),
        Some(Permission::Write)
    );

    let outcome = handle.update(|current| {
        let deny_child_write = ScopeRule {
            target: child.clone(),
            permission: Permission::Write,
            recursive: true,
        };
        current.with_added_deny_rules([deny_child_write])
    });
    outcome.unwrap();

    assert_eq!(handle.load().permission_at(&probe), Some(Permission::Read));
}
/// An update made through one clone is visible through another clone.
#[test]
fn shared_scope_clones_share_state() {
    let root = TempDir::new().unwrap();
    let outside = TempDir::new().unwrap();
    let probe = outside.path().join("x");

    let first = SharedScope::new(Scope::writable(root.path()).unwrap());
    let second = first.clone();
    assert!(!second.load().is_readable(&probe));

    let outcome = first.update(|current| {
        let read_outside = ScopeRule {
            target: outside.path().to_path_buf(),
            permission: Permission::Read,
            recursive: true,
        };
        current.with_added_allow_rules([read_outside])
    });
    outcome.unwrap();

    assert!(second.load().is_readable(&probe));
}
}

25
crates/memory/Cargo.toml Normal file
View File

@ -0,0 +1,25 @@
[package]
name = "memory"
version = "0.1.0"
edition.workspace = true
license.workspace = true
[dependencies]
async-trait = { workspace = true }
chrono = { version = "0.4", features = ["serde"] }
libc = { workspace = true }
lint-common = { workspace = true }
llm-worker = { workspace = true }
manifest = { workspace = true }
schemars = { workspace = true }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
sha2 = { workspace = true }
serde_yaml = "0.9.34"
thiserror = { workspace = true }
tracing = { workspace = true }
uuid = { workspace = true, features = ["v7", "serde"] }
[dev-dependencies]
tempfile = { workspace = true }
tokio = { workspace = true, features = ["macros", "rt-multi-thread"] }

450
crates/memory/src/audit.rs Normal file
View File

@ -0,0 +1,450 @@
//! Append-only JSONL audit log for memory workers and tools.
//!
//! The log is evidence-only observability data under
//! `.insomnia/memory/_logs/current.log`. It is intentionally separate from
//! `_staging` and `_usage`, and consolidation never consumes it. Operators can
//! follow the latest stream with:
//!
//! ```text
//! tail -f .insomnia/memory/_logs/current.log
//! ```
use std::collections::BTreeMap;
use std::fs::{self, OpenOptions};
use std::io::{self, Write};
use std::path::{Path, PathBuf};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use uuid::Uuid;
use crate::workspace::WorkspaceLayout;
/// Serde `skip_serializing_if` predicate: true when the counter is zero.
///
/// Takes a reference because serde passes the field by reference.
fn is_zero_usize(value: &usize) -> bool {
    matches!(*value, 0)
}
/// Which memory worker produced an audit entry.
///
/// Serialized with snake_case variant names (e.g. `memory_extract`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum AuditWorker {
    MemoryExtract,
    MemoryConsolidation,
}

impl AuditWorker {
    /// Short label for the worker. Note that it differs from the serialized
    /// name: the `memory_` prefix is not part of the label.
    pub fn label(self) -> &'static str {
        let text = match self {
            AuditWorker::MemoryExtract => "extract",
            AuditWorker::MemoryConsolidation => "consolidation",
        };
        text
    }
}
/// Lifecycle state recorded for a worker run.
///
/// Serialized with snake_case variant names (e.g. `started`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum WorkerLifecycleStatus {
    Started,
    Completed,
    Skipped,
    Failed,
    Cancelled,
}

impl WorkerLifecycleStatus {
    /// Short label for the status.
    ///
    /// The label is not always the serialized name: `Started` is shown as
    /// `"running"` and `Completed` as `"done"`.
    pub fn label(self) -> &'static str {
        let text = match self {
            WorkerLifecycleStatus::Started => "running",
            WorkerLifecycleStatus::Completed => "done",
            WorkerLifecycleStatus::Skipped => "skipped",
            WorkerLifecycleStatus::Failed => "failed",
            WorkerLifecycleStatus::Cancelled => "cancelled",
        };
        text
    }
}
/// What caused a worker run to be scheduled.
///
/// Serialized with snake_case variant names (e.g. `session_end`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum AuditTrigger {
    SessionEnd,
    TurnThreshold,
    TokenThreshold,
    StagingBacklog,
    Idle,
    Manual,
    StartupRecovery,
    Unknown,
}

impl AuditTrigger {
    /// Snake_case label for the trigger.
    pub fn label(self) -> &'static str {
        let text = match self {
            AuditTrigger::SessionEnd => "session_end",
            AuditTrigger::TurnThreshold => "turn_threshold",
            AuditTrigger::TokenThreshold => "token_threshold",
            AuditTrigger::StagingBacklog => "staging_backlog",
            AuditTrigger::Idle => "idle",
            AuditTrigger::Manual => "manual",
            AuditTrigger::StartupRecovery => "startup_recovery",
            AuditTrigger::Unknown => "unknown",
        };
        text
    }
}
/// Outcome attached to record operation / record usage audit entries.
///
/// Serialized with snake_case variant names (`success`, `failed`,
/// `skipped`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum AuditStatus {
Success,
Failed,
Skipped,
}
/// Model identification attached to a worker lifecycle audit entry.
///
/// Every field is optional and is omitted from the serialized form when it
/// is `None`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ModelAudit {
/// NOTE(review): this field has no `rename`, so it is serialized under the
/// key `ref_`, whereas `ModelManifest` renames its `ref_` field to `ref`.
/// Confirm whether the audit log key is meant to be `ref_`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub ref_: Option<String>,
/// Wire scheme, as a string.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub scheme: Option<String>,
/// Model identifier.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub model_id: Option<String>,
}
/// Token usage figures attached to a worker lifecycle audit entry.
///
/// Every counter is optional and is omitted from the serialized form when
/// it is `None`.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct UsageAudit {
#[serde(default, skip_serializing_if = "Option::is_none")]
pub input_tokens: Option<u64>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub output_tokens: Option<u64>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub total_tokens: Option<u64>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub cache_read_input_tokens: Option<u64>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub cache_creation_input_tokens: Option<u64>,
}
/// Extract-worker details attached to a worker lifecycle audit entry.
///
/// Optional fields and empty lists are omitted from the serialized form;
/// `staging_count` is always written.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct ExtractAudit {
#[serde(default, skip_serializing_if = "Option::is_none")]
pub session_id: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub segment_id: Option<String>,
/// Two-element range — presumably `[start, end]`; TODO confirm the bounds
/// convention against the producer.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub entry_range: Option<[u64; 2]>,
/// Two-element range — presumably `[start, end]`; TODO confirm the bounds
/// convention against the producer.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub history_range: Option<[u64; 2]>,
/// Always serialized, including when zero.
#[serde(default)]
pub staging_count: usize,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub staging_ids: Vec<String>,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub staging_paths: Vec<String>,
}
/// Consolidation-worker details attached to a worker lifecycle audit entry.
///
/// All fields default when absent on input.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct ConsolidationAudit {
/// Always serialized, including when zero.
#[serde(default)]
pub staging_count: usize,
/// Omitted from the serialized form when zero.
#[serde(default, skip_serializing_if = "is_zero_usize")]
pub invalid_staging_count: usize,
/// Always serialized, including when zero.
#[serde(default)]
pub staging_bytes: u64,
/// Omitted from the serialized form when empty.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub consumed_staging_ids: Vec<String>,
/// Per-operation counters; always serialized.
#[serde(default)]
pub operations: OperationCounts,
}
/// Per-operation counters for record changes.
///
/// Every counter defaults to zero when absent on input and is always
/// serialized.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct OperationCounts {
    #[serde(default)]
    pub write: usize,
    #[serde(default)]
    pub edit: usize,
    #[serde(default)]
    pub delete: usize,
    #[serde(default)]
    pub drop: usize,
    #[serde(default)]
    pub merge: usize,
    #[serde(default)]
    pub trim: usize,
}

impl OperationCounts {
    /// Sum of all six counters.
    pub fn total_record_changes(&self) -> usize {
        let counters = [
            self.write,
            self.edit,
            self.delete,
            self.drop,
            self.merge,
            self.trim,
        ];
        counters.iter().sum()
    }
}
/// Payload of a `worker_lifecycle` audit event: one state transition of a
/// worker run.
///
/// The optional detail sections are omitted from the serialized form when
/// they are `None`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct WorkerLifecycleAudit {
/// Identifier of the worker run this entry belongs to.
pub run_id: Uuid,
pub worker: AuditWorker,
pub status: WorkerLifecycleStatus,
pub trigger: AuditTrigger,
/// Free-form reason string supplied by the caller.
pub reason: String,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub model: Option<ModelAudit>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub usage: Option<UsageAudit>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub extract: Option<ExtractAudit>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub consolidation: Option<ConsolidationAudit>,
}
/// Payload of a `record_operation` audit event: one operation on a memory
/// record.
///
/// The hashes and the reason are omitted from the serialized form when
/// they are `None`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct RecordOperationAudit {
/// Operation name, as a free-form string.
pub op: String,
pub status: AuditStatus,
pub kind: String,
pub slug: String,
pub path: String,
/// Content hash before the operation — presumably in the `sha256:<hex>`
/// form produced by `hash_bytes`; verify against the callers.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub before_hash: Option<String>,
/// Content hash after the operation — presumably in the `sha256:<hex>`
/// form produced by `hash_bytes`; verify against the callers.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub after_hash: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub reason: Option<String>,
}
/// Payload of a `record_usage` audit event.
///
/// Only `op`, `status` and `kind` are mandatory; every other field is
/// omitted from the serialized form when it is `None`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct RecordUsageAudit {
/// Operation name, as a free-form string.
pub op: String,
pub status: AuditStatus,
pub kind: String,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub slug: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub path: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub query: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub result_count: Option<usize>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub reason: Option<String>,
}
/// The event-specific part of an audit entry.
///
/// Internally tagged: the variant is written to an `event` field using its
/// snake_case name (`worker_lifecycle`, `record_operation`,
/// `record_usage`), alongside the fields of the wrapped struct.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(tag = "event", rename_all = "snake_case")]
pub enum AuditPayload {
WorkerLifecycle(WorkerLifecycleAudit),
RecordOperation(RecordOperationAudit),
RecordUsage(RecordUsageAudit),
}
/// One line of the audit log: an id, a timestamp and the event payload.
///
/// The payload is flattened, so its `event` tag and fields appear at the
/// same level as `id` and `occurred_at` in the serialized object.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct AuditEvent {
    pub id: Uuid,
    pub occurred_at: DateTime<Utc>,
    #[serde(flatten)]
    pub payload: AuditPayload,
}

impl AuditEvent {
    /// Wraps `payload` in a new event with a fresh UUIDv7 id and the
    /// current UTC time.
    pub fn new(payload: AuditPayload) -> Self {
        let id = Uuid::now_v7();
        let occurred_at = Utc::now();
        Self {
            id,
            occurred_at,
            payload,
        }
    }
}
/// Point-in-time fingerprint of one record file, as collected by
/// `snapshot_records`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RecordSnapshot {
/// Record kind (e.g. `summary`, `decision`, `request`, `knowledge`).
pub kind: String,
/// Record slug; for directory records this is the file name without its
/// `.md` suffix.
pub slug: String,
/// Path of the file that was hashed.
pub path: PathBuf,
/// Content hash in the `sha256:<hex>` form produced by `hash_bytes`.
pub hash: String,
}
/// Append one audit event to `.insomnia/memory/_logs/current.log`.
///
/// The event is serialized as a single JSON line. The parent directory of
/// the log is created when missing, and the file is opened in append mode
/// (created on first use).
///
/// The JSON text and its trailing newline are handed to the file in one
/// `write_all` call. With an unbuffered `File`, `writeln!` would emit them
/// as separate writes, which lets another appender slip its own data in
/// between and corrupt the JSONL stream.
///
/// # Errors
///
/// Returns an `InvalidData` error if the event cannot be serialized, or the
/// underlying I/O error if the directory or file cannot be created or
/// written.
pub fn append_audit_event(layout: &WorkspaceLayout, event: &AuditEvent) -> io::Result<()> {
    let path = layout.audit_current_log_path();
    if let Some(parent) = path.parent() {
        fs::create_dir_all(parent)?;
    }
    let mut line = serde_json::to_string(event)
        .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))?;
    line.push('\n');
    let mut file = OpenOptions::new().create(true).append(true).open(path)?;
    file.write_all(line.as_bytes())
}
/// Wraps `audit` in a new `worker_lifecycle` event and appends it to the
/// audit log.
///
/// # Errors
///
/// Propagates the error from `append_audit_event`.
pub fn append_worker_lifecycle(
    layout: &WorkspaceLayout,
    audit: WorkerLifecycleAudit,
) -> io::Result<()> {
    let event = AuditEvent::new(AuditPayload::WorkerLifecycle(audit));
    append_audit_event(layout, &event)
}
/// Wraps `audit` in a new `record_operation` event and appends it to the
/// audit log.
///
/// # Errors
///
/// Propagates the error from `append_audit_event`.
pub fn append_record_operation(
    layout: &WorkspaceLayout,
    audit: RecordOperationAudit,
) -> io::Result<()> {
    let event = AuditEvent::new(AuditPayload::RecordOperation(audit));
    append_audit_event(layout, &event)
}
/// Wraps `audit` in a new `record_usage` event and appends it to the audit
/// log.
///
/// # Errors
///
/// Propagates the error from `append_audit_event`.
pub fn append_record_usage(layout: &WorkspaceLayout, audit: RecordUsageAudit) -> io::Result<()> {
    let event = AuditEvent::new(AuditPayload::RecordUsage(audit));
    append_audit_event(layout, &event)
}
/// Hashes the contents of the file at `path`.
///
/// Returns `Ok(Some(hash))` in the `hash_bytes` format when the file could
/// be read and `Ok(None)` when it does not exist.
///
/// # Errors
///
/// Any read error other than `NotFound` is returned unchanged.
pub fn file_hash(path: &Path) -> io::Result<Option<String>> {
    let bytes = match fs::read(path) {
        Ok(bytes) => bytes,
        Err(err) => {
            return if err.kind() == io::ErrorKind::NotFound {
                Ok(None)
            } else {
                Err(err)
            };
        }
    };
    Ok(Some(hash_bytes(&bytes)))
}
/// Returns the SHA-256 digest of `bytes` as `sha256:` followed by the
/// lowercase hexadecimal digest.
pub fn hash_bytes(bytes: &[u8]) -> String {
    use std::fmt::Write as _;

    const PREFIX: &str = "sha256:";

    let digest = Sha256::digest(bytes);
    // Two hex characters per digest byte, plus the prefix.
    let mut hex = String::with_capacity(PREFIX.len() + digest.len() * 2);
    hex.push_str(PREFIX);
    digest.iter().for_each(|byte| {
        // Writing into a `String` cannot fail; the result is ignored.
        let _ = write!(hex, "{byte:02x}");
    });
    hex
}
/// Collects a snapshot of every record file currently on disk.
///
/// Covers the summary file plus the `.md` files in the decisions, requests
/// and knowledge directories. Entries are keyed by `"<kind>/<slug>"`.
/// Files that are missing or unreadable are left out.
pub fn snapshot_records(layout: &WorkspaceLayout) -> BTreeMap<String, RecordSnapshot> {
    let mut snapshots: BTreeMap<String, RecordSnapshot> = BTreeMap::new();

    let summary = layout.summary_path();
    snapshot_one(&mut snapshots, "summary", "summary", summary);

    let decisions = layout.decisions_dir();
    snapshot_dir(&mut snapshots, "decision", decisions);

    let requests = layout.requests_dir();
    snapshot_dir(&mut snapshots, "request", requests);

    let knowledge = layout.knowledge_dir();
    snapshot_dir(&mut snapshots, "knowledge", knowledge);

    snapshots
}
/// Derives operation counters by diffing two record snapshots.
///
/// - a key present only in `after` counts as a `write`;
/// - a key present in both with a different hash counts as an `edit`;
/// - a key present only in `before` counts as a `delete`.
///
/// The `drop`, `merge` and `trim` counters are not touched here and stay
/// at zero.
pub fn operation_counts_from_snapshots(
    before: &BTreeMap<String, RecordSnapshot>,
    after: &BTreeMap<String, RecordSnapshot>,
) -> OperationCounts {
    let mut counts = OperationCounts::default();

    for (key, current) in after {
        if let Some(previous) = before.get(key) {
            if previous.hash != current.hash {
                counts.edit += 1;
            }
        } else {
            counts.write += 1;
        }
    }

    counts.delete += before
        .keys()
        .filter(|key| !after.contains_key(*key))
        .count();

    counts
}
/// Snapshots every regular `*.md` file directly inside `dir` under `kind`.
///
/// The slug is the file name without its `.md` suffix. This is best-effort:
/// an unreadable directory, unreadable entries, non-files, names that are
/// not valid UTF-8 and names without the `.md` suffix are all skipped.
fn snapshot_dir(out: &mut BTreeMap<String, RecordSnapshot>, kind: &str, dir: PathBuf) {
    let Ok(listing) = fs::read_dir(dir) else {
        return;
    };
    for path in listing.flatten().map(|entry| entry.path()) {
        if !path.is_file() {
            continue;
        }
        let slug = path
            .file_name()
            .and_then(|name| name.to_str())
            .and_then(|name| name.strip_suffix(".md"))
            .map(str::to_string);
        if let Some(slug) = slug {
            snapshot_one(out, kind, &slug, path);
        }
    }
}
/// Inserts a snapshot of the single file at `path` under the key
/// `"<kind>/<slug>"`.
///
/// Nothing is inserted when `path` is not a regular file or when hashing it
/// fails or reports the file as missing.
fn snapshot_one(out: &mut BTreeMap<String, RecordSnapshot>, kind: &str, slug: &str, path: PathBuf) {
    if !path.is_file() {
        return;
    }
    if let Ok(Some(hash)) = file_hash(&path) {
        let key = format!("{kind}/{slug}");
        let snapshot = RecordSnapshot {
            kind: kind.to_string(),
            slug: slug.to_string(),
            path,
            hash,
        };
        out.insert(key, snapshot);
    }
}
#[cfg(test)]
mod tests {
use super::*;
use tempfile::TempDir;
/// Creates a temporary directory and a `WorkspaceLayout` rooted at it.
/// The `TempDir` is returned alongside the layout so the caller can hold on
/// to it for the duration of the test.
fn setup() -> (TempDir, WorkspaceLayout) {
let dir = TempDir::new().unwrap();
let layout = WorkspaceLayout::new(dir.path().to_path_buf());
(dir, layout)
}
/// Appending one lifecycle event must produce a single JSON document in the
/// current audit log with the expected `event`, `worker`, `status` and
/// `run_id` fields.
#[test]
fn appends_jsonl_to_current_log() {
let (_dir, layout) = setup();
let run_id = Uuid::now_v7();
append_worker_lifecycle(
&layout,
WorkerLifecycleAudit {
run_id,
worker: AuditWorker::MemoryExtract,
status: WorkerLifecycleStatus::Started,
trigger: AuditTrigger::TokenThreshold,
reason: "tokens_threshold_reached".to_string(),
model: None,
usage: None,
extract: None,
consolidation: None,
},
)
.unwrap();
// Only one event was written, so the trimmed file parses as one JSON value.
let text = fs::read_to_string(layout.audit_current_log_path()).unwrap();
let value: serde_json::Value = serde_json::from_str(text.trim()).unwrap();
assert_eq!(value["event"], "worker_lifecycle");
assert_eq!(value["worker"], "memory_extract");
assert_eq!(value["status"], "started");
assert_eq!(value["run_id"], run_id.to_string());
}
/// Between the two snapshots one record is rewritten, one removed and one
/// created; each must be counted exactly once.
#[test]
fn counts_created_edited_deleted_records() {
let (dir, layout) = setup();
let decision_dir = dir.path().join(".insomnia/memory/decisions");
fs::create_dir_all(&decision_dir).unwrap();
fs::write(decision_dir.join("a.md"), "old").unwrap();
fs::write(decision_dir.join("gone.md"), "old").unwrap();
let before = snapshot_records(&layout);
// Mutate the directory: edit `a`, delete `gone`, create `created`.
fs::write(decision_dir.join("a.md"), "new").unwrap();
fs::remove_file(decision_dir.join("gone.md")).unwrap();
fs::write(decision_dir.join("created.md"), "new").unwrap();
let after = snapshot_records(&layout);
let counts = operation_counts_from_snapshots(&before, &after);
assert_eq!(counts.write, 1);
assert_eq!(counts.edit, 1);
assert_eq!(counts.delete, 1);
}
/// The hash string is the `sha256:` prefix followed by 64 hex characters.
#[test]
fn hash_has_sha256_prefix() {
assert_eq!(hash_bytes(b"abc").len(), "sha256:".len() + 64);
assert!(hash_bytes(b"abc").starts_with("sha256:"));
}
}

View File

@ -0,0 +1,292 @@
//! consolidation sub-Worker への最初のユーザー入力を組み立てる。
//!
//! extract (`extract::build_extract_input`) と同じ方針で、固定 schema の
//! markdown セクション列にしてサブWorker に渡す。`docs/plan/memory.md`
//! §Consolidation 入力 / §整理材料 の項目に従い:
//!
//! 1. consumed staging エントリ全文(`source` 込み)
//! 2. 既存 `memory/*` 全文(summary / decisions / requests)
//! 3. Usage evidence report(明示使用回数 + resident exposure cost)
//! 4. 整理材料(Linter Warn ベース、hard protection 判定はしない)
//!
//! 既存 `knowledge/*` 本文は埋めず、agent に `KnowledgeQuery` 経由で引かせる
//! 設計(`docs/plan/memory.md` §retrieval 経路 / §Consolidation の Knowledge アクセス)。
use std::fmt::Write;
use crate::consolidate::staging::StagingEntry;
use crate::consolidate::tidy::TidyHints;
use crate::usage::UsageReport;
use crate::workspace::{RecordKind, WorkspaceLayout};
/// Builds the first user message handed to the consolidation sub-Worker.
///
/// The message is a fixed sequence of markdown sections — consumed staging
/// entries, existing memory records, the usage evidence report and the tidy
/// hints — framed by an instruction preamble and a closing request for a
/// short final message.
pub fn build_consolidate_input(
    layout: &WorkspaceLayout,
    staging: &[StagingEntry],
    tidy: &TidyHints,
    usage_report: &UsageReport,
) -> String {
    const PREAMBLE: &str = "consolidation input. Run the integration step first \
        (fold the staging activity logs into memory and knowledge), then the \
        tidy step (clean up existing records). Use the memory tools for \
        every write direct file writes are denied by the pod scope.\n\n";
    const CLOSING: &str = "When done, end the turn with a short final assistant message describing \
        what changed.";

    // Heading / body pairs in the order they appear in the prompt.
    let sections = [
        (
            "## Staging entries (consumed by this run)\n\n",
            render_staging_records(staging),
        ),
        (
            "## Existing memory records (full content)\n\n",
            render_existing_memory_records(layout),
        ),
        (
            "## Usage evidence report\n\n",
            render_usage_report(usage_report),
        ),
        ("## Tidy hints\n\n", render_tidy_hints(tidy)),
    ];

    let mut message = String::from(PREAMBLE);
    for (heading, body) in &sections {
        message.push_str(heading);
        message.push_str(body);
        message.push('\n');
    }
    message.push_str(CLOSING);
    message
}
/// Renders staging entries as a `### <id>` heading followed by a fenced,
/// pretty-printed JSON block for each entry.
///
/// An empty slice renders as `(none)`. If a record cannot be serialised, `{}`
/// is emitted in its place.
pub fn render_staging_records(entries: &[StagingEntry]) -> String {
    if entries.is_empty() {
        return String::from("(none)\n");
    }
    entries.iter().fold(String::new(), |mut rendered, entry| {
        let body = serde_json::to_string_pretty(&entry.record).unwrap_or_else(|_| "{}".into());
        let _ = write!(rendered, "### {}\n```json\n{body}\n```\n\n", entry.id);
        rendered
    })
}
/// Renders the full text of the existing memory records: the summary file
/// first, then the decision records, then the request records.
///
/// The summary appears under `### summary`; the other records are appended by
/// [`push_kind_records`]. When nothing could be read the result is `(none)`.
pub fn render_existing_memory_records(layout: &WorkspaceLayout) -> String {
    let mut rendered = String::new();
    if let Ok(summary) = std::fs::read_to_string(layout.summary_path()) {
        rendered.push_str("### summary\n```markdown\n");
        rendered.push_str(summary.trim_end_matches('\n'));
        rendered.push_str("\n```\n\n");
    }
    for kind in [RecordKind::Decision, RecordKind::Request] {
        push_kind_records(&mut rendered, layout, kind);
    }
    if rendered.is_empty() {
        String::from("(none)\n")
    } else {
        rendered
    }
}
/// Appends every `*.md` record of `kind` to `out`, sorted by slug, as a
/// `### <kind>:<slug>` heading followed by a fenced markdown block.
///
/// Only decisions and requests are rendered; every other kind returns
/// immediately. An unreadable directory or file is skipped silently.
fn push_kind_records(out: &mut String, layout: &WorkspaceLayout, kind: RecordKind) {
    let dir = match kind {
        RecordKind::Decision => layout.decisions_dir(),
        RecordKind::Request => layout.requests_dir(),
        RecordKind::Knowledge | RecordKind::Summary | RecordKind::Workflow => return,
    };
    let Ok(listing) = std::fs::read_dir(&dir) else {
        return;
    };
    let mut records: Vec<(String, std::path::PathBuf)> = listing
        .flatten()
        .map(|entry| entry.path())
        .filter(|path| path.is_file())
        .filter_map(|path| {
            let slug = path.file_stem().and_then(|s| s.to_str())?.to_string();
            let is_markdown = path.extension().and_then(|s| s.to_str()) == Some("md");
            is_markdown.then_some((slug, path))
        })
        .collect();
    // Sort so the prompt is stable regardless of directory iteration order.
    records.sort();
    for (slug, path) in records {
        if let Ok(content) = std::fs::read_to_string(&path) {
            let _ = write!(
                out,
                "### {}:{}\n```markdown\n{}\n```\n\n",
                kind.as_str(),
                slug,
                content.trim_end_matches('\n')
            );
        }
    }
}
/// Renders the usage evidence report section.
///
/// A non-empty report becomes a short caveat followed by the report as
/// pretty-printed JSON (`{}` if serialisation fails). An empty report becomes
/// a note that absence of evidence is not proof of non-use.
fn render_usage_report(report: &UsageReport) -> String {
    if !report.is_empty() {
        let json = serde_json::to_string_pretty(report).unwrap_or_else(|_| "{}".to_string());
        return format!(
            "This report is evidence only. Do not make hard Knowledge-creation or tidy-protection decisions from it alone.\n\n```json\n{json}\n```\n"
        );
    }
    String::from(
        "(empty — no explicit memory/knowledge usage events recorded yet. \
         Treat this as lack of evidence, not proof that records are unused.)\n",
    )
}
/// Renders the tidy hints as markdown.
///
/// Empty hints render as the single line `(none)`. Otherwise each non-empty
/// category gets a bold heading and a bullet list, and a closing reminder
/// about treating the usage evidence report as soft context is appended.
pub fn render_tidy_hints(tidy: &TidyHints) -> String {
    if tidy.is_empty() {
        return String::from("(none)\n");
    }
    let mut md = String::new();

    let replaced = &tidy.replaced_decisions;
    if !replaced.is_empty() {
        md.push_str("**Replaced decisions still on disk** — collapse if the chain has settled:\n");
        for (slug, replaced_by) in replaced {
            let bullet = match replaced_by {
                Some(target) => format!("- `{slug}` → `{target}`\n"),
                None => format!("- `{slug}` (no `replaced_by` set)\n"),
            };
            md.push_str(&bullet);
        }
        md.push('\n');
    }

    let overflows = &tidy.sources_overflow;
    if !overflows.is_empty() {
        md.push_str(
            "**Sources overflow** — consider trimming to the most recent entries (git log keeps the rest):\n",
        );
        for overflow in overflows {
            let kind = overflow.kind.as_str();
            let _ = writeln!(
                md,
                "- {kind} `{}` ({} sources)",
                overflow.slug, overflow.count
            );
        }
        md.push('\n');
    }

    let clusters = &tidy.similar_slug_clusters;
    if !clusters.is_empty() {
        md.push_str("**Similar slug clusters** — evaluate for merge / rename:\n");
        for cluster in clusters {
            // Back-ticked slugs separated by ", ".
            let mut joined = String::new();
            for (idx, slug) in cluster.slugs.iter().enumerate() {
                if idx > 0 {
                    joined.push_str(", ");
                }
                let _ = write!(joined, "`{slug}`");
            }
            let _ = writeln!(md, "- {}: {}", cluster.kind.as_str(), joined);
        }
        md.push('\n');
    }

    md.push_str(
        "Use the Usage evidence report as soft context only; \
         require an explicit reason before deleting or heavily compressing records with recent use.\n",
    );
    md
}
#[cfg(test)]
mod tests {
use super::*;
use crate::consolidate::tidy::{SimilarSlugCluster, SourcesOverflow};
use crate::extract::{ExtractedPayload, write_staging};
use crate::schema::SourceRef;
use chrono::Utc;
use std::path::Path;
/// Current time as an RFC 3339 string, used to fill frontmatter timestamps.
fn now() -> String {
Utc::now().to_rfc3339()
}
/// Writes `content` to `p`, creating parent directories first.
fn write(p: &Path, content: &str) {
if let Some(parent) = p.parent() {
std::fs::create_dir_all(parent).unwrap();
}
std::fs::write(p, content).unwrap();
}
/// With a summary, a decision, one staging entry and all three tidy hint
/// categories populated, every section heading and representative content
/// must appear in the built input.
#[test]
fn build_includes_all_sections_when_populated() {
let dir = tempfile::TempDir::new().unwrap();
let layout = WorkspaceLayout::new(dir.path().to_path_buf());
write(
&dir.path().join(".insomnia/memory/summary.md"),
&format!("---\nupdated_at: {n}\n---\nstate of the world\n", n = now()),
);
write(
&dir.path().join(".insomnia/memory/decisions/dec.md"),
&format!(
"---\ncreated_at: {n}\nupdated_at: {n}\nsources: []\nstatus: open\n---\nbody\n",
n = now()
),
);
let (_id, _) = write_staging(
&layout,
SourceRef {
segment_id: "s".into(),
range: [0, 1],
},
ExtractedPayload::default(),
)
.unwrap();
let staging = crate::consolidate::staging::list_staging_entries(&layout);
let tidy = TidyHints {
replaced_decisions: [("old".to_string(), Some("new".to_string()))]
.into_iter()
.collect(),
sources_overflow: vec![SourcesOverflow {
kind: RecordKind::Decision,
slug: "dec".into(),
count: 12,
}],
similar_slug_clusters: vec![SimilarSlugCluster {
kind: RecordKind::Decision,
slugs: vec!["a".into(), "ab".into()],
}],
};
// An empty usage report exercises the "no evidence yet" placeholder text.
let report = UsageReport::empty();
let out = build_consolidate_input(&layout, &staging, &tidy, &report);
assert!(out.contains("Staging entries"));
assert!(out.contains("Existing memory records"));
assert!(out.contains("Usage evidence report"));
assert!(out.contains("Tidy hints"));
assert!(out.contains("state of the world"));
assert!(out.contains("decision:dec"));
assert!(out.contains("Replaced decisions"));
assert!(out.contains("Sources overflow"));
assert!(out.contains("Similar slug clusters"));
assert!(out.contains("no explicit memory/knowledge usage events"));
}
/// With nothing on disk and empty inputs, the sections fall back to their
/// placeholder text.
#[test]
fn empty_inputs_render_placeholders() {
let dir = tempfile::TempDir::new().unwrap();
let layout = WorkspaceLayout::new(dir.path().to_path_buf());
let out =
build_consolidate_input(&layout, &[], &TidyHints::default(), &UsageReport::empty());
// Both staging and tidy show "(none)"; existing memory records too.
assert!(out.contains("Staging entries"));
assert!(out.contains("(none)"));
}
}

View File

@ -0,0 +1,304 @@
//! `_staging/.consolidation.lock` による consolidation 占有ファイル。
//!
//! `docs/plan/memory.md` §並走防止 に従い:
//!
//! - ファイルが存在し、記録された Pod が動作している間、その Pod が排他占有
//! - クラッシュで残った stale lock は、所有者 PID が死んでいれば次回 spawn
//! 時に上書き取得できる
//! - cleanup は consumed ID の staging エントリのみ削除し、実行中に extract
//! が追加した分は残す
//!
//! 占有判定は Linux/macOS の `kill(pid, 0)` 経由で行う(`ESRCH` で死亡判定)。
//! Windows は対象外: INSOMNIA は POSIX 環境を前提にしている。
use std::fs;
use std::path::{Path, PathBuf};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use crate::workspace::WorkspaceLayout;
/// File name of the consolidation lock; it is joined onto the staging directory.
const LOCK_FILE: &str = ".consolidation.lock";
/// Contents of the lock file. `pid` drives the stale-lock check; `pod_name`,
/// `started_at` and `consumed_ids` are kept for diagnostics and for reference
/// during crash recovery.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LockRecord {
pub pid: u32,
pub pod_name: String,
pub started_at: DateTime<Utc>,
/// UUIDv7 ids of the staging entries this consolidation run fixed as
/// "consumed" in its start-up snapshot. On completion only these entries are
/// deleted; anything added afterwards is left in place.
pub consumed_ids: Vec<Uuid>,
}
/// Errors raised while acquiring or releasing the consolidation lock.
#[derive(Debug, thiserror::Error)]
pub enum LockError {
/// A lock file already exists and its owner PID is alive, so this run
/// should be skipped.
#[error("consolidation lock held by live pid {pid} (pod {pod_name:?})")]
InUse { pid: u32, pod_name: String },
/// A filesystem operation on `path` failed.
#[error("io error at {}: {source}", .path.display())]
Io {
path: PathBuf,
#[source]
source: std::io::Error,
},
/// The lock record could not be serialised or deserialised as JSON.
#[error("failed to (de)serialize lock record: {0}")]
Serde(#[from] serde_json::Error),
}
impl LockError {
fn io(path: impl Into<PathBuf>, source: std::io::Error) -> Self {
Self::Io {
path: path.into(),
source,
}
}
}
/// Handle representing ownership of the consolidation lock while a run is in
/// progress. Nothing happens on drop: the end-of-run cleanup has to delete the
/// consumed staging entries together with the lock, so release is explicit via
/// [`StagingLock::release_with_cleanup`]. If the handle is dropped without an
/// explicit release, the lock file stays on disk and is taken over as stale on
/// a later spawn once its PID is dead.
#[derive(Debug)]
pub struct StagingLock {
path: PathBuf,
record: LockRecord,
}
impl StagingLock {
    /// The record this handle wrote into the lock file.
    pub fn record(&self) -> &LockRecord {
        &self.record
    }

    /// Location of the lock file on disk.
    pub fn path(&self) -> &Path {
        &self.path
    }

    /// Tries to take the consolidation lock for `pid` / `pod_name`.
    ///
    /// The staging directory is created when missing. An existing lock whose
    /// recorded PID is alive blocks acquisition. A lock whose PID is dead, or
    /// whose contents cannot be parsed, is treated as stale and overwritten.
    ///
    /// The lock file is read directly rather than probed with `exists()`
    /// first: if the previous owner releases the lock between a probe and the
    /// read, the read fails with `NotFound`, which simply means the lock is
    /// free and must not be reported as an I/O error.
    ///
    /// NOTE(review): the check and the subsequent write are still two separate
    /// steps, so two processes racing through this function at the same moment
    /// can both succeed. Confirm whether callers rely on stronger exclusion.
    ///
    /// # Errors
    ///
    /// - [`LockError::InUse`] when a live process already holds the lock.
    /// - [`LockError::Io`] when the staging directory cannot be created or the
    ///   lock file cannot be read (for a reason other than not existing) or
    ///   written.
    /// - [`LockError::Serde`] when the new record cannot be serialised.
    pub fn acquire(
        layout: &WorkspaceLayout,
        pid: u32,
        pod_name: impl Into<String>,
        consumed_ids: Vec<Uuid>,
    ) -> Result<Self, LockError> {
        let staging_dir = layout.staging_dir();
        fs::create_dir_all(&staging_dir).map_err(|e| LockError::io(&staging_dir, e))?;
        let path = staging_dir.join(LOCK_FILE);

        match fs::read_to_string(&path) {
            Ok(raw) => match serde_json::from_str::<LockRecord>(&raw) {
                Ok(existing) if pid_is_alive(existing.pid) => {
                    return Err(LockError::InUse {
                        pid: existing.pid,
                        pod_name: existing.pod_name,
                    });
                }
                Ok(existing) => {
                    tracing::warn!(
                        stale_pid = existing.pid,
                        stale_pod = %existing.pod_name,
                        "consolidation stale lock detected, taking over"
                    );
                }
                // A corrupt lock is treated as stale and may be overwritten.
                Err(_) => {
                    tracing::warn!(path = %path.display(), "consolidation lock unparseable, treating as stale");
                }
            },
            // No lock file (never created, or released a moment ago): free to take.
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
            Err(e) => return Err(LockError::io(&path, e)),
        }

        let record = LockRecord {
            pid,
            pod_name: pod_name.into(),
            started_at: Utc::now(),
            consumed_ids,
        };
        let json = serde_json::to_string_pretty(&record)?;
        fs::write(&path, json).map_err(|e| LockError::io(&path, e))?;
        Ok(Self { path, record })
    }

    /// Releases the lock and deletes the staging entries listed in
    /// `consumed_ids`.
    ///
    /// Entries that are already gone are skipped silently; other deletion
    /// failures are logged as warnings. Removing the lock file itself is also
    /// best-effort: a failure is only logged and never returned, since a lock
    /// left behind is taken over as stale on a later spawn.
    pub fn release_with_cleanup(self, layout: &WorkspaceLayout) {
        let staging_dir = layout.staging_dir();
        for id in &self.record.consumed_ids {
            let target = staging_dir.join(format!("{id}.json"));
            match fs::remove_file(&target) {
                Ok(_) => {}
                Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
                Err(e) => {
                    tracing::warn!(
                        path = %target.display(),
                        error = %e,
                        "failed to clean up consumed staging entry"
                    );
                }
            }
        }
        self.unlink_lock_only();
    }

    /// Removes only the lock file and leaves every staging entry untouched.
    ///
    /// Intended for the case where the consolidation sub-Worker failed
    /// part-way: the input staging entries stay in place so the next
    /// evaluation processes them again.
    pub fn release_only(self) {
        self.unlink_lock_only();
    }

    /// Deletes the lock file. A missing file is fine; any other failure is
    /// logged as a warning.
    fn unlink_lock_only(&self) {
        if let Err(e) = fs::remove_file(&self.path) {
            if e.kind() != std::io::ErrorKind::NotFound {
                tracing::warn!(
                    path = %self.path.display(),
                    error = %e,
                    "failed to remove consolidation lock"
                );
            }
        }
    }
}
/// Reports whether a process with id `pid` appears to exist (unix).
#[cfg(unix)]
fn pid_is_alive(pid: u32) -> bool {
    // `kill(0, 0)` and `kill(-1, 0)` are POSIX-special (process group / all
    // signalable processes) and would yield false positives. Only strictly
    // positive values that fit a `pid_t` are probed, so a corrupted lock file
    // holding a u32::MAX-ish value is treated as stale.
    let probeable = (1..=i32::MAX as u32).contains(&pid);
    if !probeable {
        return false;
    }
    // SAFETY: `kill` with sig 0 only probes whether the target pid exists
    // and the caller has permission to signal it. No signal is delivered.
    match unsafe { libc::kill(pid as i32, 0) } {
        0 => true,
        _ => {
            // EPERM means the process exists but we can't signal it — still
            // alive for our purposes. ESRCH means it's gone.
            let errno = std::io::Error::last_os_error()
                .raw_os_error()
                .unwrap_or(libc::EINVAL);
            errno != libc::ESRCH
        }
    }
}

/// Fallback for platforms without a liveness probe: always reports "alive".
#[cfg(not(unix))]
fn pid_is_alive(_pid: u32) -> bool {
    // Unsupported platforms: assume the lock is live so we never overwrite
    // someone else's claim. consolidation will skip and try again next post-run.
    true
}
#[cfg(test)]
mod tests {
use super::*;
use crate::extract::{ExtractedPayload, write_staging};
use crate::schema::SourceRef;
/// Creates a temporary workspace whose staging directory already exists.
fn make_layout() -> (tempfile::TempDir, WorkspaceLayout) {
let dir = tempfile::TempDir::new().unwrap();
let layout = WorkspaceLayout::new(dir.path().to_path_buf());
std::fs::create_dir_all(layout.staging_dir()).unwrap();
(dir, layout)
}
/// A successful acquire leaves a lock file on disk and records pid / pod name.
#[test]
fn acquire_writes_lock_file() {
let (_dir, layout) = make_layout();
let lock = StagingLock::acquire(&layout, std::process::id(), "pod", Vec::new()).unwrap();
let path = layout.staging_dir().join(LOCK_FILE);
assert!(path.exists());
assert_eq!(lock.record().pid, std::process::id());
assert_eq!(lock.record().pod_name, "pod");
}
/// A second acquire while the recorded pid is alive must fail with `InUse`.
#[test]
fn acquire_rejects_when_live_pid_holds_lock() {
let (_dir, layout) = make_layout();
// Use this test process's pid — it's definitely alive.
let _first =
StagingLock::acquire(&layout, std::process::id(), "pod-a", Vec::new()).unwrap();
let err = StagingLock::acquire(&layout, std::process::id(), "pod-b", Vec::new())
.expect_err("expected InUse");
assert!(matches!(err, LockError::InUse { .. }));
}
/// A lock whose recorded pid is reported dead can be taken over.
#[test]
fn acquire_overwrites_stale_lock() {
let (_dir, layout) = make_layout();
// u32::MAX does not fit a positive `pid_t`, so the unix `pid_is_alive`
// reports it as dead through its range guard without calling `kill`.
// NOTE(review): the non-unix fallback always reports "alive", so this
// test is only expected to pass on unix targets.
let stale = LockRecord {
pid: u32::MAX,
pod_name: "ghost".into(),
started_at: Utc::now(),
consumed_ids: Vec::new(),
};
std::fs::write(
layout.staging_dir().join(LOCK_FILE),
serde_json::to_string_pretty(&stale).unwrap(),
)
.unwrap();
let lock = StagingLock::acquire(&layout, std::process::id(), "pod", Vec::new())
.expect("stale lock must be overwritable");
assert_eq!(lock.record().pid, std::process::id());
}
/// Release with cleanup deletes only the consumed entry and the lock file;
/// the entry that was not listed as consumed stays on disk.
#[test]
fn release_drops_consumed_entries_and_unlinks_lock() {
let (_dir, layout) = make_layout();
let (id_a, _) = write_staging(
&layout,
SourceRef {
segment_id: "s".into(),
range: [0, 0],
},
ExtractedPayload::default(),
)
.unwrap();
let (id_b, _) = write_staging(
&layout,
SourceRef {
segment_id: "s".into(),
range: [1, 1],
},
ExtractedPayload::default(),
)
.unwrap();
// Only `id_a` is registered as consumed by this run.
let lock = StagingLock::acquire(&layout, std::process::id(), "pod", vec![id_a]).unwrap();
let lock_path = lock.path().to_path_buf();
lock.release_with_cleanup(&layout);
assert!(!lock_path.exists(), "lock file must be removed");
assert!(
!layout.staging_dir().join(format!("{id_a}.json")).exists(),
"consumed entry must be deleted"
);
assert!(
layout.staging_dir().join(format!("{id_b}.json")).exists(),
"non-consumed entry must remain"
);
}
/// Release must tolerate consumed ids whose staging file no longer exists.
#[test]
fn release_is_resilient_to_missing_consumed_entries() {
let (_dir, layout) = make_layout();
let phantom = uuid::Uuid::now_v7();
let lock = StagingLock::acquire(&layout, std::process::id(), "pod", vec![phantom]).unwrap();
let lock_path = lock.path().to_path_buf();
// No file at <staging>/<phantom>.json — release must not panic.
lock.release_with_cleanup(&layout);
assert!(!lock_path.exists());
}
}

View File

@ -0,0 +1,32 @@
//! consolidation: 統合 + 整理。
//!
//! extract が staging に残した活動ログを `memory/*` / `knowledge/*` に
//! 統合し、続けて既存 record を `outdated | superseded | unused | noisy`
//! の観点で整理する disposable Worker を、Pod 側が組み立てるための
//! ヘルパー群を提供する。Pod は次の手順で sub-Worker を構築する:
//!
//! - [`build_consolidate_input`] を sub-Worker の最初の user 入力に
//! - memory 専用 Tool (read / write / edit) と Knowledge / memory 検索ツールを登録
//! - [`StagingLock::acquire`] で並走防止 + consumed ID 確定
//! - sub-Worker run 完了後、[`StagingLock::release_with_cleanup`] で
//! consumed ID 分の staging のみ削除し、占有ファイルを解放
//!
//! system prompt は Pod の `PromptCatalog`
//! (`PodPrompt::MemoryConsolidationSystem`) で管理される。Usage report は
//! 判断材料として渡すだけで、ここでは Knowledge 化や protection の hard decision はしない
//! (`docs/plan/memory.md` §Consolidation / 整理材料)。
mod input;
mod lock;
mod staging;
mod tidy;
pub use input::{
build_consolidate_input, render_existing_memory_records, render_staging_records,
render_tidy_hints,
};
pub use lock::{LockError, LockRecord, StagingLock};
pub use staging::{
StagingEntriesSnapshot, StagingEntry, list_staging_entries, list_staging_entries_snapshot,
};
pub use tidy::{TidyHints, collect_tidy_hints};

View File

@ -0,0 +1,190 @@
//! `_staging/*.json` を列挙して [`StagingRecord`] に展開する読み込みヘルパー。
//!
//! consolidation 起動時のスナップショット(consumed ID list 確定)と、整理 step
//! が終わった後の cleanup の双方で使う。`.consolidation.lock` のような
//! 占有ファイルは拡張子が `.json` ではない(UUIDv7 としても parse できない)ので自然に除外される。
//!
//! [`StagingRecord`] のスキーマは extract が書き出す側 (`crate::extract`)
//! と単一の真実源 — ここでは読み出す側だけを担当する。
use std::path::PathBuf;
use uuid::Uuid;
use crate::extract::StagingRecord;
use crate::workspace::WorkspaceLayout;
/// One entry queued in staging. `id` is recovered from the file name
/// `<id>.json` and is expected to be a UUIDv7.
#[derive(Debug, Clone)]
pub struct StagingEntry {
pub id: Uuid,
pub path: PathBuf,
pub record: StagingRecord,
/// Byte length of this file. Used for the threshold check
/// (`consolidation_threshold_bytes`).
pub bytes: u64,
}
/// Result of inspecting the staging directory. `entries` holds only the
/// staging files that could be read under the current schema; `invalid_count`
/// is the number of `.json` files that could not be accepted as staging
/// entries.
#[derive(Debug, Clone, Default)]
pub struct StagingEntriesSnapshot {
pub entries: Vec<StagingEntry>,
pub invalid_count: usize,
}
/// `<staging_dir>/*.json` を読んで UUIDv7 順に並べた [`StagingEntry`]
/// 配列を返す。staging_dir が存在しなければ空配列。読めないファイルや
/// JSON parse 失敗は `tracing::warn!` してスキップ(壊れた個別ファイルが
/// consolidation 全体を止めないように)。
pub fn list_staging_entries(layout: &WorkspaceLayout) -> Vec<StagingEntry> {
list_staging_entries_snapshot(layout).entries
}
/// Reads `<staging_dir>/*.json` and returns the valid staging entries, sorted
/// by id, together with the number of invalid `.json` files.
///
/// Invalid files are never migrated, deleted or archived here; the count only
/// makes them observable to the caller. A `.json` file counts as invalid when
/// its name is not valid UTF-8, its stem is not a UUID, it cannot be read, or
/// its contents do not parse as a [`StagingRecord`]; each case logs a warning.
/// Files without a `.json` extension and non-file entries are ignored.
///
/// Returns an empty snapshot when the staging directory cannot be listed.
pub fn list_staging_entries_snapshot(layout: &WorkspaceLayout) -> StagingEntriesSnapshot {
    let dir = layout.staging_dir();
    let Ok(listing) = std::fs::read_dir(&dir) else {
        return StagingEntriesSnapshot::default();
    };
    let mut entries: Vec<StagingEntry> = Vec::new();
    let mut invalid_count = 0;
    for path in listing.flatten().map(|entry| entry.path()) {
        if !path.is_file() {
            continue;
        }
        if path.extension().and_then(|s| s.to_str()) != Some("json") {
            continue;
        }
        let Some(stem) = path.file_stem().and_then(|s| s.to_str()) else {
            invalid_count += 1;
            tracing::warn!(path = %path.display(), "staging entry file name is not valid UTF-8");
            continue;
        };
        let id = match Uuid::parse_str(stem) {
            Ok(id) => id,
            Err(e) => {
                invalid_count += 1;
                tracing::warn!(path = %path.display(), error = %e, "failed to parse staging entry id");
                continue;
            }
        };
        let raw = match std::fs::read_to_string(&path) {
            Ok(raw) => raw,
            Err(e) => {
                invalid_count += 1;
                tracing::warn!(path = %path.display(), error = %e, "failed to read staging entry");
                continue;
            }
        };
        let record = match serde_json::from_str::<StagingRecord>(&raw) {
            Ok(record) => record,
            Err(e) => {
                invalid_count += 1;
                tracing::warn!(path = %path.display(), error = %e, "failed to parse staging entry");
                continue;
            }
        };
        // Take the size from the bytes actually read instead of a separate
        // `metadata` call: it avoids a second syscall and never degrades to a
        // silent `0` that would skew threshold checks. `usize` to `u64` is a
        // lossless widening on supported targets.
        let bytes = raw.len() as u64;
        entries.push(StagingEntry {
            id,
            path,
            record,
            bytes,
        });
    }
    entries.sort_by_key(|entry| entry.id);
    StagingEntriesSnapshot {
        entries,
        invalid_count,
    }
}
#[cfg(test)]
mod tests {
use super::*;
use crate::extract::{ExtractedPayload, write_staging};
use crate::schema::SourceRef;
/// Default (empty) extraction payload used for every staging entry below.
fn empty_payload() -> ExtractedPayload {
ExtractedPayload::default()
}
/// Builds a `SourceRef` for the given segment id and range.
fn source(segment_id: &str, range: [u64; 2]) -> SourceRef {
SourceRef {
segment_id: segment_id.into(),
range,
}
}
/// Entries written one after another must be listed in the same order.
#[test]
fn lists_in_uuidv7_order() {
let tmp = tempfile::TempDir::new().unwrap();
let layout = WorkspaceLayout::new(tmp.path().to_path_buf());
let (id1, _) = write_staging(&layout, source("s", [0, 1]), empty_payload()).unwrap();
let (id2, _) = write_staging(&layout, source("s", [2, 3]), empty_payload()).unwrap();
let (id3, _) = write_staging(&layout, source("s", [4, 5]), empty_payload()).unwrap();
let entries = list_staging_entries(&layout);
let ids: Vec<Uuid> = entries.iter().map(|e| e.id).collect();
assert_eq!(ids, vec![id1, id2, id3]);
}
/// The lock file is ignored entirely, while the three unusable `.json`
/// files are reported through `invalid_count`.
#[test]
fn skips_lock_file_and_counts_invalid_json() {
let tmp = tempfile::TempDir::new().unwrap();
let layout = WorkspaceLayout::new(tmp.path().to_path_buf());
let (_id, _) = write_staging(&layout, source("s", [0, 1]), empty_payload()).unwrap();
// Drop a non-UUID json file, an unparsable UUID-named json file, an
// old-schema UUID-named json file, and a bare lock file alongside.
// Lock files are not `.json`; invalid `.json` files are surfaced
// separately instead of being mistaken for an empty staging directory.
std::fs::write(layout.staging_dir().join("not-a-uuid.json"), "{}").unwrap();
let bad_id = Uuid::now_v7();
std::fs::write(layout.staging_dir().join(format!("{bad_id}.json")), "{").unwrap();
let old_schema_id = Uuid::now_v7();
std::fs::write(
layout.staging_dir().join(format!("{old_schema_id}.json")),
serde_json::json!({
"source": {
"session_id": "legacy-session",
"range": [0, 1]
},
"requests": []
})
.to_string(),
)
.unwrap();
std::fs::write(layout.staging_dir().join(".consolidation.lock"), "{}").unwrap();
let entries = list_staging_entries(&layout);
assert_eq!(entries.len(), 1);
let snapshot = list_staging_entries_snapshot(&layout);
assert_eq!(snapshot.entries.len(), 1);
assert_eq!(snapshot.invalid_count, 3);
}
/// A workspace without a staging directory lists no entries.
#[test]
fn missing_dir_returns_empty() {
let tmp = tempfile::TempDir::new().unwrap();
let layout = WorkspaceLayout::new(tmp.path().to_path_buf());
// No staging dir at all.
assert!(list_staging_entries(&layout).is_empty());
}
}

View File

@ -0,0 +1,356 @@
//! 整理 step が prompt 入力に乗せる「整理材料」スキャナ。
//!
//! `docs/plan/memory.md` §整理(GC 相当)の扱い と
//! `tickets/memory-consolidation.md` の整理材料リストに従い、
//! メトリクス未完の現状で機械的に拾えるヒントだけを集める:
//!
//! - `replaced` chain: `status: replaced` の Decision とその `replaced_by`
//! - sources 過多: `sources` / `last_sources` 配列が閾値超過の record
//! - 類似 slug 乱立: 同 kind の slug が Levenshtein 2 以内のクラスター
//!
//! 使用頻度メトリクスベースの保護閾値情報は `tickets/memory-usage-metrics.md`
//! の成果物が出るまで空で渡る。
use std::collections::{BTreeMap, BTreeSet};
use crate::Slug;
use crate::schema::{
DecisionFrontmatter, KnowledgeFrontmatter, RequestFrontmatter, split_frontmatter,
};
use crate::workspace::{RecordKind, WorkspaceLayout};
/// Threshold above which a record's source list is flagged as overflowing.
/// Follows `linter::warnings::SOURCES_OVERFLOW_THRESHOLD` (10) so that the
/// point where the Linter warns about too many sources and the point where the
/// tidy step recommends trimming stay aligned.
/// NOTE(review): the linter constant is not visible here — confirm the two
/// values still agree.
pub const SOURCES_OVERFLOW_THRESHOLD: usize = 10;
/// Maximum edit distance at which two slugs are clustered as similar. Meant to
/// equal `linter::warnings::SIMILAR_SLUG_DISTANCE`.
/// NOTE(review): the linter constant is not visible here — confirm the two
/// values still agree.
pub const SIMILAR_SLUG_DISTANCE: usize = 2;
/// Mechanically collected hints for the tidy step. An empty field means
/// "nothing to report" for that category.
#[derive(Debug, Default, Clone)]
pub struct TidyHints {
/// Map from the slug of each Decision still on disk with `status: replaced`
/// to its `replaced_by` value. Entries whose `replaced_by` is `None` are
/// listed too, as replacements that never settled.
pub replaced_decisions: BTreeMap<String, Option<String>>,
/// Records whose source list exceeds the threshold, as kind / slug / count
/// triples.
pub sources_overflow: Vec<SourcesOverflow>,
/// Clusters of same-kind slugs linked by a Levenshtein distance
/// `<= SIMILAR_SLUG_DISTANCE`. Slugs inside a cluster are sorted.
pub similar_slug_clusters: Vec<SimilarSlugCluster>,
}
/// One record whose source list is longer than `SOURCES_OVERFLOW_THRESHOLD`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SourcesOverflow {
pub kind: RecordKind,
pub slug: String,
/// Number of entries in the record's source list.
pub count: usize,
}
/// A group of two or more similar slugs that share the same record kind.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SimilarSlugCluster {
pub kind: RecordKind,
pub slugs: Vec<String>,
}
impl TidyHints {
    /// Returns `true` when no hint category has any entry.
    pub fn is_empty(&self) -> bool {
        let Self {
            replaced_decisions,
            sources_overflow,
            similar_slug_clusters,
        } = self;
        replaced_decisions.is_empty()
            && sources_overflow.is_empty()
            && similar_slug_clusters.is_empty()
    }
}
/// workspace を一通りスキャンして [`TidyHints`] を組み立てる。読めない /
/// parse できない record は黙ってスキップLinter は write 経路で守って
/// いるので、ここで顕在化してもどうしようもない)。
pub fn collect_tidy_hints(layout: &WorkspaceLayout) -> TidyHints {
let mut hints = TidyHints::default();
let decisions = read_kind_records(layout, RecordKind::Decision);
let requests = read_kind_records(layout, RecordKind::Request);
let knowledge = read_kind_records(layout, RecordKind::Knowledge);
for (slug, content) in &decisions {
let fm = parse_yaml::<DecisionFrontmatter>(content);
if let Some(fm) = fm.as_ref() {
if matches!(fm.status, crate::schema::DecisionStatus::Replaced) {
hints
.replaced_decisions
.insert(slug.clone(), fm.replaced_by.as_ref().map(|s| s.to_string()));
}
if fm.sources.len() > SOURCES_OVERFLOW_THRESHOLD {
hints.sources_overflow.push(SourcesOverflow {
kind: RecordKind::Decision,
slug: slug.clone(),
count: fm.sources.len(),
});
}
}
}
for (slug, content) in &requests {
if let Some(fm) = parse_yaml::<RequestFrontmatter>(content) {
if fm.sources.len() > SOURCES_OVERFLOW_THRESHOLD {
hints.sources_overflow.push(SourcesOverflow {
kind: RecordKind::Request,
slug: slug.clone(),
count: fm.sources.len(),
});
}
}
}
for (slug, content) in &knowledge {
if let Some(fm) = parse_yaml::<KnowledgeFrontmatter>(content) {
if fm.last_sources.len() > SOURCES_OVERFLOW_THRESHOLD {
hints.sources_overflow.push(SourcesOverflow {
kind: RecordKind::Knowledge,
slug: slug.clone(),
count: fm.last_sources.len(),
});
}
}
}
hints.sources_overflow.sort_by(|a, b| {
(a.kind.as_str(), a.slug.as_str()).cmp(&(b.kind.as_str(), b.slug.as_str()))
});
let decision_slugs: Vec<&str> = decisions.keys().map(|s| s.as_str()).collect();
let request_slugs: Vec<&str> = requests.keys().map(|s| s.as_str()).collect();
let knowledge_slugs: Vec<&str> = knowledge.keys().map(|s| s.as_str()).collect();
if let Some(c) = cluster_similar(&decision_slugs, RecordKind::Decision) {
hints.similar_slug_clusters.extend(c);
}
if let Some(c) = cluster_similar(&request_slugs, RecordKind::Request) {
hints.similar_slug_clusters.extend(c);
}
if let Some(c) = cluster_similar(&knowledge_slugs, RecordKind::Knowledge) {
hints.similar_slug_clusters.extend(c);
}
hints
.similar_slug_clusters
.sort_by(|a, b| (a.kind.as_str(), &a.slugs).cmp(&(b.kind.as_str(), &b.slugs)));
hints
}
/// Loads every `*.md` record of `kind` as a `slug -> full file content` map.
///
/// Decisions, requests and knowledge are read from their respective
/// directories on `layout`; every other kind yields an empty map. Entries are
/// skipped when they are not regular files, when the name is not valid UTF-8,
/// when the extension is not `md`, when the stem is rejected by
/// [`Slug::parse`], or when the file cannot be read.
fn read_kind_records(layout: &WorkspaceLayout, kind: RecordKind) -> BTreeMap<String, String> {
    let dir = match kind {
        RecordKind::Decision => layout.decisions_dir(),
        RecordKind::Request => layout.requests_dir(),
        RecordKind::Knowledge => layout.knowledge_dir(),
        RecordKind::Summary | RecordKind::Workflow => return BTreeMap::new(),
    };
    let Ok(listing) = std::fs::read_dir(&dir) else {
        return BTreeMap::new();
    };
    listing
        .flatten()
        .map(|entry| entry.path())
        .filter(|path| path.is_file())
        .filter_map(|path| {
            let stem = path.file_stem().and_then(|s| s.to_str())?;
            if path.extension().and_then(|s| s.to_str()) != Some("md") {
                return None;
            }
            if Slug::parse(stem).is_err() {
                return None;
            }
            let content = std::fs::read_to_string(&path).ok()?;
            Some((stem.to_string(), content))
        })
        .collect()
}
/// Splits the frontmatter off `content` and deserialises it as `F`.
///
/// Returns `None` when the frontmatter cannot be split off or does not
/// deserialise into `F`.
fn parse_yaml<F: serde::de::DeserializeOwned>(content: &str) -> Option<F> {
    match split_frontmatter(content) {
        Ok((yaml, _body)) => serde_yaml::from_str::<F>(yaml).ok(),
        Err(_) => None,
    }
}
/// Groups `slugs` into connected components of the graph whose edges join
/// slugs at Levenshtein distance `<= SIMILAR_SLUG_DISTANCE`.
///
/// Only components with at least two slugs are reported, each with its slugs
/// sorted. Returns `None` when fewer than two slugs are given or when no such
/// component exists.
fn cluster_similar(slugs: &[&str], kind: RecordKind) -> Option<Vec<SimilarSlugCluster>> {
    let count = slugs.len();
    if count < 2 {
        return None;
    }

    // Disjoint-set forest: `parent[i] == i` marks a root.
    let mut parent: Vec<usize> = (0..count).collect();

    /// Walks up to the root of `node`, shortening the path on the way.
    fn find(parent: &mut [usize], mut node: usize) -> usize {
        while parent[node] != node {
            parent[node] = parent[parent[node]];
            node = parent[node];
        }
        node
    }

    for i in 0..count {
        for j in (i + 1)..count {
            if levenshtein(slugs[i], slugs[j]) > SIMILAR_SLUG_DISTANCE {
                continue;
            }
            let root_i = find(&mut parent, i);
            let root_j = find(&mut parent, j);
            if root_i != root_j {
                parent[root_i] = root_j;
            }
        }
    }

    let mut members_by_root: BTreeMap<usize, Vec<String>> = BTreeMap::new();
    for (idx, slug) in slugs.iter().enumerate() {
        let root = find(&mut parent, idx);
        members_by_root
            .entry(root)
            .or_default()
            .push((*slug).to_string());
    }

    // NOTE(review): components are disjoint by construction, so this
    // duplicate filter looks redundant; it is kept to preserve behaviour.
    let mut seen: BTreeSet<Vec<String>> = BTreeSet::new();
    let mut clusters: Vec<SimilarSlugCluster> = Vec::new();
    for mut members in members_by_root.into_values() {
        if members.len() < 2 {
            continue;
        }
        members.sort();
        if seen.insert(members.clone()) {
            clusters.push(SimilarSlugCluster {
                kind,
                slugs: members,
            });
        }
    }
    if clusters.is_empty() {
        None
    } else {
        Some(clusters)
    }
}
/// Levenshtein edit distance between `a` and `b`, counted in `char`s.
///
/// Uses the iterative two-row dynamic-programming formulation, so only two
/// rows of `b.chars().count() + 1` cells are kept. (Per the original note,
/// this mirrors the Linter's implementation and is kept private to avoid
/// widening that crate-internal API.)
fn levenshtein(a: &str, b: &str) -> usize {
    let source: Vec<char> = a.chars().collect();
    let target: Vec<char> = b.chars().collect();

    // If either side is empty the distance is the length of the other side.
    match (source.len(), target.len()) {
        (0, len) | (len, 0) => return len,
        _ => {}
    }

    let width = target.len() + 1;
    // `above` is the previous DP row, `row` the one being filled.
    let mut above: Vec<usize> = (0..width).collect();
    let mut row: Vec<usize> = vec![0; width];

    for (i, &sc) in source.iter().enumerate() {
        row[0] = i + 1;
        for (j, &tc) in target.iter().enumerate() {
            let insert = row[j] + 1;
            let delete = above[j + 1] + 1;
            let substitute = above[j] + usize::from(sc != tc);
            row[j + 1] = insert.min(delete).min(substitute);
        }
        // The freshly filled row becomes the previous row of the next iteration.
        std::mem::swap(&mut above, &mut row);
    }

    above[width - 1]
}
// Unit tests for `collect_tidy_hints`, run against throwaway workspaces on disk.
#[cfg(test)]
mod tests {
use super::*;
use chrono::Utc;
use std::path::Path;
// Current time as an RFC 3339 string, used for the timestamp frontmatter fields.
fn now() -> String {
Utc::now().to_rfc3339()
}
// Writes `content` to `p`, creating missing parent directories first.
fn write(p: &Path, content: &str) {
if let Some(parent) = p.parent() {
std::fs::create_dir_all(parent).unwrap();
}
std::fs::write(p, content).unwrap();
}
// Fresh temp dir plus a layout rooted at it. The `TempDir` is returned so it
// stays alive (and the directory stays on disk) for the duration of the test.
fn workspace() -> (tempfile::TempDir, WorkspaceLayout) {
let dir = tempfile::TempDir::new().unwrap();
let layout = WorkspaceLayout::new(dir.path().to_path_buf());
(dir, layout)
}
// A decision with `status: replaced` is reported together with its
// `replaced_by` target; the target itself is not reported.
#[test]
fn collects_replaced_chain() {
let (dir, layout) = workspace();
write(
&dir.path().join(".insomnia/memory/decisions/replaced.md"),
&format!(
"---\ncreated_at: {n}\nupdated_at: {n}\nsources: []\nstatus: replaced\nreplaced_by: winner\n---\n",
n = now()
),
);
write(
&dir.path().join(".insomnia/memory/decisions/winner.md"),
&format!(
"---\ncreated_at: {n}\nupdated_at: {n}\nsources: []\nstatus: open\n---\n",
n = now()
),
);
let hints = collect_tidy_hints(&layout);
assert_eq!(
hints.replaced_decisions.get("replaced").cloned(),
Some(Some("winner".into()))
);
assert!(!hints.replaced_decisions.contains_key("winner"));
}
// A record carrying 15 `sources` entries is reported as an overflow with its
// slug, kind and entry count.
#[test]
fn flags_sources_overflow() {
let (dir, layout) = workspace();
let many_sources: String = (0..15)
.map(|i| format!(" - segment_id: s{i}\n range: [{i}, {i}]\n"))
.collect();
write(
&dir.path().join(".insomnia/memory/decisions/big.md"),
&format!(
"---\ncreated_at: {n}\nupdated_at: {n}\nstatus: open\nsources:\n{m}---\n",
n = now(),
m = many_sources
),
);
let hints = collect_tidy_hints(&layout);
assert_eq!(hints.sources_overflow.len(), 1);
assert_eq!(hints.sources_overflow[0].slug, "big");
assert_eq!(hints.sources_overflow[0].kind, RecordKind::Decision);
assert_eq!(hints.sources_overflow[0].count, 15);
}
// The three `db-po*` slugs end up in one sorted cluster; `alpha` is not part
// of any reported cluster.
#[test]
fn clusters_similar_slugs() {
let (dir, layout) = workspace();
for slug in ["db-pool", "db-pol", "db-pools", "alpha"] {
write(
&dir.path()
.join(format!(".insomnia/memory/decisions/{slug}.md")),
&format!(
"---\ncreated_at: {n}\nupdated_at: {n}\nsources: []\nstatus: open\n---\n",
n = now()
),
);
}
let hints = collect_tidy_hints(&layout);
assert_eq!(hints.similar_slug_clusters.len(), 1);
assert_eq!(
hints.similar_slug_clusters[0].slugs,
vec![
"db-pol".to_string(),
"db-pool".to_string(),
"db-pools".to_string(),
]
);
}
// With no record files written, the collected hints are empty.
#[test]
fn empty_workspace_yields_empty_hints() {
let (_dir, layout) = workspace();
let hints = collect_tidy_hints(&layout);
assert!(hints.is_empty());
}
}

121
crates/memory/src/error.rs Normal file
View File

@ -0,0 +1,121 @@
//! Errors raised by the memory subsystem.
use std::path::PathBuf;
use lint_common::RecordLintError;
use thiserror::Error;
/// Top-level error for memory operations that don't fit the lint flow.
///
/// Each variant carries the offending path so the rendered message can name it.
#[derive(Debug, Error)]
pub enum MemoryError {
/// The path is not under the memory or knowledge tree.
#[error("path is not under the memory or knowledge tree: {}", .0.display())]
OutsideMemoryTree(PathBuf),
/// The path is relative where an absolute path is required.
#[error("path is not absolute: {}", .0.display())]
RelativePath(PathBuf),
/// An I/O operation on `path` failed; `source` is the underlying error and is
/// exposed as the error source via `#[source]`.
#[error("io error at {}: {source}", .path.display())]
Io {
path: PathBuf,
#[source]
source: std::io::Error,
},
}
impl MemoryError {
    /// Builds a [`MemoryError::Io`] from the path that was being accessed and
    /// the I/O error that occurred.
    pub fn io(path: impl Into<PathBuf>, source: std::io::Error) -> Self {
        let path: PathBuf = path.into();
        Self::Io { path, source }
    }
}
/// A single Linter violation. Multiple are aggregated in a [`LintReport`].
///
/// `Display` produces a one-line message used directly in the `ToolError`
/// payload returned to the LLM.
#[derive(Debug, Clone, Error, PartialEq, Eq)]
pub enum LintError {
/// The path does not denote a valid memory record location.
#[error("path is not a valid memory record location: {}", .0.display())]
InvalidPath(PathBuf),
/// The path belongs to a different record kind than the one expected there.
#[error("path is for a different record kind than expected at this location: {}", .0.display())]
WrongRecordKind(PathBuf),
/// A record-level lint error from `lint_common`, displayed transparently.
#[error(transparent)]
Record(#[from] RecordLintError),
/// A required frontmatter field is absent; the payload is the field name.
#[error("missing required frontmatter field: `{0}`")]
MissingField(&'static str),
/// A frontmatter field is present but its value is not acceptable.
#[error("invalid value for `{field}`: {message}")]
InvalidField {
field: &'static str,
message: String,
},
/// A decision's `status` is not one of `open`, `resolved`, `replaced`; the
/// payload is the value that was found.
#[error("Decisions `status` must be one of open|resolved|replaced (got `{0}`)")]
InvalidStatus(String),
/// A knowledge record with `model_invokation: true` has a description longer
/// than `limit` chars.
// NOTE(review): `model_invokation` is spelled this way in the message; presumably it
// matches the frontmatter key — confirm before "fixing" the spelling.
#[error(
"Knowledge with model_invokation: true cannot have description longer than {limit} chars (got {actual})"
)]
DescriptionTooLong { actual: usize, limit: usize },
/// The record body is longer than the limit for its record kind.
#[error("body exceeds the size limit for this record kind: {actual} chars > {limit}")]
BodyTooLong { actual: usize, limit: usize },
/// A record with this slug already exists; the message directs the caller to
/// the edit tool instead.
#[error("slug `{0}` already exists; use the edit tool instead of creating a new record")]
SlugAlreadyExists(String),
/// `field` references a slug of the given `kind` that is not known.
#[error("`{field}` references unknown {kind} slug `{slug}`")]
UnknownReference {
field: &'static str,
kind: &'static str,
slug: String,
},
/// Following `replaced_by` links loops back on itself; `chain` is the
/// rendered chain.
#[error("`replaced_by` chain forms a cycle: {chain}")]
ReplacedByCycle { chain: String },
/// `replaced_by` points at the record itself.
#[error("`replaced_by` must point to a different slug than the record itself")]
ReplacedBySelf,
}
/// A non-blocking Linter finding.
///
/// Warnings are carried in the `ToolOutput.summary` so the agent can act on
/// them when convenient; they never abort the write.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LintWarning {
    /// A record with a single source is larger than the importance/size threshold.
    LowImportanceLargeRecord { chars: usize },
    /// The `sources` array has grown past the soft cap.
    SourcesOverflow { count: usize },
    /// Several slugs of the same kind are within Levenshtein distance 2 of each other.
    SimilarSlugs(Vec<String>),
}

impl std::fmt::Display for LintWarning {
    /// Renders the warning as a single human-readable line.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::LowImportanceLargeRecord { chars } => write!(
                f,
                "record is large ({chars} chars) but only has 1 source — consider splitting or trimming"
            ),
            Self::SourcesOverflow { count } => write!(
                f,
                "`sources` has {count} entries — consider keeping only the most recent and relying on git log for the rest"
            ),
            // The slugs are listed comma-separated after the fixed prefix.
            Self::SimilarSlugs(slugs) => write!(
                f,
                "similar slugs detected (consider merging): {}",
                slugs.join(", ")
            ),
        }
    }
}

View File

@ -0,0 +1,87 @@
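//! Assembles the input text for the extract sub-Worker.
//!
//! Follows the same approach as `crates/pod/src/pod.rs::build_summary_prompt`:
//! the Item sequence is flattened into plain lines (reasoning is omitted, tool
//! calls keep only their name, tool results keep only their summary). The whole
//! conversation is passed as a single Markdown section, and the extraction
//! instructions are left to the system prompt.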
use llm_worker::Item;
/// Formats `items` into the first user input for the extract sub-Worker.
///
/// The result is a fixed instruction preamble, a `## Conversation slice`
/// section holding the rendered items, and a closing reminder to call
/// `write_extracted`.
pub fn build_extract_input(items: &[Item]) -> String {
    const PREAMBLE: &str = "Extract activity logs from the conversation slice below. \
        Follow the system prompt's schema strictly and call `write_extracted` once.\n\n";
    const SECTION_HEADING: &str = "## Conversation slice\n";
    const CLOSING: &str = "\n\nWhen you are done, call `write_extracted` and end the turn.";

    let rendered = render_items(items);
    [PREAMBLE, SECTION_HEADING, rendered.as_str(), CLOSING].concat()
}
/// Renders each item as one labelled entry and joins the entries with a
/// blank line.
///
/// Messages become `[Role] text`, tool calls keep only their name, tool
/// results keep only their summary, and reasoning items are dropped.
fn render_items(items: &[Item]) -> String {
    let rendered: Vec<String> = items
        .iter()
        .filter_map(|item| match item {
            Item::Message { role, content, .. } => {
                let speaker = match role {
                    llm_worker::Role::User => "User",
                    llm_worker::Role::Assistant => "Assistant",
                    llm_worker::Role::System => "System",
                };
                // Concatenate the text of all content parts without a separator.
                let body: String = content
                    .iter()
                    .map(|part| part.as_text())
                    .collect::<Vec<_>>()
                    .join("");
                Some(format!("[{speaker}] {body}"))
            }
            Item::ToolCall { name, .. } => Some(format!("[ToolCall] {name}")),
            Item::ToolResult { summary, .. } => Some(format!("[ToolResult] {summary}")),
            // Reasoning is intentionally left out of the extract input.
            Item::Reasoning { .. } => None,
        })
        .collect();
    rendered.join("\n\n")
}
// Unit tests for `build_extract_input`.
#[cfg(test)]
mod tests {
use super::*;
// Messages, tool calls and tool results are rendered with their labels;
// reasoning text does not appear in the output.
#[test]
fn renders_user_assistant_pair_and_tool_calls() {
let items = vec![
Item::user_message("hello"),
Item::assistant_message("hi"),
Item::tool_call("c1", "read_file", "{}"),
Item::tool_result("c1", "ok"),
Item::reasoning("internal scratch — should be skipped"),
];
let s = build_extract_input(&items);
assert!(s.contains("[User] hello"));
assert!(s.contains("[Assistant] hi"));
assert!(s.contains("[ToolCall] read_file"));
assert!(s.contains("[ToolResult] ok"));
assert!(!s.contains("scratch"));
}
// Only the tool result's summary is rendered; its (large) content must not
// leak into the extract input.
#[test]
fn tool_result_renders_summary_but_not_content() {
let huge_content = "raw-content-should-never-enter-extract-input".repeat(10_000);
let items = vec![Item::tool_result_with_content(
"c1",
"short summary kept for extraction",
huge_content.clone(),
)];
let s = build_extract_input(&items);
assert!(s.contains("[ToolResult] short summary kept for extraction"));
assert!(!s.contains("raw-content-should-never-enter-extract-input"));
assert!(!s.contains(&huge_content));
}
}

View File

@ -0,0 +1,35 @@
//! extract: activity extraction.
//!
//! Provides the disposable Worker fired from a regular Pod's post-run hook,
//! plus the helpers that write its output to
//! `<workspace>/.insomnia/memory/_staging/<id>.json`. From this module the Pod
//! assembles, in this order:
//!
//! - [`build_extract_input`] as the sub-Worker's first user input,
//! - [`write_extracted_tool`] as its only tool,
//! - [`write_staging`] to write the received JSON to staging.
//!
//! The system prompt is managed by the Pod's `PromptCatalog`
//! (`PodPrompt::MemoryExtractSystem`). Persisting the pointer
//! (session-store `LogEntry::Extension`, domain `"memory.extract"`) is the
//! Pod's responsibility.
//!
//! [`write_staging`] wraps the output JSON by mechanically attaching
//! `source: { segment_id, range }`; the LLM is never asked to infer `source`.
mod input;
mod payload;
mod pointer;
mod staging;
mod tool;
pub use input::build_extract_input;
pub use payload::{
AttemptEntry, DecisionEntry, DiscussionEntry, ExtractedPayload, RequestEntry, StagingRecord,
};
pub use pointer::{ExtractPointerPayload, fold_pointer};
pub use staging::{StagingError, write_staging};
pub use tool::{ExtractWorkerContext, write_extracted_tool};
/// Domain name used for session-store `LogEntry::Extension` entries.
/// The code that persists the pointer and the code that reads it back must
/// both use this constant so that they agree.
pub const EXTRACT_DOMAIN: &str = "memory.extract";

View File

@ -0,0 +1,88 @@
//! Output schema for extract.
//!
//! The LLM returns an [`ExtractedPayload`] as-is (without `source`); the
//! Pod-side wrapper assembles it into a [`StagingRecord`] and writes it to
//! staging. By contract, `source` is attached mechanically
//! (`docs/plan/memory.md` §Extract).
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use crate::schema::SourceRef;
// NOTE: this type derives `JsonSchema` and its schema is used as the input schema of
// the `write_extracted` tool. schemars is documented to copy `///` doc comments into
// the schema's `description`, so the doc comment below is deliberately left
// byte-identical; this `//` block carries the English explanation instead.
//
// English: the set of activity-log candidates returned by the LLM. Every field is
// optional (empty arrays are allowed); a missing key deserializes to an empty `Vec`
// via `#[serde(default)]`.
/// LLM が返す活動ログ候補の集合。すべて optional空配列は許容
#[derive(Debug, Clone, Default, Serialize, Deserialize, JsonSchema)]
pub struct ExtractedPayload {
#[serde(default)]
pub decisions: Vec<DecisionEntry>,
#[serde(default)]
pub discussions: Vec<DiscussionEntry>,
#[serde(default)]
pub attempts: Vec<AttemptEntry>,
#[serde(default)]
pub requests: Vec<RequestEntry>,
}
impl ExtractedPayload {
    /// Returns `true` when every list is empty. An empty payload is treated
    /// as "Nothing to save", so writing it to staging may be skipped.
    pub fn is_empty(&self) -> bool {
        // Destructure exhaustively so a newly added list cannot be forgotten here.
        let Self {
            decisions,
            discussions,
            attempts,
            requests,
        } = self;
        decisions.is_empty()
            && discussions.is_empty()
            && attempts.is_empty()
            && requests.is_empty()
    }
}
// NOTE: derives `JsonSchema`; the `///` comments are presumably emitted as schema
// descriptions by schemars, so they are kept byte-identical.
// English: a decision that was made (options + the chosen one + rationale).
//   options   — the options that were considered.
//   chosen    — the option that was adopted.
//   rationale — the reason / grounds for adopting it.
/// 判断したこと(選択肢 + 選んだ + 根拠)。
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct DecisionEntry {
/// 検討された選択肢の列挙。
pub options: Vec<String>,
/// 採用された選択肢。
pub chosen: String,
/// 採用理由 / 根拠。
pub rationale: String,
}
// NOTE: derives `JsonSchema`; the `///` comments are presumably emitted as schema
// descriptions by schemars, so they are kept byte-identical.
// English: something that was discussed (topic + points); a conclusion is not required.
//   topic  — the subject of the discussion.
//   points — the points / viewpoints raised within that subject.
/// 議論したこと(トピック + 論点)。結論が出ていなくてもよい。
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct DiscussionEntry {
/// 議論の主題。
pub topic: String,
/// 主題の中で挙がった論点 / 観点。
pub points: Vec<String>,
}
// NOTE: derives `JsonSchema`; the `///` comments are presumably emitted as schema
// descriptions by schemars, so they are kept byte-identical.
// English: something that was tried (attempt + result + success or failure).
//   action    — what was tried.
//   result    — the outcome of trying it.
//   succeeded — whether the attempt achieved its goal; failure and partial
//               success are also expressed through this single bool.
/// 試したこと(試行 + 結果 + 成否)。
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct AttemptEntry {
/// 何を試したか。
pub action: String,
/// 試した結果。
pub result: String,
/// 試行が目的に対して成功したか。失敗 / 部分成功も含めて bool で表現する。
pub succeeded: bool,
}
// NOTE: derives `JsonSchema`; the `///` comments are presumably emitted as schema
// descriptions by schemars, so they are kept byte-identical.
// English: a structured summary of a user submit.
//   intent  — the user's intent / goal.
//   target  — the file / module / feature concerned (optional); omitted from the
//             serialized output when `None`.
//   summary — a one-sentence summary.
/// ユーザー submit の構造化要約。
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct RequestEntry {
/// ユーザーの意図 / ゴール。
pub intent: String,
/// 対象ファイル / モジュール / 機能(任意)。
#[serde(default, skip_serializing_if = "Option::is_none")]
pub target: Option<String>,
/// 一文サマリ。
pub summary: String,
}
/// One file's worth of record as written to staging.
///
/// `source` is attached mechanically by the Pod-side wrapper from the
/// segment_id and the log entry range. The LLM neither sees nor infers this
/// field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StagingRecord {
pub source: SourceRef,
// Flattened: the payload's lists are serialized at the same level as `source`.
#[serde(flatten)]
pub payload: ExtractedPayload,
}

View File

@ -0,0 +1,81 @@
//! Payload format for `LogEntry::Extension { domain: "memory.extract", payload }`
//! and the fold helper used on restore. The memory crate owns this domain, so
//! session-store / Pod do not need to know the payload structure.
use serde::{Deserialize, Serialize};
use super::EXTRACT_DOMAIN;
/// Persisted payload marking the boundary up to which extract has completed.
/// It is written once per extract as an Extension entry in the session log,
/// and the most recent one is the pointer currently in effect.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ExtractPointerPayload {
/// Index of the last session-store LogEntry processed by the most recent
/// extract. The next `source.range.start` is this value + 1.
pub processed_through_entry: usize,
/// `history.len()` at the time of the most recent extract. The next input
/// is sliced as `history[processed_through_history_len..]`.
pub processed_through_history_len: usize,
/// UUIDv7 string of the staging file that was written. When the LLM
/// returned an empty payload, no staging file is created and an empty
/// string is recorded (the pointer still advances).
pub staging_id: String,
}
/// Extracts the latest extract pointer from `RestoredState.extensions`.
///
/// Scans from the end for the last entry whose domain is [`EXTRACT_DOMAIN`]
/// and deserializes its payload. Returns `None` when no such entry exists
/// (a session that was never extracted) or when that latest payload does not
/// deserialize; older entries are not consulted as a fallback.
pub fn fold_pointer(extensions: &[(String, serde_json::Value)]) -> Option<ExtractPointerPayload> {
    let (_, latest) = extensions
        .iter()
        .rfind(|(domain, _)| domain == EXTRACT_DOMAIN)?;
    serde_json::from_value(latest.clone()).ok()
}
// Unit tests for `fold_pointer`.
#[cfg(test)]
mod tests {
use super::*;
// With several extract entries interleaved with another domain, the last
// extract entry is the one returned.
#[test]
fn fold_returns_latest_when_multiple_present() {
let exts = vec![
(
EXTRACT_DOMAIN.to_string(),
serde_json::json!({
"processed_through_entry": 5,
"processed_through_history_len": 4,
"staging_id": "old"
}),
),
("other.domain".to_string(), serde_json::json!({ "x": 1 })),
(
EXTRACT_DOMAIN.to_string(),
serde_json::json!({
"processed_through_entry": 11,
"processed_through_history_len": 8,
"staging_id": "new"
}),
),
];
let p = fold_pointer(&exts).unwrap();
assert_eq!(p.processed_through_entry, 11);
assert_eq!(p.processed_through_history_len, 8);
assert_eq!(p.staging_id, "new");
}
// Entries of other domains alone yield no pointer.
#[test]
fn fold_returns_none_when_absent() {
let exts = vec![("other.domain".to_string(), serde_json::json!({ "x": 1 }))];
assert!(fold_pointer(&exts).is_none());
}
#[test]
fn fold_skips_malformed_entries() {
let exts = vec![(
EXTRACT_DOMAIN.to_string(),
serde_json::json!({ "wrong_shape": true }),
)];
// Current behavior: take the latest entry and return None if its JSON does
// not match. There is no fallback to older entries, because silently
// ignoring a broken latest entry would cause unintended re-extraction.
assert!(fold_pointer(&exts).is_none());
}
}

View File

@ -0,0 +1,114 @@
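//! Helper for writing to `<workspace>/.insomnia/memory/_staging/<id>.json`.
//!
//! One record per file, named by UUIDv7 (the files are short-lived, so the
//! name serves both collision avoidance and ordering). Records are stored in
//! the [`StagingRecord`] format with `source` attached mechanically.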
use std::fs;
use std::path::PathBuf;
use uuid::Uuid;
use crate::extract::payload::{ExtractedPayload, StagingRecord};
use crate::schema::SourceRef;
use crate::workspace::WorkspaceLayout;
/// Errors that can occur while writing a staging record.
#[derive(Debug, thiserror::Error)]
pub enum StagingError {
/// Creating the staging directory at `path` failed.
#[error("failed to create staging dir {}: {source}", .path.display())]
CreateDir {
path: PathBuf,
#[source]
source: std::io::Error,
},
/// Writing the staging file at `path` failed.
#[error("failed to write staging file {}: {source}", .path.display())]
Write {
path: PathBuf,
#[source]
source: std::io::Error,
},
/// Serializing the staging record to JSON failed; converted from
/// `serde_json::Error` via `#[from]`.
#[error("failed to serialize staging record: {0}")]
Serialize(#[from] serde_json::Error),
}
/// Wraps `payload` with `source` and writes it to the staging directory.
///
/// Returns the `(id, path)` assigned to the staging file. Callers are
/// encouraged to skip a completely empty `payload` beforehand via
/// `is_empty()`, but this function still writes an empty payload as a regular
/// record (the spec, §Extract, explicitly allows empty arrays; whether to
/// write is left to the caller).
///
/// # Errors
///
/// - [`StagingError::CreateDir`] when the staging directory cannot be created.
/// - [`StagingError::Serialize`] when the record cannot be serialized to JSON.
/// - [`StagingError::Write`] when the file cannot be written.
pub fn write_staging(
    layout: &WorkspaceLayout,
    source: SourceRef,
    payload: ExtractedPayload,
) -> Result<(Uuid, PathBuf), StagingError> {
    let dir = layout.staging_dir();
    if let Err(source) = fs::create_dir_all(&dir) {
        return Err(StagingError::CreateDir { path: dir, source });
    }

    // The UUIDv7 doubles as the file name.
    let id = Uuid::now_v7();
    let path = dir.join(format!("{id}.json"));

    let json = serde_json::to_string_pretty(&StagingRecord { source, payload })?;
    match fs::write(&path, json) {
        Ok(()) => Ok((id, path)),
        Err(source) => Err(StagingError::Write { path, source }),
    }
}
// Unit tests for `write_staging`, run against a temporary workspace.
#[cfg(test)]
mod tests {
use super::*;
use crate::extract::payload::{DecisionEntry, ExtractedPayload};
// The file lands in the staging dir, its name contains the returned id, and
// the stored record round-trips with the given `source` attached.
#[test]
fn writes_record_with_machine_attached_source() {
let tmp = tempfile::TempDir::new().unwrap();
let layout = WorkspaceLayout::new(tmp.path().to_path_buf());
let source = SourceRef {
segment_id: "sess-1".into(),
range: [3, 7],
};
let payload = ExtractedPayload {
decisions: vec![DecisionEntry {
options: vec!["a".into(), "b".into()],
chosen: "a".into(),
rationale: "shorter".into(),
}],
..Default::default()
};
let (id, path) = write_staging(&layout, source.clone(), payload).unwrap();
assert_eq!(path.parent().unwrap(), layout.staging_dir());
assert!(
path.file_name()
.unwrap()
.to_string_lossy()
.contains(&id.to_string())
);
let written: StagingRecord =
serde_json::from_str(&fs::read_to_string(&path).unwrap()).unwrap();
assert_eq!(written.source.segment_id, "sess-1");
assert_eq!(written.source.range, [3, 7]);
assert_eq!(written.payload.decisions.len(), 1);
}
// An empty payload is still written and reads back as empty.
#[test]
fn empty_payload_is_written_verbatim() {
let tmp = tempfile::TempDir::new().unwrap();
let layout = WorkspaceLayout::new(tmp.path().to_path_buf());
let source = SourceRef {
segment_id: "sess".into(),
range: [0, 0],
};
let (_, path) = write_staging(&layout, source, ExtractedPayload::default()).unwrap();
let written: StagingRecord =
serde_json::from_str(&fs::read_to_string(&path).unwrap()).unwrap();
assert!(written.payload.is_empty());
}
}

View File

@ -0,0 +1,164 @@
//! `write_extracted` tool implementation and the context for the sub-Worker.
//!
//! On the sub-Worker side the tool simply receives the [`ExtractedPayload`]
//! produced by the extract worker and stores it in [`ExtractWorkerContext`]
//! behind a `Mutex`. After the run loop finishes, the Pod takes it out with
//! `take_payload()` and passes it to [`super::staging::write_staging`].
use std::sync::{Arc, Mutex};
use async_trait::async_trait;
use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta, ToolOutput};
use crate::extract::payload::ExtractedPayload;
/// Description attached to the `write_extracted` tool definition.
///
/// The trailing `\` continuations join the lines into one string, so each
/// sentence must carry its own punctuation. The final clause is separated
/// with `;` — without a separator the text read as "free-form prose the
/// wrapper attaches ...", which changes the meaning of the instruction.
const WRITE_EXTRACTED_DESCRIPTION: &str = "Submit the final activity-log JSON for this slice. \
    Pass an object with `decisions`, `discussions`, `attempts`, and `requests` arrays (any may be empty). \
    Call this exactly once and end the turn. Do not include `source`, session metadata, or free-form prose; \
    the wrapper attaches provenance mechanically.";
/// Receiving end for the extract sub-Worker's output. Holds a single
/// `ExtractedPayload`.
#[derive(Debug, Default)]
pub struct ExtractWorkerContext {
// `None` until `write_extracted` stores a payload.
payload: Mutex<Option<ExtractedPayload>>,
/// Number of times `write_extracted` has been called (for debugging).
/// Later calls overwrite earlier ones; the Pod can consult this count if it
/// wants to emit a warning.
call_count: Mutex<usize>,
}
impl ExtractWorkerContext {
pub fn new() -> Self {
Self::default()
}
/// sub-Worker 終了後に Pod が呼んで payload を取り出す。
/// 一度も `write_extracted` が呼ばれなければ `None`。
pub fn take_payload(&self) -> Option<ExtractedPayload> {
self.payload
.lock()
.expect("extract worker payload poisoned")
.take()
}
pub fn call_count(&self) -> usize {
*self
.call_count
.lock()
.expect("extract worker call_count poisoned")
}
}
/// The `write_extracted` tool: parses its JSON input and stores the result in
/// the shared [`ExtractWorkerContext`].
struct WriteExtractedTool {
// Shared with the caller of `write_extracted_tool`, which reads the payload back.
ctx: Arc<ExtractWorkerContext>,
}
#[async_trait]
impl Tool for WriteExtractedTool {
async fn execute(&self, input_json: &str) -> Result<ToolOutput, ToolError> {
let payload: ExtractedPayload = serde_json::from_str(input_json).map_err(|e| {
ToolError::InvalidArgument(format!("invalid write_extracted input: {e}"))
})?;
let summary = format!(
"Recorded activity log: decisions={} discussions={} attempts={} requests={}",
payload.decisions.len(),
payload.discussions.len(),
payload.attempts.len(),
payload.requests.len(),
);
{
let mut guard = self
.ctx
.payload
.lock()
.expect("extract worker payload poisoned");
*guard = Some(payload);
}
{
let mut count = self
.ctx
.call_count
.lock()
.expect("extract worker call_count poisoned");
*count += 1;
}
Ok(ToolOutput {
summary,
content: None,
})
}
}
/// Returns the `write_extracted` tool definition to register on the sub-Worker.
///
/// Each invocation of the returned factory builds the tool metadata (name,
/// description and the JSON schema of `ExtractedPayload` as input schema) and
/// a tool instance sharing `ctx`. If the schema cannot be converted to a JSON
/// value, an empty JSON object is used as the input schema.
pub fn write_extracted_tool(ctx: Arc<ExtractWorkerContext>) -> ToolDefinition {
    Arc::new(move || {
        let input_schema = serde_json::to_value(schemars::schema_for!(ExtractedPayload))
            .unwrap_or_else(|_| serde_json::json!({}));
        let tool: Arc<dyn Tool> = Arc::new(WriteExtractedTool {
            ctx: Arc::clone(&ctx),
        });
        let meta = ToolMeta::new("write_extracted")
            .description(WRITE_EXTRACTED_DESCRIPTION)
            .input_schema(input_schema);
        (meta, tool)
    })
}
// Unit tests for the `write_extracted` tool and its context.
#[cfg(test)]
mod tests {
use super::*;
use llm_worker::tool::Tool;
// A valid input is stored in the context, counted once, and summarized.
#[tokio::test]
async fn write_extracted_records_payload() {
let ctx = Arc::new(ExtractWorkerContext::new());
let tool: Arc<dyn Tool> = Arc::new(WriteExtractedTool { ctx: ctx.clone() });
let input = serde_json::json!({
"decisions": [{
"options": ["a", "b"],
"chosen": "a",
"rationale": "test"
}],
"discussions": [],
"attempts": [],
"requests": []
})
.to_string();
let out = tool.execute(&input).await.unwrap();
assert!(out.summary.contains("decisions=1"));
let payload = ctx.take_payload().unwrap();
assert_eq!(payload.decisions.len(), 1);
assert_eq!(ctx.call_count(), 1);
}
// When the tool is called twice, the second payload replaces the first and
// both calls are counted.
#[tokio::test]
async fn last_call_wins_on_multiple_invocations() {
let ctx = Arc::new(ExtractWorkerContext::new());
let tool: Arc<dyn Tool> = Arc::new(WriteExtractedTool { ctx: ctx.clone() });
let first =
serde_json::json!({"decisions": [], "discussions": [], "attempts": [], "requests": []})
.to_string();
tool.execute(&first).await.unwrap();
let second = serde_json::json!({
"decisions": [],
"discussions": [],
"attempts": [{"action": "x", "result": "ok", "succeeded": true}],
"requests": []
})
.to_string();
tool.execute(&second).await.unwrap();
let payload = ctx.take_payload().unwrap();
assert_eq!(payload.attempts.len(), 1);
assert_eq!(ctx.call_count(), 2);
}
// Unparseable input is rejected as `InvalidArgument` and nothing is stored.
#[tokio::test]
async fn invalid_json_returns_invalid_argument() {
let ctx = Arc::new(ExtractWorkerContext::new());
let tool: Arc<dyn Tool> = Arc::new(WriteExtractedTool { ctx: ctx.clone() });
let res = tool.execute("not json").await;
assert!(matches!(res, Err(ToolError::InvalidArgument(_))));
assert!(ctx.take_payload().is_none());
}
}

35
crates/memory/src/lib.rs Normal file
View File

@ -0,0 +1,35 @@
//! Memory subsystem: persistence layer for `memory/*` and `knowledge/*` records.
//!
//! Self-contained: provides its own Tool implementations (read/write/edit)
//! that target `<workspace>/memory/` and `<workspace>/knowledge/` only,
//! with a pre-write Linter built in. Generic CRUD tools (in the `tools`
//! crate) must not touch these directories — Pod is responsible for
//! denying them at the Scope level when memory is enabled.
pub mod audit;
pub mod consolidate;
pub mod error;
pub mod extract;
pub mod linter;
pub mod resident;
pub mod schema;
pub mod scope;
pub mod tool;
pub mod usage;
pub mod workspace;
pub use error::{LintError, LintWarning, MemoryError};
pub use extract::ExtractPointerPayload;
pub use lint_common::{RecordLintError, Slug, is_valid_slug};
pub use linter::{LintReport, Linter};
pub use resident::{
ResidentKnowledgeEntry, collect_resident_knowledge, collect_resident_summary,
list_knowledge_slugs,
};
pub use scope::deny_write_rules;
pub use usage::{
UsageEvent, UsageEventKind, UsageRecordSnapshot, UsageReport, UsageReportRecord, UsageSource,
append_resident_exposure_event, append_usage_event, append_use_event, build_usage_report,
snapshot_record_from_bytes, snapshot_record_from_layout,
};
pub use workspace::WorkspaceLayout;

Some files were not shown because too many files have changed in this diff Show More