Compare commits
550 Commits
abe21a5e8a
...
365b8c34fd
| Author | SHA1 | Date | |
|---|---|---|---|
| 365b8c34fd | |||
| 4361385946 | |||
| 9ccbdda27c | |||
| 9a0ef7c799 | |||
| 732d6a57b7 | |||
| 3b90f26dea | |||
| 6877447616 | |||
| b808811843 | |||
| 838ccbb65f | |||
| 4d080ca985 | |||
| b1d8f7f181 | |||
| 134d0ce2a1 | |||
| 2820cbbe53 | |||
| 5c6df298aa | |||
| ae196c2a87 | |||
| f56793589f | |||
| 33884bd0ce | |||
| 2ba35cca23 | |||
| 860767a143 | |||
| d65cfe146d | |||
| 1babd021b0 | |||
| bdabe789e3 | |||
| 48c4c9b56b | |||
| b1fb3ec0fa | |||
| b3c739867e | |||
| 5ae886ea99 | |||
| 2c67f99054 | |||
| 67b5d6354c | |||
| cdc42e5a86 | |||
| e49817c2d5 | |||
| 9405ffc633 | |||
| 77e2ad0c40 | |||
| a2771180cc | |||
| 2cfc3b63c2 | |||
| b2e53f2f61 | |||
| b22040ac84 | |||
| 80a4f90004 | |||
| 0b582faebc | |||
| d084923878 | |||
| df3373c3f2 | |||
| a3e852c6b3 | |||
| b25f4c7468 | |||
| 39d40d391b | |||
| f87cf5bd00 | |||
| 9f5e27f3fd | |||
| c101b42619 | |||
| f56ef010a8 | |||
| 8095c86be2 | |||
| 99797b9e40 | |||
| 1ac197fc6c | |||
| 3ff78c03af | |||
| 156a55d1d1 | |||
| 597c6fc3e9 | |||
| a70fe65ed5 | |||
| fa225eb01d | |||
| 8e21e2f3f2 | |||
| f51f17cf93 | |||
| 7e4d90fc1b | |||
| 235ddba9c5 | |||
| fe6f5eb326 | |||
| 06da8c5b00 | |||
| 87b2e8eb16 | |||
| 2f3adc3d14 | |||
| 21ec057de0 | |||
| dd571a963e | |||
| afd65442c5 | |||
| a4358eed14 | |||
| 9685bfffba | |||
| 3a734c30bf | |||
| 6e8aa92e38 | |||
| 811a449c28 | |||
| 0fd995c85e | |||
| e0d7468ebb | |||
| 07dc185032 | |||
| efb0ac7da3 | |||
| e7b0a0b20f | |||
| 5508299e76 | |||
| 59e4aac7f7 | |||
| 6485632a4c | |||
| 8d6b47bef1 | |||
| 6046842242 | |||
| 1c8b349e01 | |||
| d70c10b782 | |||
| 70c0548190 | |||
| 6acaccccf7 | |||
| b9dd0ba0d0 | |||
| 23e218abaa | |||
| 55dedd173c | |||
| df629b4dc6 | |||
| 7c573f36e2 | |||
| ca869195dc | |||
| e6fa660a5f | |||
| f7a3b0adf1 | |||
| ea9e924d35 | |||
| 3b582a4f73 | |||
| 2a721e3776 | |||
| 61347362d1 | |||
| e2688da828 | |||
| a0e544e3e4 | |||
| 96fd9574a2 | |||
| ab4611001e | |||
| 5b20d21ea0 | |||
| 48625f5077 | |||
| fbe8846393 | |||
| 8ae3849cc8 | |||
| e29861f787 | |||
| fa00c1f188 | |||
| 879434e240 | |||
| 4b263f8743 | |||
| 7315114b20 | |||
| f14c8cb614 | |||
| da5d789897 | |||
| 802cbf2f45 | |||
| 18c30c5f90 | |||
| dfec60438e | |||
| 6a5b8ed152 | |||
| baaec0c77f | |||
| fdd2f16df0 | |||
| 3e7a15a2b5 | |||
| b5219dc862 | |||
| c1173dd8a1 | |||
| f03e84a62a | |||
| e80a3fbf8e | |||
| 8947a89e7b | |||
| f46cdd6dbc | |||
| 1a5b5331d6 | |||
| 530027c62b | |||
| 8e7126d177 | |||
| 3fe4a6bc14 | |||
| 12a4ba5edf | |||
| d3b78234c2 | |||
| baf7403c8c | |||
| 5955695db8 | |||
| bacba69d31 | |||
| 08dc6b29f8 | |||
| d08ea1734e | |||
| ec5b891fec | |||
| 5830bb9c85 | |||
| 16ef135f1f | |||
| 15f514dfe2 | |||
| fbd97c3546 | |||
| bb4205b531 | |||
| 077efee13b | |||
| b5d5c03412 | |||
| bee41379fa | |||
| 842e7a3c58 | |||
| e8c16be475 | |||
| 58f54b99f3 | |||
| 5aea9730c6 | |||
| a63f076856 | |||
| ac1d8b1c7d | |||
| d5fcbc2125 | |||
| 45db480b0b | |||
| 4b8aee909b | |||
| 903cfa3060 | |||
| 27a1d07e98 | |||
| 9bfbb2fb4c | |||
| 1a9bb30824 | |||
| 0440d5c6dc | |||
| 01200a0d33 | |||
| d3b7663d41 | |||
| b204909c4c | |||
| 9a89d2419a | |||
| af6427ff67 | |||
| 4c8596db38 | |||
| c779768b6e | |||
| 49b78612d6 | |||
| ce6085b5f4 | |||
| 1b83b2c40a | |||
| 9d04008123 | |||
| 4ebc2c96b3 | |||
| bb6f7e2022 | |||
| 86b48a9fdf | |||
| 59067bd115 | |||
| 6116d72570 | |||
| 8e8c0887de | |||
| 3143353ddc | |||
| f35d99900f | |||
| 6e7494553b | |||
| 904ea6e326 | |||
| b6b158a244 | |||
| e32b210d50 | |||
| a02f34437c | |||
| 1ef094f039 | |||
| e57e23b999 | |||
| 13feb36518 | |||
| 9e4bdf315f | |||
| 068a975488 | |||
| 3d23c4ed40 | |||
| d2149d11d3 | |||
| ada2988105 | |||
| d6cfea463a | |||
| 43330cf624 | |||
| 21a78fb19e | |||
| 0ae6592032 | |||
| 0e1539fefa | |||
| dff72e291b | |||
| c6a9007b58 | |||
| d1c7297f87 | |||
| 3c4a34b13b | |||
| 076cf9af18 | |||
| 2f5f5b8a26 | |||
| a363546a14 | |||
| 599b24fa9e | |||
| 4bdbac6597 | |||
| 20a6748cdd | |||
| 1271d13f26 | |||
| 5882341b21 | |||
| 19730ba7c0 | |||
| 7a76276539 | |||
| 64d12f2a6f | |||
| 668bde46f4 | |||
| 3647614ab0 | |||
| 5a2e69b2bf | |||
| 1d53929250 | |||
| 91a0a935b0 | |||
| bd46491b04 | |||
| 18b0f8b19f | |||
| f5d69504b5 | |||
| 76e1287cbe | |||
| eb791f9e80 | |||
| a1b9c865df | |||
| 985931d6fa | |||
| d35c9f40a7 | |||
| 4d6d5b631c | |||
| 3354c41e66 | |||
| 73d1c05edc | |||
| 9e615a41f0 | |||
| da4f4cc954 | |||
| 01d38f042c | |||
| 9b99f50264 | |||
| 646b47b40f | |||
| 5cf8eb94c7 | |||
| 4d61d044ec | |||
| 967e57c933 | |||
| acfe073b29 | |||
| 0b79e0ed65 | |||
| c8871ec4fe | |||
| 3fece8749b | |||
| cac1f4d4fe | |||
| e664def920 | |||
| f0a1f98912 | |||
| 5ca771ded4 | |||
| 9b15135416 | |||
| b6f99b7651 | |||
| 13c05b1083 | |||
| 05da79f966 | |||
| 92cee690f8 | |||
| 6f0ec92f91 | |||
| 32ed5a812c | |||
| 856a0a2432 | |||
| ced26b952e | |||
| e451b07783 | |||
| f6600feab5 | |||
| 553d67a910 | |||
| 805be47128 | |||
| aa9409869e | |||
| 8ebdd47fbb | |||
| ec1eccd10d | |||
| 42127554d4 | |||
| 9dbfd15687 | |||
| 6c31264377 | |||
| b6b4168503 | |||
| 40cde699a8 | |||
| 1ed45032be | |||
| 64814c2e15 | |||
| 96daebff30 | |||
| 85fe1a094c | |||
| 68249b8072 | |||
| 98018972aa | |||
| 5b1324a630 | |||
| 4e352bb9ff | |||
| 5c8d00e49b | |||
| 94bb8804f4 | |||
| 30023349b9 | |||
| b0e6ab16b1 | |||
| 6e6be6f3ff | |||
| eb9bd84b05 | |||
| 17a7744da1 | |||
| a3082072d7 | |||
| 04a471b669 | |||
| 3266ddb2d4 | |||
| 7527b55de4 | |||
| c57d4be413 | |||
| 344dca6ffa | |||
| 93fe2eb0ff | |||
| 09e465d583 | |||
| 4eb73fa552 | |||
| 2d59ddd228 | |||
| 39882263d3 | |||
| c2caaa21a0 | |||
| 20097e8296 | |||
| 185db7f8cd | |||
| 8870af800f | |||
| 56f9bab7b7 | |||
| 194d29723e | |||
| a22cb479f4 | |||
| 5efe0e4910 | |||
| 6168e3f924 | |||
| 9b676238a2 | |||
| 8df34a1d64 | |||
| 45ef661651 | |||
| 2d8767f940 | |||
| 8f7a023897 | |||
| 302a1a7f58 | |||
| 284d07b569 | |||
| 5fbb9c47dd | |||
| f18cf7c172 | |||
| cae0c1ea2f | |||
| ada1fe6c63 | |||
| fde55c96d4 | |||
| 05c2605aae | |||
| d1a9b622d4 | |||
| a87be4cbc2 | |||
| 30bb096513 | |||
| e0261591b6 | |||
| eb054b3e88 | |||
| 1be6d34010 | |||
| eb0d0433a1 | |||
| 557d5da391 | |||
| 3f987e9885 | |||
| a86f69fd8d | |||
| cae18a4339 | |||
| 69a6f63023 | |||
| 1236c68073 | |||
| d64d1b2ae8 | |||
| 159ffb0c6d | |||
| 97a1c10ef7 | |||
| eeb570c71f | |||
| 9be7caae99 | |||
| 0e7be01807 | |||
| 35c8ee3a73 | |||
| c79c54ba9d | |||
| f1d8f42fd5 | |||
| 14862fbc37 | |||
| ef3f0a8a78 | |||
| 2ef397b562 | |||
| bebe1169c8 | |||
| ba5b8db9cf | |||
| 189ee43a0c | |||
| 6bf1f9a110 | |||
| 8307ca965c | |||
| e97f803104 | |||
| c4bc994cab | |||
| 6d84d4df19 | |||
| ac4133ddf9 | |||
| 6d15d1e2b6 | |||
| ffda357218 | |||
| 09eb29b0b7 | |||
| 300234df57 | |||
| 7e938b2d3b | |||
| 0e98d67a5f | |||
| 31eeded4a6 | |||
| ca27d88869 | |||
| 38efe82544 | |||
| 31a1c1d879 | |||
| e21f43c70a | |||
| e058dc576d | |||
| a05d7533b0 | |||
| 621acbe224 | |||
| e259ab7bd3 | |||
| 1f3ad13c83 | |||
| 2c9db5a27b | |||
| dcc71e3a14 | |||
| 426d477584 | |||
| 09d56272d8 | |||
| de6b8faf55 | |||
| bb2a6013fa | |||
| 709b17d309 | |||
| f74716c2e4 | |||
| f6fe978db4 | |||
| 99d6a4cf4b | |||
| 9782323885 | |||
| 28c2b0eb1c | |||
| 437fe9fe85 | |||
| c647cac983 | |||
| e2d6f00d6d | |||
| 40d19ca702 | |||
| e304b17a7e | |||
| ca0b772242 | |||
| 3962db4d37 | |||
| 5ea99673fc | |||
| dad75b592e | |||
| d1be97fbc2 | |||
| f2b364ec0d | |||
| f1ba5b5686 | |||
| ce7153f6e8 | |||
| 04ad20e760 | |||
| fc2c6bc81c | |||
| 31d5de1a37 | |||
| cfd1879f7e | |||
| eed3f13e51 | |||
| a9d30e1c37 | |||
| 11bd486740 | |||
| fd88c72e2e | |||
| 2ef4f26a8f | |||
| cb3642d12c | |||
| e4d7cc1924 | |||
| c4e1a969c1 | |||
| 2e38a24ac2 | |||
| 8114d3c4fd | |||
| cabf9c967c | |||
| 1c98938b6f | |||
| 5fa3d140ab | |||
| 7d23cff0a9 | |||
| 5246b3ce92 | |||
| 45ede7a6fc | |||
| f8fe6f83aa | |||
| 9998539e71 | |||
| 29ea180b18 | |||
| ee60758138 | |||
| db9faa0fad | |||
| 325ae6fa27 | |||
| d0a1eaeb57 | |||
| 56c6758da5 | |||
| 30abefe747 | |||
| 2ed4bd007b | |||
| 5ebdeff76d | |||
| d80d06ff2e | |||
| f43d8fba3b | |||
| 0a676524ae | |||
| fd89c754f1 | |||
| 2722e0b7ba | |||
| e0c4dbdc73 | |||
| e44d49e80f | |||
| 123fc3b0ad | |||
| 89c2c701fd | |||
| ce6198102f | |||
| c75d777cec | |||
| 1b1dc73d7f | |||
| a730717fc7 | |||
| 45b1e7b6de | |||
| a86c22e6f5 | |||
| 6146b2806f | |||
| c68cd64882 | |||
| c492765d1a | |||
| 7ce77f0ad5 | |||
| 3717569533 | |||
| 676137c246 | |||
| 84fedd8048 | |||
| 9bf6378041 | |||
| d4055fb19d | |||
| 3b2bdcb19a | |||
| ee694b310f | |||
| e513825da9 | |||
| d37347fe68 | |||
| 225e1bf58e | |||
| b7b315cd39 | |||
| 13c9923486 | |||
| 1aa992d07e | |||
| 6c6eb0dcb6 | |||
| 24ade197d1 | |||
| 74a45f86b9 | |||
| 5aea67ff5e | |||
| 230936274b | |||
| e1d672e9c0 | |||
| a89701bc43 | |||
| 25df7a79c1 | |||
| ddd7327290 | |||
| 223d06c77e | |||
| 605e78468c | |||
| 3c510860fa | |||
| ec3bf7324b | |||
| 1b33e63ce2 | |||
| 663ec91b45 | |||
| 34d1e78b40 | |||
| 758ced5e7f | |||
| da16015768 | |||
| 967acd23ee | |||
| 68885a03d8 | |||
| 879858dc94 | |||
| ed412cb6a8 | |||
| 255e370856 | |||
| 911d3b8d6c | |||
| 4ec8f63482 | |||
| 88e29d7bbe | |||
| cc9fa2d632 | |||
| 2af7089396 | |||
| 5d63d0f6e2 | |||
| 73acfcb7f2 | |||
| e7a4b76c54 | |||
| a7b9b6fa4b | |||
| 4ba58723dc | |||
| b685fedf1a | |||
| 74ee96ef82 | |||
| b538c2f1ea | |||
| 84a8bd099b | |||
| 79f342ca60 | |||
| aa138e6583 | |||
| 710220c920 | |||
| 381d31a1dc | |||
| 493ed2c781 | |||
| 81e28a3c07 | |||
| 5848954ca8 | |||
| faa8eb5793 | |||
| 0c29de1b10 | |||
| c48abf062e | |||
| 38e8c66c90 | |||
| 41120cf200 | |||
| b6ffbe4255 | |||
| 92fbd2e3f6 | |||
| 66c6edec3e | |||
| 309dba7203 | |||
| cbf728d66a | |||
| 2db2c1611c | |||
| 3c58b5dde4 | |||
| a0a9df11c0 | |||
| 7ec6e88605 | |||
| 2e004161e4 | |||
| 5a995cf099 | |||
| 2edc2dc245 | |||
| f607a52fbb | |||
| 7fb2e4bc6c | |||
| 17d0430a4d | |||
| 22fe502d71 | |||
| 9b9e37cc84 | |||
| 5bc4a6d6d6 | |||
| 3d0d5ffe85 | |||
| a05eec42d7 | |||
| 8b120504a7 | |||
| bcc7faa0ba | |||
| 47c59a416e | |||
| cdafd5d914 | |||
| eb670bfba5 | |||
| c0d283b47d | |||
| 2c5a0edef3 | |||
| dc1a335e1c | |||
| 0e7a7b02fe | |||
| b19eb52511 | |||
| 0332d446cd | |||
| 29e1bc8253 | |||
| 8e394005b2 | |||
| 02b266dce7 | |||
| 7249a8ee6a | |||
| 9b78c51d0a | |||
| f241dafac8 | |||
| fc8ff9362e | |||
| 496038307f | |||
| 3d2a49e1e4 | |||
| 59bfd89940 | |||
| 89481c2c82 | |||
| 3883fab29d | |||
| 7d1b74fb32 | |||
| 9363c76354 | |||
| f4f398279e | |||
| 0fe05e502e | |||
| 60505f206b | |||
| 4c3f81b4fa | |||
| cff082ff3a | |||
| 66d005aa30 |
119
.claude/agents/ticket-reviewer.md
Normal file
119
.claude/agents/ticket-reviewer.md
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
---
|
||||
name: "ticket-reviewer"
|
||||
description: "Use this agent when a ticket implementation is submitted for review in this project (insomnia). The agent reviews the ticket's premises/requirements and the actual implementation, creates `tickets/<ticket>.review.md` with findings, and updates the original `tickets/<ticket>.md` with review status. Do NOT use this agent for general code review unrelated to a ticket. "
|
||||
model: opus
|
||||
color: purple
|
||||
---
|
||||
|
||||
You are a senior reviewer specialized in the `insomnia` project. You are an expert at evaluating ticket-scoped implementations against their stated premises and requirements, and at safeguarding the codebase from unnecessary complexity or architectural drift. You operate strictly within the project's ticket lifecycle conventions defined in `CLAUDE.md`.
|
||||
|
||||
## Your Core Responsibility
|
||||
|
||||
Given a ticket (normally `tickets/<name>.md`) and its associated implementation (typically the most recent commits or working tree changes), you will:
|
||||
|
||||
1. Read the ticket thoroughly to understand its **背景・前提・要件**.
|
||||
2. Inspect the implementation (diff + surrounding code, not only the diff).
|
||||
3. Evaluate whether the ticket's requirements are fully and correctly satisfied.
|
||||
4. Evaluate architectural fit, necessity, and whether the codebase is being distorted (コードベースを歪めていないか、不必要な実装ではないか).
|
||||
5. Produce `tickets/<name>.review.md` with findings and a clear judgment.
|
||||
6. Update the original `tickets/<name>.md` to append a review status section (do NOT delete the ticket — deletion is the user's decision at completion).
|
||||
|
||||
You must NEVER run `git` write operations (commit, add, push, etc.). Git is the user's responsibility (per CLAUDE.md). You only edit/create files in the working tree.
|
||||
|
||||
## Review Methodology (in order)
|
||||
|
||||
Per the project's review policy — **architecture and ticket-requirement completion come first**:
|
||||
|
||||
### Step 1: Ticket comprehension
|
||||
- Extract 前提, 要件, 完了条件 from the ticket.
|
||||
- Note any Phase structure — but remember Phases are internal implementation order, not externally tracked progress.
|
||||
- Confirm the ticket's intended scope boundary.
|
||||
|
||||
### Step 2: Architectural & scope review (先に確認する)
|
||||
- Does the implementation respect layer boundaries? (e.g., `llm-worker` stays low-level; higher-level features live in upper layers.)
|
||||
- Are new crates named without the `insomnia-` prefix, short and consistent?
|
||||
- Were dependencies added via `cargo add` (not manual edits to Cargo.toml)?
|
||||
- Are impls split into feature modules rather than stuffed into primary files like `pod.rs`?
|
||||
- Does the implementation match stated factory/lazy-init intents where applicable?
|
||||
- Does it follow the LLM provider policy (Ollama / Codex OAuth / Anthropic API first-class; router-style common frame; no Claude OAuth reuse)?
|
||||
- Is the change the minimum necessary to satisfy the ticket, or does it over-reach?
|
||||
|
||||
### Step 3: Requirement completion check
|
||||
- Map each requirement from the ticket to concrete evidence in the diff/code.
|
||||
- Flag any requirement that is unmet, partially met, or silently deferred.
|
||||
- Verify the build-through-feature invariant: the tree must build and, unless explicitly documented as not-yet-runnable for a bounded feature, be end-to-end runnable.
|
||||
|
||||
### Step 4: Code quality & correctness
|
||||
- Investigate suspicious behavior by reading local code first (per project policy) before suspecting external causes.
|
||||
- Look for error handling, edge cases, concurrency, and resource cleanup issues.
|
||||
- Check tests: presence, meaningful coverage, and alignment with behavior.
|
||||
- Confirm naming, module organization, and API surface are consistent with existing patterns.
|
||||
|
||||
### Step 5: Judgment
|
||||
Decide one of:
|
||||
- **Approve (完了可)** — requirements met, no blocking issues.
|
||||
- **Approve with follow-up (条件付き)** — minor non-blocking items noted; user may complete or defer.
|
||||
- **Request changes (要修正)** — blocking issues must be addressed.
|
||||
|
||||
## Output Artifacts
|
||||
|
||||
### A. `tickets/<name>.review.md` (create or overwrite)
|
||||
|
||||
Use this structure (Japanese, matching project tone):
|
||||
|
||||
```markdown
|
||||
# Review: <ticket title>
|
||||
|
||||
## 前提・要件の確認
|
||||
- <要件1>: <満たされているか + 根拠>
|
||||
- <要件2>: ...
|
||||
|
||||
## アーキテクチャ・スコープ
|
||||
- <観点と判断>
|
||||
|
||||
## 指摘事項
|
||||
### Blocking
|
||||
- <項目> — <理由と該当箇所 path:line>
|
||||
|
||||
### Non-blocking / Follow-up
|
||||
- <項目> — <理由>
|
||||
|
||||
### Nits
|
||||
- <項目>
|
||||
|
||||
## 判断
|
||||
<Approve / Approve with follow-up / Request changes> — <一文の理由>
|
||||
```
|
||||
|
||||
Omit empty sections. Cite concrete file paths and line ranges. Be concise; avoid restating obvious code.
|
||||
|
||||
### B. Update `tickets/<name>.md`
|
||||
|
||||
Append (or update if present) a trailing section like:
|
||||
|
||||
```markdown
|
||||
## Review
|
||||
- 状態: <Approve / Approve with follow-up / Request changes>
|
||||
- レビュー詳細: [./<name>.review.md](./<name>.review.md)
|
||||
- 日付: <YYYY-MM-DD(レビュー実施日)>
|
||||
```
|
||||
|
||||
Do not modify the ticket's 背景・要件 sections unless the user explicitly asked for it. Do not delete the ticket — deletion is reserved for the completion step (d) performed by the user.
|
||||
|
||||
## Operating Principles
|
||||
|
||||
- **Do not commit or stage anything.** File edits only. The user will handle git.
|
||||
- **Do not over-engineer the review.** Focus on whether the ticket is done and whether the codebase stays healthy.
|
||||
- **Prefer concrete citations** (path:line) over abstract complaints.
|
||||
- **Ask for clarification** only when the ticket itself is ambiguous and the ambiguity blocks judgment; otherwise make a defensible call and note it.
|
||||
- **Re-review mode**: if `.review.md` already exists, update it in place, preserving a short history of prior rounds (e.g., `## Round 2` section) so the evolution is visible until the ticket is closed.
|
||||
- **TODO.md is not your concern** unless a requirement explicitly demands it; ticket lifecycle edits to TODO.md are the user's.
|
||||
|
||||
## Quality Self-Check (before finishing)
|
||||
|
||||
1. Did I evaluate architectural fit before nitpicks?
|
||||
2. Did I map every ticket requirement to evidence?
|
||||
3. Are all blocking issues genuinely blocking (not stylistic)?
|
||||
4. Did I avoid making git writes?
|
||||
5. Did I update both `<name>.review.md` and `<name>.md`?
|
||||
6. Is my judgment line unambiguous?
|
||||
26
.claude/skills/worktree-workflow/SKILL.md
Normal file
26
.claude/skills/worktree-workflow/SKILL.md
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
---
|
||||
name: worktree-workflow
|
||||
description: "Worktreeを用いた開発フローを進める。git上の開発におけるミクロな指示で、プロジェクトの管理に関する指示は提供されていない。"
|
||||
allowed-tools: "Bash(cd *), Bash(git worktree *), Bash(mkdir *), Bash(cp *), Bash(ln *), Bash(ls *), Bash(find *)"
|
||||
---
|
||||
|
||||
# Worktreeを用いた開発
|
||||
|
||||
Goal: 実装を完了させ、ブランチをマージ待ちの状態にする。
|
||||
|
||||
`./.worktree`にworktreeを作成します。
|
||||
エージェントの1セッション=1ワークツリーとしており、ブランチ/イシュー/チケット単位で切ります。
|
||||
|
||||
このワークフローにおいては、ブランチはローカルで並行開発するためのマージ後削除の運用とし、Worktreeと同名のbranchを同時に作って進めます。メインのディレクトリのブランチから切るものとして扱います。
|
||||
|
||||
```
|
||||
git worktree add .worktree/<task-name> -b <task-name>
|
||||
```
|
||||
|
||||
## flake.nixの無効化
|
||||
|
||||
基本的に、CWDを変更できない場合、.envrcによる自動アクティベートは効かないので無視で構わない。
|
||||
|
||||
## 完了時
|
||||
|
||||
マージウィンドウからこのスキルがinvokeされた際は、ブランチのマージ・worktreeの削除まで行う。対して、実装者がマージしてクローズしてはならない。
|
||||
3
.gitignore
vendored
3
.gitignore
vendored
|
|
@ -1,4 +1,5 @@
|
|||
/target
|
||||
.direnv
|
||||
*.local
|
||||
*.local*
|
||||
.env
|
||||
.worktree
|
||||
|
|
|
|||
1
.insomnia/.gitignore
vendored
Normal file
1
.insomnia/.gitignore
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
/memory/
|
||||
13
.insomnia/manifest.toml
Normal file
13
.insomnia/manifest.toml
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
[scope]
|
||||
allow = [
|
||||
{ target = ".", permission = "write", recursive = true },
|
||||
]
|
||||
|
||||
[session]
|
||||
record_event_trace = true
|
||||
|
||||
[memory]
|
||||
extract_threshold = 50000
|
||||
|
||||
consolidation_threshold_files = 5
|
||||
consolidation_threshold_bytes = 50000
|
||||
143
.insomnia/workflow/auto-maintain.md
Normal file
143
.insomnia/workflow/auto-maintain.md
Normal file
|
|
@ -0,0 +1,143 @@
|
|||
---
|
||||
description: TODO / tickets / docs / git history から次の作業候補を見繕い、課題発見や方針決定を半自動でイテレーションする WIP maintainer workflow
|
||||
model_invokation: false
|
||||
user_invocable: true
|
||||
requires: []
|
||||
---
|
||||
# Auto Maintain Workflow (WIP)
|
||||
|
||||
insomnia を AI maintainer として運用するための半自動 loop。TODO / tickets から「今進められそうな作業」を選ぶだけでなく、課題の発見、設計判断の切り分け、次に人間へ戻すべき問いの整理までを扱う。
|
||||
|
||||
これは unattended 自動開発ではない。実装の並列委譲は `multi-agent-workflow`、worktree の機械的作成は `worktree-workflow` に任せる。本 Workflow はその前段として、何を進めるべきか、何をまだ決めるべきかを整理する。
|
||||
|
||||
参照:
|
||||
|
||||
- `docs/plan/ai-maintainer.md`
|
||||
- `tickets/auto-maintain-workflow.md`
|
||||
|
||||
## 位置づけ
|
||||
|
||||
AI maintainer の目的は、コードを書くこと自体ではなく、プロジェクト状態を前に進めることである。
|
||||
|
||||
この Workflow は WIP として、以下を行う。
|
||||
|
||||
- TODO / tickets / docs / git history を読んで現在地を把握する。
|
||||
- 実装可能な ticket と、方針決定が必要な ticket を分ける。
|
||||
- 小さく実装できる候補を提案する。
|
||||
- 設計相談が必要な論点を人間に戻す。
|
||||
- 運用上の問題や繰り返し発生する詰まりを report / ticket / workflow 改訂候補として整理する。
|
||||
|
||||
## 非目標
|
||||
|
||||
現時点では以下をしない。
|
||||
|
||||
- 常駐 scheduler として自動実行する。
|
||||
- 人間の合意なしに新規 ticket を作る。
|
||||
- 人間の合意なしに既存 ticket を大幅変更する。
|
||||
- 人間の合意なしに ticket 完了削除を行う。
|
||||
- push する。
|
||||
- Workflow を自律生成・自律改訂する。
|
||||
- scope / permission / history persistence / prompt context 加工原則に関わる判断を勝手に決める。
|
||||
|
||||
## 入力として読むもの
|
||||
|
||||
必要に応じて以下を読む。
|
||||
|
||||
1. `TODO.md`
|
||||
2. `tickets/*.md`
|
||||
3. `docs/plan/`
|
||||
4. `docs/report/`
|
||||
5. `git log --oneline` / ticket file の git history
|
||||
6. 既存 worktree / branch 状態
|
||||
7. 最近の失敗や通知、ユーザーからの観測
|
||||
|
||||
TODO と ticket の不整合を見つけたら、勝手に修正せず、まず報告する。ただしユーザーが明示的に「直して」と言った場合は Mode 1 として整理してよい。
|
||||
|
||||
## 分類
|
||||
|
||||
候補を以下に分ける。
|
||||
|
||||
### A. 実装委譲可能
|
||||
|
||||
- 要件と完了条件が具体的。
|
||||
- 影響範囲が限定的。
|
||||
- test / build で確認できる。
|
||||
- 大きな設計判断が不要。
|
||||
- scope を狭く切れる。
|
||||
|
||||
この場合は、人間に候補として提示する。人間が実行を許可したら `$user/multi-agent-workflow` に進む。
|
||||
|
||||
### B. 方針決定が必要
|
||||
|
||||
- 複数の設計方針が自然に導ける。
|
||||
- protocol / permission / scope / persistence / prompt context に触れる。
|
||||
- UX の仕様が未確定。
|
||||
- 既存 ticket の要件が古い。
|
||||
|
||||
この場合は、実装せず、決めるべき問いを短く提示する。
|
||||
|
||||
### C. ticket 整理が必要
|
||||
|
||||
- TODO にあるが ticket がない。
|
||||
- ticket があるが TODO にない。
|
||||
- 完了済みに見えるが残っている。
|
||||
- ticket の前提が変わっている。
|
||||
|
||||
この場合は、不整合と修正案を提示する。修正は人間の許可後に行う。
|
||||
|
||||
### D. report / workflow 改善候補
|
||||
|
||||
- 同じ tool 問題が繰り返し出る。
|
||||
- Workflow の指示が曖昧で実装 Pod が迷った。
|
||||
- AI が過剰に Task tool を使うなど、運用上の癖が出た。
|
||||
- 通知や Pod completion tracking など、開発基盤の不足が観測された。
|
||||
|
||||
この場合は、すぐ ticket 化するか、`docs/report/` に観測として残すか、人間に確認する。
|
||||
|
||||
## 半自動 iteration
|
||||
|
||||
1. 状態把握
|
||||
- TODO / tickets / git status を読む。
|
||||
- 最近完了した流れや未完了 branch を確認する。
|
||||
|
||||
2. 候補抽出
|
||||
- 実装可能そうな ticket を 2〜5 件挙げる。
|
||||
- correctness / developer experience / user-visible UX / cleanup で分類する。
|
||||
|
||||
3. 推奨順位
|
||||
- blocking correctness を最優先。
|
||||
- 実害が出ている運用問題を次点。
|
||||
- 小さく完了できる UX / cleanup を次点。
|
||||
- 大きな設計変更は方針相談に回す。
|
||||
|
||||
4. 人間への提示
|
||||
- 「次に進めるなら X」を1つ推奨する。
|
||||
- 理由を短く述べる。
|
||||
- 実装委譲する場合の scope / test 方針を添える。
|
||||
|
||||
5. 実行への接続
|
||||
- 人間が「進めて」と言ったら `$user/multi-agent-workflow` に接続する。
|
||||
- worktree 作成は `$user/worktree-workflow` に従う。
|
||||
|
||||
## エスカレーション基準
|
||||
|
||||
以下では実装に進まず、人間へ戻す。
|
||||
|
||||
- ticket の要件から複数の設計方針が自然に導ける。
|
||||
- 長期構造、crate boundary、protocol、permission、scope、history persistence に触れる。
|
||||
- prompt context 加工原則に関わる。
|
||||
- 新 ticket の作成、既存 ticket の大幅変更、ticket 完了削除について合意がない。
|
||||
- test 不能、再現不能、または作業範囲外の不具合に遭遇した。
|
||||
- WorkItem / Thread / Lease / maintainer state など、まだ設計中の概念が必要になる。
|
||||
|
||||
|
||||
## まだ固定しないもの
|
||||
|
||||
以下は `docs/plan/ai-maintainer.md` の上位設計に残し、本 Workflow では詳細を固定しない。
|
||||
|
||||
- WorkItemStore / LeaseStore。
|
||||
- operation inbox / trial log。
|
||||
- QA feedback を ticket / review / report のどれに落とすか。
|
||||
- AI 自身の feedback を Knowledge / report / ticket / workflow 改訂のどれにするか。
|
||||
- maintainer doctor。
|
||||
- reviewer Pod の評価基準の機械化。
|
||||
150
.insomnia/workflow/multi-agent-workflow.md
Normal file
150
.insomnia/workflow/multi-agent-workflow.md
Normal file
|
|
@ -0,0 +1,150 @@
|
|||
---
|
||||
description: worktree と子 Pod を使って複数 ticket の実装・レビュー・修正・完了処理を並列に進める orchestration フロー
|
||||
model_invokation: true
|
||||
user_invocable: true
|
||||
requires: []
|
||||
---
|
||||
# Multi-agent Worktree Workflow
|
||||
|
||||
insomnia を insomnia で開発する際の、worktree + 実装 Pod + 親 Pod review の標準フロー。これは **実装を並列に進めるためのフロー** であり、worktree の機械的作成手順は `$user/worktree-workflow`、ticket 候補選定や方針探索の半自動 loop は `$user/auto-maintain` に分ける。
|
||||
|
||||
## 目的
|
||||
|
||||
- 実装差分を ticket ごとの child worktree に隔離する。
|
||||
- 実装 Pod に narrow write scope を渡して並列実装させる。
|
||||
- 親 Pod が diff / test / ticket 要件を review し、必要なら修正依頼する。
|
||||
- approve 後に merge / ticket 完了処理 / main workspace での再検証を行う。
|
||||
|
||||
## 開始条件
|
||||
|
||||
以下が揃っている時に使う。
|
||||
|
||||
- 対象 ticket が決まっている。
|
||||
- ticket の背景・要件・完了条件から実装方針が概ね導ける。
|
||||
- worktree 作成と git 書き込み操作について、人間の許可がある。
|
||||
- main workspace の unrelated dirty changes を把握している。
|
||||
|
||||
設計方針が複数自然に導ける場合、protocol / scope / permission / history persistence に触れる場合、ticket 自体の再定義が必要な場合は、実装委譲前に人間へ戻す。
|
||||
|
||||
## 親 Pod / orchestrator の責務
|
||||
|
||||
1. 状態確認
|
||||
- `git status --short --branch`
|
||||
- 対象 ticket
|
||||
- 関連 TODO / docs / 既存 worktree
|
||||
|
||||
2. worktree 作成
|
||||
- `$user/worktree-workflow` に従い `./.worktree/<task-name>` を作る。
|
||||
- `.insomnia` を sparse checkout で除外する。
|
||||
|
||||
3. 実装 Pod spawn
|
||||
- read scope: main workspace 全体。
|
||||
- write scope: child worktree、または必要最小 directory。
|
||||
- task には以下を明示する。
|
||||
- child worktree path / branch
|
||||
- 対象 ticket path
|
||||
- Bash は必ず child worktree に `cd` すること
|
||||
- main workspace の `TODO.md` / `tickets/` / `docs/report/` / `.insomnia` は編集しないこと
|
||||
- 範囲外事項
|
||||
- 実行すべき build / test / format
|
||||
- 完了報告項目
|
||||
|
||||
4. 監督
|
||||
- `ReadPodOutput` で報告を読む。
|
||||
- 通知が来ない場合でも、worktree の `git status` / `git diff` / test で完了状態を確認する。
|
||||
- 必要なら `SendToPod` で修正依頼する。
|
||||
|
||||
5. review
|
||||
- ticket の背景・要件・完了条件・範囲外に照らして diff を確認する。
|
||||
- build / test / `git diff --check` を確認する。
|
||||
- 必要なら reviewer Pod を read-only で立てる。
|
||||
|
||||
6. merge / lifecycle
|
||||
- approve 後に main workspace へ merge する。
|
||||
- `TODO.md` から該当行を削除し、`tickets/foo.md` を削除して完了 commit を作る。
|
||||
- main workspace で必要な test / `cargo check --workspace` / `cargo fmt --check` を再実行する。
|
||||
|
||||
## 実装 Pod の責務
|
||||
|
||||
- child worktree 内でのみ実装する。
|
||||
- main workspace の管理ファイルを書かない。
|
||||
- 指定された build / test / format を実行する。
|
||||
- ticket 要件外の設計変更、依存関係追加、scope / permission / history persistence / prompt context 加工原則に触れる変更が必要なら止めて報告する。
|
||||
- 完了時に以下を報告する。
|
||||
- worktree path / branch
|
||||
- commit hash(commit した場合)
|
||||
- 変更ファイル
|
||||
- 実装概要
|
||||
- 実行した build / test / format
|
||||
- 未解決事項
|
||||
- review に回せるか
|
||||
|
||||
## 実装 Pod の commit 方針
|
||||
|
||||
実装 Pod には child worktree 内での commit を許可してよい。
|
||||
|
||||
- commit は ticket 内で意味のある粒度にする。
|
||||
- 例: `feat: ...`、`fix: ...`、`test: ...`、`docs: ...`
|
||||
- 実装 Pod は merge / push / branch deletion / worktree remove をしない。
|
||||
- 実装 Pod は `TODO.md` / `tickets/` の完了処理 commit をしない。
|
||||
- 親 Pod は review 時に commit 粒度も確認する。
|
||||
- 必要な修正は、原則追加 commit として積む。履歴改変や squash は人間の明示指示がある時だけ行う。
|
||||
|
||||
## Review → 修正 → 完了の標準形
|
||||
|
||||
### Approve
|
||||
|
||||
1. 実装 Pod を停止し、scope を回収する。
|
||||
2. 親 Pod が main workspace で `git merge --no-ff <branch>` する。
|
||||
3. 親 Pod が `TODO.md` と `tickets/foo.md` を完了処理して commit する。
|
||||
4. main workspace で検証コマンドを再実行する。
|
||||
5. 変更内容・commit・検証結果・残 dirty changes を報告する。
|
||||
|
||||
### Request changes
|
||||
|
||||
1. blocking finding をファイル / 行 / 理由 / 修正方針つきで整理する。
|
||||
2. 実装 Pod が生きていれば `SendToPod` で修正依頼する。
|
||||
3. 停止済みなら、同じ worktree / branch / scope で再 spawn するか、親 Pod が最小修正する。
|
||||
4. 修正後に focused test と必要な broader test を再実行する。
|
||||
5. 再 review する。
|
||||
|
||||
### Non-blocking comments
|
||||
|
||||
- ticket 要件外の改善はその場で混ぜない。
|
||||
- 必要なら後続 ticket / docs/report にする。
|
||||
- non-blocking を理由に completion を遅らせない。
|
||||
|
||||
## 並列実装時の注意
|
||||
|
||||
- 1 ticket = 1 worktree = 1 branch を基本にする。
|
||||
- 複数 Pod に同じ write scope を渡さない。
|
||||
- parent は child の write scope 配下を直接編集しない。
|
||||
- 依存関係がある ticket は、土台 branch を merge してから次 worktree を切る。
|
||||
- parallel に走らせた Pod の完了通知は取りこぼしうるため、`ReadPodOutput` と worktree 状態で確認する。
|
||||
|
||||
## 完了報告の標準形
|
||||
|
||||
```text
|
||||
完了:
|
||||
- ticket: <path>
|
||||
- branch: <name>
|
||||
- commits:
|
||||
- <hash> <subject>
|
||||
- 変更概要: ...
|
||||
- 検証:
|
||||
- cargo fmt --check
|
||||
- cargo check --workspace
|
||||
- cargo test ...
|
||||
- review: approve / approve with comments / request changes
|
||||
- 未解決事項: ...
|
||||
- 残 dirty changes: ...
|
||||
```
|
||||
|
||||
## この Workflow で扱わないもの
|
||||
|
||||
以下は `$user/auto-maintain` または別の設計相談で扱う。
|
||||
|
||||
- ticket 候補を見繕うこと。
|
||||
- 新規 ticket 作成判断。
|
||||
- QA feedback / AI feedback を ticket / report / workflow に落とす判断。
|
||||
- 長期 maintainer loop / WorkItemStore / LeaseStore の設計。
|
||||
98
.insomnia/workflow/worktree-workflow.md
Normal file
98
.insomnia/workflow/worktree-workflow.md
Normal file
|
|
@ -0,0 +1,98 @@
|
|||
---
|
||||
description: insomnia プロジェクトで child git worktree を作成・管理するための機械的手順。実装 Pod に作らせず、親 Pod が main workspace で実行する。
|
||||
model_invokation: false
|
||||
user_invocable: true
|
||||
requires: []
|
||||
---
|
||||
# Worktree Workflow
|
||||
|
||||
insomnia プロジェクトで実装差分を main workspace から分離するため、`./.worktree/<task-name>` に child git worktree を作る。これは **worktree の扱い方だけ** を定める Workflow であり、ticket 選定、実装委譲、review、merge の運用は `$user/multi-agent-workflow` 側で扱う。
|
||||
|
||||
insomnia では Pod の write scope が排他的に委譲されるため、child worktree に `.insomnia` を置かない。main workspace は orchestration / ticket / docs / memory / workflow 管理の場所として残し、child worktree はコード差分専用の作業面として扱う。
|
||||
|
||||
## 適用範囲
|
||||
|
||||
この Workflow は親 Pod / orchestrator が main workspace で実行する。
|
||||
|
||||
- 実装 Pod にこの Workflow を渡して worktree を作らせない。
|
||||
- 実装 Pod は、親 Pod が作成済みの child worktree を受け取り、その中で実装・build・test・報告を行う。
|
||||
- ticket 作成、TODO 更新、review artifact、docs/report は main workspace 側で扱う。
|
||||
|
||||
## 原則
|
||||
|
||||
- 1 ticket / 1 実装 task につき 1 worktree を作る。
|
||||
- worktree path は `./.worktree/<task-name>`。
|
||||
- branch 名は原則 `<task-name>` と同じ kebab-case。
|
||||
- child worktree には `.insomnia` を出さない。
|
||||
- child worktree は実装差分用。`TODO.md` / `tickets/` / `docs/report/` / workflow / memory は原則 main workspace 側で扱う。
|
||||
- push はしない。
|
||||
|
||||
## 事前確認
|
||||
|
||||
作成前に以下を確認する。
|
||||
|
||||
1. 対象 ticket / task が決まっているか。
|
||||
2. `<task-name>` が branch / path 名に使える kebab-case か。
|
||||
3. `git worktree add` を実行してよい許可があるか。
|
||||
4. main workspace に混ぜてはいけない未保存差分がないか。
|
||||
5. 同名 branch / worktree が既に存在しないか。
|
||||
|
||||
同名 branch がある場合は、既存 branch を使うか、人間に確認する。`git worktree add -B` で上書きしない。
|
||||
|
||||
## 作成手順
|
||||
|
||||
main workspace で実行する。
|
||||
|
||||
```bash
|
||||
git worktree add .worktree/<task-name> -b <task-name>
|
||||
|
||||
git -C .worktree/<task-name> sparse-checkout init --no-cone
|
||||
git -C .worktree/<task-name> sparse-checkout set --no-cone \
|
||||
'/*' \
|
||||
'!/.insomnia/' \
|
||||
'!/.insomnia/**'
|
||||
```
|
||||
|
||||
確認する。
|
||||
|
||||
```bash
|
||||
git -C .worktree/<task-name> status --short --branch
|
||||
test ! -e .worktree/<task-name>/.insomnia
|
||||
```
|
||||
|
||||
失敗した場合は、worktree / branch / lock の状態を確認し、勝手に cleanup せず人間へ報告する。
|
||||
|
||||
## 子 Pod へ渡す scope
|
||||
|
||||
子 Pod を使う場合、子 Pod の cwd は main workspace のままになる。必ず作業対象が child worktree であることを明示し、Bash 実行時は毎回 `cd <repo>/.worktree/<task-name> && ...` させる。
|
||||
|
||||
推奨 scope:
|
||||
|
||||
```text
|
||||
read: <repo>
|
||||
write: <repo>/.worktree/<task-name>
|
||||
```
|
||||
|
||||
より狭く切れる場合は、write scope を変更対象 crate / directory まで狭めてよい。ただし build / test に必要な生成物を書けることを確認する。
|
||||
|
||||
## child worktree 内の禁止事項
|
||||
|
||||
- `.insomnia` を作らない / コピーしない。
|
||||
- main workspace の `TODO.md` / `tickets/` / `docs/report/` を編集しない。
|
||||
- merge / push / branch deletion / worktree remove をしない。
|
||||
- scope / permission / history persistence / prompt context 加工原則に関わる設計変更を無断で行わない。
|
||||
|
||||
## 完了時の扱い
|
||||
|
||||
worktree 作成 Workflow としては、完了時に merge しない。merge、ticket 完了、TODO 削除は `$user/multi-agent-workflow` または人間の明示指示で行う。
|
||||
|
||||
実装 Pod へ渡す完了報告項目の標準形:
|
||||
|
||||
- worktree path
|
||||
- branch 名
|
||||
- commit hash(実装 Pod に commit を許可した場合)
|
||||
- 変更ファイル
|
||||
- 実装概要
|
||||
- 実行した build / test / format
|
||||
- 未解決事項
|
||||
- review に回せるか
|
||||
75
AGENTS.md
Normal file
75
AGENTS.md
Normal file
|
|
@ -0,0 +1,75 @@
|
|||
全体設計が概ね固まり、随所の細かい仕様を詰めながら実装を進めている。
|
||||
|
||||
## このシステムにおける設計要旨
|
||||
|
||||
- プロンプトはすべて `resources/prompts` に集約している。管理効率の向上と同時に、ユーザーがオーバーライドできる形式でもある。
|
||||
- E2E(実プロセスをスポーンさせてのテスト)は未設計。
|
||||
- 変更量を最小にするために設計を歪めたり、設計問題に対して不必要な後方互換性を作らない。長期的なメンテナンスと型安全性を追求すること。
|
||||
|
||||
### LLM コンテキストの加工原則
|
||||
|
||||
LLM に投げる context への割り込みは、大きく2種類に分かれる。**前者は許されるが、後者は禁止**。
|
||||
|
||||
**許可**: Podの状態から純粋に再現可能で、且つ揮発性の無い操作であることが望ましい(pruning、tool result の content 切り詰め、prompt cache anchor の付与等)。
|
||||
原則として、コンテキストは積み重ねるものであり、一時的にメッセージを差し込むことや、過去のメッセージを改ざんすることはKVキャッシュのヒット率を下げる。
|
||||
|
||||
**禁止**: ターンを跨ぐことができない情報に基づいて、history に記録せずに context だけにコンテンツを差し込むこと。これをやると LLM はそれに反応して生成を行う一方、次以降のターンでhistoryに残らないため、「自分がなぜその発言/tool call をしたか」の根拠が消えるうえ、prompt cache のヒット率も低下させることになる。
|
||||
|
||||
新しい input を context に乗せたいなら、必ず先に `worker.history` に append して commit すること。`history.json` への永続化はそこから自動的についてくる。Notify / PodEvent / `<system-reminder>` 系はこの原則で扱う(→ `tickets/notify-history-persist.md`)。
|
||||
また、キャッシュを破壊するタイミングは正確にコントロールされる必要があり、キャッシュ破壊とトークン消費のトレードオフに基づいて慎重に設計されるべきである。
|
||||
|
||||
---
|
||||
|
||||
## 実際のセッションを読んでデバッグする
|
||||
|
||||
`~/.insomnia/sessions`にすべてのセッションがある。jsonlなので、いい感じにBashで読むこと。
|
||||
|
||||
---
|
||||
|
||||
## Git操作
|
||||
|
||||
workflowで明示されない限り、読み取り以外の操作は控えること。
|
||||
基本はworktree上の一時的なブランチでコミットを重ね、メインブランチに取り込む運用をしている。
|
||||
コミットメッセージは適当に`<prefix>: *簡潔な1行*`で書いている。
|
||||
|
||||
外部の参考プロジェクトは必要に応じてローカルの外部 checkout からReadすること。
|
||||
|
||||
---
|
||||
|
||||
## Ticketの運用について
|
||||
|
||||
`TODO.md`、`tickets/`はgitで管理されていて、時系列の管理はgitを参照して把握すること。
|
||||
|
||||
### TODO.md
|
||||
|
||||
- 1チケット = 1行。未完了のみ記載し、完了したら行ごと削除する(履歴はgitで追える)
|
||||
- ネストは同一領域のグルーピング(表示用)にのみ使う。実装上の依存関係はネストで表現しない
|
||||
- 完了した子は削除し、親は未完了の子がある限り残す。最後の子が完了したら親ごと削除
|
||||
- Ticketを追加する際は、合わせてTODOも書くこと
|
||||
|
||||
### Ticket の粒度
|
||||
|
||||
- 1チケット = 完了時点で、実装が仕様又は機能として説明できる粒度。
|
||||
- 作成時、背景や要件を前提として書き、実装の方針やコードの詳細は不必要に増やさない。
|
||||
- チケット内のステップ(Phase 1, 2, ...)は実装順序であり、TODO等、外に出さない
|
||||
- ビルドが通ること。また、その機能に限ってまだ動作しないと明示できている場合を除き、全体を通して動作させられる状態である必要がある。
|
||||
|
||||
### Ticket のライフサイクル
|
||||
|
||||
gitがタイムラインの単一の情報源。ファイル操作とcommitで状態遷移を表現する。
|
||||
|
||||
a. 作成: `tickets/foo.md` を作成してcommit
|
||||
b. 詳細化や前提の変化: `tickets/foo.md` を更新してcommit
|
||||
c. レビュー: `tickets/foo.md` にレビュー状態を追記 + `tickets/foo.review.md` を作成してcommit
|
||||
d. 完了: `tickets/foo.md` と `tickets/foo.review.md` を両方削除してcommit
|
||||
|
||||
worktreeと併用して作業を進める場合、必ずブランチを切る前に対象のチケットをコミットしてから切ること。
|
||||
|
||||
TODO.mdのリンクは完了後に切れるが、そのリンクを元にgitで消されたファイルを読み、内容を把握できる。
|
||||
`.review.md` にはレビューの指摘事項と判断結果を記載する。
|
||||
レビューはdiffの確認だけでなく、チケットはどのような前提・要件であり、それが達成されたかの確認まで含めて行う。
|
||||
常に、提出された実装で良いのか、コードベースを歪めていないか、不必要な実装ではないかを確認すること。
|
||||
|
||||
---
|
||||
|
||||
insomniaでinsomniaを開発している際、AI自身のフィードバックを元に改善を回すために `docs/report/`ディレクトリに感じた障壁や改善案等を書き残す形にした。 明確に力不足な点/ツールの問題があった場合や、ユーザーからの指示があった際に作ること。
|
||||
75
CLAUDE.md
Normal file
75
CLAUDE.md
Normal file
|
|
@ -0,0 +1,75 @@
|
|||
全体設計が概ね固まり、随所の細かい仕様を詰めながら実装を進めている。
|
||||
|
||||
## このシステムにおける設計要旨
|
||||
|
||||
- プロンプトはすべて `resources/prompts` に集約している。管理効率の向上と同時に、ユーザーがオーバーライドできる形式でもある。
|
||||
- E2E(実プロセスをスポーンさせてのテスト)は未設計。
|
||||
- 変更量を最小にするために設計を歪めたり、設計問題に対して不必要な後方互換性を作らない。長期的なメンテナンスと型安全性を追求すること。
|
||||
|
||||
### LLM コンテキストの加工原則
|
||||
|
||||
LLM に投げる context への割り込みは、大きく2種類に分かれる。**前者は許されるが、後者は禁止**。
|
||||
|
||||
**許可**: Podの状態から純粋に再現可能で、且つ揮発性の無い操作であることが望ましい(pruning、tool result の content 切り詰め、prompt cache anchor の付与等)。
|
||||
原則として、コンテキストは積み重ねるものであり、一時的にメッセージを差し込むことや、過去のメッセージを改ざんすることはKVキャッシュのヒット率を下げる。
|
||||
|
||||
**禁止**: ターンを跨ぐことができない情報に基づいて、history に記録せずに context だけにコンテンツを差し込むこと。これをやると LLM はそれに反応して生成を行う一方、次以降のターンでhistoryに残らないため、「自分がなぜその発言/tool call をしたか」の根拠が消えるうえ、prompt cache のヒット率も低下させることになる。
|
||||
|
||||
新しい input を context に乗せたいなら、必ず先に `worker.history` に append して commit すること。`history.json` への永続化はそこから自動的についてくる。Notify / PodEvent / `<system-reminder>` 系はこの原則で扱う(→ `tickets/notify-history-persist.md`)。
|
||||
また、キャッシュを破壊するタイミングは正確にコントロールされる必要があり、キャッシュ破壊とトークン消費のトレードオフに基づいて慎重に設計されるべきである。
|
||||
|
||||
---
|
||||
|
||||
## 実際のセッションを読んでデバッグする
|
||||
|
||||
`~/.insomnia/sessions`にすべてのセッションがある。jsonlなので、いい感じにBashで読むこと。
|
||||
|
||||
---
|
||||
|
||||
## Git操作
|
||||
|
||||
workflowで明示されない限り、読み取り以外の操作は控えること。
|
||||
基本はworktree上の一時的なブランチでコミットを重ね、メインブランチに取り込む運用をしている。
|
||||
コミットメッセージは適当に`<prefix>: *簡潔な1行*`で書いている。
|
||||
|
||||
外部の参考プロジェクトは必要に応じてローカルの外部 checkout からReadすること。
|
||||
|
||||
---
|
||||
|
||||
## Ticketの運用について
|
||||
|
||||
`TODO.md`、`tickets/`はgitで管理されていて、時系列の管理はgitを参照して把握すること。
|
||||
|
||||
### TODO.md
|
||||
|
||||
- 1チケット = 1行。未完了のみ記載し、完了したら行ごと削除する(履歴はgitで追える)
|
||||
- ネストは同一領域のグルーピング(表示用)にのみ使う。実装上の依存関係はネストで表現しない
|
||||
- 完了した子は削除し、親は未完了の子がある限り残す。最後の子が完了したら親ごと削除
|
||||
- Ticketを追加する際は、合わせてTODOも書くこと
|
||||
|
||||
### Ticket の粒度
|
||||
|
||||
- 1チケット = 完了時点で、実装が仕様又は機能として説明できる粒度。
|
||||
- 作成時、背景や要件を前提として書き、実装の方針やコードの詳細は不必要に増やさない。
|
||||
- チケット内のステップ(Phase 1, 2, ...)は実装順序であり、TODO等、外に出さない
|
||||
- ビルドが通ること。また、その機能に限ってまだ動作しないと明示できている場合を除き、全体を通して動作させられる状態である必要がある。
|
||||
|
||||
### Ticket のライフサイクル
|
||||
|
||||
gitがタイムラインの単一の情報源。ファイル操作とcommitで状態遷移を表現する。
|
||||
|
||||
a. 作成: `tickets/foo.md` を作成してcommit
|
||||
b. 詳細化や前提の変化: `tickets/foo.md` を更新してcommit
|
||||
c. レビュー: `tickets/foo.md` にレビュー状態を追記 + `tickets/foo.review.md` を作成してcommit
|
||||
d. 完了: `tickets/foo.md` と `tickets/foo.review.md` を両方削除してcommit
|
||||
|
||||
worktreeと併用して作業を進める場合、必ずブランチを切る前に対象のチケットをコミットしてから切ること。
|
||||
|
||||
TODO.mdのリンクは完了後に切れるが、そのリンクを元にgitで消されたファイルを読み、内容を把握できる。
|
||||
`.review.md` にはレビューの指摘事項と判断結果を記載する。
|
||||
レビューはdiffの確認だけでなく、チケットはどのような前提・要件であり、それが達成されたかの確認まで含めて行う。
|
||||
常に、提出された実装で良いのか、コードベースを歪めていないか、不必要な実装ではないかを確認すること。
|
||||
|
||||
---
|
||||
|
||||
insomniaでinsomniaを開発している際、AI自身のフィードバックを元に改善を回すために `docs/report/`ディレクトリに感じた障壁や改善案等を書き残す形にした。 明確に力不足な点/ツールの問題があった場合や、ユーザーからの指示があった際に作ること。
|
||||
2637
Cargo.lock
generated
2637
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
49
Cargo.toml
49
Cargo.toml
|
|
@ -1,12 +1,57 @@
|
|||
[workspace]
|
||||
resolver = "2"
|
||||
members = [
|
||||
"crates/insomnia",
|
||||
"crates/client",
|
||||
"crates/daemon",
|
||||
"crates/llm-worker",
|
||||
"crates/llm-worker-macros",
|
||||
"crates/llm-worker-persistence",
|
||||
"crates/session-store",
|
||||
"crates/manifest",
|
||||
"crates/pod",
|
||||
"crates/protocol",
|
||||
"crates/provider",
|
||||
"crates/pod-registry",
|
||||
"crates/session-metrics",
|
||||
"crates/lint-common",
|
||||
"crates/tools",
|
||||
"crates/tui",
|
||||
"crates/memory",
|
||||
"crates/workflow",
|
||||
]
|
||||
|
||||
[workspace.package]
|
||||
edition = "2024"
|
||||
license = "MIT"
|
||||
|
||||
[workspace.dependencies]
|
||||
# Internal crates
|
||||
client = { path = "crates/client" }
|
||||
llm-worker = { path = "crates/llm-worker", version = "0.2" }
|
||||
llm-worker-macros = { path = "crates/llm-worker-macros", version = "0.2" }
|
||||
manifest = { path = "crates/manifest" }
|
||||
lint-common = { path = "crates/lint-common" }
|
||||
memory = { path = "crates/memory" }
|
||||
pod-registry = { path = "crates/pod-registry" }
|
||||
protocol = { path = "crates/protocol" }
|
||||
provider = { path = "crates/provider" }
|
||||
session-metrics = { path = "crates/session-metrics" }
|
||||
session-store = { path = "crates/session-store" }
|
||||
tools = { path = "crates/tools" }
|
||||
|
||||
# External
|
||||
# Note: `reqwest` and `chrono` are not aggregated here because some crates
|
||||
# need `default-features = false`, which workspace inheritance cannot override.
|
||||
async-trait = "0.1"
|
||||
fs4 = "0.13"
|
||||
futures = "0.3"
|
||||
libc = "0.2"
|
||||
schemars = "1.2"
|
||||
serde = "1.0"
|
||||
serde_json = "1.0"
|
||||
sha2 = "0.11"
|
||||
tempfile = "3.27"
|
||||
thiserror = "2.0"
|
||||
tokio = "1.52"
|
||||
toml = "1.1"
|
||||
tracing = "0.1"
|
||||
uuid = "1.23"
|
||||
|
|
|
|||
18
KNOWN_ISSUES.md
Normal file
18
KNOWN_ISSUES.md
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
# Known Issues
|
||||
|
||||
Ticket を切るほどではないが、次に近所を触るときに合わせて拾いたい小粒な所見の置き場。
|
||||
|
||||
## 運用
|
||||
|
||||
- 1 項目 = 出典 (file:line) + 症状 (一文) + トリガー (いつ拾うか、一文)
|
||||
- 関連 ticket があれば `→ [tickets/foo.md]` でリンク
|
||||
- 修正したら同じコミットで該当エントリを削除する (履歴は git)
|
||||
- ここに溜める基準: 「ticket は重い」「だが忘れたら次の触り手が踏む」もの。明確に作業すべきものは ticket 化する
|
||||
|
||||
## エントリ
|
||||
|
||||
- `crates/tui/src/app.rs:478-485` — bad workflow slug を含む `Method::Run` 送信時、`Event::UserMessage` の早期 broadcast で `turn_index += 1` されターンヘッダだけ残る ("ghost turn header")。次に TUI のターンヘッダ / エラー表示周りを触るときに整理。→ [tickets/pod-input-validate-internalize.md] の review 由来。
|
||||
- `crates/pod/src/controller.rs:944` — `worker_error_code` で `PodError::WorkflowResolve(_) => InvalidRequest` が post-commit な resolve エラー (`KnowledgeNotFound` 等) にも適用される。意味論的には妥当方向だが、resolve 系のエラー粒度を分けたくなったタイミングで再評価。
|
||||
- `crates/pod/tests/controller_test.rs` — `double_run_returns_error` がたまに失敗する flakiness を観測。`pod-interrupt-prep-internalize` 以前から存在する別件。次に controller_test の Run 連投系のタイミングを触るときに併せて原因を切り分け。
|
||||
- `crates/session-store/src/fs_store.rs:117-122` — `FsStore::read_entry_count` が `fs::read_to_string` で全文ロードしてから行数カウントするため O(n)。`ensure_head_or_fork` は run-start でしか呼ばれず現状は許容範囲だが、長期セッションが普通になった時点で `\n` バイト数の cheap count か末尾 seek に置き換える。
|
||||
- `crates/session-store/src/segment.rs:121` `ensure_head_or_fork` (free fn, test 専用・本番 caller ゼロ) と `crates/pod/src/pod.rs` `Pod::ensure_segment_head` (本番 inline) に live auto-fork の検知 + forked_from 記録が二重実装されている。entry-hash-abolish 以前からの重複で、両方独立にテスト済みだが drift 必至。session-store 側を本番から呼ぶ形に寄せるか free fn を畳むかは要設計判断。Pod state / fork 周辺を次に触るときに統合を検討。
|
||||
13
README.md
13
README.md
|
|
@ -3,16 +3,3 @@
|
|||
insomnia(i6a)は不休のエージェントループを回すためのエージェントプラットフォーム。
|
||||
|
||||
ワークフローを統括し、四六時中電力を消費し、イテレーションします。
|
||||
|
||||
## Crates
|
||||
|
||||
| クレート | 概要 |
|
||||
|---|---|
|
||||
| `insomnia` | トップレベルアプリケーション(未実装) |
|
||||
| `llm-worker` | 自律的なLLMシステムを構築するためのライブラリ |
|
||||
| `llm-worker-macros` | `llm-worker`用の手続きマクロ (`#[tool_registry]`, `#[tool]`) |
|
||||
|
||||
## ドキュメント
|
||||
|
||||
- [要件](crates/llm-worker/docs/requirements.md) — llm-workerに求める性能 (R1-R4)
|
||||
- [アーキテクチャ](crates/llm-worker/docs/architecture.md) — 3層構成とモジュール配置
|
||||
|
|
|
|||
12
TODO.md
12
TODO.md
|
|
@ -1,7 +1,5 @@
|
|||
- [x] 永続化データ構造の制定
|
||||
- [ ] テスト設計
|
||||
- [x] ツール出力の遅延読み込み設計 (ToolOutput / BlobStore / auto_summarize)
|
||||
- [ ] ツール設計
|
||||
- [ ] ツールの動的追加/削除 (unregister, replace)
|
||||
- [ ] ToolDefinition ファクトリの遅延初期化修正 (現状 register 時に即時呼び出しされている。セッション開始=初回メッセージ送信時まで遅延させる)
|
||||
- [x] inspect ツール実装
|
||||
# TODO legacy notice
|
||||
|
||||
Active repository work items have been migrated to `work-items/`.
|
||||
|
||||
Use `./tickets.sh list --status all` for the generated/current view and `./tickets.sh doctor` to validate the migration state.
|
||||
|
|
|
|||
11
crates/client/Cargo.toml
Normal file
11
crates/client/Cargo.toml
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
[package]
|
||||
name = "client"
|
||||
version = "0.1.0"
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
protocol = { workspace = true }
|
||||
manifest = { workspace = true }
|
||||
tokio = { workspace = true, features = ["rt", "macros", "net", "io-util", "sync", "time", "process", "fs"] }
|
||||
uuid = { workspace = true }
|
||||
15
crates/client/src/lib.rs
Normal file
15
crates/client/src/lib.rs
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
//! Pod プロトコルを喋るクライアント。
|
||||
//!
|
||||
//! - [`PodClient`]: 既存 pod の Unix ソケットへ接続して `Method` を送り、
|
||||
//! `Event` を受け取る低レベル接続。
|
||||
//! - [`spawn`]: pod バイナリをサブプロセスとして起動し、`INSOMNIA-READY`
|
||||
//! ハンドシェイクが終わるまで待つフロー。subprocess を立ち上げる必要が
|
||||
//! ない呼び出し側 (=既存 pod に attach する場合) は使わなくてよい。
|
||||
//!
|
||||
//! TUI / GUI / E2E ハーネスはこの crate に依存して protocol を喋る。
|
||||
|
||||
mod pod_client;
|
||||
pub mod spawn;
|
||||
|
||||
pub use pod_client::PodClient;
|
||||
pub use spawn::{SpawnConfig, SpawnError, SpawnReady, spawn_pod};
|
||||
45
crates/client/src/pod_client.rs
Normal file
45
crates/client/src/pod_client.rs
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
use std::io;
|
||||
use std::path::Path;
|
||||
|
||||
use protocol::stream::{JsonLineReader, JsonLineWriter};
|
||||
use protocol::{Event, Method};
|
||||
use tokio::net::UnixStream;
|
||||
use tokio::sync::mpsc;
|
||||
|
||||
pub struct PodClient {
|
||||
writer: JsonLineWriter<tokio::io::WriteHalf<UnixStream>>,
|
||||
event_rx: mpsc::Receiver<Event>,
|
||||
}
|
||||
|
||||
impl PodClient {
|
||||
pub async fn connect(path: &Path) -> Result<Self, io::Error> {
|
||||
let stream = UnixStream::connect(path).await?;
|
||||
let (reader, writer) = tokio::io::split(stream);
|
||||
let writer = JsonLineWriter::new(writer);
|
||||
|
||||
let (event_tx, event_rx) = mpsc::channel::<Event>(256);
|
||||
|
||||
tokio::spawn(async move {
|
||||
let mut reader = JsonLineReader::new(reader);
|
||||
while let Ok(Some(event)) = reader.next::<Event>().await {
|
||||
if event_tx.send(event).await.is_err() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
Ok(Self { writer, event_rx })
|
||||
}
|
||||
|
||||
pub async fn send(&mut self, method: &Method) -> Result<(), io::Error> {
|
||||
self.writer.write(method).await
|
||||
}
|
||||
|
||||
pub fn try_next_event(&mut self) -> Option<Event> {
|
||||
self.event_rx.try_recv().ok()
|
||||
}
|
||||
|
||||
pub async fn next_event(&mut self) -> Option<Event> {
|
||||
self.event_rx.recv().await
|
||||
}
|
||||
}
|
||||
299
crates/client/src/spawn.rs
Normal file
299
crates/client/src/spawn.rs
Normal file
|
|
@ -0,0 +1,299 @@
|
|||
//! pod バイナリをサブプロセスとして立ち上げ、`INSOMNIA-READY` を待つ
|
||||
//! ハンドシェイク。
|
||||
//!
|
||||
//! - 親プロセス (TUI / GUI / E2E) は overlay TOML を組み立ててこの関数に
|
||||
//! 渡す。pod はそれを受けて socket を bind し、stderr に
|
||||
//! `INSOMNIA-READY\t<name>\t<socket>` を吐く。
|
||||
//! - 待機中の stderr 行は `progress` コールバック越しに呼び出し側へ流す。
|
||||
//! UI の進捗表示や E2E のログ収集はここで賄う。
|
||||
//! - `kill_on_drop = false` + `process_group(0)` により、親プロセス
|
||||
//! ライフサイクルから切り離した detached pod を作る。ready 後の lifecycle
|
||||
//! 管理は runtime ディレクトリ / socket を介して行う。
|
||||
|
||||
use std::io;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::Stdio;
|
||||
use std::time::Duration;
|
||||
|
||||
use tokio::process::Command;
|
||||
use uuid::Uuid;
|
||||
|
||||
const READY_PREFIX: &str = "INSOMNIA-READY\t";
|
||||
const READY_TIMEOUT: Duration = Duration::from_secs(20);
|
||||
|
||||
/// `spawn_pod` の入力。
|
||||
pub struct SpawnConfig {
|
||||
/// `pod.name` として使う識別子。runtime ディレクトリ
|
||||
/// (`manifest::paths::pod_runtime_dir`) の解決と、ready 行に乗る
|
||||
/// 名前との突き合わせに使う。
|
||||
pub pod_name: String,
|
||||
/// `--overlay` で pod に渡す TOML 文字列。
|
||||
pub overlay_toml: String,
|
||||
/// pod の current_dir。
|
||||
pub cwd: PathBuf,
|
||||
/// `Some(id)` のとき `--session <id>` を付与し、当該セッションから
|
||||
/// resume させる。
|
||||
pub resume_from: Option<Uuid>,
|
||||
/// true のとき `--pod <pod_name>` を付与し、pod 側で name-keyed state
|
||||
/// があれば resume、なければ同名の新規 Pod として起動させる。
|
||||
pub resume_by_pod_name: bool,
|
||||
}
|
||||
|
||||
/// Result of a successful ready handshake.
///
/// Both fields are taken verbatim from the ready line
/// (`INSOMNIA-READY\t<name>\t<socket>`) the pod wrote to stderr.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SpawnReady {
    /// Pod name as reported on the ready line.
    pub pod_name: String,
    /// Path of the socket reported on the ready line.
    pub socket_path: PathBuf,
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum SpawnError {
|
||||
Io(io::Error),
|
||||
/// runtime ディレクトリが解決できなかった (環境変数未設定等)。
|
||||
RuntimeDirUnavailable,
|
||||
PodLaunchFailed(io::Error),
|
||||
PodExitedEarly {
|
||||
stderr_tail: String,
|
||||
},
|
||||
Timeout,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for SpawnError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::Io(e) => write!(f, "io error: {e}"),
|
||||
Self::RuntimeDirUnavailable => write!(
|
||||
f,
|
||||
"could not resolve runtime directory (set INSOMNIA_HOME, INSOMNIA_RUNTIME_DIR, XDG_RUNTIME_DIR, or HOME)"
|
||||
),
|
||||
Self::PodLaunchFailed(e) => write!(f, "failed to launch pod: {e}"),
|
||||
Self::PodExitedEarly { stderr_tail } => {
|
||||
if stderr_tail.is_empty() {
|
||||
write!(f, "pod exited before becoming ready")
|
||||
} else {
|
||||
write!(f, "pod exited before becoming ready: {stderr_tail}")
|
||||
}
|
||||
}
|
||||
Self::Timeout => write!(
|
||||
f,
|
||||
"pod did not become ready within {}s",
|
||||
READY_TIMEOUT.as_secs()
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for SpawnError {}
|
||||
|
||||
impl From<io::Error> for SpawnError {
|
||||
fn from(e: io::Error) -> Self {
|
||||
Self::Io(e)
|
||||
}
|
||||
}
|
||||
|
||||
/// pod を spawn し、`INSOMNIA-READY` ハンドシェイクが終わるまで待つ。
|
||||
///
|
||||
/// `progress` は ready 行を見つけるまでに観測した stderr の各行で呼ばれる
|
||||
/// (ready 行自体は除外される)。UI の表示更新や E2E ログ取得に使う。
|
||||
pub async fn spawn_pod<F>(config: SpawnConfig, mut progress: F) -> Result<SpawnReady, SpawnError>
|
||||
where
|
||||
F: FnMut(&str),
|
||||
{
|
||||
let pod_bin = resolve_pod_command();
|
||||
|
||||
let pod_runtime_dir = manifest::paths::pod_runtime_dir(&config.pod_name)
|
||||
.ok_or(SpawnError::RuntimeDirUnavailable)?;
|
||||
std::fs::create_dir_all(&pod_runtime_dir).map_err(SpawnError::Io)?;
|
||||
let stderr_path = pod_runtime_dir.join("stderr.log");
|
||||
let stderr_file = std::fs::File::create(&stderr_path).map_err(SpawnError::Io)?;
|
||||
|
||||
let mut command = Command::new(&pod_bin);
|
||||
command
|
||||
.arg("--overlay")
|
||||
.arg(&config.overlay_toml)
|
||||
.current_dir(&config.cwd)
|
||||
.stdin(Stdio::null())
|
||||
.stdout(Stdio::null())
|
||||
.stderr(Stdio::from(stderr_file))
|
||||
.process_group(0);
|
||||
if config.resume_by_pod_name {
|
||||
command.arg("--pod").arg(&config.pod_name);
|
||||
}
|
||||
if let Some(id) = config.resume_from {
|
||||
command.arg("--session").arg(id.to_string());
|
||||
}
|
||||
let mut child = command.spawn().map_err(SpawnError::PodLaunchFailed)?;
|
||||
|
||||
// Default `kill_on_drop = false` plus `process_group(0)` makes this
|
||||
// a detached Pod once startup succeeds: dropping the handle does not
|
||||
// terminate it, and terminal-generated signals for the parent's
|
||||
// process group do not hit the Pod. Runtime state/socket files are
|
||||
// the source of truth after that point.
|
||||
let ready = match wait_for_ready_file(&mut progress, &stderr_path, &mut child).await {
|
||||
Ok(ready) => ready,
|
||||
Err(e) => {
|
||||
let _ = child.start_kill();
|
||||
let _ = child.wait().await;
|
||||
return Err(e);
|
||||
}
|
||||
};
|
||||
tokio::spawn(async move {
|
||||
let _ = child.wait().await;
|
||||
});
|
||||
Ok(ready)
|
||||
}
|
||||
|
||||
async fn wait_for_ready_file<F>(
|
||||
progress: &mut F,
|
||||
stderr_path: &Path,
|
||||
child: &mut tokio::process::Child,
|
||||
) -> Result<SpawnReady, SpawnError>
|
||||
where
|
||||
F: FnMut(&str),
|
||||
{
|
||||
let mut tail = StderrTail::new();
|
||||
let deadline = tokio::time::Instant::now() + READY_TIMEOUT;
|
||||
let mut offset = 0usize;
|
||||
|
||||
loop {
|
||||
let content = match tokio::fs::read_to_string(stderr_path).await {
|
||||
Ok(content) => content,
|
||||
Err(e) if e.kind() == io::ErrorKind::NotFound => String::new(),
|
||||
Err(e) => return Err(SpawnError::Io(e)),
|
||||
};
|
||||
if content.len() > offset {
|
||||
for line in content[offset..].lines() {
|
||||
if let Some(rest) = line.strip_prefix(READY_PREFIX) {
|
||||
let mut parts = rest.splitn(2, '\t');
|
||||
let pod_name = parts.next().unwrap_or("").to_string();
|
||||
let socket_str = parts.next().unwrap_or("").to_string();
|
||||
if pod_name.is_empty() || socket_str.is_empty() {
|
||||
return Err(SpawnError::PodExitedEarly {
|
||||
stderr_tail: format!("malformed ready line: {line}"),
|
||||
});
|
||||
}
|
||||
let socket_path = PathBuf::from(socket_str);
|
||||
wait_for_socket(
|
||||
&socket_path,
|
||||
deadline,
|
||||
child,
|
||||
stderr_path,
|
||||
&mut tail,
|
||||
&mut offset,
|
||||
)
|
||||
.await?;
|
||||
return Ok(SpawnReady {
|
||||
pod_name,
|
||||
socket_path,
|
||||
});
|
||||
}
|
||||
tail.push(line);
|
||||
progress(line);
|
||||
}
|
||||
offset = content.len();
|
||||
}
|
||||
|
||||
if tokio::time::Instant::now() >= deadline {
|
||||
return Err(SpawnError::Timeout);
|
||||
}
|
||||
tokio::select! {
|
||||
status = child.wait() => {
|
||||
let _ = status;
|
||||
// Pod は exit 直前に最終 stderr 行を flush することがある。
|
||||
// child.wait() が解決した後に再読みして、原因行を取りこ
|
||||
// ぼさず PodExitedEarly に載せる。
|
||||
drain_stderr_into_tail(stderr_path, &mut tail, &mut offset).await;
|
||||
return Err(SpawnError::PodExitedEarly {
|
||||
stderr_tail: tail.into_string(),
|
||||
});
|
||||
}
|
||||
_ = tokio::time::sleep(Duration::from_millis(100)) => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn wait_for_socket(
|
||||
socket_path: &Path,
|
||||
deadline: tokio::time::Instant,
|
||||
child: &mut tokio::process::Child,
|
||||
stderr_path: &Path,
|
||||
tail: &mut StderrTail,
|
||||
offset: &mut usize,
|
||||
) -> Result<(), SpawnError> {
|
||||
loop {
|
||||
match tokio::net::UnixStream::connect(socket_path).await {
|
||||
Ok(_) => return Ok(()),
|
||||
Err(e)
|
||||
if e.kind() == io::ErrorKind::NotFound
|
||||
|| e.kind() == io::ErrorKind::ConnectionRefused => {}
|
||||
Err(e) => return Err(SpawnError::Io(e)),
|
||||
}
|
||||
if tokio::time::Instant::now() >= deadline {
|
||||
return Err(SpawnError::Timeout);
|
||||
}
|
||||
tokio::select! {
|
||||
status = child.wait() => {
|
||||
let _ = status;
|
||||
drain_stderr_into_tail(stderr_path, tail, offset).await;
|
||||
return Err(SpawnError::PodExitedEarly {
|
||||
stderr_tail: tail.as_string(),
|
||||
});
|
||||
}
|
||||
_ = tokio::time::sleep(Duration::from_millis(50)) => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn drain_stderr_into_tail(stderr_path: &Path, tail: &mut StderrTail, offset: &mut usize) {
|
||||
let Ok(content) = tokio::fs::read_to_string(stderr_path).await else {
|
||||
return;
|
||||
};
|
||||
if content.len() <= *offset {
|
||||
return;
|
||||
}
|
||||
for line in content[*offset..].lines() {
|
||||
if !line.starts_with(READY_PREFIX) {
|
||||
tail.push(line);
|
||||
}
|
||||
}
|
||||
*offset = content.len();
|
||||
}
|
||||
|
||||
/// Resolves the binary used to launch a child Pod. Must point at a
/// `pod`-compatible executable — the parent scans the child's stderr for
/// `INSOMNIA-READY`, so any wrapper that emits extra lines on stderr will
/// pollute that handshake.
///
/// A non-empty `INSOMNIA_POD_COMMAND` overrides the lookup (used by tests to
/// inject a mock binary). The value is read with `var_os`, so a path that is
/// not valid UTF-8 is honoured instead of being silently ignored. Otherwise
/// the bare name `pod` is returned and resolution is left to `PATH`; a
/// missing binary then surfaces as the spawn `io::Error`.
fn resolve_pod_command() -> PathBuf {
    std::env::var_os("INSOMNIA_POD_COMMAND")
        .filter(|cmd| !cmd.is_empty())
        .map_or_else(|| PathBuf::from("pod"), PathBuf::from)
}
|
||||
|
||||
/// Bounded buffer holding the most recent stderr lines of a starting pod.
struct StderrTail {
    lines: std::collections::VecDeque<String>,
}

impl StderrTail {
    /// Maximum number of lines retained.
    const MAX_LINES: usize = 8;

    /// Creates an empty tail with room for `MAX_LINES` lines.
    fn new() -> Self {
        let lines = std::collections::VecDeque::with_capacity(Self::MAX_LINES);
        Self { lines }
    }

    /// Appends `line`, evicting the oldest entry when the buffer is full.
    fn push(&mut self, line: &str) {
        while self.lines.len() >= Self::MAX_LINES {
            self.lines.pop_front();
        }
        self.lines.push_back(line.to_owned());
    }

    /// Renders the retained lines, oldest first, separated by `" | "`.
    fn as_string(&self) -> String {
        let parts: Vec<&str> = self.lines.iter().map(String::as_str).collect();
        parts.join(" | ")
    }

    /// Same rendering as [`Self::as_string`], consuming the buffer.
    fn into_string(self) -> String {
        Vec::from(self.lines).join(" | ")
    }
}
|
||||
10
crates/daemon/Cargo.toml
Normal file
10
crates/daemon/Cargo.toml
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
[package]
|
||||
name = "daemon"
|
||||
version = "0.1.0"
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
manifest = { workspace = true }
|
||||
protocol = { workspace = true }
|
||||
tokio = { workspace = true, features = ["full"] }
|
||||
9
crates/daemon/README.md
Normal file
9
crates/daemon/README.md
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
# daemon
|
||||
|
||||
Pod のライフサイクルを管理する常駐デーモン。未実装。
|
||||
|
||||
## 依存クレート
|
||||
|
||||
- `manifest` — マニフェスト設定
|
||||
- `protocol` — 通信プロトコル型
|
||||
- `tokio` — 非同期ランタイム
|
||||
1
crates/daemon/src/lib.rs
Normal file
1
crates/daemon/src/lib.rs
Normal file
|
|
@ -0,0 +1 @@
|
|||
|
||||
|
|
@ -1,18 +0,0 @@
|
|||
[package]
|
||||
name = "insomnia"
|
||||
version = "0.1.0"
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
llm-worker = { path = "../llm-worker" }
|
||||
llm-worker-persistence = { path = "../llm-worker-persistence" }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
toml = "0.8"
|
||||
uuid = { version = "1", features = ["v7", "serde"] }
|
||||
thiserror = "2.0"
|
||||
tokio = { version = "1.49", features = ["fs"] }
|
||||
|
||||
[dev-dependencies]
|
||||
tokio = { version = "1.49", features = ["macros", "rt-multi-thread"] }
|
||||
tempfile = "3.24"
|
||||
|
|
@ -1,9 +0,0 @@
|
|||
pub mod manifest;
|
||||
pub mod pod;
|
||||
pub mod provider;
|
||||
pub mod scope;
|
||||
|
||||
pub use manifest::{PodManifest, ProviderConfig, ProviderKind};
|
||||
pub use pod::{Pod, PodError, PodId, PodRunResult, apply_worker_manifest, new_pod_id};
|
||||
pub use provider::build_client;
|
||||
pub use scope::Scope;
|
||||
|
|
@ -1,164 +0,0 @@
|
|||
use std::path::PathBuf;
|
||||
|
||||
use serde::Deserialize;
|
||||
|
||||
/// Declarative configuration for a Pod.
|
||||
///
|
||||
/// Parsed from a TOML manifest file. Describes the provider, model,
|
||||
/// system prompt, and optional directory scope.
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct PodManifest {
|
||||
pub pod: PodMeta,
|
||||
pub provider: ProviderConfig,
|
||||
pub worker: WorkerManifest,
|
||||
#[serde(default)]
|
||||
pub scope: Option<ScopeConfig>,
|
||||
}
|
||||
|
||||
/// Pod metadata.
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct PodMeta {
|
||||
pub name: String,
|
||||
}
|
||||
|
||||
/// LLM provider configuration.
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct ProviderConfig {
|
||||
pub kind: ProviderKind,
|
||||
pub model: String,
|
||||
/// Environment variable name holding the API key.
|
||||
#[serde(default)]
|
||||
pub api_key_env: Option<String>,
|
||||
/// Custom base URL for the provider API.
|
||||
#[serde(default)]
|
||||
pub base_url: Option<String>,
|
||||
}
|
||||
|
||||
/// Supported LLM providers.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum ProviderKind {
|
||||
Anthropic,
|
||||
Openai,
|
||||
Gemini,
|
||||
Ollama,
|
||||
}
|
||||
|
||||
/// Worker-level configuration embedded in the manifest.
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct WorkerManifest {
|
||||
#[serde(default)]
|
||||
pub system_prompt: Option<String>,
|
||||
#[serde(default)]
|
||||
pub max_tokens: Option<u32>,
|
||||
#[serde(default)]
|
||||
pub temperature: Option<f32>,
|
||||
}
|
||||
|
||||
/// Directory scope configuration.
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct ScopeConfig {
|
||||
pub root: PathBuf,
|
||||
}
|
||||
|
||||
impl PodManifest {
|
||||
/// Parse a manifest from a TOML string.
|
||||
pub fn from_toml(s: &str) -> Result<Self, toml::de::Error> {
|
||||
toml::from_str(s)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn parse_minimal_manifest() {
|
||||
let toml = r#"
|
||||
[pod]
|
||||
name = "test-agent"
|
||||
|
||||
[provider]
|
||||
kind = "anthropic"
|
||||
model = "claude-sonnet-4-20250514"
|
||||
|
||||
[worker]
|
||||
"#;
|
||||
let manifest = PodManifest::from_toml(toml).unwrap();
|
||||
assert_eq!(manifest.pod.name, "test-agent");
|
||||
assert_eq!(manifest.provider.kind, ProviderKind::Anthropic);
|
||||
assert_eq!(manifest.provider.model, "claude-sonnet-4-20250514");
|
||||
assert!(manifest.provider.api_key_env.is_none());
|
||||
assert!(manifest.scope.is_none());
|
||||
assert!(manifest.worker.system_prompt.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_full_manifest() {
|
||||
let toml = r#"
|
||||
[pod]
|
||||
name = "code-reviewer"
|
||||
|
||||
[provider]
|
||||
kind = "anthropic"
|
||||
model = "claude-sonnet-4-20250514"
|
||||
api_key_env = "ANTHROPIC_API_KEY"
|
||||
|
||||
[worker]
|
||||
system_prompt = "You are a code reviewer."
|
||||
max_tokens = 4096
|
||||
temperature = 0.3
|
||||
|
||||
[scope]
|
||||
root = "./src"
|
||||
"#;
|
||||
let manifest = PodManifest::from_toml(toml).unwrap();
|
||||
assert_eq!(manifest.pod.name, "code-reviewer");
|
||||
assert_eq!(
|
||||
manifest.provider.api_key_env.as_deref(),
|
||||
Some("ANTHROPIC_API_KEY")
|
||||
);
|
||||
assert_eq!(
|
||||
manifest.worker.system_prompt.as_deref(),
|
||||
Some("You are a code reviewer.")
|
||||
);
|
||||
assert_eq!(manifest.worker.max_tokens, Some(4096));
|
||||
assert_eq!(manifest.worker.temperature, Some(0.3));
|
||||
assert_eq!(
|
||||
manifest.scope.as_ref().unwrap().root,
|
||||
PathBuf::from("./src")
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_ollama_no_api_key() {
|
||||
let toml = r#"
|
||||
[pod]
|
||||
name = "local-agent"
|
||||
|
||||
[provider]
|
||||
kind = "ollama"
|
||||
model = "llama3"
|
||||
|
||||
[worker]
|
||||
"#;
|
||||
let manifest = PodManifest::from_toml(toml).unwrap();
|
||||
assert_eq!(manifest.provider.kind, ProviderKind::Ollama);
|
||||
assert!(manifest.provider.api_key_env.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn reject_unknown_provider() {
|
||||
let toml = r#"
|
||||
[pod]
|
||||
name = "test"
|
||||
|
||||
[provider]
|
||||
kind = "unknown_provider"
|
||||
model = "x"
|
||||
|
||||
[worker]
|
||||
"#;
|
||||
assert!(PodManifest::from_toml(toml).is_err());
|
||||
}
|
||||
}
|
||||
|
|
@ -1,180 +0,0 @@
|
|||
use llm_worker::llm_client::client::LlmClient;
|
||||
use llm_worker::llm_client::RequestConfig;
|
||||
use llm_worker::Worker;
|
||||
use llm_worker_persistence::{
|
||||
Session, SessionConfig, SessionError, SessionId, Store, StoreError,
|
||||
};
|
||||
|
||||
use crate::manifest::{PodManifest, WorkerManifest};
|
||||
use crate::scope::Scope;
|
||||
|
||||
/// Pod identifier. UUID v7 (time-ordered).
|
||||
pub type PodId = uuid::Uuid;
|
||||
|
||||
/// Generate a new Pod ID.
|
||||
pub fn new_pod_id() -> PodId {
|
||||
uuid::Uuid::now_v7()
|
||||
}
|
||||
|
||||
/// An independent agent execution unit.
|
||||
///
|
||||
/// Wraps a persistent [`Session`] with manifest metadata and an optional
|
||||
/// directory scope. This is the primary abstraction in insomnia.
|
||||
pub struct Pod<C: LlmClient, St: Store> {
|
||||
id: PodId,
|
||||
manifest: PodManifest,
|
||||
session: Session<C, St>,
|
||||
scope: Option<Scope>,
|
||||
}
|
||||
|
||||
impl<C: LlmClient, St: Store> Pod<C, St> {
|
||||
/// Create a new Pod from a pre-built Worker and store.
|
||||
///
|
||||
/// The caller is responsible for constructing the `LlmClient` from the
|
||||
/// manifest's provider config. This keeps Pod free of provider-specific
|
||||
/// dependencies.
|
||||
pub async fn new(
|
||||
manifest: PodManifest,
|
||||
worker: Worker<C>,
|
||||
store: St,
|
||||
scope: Option<Scope>,
|
||||
) -> Result<Self, PodError> {
|
||||
let session = Session::new(worker, store, SessionConfig::default()).await?;
|
||||
Ok(Self {
|
||||
id: new_pod_id(),
|
||||
manifest,
|
||||
session,
|
||||
scope,
|
||||
})
|
||||
}
|
||||
|
||||
/// Restore a Pod from a persisted session.
|
||||
pub async fn restore(
|
||||
id: PodId,
|
||||
session_id: SessionId,
|
||||
manifest: PodManifest,
|
||||
client: C,
|
||||
store: St,
|
||||
scope: Option<Scope>,
|
||||
) -> Result<Self, PodError> {
|
||||
let session = Session::restore(client, store, session_id, SessionConfig::default()).await?;
|
||||
Ok(Self {
|
||||
id,
|
||||
manifest,
|
||||
session,
|
||||
scope,
|
||||
})
|
||||
}
|
||||
|
||||
/// The Pod's unique identifier.
|
||||
pub fn id(&self) -> PodId {
|
||||
self.id
|
||||
}
|
||||
|
||||
/// The session ID used for persistence.
|
||||
pub fn session_id(&self) -> SessionId {
|
||||
self.session.session_id()
|
||||
}
|
||||
|
||||
/// The Pod's manifest.
|
||||
pub fn manifest(&self) -> &PodManifest {
|
||||
&self.manifest
|
||||
}
|
||||
|
||||
/// The Pod's directory scope, if any.
|
||||
pub fn scope(&self) -> Option<&Scope> {
|
||||
self.scope.as_ref()
|
||||
}
|
||||
|
||||
/// Direct access to the underlying session.
|
||||
///
|
||||
/// Use this to register tools, hooks, or subscribers on the worker
|
||||
/// before calling [`run`](Self::run).
|
||||
pub fn session_mut(&mut self) -> &mut Session<C, St> {
|
||||
&mut self.session
|
||||
}
|
||||
|
||||
/// Send user input and run until the LLM turn completes.
|
||||
pub async fn run(&mut self, input: impl Into<String>) -> Result<PodRunResult, PodError> {
|
||||
let result = self.session.run(input).await?;
|
||||
Ok(result.into())
|
||||
}
|
||||
|
||||
/// Resume from a paused state.
|
||||
pub async fn resume(&mut self) -> Result<PodRunResult, PodError> {
|
||||
let result = self.session.resume().await?;
|
||||
Ok(result.into())
|
||||
}
|
||||
}
|
||||
|
||||
impl<St: Store> Pod<Box<dyn LlmClient>, St> {
|
||||
/// Create a Pod entirely from a manifest.
|
||||
///
|
||||
/// Builds the LLM client from the provider config, applies worker
|
||||
/// settings, and creates a new persistent session.
|
||||
pub async fn from_manifest(
|
||||
manifest: PodManifest,
|
||||
store: St,
|
||||
scope: Option<Scope>,
|
||||
) -> Result<Self, PodError> {
|
||||
let client = crate::provider::build_client(&manifest.provider)?;
|
||||
let mut worker = Worker::new(client);
|
||||
apply_worker_manifest(&mut worker, &manifest.worker);
|
||||
let session = Session::new(worker, store, SessionConfig::default()).await?;
|
||||
Ok(Self {
|
||||
id: new_pod_id(),
|
||||
manifest,
|
||||
session,
|
||||
scope,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Apply worker-level manifest settings to a Worker.
|
||||
pub fn apply_worker_manifest<C: LlmClient>(worker: &mut Worker<C>, wm: &WorkerManifest) {
|
||||
if let Some(ref prompt) = wm.system_prompt {
|
||||
worker.set_system_prompt(prompt);
|
||||
}
|
||||
let mut config = RequestConfig::new();
|
||||
if let Some(max_tokens) = wm.max_tokens {
|
||||
config.max_tokens = Some(max_tokens);
|
||||
}
|
||||
if let Some(temperature) = wm.temperature {
|
||||
config.temperature = Some(temperature);
|
||||
}
|
||||
worker.set_request_config(config);
|
||||
}
|
||||
|
||||
/// Result of a Pod run.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum PodRunResult {
|
||||
/// The LLM finished its turn normally.
|
||||
Finished,
|
||||
/// The LLM paused (e.g. awaiting user confirmation via a hook).
|
||||
Paused,
|
||||
}
|
||||
|
||||
impl From<llm_worker::WorkerResult> for PodRunResult {
|
||||
fn from(r: llm_worker::WorkerResult) -> Self {
|
||||
match r {
|
||||
llm_worker::WorkerResult::Finished => PodRunResult::Finished,
|
||||
llm_worker::WorkerResult::Paused => PodRunResult::Paused,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Pod errors.
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum PodError {
|
||||
#[error(transparent)]
|
||||
Session(#[from] SessionError),
|
||||
|
||||
#[error(transparent)]
|
||||
Store(#[from] StoreError),
|
||||
|
||||
#[error("scope violation: {path} is outside the allowed directory")]
|
||||
ScopeViolation { path: String },
|
||||
|
||||
#[error("provider configuration error: {0}")]
|
||||
ProviderConfig(String),
|
||||
}
|
||||
|
|
@ -1,60 +0,0 @@
|
|||
use llm_worker::llm_client::client::LlmClient;
|
||||
use llm_worker::llm_client::providers::anthropic::AnthropicClient;
|
||||
use llm_worker::llm_client::providers::gemini::GeminiClient;
|
||||
use llm_worker::llm_client::providers::ollama::OllamaClient;
|
||||
use llm_worker::llm_client::providers::openai::OpenAIClient;
|
||||
|
||||
use crate::manifest::{ProviderConfig, ProviderKind};
|
||||
use crate::pod::PodError;
|
||||
|
||||
/// Build an [`LlmClient`] from a [`ProviderConfig`].
|
||||
///
|
||||
/// Resolves the API key from the environment variable specified in the config.
|
||||
pub fn build_client(config: &ProviderConfig) -> Result<Box<dyn LlmClient>, PodError> {
|
||||
let api_key = config
|
||||
.api_key_env
|
||||
.as_deref()
|
||||
.map(std::env::var)
|
||||
.transpose()
|
||||
.map_err(|e| PodError::ProviderConfig(format!("env var: {e}")))?;
|
||||
|
||||
match config.kind {
|
||||
ProviderKind::Anthropic => {
|
||||
let key = api_key.ok_or_else(|| {
|
||||
PodError::ProviderConfig("anthropic requires api_key_env".into())
|
||||
})?;
|
||||
let mut client = AnthropicClient::new(key, &config.model);
|
||||
if let Some(ref url) = config.base_url {
|
||||
client = client.with_base_url(url);
|
||||
}
|
||||
Ok(Box::new(client))
|
||||
}
|
||||
ProviderKind::Openai => {
|
||||
let key = api_key.ok_or_else(|| {
|
||||
PodError::ProviderConfig("openai requires api_key_env".into())
|
||||
})?;
|
||||
let mut client = OpenAIClient::new(key, &config.model);
|
||||
if let Some(ref url) = config.base_url {
|
||||
client = client.with_base_url(url);
|
||||
}
|
||||
Ok(Box::new(client))
|
||||
}
|
||||
ProviderKind::Gemini => {
|
||||
let key = api_key.ok_or_else(|| {
|
||||
PodError::ProviderConfig("gemini requires api_key_env".into())
|
||||
})?;
|
||||
let mut client = GeminiClient::new(key, &config.model);
|
||||
if let Some(ref url) = config.base_url {
|
||||
client = client.with_base_url(url);
|
||||
}
|
||||
Ok(Box::new(client))
|
||||
}
|
||||
ProviderKind::Ollama => {
|
||||
let mut client = OllamaClient::new(&config.model);
|
||||
if let Some(ref url) = config.base_url {
|
||||
client = client.with_base_url(url);
|
||||
}
|
||||
Ok(Box::new(client))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,101 +0,0 @@
|
|||
use std::path::{Path, PathBuf};
|
||||
|
||||
/// Directory scope constraining a Pod's write access.
///
/// Read access is unrestricted — only write operations are checked against the scope.
#[derive(Debug, Clone)]
pub struct Scope {
    root: PathBuf,
}

impl Scope {
    /// Create a new scope rooted at the given directory.
    ///
    /// The path is canonicalized to resolve symlinks and relative components.
    ///
    /// # Errors
    ///
    /// Returns the I/O error from canonicalization, e.g. when `root` does
    /// not exist.
    pub fn new(root: impl Into<PathBuf>) -> std::io::Result<Self> {
        let root = root.into().canonicalize()?;
        Ok(Self { root })
    }

    /// The root directory of this scope.
    pub fn root(&self) -> &Path {
        &self.root
    }

    /// Check whether `path` falls within this scope.
    ///
    /// An existing path is canonicalized and compared against the root.
    /// A path that does not exist yet is accepted when its parent directory
    /// canonicalizes to a location inside the scope, which covers creating a
    /// new file in an existing in-scope directory. A bare relative file name
    /// is resolved against the current directory.
    ///
    /// A path that cannot be canonicalized although an entry exists at that
    /// location (for example a dangling symlink) is rejected: writing through
    /// it could create a file at a target outside the scope.
    pub fn contains(&self, path: &Path) -> bool {
        if let Ok(canonical) = path.canonicalize() {
            return canonical.starts_with(&self.root);
        }

        // Canonicalization failed. If something is nevertheless present at
        // `path` (checked without following links), the target cannot be
        // verified, so refuse rather than trust the parent directory.
        if path.symlink_metadata().is_ok() {
            return false;
        }

        // Path doesn't exist yet — check the parent directory instead.
        let Some(parent) = path.parent() else {
            return false;
        };
        // `Path::parent` yields an empty path for a bare file name such as
        // `new.txt`; that means "the current directory".
        let parent = if parent.as_os_str().is_empty() {
            Path::new(".")
        } else {
            parent
        };
        parent
            .canonicalize()
            .map(|dir| dir.starts_with(&self.root))
            .unwrap_or(false)
    }
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::fs;
|
||||
use tempfile::TempDir;
|
||||
|
||||
#[test]
|
||||
fn contains_file_inside_scope() {
|
||||
let dir = TempDir::new().unwrap();
|
||||
let scope = Scope::new(dir.path()).unwrap();
|
||||
|
||||
let file = dir.path().join("test.txt");
|
||||
fs::write(&file, "hello").unwrap();
|
||||
|
||||
assert!(scope.contains(&file));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rejects_file_outside_scope() {
|
||||
let dir = TempDir::new().unwrap();
|
||||
let outside = TempDir::new().unwrap();
|
||||
let scope = Scope::new(dir.path()).unwrap();
|
||||
|
||||
let file = outside.path().join("test.txt");
|
||||
fs::write(&file, "hello").unwrap();
|
||||
|
||||
assert!(!scope.contains(&file));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn contains_new_file_in_existing_parent() {
|
||||
let dir = TempDir::new().unwrap();
|
||||
let scope = Scope::new(dir.path()).unwrap();
|
||||
|
||||
// File doesn't exist yet, but parent dir is inside scope
|
||||
let new_file = dir.path().join("new.txt");
|
||||
assert!(scope.contains(&new_file));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn contains_nested_directory() {
|
||||
let dir = TempDir::new().unwrap();
|
||||
let nested = dir.path().join("a/b/c");
|
||||
fs::create_dir_all(&nested).unwrap();
|
||||
let scope = Scope::new(dir.path()).unwrap();
|
||||
|
||||
let file = nested.join("test.txt");
|
||||
assert!(scope.contains(&file));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rejects_traversal_attack() {
|
||||
let dir = TempDir::new().unwrap();
|
||||
let scope = Scope::new(dir.path()).unwrap();
|
||||
|
||||
let traversal = dir.path().join("../../../etc/passwd");
|
||||
assert!(!scope.contains(&traversal));
|
||||
}
|
||||
}
|
||||
13
crates/lint-common/Cargo.toml
Normal file
13
crates/lint-common/Cargo.toml
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
[package]
|
||||
name = "lint-common"
|
||||
version = "0.1.0"
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
chrono = { version = "0.4", features = ["serde"] }
|
||||
serde = { workspace = true, features = ["derive"] }
|
||||
thiserror = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
serde_json = { workspace = true }
|
||||
81
crates/lint-common/src/frontmatter.rs
Normal file
81
crates/lint-common/src/frontmatter.rs
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
//! Common frontmatter helpers.
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
|
||||
use crate::RecordLintError;
|
||||
|
||||
/// Trait record frontmatter types implement so linters can drive them uniformly.
|
||||
pub trait Frontmatter: Sized {
|
||||
/// Hard upper bound on body chars (excluding the frontmatter block).
|
||||
const BODY_LIMIT: usize;
|
||||
|
||||
fn created_at(&self) -> Option<DateTime<Utc>>;
|
||||
fn updated_at(&self) -> Option<DateTime<Utc>>;
|
||||
}
|
||||
|
||||
const FRONTMATTER_DELIM: &str = "---";
|
||||
|
||||
/// Split a markdown document into `(yaml_frontmatter, body)`.
|
||||
///
|
||||
/// Expects the document to start with `---\n` and have a closing
|
||||
/// `---\n` (or `---` at EOF) somewhere downstream. Trailing newline
|
||||
/// after the closing delimiter is consumed.
|
||||
pub fn split_frontmatter(content: &str) -> Result<(&str, &str), RecordLintError> {
|
||||
// The opening delimiter must be the very first line.
|
||||
let after_open = content
|
||||
.strip_prefix(FRONTMATTER_DELIM)
|
||||
.and_then(|s| s.strip_prefix('\n').or(Some(s)))
|
||||
.ok_or(RecordLintError::MissingFrontmatter)?;
|
||||
|
||||
// Look for the closing `---` on its own line.
|
||||
let mut yaml_end = None;
|
||||
let mut byte_offset = 0usize;
|
||||
for line in after_open.split_inclusive('\n') {
|
||||
let trimmed = line.trim_end_matches('\n').trim_end_matches('\r');
|
||||
if trimmed == FRONTMATTER_DELIM {
|
||||
yaml_end = Some((byte_offset, byte_offset + line.len()));
|
||||
break;
|
||||
}
|
||||
byte_offset += line.len();
|
||||
}
|
||||
|
||||
let (yaml_end_excl, body_start) = yaml_end.ok_or_else(|| {
|
||||
RecordLintError::MalformedFrontmatter("missing closing `---` line".to_string())
|
||||
})?;
|
||||
|
||||
let yaml = &after_open[..yaml_end_excl];
|
||||
let body = &after_open[body_start..];
|
||||
Ok((yaml, body))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn splits_simple() {
|
||||
let doc = "---\nfoo: 1\n---\nbody here\n";
|
||||
let (y, b) = split_frontmatter(doc).unwrap();
|
||||
assert_eq!(y, "foo: 1\n");
|
||||
assert_eq!(b, "body here\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn no_leading_delim_errors() {
|
||||
let err = split_frontmatter("hello").unwrap_err();
|
||||
assert!(matches!(err, RecordLintError::MissingFrontmatter));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn no_closing_delim_errors() {
|
||||
let err = split_frontmatter("---\nfoo: 1\nno close\n").unwrap_err();
|
||||
assert!(matches!(err, RecordLintError::MalformedFrontmatter(_)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn handles_empty_body() {
|
||||
let doc = "---\nfoo: 1\n---\n";
|
||||
let (_, b) = split_frontmatter(doc).unwrap();
|
||||
assert_eq!(b, "");
|
||||
}
|
||||
}
|
||||
20
crates/lint-common/src/lib.rs
Normal file
20
crates/lint-common/src/lib.rs
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
//! Shared record lint primitives for memory and workflow files.
|
||||
|
||||
mod frontmatter;
|
||||
mod slug;
|
||||
|
||||
pub use frontmatter::{Frontmatter, split_frontmatter};
|
||||
pub use slug::{Slug, is_valid_slug};
|
||||
|
||||
/// Common lint errors for Markdown record syntax shared by memory and workflow.
|
||||
#[derive(Debug, Clone, thiserror::Error, PartialEq, Eq)]
|
||||
pub enum RecordLintError {
|
||||
#[error("invalid slug `{0}`: must match ^[a-z0-9](?:[a-z0-9-]{{0,62}}[a-z0-9])?$")]
|
||||
InvalidSlug(String),
|
||||
|
||||
#[error("malformed frontmatter: {0}")]
|
||||
MalformedFrontmatter(String),
|
||||
|
||||
#[error("frontmatter is missing or document is empty")]
|
||||
MissingFrontmatter,
|
||||
}
|
||||
146
crates/lint-common/src/slug.rs
Normal file
146
crates/lint-common/src/slug.rs
Normal file
|
|
@ -0,0 +1,146 @@
|
|||
//! Slug type and validation.
|
||||
//!
|
||||
//! Syntax (agent-skills compatible):
|
||||
//! ^[a-z0-9](?:[a-z0-9-]{0,62}[a-z0-9])?$
|
||||
//! - 1–64 chars
|
||||
//! - lowercase ASCII alphanumerics and `-`
|
||||
//! - cannot start or end with `-`
|
||||
//! - no consecutive `--`
|
||||
|
||||
use std::fmt;
|
||||
use std::str::FromStr;
|
||||
|
||||
use serde::{Deserialize, Deserializer, Serialize};
|
||||
|
||||
use crate::RecordLintError;
|
||||
|
||||
const MIN_LEN: usize = 1;
|
||||
const MAX_LEN: usize = 64;
|
||||
|
||||
/// Validated slug. Constructible only via [`Slug::parse`].
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)]
|
||||
#[serde(transparent)]
|
||||
pub struct Slug(String);
|
||||
|
||||
impl Slug {
|
||||
/// Parse and validate. Returns [`RecordLintError::InvalidSlug`] on rejection.
|
||||
pub fn parse(s: impl Into<String>) -> Result<Self, RecordLintError> {
|
||||
let s = s.into();
|
||||
if is_valid_slug(&s) {
|
||||
Ok(Self(s))
|
||||
} else {
|
||||
Err(RecordLintError::InvalidSlug(s))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_str(&self) -> &str {
|
||||
&self.0
|
||||
}
|
||||
|
||||
pub fn into_string(self) -> String {
|
||||
self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Slug {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.write_str(&self.0)
|
||||
}
|
||||
}
|
||||
|
||||
impl AsRef<str> for Slug {
|
||||
fn as_ref(&self) -> &str {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for Slug {
|
||||
type Err = RecordLintError;
|
||||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
Self::parse(s)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'de> Deserialize<'de> for Slug {
|
||||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
let raw = String::deserialize(deserializer)?;
|
||||
Self::parse(raw).map_err(serde::de::Error::custom)
|
||||
}
|
||||
}
|
||||
|
||||
/// Pure-fn predicate matching the agent-skills slug regex without
|
||||
/// pulling in the `regex` crate.
|
||||
pub fn is_valid_slug(s: &str) -> bool {
|
||||
let bytes = s.as_bytes();
|
||||
let len = bytes.len();
|
||||
if len < MIN_LEN || len > MAX_LEN {
|
||||
return false;
|
||||
}
|
||||
if !is_alnum_lower(bytes[0]) || !is_alnum_lower(bytes[len - 1]) {
|
||||
return false;
|
||||
}
|
||||
let mut prev_dash = false;
|
||||
for &b in bytes {
|
||||
if b == b'-' {
|
||||
if prev_dash {
|
||||
return false;
|
||||
}
|
||||
prev_dash = true;
|
||||
} else if is_alnum_lower(b) {
|
||||
prev_dash = false;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
/// True for ASCII digits and lowercase ASCII letters.
fn is_alnum_lower(b: u8) -> bool {
    matches!(b, b'0'..=b'9' | b'a'..=b'z')
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn accepts_basic_slugs() {
|
||||
for s in ["a", "ab", "abc-def", "x9", "a-b-c", "123", "a-1"] {
|
||||
assert!(is_valid_slug(s), "expected `{s}` valid");
|
||||
assert!(Slug::parse(s).is_ok());
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rejects_bad_slugs() {
|
||||
for s in [
|
||||
"", "-", "-foo", "foo-", "Foo", "foo_bar", "foo bar", "foo--bar", "foo.bar", "ä",
|
||||
] {
|
||||
assert!(!is_valid_slug(s), "expected `{s}` invalid");
|
||||
assert!(Slug::parse(s).is_err());
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn enforces_length_bounds() {
|
||||
let too_long = "a".repeat(MAX_LEN + 1);
|
||||
assert!(!is_valid_slug(&too_long));
|
||||
let max = "a".repeat(MAX_LEN);
|
||||
assert!(is_valid_slug(&max));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn deserializes_via_serde() {
|
||||
let json = "\"valid-slug\"";
|
||||
let slug: Slug = serde_json::from_str(json).unwrap();
|
||||
assert_eq!(slug.as_str(), "valid-slug");
|
||||
|
||||
let bad = "\"BAD\"";
|
||||
let err: Result<Slug, _> = serde_json::from_str(bad);
|
||||
assert!(err.is_err());
|
||||
}
|
||||
}
|
||||
9
crates/llm-worker-macros/README.md
Normal file
9
crates/llm-worker-macros/README.md
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
# llm-worker-macros
|
||||
|
||||
Rust メソッドを LLM 呼び出し可能なツールとして自動登録する手続きマクロクレート。引数構造体・Tool トレイト実装・ToolDefinition を自動生成する。
|
||||
|
||||
## 公開マクロ
|
||||
|
||||
- `#[tool_registry]` — impl ブロックに付与し、内部の `#[tool]` メソッドを一括処理
|
||||
- `#[tool]` — メソッドをツールとしてマーク
|
||||
- `#[description = "..."]` — 引数に説明を付与(JSON Schema の description に反映)
|
||||
|
|
@ -192,13 +192,13 @@ fn generate_tool_impl(self_ty: &Type, method: &syn::ImplItemFn) -> proc_macro2::
|
|||
let result_handling = if is_result_type(&sig.output) {
|
||||
quote! {
|
||||
match result {
|
||||
Ok(val) => Ok(format!("{:?}", val)),
|
||||
Ok(val) => Ok(format!("{:?}", val).into()),
|
||||
Err(e) => Err(::llm_worker::tool::ToolError::ExecutionFailed(format!("{}", e))),
|
||||
}
|
||||
}
|
||||
} else {
|
||||
quote! {
|
||||
Ok(format!("{:?}", result))
|
||||
Ok(format!("{:?}", result).into())
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -247,7 +247,7 @@ fn generate_tool_impl(self_ty: &Type, method: &syn::ImplItemFn) -> proc_macro2::
|
|||
|
||||
#[async_trait::async_trait]
|
||||
impl ::llm_worker::tool::Tool for #tool_struct_name {
|
||||
async fn execute(&self, input_json: &str) -> Result<String, ::llm_worker::tool::ToolError> {
|
||||
async fn execute(&self, input_json: &str) -> Result<::llm_worker::tool::ToolOutput, ::llm_worker::tool::ToolError> {
|
||||
#execute_body
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,21 +0,0 @@
|
|||
[package]
|
||||
name = "llm-worker-persistence"
|
||||
description = "Session persistence for llm-worker via append-only JSONL logs"
|
||||
version = "0.1.0"
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
llm-worker = { path = "../llm-worker" }
|
||||
async-trait = "0.1"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
tokio = { version = "1.49", features = ["fs", "io-util"] }
|
||||
uuid = { version = "1", features = ["v7", "serde"] }
|
||||
thiserror = "2.0"
|
||||
|
||||
[dev-dependencies]
|
||||
tokio = { version = "1.49", features = ["macros", "rt-multi-thread", "fs", "io-util"] }
|
||||
tempfile = "3.24"
|
||||
futures = "0.3"
|
||||
async-trait = "0.1"
|
||||
|
|
@ -1,47 +0,0 @@
|
|||
//! [`ToolOutputProcessor`] implementation backed by a [`BlobStore`].
|
||||
//!
|
||||
//! Converts large tool output strings into [`ToolOutput::Stored`] and
|
||||
//! persists the content via a [`BlobStore`], returning a summary with
|
||||
//! a blob reference for conversation history.
|
||||
|
||||
use crate::blob_store::BlobStore;
|
||||
use async_trait::async_trait;
|
||||
use llm_worker::tool::{ToolError, ToolOutput, ToolOutputProcessor};
|
||||
use std::sync::Arc;
|
||||
|
||||
/// A [`ToolOutputProcessor`] that stores large outputs in a [`BlobStore`].
|
||||
///
|
||||
/// Small outputs (≤ `INLINE_THRESHOLD` bytes) pass through unchanged.
|
||||
/// Large outputs are stored as blobs, and a summary with a `[blob:<id>]`
|
||||
/// reference replaces the original content in conversation history.
|
||||
pub struct BlobOutputProcessor<B: BlobStore> {
|
||||
blob_store: Arc<B>,
|
||||
}
|
||||
|
||||
impl<B: BlobStore> BlobOutputProcessor<B> {
|
||||
/// Create a new processor backed by the given blob store.
|
||||
pub fn new(blob_store: Arc<B>) -> Self {
|
||||
Self { blob_store }
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<B: BlobStore + 'static> ToolOutputProcessor for BlobOutputProcessor<B> {
|
||||
async fn process(&self, output: String) -> Result<String, ToolError> {
|
||||
let tool_output = ToolOutput::from(output);
|
||||
|
||||
match tool_output {
|
||||
ToolOutput::Inline(s) => Ok(s),
|
||||
ToolOutput::Stored { summary, content } => {
|
||||
let blob_id = self
|
||||
.blob_store
|
||||
.store(&content)
|
||||
.await
|
||||
.map_err(|e| ToolError::Internal(format!("blob store error: {e}")))?;
|
||||
|
||||
// Prepend blob reference to the summary
|
||||
Ok(format!("[blob:{blob_id}] {summary}"))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,54 +0,0 @@
|
|||
//! Blob storage abstraction for large tool outputs.
|
||||
//!
|
||||
//! [`BlobStore`] provides async storage and retrieval of [`Content`] blobs,
|
||||
//! keeping them separate from session logs. Session logs reference blobs
|
||||
//! by [`BlobId`] in tool result summaries.
|
||||
|
||||
use llm_worker::tool::Content;
|
||||
use std::future::Future;
|
||||
|
||||
/// Unique blob identifier. UUID v7 (time-ordered).
|
||||
pub type BlobId = uuid::Uuid;
|
||||
|
||||
/// Generate a new blob ID.
|
||||
pub fn new_blob_id() -> BlobId {
|
||||
uuid::Uuid::now_v7()
|
||||
}
|
||||
|
||||
/// Errors from the blob store.
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum BlobStoreError {
|
||||
#[error("I/O error: {0}")]
|
||||
Io(#[from] std::io::Error),
|
||||
|
||||
#[error("serialization error: {0}")]
|
||||
Serde(#[from] serde_json::Error),
|
||||
|
||||
#[error("blob not found: {0}")]
|
||||
NotFound(BlobId),
|
||||
}
|
||||
|
||||
/// Async blob storage backend.
|
||||
///
|
||||
/// Stores and retrieves [`Content`] blobs independently of session logs.
|
||||
/// All methods take `&self` — implementations should use interior mutability
|
||||
/// when needed.
|
||||
pub trait BlobStore: Send + Sync {
|
||||
/// Store content and return its assigned ID.
|
||||
fn store(
|
||||
&self,
|
||||
content: &Content,
|
||||
) -> impl Future<Output = Result<BlobId, BlobStoreError>> + Send;
|
||||
|
||||
/// Load content by ID.
|
||||
fn load(
|
||||
&self,
|
||||
id: BlobId,
|
||||
) -> impl Future<Output = Result<Content, BlobStoreError>> + Send;
|
||||
|
||||
/// Check if a blob exists.
|
||||
fn exists(
|
||||
&self,
|
||||
id: BlobId,
|
||||
) -> impl Future<Output = Result<bool, BlobStoreError>> + Send;
|
||||
}
|
||||
|
|
@ -1,21 +0,0 @@
|
|||
//! Debug-only raw stream event recording.
|
||||
//!
|
||||
//! [`TraceEntry`] captures every LLM stream event verbatim for debugging
|
||||
//! and post-hoc analysis. Written to a separate `.trace.jsonl` file,
|
||||
//! completely independent of the session log used for state restoration.
|
||||
//!
|
||||
//! Disabled by default. Enable via `SessionConfig::record_event_trace`.
|
||||
|
||||
use llm_worker::llm_client::event::Event;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// A single trace entry recording a raw stream event.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct TraceEntry {
|
||||
/// Timestamp in milliseconds since Unix epoch.
|
||||
pub ts: u64,
|
||||
/// Turn number at the time of recording.
|
||||
pub turn: usize,
|
||||
/// The raw stream event.
|
||||
pub event: Event,
|
||||
}
|
||||
|
|
@ -1,83 +0,0 @@
|
|||
//! Filesystem-backed blob store.
|
||||
//!
|
||||
//! Layout:
|
||||
//! - Text blobs: `{root}/{blob_id}.txt`
|
||||
//! - Structured blobs: `{root}/{blob_id}.json`
|
||||
|
||||
use crate::blob_store::{new_blob_id, BlobId, BlobStore, BlobStoreError};
|
||||
use llm_worker::tool::Content;
|
||||
use std::path::PathBuf;
|
||||
use tokio::fs;
|
||||
|
||||
/// Filesystem-backed blob store.
|
||||
///
|
||||
/// Each blob is stored as a single file. Text content uses `.txt`,
|
||||
/// structured (JSON) content uses `.json`.
|
||||
#[derive(Clone)]
|
||||
pub struct FsBlobStore {
|
||||
root: PathBuf,
|
||||
}
|
||||
|
||||
impl FsBlobStore {
|
||||
/// Create a new `FsBlobStore` rooted at the given directory.
|
||||
/// Creates the directory if it does not exist.
|
||||
pub async fn new(root: impl Into<PathBuf>) -> Result<Self, BlobStoreError> {
|
||||
let root = root.into();
|
||||
fs::create_dir_all(&root).await?;
|
||||
Ok(Self { root })
|
||||
}
|
||||
|
||||
fn text_path(&self, id: BlobId) -> PathBuf {
|
||||
self.root.join(format!("{id}.txt"))
|
||||
}
|
||||
|
||||
fn json_path(&self, id: BlobId) -> PathBuf {
|
||||
self.root.join(format!("{id}.json"))
|
||||
}
|
||||
|
||||
/// Resolve the actual path for a blob, checking both extensions.
|
||||
fn resolve_path(&self, id: BlobId) -> Option<(PathBuf, bool)> {
|
||||
let txt = self.text_path(id);
|
||||
if txt.exists() {
|
||||
return Some((txt, true));
|
||||
}
|
||||
let json = self.json_path(id);
|
||||
if json.exists() {
|
||||
return Some((json, false));
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
impl BlobStore for FsBlobStore {
|
||||
async fn store(&self, content: &Content) -> Result<BlobId, BlobStoreError> {
|
||||
let id = new_blob_id();
|
||||
match content {
|
||||
Content::Text(text) => {
|
||||
fs::write(self.text_path(id), text.as_bytes()).await?;
|
||||
}
|
||||
Content::Structured(value) => {
|
||||
let json = serde_json::to_string_pretty(value)?;
|
||||
fs::write(self.json_path(id), json.as_bytes()).await?;
|
||||
}
|
||||
}
|
||||
Ok(id)
|
||||
}
|
||||
|
||||
async fn load(&self, id: BlobId) -> Result<Content, BlobStoreError> {
|
||||
let (path, is_text) = self
|
||||
.resolve_path(id)
|
||||
.ok_or(BlobStoreError::NotFound(id))?;
|
||||
let bytes = fs::read_to_string(&path).await?;
|
||||
if is_text {
|
||||
Ok(Content::Text(bytes))
|
||||
} else {
|
||||
let value = serde_json::from_str(&bytes)?;
|
||||
Ok(Content::Structured(value))
|
||||
}
|
||||
}
|
||||
|
||||
async fn exists(&self, id: BlobId) -> Result<bool, BlobStoreError> {
|
||||
Ok(self.resolve_path(id).is_some())
|
||||
}
|
||||
}
|
||||
|
|
@ -1,133 +0,0 @@
|
|||
//! Filesystem-backed JSONL store.
|
||||
//!
|
||||
//! Layout:
|
||||
//! - Session log: `{root}/{session_id}.jsonl`
|
||||
//! - Event trace: `{root}/{session_id}.trace.jsonl`
|
||||
|
||||
use crate::event_trace::TraceEntry;
|
||||
use crate::session_log::LogEntry;
|
||||
use crate::store::{Store, StoreError};
|
||||
use crate::SessionId;
|
||||
use std::path::{Path, PathBuf};
|
||||
use tokio::fs;
|
||||
use tokio::io::AsyncWriteExt;
|
||||
|
||||
/// Filesystem-backed JSONL store.
|
||||
///
|
||||
/// Each session is stored as a single `.jsonl` file with one [`LogEntry`]
|
||||
/// per line. Writes use append mode for crash safety.
|
||||
#[derive(Clone)]
|
||||
pub struct FsStore {
|
||||
root: PathBuf,
|
||||
}
|
||||
|
||||
impl FsStore {
|
||||
/// Create a new `FsStore` rooted at the given directory.
|
||||
/// Creates the directory if it does not exist.
|
||||
pub async fn new(root: impl Into<PathBuf>) -> Result<Self, StoreError> {
|
||||
let root = root.into();
|
||||
fs::create_dir_all(&root).await?;
|
||||
Ok(Self { root })
|
||||
}
|
||||
|
||||
fn log_path(&self, id: SessionId) -> PathBuf {
|
||||
self.root.join(format!("{id}.jsonl"))
|
||||
}
|
||||
|
||||
fn trace_path(&self, id: SessionId) -> PathBuf {
|
||||
self.root.join(format!("{id}.trace.jsonl"))
|
||||
}
|
||||
|
||||
async fn append_line(&self, path: &Path, line: &str) -> Result<(), StoreError> {
|
||||
let mut file = fs::OpenOptions::new()
|
||||
.create(true)
|
||||
.append(true)
|
||||
.open(path)
|
||||
.await?;
|
||||
file.write_all(line.as_bytes()).await?;
|
||||
file.write_all(b"\n").await?;
|
||||
file.flush().await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn parse_jsonl<T: serde::de::DeserializeOwned>(
|
||||
content: &str,
|
||||
) -> Result<Vec<T>, StoreError> {
|
||||
let mut entries = Vec::new();
|
||||
for (i, line) in content.lines().enumerate() {
|
||||
if line.trim().is_empty() {
|
||||
continue;
|
||||
}
|
||||
let entry: T =
|
||||
serde_json::from_str(line).map_err(|e| StoreError::Corrupt {
|
||||
line: i + 1,
|
||||
message: e.to_string(),
|
||||
})?;
|
||||
entries.push(entry);
|
||||
}
|
||||
Ok(entries)
|
||||
}
|
||||
}
|
||||
|
||||
impl Store for FsStore {
|
||||
async fn append(&self, id: SessionId, entry: &LogEntry) -> Result<(), StoreError> {
|
||||
let line = serde_json::to_string(entry)?;
|
||||
self.append_line(&self.log_path(id), &line).await
|
||||
}
|
||||
|
||||
async fn read_all(&self, id: SessionId) -> Result<Vec<LogEntry>, StoreError> {
|
||||
let path = self.log_path(id);
|
||||
if !path.exists() {
|
||||
return Err(StoreError::NotFound(id));
|
||||
}
|
||||
let content = fs::read_to_string(&path).await?;
|
||||
Self::parse_jsonl(&content)
|
||||
}
|
||||
|
||||
async fn list_sessions(&self) -> Result<Vec<SessionId>, StoreError> {
|
||||
let mut sessions = Vec::new();
|
||||
let mut dir = fs::read_dir(&self.root).await?;
|
||||
while let Some(entry) = dir.next_entry().await? {
|
||||
let path = entry.path();
|
||||
// Only match .jsonl files, not .trace.jsonl
|
||||
let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
|
||||
if name.ends_with(".jsonl") && !name.ends_with(".trace.jsonl") {
|
||||
let stem = name.trim_end_matches(".jsonl");
|
||||
if let Ok(id) = stem.parse::<SessionId>() {
|
||||
sessions.push(id);
|
||||
}
|
||||
}
|
||||
}
|
||||
// UUID v7: lexicographic sort = chronological sort, newest first
|
||||
sessions.sort_by(|a, b| b.cmp(a));
|
||||
Ok(sessions)
|
||||
}
|
||||
|
||||
async fn create_session(
|
||||
&self,
|
||||
id: SessionId,
|
||||
entries: &[LogEntry],
|
||||
) -> Result<(), StoreError> {
|
||||
let path = self.log_path(id);
|
||||
let mut content = String::new();
|
||||
for entry in entries {
|
||||
content.push_str(&serde_json::to_string(entry)?);
|
||||
content.push('\n');
|
||||
}
|
||||
fs::write(&path, content.as_bytes()).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn exists(&self, id: SessionId) -> Result<bool, StoreError> {
|
||||
Ok(self.log_path(id).exists())
|
||||
}
|
||||
|
||||
async fn append_trace(
|
||||
&self,
|
||||
id: SessionId,
|
||||
entry: &TraceEntry,
|
||||
) -> Result<(), StoreError> {
|
||||
let line = serde_json::to_string(entry)?;
|
||||
self.append_line(&self.trace_path(id), &line).await
|
||||
}
|
||||
}
|
||||
|
|
@ -1,668 +0,0 @@
|
|||
//! Built-in `inspect` tool for retrieving stored blob content.
|
||||
//!
|
||||
//! When large tool outputs are stored in a [`BlobStore`], only a summary
|
||||
//! with a `[blob:<id>]` reference is placed in conversation history.
|
||||
//! This tool lets the LLM retrieve details on demand, with optional
|
||||
//! selectors for partial access.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use serde::Deserialize;
|
||||
use serde_json::json;
|
||||
|
||||
use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta};
|
||||
use llm_worker::state::Mutable;
|
||||
use llm_worker::ToolRegistryError;
|
||||
use llm_worker::Worker;
|
||||
use llm_worker::llm_client::LlmClient;
|
||||
|
||||
use crate::blob_store::{BlobId, BlobStore};
|
||||
|
||||
// ─── Constants ───────────────────────────────────────────────────────────────
|
||||
|
||||
/// Upper bound on the number of leading text lines printed by the default
/// (no-selector) view of a text blob.
const DEFAULT_PREVIEW_LINES: usize = 50;

/// Upper bound on the number of leading elements printed by the default
/// view of a JSON array.
const DEFAULT_PREVIEW_ELEMENTS: usize = 5;

/// Upper bound on the number of object entries whose values are printed by
/// the default view of a JSON object (all keys are still listed).
const DEFAULT_PREVIEW_KEYS: usize = 3;
|
||||
|
||||
// ─── Selector ────────────────────────────────────────────────────────────────
|
||||
|
||||
/// A parsed request for one part of a blob instead of the whole content.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Selector {
    /// A run of text lines; both bounds are 1-based and inclusive.
    Lines { start: usize, end: usize },
    /// A run of array elements; `start` is 0-based, `end` is exclusive.
    Slice { start: usize, end: usize },
    /// One named entry of a JSON object.
    Key(String),
}
|
||||
|
||||
fn parse_selector(s: &str) -> Result<Selector, ToolError> {
|
||||
if let Some(rest) = s.strip_prefix("lines:") {
|
||||
let (a, b) = rest
|
||||
.split_once('-')
|
||||
.ok_or_else(|| ToolError::InvalidArgument(format!(
|
||||
"invalid lines selector '{s}': expected format lines:N-M"
|
||||
)))?;
|
||||
let start: usize = a.parse().map_err(|_| {
|
||||
ToolError::InvalidArgument(format!("invalid start line number: '{a}'"))
|
||||
})?;
|
||||
let end: usize = b.parse().map_err(|_| {
|
||||
ToolError::InvalidArgument(format!("invalid end line number: '{b}'"))
|
||||
})?;
|
||||
if start == 0 {
|
||||
return Err(ToolError::InvalidArgument(
|
||||
"line numbers are 1-based, got 0".into(),
|
||||
));
|
||||
}
|
||||
if start > end {
|
||||
return Err(ToolError::InvalidArgument(format!(
|
||||
"start line ({start}) must be <= end line ({end})"
|
||||
)));
|
||||
}
|
||||
Ok(Selector::Lines { start, end })
|
||||
} else if let Some(rest) = s.strip_prefix("slice:") {
|
||||
let (a, b) = rest
|
||||
.split_once("..")
|
||||
.ok_or_else(|| ToolError::InvalidArgument(format!(
|
||||
"invalid slice selector '{s}': expected format slice:N..M"
|
||||
)))?;
|
||||
let start: usize = a.parse().map_err(|_| {
|
||||
ToolError::InvalidArgument(format!("invalid start index: '{a}'"))
|
||||
})?;
|
||||
let end: usize = b.parse().map_err(|_| {
|
||||
ToolError::InvalidArgument(format!("invalid end index: '{b}'"))
|
||||
})?;
|
||||
if start > end {
|
||||
return Err(ToolError::InvalidArgument(format!(
|
||||
"start index ({start}) must be <= end index ({end})"
|
||||
)));
|
||||
}
|
||||
Ok(Selector::Slice { start, end })
|
||||
} else if let Some(rest) = s.strip_prefix("key:") {
|
||||
if rest.is_empty() {
|
||||
return Err(ToolError::InvalidArgument("key name must not be empty".into()));
|
||||
}
|
||||
Ok(Selector::Key(rest.to_string()))
|
||||
} else {
|
||||
Err(ToolError::InvalidArgument(format!(
|
||||
"unrecognized selector format: '{s}'. Expected: lines:N-M, slice:N..M, or key:NAME"
|
||||
)))
|
||||
}
|
||||
}
|
||||
|
||||
// ─── InspectTool ─────────────────────────────────────────────────────────────
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct InspectArgs {
|
||||
blob_id: String,
|
||||
selector: Option<String>,
|
||||
}
|
||||
|
||||
/// Built-in tool that retrieves stored blob content.
|
||||
pub struct InspectTool<B: BlobStore> {
|
||||
blob_store: Arc<B>,
|
||||
}
|
||||
|
||||
impl<B: BlobStore> InspectTool<B> {
|
||||
pub fn new(blob_store: Arc<B>) -> Self {
|
||||
Self { blob_store }
|
||||
}
|
||||
}
|
||||
|
||||
impl<B: BlobStore + 'static> InspectTool<B> {
|
||||
/// Create a [`ToolDefinition`] factory for this tool.
|
||||
pub fn tool_definition(blob_store: Arc<B>) -> ToolDefinition {
|
||||
Arc::new(move || {
|
||||
let meta = ToolMeta::new("inspect")
|
||||
.description(
|
||||
"Retrieve content from a stored blob referenced by [blob:<id>] in conversation history. \
|
||||
Supports selectors for partial access: \
|
||||
'lines:N-M' (text line range, 1-based inclusive), \
|
||||
'slice:N..M' (array element range, 0-based exclusive end), \
|
||||
'key:NAME' (object key lookup). \
|
||||
Without a selector, returns metadata and a preview.",
|
||||
)
|
||||
.input_schema(json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"blob_id": {
|
||||
"type": "string",
|
||||
"description": "The blob UUID from a [blob:<id>] reference"
|
||||
},
|
||||
"selector": {
|
||||
"type": "string",
|
||||
"description": "Optional: 'lines:N-M', 'slice:N..M', or 'key:NAME'"
|
||||
}
|
||||
},
|
||||
"required": ["blob_id"]
|
||||
}));
|
||||
let tool = Arc::new(InspectTool::new(Arc::clone(&blob_store))) as Arc<dyn Tool>;
|
||||
(meta, tool)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<B: BlobStore + 'static> Tool for InspectTool<B> {
|
||||
async fn execute(&self, input_json: &str) -> Result<String, ToolError> {
|
||||
let args: InspectArgs = serde_json::from_str(input_json)
|
||||
.map_err(|e| ToolError::InvalidArgument(format!("invalid arguments: {e}")))?;
|
||||
|
||||
let blob_id: BlobId = args
|
||||
.blob_id
|
||||
.parse()
|
||||
.map_err(|_| ToolError::InvalidArgument(format!(
|
||||
"invalid blob_id: '{}' is not a valid UUID", args.blob_id
|
||||
)))?;
|
||||
|
||||
let content = self
|
||||
.blob_store
|
||||
.load(blob_id)
|
||||
.await
|
||||
.map_err(|e| ToolError::ExecutionFailed(format!("{e}")))?;
|
||||
|
||||
match args.selector {
|
||||
None => Ok(default_view(&content)),
|
||||
Some(sel) => {
|
||||
let selector = parse_selector(&sel)?;
|
||||
apply_selector(&content, &selector)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Default view ────────────────────────────────────────────────────────────
|
||||
|
||||
use llm_worker::tool::Content;
|
||||
|
||||
fn default_view(content: &Content) -> String {
|
||||
match content {
|
||||
Content::Text(text) => default_view_text(text),
|
||||
Content::Structured(value) => default_view_structured(value),
|
||||
}
|
||||
}
|
||||
|
||||
fn default_view_text(text: &str) -> String {
|
||||
let lines: Vec<&str> = text.lines().collect();
|
||||
let total = lines.len();
|
||||
let size = text.len();
|
||||
let preview_end = total.min(DEFAULT_PREVIEW_LINES);
|
||||
|
||||
let mut out = format!("type: text\nlines: {total}\nsize: {size} bytes\n\n");
|
||||
out.push_str(&format!("── preview (lines 1-{preview_end}) ──\n"));
|
||||
for line in &lines[..preview_end] {
|
||||
out.push_str(line);
|
||||
out.push('\n');
|
||||
}
|
||||
if total > DEFAULT_PREVIEW_LINES {
|
||||
out.push_str(&format!("... ({} more lines)\n", total - DEFAULT_PREVIEW_LINES));
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
fn default_view_structured(value: &serde_json::Value) -> String {
|
||||
use serde_json::Value;
|
||||
match value {
|
||||
Value::Array(arr) => {
|
||||
let total = arr.len();
|
||||
let preview_end = total.min(DEFAULT_PREVIEW_ELEMENTS);
|
||||
let mut out = format!("type: json_array\nentries: {total}\n\n");
|
||||
out.push_str(&format!("── preview (0..{preview_end}) ──\n"));
|
||||
for item in &arr[..preview_end] {
|
||||
if let Ok(json) = serde_json::to_string_pretty(item) {
|
||||
out.push_str(&json);
|
||||
out.push('\n');
|
||||
}
|
||||
}
|
||||
if total > DEFAULT_PREVIEW_ELEMENTS {
|
||||
out.push_str(&format!("... ({} more entries)\n", total - DEFAULT_PREVIEW_ELEMENTS));
|
||||
}
|
||||
out
|
||||
}
|
||||
Value::Object(map) => {
|
||||
let total = map.len();
|
||||
let mut out = format!("type: json_object\nkeys: {total}\n\n── keys ──\n");
|
||||
for (key, val) in map.iter() {
|
||||
out.push_str(&format!("{key}: {}\n", value_type_label(val)));
|
||||
}
|
||||
// Preview first N key-value pairs
|
||||
let preview_keys: Vec<_> = map.iter().take(DEFAULT_PREVIEW_KEYS).collect();
|
||||
if !preview_keys.is_empty() {
|
||||
out.push_str("\n── preview ──\n");
|
||||
for (key, val) in preview_keys {
|
||||
if let Ok(json) = serde_json::to_string_pretty(val) {
|
||||
out.push_str(&format!("{key}: {json}\n"));
|
||||
}
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
other => {
|
||||
// Scalar — just show it
|
||||
serde_json::to_string_pretty(other).unwrap_or_default()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn value_type_label(value: &serde_json::Value) -> &'static str {
|
||||
match value {
|
||||
serde_json::Value::Null => "null",
|
||||
serde_json::Value::Bool(_) => "bool",
|
||||
serde_json::Value::Number(_) => "number",
|
||||
serde_json::Value::String(_) => "string",
|
||||
serde_json::Value::Array(_) => "array",
|
||||
serde_json::Value::Object(_) => "object",
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Selector application ────────────────────────────────────────────────────
|
||||
|
||||
fn apply_selector(content: &Content, selector: &Selector) -> Result<String, ToolError> {
|
||||
match (content, selector) {
|
||||
(Content::Text(text), Selector::Lines { start, end }) => {
|
||||
let lines: Vec<&str> = text.lines().collect();
|
||||
let total = lines.len();
|
||||
// Convert 1-based inclusive to 0-based
|
||||
let from = (*start - 1).min(total);
|
||||
let to = (*end).min(total);
|
||||
if from >= total {
|
||||
return Ok(format!("(no lines — content has {total} lines)"));
|
||||
}
|
||||
Ok(lines[from..to].join("\n"))
|
||||
}
|
||||
|
||||
(Content::Structured(serde_json::Value::Array(arr)), Selector::Slice { start, end }) => {
|
||||
let total = arr.len();
|
||||
let from = (*start).min(total);
|
||||
let to = (*end).min(total);
|
||||
let slice = &arr[from..to];
|
||||
serde_json::to_string_pretty(slice)
|
||||
.map_err(|e| ToolError::Internal(format!("JSON serialization error: {e}")))
|
||||
}
|
||||
|
||||
(Content::Structured(serde_json::Value::Object(map)), Selector::Key(key)) => {
|
||||
match map.get(key.as_str()) {
|
||||
Some(val) => serde_json::to_string_pretty(val)
|
||||
.map_err(|e| ToolError::Internal(format!("JSON serialization error: {e}"))),
|
||||
None => {
|
||||
let available: Vec<_> = map.keys().collect();
|
||||
Err(ToolError::InvalidArgument(format!(
|
||||
"key '{key}' not found. Available keys: {available:?}"
|
||||
)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Type mismatches
|
||||
(Content::Text(_), Selector::Slice { .. }) => Err(ToolError::InvalidArgument(
|
||||
"slice selector only applies to JSON arrays, but this blob contains text. Use 'lines:N-M' instead.".into(),
|
||||
)),
|
||||
(Content::Text(_), Selector::Key(_)) => Err(ToolError::InvalidArgument(
|
||||
"key selector only applies to JSON objects, but this blob contains text. Use 'lines:N-M' instead.".into(),
|
||||
)),
|
||||
(Content::Structured(_), Selector::Lines { .. }) => Err(ToolError::InvalidArgument(
|
||||
"lines selector only applies to text content, but this blob contains JSON. Use 'slice:N..M' or 'key:NAME' instead.".into(),
|
||||
)),
|
||||
(Content::Structured(serde_json::Value::Object(_)), Selector::Slice { .. }) => Err(ToolError::InvalidArgument(
|
||||
"slice selector only applies to JSON arrays, but this blob is a JSON object. Use 'key:NAME' instead.".into(),
|
||||
)),
|
||||
(Content::Structured(serde_json::Value::Array(_)), Selector::Key(_)) => Err(ToolError::InvalidArgument(
|
||||
"key selector only applies to JSON objects, but this blob is a JSON array. Use 'slice:N..M' instead.".into(),
|
||||
)),
|
||||
(Content::Structured(_), Selector::Slice { .. }) => Err(ToolError::InvalidArgument(
|
||||
"slice selector only applies to JSON arrays.".into(),
|
||||
)),
|
||||
(Content::Structured(_), Selector::Key(_)) => Err(ToolError::InvalidArgument(
|
||||
"key selector only applies to JSON objects.".into(),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Registration helper ─────────────────────────────────────────────────────
|
||||
|
||||
/// Register the `inspect` tool on a [`Worker`].
|
||||
///
|
||||
/// Call this alongside [`BlobOutputProcessor`](crate::BlobOutputProcessor)
|
||||
/// setup so the LLM can retrieve stored blob content.
|
||||
pub fn register_inspect_tool<C, B>(
|
||||
worker: &mut Worker<C, Mutable>,
|
||||
blob_store: Arc<B>,
|
||||
) -> Result<(), ToolRegistryError>
|
||||
where
|
||||
C: LlmClient,
|
||||
B: BlobStore + 'static,
|
||||
{
|
||||
worker.register_tool(InspectTool::<B>::tool_definition(blob_store))
|
||||
}
|
||||
|
||||
// ─── Tests ───────────────────────────────────────────────────────────────────
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::blob_store::{new_blob_id, BlobStoreError};
    use llm_worker::tool::Content;
    use std::collections::HashMap;
    use tokio::sync::Mutex;

    // ── In-memory BlobStore for tests ────────────────────────────────────

    /// Minimal [`BlobStore`] that keeps blobs in a map behind an async mutex.
    struct MemBlobStore {
        blobs: Mutex<HashMap<BlobId, Content>>,
    }

    impl MemBlobStore {
        fn new() -> Self {
            let blobs = Mutex::new(HashMap::new());
            Self { blobs }
        }
    }

    impl BlobStore for MemBlobStore {
        async fn store(&self, content: &Content) -> Result<BlobId, BlobStoreError> {
            let id = new_blob_id();
            let mut guard = self.blobs.lock().await;
            guard.insert(id, content.clone());
            Ok(id)
        }

        async fn load(&self, id: BlobId) -> Result<Content, BlobStoreError> {
            let guard = self.blobs.lock().await;
            match guard.get(&id) {
                Some(found) => Ok(found.clone()),
                None => Err(BlobStoreError::NotFound(id)),
            }
        }

        async fn exists(&self, id: BlobId) -> Result<bool, BlobStoreError> {
            let guard = self.blobs.lock().await;
            Ok(guard.contains_key(&id))
        }
    }

    // ── Shared helpers ───────────────────────────────────────────────────

    /// Wraps a string slice as text content.
    fn text_blob(body: &str) -> Content {
        Content::Text(body.to_string())
    }

    /// Asserts that `outcome` failed with `ToolError::InvalidArgument`.
    fn expect_invalid_argument<T: std::fmt::Debug>(outcome: Result<T, ToolError>) {
        let err = outcome.unwrap_err();
        assert!(matches!(err, ToolError::InvalidArgument(_)));
    }

    // ── Selector parsing ─────────────────────────────────────────────────

    #[test]
    fn parse_lines_valid() {
        let cases = [("lines:1-50", 1, 50), ("lines:5-5", 5, 5)];
        for (input, start, end) in cases {
            assert_eq!(
                parse_selector(input).unwrap(),
                Selector::Lines { start, end }
            );
        }
    }

    #[test]
    fn parse_lines_zero_start() {
        expect_invalid_argument(parse_selector("lines:0-5"));
    }

    #[test]
    fn parse_lines_inverted() {
        expect_invalid_argument(parse_selector("lines:50-20"));
    }

    #[test]
    fn parse_lines_missing_dash() {
        expect_invalid_argument(parse_selector("lines:20"));
    }

    #[test]
    fn parse_slice_valid() {
        let cases = [("slice:0..10", 0, 10), ("slice:3..8", 3, 8)];
        for (input, start, end) in cases {
            assert_eq!(
                parse_selector(input).unwrap(),
                Selector::Slice { start, end }
            );
        }
    }

    #[test]
    fn parse_slice_inverted() {
        expect_invalid_argument(parse_selector("slice:10..3"));
    }

    #[test]
    fn parse_key_valid() {
        // The second case checks that a colon inside the key name is kept.
        let cases = [("key:results", "results"), ("key:nested:key", "nested:key")];
        for (input, name) in cases {
            assert_eq!(
                parse_selector(input).unwrap(),
                Selector::Key(name.to_string())
            );
        }
    }

    #[test]
    fn parse_key_empty() {
        expect_invalid_argument(parse_selector("key:"));
    }

    #[test]
    fn parse_unknown_prefix() {
        expect_invalid_argument(parse_selector("unknown:foo"));
    }

    // ── Default view ─────────────────────────────────────────────────────

    #[test]
    fn default_view_text_short() {
        let view = default_view(&text_blob("line1\nline2\nline3\n"));
        for needle in ["type: text", "lines: 3", "line1"] {
            assert!(view.contains(needle));
        }
        assert!(!view.contains("more lines"));
    }

    #[test]
    fn default_view_text_long() {
        let mut body = String::new();
        for i in 1..=100 {
            body.push_str(&format!("line {i}\n"));
        }
        let view = default_view(&Content::Text(body));
        for needle in ["type: text", "lines: 100", "line 1", "line 50"] {
            assert!(view.contains(needle));
        }
        assert!(!view.contains("line 51\n"));
        assert!(view.contains("50 more lines"));
    }

    #[test]
    fn default_view_array() {
        let items: Vec<serde_json::Value> = (0..20).map(|i| json!({"id": i})).collect();
        let view = default_view(&Content::Structured(json!(items)));
        for needle in ["type: json_array", "entries: 20", "15 more entries"] {
            assert!(view.contains(needle));
        }
    }

    #[test]
    fn default_view_object() {
        let blob = Content::Structured(json!({
            "name": "test",
            "count": 42,
            "items": [1, 2, 3],
            "nested": {"a": 1}
        }));
        let view = default_view(&blob);
        for needle in ["type: json_object", "keys: 4", "── keys ──", "── preview ──"] {
            assert!(view.contains(needle));
        }
    }

    // ── Selector application ─────────────────────────────────────────────

    #[test]
    fn apply_lines_on_text() {
        let blob = text_blob("a\nb\nc\nd\ne\nf\n");
        let picked = apply_selector(&blob, &Selector::Lines { start: 2, end: 4 }).unwrap();
        assert_eq!(picked, "b\nc\nd");
    }

    #[test]
    fn apply_lines_clamp() {
        let blob = text_blob("a\nb\nc\n");
        let picked = apply_selector(&blob, &Selector::Lines { start: 2, end: 100 }).unwrap();
        assert_eq!(picked, "b\nc");
    }

    #[test]
    fn apply_lines_beyond_content() {
        let blob = text_blob("a\nb\n");
        let picked = apply_selector(&blob, &Selector::Lines { start: 10, end: 20 }).unwrap();
        assert!(picked.contains("no lines"));
    }

    #[test]
    fn apply_slice_on_array() {
        let blob = Content::Structured(json!([10, 20, 30, 40, 50]));
        let picked = apply_selector(&blob, &Selector::Slice { start: 1, end: 3 }).unwrap();
        let decoded: Vec<i64> = serde_json::from_str(&picked).unwrap();
        assert_eq!(decoded, vec![20, 30]);
    }

    #[test]
    fn apply_slice_clamp() {
        let blob = Content::Structured(json!([10, 20, 30]));
        let picked = apply_selector(&blob, &Selector::Slice { start: 1, end: 100 }).unwrap();
        let decoded: Vec<i64> = serde_json::from_str(&picked).unwrap();
        assert_eq!(decoded, vec![20, 30]);
    }

    #[test]
    fn apply_key_on_object() {
        let blob = Content::Structured(json!({"name": "test", "count": 42}));
        let picked = apply_selector(&blob, &Selector::Key("name".into())).unwrap();
        assert_eq!(picked.trim(), "\"test\"");
    }

    #[test]
    fn apply_key_not_found() {
        let blob = Content::Structured(json!({"name": "test"}));
        let outcome = apply_selector(&blob, &Selector::Key("missing".into()));
        let Err(ToolError::InvalidArgument(msg)) = outcome else {
            panic!("expected InvalidArgument");
        };
        // The message names both the requested key and the available ones.
        assert!(msg.contains("missing"));
        assert!(msg.contains("name"));
    }

    // ── Type mismatch errors ─────────────────────────────────────────────

    #[test]
    fn lines_on_json_error() {
        let blob = Content::Structured(json!([1, 2, 3]));
        expect_invalid_argument(apply_selector(&blob, &Selector::Lines { start: 1, end: 3 }));
    }

    #[test]
    fn slice_on_text_error() {
        let blob = text_blob("hello");
        expect_invalid_argument(apply_selector(&blob, &Selector::Slice { start: 0, end: 3 }));
    }

    #[test]
    fn key_on_text_error() {
        let blob = text_blob("hello");
        expect_invalid_argument(apply_selector(&blob, &Selector::Key("foo".into())));
    }

    #[test]
    fn slice_on_object_error() {
        let blob = Content::Structured(json!({"a": 1}));
        expect_invalid_argument(apply_selector(&blob, &Selector::Slice { start: 0, end: 3 }));
    }

    #[test]
    fn key_on_array_error() {
        let blob = Content::Structured(json!([1, 2, 3]));
        expect_invalid_argument(apply_selector(&blob, &Selector::Key("foo".into())));
    }

    // ── Integration via execute() ────────────────────────────────────────

    #[tokio::test]
    async fn execute_default_view() {
        let store = Arc::new(MemBlobStore::new());
        let numbered: Vec<String> = (1..=100).map(|i| format!("line {i}")).collect();
        let blob_id = store
            .store(&Content::Text(numbered.join("\n")))
            .await
            .unwrap();

        let tool = InspectTool::new(store);
        let request = json!({"blob_id": blob_id.to_string()}).to_string();
        let view = tool.execute(&request).await.unwrap();
        assert!(view.contains("type: text"));
        assert!(view.contains("lines: 100"));
    }

    #[tokio::test]
    async fn execute_with_selector() {
        let store = Arc::new(MemBlobStore::new());
        let blob = Content::Structured(json!({"name": "test", "value": 42}));
        let blob_id = store.store(&blob).await.unwrap();

        let tool = InspectTool::new(store);
        let request = json!({"blob_id": blob_id.to_string(), "selector": "key:name"}).to_string();
        let picked = tool.execute(&request).await.unwrap();
        assert_eq!(picked.trim(), "\"test\"");
    }

    #[tokio::test]
    async fn execute_invalid_blob_id() {
        let tool = InspectTool::new(Arc::new(MemBlobStore::new()));
        let request = json!({"blob_id": "not-a-uuid"}).to_string();
        let err = tool.execute(&request).await.unwrap_err();
        assert!(matches!(err, ToolError::InvalidArgument(_)));
    }

    #[tokio::test]
    async fn execute_blob_not_found() {
        let tool = InspectTool::new(Arc::new(MemBlobStore::new()));
        let unknown_id = new_blob_id();
        let request = json!({"blob_id": unknown_id.to_string()}).to_string();
        let err = tool.execute(&request).await.unwrap_err();
        assert!(matches!(err, ToolError::ExecutionFailed(_)));
    }
}
|
||||
|
|
@ -1,49 +0,0 @@
|
|||
//! Session persistence for `llm-worker` via append-only JSONL logs.
|
||||
//!
|
||||
//! # Architecture
|
||||
//!
|
||||
//! Sessions are recorded as a sequence of [`LogEntry`] values, one per line
|
||||
//! in a `.jsonl` file. Reading the log and collecting entries reconstructs
|
||||
//! the full [`Worker`] state — no separate snapshots or checkpoints needed.
|
||||
//!
|
||||
//! Debug-mode [`TraceEntry`] records capture raw stream events in a separate
|
||||
//! `.trace.jsonl` file, independent of the session log.
|
||||
//!
|
||||
//! # Quick start
|
||||
//!
|
||||
//! ```ignore
|
||||
//! use llm_worker_persistence::{Session, SessionConfig, FsStore};
|
||||
//!
|
||||
//! let store = FsStore::new("./sessions").await?;
|
||||
//! let worker = Worker::new(client);
|
||||
//! let mut session = Session::new(worker, store, SessionConfig::default()).await?;
|
||||
//! session.run("Hello!").await?;
|
||||
//! ```
|
||||
|
||||
pub mod blob_output_processor;
|
||||
pub mod blob_store;
|
||||
pub mod event_trace;
|
||||
pub mod fs_blob_store;
|
||||
pub mod fs_store;
|
||||
pub mod inspect_tool;
|
||||
pub mod session;
|
||||
pub mod session_log;
|
||||
pub mod store;
|
||||
|
||||
pub use blob_output_processor::BlobOutputProcessor;
|
||||
pub use blob_store::{BlobId, BlobStore, BlobStoreError};
|
||||
pub use inspect_tool::{InspectTool, register_inspect_tool};
|
||||
pub use event_trace::TraceEntry;
|
||||
pub use fs_blob_store::FsBlobStore;
|
||||
pub use fs_store::FsStore;
|
||||
pub use session::{Session, SessionConfig, SessionError};
|
||||
pub use session_log::{LogEntry, Outcome, RestoredState, collect_state};
|
||||
pub use store::{Store, StoreError};
|
||||
|
||||
/// Session identifier. UUID v7 (time-ordered, lexicographically sortable).
|
||||
pub type SessionId = uuid::Uuid;
|
||||
|
||||
/// Generate a new session ID.
|
||||
pub fn new_session_id() -> SessionId {
|
||||
uuid::Uuid::now_v7()
|
||||
}
|
||||
|
|
@ -1,338 +0,0 @@
|
|||
//! Persistent session wrapper around [`Worker`].
|
||||
//!
|
||||
//! [`Session`] intercepts `Worker` operations and appends [`LogEntry`] records
|
||||
//! to a [`Store`]. It does not modify `Worker` internals — all persistence
|
||||
//! happens by observing state before and after each operation.
|
||||
|
||||
use crate::session_log::{self, LogEntry, Outcome};
|
||||
use crate::store::{Store, StoreError};
|
||||
use crate::SessionId;
|
||||
use llm_worker::llm_client::client::LlmClient;
|
||||
use llm_worker::state::Mutable;
|
||||
use llm_worker::{Worker, WorkerError, WorkerResult};
|
||||
|
||||
/// Configuration for session persistence.
///
/// `Default` is derived: every option starts out at its type's default
/// value, so `record_event_trace` defaults to `false`.
#[derive(Debug, Clone, Default)]
pub struct SessionConfig {
    /// Record raw stream events to a separate trace file.
    /// Default: `false`.
    pub record_event_trace: bool,
}
|
||||
|
||||
/// Errors from session operations.
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum SessionError {
|
||||
#[error(transparent)]
|
||||
Worker(#[from] WorkerError),
|
||||
|
||||
#[error(transparent)]
|
||||
Store(#[from] StoreError),
|
||||
}
|
||||
|
||||
/// Persistent session wrapping a [`Worker`].
|
||||
///
|
||||
/// The `worker` field is public for direct access to Worker APIs
|
||||
/// (tool registration, hook setup, subscriber management, etc.).
|
||||
/// State-mutating operations (`run`, `resume`) should go through
|
||||
/// Session methods to ensure proper logging.
|
||||
pub struct Session<C: LlmClient, St: Store> {
|
||||
pub worker: Worker<C, Mutable>,
|
||||
store: St,
|
||||
session_id: SessionId,
|
||||
_config: SessionConfig,
|
||||
}
|
||||
|
||||
impl<C: LlmClient, St: Store> Session<C, St> {
|
||||
/// Create a new session, writing the initial `SessionStart` entry.
|
||||
pub async fn new(
|
||||
worker: Worker<C, Mutable>,
|
||||
store: St,
|
||||
config: SessionConfig,
|
||||
) -> Result<Self, StoreError> {
|
||||
let session_id = crate::new_session_id();
|
||||
let start = LogEntry::SessionStart {
|
||||
ts: session_log::now_millis(),
|
||||
system_prompt: worker.get_system_prompt().map(String::from),
|
||||
config: worker.request_config().clone(),
|
||||
history: worker.history().to_vec(),
|
||||
};
|
||||
store.append(session_id, &start).await?;
|
||||
|
||||
Ok(Self {
|
||||
worker,
|
||||
store,
|
||||
session_id,
|
||||
_config: config,
|
||||
})
|
||||
}
|
||||
|
||||
/// Restore a session from a stored log.
|
||||
///
|
||||
/// Reads all log entries, collects state from them,
|
||||
/// and returns a `Session` ready for `resume()`.
|
||||
pub async fn restore(
|
||||
client: C,
|
||||
store: St,
|
||||
session_id: SessionId,
|
||||
config: SessionConfig,
|
||||
) -> Result<Self, SessionError> {
|
||||
let entries = store.read_all(session_id).await?;
|
||||
let state = session_log::collect_state(&entries);
|
||||
|
||||
let mut worker = Worker::new(client);
|
||||
if let Some(ref prompt) = state.system_prompt {
|
||||
worker.set_system_prompt(prompt);
|
||||
}
|
||||
worker.set_history(state.history);
|
||||
worker.set_request_config(state.config);
|
||||
worker.set_turn_count(state.turn_count);
|
||||
worker.set_last_run_interrupted(state.last_run_interrupted);
|
||||
|
||||
Ok(Self {
|
||||
worker,
|
||||
store,
|
||||
session_id,
|
||||
_config: config,
|
||||
})
|
||||
}
|
||||
|
||||
/// The session ID.
|
||||
pub fn session_id(&self) -> SessionId {
|
||||
self.session_id
|
||||
}
|
||||
|
||||
/// Reference to the underlying store.
|
||||
pub fn store(&self) -> &St {
|
||||
&self.store
|
||||
}
|
||||
|
||||
/// Run a user turn, logging all state changes.
|
||||
pub async fn run(
|
||||
&mut self,
|
||||
user_input: impl Into<String>,
|
||||
) -> Result<WorkerResult, SessionError> {
|
||||
let history_before = self.worker.history().len();
|
||||
|
||||
let result = self.worker.run(user_input).await;
|
||||
|
||||
self.log_history_delta(history_before).await?;
|
||||
self.log_turn_end().await?;
|
||||
self.log_outcome(&result).await?;
|
||||
|
||||
result.map_err(SessionError::Worker)
|
||||
}
|
||||
|
||||
/// Resume from a paused state, logging all state changes.
|
||||
pub async fn resume(&mut self) -> Result<WorkerResult, SessionError> {
|
||||
let history_before = self.worker.history().len();
|
||||
|
||||
let result = self.worker.resume().await;
|
||||
|
||||
self.log_history_delta(history_before).await?;
|
||||
self.log_turn_end().await?;
|
||||
self.log_outcome(&result).await?;
|
||||
|
||||
result.map_err(SessionError::Worker)
|
||||
}
|
||||
|
||||
/// Fork this session at its current state.
|
||||
/// Returns the new session ID. The new log contains a `SessionStart`
|
||||
/// seeded with the current history.
|
||||
pub async fn fork(&self) -> Result<SessionId, StoreError> {
|
||||
let fork_id = crate::new_session_id();
|
||||
let start = LogEntry::SessionStart {
|
||||
ts: session_log::now_millis(),
|
||||
system_prompt: self.worker.get_system_prompt().map(String::from),
|
||||
config: self.worker.request_config().clone(),
|
||||
history: self.worker.history().to_vec(),
|
||||
};
|
||||
self.store.create_session(fork_id, &[start]).await?;
|
||||
Ok(fork_id)
|
||||
}
|
||||
|
||||
/// Fork from an arbitrary point in a stored session's log.
|
||||
/// Replays entries up to `up_to_entry` and creates a new session
|
||||
/// with that reconstructed state.
|
||||
pub async fn fork_at(
|
||||
store: &St,
|
||||
source_id: SessionId,
|
||||
up_to_entry: usize,
|
||||
) -> Result<SessionId, StoreError> {
|
||||
let entries = store.read_all(source_id).await?;
|
||||
let truncated = &entries[..up_to_entry.min(entries.len())];
|
||||
let state = session_log::collect_state(truncated);
|
||||
|
||||
let fork_id = crate::new_session_id();
|
||||
let start = LogEntry::SessionStart {
|
||||
ts: session_log::now_millis(),
|
||||
system_prompt: state.system_prompt,
|
||||
config: state.config,
|
||||
history: state.history,
|
||||
};
|
||||
store.create_session(fork_id, &[start]).await?;
|
||||
Ok(fork_id)
|
||||
}
|
||||
|
||||
/// Log a `CacheLocked` entry.
|
||||
pub async fn log_cache_locked(
|
||||
&self,
|
||||
locked_prefix_len: usize,
|
||||
) -> Result<(), StoreError> {
|
||||
self.store
|
||||
.append(
|
||||
self.session_id,
|
||||
&LogEntry::CacheLocked {
|
||||
ts: session_log::now_millis(),
|
||||
locked_prefix_len,
|
||||
},
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Log a `CacheUnlocked` entry.
|
||||
pub async fn log_cache_unlocked(&self) -> Result<(), StoreError> {
|
||||
self.store
|
||||
.append(
|
||||
self.session_id,
|
||||
&LogEntry::CacheUnlocked {
|
||||
ts: session_log::now_millis(),
|
||||
},
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Log a `ConfigChanged` entry.
|
||||
pub async fn log_config_changed(&self) -> Result<(), StoreError> {
|
||||
self.store
|
||||
.append(
|
||||
self.session_id,
|
||||
&LogEntry::ConfigChanged {
|
||||
ts: session_log::now_millis(),
|
||||
config: self.worker.request_config().clone(),
|
||||
},
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
// ── Private helpers ──────────────────────────────────────────────────
|
||||
|
||||
async fn log_history_delta(&self, before_len: usize) -> Result<(), StoreError> {
|
||||
let history = self.worker.history();
|
||||
if history.len() <= before_len {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let ts = session_log::now_millis();
|
||||
let new_items = &history[before_len..];
|
||||
let mut i = 0;
|
||||
|
||||
// Classify and group items by type.
|
||||
// The actual items from history are used (not pre-constructed copies),
|
||||
// so any modifications by hooks (e.g. on_prompt_submit) are captured correctly.
|
||||
while i < new_items.len() {
|
||||
let item = &new_items[i];
|
||||
if item.is_user_message() {
|
||||
self.store
|
||||
.append(
|
||||
self.session_id,
|
||||
&LogEntry::UserInput {
|
||||
ts,
|
||||
item: new_items[i].clone(),
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
i += 1;
|
||||
} else if item.is_tool_result() {
|
||||
let start = i;
|
||||
while i < new_items.len() && new_items[i].is_tool_result() {
|
||||
i += 1;
|
||||
}
|
||||
self.store
|
||||
.append(
|
||||
self.session_id,
|
||||
&LogEntry::ToolResults {
|
||||
ts,
|
||||
items: new_items[start..i].to_vec(),
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
} else if item.is_assistant_message()
|
||||
|| item.is_tool_call()
|
||||
|| item.is_reasoning()
|
||||
{
|
||||
let start = i;
|
||||
while i < new_items.len()
|
||||
&& (new_items[i].is_assistant_message()
|
||||
|| new_items[i].is_tool_call()
|
||||
|| new_items[i].is_reasoning())
|
||||
{
|
||||
i += 1;
|
||||
}
|
||||
self.store
|
||||
.append(
|
||||
self.session_id,
|
||||
&LogEntry::AssistantItems {
|
||||
ts,
|
||||
items: new_items[start..i].to_vec(),
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
} else {
|
||||
self.store
|
||||
.append(
|
||||
self.session_id,
|
||||
&LogEntry::HookInjectedItems {
|
||||
ts,
|
||||
items: vec![new_items[i].clone()],
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn log_turn_end(&self) -> Result<(), StoreError> {
|
||||
self.store
|
||||
.append(
|
||||
self.session_id,
|
||||
&LogEntry::TurnEnd {
|
||||
ts: session_log::now_millis(),
|
||||
turn_count: self.worker.turn_count(),
|
||||
},
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn log_outcome(
|
||||
&self,
|
||||
result: &Result<WorkerResult, WorkerError>,
|
||||
) -> Result<(), StoreError> {
|
||||
let outcome = match result {
|
||||
Ok(WorkerResult::Finished) => Outcome::Finished,
|
||||
Ok(WorkerResult::Paused) => Outcome::Paused,
|
||||
Err(e) => Outcome::Error {
|
||||
message: e.to_string(),
|
||||
},
|
||||
};
|
||||
self.store
|
||||
.append(
|
||||
self.session_id,
|
||||
&LogEntry::RunOutcome {
|
||||
ts: session_log::now_millis(),
|
||||
outcome,
|
||||
interrupted: self.worker.last_run_interrupted(),
|
||||
},
|
||||
)
|
||||
.await
|
||||
}
|
||||
}
|
||||
|
|
@ -1,285 +0,0 @@
|
|||
//! Session log types for append-only JSONL persistence.
|
||||
//!
|
||||
//! Each [`LogEntry`] represents a single state transition in a session,
|
||||
//! serialized as one line in a `.jsonl` file. Reading all entries and
|
||||
//! collecting them via [`collect_state`] reconstructs the full [`Worker`] state.
|
||||
|
||||
use llm_worker::llm_client::types::{Item, RequestConfig};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// A single session log entry, serialized as one JSONL line.
|
||||
///
|
||||
/// Variants correspond to specific mutation points in `Worker`:
|
||||
/// - `SessionStart` — always the first entry; captures initial state
|
||||
/// - `UserInput` / `AssistantItems` / `ToolResults` / `HookInjectedItems` — history appends
|
||||
/// - `TurnEnd` — turn boundary marker
|
||||
/// - `CacheLocked` / `CacheUnlocked` — KV cache state transitions
|
||||
/// - `RunOutcome` — marks end of a `run()` or `resume()` call
|
||||
/// - `ConfigChanged` — `RequestConfig` mutation
///
/// The serde attributes below make the representation internally tagged:
/// every serialized entry carries a `kind` field holding the variant name in
/// `snake_case`, next to the variant's own fields.
///
/// Every variant has a `ts` field. The session logging code fills it from
/// `now_millis()` (milliseconds since the Unix epoch); `collect_state`
/// ignores it.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(tag = "kind", rename_all = "snake_case")]
|
||||
pub enum LogEntry {
|
||||
/// Session start. Always the first entry in a log.
|
||||
/// For forked sessions, `history` contains the seed state from the parent.
/// On replay, `collect_state` overwrites system prompt, config and history
/// with the values stored here.
|
||||
SessionStart {
|
||||
ts: u64,
|
||||
system_prompt: Option<String>,
|
||||
config: RequestConfig,
|
||||
history: Vec<Item>,
|
||||
},
|
||||
|
||||
/// User input pushed to history (worker.rs:229).
/// Replay pushes `item` onto the history.
|
||||
UserInput { ts: u64, item: Item },
|
||||
|
||||
/// Assistant response items added to history (worker.rs:1040-1041).
/// Replay appends `items` to the history in order.
|
||||
AssistantItems { ts: u64, items: Vec<Item> },
|
||||
|
||||
/// Tool execution results added to history (worker.rs:897-900, 1072-1076).
/// Replay appends `items` to the history in order.
|
||||
ToolResults { ts: u64, items: Vec<Item> },
|
||||
|
||||
/// Items injected by `on_turn_end` hook via `ContinueWithMessages` (worker.rs:1055).
/// `log_history_delta` also emits this variant, one item per entry, for any
/// new history item that is not a user message, a tool result, an assistant
/// message, a tool call or a reasoning item.
|
||||
HookInjectedItems { ts: u64, items: Vec<Item> },
|
||||
|
||||
/// Turn boundary. Records the turn count after increment.
/// Replay sets the restored turn count to `turn_count`.
|
||||
TurnEnd { ts: u64, turn_count: usize },
|
||||
|
||||
/// KV cache locked. Records the history prefix length that is now immutable.
|
||||
CacheLocked { ts: u64, locked_prefix_len: usize },
|
||||
|
||||
/// KV cache unlocked.
/// Replay resets the restored locked prefix length to 0.
|
||||
CacheUnlocked { ts: u64 },
|
||||
|
||||
/// Outcome of a `run()` or `resume()` call.
|
||||
/// This is metadata for auditing; state collection does not branch on the outcome.
/// Replay only copies `interrupted` into the restored state.
|
||||
RunOutcome {
|
||||
ts: u64,
|
||||
outcome: Outcome,
|
||||
interrupted: bool,
|
||||
},
|
||||
|
||||
/// `RequestConfig` changed.
/// Holds the complete new config; replay replaces the restored config with it.
|
||||
ConfigChanged { ts: u64, config: RequestConfig },
|
||||
}
|
||||
|
||||
/// Outcome of a run/resume call. Metadata for auditing only.
///
/// Serialized with `snake_case` variant names. `collect_state` never reads
/// this value.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum Outcome {
|
||||
/// The worker call returned `Ok(WorkerResult::Finished)`.
Finished,
|
||||
/// The worker call returned `Ok(WorkerResult::Paused)`.
Paused,
|
||||
/// The worker call returned an error; `message` is that error's
/// `to_string()` text.
Error { message: String },
|
||||
}
|
||||
|
||||
/// State collected from log entries.
///
/// Produced by `collect_state`. When no entry sets a field, it keeps its
/// initial value: `None`, `RequestConfig::default()`, an empty history, `0`,
/// `0` and `false` respectively.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct RestoredState {
|
||||
/// System prompt taken from the last `SessionStart` entry replayed.
pub system_prompt: Option<String>,
|
||||
/// Config from `SessionStart`, replaced by each later `ConfigChanged`.
pub config: RequestConfig,
|
||||
/// Seed history from `SessionStart` plus every item appended by the
/// `UserInput`, `AssistantItems`, `ToolResults` and `HookInjectedItems`
/// entries, in log order.
pub history: Vec<Item>,
|
||||
/// Value of the last `TurnEnd` entry replayed.
pub turn_count: usize,
|
||||
/// Value of the last `CacheLocked` entry; reset to 0 by `CacheUnlocked`.
pub locked_prefix_len: usize,
|
||||
/// `interrupted` flag of the last `RunOutcome` entry replayed.
pub last_run_interrupted: bool,
|
||||
}
|
||||
|
||||
/// Replay a sequence of log entries to reconstruct worker state.
|
||||
pub fn collect_state(entries: &[LogEntry]) -> RestoredState {
|
||||
let mut state = RestoredState {
|
||||
system_prompt: None,
|
||||
config: RequestConfig::default(),
|
||||
history: Vec::new(),
|
||||
turn_count: 0,
|
||||
locked_prefix_len: 0,
|
||||
last_run_interrupted: false,
|
||||
};
|
||||
|
||||
for entry in entries {
|
||||
match entry {
|
||||
LogEntry::SessionStart {
|
||||
system_prompt,
|
||||
config,
|
||||
history,
|
||||
..
|
||||
} => {
|
||||
state.system_prompt = system_prompt.clone();
|
||||
state.config = config.clone();
|
||||
state.history = history.clone();
|
||||
}
|
||||
LogEntry::UserInput { item, .. } => {
|
||||
state.history.push(item.clone());
|
||||
}
|
||||
LogEntry::AssistantItems { items, .. } => {
|
||||
state.history.extend(items.iter().cloned());
|
||||
}
|
||||
LogEntry::ToolResults { items, .. } => {
|
||||
state.history.extend(items.iter().cloned());
|
||||
}
|
||||
LogEntry::HookInjectedItems { items, .. } => {
|
||||
state.history.extend(items.iter().cloned());
|
||||
}
|
||||
LogEntry::TurnEnd { turn_count, .. } => {
|
||||
state.turn_count = *turn_count;
|
||||
}
|
||||
LogEntry::CacheLocked {
|
||||
locked_prefix_len, ..
|
||||
} => {
|
||||
state.locked_prefix_len = *locked_prefix_len;
|
||||
}
|
||||
LogEntry::CacheUnlocked { .. } => {
|
||||
state.locked_prefix_len = 0;
|
||||
}
|
||||
LogEntry::RunOutcome { interrupted, .. } => {
|
||||
state.last_run_interrupted = *interrupted;
|
||||
}
|
||||
LogEntry::ConfigChanged { config, .. } => {
|
||||
state.config = config.clone();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
state
|
||||
}
|
||||
|
||||
/// Get the current timestamp in milliseconds since Unix epoch.
///
/// This never panics: log timestamps are metadata, so a misconfigured clock
/// must not abort the caller.
/// - If the system clock reports a time before the Unix epoch, `0` is
///   returned.
/// - If the millisecond count does not fit in a `u64`, `u64::MAX` is
///   returned instead of a silently truncated value.
pub fn now_millis() -> u64 {
    std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map_or(0, |elapsed| {
            // `as_millis` yields a `u128`; convert with a checked narrowing.
            u64::try_from(elapsed.as_millis()).unwrap_or(u64::MAX)
        })
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// `SessionStart` at ts 1000 with no prompt, default config and the given seed history.
    fn start_with(history: Vec<Item>) -> LogEntry {
        LogEntry::SessionStart {
            ts: 1000,
            system_prompt: None,
            config: RequestConfig::default(),
            history,
        }
    }

    /// Replaying nothing yields the empty initial state.
    #[test]
    fn replay_empty() {
        let replayed = collect_state(&[]);
        assert!(replayed.history.is_empty());
        assert_eq!(replayed.turn_count, 0);
        assert_eq!(replayed.locked_prefix_len, 0);
    }

    /// A lone `SessionStart` populates prompt, config and history.
    #[test]
    fn replay_session_start_sets_initial_state() {
        let log = [LogEntry::SessionStart {
            ts: 1000,
            system_prompt: Some("You are helpful.".into()),
            config: RequestConfig::default().with_max_tokens(1024),
            history: vec![Item::user_message("seed")],
        }];

        let replayed = collect_state(&log);
        assert_eq!(replayed.system_prompt.as_deref(), Some("You are helpful."));
        assert_eq!(replayed.config.max_tokens, Some(1024));
        assert_eq!(replayed.history.len(), 1);
    }

    /// One user message, one assistant reply, a turn end and a finished outcome.
    #[test]
    fn replay_full_turn() {
        let log = [
            start_with(Vec::new()),
            LogEntry::UserInput {
                ts: 2000,
                item: Item::user_message("Hello"),
            },
            LogEntry::AssistantItems {
                ts: 3000,
                items: vec![Item::assistant_message("Hi!")],
            },
            LogEntry::TurnEnd {
                ts: 3100,
                turn_count: 1,
            },
            LogEntry::RunOutcome {
                ts: 3200,
                outcome: Outcome::Finished,
                interrupted: false,
            },
        ];

        let replayed = collect_state(&log);
        assert_eq!(replayed.history.len(), 2);
        assert_eq!(replayed.turn_count, 1);
        assert!(!replayed.last_run_interrupted);
    }

    /// Tool call and tool result land in the history in log order.
    #[test]
    fn replay_with_tool_calls() {
        let log = [
            start_with(Vec::new()),
            LogEntry::UserInput {
                ts: 2000,
                item: Item::user_message("Check weather"),
            },
            LogEntry::AssistantItems {
                ts: 3000,
                items: vec![Item::tool_call("call_1", "get_weather", r#"{"city":"Tokyo"}"#)],
            },
            LogEntry::ToolResults {
                ts: 3500,
                items: vec![Item::tool_result("call_1", "Sunny, 25C")],
            },
            LogEntry::AssistantItems {
                ts: 4000,
                items: vec![Item::assistant_message("It's sunny in Tokyo!")],
            },
            LogEntry::TurnEnd {
                ts: 4100,
                turn_count: 1,
            },
        ];

        let replayed = collect_state(&log);
        assert_eq!(replayed.history.len(), 4);
        assert!(replayed.history[1].is_tool_call());
        assert!(replayed.history[2].is_tool_result());
    }

    /// Lock followed by unlock ends unlocked; replaying up to the lock shows the prefix.
    #[test]
    fn replay_cache_lock_unlock() {
        let log = [
            start_with(vec![Item::user_message("a"), Item::assistant_message("b")]),
            LogEntry::CacheLocked {
                ts: 2000,
                locked_prefix_len: 2,
            },
            LogEntry::CacheUnlocked { ts: 3000 },
        ];

        let after_unlock = collect_state(&log);
        assert_eq!(after_unlock.locked_prefix_len, 0);

        // Same log without the trailing unlock entry.
        let while_locked = collect_state(&log[..2]);
        assert_eq!(while_locked.locked_prefix_len, 2);
    }

    /// A `ConfigChanged` entry replaces the config from `SessionStart`.
    #[test]
    fn replay_config_changed() {
        let log = [
            start_with(Vec::new()),
            LogEntry::ConfigChanged {
                ts: 2000,
                config: RequestConfig::default().with_temperature(0.5),
            },
        ];

        let replayed = collect_state(&log);
        assert_eq!(replayed.config.temperature, Some(0.5));
    }
}
|
||||
|
|
@ -1,68 +0,0 @@
|
|||
//! Persistence backend abstraction.
|
||||
//!
|
||||
//! [`Store`] defines the async interface for reading and writing session logs.
|
||||
//! Implementations handle the physical storage (filesystem, database, etc.).
|
||||
|
||||
use crate::event_trace::TraceEntry;
|
||||
use crate::session_log::LogEntry;
|
||||
use crate::SessionId;
|
||||
use std::future::Future;
|
||||
|
||||
/// Errors from the persistence store.
///
/// `Display` text comes from the `#[error(...)]` attributes. The `Io` and
/// `Serde` variants are marked `#[from]`, so `?` converts the underlying
/// error types into `StoreError` automatically.
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum StoreError {
|
||||
/// An underlying `std::io::Error`.
#[error("I/O error: {0}")]
|
||||
Io(#[from] std::io::Error),
|
||||
|
||||
/// A `serde_json` error.
#[error("serialization error: {0}")]
|
||||
Serde(#[from] serde_json::Error),
|
||||
|
||||
/// No session with the given id is known to the store.
#[error("session not found: {0}")]
|
||||
NotFound(SessionId),
|
||||
|
||||
/// The stored log could not be interpreted; `line` and `message` locate
/// and describe the problem.
// NOTE(review): whether `line` is 0- or 1-based is decided by the store
// implementation, which is not visible here — confirm.
#[error("log corrupted at line {line}: {message}")]
|
||||
Corrupt { line: usize, message: String },
|
||||
}
|
||||
|
||||
/// Async persistence backend for session logs.
|
||||
///
|
||||
/// All methods take `&self` — implementations should use interior mutability
|
||||
/// (e.g., append-mode file handles) when needed.
///
/// Methods are declared as `fn ... -> impl Future<...> + Send` so that every
/// returned future is required to be `Send`; implementors themselves must be
/// `Send + Sync`.
|
||||
pub trait Store: Send + Sync {
|
||||
/// Append a single log entry to the session.
// NOTE(review): the FsStore tests append to a fresh id without calling
// `create_session` first, so appending presumably creates the session on
// demand — confirm this is part of the contract for all backends.
|
||||
fn append(
|
||||
&self,
|
||||
id: SessionId,
|
||||
entry: &LogEntry,
|
||||
) -> impl Future<Output = Result<(), StoreError>> + Send;
|
||||
|
||||
/// Read all log entries for a session, in order.
// NOTE(review): the FsStore test only asserts that an unknown id yields an
// error; presumably `StoreError::NotFound` — confirm.
|
||||
fn read_all(
|
||||
&self,
|
||||
id: SessionId,
|
||||
) -> impl Future<Output = Result<Vec<LogEntry>, StoreError>> + Send;
|
||||
|
||||
/// List all session IDs, most recent first.
|
||||
fn list_sessions(&self)
|
||||
-> impl Future<Output = Result<Vec<SessionId>, StoreError>> + Send;
|
||||
|
||||
/// Create a new session with initial entries.
/// `entries` is the initial content of the log; the session code passes a
/// single `SessionStart` entry here when forking.
|
||||
fn create_session(
|
||||
&self,
|
||||
id: SessionId,
|
||||
entries: &[LogEntry],
|
||||
) -> impl Future<Output = Result<(), StoreError>> + Send;
|
||||
|
||||
/// Check if a session exists.
|
||||
fn exists(
|
||||
&self,
|
||||
id: SessionId,
|
||||
) -> impl Future<Output = Result<bool, StoreError>> + Send;
|
||||
|
||||
/// Append a trace entry to the debug event trace file.
/// Trace entries are kept apart from the session log: the FsStore test
/// checks that `read_all` is unaffected by them.
|
||||
fn append_trace(
|
||||
&self,
|
||||
id: SessionId,
|
||||
entry: &TraceEntry,
|
||||
) -> impl Future<Output = Result<(), StoreError>> + Send;
|
||||
}
|
||||
|
|
@ -1,176 +0,0 @@
|
|||
use llm_worker::llm_client::types::{Item, RequestConfig};
|
||||
use llm_worker_persistence::{
|
||||
FsStore, LogEntry, Outcome, Store, TraceEntry, new_session_id, collect_state,
|
||||
};
|
||||
|
||||
#[tokio::test]
|
||||
async fn round_trip_write_and_read() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let store = FsStore::new(dir.path()).await.unwrap();
|
||||
let id = new_session_id();
|
||||
|
||||
let entries = vec![
|
||||
LogEntry::SessionStart {
|
||||
ts: 1000,
|
||||
system_prompt: Some("You are helpful.".into()),
|
||||
config: RequestConfig::default().with_max_tokens(1024),
|
||||
history: vec![],
|
||||
},
|
||||
LogEntry::UserInput {
|
||||
ts: 2000,
|
||||
item: Item::user_message("Hello"),
|
||||
},
|
||||
LogEntry::AssistantItems {
|
||||
ts: 3000,
|
||||
items: vec![Item::assistant_message("Hi there!")],
|
||||
},
|
||||
LogEntry::TurnEnd {
|
||||
ts: 3100,
|
||||
turn_count: 1,
|
||||
},
|
||||
LogEntry::RunOutcome {
|
||||
ts: 3200,
|
||||
outcome: Outcome::Finished,
|
||||
interrupted: false,
|
||||
},
|
||||
];
|
||||
|
||||
// Write entries one by one
|
||||
for entry in &entries {
|
||||
store.append(id, entry).await.unwrap();
|
||||
}
|
||||
|
||||
// Read back
|
||||
let read_back = store.read_all(id).await.unwrap();
|
||||
assert_eq!(read_back.len(), entries.len());
|
||||
|
||||
// Replay and verify state
|
||||
let state = collect_state(&read_back);
|
||||
assert_eq!(state.system_prompt.as_deref(), Some("You are helpful."));
|
||||
assert_eq!(state.config.max_tokens, Some(1024));
|
||||
assert_eq!(state.history.len(), 2);
|
||||
assert_eq!(state.turn_count, 1);
|
||||
assert!(!state.last_run_interrupted);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn create_session_writes_all_entries() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let store = FsStore::new(dir.path()).await.unwrap();
|
||||
let id = new_session_id();
|
||||
|
||||
let entries = vec![
|
||||
LogEntry::SessionStart {
|
||||
ts: 1000,
|
||||
system_prompt: None,
|
||||
config: RequestConfig::default(),
|
||||
history: vec![Item::user_message("seed"), Item::assistant_message("ok")],
|
||||
},
|
||||
];
|
||||
|
||||
store.create_session(id, &entries).await.unwrap();
|
||||
let read_back = store.read_all(id).await.unwrap();
|
||||
assert_eq!(read_back.len(), 1);
|
||||
|
||||
let state = collect_state(&read_back);
|
||||
assert_eq!(state.history.len(), 2);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn list_sessions_returns_newest_first() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let store = FsStore::new(dir.path()).await.unwrap();
|
||||
|
||||
let id1 = new_session_id();
|
||||
// Small delay to ensure different UUID v7 timestamps
|
||||
tokio::time::sleep(std::time::Duration::from_millis(2)).await;
|
||||
let id2 = new_session_id();
|
||||
|
||||
let start = LogEntry::SessionStart {
|
||||
ts: 1000,
|
||||
system_prompt: None,
|
||||
config: RequestConfig::default(),
|
||||
history: vec![],
|
||||
};
|
||||
|
||||
store.append(id1, &start).await.unwrap();
|
||||
store.append(id2, &start).await.unwrap();
|
||||
|
||||
let sessions = store.list_sessions().await.unwrap();
|
||||
assert_eq!(sessions.len(), 2);
|
||||
assert_eq!(sessions[0], id2); // newest first
|
||||
assert_eq!(sessions[1], id1);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn exists_returns_correct_state() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let store = FsStore::new(dir.path()).await.unwrap();
|
||||
let id = new_session_id();
|
||||
|
||||
assert!(!store.exists(id).await.unwrap());
|
||||
|
||||
store
|
||||
.append(
|
||||
id,
|
||||
&LogEntry::SessionStart {
|
||||
ts: 1000,
|
||||
system_prompt: None,
|
||||
config: RequestConfig::default(),
|
||||
history: vec![],
|
||||
},
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert!(store.exists(id).await.unwrap());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn not_found_error_for_missing_session() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let store = FsStore::new(dir.path()).await.unwrap();
|
||||
let id = new_session_id();
|
||||
|
||||
let result = store.read_all(id).await;
|
||||
assert!(result.is_err());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn trace_entries_in_separate_file() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let store = FsStore::new(dir.path()).await.unwrap();
|
||||
let id = new_session_id();
|
||||
|
||||
// Write a log entry
|
||||
store
|
||||
.append(
|
||||
id,
|
||||
&LogEntry::SessionStart {
|
||||
ts: 1000,
|
||||
system_prompt: None,
|
||||
config: RequestConfig::default(),
|
||||
history: vec![],
|
||||
},
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
// Write a trace entry
|
||||
let trace = TraceEntry {
|
||||
ts: 1500,
|
||||
turn: 0,
|
||||
event: llm_worker::llm_client::event::Event::Ping(
|
||||
llm_worker::llm_client::event::PingEvent { timestamp: None },
|
||||
),
|
||||
};
|
||||
store.append_trace(id, &trace).await.unwrap();
|
||||
|
||||
// Log should have 1 entry, unaffected by trace
|
||||
let log = store.read_all(id).await.unwrap();
|
||||
assert_eq!(log.len(), 1);
|
||||
|
||||
// Trace file should exist separately
|
||||
let trace_path = dir.path().join(format!("{id}.trace.jsonl"));
|
||||
assert!(trace_path.exists());
|
||||
}
|
||||
|
|
@ -1,335 +0,0 @@
|
|||
mod common;
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use common::MockLlmClient;
|
||||
use llm_worker::hook::{Hook, HookError, OnTurnEnd, OnTurnEndResult};
|
||||
use llm_worker::llm_client::event::{Event, ResponseStatus, StatusEvent};
|
||||
use llm_worker::llm_client::types::{Item, RequestConfig};
|
||||
use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta};
|
||||
use llm_worker::Worker;
|
||||
use llm_worker_persistence::{
|
||||
FsStore, LogEntry, Outcome, Session, SessionConfig, Store, collect_state,
|
||||
};
|
||||
|
||||
// =============================================================================
|
||||
// Helpers
|
||||
// =============================================================================
|
||||
|
||||
fn simple_text_events() -> Vec<Event> {
|
||||
vec![
|
||||
Event::text_block_start(0),
|
||||
Event::text_delta(0, "Hello!"),
|
||||
Event::text_block_stop(0, None),
|
||||
Event::Status(StatusEvent {
|
||||
status: ResponseStatus::Completed,
|
||||
}),
|
||||
]
|
||||
}
|
||||
|
||||
fn tool_call_events() -> Vec<Vec<Event>> {
|
||||
vec![
|
||||
// 1st response: tool call
|
||||
vec![
|
||||
Event::tool_use_start(0, "call_1", "get_weather"),
|
||||
Event::tool_input_delta(0, r#"{"city":"Tokyo"}"#),
|
||||
Event::tool_use_stop(0),
|
||||
Event::Status(StatusEvent {
|
||||
status: ResponseStatus::Completed,
|
||||
}),
|
||||
],
|
||||
// 2nd response: final text
|
||||
vec![
|
||||
Event::text_block_start(0),
|
||||
Event::text_delta(0, "It's sunny in Tokyo!"),
|
||||
Event::text_block_stop(0, None),
|
||||
Event::Status(StatusEvent {
|
||||
status: ResponseStatus::Completed,
|
||||
}),
|
||||
],
|
||||
]
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct MockWeatherTool;
|
||||
|
||||
#[async_trait]
|
||||
impl Tool for MockWeatherTool {
|
||||
async fn execute(&self, _input_json: &str) -> Result<String, ToolError> {
|
||||
Ok("Sunny, 25C".to_string())
|
||||
}
|
||||
}
|
||||
|
||||
fn weather_tool_definition() -> ToolDefinition {
|
||||
Arc::new(|| {
|
||||
let meta = ToolMeta::new("get_weather")
|
||||
.description("Get weather")
|
||||
.input_schema(serde_json::json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"city": { "type": "string" }
|
||||
},
|
||||
"required": ["city"]
|
||||
}));
|
||||
(meta, Arc::new(MockWeatherTool) as Arc<dyn Tool>)
|
||||
})
|
||||
}
|
||||
|
||||
/// Hook that forces Pause on the first turn end.
|
||||
struct PauseOnFirstTurnEnd;
|
||||
|
||||
#[async_trait]
|
||||
impl Hook<OnTurnEnd> for PauseOnFirstTurnEnd {
|
||||
async fn call(&self, _input: &mut Vec<Item>) -> Result<OnTurnEndResult, HookError> {
|
||||
Ok(OnTurnEndResult::Paused)
|
||||
}
|
||||
}
|
||||
|
||||
async fn make_store() -> (tempfile::TempDir, FsStore) {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let store = FsStore::new(dir.path()).await.unwrap();
|
||||
(dir, store)
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Tests
|
||||
// =============================================================================
|
||||
|
||||
#[tokio::test]
|
||||
async fn session_run_logs_entries() {
|
||||
let (_dir, store) = make_store().await;
|
||||
let client = MockLlmClient::new(simple_text_events());
|
||||
let worker = Worker::new(client);
|
||||
|
||||
let mut session = Session::new(worker, store.clone(), SessionConfig::default())
|
||||
.await
|
||||
.unwrap();
|
||||
let sid = session.session_id();
|
||||
|
||||
session.run("Hi").await.unwrap();
|
||||
|
||||
let entries = store.read_all(sid).await.unwrap();
|
||||
|
||||
// SessionStart, UserInput, AssistantItems, TurnEnd, RunOutcome (at minimum)
|
||||
assert!(entries.len() >= 4, "expected at least 4 entries, got {}", entries.len());
|
||||
|
||||
// First entry is SessionStart
|
||||
assert!(matches!(entries[0], LogEntry::SessionStart { .. }));
|
||||
|
||||
// Has a RunOutcome with Finished
|
||||
let has_finished = entries.iter().any(|e| matches!(
|
||||
e,
|
||||
LogEntry::RunOutcome { outcome: Outcome::Finished, .. }
|
||||
));
|
||||
assert!(has_finished, "should have a Finished outcome");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn session_restore_round_trip() {
|
||||
let (_dir, store) = make_store().await;
|
||||
let client = MockLlmClient::new(simple_text_events());
|
||||
let mut worker = Worker::new(client);
|
||||
worker.set_system_prompt("You are helpful.");
|
||||
|
||||
let mut session = Session::new(worker, store.clone(), SessionConfig::default())
|
||||
.await
|
||||
.unwrap();
|
||||
let sid = session.session_id();
|
||||
|
||||
session.run("Hi").await.unwrap();
|
||||
|
||||
let original_history = session.worker.history().to_vec();
|
||||
let original_turn_count = session.worker.turn_count();
|
||||
|
||||
// Restore
|
||||
let restore_client = MockLlmClient::new(vec![]); // won't be called
|
||||
let restored = Session::restore(restore_client, store.clone(), sid, SessionConfig::default())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(restored.worker.history().len(), original_history.len());
|
||||
assert_eq!(restored.worker.turn_count(), original_turn_count);
|
||||
assert_eq!(
|
||||
restored.worker.get_system_prompt().map(String::from),
|
||||
Some("You are helpful.".to_string())
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn session_run_with_tool_call() {
|
||||
let (_dir, store) = make_store().await;
|
||||
let client = MockLlmClient::with_responses(tool_call_events());
|
||||
let mut worker = Worker::new(client);
|
||||
worker.register_tool(weather_tool_definition()).unwrap();
|
||||
|
||||
let mut session = Session::new(worker, store.clone(), SessionConfig::default())
|
||||
.await
|
||||
.unwrap();
|
||||
let sid = session.session_id();
|
||||
|
||||
session.run("What's the weather?").await.unwrap();
|
||||
|
||||
let entries = store.read_all(sid).await.unwrap();
|
||||
|
||||
let has_tool_results = entries.iter().any(|e| matches!(e, LogEntry::ToolResults { .. }));
|
||||
assert!(has_tool_results, "should have ToolResults entry");
|
||||
|
||||
let has_assistant = entries.iter().any(|e| matches!(e, LogEntry::AssistantItems { .. }));
|
||||
assert!(has_assistant, "should have AssistantItems entry");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn session_resume_after_pause() {
|
||||
let (_dir, store) = make_store().await;
|
||||
|
||||
// First run: tool call with pause hook → Paused
|
||||
let client = MockLlmClient::with_responses(tool_call_events());
|
||||
let mut worker = Worker::new(client);
|
||||
worker.register_tool(weather_tool_definition()).unwrap();
|
||||
worker.add_on_turn_end_hook(PauseOnFirstTurnEnd);
|
||||
|
||||
let mut session = Session::new(worker, store.clone(), SessionConfig::default())
|
||||
.await
|
||||
.unwrap();
|
||||
let sid = session.session_id();
|
||||
|
||||
let result = session.run("Weather?").await.unwrap();
|
||||
assert!(matches!(result, llm_worker::WorkerResult::Paused));
|
||||
|
||||
// Check RunOutcome is Paused
|
||||
let entries = store.read_all(sid).await.unwrap();
|
||||
let has_paused = entries.iter().any(|e| matches!(
|
||||
e,
|
||||
LogEntry::RunOutcome { outcome: Outcome::Paused, .. }
|
||||
));
|
||||
assert!(has_paused, "should have Paused outcome");
|
||||
|
||||
// Restore and resume
|
||||
let resume_client = MockLlmClient::with_responses(vec![vec![
|
||||
Event::text_block_start(0),
|
||||
Event::text_delta(0, "After resume"),
|
||||
Event::text_block_stop(0, None),
|
||||
Event::Status(StatusEvent {
|
||||
status: ResponseStatus::Completed,
|
||||
}),
|
||||
]]);
|
||||
let mut restored = Session::restore(resume_client, store.clone(), sid, SessionConfig::default())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert!(restored.worker.last_run_interrupted());
|
||||
|
||||
// resume may or may not succeed depending on Worker internal state,
|
||||
// but the restore itself should work
|
||||
let _ = restored.resume().await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn session_fork_preserves_state() {
|
||||
let (_dir, store) = make_store().await;
|
||||
let client = MockLlmClient::new(simple_text_events());
|
||||
let mut worker = Worker::new(client);
|
||||
worker.set_system_prompt("System prompt");
|
||||
|
||||
let mut session = Session::new(worker, store.clone(), SessionConfig::default())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
session.run("Hello").await.unwrap();
|
||||
|
||||
let original_history_len = session.worker.history().len();
|
||||
let fork_id = session.fork().await.unwrap();
|
||||
|
||||
// Fork should have a SessionStart with the current history
|
||||
let fork_entries = store.read_all(fork_id).await.unwrap();
|
||||
assert_eq!(fork_entries.len(), 1);
|
||||
assert!(matches!(&fork_entries[0], LogEntry::SessionStart { .. }));
|
||||
|
||||
let fork_state = collect_state(&fork_entries);
|
||||
assert_eq!(fork_state.history.len(), original_history_len);
|
||||
assert_eq!(fork_state.system_prompt.as_deref(), Some("System prompt"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn session_fork_at_truncates() {
|
||||
let (_dir, store) = make_store().await;
|
||||
let client = MockLlmClient::new(simple_text_events());
|
||||
let worker = Worker::new(client);
|
||||
|
||||
let mut session = Session::new(worker, store.clone(), SessionConfig::default())
|
||||
.await
|
||||
.unwrap();
|
||||
let sid = session.session_id();
|
||||
|
||||
session.run("Hello").await.unwrap();
|
||||
|
||||
let all_entries = store.read_all(sid).await.unwrap();
|
||||
assert!(all_entries.len() > 2);
|
||||
|
||||
// Fork at entry 2 (SessionStart + UserInput only)
|
||||
let fork_id = Session::<MockLlmClient, FsStore>::fork_at(&store, sid, 2)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let fork_entries = store.read_all(fork_id).await.unwrap();
|
||||
assert_eq!(fork_entries.len(), 1); // Just the new SessionStart
|
||||
|
||||
let fork_state = collect_state(&fork_entries);
|
||||
// Should have the state from replaying only the first 2 entries
|
||||
let original_truncated_state = collect_state(&all_entries[..2]);
|
||||
assert_eq!(fork_state.history.len(), original_truncated_state.history.len());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn session_config_changed_logged() {
|
||||
let (_dir, store) = make_store().await;
|
||||
let client = MockLlmClient::new(vec![]);
|
||||
let worker = Worker::new(client);
|
||||
|
||||
let mut session = Session::new(worker, store.clone(), SessionConfig::default())
|
||||
.await
|
||||
.unwrap();
|
||||
let sid = session.session_id();
|
||||
|
||||
// Modify config via worker and log it
|
||||
session.worker.set_request_config(RequestConfig::default().with_temperature(0.7));
|
||||
session.log_config_changed().await.unwrap();
|
||||
|
||||
let entries = store.read_all(sid).await.unwrap();
|
||||
let has_config_changed = entries.iter().any(|e| matches!(
|
||||
e,
|
||||
LogEntry::ConfigChanged { config, .. } if config.temperature == Some(0.7)
|
||||
));
|
||||
assert!(has_config_changed, "should have ConfigChanged entry");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn session_cache_lock_unlock_logged() {
|
||||
let (_dir, store) = make_store().await;
|
||||
let client = MockLlmClient::new(vec![]);
|
||||
let worker = Worker::new(client);
|
||||
|
||||
let session = Session::new(worker, store.clone(), SessionConfig::default())
|
||||
.await
|
||||
.unwrap();
|
||||
let sid = session.session_id();
|
||||
|
||||
session.log_cache_locked(5).await.unwrap();
|
||||
session.log_cache_unlocked().await.unwrap();
|
||||
|
||||
let entries = store.read_all(sid).await.unwrap();
|
||||
|
||||
let has_locked = entries.iter().any(|e| matches!(
|
||||
e,
|
||||
LogEntry::CacheLocked { locked_prefix_len: 5, .. }
|
||||
));
|
||||
assert!(has_locked, "should have CacheLocked entry");
|
||||
|
||||
let has_unlocked = entries.iter().any(|e| matches!(e, LogEntry::CacheUnlocked { .. }));
|
||||
assert!(has_unlocked, "should have CacheUnlocked entry");
|
||||
|
||||
// State after all entries: unlocked
|
||||
let state = collect_state(&entries);
|
||||
assert_eq!(state.locked_prefix_len, 0);
|
||||
}
|
||||
|
|
@ -6,22 +6,24 @@ edition.workspace = true
|
|||
license.workspace = true
|
||||
|
||||
[dependencies]
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
thiserror = "2.0"
|
||||
tracing = "0.1"
|
||||
async-trait = "0.1"
|
||||
futures = "0.3"
|
||||
tokio = { version = "1.49", features = ["macros", "rt-multi-thread"] }
|
||||
serde = { workspace = true, features = ["derive"] }
|
||||
serde_json = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
async-trait = { workspace = true }
|
||||
futures = { workspace = true }
|
||||
tokio = { workspace = true, features = ["macros", "rt-multi-thread", "time"] }
|
||||
tokio-util = "0.7"
|
||||
reqwest = { version = "0.13.1", default-features = false, features = ["stream", "json", "native-tls", "http2"] }
|
||||
reqwest = { version = "0.13", default-features = false, features = ["stream", "json", "native-tls", "http2"] }
|
||||
eventsource-stream = "0.2"
|
||||
llm-worker-macros = { path = "../llm-worker-macros", version = "0.2" }
|
||||
zstd = "0.13"
|
||||
llm-worker-macros = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
clap = { version = "4.5", features = ["derive", "env"] }
|
||||
schemars = "1.2"
|
||||
tempfile = "3.24"
|
||||
schemars = { workspace = true }
|
||||
tempfile = { workspace = true }
|
||||
dotenv = "0.15"
|
||||
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
||||
trybuild = "1.0.116"
|
||||
wiremock = "0.6.5"
|
||||
|
|
|
|||
23
crates/llm-worker/README.md
Normal file
23
crates/llm-worker/README.md
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
# llm-worker
|
||||
|
||||
LLM との対話を管理する低レベル基盤クレート。会話履歴、ツール実行、イベントストリーミング、ライフサイクルフックを統合した `Worker` 抽象を提供する。
|
||||
|
||||
## 公開型
|
||||
|
||||
### コア
|
||||
|
||||
- `Worker<C, S>` — LLM 対話の中央管理(ターン実行、ツール呼び出し、キャンセル)
|
||||
- `WorkerConfig` / `WorkerResult` / `WorkerError` — 設定・実行結果・エラー
|
||||
- `Item` / `ContentPart` / `Role` — 会話履歴の構成要素
|
||||
|
||||
### モジュール
|
||||
|
||||
- `llm_client` — プロバイダ抽象(`LlmClient` トレイト、`Request`, `RequestConfig`, Anthropic/OpenAI/Gemini/Ollama 実装)
|
||||
- `tool` — ツール定義・実行(`Tool` トレイト、`ToolDefinition`, `ToolOutput`, サイズ判定による Inline/Stored 切替)
|
||||
- `tool_server` — ツール登録・ルックアップ(`ToolServer`, `ToolServerHandle`)
|
||||
- `hook` — 実行フローへの介入ポイント(`Hook` トレイト、`PreToolCall`, `PostToolCall`, `OnTurnEnd` など)
|
||||
- クロージャベースイベント購読(`Worker::on_text_block()`, `on_tool_use_block()`, `on_usage()` 等)
|
||||
- `timeline` — イベントストリームのディスパッチ(`Handler` トレイト、各ブロックコレクター)。パワーユーザー向けに `timeline_mut()` も提供
|
||||
- `event` — ストリーミングイベント型(`Event`, `BlockStart`, `BlockDelta` など)
|
||||
- `state` — 型状態パターンによるキャッシュ保護(`Mutable` / `CacheLocked`)
|
||||
crates の整理: Add READMEs to all crates
|
||||
|
|
@ -33,7 +33,7 @@ llm-workerは3層構成でLLMとのインタラクションを管理する。
|
|||
| `tool` / `tool_server` | ツール定義・登録・実行 | R3 |
|
||||
| `timeline` | イベントストリーム処理、Handler dispatch | — |
|
||||
| `handler` | Handler/Kind trait、ブロック別ハンドラ | — |
|
||||
| `subscriber` | WorkerSubscriber trait、UI向けイベント配信 | — |
|
||||
| `callback` | クロージャベースイベント購読(`on_text_block`, `on_usage` 等) | — |
|
||||
| `llm_client` | LLMプロバイダへのHTTPリクエスト/ストリーミング | — |
|
||||
| `llm_client/scheme` | プロバイダ固有ワイヤーフォーマット変換 | — |
|
||||
| `llm_client/providers` | Anthropic, OpenAI, Gemini, Ollama実装 | — |
|
||||
|
|
|
|||
|
|
@ -1,132 +0,0 @@
|
|||
# ツール出力の遅延読み込み設計
|
||||
|
||||
## 課題
|
||||
|
||||
ツール実行結果(ファイル内容、検索結果等)はサイズが予測不能で、
|
||||
全量を `Item::ToolResult { output: String }` として LLM コンテキストに
|
||||
載せると、トークン消費が爆発する。
|
||||
|
||||
## 方針
|
||||
|
||||
- ツール出力に **Inline / Stored** の区別を導入する
|
||||
- Stored な出力は **BlobStore** に保存し、履歴には要約のみ載せる
|
||||
- LLM が詳細を見たい場合は **inspect ツール** で部分取得する
|
||||
|
||||
## データ型
|
||||
|
||||
### ToolOutput(llm-worker 側)
|
||||
|
||||
```rust
|
||||
pub enum ToolOutput {
|
||||
/// 小さな結果: そのまま history に載る
|
||||
Inline(String),
|
||||
/// 大きな結果: summary だけ history に載り、全体は BlobStore に保存される
|
||||
Stored {
|
||||
summary: String,
|
||||
content: Content,
|
||||
},
|
||||
}
|
||||
|
||||
pub enum Content {
|
||||
Text(String),
|
||||
Structured(serde_json::Value),
|
||||
}
|
||||
```
|
||||
|
||||
- `Tool::execute()` の戻り値は `Result<String, ToolError>` のまま据え置き
|
||||
- `From<String> for ToolOutput` で閾値ベースの自動昇格を行う
|
||||
- ツール実装者が明示的に `ToolOutput` を返したい場合は別トレイトメソッドを用意
|
||||
|
||||
### BlobStore(llm-worker-persistence 側)
|
||||
|
||||
```rust
|
||||
pub type BlobId = uuid::Uuid; // UUID v7
|
||||
|
||||
pub trait BlobStore: Send + Sync {
|
||||
fn store(&self, content: &Content) -> impl Future<Output = Result<BlobId, BlobStoreError>> + Send;
|
||||
fn load(&self, id: BlobId) -> impl Future<Output = Result<Content, BlobStoreError>> + Send;
|
||||
fn exists(&self, id: BlobId) -> impl Future<Output = Result<bool, BlobStoreError>> + Send;
|
||||
}
|
||||
```
|
||||
|
||||
### FsBlobStore レイアウト
|
||||
|
||||
```
|
||||
blobs/
|
||||
├── {blob_id}.txt # Content::Text
|
||||
└── {blob_id}.json # Content::Structured
|
||||
```
|
||||
|
||||
セッションとは独立したフラットなストア。セッションとの紐付けは
|
||||
ログ側の参照(summary 内の `[blob:<id>]`)で行う。
|
||||
|
||||
## 自動サマリ
|
||||
|
||||
`From<String>` による自動昇格時のサマリ生成ルール:
|
||||
|
||||
| 項目 | 値 |
|
||||
|---|---|
|
||||
| Inline 閾値 | 800 bytes |
|
||||
| サマリ上限 | 400 bytes |
|
||||
| 先頭行数 | 5 行 |
|
||||
| 末尾行数 | 3 行 |
|
||||
|
||||
### Text のサマリ形式
|
||||
|
||||
```
|
||||
[blob:<id>] text | {N} lines
|
||||
── head ──
|
||||
{先頭5行}
|
||||
── tail ──
|
||||
{末尾3行}
|
||||
```
|
||||
|
||||
### Structured (JSON Array) のサマリ形式
|
||||
|
||||
```
|
||||
[blob:<id>] json_array | {N} entries
|
||||
── schema ──
|
||||
{最初の要素のキー: 型}
|
||||
── head ──
|
||||
{先頭2要素}
|
||||
```
|
||||
|
||||
### Structured (JSON Object) のサマリ形式
|
||||
|
||||
```
|
||||
[blob:<id>] json_object | {N} keys
|
||||
── keys ──
|
||||
{キー一覧と各値の型/サイズ}
|
||||
```
|
||||
|
||||
## Worker への統合
|
||||
|
||||
```
|
||||
Tool::execute() → Result<String, ToolError>
|
||||
│
|
||||
▼ From<String> for ToolOutput
|
||||
ToolOutput::Inline(s) ← len ≤ 800
|
||||
ToolOutput::Stored { .. } ← len > 800
|
||||
│
|
||||
▼ Worker が BlobStore に保存
|
||||
Item::ToolResult { output: summary } ← history に載る
|
||||
│
|
||||
▼ LLM が詳細を見たい場合
|
||||
inspect(blob_id, selector?) → 部分取得
|
||||
```
|
||||
|
||||
Worker はオプショナルに `BlobStore` を保持する。
|
||||
BlobStore が未設定の場合は従来通り全量 Inline として扱う。
|
||||
|
||||
## inspect ツール
|
||||
|
||||
Worker に BlobStore が設定されている場合、自動的に登録される組み込みツール。
|
||||
|
||||
```
|
||||
inspect(blob_id, selector?)
|
||||
```
|
||||
|
||||
- selector 省略: メタ情報 + 先頭部分
|
||||
- `lines:20-50`: 行範囲(Text 用)
|
||||
- `slice:3..8`: インデックス範囲(Array 用)
|
||||
- `key:results`: キー指定(Object 用)
|
||||
|
|
@ -20,9 +20,16 @@ mod recorder;
|
|||
mod scenarios;
|
||||
|
||||
use clap::{Parser, ValueEnum};
|
||||
use llm_worker::llm_client::providers::anthropic::AnthropicClient;
|
||||
use llm_worker::llm_client::providers::gemini::GeminiClient;
|
||||
use llm_worker::llm_client::providers::openai::OpenAIClient;
|
||||
use llm_worker::llm_client::scheme::{
|
||||
Scheme, anthropic::AnthropicScheme, gemini::GeminiScheme, openai_chat::OpenAIScheme,
|
||||
};
|
||||
use llm_worker::llm_client::transport::{HttpTransport, ResolvedAuth};
|
||||
|
||||
fn make_transport<S: Scheme>(scheme: S, model: &str, auth: ResolvedAuth) -> HttpTransport<S> {
|
||||
let cap = scheme.default_capability();
|
||||
let base_url = scheme.default_base_url().to_string();
|
||||
HttpTransport::new(scheme, model.to_string(), base_url, auth, cap)
|
||||
}
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
#[command(author, version, about, long_about = None)]
|
||||
|
|
@ -60,7 +67,7 @@ async fn run_scenario_with_anthropic(
|
|||
let api_key = std::env::var("ANTHROPIC_API_KEY")
|
||||
.expect("ANTHROPIC_API_KEY environment variable must be set");
|
||||
let model = model.as_deref().unwrap_or("claude-sonnet-4-20250514");
|
||||
let client = AnthropicClient::new(&api_key, model);
|
||||
let client = make_transport(AnthropicScheme::new(), model, ResolvedAuth::ApiKey(api_key));
|
||||
|
||||
recorder::record_request(
|
||||
&client,
|
||||
|
|
@ -82,7 +89,7 @@ async fn run_scenario_with_openai(
|
|||
let api_key =
|
||||
std::env::var("OPENAI_API_KEY").expect("OPENAI_API_KEY environment variable must be set");
|
||||
let model = model.as_deref().unwrap_or("gpt-4o");
|
||||
let client = OpenAIClient::new(&api_key, model);
|
||||
let client = make_transport(OpenAIScheme::new(), model, ResolvedAuth::ApiKey(api_key));
|
||||
|
||||
recorder::record_request(
|
||||
&client,
|
||||
|
|
@ -101,10 +108,15 @@ async fn run_scenario_with_ollama(
|
|||
subdir: &str,
|
||||
model: Option<String>,
|
||||
) -> Result<(), Box<dyn std::error::Error>> {
|
||||
use llm_worker::llm_client::providers::ollama::OllamaClient;
|
||||
// Ollama typically runs local, no key needed or placeholder
|
||||
let model = model.as_deref().unwrap_or("llama3"); // default example
|
||||
let client = OllamaClient::new(model); // base_url placeholder, handled by client default
|
||||
// Ollama = Anthropic scheme + base_url 差し替え + 認証なし
|
||||
let model = model.as_deref().unwrap_or("llama3");
|
||||
let client = HttpTransport::new(
|
||||
AnthropicScheme::new(),
|
||||
model.to_string(),
|
||||
"http://localhost:11434".to_string(),
|
||||
ResolvedAuth::None,
|
||||
AnthropicScheme::new().default_capability(),
|
||||
);
|
||||
|
||||
recorder::record_request(
|
||||
&client,
|
||||
|
|
@ -126,7 +138,7 @@ async fn run_scenario_with_gemini(
|
|||
let api_key =
|
||||
std::env::var("GEMINI_API_KEY").expect("GEMINI_API_KEY environment variable must be set");
|
||||
let model = model.as_deref().unwrap_or("gemini-2.0-flash");
|
||||
let client = GeminiClient::new(&api_key, model);
|
||||
let client = make_transport(GeminiScheme::new(), model, ResolvedAuth::ApiKey(api_key));
|
||||
|
||||
recorder::record_request(
|
||||
&client,
|
||||
|
|
|
|||
|
|
@ -2,11 +2,10 @@
|
|||
//!
|
||||
//! Example of cancelling from another thread during streaming
|
||||
|
||||
use llm_worker::llm_client::providers::anthropic::AnthropicClient;
|
||||
use llm_worker::llm_client::scheme::{Scheme, anthropic::AnthropicScheme};
|
||||
use llm_worker::llm_client::transport::{HttpTransport, ResolvedAuth};
|
||||
use llm_worker::{Worker, WorkerResult};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
|
|
@ -24,46 +23,39 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|||
let api_key =
|
||||
std::env::var("ANTHROPIC_API_KEY").expect("ANTHROPIC_API_KEY environment variable not set");
|
||||
|
||||
let client = AnthropicClient::new(&api_key, "claude-sonnet-4-20250514");
|
||||
let worker = Arc::new(Mutex::new(Worker::new(client)));
|
||||
let scheme = AnthropicScheme::new();
|
||||
let model = "claude-sonnet-4-20250514".to_string();
|
||||
let cap = scheme.default_capability();
|
||||
let base_url = scheme.default_base_url().to_string();
|
||||
let client = HttpTransport::new(scheme, model, base_url, ResolvedAuth::ApiKey(api_key), cap);
|
||||
let worker = Worker::new(client);
|
||||
|
||||
println!("🚀 Starting Worker...");
|
||||
println!("💡 Will cancel after 2 seconds\n");
|
||||
|
||||
// Get cancel sender first (without holding lock)
|
||||
let cancel_tx = {
|
||||
let w = worker.lock().await;
|
||||
w.cancel_sender()
|
||||
};
|
||||
// Get cancel sender before run (Mutable state)
|
||||
let cancel_tx = worker.cancel_sender();
|
||||
|
||||
// Task 1: Run Worker
|
||||
let worker_clone = worker.clone();
|
||||
let task = tokio::spawn(async move {
|
||||
let mut w = worker_clone.lock().await;
|
||||
println!("📡 Sending request to LLM...");
|
||||
|
||||
match w.run("Tell me a very long story about a brave knight. Make it as detailed as possible with many paragraphs.").await {
|
||||
Ok(WorkerResult::Finished) => {
|
||||
println!("✅ Task completed normally");
|
||||
}
|
||||
Ok(WorkerResult::Paused) => {
|
||||
println!("⏸️ Task paused");
|
||||
}
|
||||
Err(e) => {
|
||||
println!("❌ Task error: {}", e);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Task 2: Cancel after 2 seconds
|
||||
// Task: Cancel after 2 seconds
|
||||
tokio::spawn(async move {
|
||||
tokio::time::sleep(Duration::from_secs(2)).await;
|
||||
println!("\n🛑 Cancelling worker...");
|
||||
let _ = cancel_tx.send(()).await;
|
||||
});
|
||||
|
||||
// Wait for task completion
|
||||
task.await?;
|
||||
println!("📡 Sending request to LLM...");
|
||||
|
||||
match worker.run("Tell me a very long story about a brave knight. Make it as detailed as possible with many paragraphs.").await {
|
||||
Ok(out) => match out.result {
|
||||
WorkerResult::Finished => println!("✅ Task completed normally"),
|
||||
WorkerResult::Paused => println!("⏸️ Task paused"),
|
||||
WorkerResult::LimitReached => println!("🔒 Turn limit reached"),
|
||||
WorkerResult::Yielded => println!("↩️ Task yielded"),
|
||||
},
|
||||
Err(e) => {
|
||||
println!("❌ Task error: {}", e);
|
||||
}
|
||||
}
|
||||
|
||||
println!("\n✨ Demo complete!");
|
||||
|
||||
|
|
|
|||
|
|
@ -41,13 +41,14 @@ use tracing_subscriber::EnvFilter;
|
|||
use clap::{Parser, ValueEnum};
|
||||
use llm_worker::{
|
||||
Worker,
|
||||
hook::{Hook, HookError, PostToolCall, PostToolCallContext, PostToolCallResult},
|
||||
interceptor::{Interceptor, PostToolAction, ToolResultInfo},
|
||||
llm_client::{
|
||||
LlmClient,
|
||||
providers::{
|
||||
anthropic::AnthropicClient, gemini::GeminiClient, ollama::OllamaClient,
|
||||
openai::OpenAIClient,
|
||||
capability::{CacheStrategy, ModelCapability, StructuredOutput, ToolCallingSupport},
|
||||
scheme::{
|
||||
Scheme, anthropic::AnthropicScheme, gemini::GeminiScheme, openai_chat::OpenAIScheme,
|
||||
},
|
||||
transport::{HttpTransport, ResolvedAuth},
|
||||
},
|
||||
timeline::{Handler, TextBlockEvent, TextBlockKind, ToolUseBlockEvent, ToolUseBlockKind},
|
||||
};
|
||||
|
|
@ -270,34 +271,34 @@ impl Handler<ToolUseBlockKind> for ToolCallPrinter {
|
|||
}
|
||||
}
|
||||
|
||||
/// Hook that displays tool execution results
|
||||
struct ToolResultPrinterHook {
|
||||
/// Policy that displays tool execution results.
|
||||
struct ToolResultPrinterPolicy {
|
||||
call_names: Arc<Mutex<HashMap<String, String>>>,
|
||||
}
|
||||
|
||||
impl ToolResultPrinterHook {
|
||||
impl ToolResultPrinterPolicy {
|
||||
fn new(call_names: Arc<Mutex<HashMap<String, String>>>) -> Self {
|
||||
Self { call_names }
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Hook<PostToolCall> for ToolResultPrinterHook {
|
||||
async fn call(&self, ctx: &mut PostToolCallContext) -> Result<PostToolCallResult, HookError> {
|
||||
impl Interceptor for ToolResultPrinterPolicy {
|
||||
async fn post_tool_call(&self, info: &mut ToolResultInfo) -> PostToolAction {
|
||||
let name = self
|
||||
.call_names
|
||||
.lock()
|
||||
.unwrap()
|
||||
.remove(&ctx.result.tool_use_id)
|
||||
.unwrap_or_else(|| ctx.result.tool_use_id.clone());
|
||||
.remove(&info.result.tool_use_id)
|
||||
.unwrap_or_else(|| info.result.tool_use_id.clone());
|
||||
|
||||
if ctx.result.is_error {
|
||||
println!(" Result ({}): ❌ {}", name, ctx.result.content);
|
||||
if info.result.is_error {
|
||||
println!(" Result ({}): ❌ {}", name, info.result.summary);
|
||||
} else {
|
||||
println!(" Result ({}): ✅ {}", name, ctx.result.content);
|
||||
println!(" Result ({}): ✅ {}", name, info.result.summary);
|
||||
}
|
||||
|
||||
Ok(PostToolCallResult::Continue)
|
||||
PostToolAction::Continue
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -327,6 +328,22 @@ fn get_api_key(args: &Args) -> Result<String, String> {
|
|||
}
|
||||
|
||||
/// Hand-written capability profile: parallel tool calling, JSON-schema
/// structured output, no reasoning, no vision, automatic prompt caching.
///
/// `create_client` uses this for the Ollama transport instead of the
/// scheme's own default capability.
fn default_capability() -> ModelCapability {
    ModelCapability {
        tool_calling: ToolCallingSupport::Parallel,
        structured_output: StructuredOutput::JsonSchema,
        reasoning: None,
        vision: false,
        prompt_caching: CacheStrategy::Auto,
    }
}

/// Build a boxed `HttpTransport` for `scheme`, using the scheme's default
/// capability and default base URL.
fn build_transport<S: Scheme>(scheme: S, model: String, auth: ResolvedAuth) -> Box<dyn LlmClient> {
    // Both defaults are read before `scheme` is moved into the transport.
    let cap = scheme.default_capability();
    let base_url = scheme.default_base_url().to_string();
    Box::new(HttpTransport::new(scheme, model, base_url, auth, cap))
}

/// Create client based on provider
|
||||
|
||||
fn create_client(args: &Args) -> Result<Box<dyn LlmClient>, String> {
|
||||
let model = args
|
||||
.model
|
||||
|
|
@ -336,21 +353,32 @@ fn create_client(args: &Args) -> Result<Box<dyn LlmClient>, String> {
|
|||
let api_key = get_api_key(args)?;
|
||||
|
||||
match args.provider {
|
||||
Provider::Anthropic => {
|
||||
let client = AnthropicClient::new(&api_key, &model);
|
||||
Ok(Box::new(client))
|
||||
}
|
||||
Provider::Gemini => {
|
||||
let client = GeminiClient::new(&api_key, &model);
|
||||
Ok(Box::new(client))
|
||||
}
|
||||
Provider::Openai => {
|
||||
let client = OpenAIClient::new(&api_key, &model);
|
||||
Ok(Box::new(client))
|
||||
}
|
||||
Provider::Anthropic => Ok(build_transport(
|
||||
AnthropicScheme::new(),
|
||||
model,
|
||||
ResolvedAuth::ApiKey(api_key),
|
||||
)),
|
||||
Provider::Gemini => Ok(build_transport(
|
||||
GeminiScheme::new(),
|
||||
model,
|
||||
ResolvedAuth::ApiKey(api_key),
|
||||
)),
|
||||
Provider::Openai => Ok(build_transport(
|
||||
OpenAIScheme::new(),
|
||||
model,
|
||||
ResolvedAuth::ApiKey(api_key),
|
||||
)),
|
||||
Provider::Ollama => {
|
||||
let client = OllamaClient::new(&model);
|
||||
Ok(Box::new(client))
|
||||
// Ollama = Anthropic scheme + base_url 差し替え + 認証なし
|
||||
let scheme = AnthropicScheme::new();
|
||||
let cap = default_capability();
|
||||
Ok(Box::new(HttpTransport::new(
|
||||
scheme,
|
||||
model,
|
||||
"http://localhost:11434".to_string(),
|
||||
ResolvedAuth::None,
|
||||
cap,
|
||||
)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -438,10 +466,8 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|||
// Register tools (unless --no-tools)
|
||||
if !args.no_tools {
|
||||
let app = AppContext;
|
||||
worker
|
||||
.register_tool(app.get_current_time_definition())
|
||||
.unwrap();
|
||||
worker.register_tool(app.calculate_definition()).unwrap();
|
||||
worker.register_tool(app.get_current_time_definition());
|
||||
worker.register_tool(app.calculate_definition());
|
||||
}
|
||||
|
||||
// Register streaming display handlers
|
||||
|
|
@ -450,7 +476,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|||
.on_text_block(StreamingPrinter::new())
|
||||
.on_tool_use_block(ToolCallPrinter::new(tool_call_names.clone()));
|
||||
|
||||
worker.add_post_tool_call_hook(ToolResultPrinterHook::new(tool_call_names));
|
||||
worker.set_interceptor(ToolResultPrinterPolicy::new(tool_call_names));
|
||||
|
||||
// One-shot mode
|
||||
if let Some(prompt) = args.prompt {
|
||||
|
|
@ -465,7 +491,27 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|||
return Ok(());
|
||||
}
|
||||
|
||||
// Interactive loop
|
||||
// Interactive loop — first input transitions Mutable → Locked
|
||||
print!("\n👤 You: ");
|
||||
io::stdout().flush()?;
|
||||
|
||||
let mut first_input = String::new();
|
||||
io::stdin().read_line(&mut first_input)?;
|
||||
let first_input = first_input.trim();
|
||||
|
||||
if first_input == "quit" || first_input == "exit" || first_input.is_empty() {
|
||||
println!("\n👋 Goodbye!");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let mut locked = match worker.run(first_input).await {
|
||||
Ok(out) => out.worker,
|
||||
Err(e) => {
|
||||
eprintln!("\n❌ Error: {}", e);
|
||||
return Ok(());
|
||||
}
|
||||
};
|
||||
|
||||
loop {
|
||||
print!("\n👤 You: ");
|
||||
io::stdout().flush()?;
|
||||
|
|
@ -483,8 +529,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|||
break;
|
||||
}
|
||||
|
||||
// Run Worker (Worker manages history)
|
||||
match worker.run(input).await {
|
||||
match locked.run(input).await {
|
||||
Ok(_) => {}
|
||||
Err(e) => {
|
||||
eprintln!("\n❌ Error: {}", e);
|
||||
|
|
|
|||
291
crates/llm-worker/src/callback.rs
Normal file
291
crates/llm-worker/src/callback.rs
Normal file
|
|
@ -0,0 +1,291 @@
|
|||
//! Closure-based event callback API
|
||||
//!
|
||||
//! Provides a closure-based alternative to implementing `Handler<K>` directly.
|
||||
//! Register callbacks on `Worker` via `on_text_block()`, `on_tool_use_block()`,
|
||||
//! `on_usage()`, etc.
|
||||
|
||||
use std::marker::PhantomData;
|
||||
|
||||
use crate::handler::{
|
||||
Handler, Kind, TextBlockEvent, TextBlockKind, ThinkingBlockEvent, ThinkingBlockKind,
|
||||
ToolUseBlockEvent, ToolUseBlockKind, ToolUseBlockStart,
|
||||
};
|
||||
use crate::tool::ToolCall;
|
||||
|
||||
// =============================================================================
|
||||
// TextBlock Closure Handler
|
||||
// =============================================================================
|
||||
|
||||
/// Registration surface handed to the setup closure of `Worker::on_text_block()`.
///
/// The setup closure attaches the callbacks for one text block through
/// `on_delta()` and `on_stop()`. Registering the same kind of callback twice
/// keeps only the most recent one.
///
/// # Examples
///
/// ```ignore
/// worker.on_text_block(|block| {
///     block.on_delta(|text| print!("{}", text));
///     block.on_stop(|full_text| println!("\n--- {} chars ---", full_text.len()));
/// });
/// ```
pub struct TextBlockScope {
    pub(crate) on_delta: Option<Box<dyn FnMut(&str) + Send + Sync>>,
    pub(crate) on_stop: Option<Box<dyn FnMut(&str) + Send + Sync>>,
}

impl TextBlockScope {
    /// Create a scope with no callbacks registered.
    fn new() -> Self {
        Self {
            on_stop: None,
            on_delta: None,
        }
    }

    /// Set the callback that receives each streamed text fragment.
    pub fn on_delta(&mut self, f: impl FnMut(&str) + Send + Sync + 'static) {
        let callback: Box<dyn FnMut(&str) + Send + Sync> = Box::new(f);
        self.on_delta = Some(callback);
    }

    /// Set the callback that runs once the block has completed.
    ///
    /// It is given the full text accumulated over the block.
    pub fn on_stop(&mut self, f: impl FnMut(&str) + Send + Sync + 'static) {
        let callback: Box<dyn FnMut(&str) + Send + Sync> = Box::new(f);
        self.on_stop = Some(callback);
    }
}
|
||||
|
||||
/// Per-block state created by Timeline's scope lifecycle.
|
||||
#[derive(Default)]
|
||||
pub(crate) struct TextBlockClosureState {
|
||||
on_delta: Option<Box<dyn FnMut(&str) + Send + Sync>>,
|
||||
on_stop: Option<Box<dyn FnMut(&str) + Send + Sync>>,
|
||||
buffer: String,
|
||||
}
|
||||
|
||||
/// Closure-based `Handler<TextBlockKind>` adapter.
|
||||
pub(crate) struct ClosureTextBlockHandler {
|
||||
pub(crate) setup: Box<dyn FnMut(&mut TextBlockScope) + Send + Sync>,
|
||||
}
|
||||
|
||||
impl Handler<TextBlockKind> for ClosureTextBlockHandler {
|
||||
type Scope = TextBlockClosureState;
|
||||
|
||||
fn on_event(&mut self, scope: &mut Self::Scope, event: &TextBlockEvent) {
|
||||
match event {
|
||||
TextBlockEvent::Start(_) => {
|
||||
scope.buffer.clear();
|
||||
let mut builder = TextBlockScope::new();
|
||||
(self.setup)(&mut builder);
|
||||
scope.on_delta = builder.on_delta;
|
||||
scope.on_stop = builder.on_stop;
|
||||
}
|
||||
TextBlockEvent::Delta(text) => {
|
||||
scope.buffer.push_str(text);
|
||||
if let Some(f) = &mut scope.on_delta {
|
||||
f(text);
|
||||
}
|
||||
}
|
||||
TextBlockEvent::Stop(_) => {
|
||||
if let Some(f) = &mut scope.on_stop {
|
||||
f(&scope.buffer);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// ThinkingBlock Closure Handler
|
||||
// =============================================================================
|
||||
|
||||
/// Registration surface for the callbacks of one thinking block.
///
/// Has the same shape as `TextBlockScope`. Some providers (or some
/// configurations) emit thinking metadata without plaintext deltas; such a
/// block fires `Start` and `Stop` with no `Delta` in between, which is
/// expected and not an error.
pub struct ThinkingBlockScope {
    pub(crate) on_delta: Option<Box<dyn FnMut(&str) + Send + Sync>>,
    pub(crate) on_stop: Option<Box<dyn FnMut(&str) + Send + Sync>>,
}

impl ThinkingBlockScope {
    /// Create a scope with no callbacks registered.
    fn new() -> Self {
        Self {
            on_stop: None,
            on_delta: None,
        }
    }

    /// Set the callback that receives each streamed thinking text fragment.
    pub fn on_delta(&mut self, f: impl FnMut(&str) + Send + Sync + 'static) {
        let callback: Box<dyn FnMut(&str) + Send + Sync> = Box::new(f);
        self.on_delta = Some(callback);
    }

    /// Set the callback that runs once the block has completed.
    ///
    /// It is given the full accumulated thinking text, which may be empty
    /// when the provider emitted no plaintext deltas.
    pub fn on_stop(&mut self, f: impl FnMut(&str) + Send + Sync + 'static) {
        let callback: Box<dyn FnMut(&str) + Send + Sync> = Box::new(f);
        self.on_stop = Some(callback);
    }
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub(crate) struct ThinkingBlockClosureState {
|
||||
on_delta: Option<Box<dyn FnMut(&str) + Send + Sync>>,
|
||||
on_stop: Option<Box<dyn FnMut(&str) + Send + Sync>>,
|
||||
buffer: String,
|
||||
}
|
||||
|
||||
pub(crate) struct ClosureThinkingBlockHandler {
|
||||
pub(crate) setup: Box<dyn FnMut(&mut ThinkingBlockScope) + Send + Sync>,
|
||||
}
|
||||
|
||||
impl Handler<ThinkingBlockKind> for ClosureThinkingBlockHandler {
|
||||
type Scope = ThinkingBlockClosureState;
|
||||
|
||||
fn on_event(&mut self, scope: &mut Self::Scope, event: &ThinkingBlockEvent) {
|
||||
match event {
|
||||
ThinkingBlockEvent::Start(_) => {
|
||||
scope.buffer.clear();
|
||||
let mut builder = ThinkingBlockScope::new();
|
||||
(self.setup)(&mut builder);
|
||||
scope.on_delta = builder.on_delta;
|
||||
scope.on_stop = builder.on_stop;
|
||||
}
|
||||
ThinkingBlockEvent::Delta(text) => {
|
||||
scope.buffer.push_str(text);
|
||||
if let Some(f) = &mut scope.on_delta {
|
||||
f(text);
|
||||
}
|
||||
}
|
||||
ThinkingBlockEvent::Stop(_) => {
|
||||
if let Some(f) = &mut scope.on_stop {
|
||||
f(&scope.buffer);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// ToolUseBlock Closure Handler
|
||||
// =============================================================================
|
||||
|
||||
/// Callback scope for a tool use block.
|
||||
///
|
||||
/// Passed to the setup closure registered with `Worker::on_tool_use_block()`.
|
||||
/// The setup closure also receives `&ToolUseBlockStart` with `id` and `name`.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```ignore
|
||||
/// worker.on_tool_use_block(|start, block| {
|
||||
/// println!("Tool: {} ({})", start.name, start.id);
|
||||
/// block.on_delta(|json| { /* streaming JSON fragment */ });
|
||||
/// block.on_stop(|call| println!("Done: {}", call.name));
|
||||
/// });
|
||||
/// ```
|
||||
pub struct ToolUseBlockScope {
|
||||
pub(crate) on_delta: Option<Box<dyn FnMut(&str) + Send + Sync>>,
|
||||
pub(crate) on_stop: Option<Box<dyn FnMut(&ToolCall) + Send + Sync>>,
|
||||
}
|
||||
|
||||
impl ToolUseBlockScope {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
on_delta: None,
|
||||
on_stop: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Register a callback for each JSON input delta (streaming fragment).
|
||||
pub fn on_delta(&mut self, f: impl FnMut(&str) + Send + Sync + 'static) {
|
||||
self.on_delta = Some(Box::new(f));
|
||||
}
|
||||
|
||||
/// Register a callback invoked when the block completes.
|
||||
///
|
||||
/// Receives the fully assembled `ToolCall` with parsed JSON input.
|
||||
pub fn on_stop(&mut self, f: impl FnMut(&ToolCall) + Send + Sync + 'static) {
|
||||
self.on_stop = Some(Box::new(f));
|
||||
}
|
||||
}
|
||||
|
||||
/// Per-block state for tool use closure handler.
|
||||
#[derive(Default)]
|
||||
pub(crate) struct ToolUseBlockClosureState {
|
||||
on_delta: Option<Box<dyn FnMut(&str) + Send + Sync>>,
|
||||
on_stop: Option<Box<dyn FnMut(&ToolCall) + Send + Sync>>,
|
||||
id: String,
|
||||
name: String,
|
||||
input_json: String,
|
||||
}
|
||||
|
||||
/// Closure-based `Handler<ToolUseBlockKind>` adapter.
|
||||
pub(crate) struct ClosureToolUseBlockHandler {
|
||||
pub(crate) setup: Box<dyn FnMut(&ToolUseBlockStart, &mut ToolUseBlockScope) + Send + Sync>,
|
||||
}
|
||||
|
||||
impl Handler<ToolUseBlockKind> for ClosureToolUseBlockHandler {
|
||||
type Scope = ToolUseBlockClosureState;
|
||||
|
||||
fn on_event(&mut self, scope: &mut Self::Scope, event: &ToolUseBlockEvent) {
|
||||
match event {
|
||||
ToolUseBlockEvent::Start(start) => {
|
||||
scope.id = start.id.clone();
|
||||
scope.name = start.name.clone();
|
||||
scope.input_json.clear();
|
||||
let mut builder = ToolUseBlockScope::new();
|
||||
(self.setup)(start, &mut builder);
|
||||
scope.on_delta = builder.on_delta;
|
||||
scope.on_stop = builder.on_stop;
|
||||
}
|
||||
ToolUseBlockEvent::InputJsonDelta(json) => {
|
||||
scope.input_json.push_str(json);
|
||||
if let Some(f) = &mut scope.on_delta {
|
||||
f(json);
|
||||
}
|
||||
}
|
||||
ToolUseBlockEvent::Stop(_) => {
|
||||
let input: serde_json::Value =
|
||||
serde_json::from_str(&scope.input_json).unwrap_or_default();
|
||||
let tool_call = ToolCall {
|
||||
id: std::mem::take(&mut scope.id),
|
||||
name: std::mem::take(&mut scope.name),
|
||||
input,
|
||||
};
|
||||
if let Some(f) = &mut scope.on_stop {
|
||||
f(&tool_call);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Generic Meta Event Closure Handler
|
||||
// =============================================================================
|
||||
|
||||
/// Closure-based `Handler<K>` adapter for meta events (Usage, Status, Error).
|
||||
pub(crate) struct ClosureMetaHandler<F, K>
|
||||
where
|
||||
K: Kind,
|
||||
{
|
||||
pub(crate) callback: F,
|
||||
pub(crate) _kind: PhantomData<K>,
|
||||
}
|
||||
|
||||
impl<F, K> Handler<K> for ClosureMetaHandler<F, K>
|
||||
where
|
||||
F: FnMut(&K::Event) + Send + Sync,
|
||||
K: Kind,
|
||||
{
|
||||
type Scope = ();
|
||||
|
||||
fn on_event(&mut self, _scope: &mut (), event: &K::Event) {
|
||||
(self.callback)(event);
|
||||
}
|
||||
}
|
||||
|
|
@ -91,6 +91,16 @@ impl Kind for ErrorKind {
|
|||
type Event = ErrorEvent;
|
||||
}
|
||||
|
||||
/// Reasoning item Kind - used to persist completed reasoning items.
|
||||
///
|
||||
/// Fires exactly once per reasoning item. The Worker receives it
|
||||
/// through `ReasoningItemCollector` and, at the end of the turn,
|
||||
/// appends it to the history as `Item::Reasoning`.
|
||||
pub struct ReasoningItemKind;
|
||||
impl Kind for ReasoningItemKind {
|
||||
type Event = ReasoningItemEvent;
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Block Kind Definitions
|
||||
// =============================================================================
|
||||
|
|
|
|||
|
|
@ -1,310 +0,0 @@
|
|||
//! Hook-related type definitions
|
||||
//!
|
||||
//! Types used for turn control and intervention in the Worker layer
|
||||
|
||||
use async_trait::async_trait;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::Value;
|
||||
use thiserror::Error;
|
||||
|
||||
// =============================================================================
|
||||
// Hook Event Kinds
|
||||
// =============================================================================
|
||||
|
||||
/// Type-level description of a hook event.
///
/// Implementors must be `Send + Sync + 'static` and name two associated types.
// NOTE(review): presumably `Input` is what a hook receives and `Output` is
// what it returns - confirm against the `Hook` trait, which is not visible here.
pub trait HookEventKind: Send + Sync + 'static {
|
||||
/// Input type associated with this event kind.
type Input;
|
||||
/// Output type associated with this event kind.
type Output;
|
||||
}
|
||||
|
||||
// Zero-sized marker types, one per hook point. Each one selects a
// `HookEventKind` implementation and the matching `HookRegistry` list.

/// Marker for the `on_prompt_submit` hook point.
pub struct OnPromptSubmit;
/// Marker for the `pre_llm_request` hook point.
pub struct PreLlmRequest;
/// Marker for the `pre_tool_call` hook point.
pub struct PreToolCall;
/// Marker for the `post_tool_call` hook point.
pub struct PostToolCall;
/// Marker for the `on_turn_end` hook point.
pub struct OnTurnEnd;
/// Marker for the `on_abort` hook point.
pub struct OnAbort;
/// Marker for the `on_text_delta` hook point.
pub struct OnTextDelta;
/// Marker for the `on_tool_call_delta` hook point.
pub struct OnToolCallDelta;
/// Marker for the `on_stream_chunk` hook point.
pub struct OnStreamChunk;
/// Marker for the `on_stream_complete` hook point.
pub struct OnStreamComplete;
|
||||
|
||||
/// Outcome returned by `OnPromptSubmit` hooks.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum OnPromptSubmitResult {
    /// Let processing proceed.
    Continue,
    /// Cancel, carrying a message (presumably the reason; confirm at call site).
    Cancel(String),
}

/// Outcome returned by `PreLlmRequest` hooks.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum PreLlmRequestResult {
    /// Let processing proceed.
    Continue,
    /// Cancel, carrying a message (presumably the reason; confirm at call site).
    Cancel(String),
}

/// Outcome returned by `PreToolCall` hooks.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum PreToolCallResult {
    /// Let processing proceed.
    Continue,
    /// Skip (presumably this single tool call; confirm against the Worker loop).
    Skip,
    /// Abort, carrying a message.
    Abort(String),
    /// Pause.
    Pause,
}

/// Outcome returned by `PostToolCall` hooks.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum PostToolCallResult {
    /// Let processing proceed.
    Continue,
    /// Abort, carrying a message.
    Abort(String),
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum OnTurnEndResult {
|
||||
Finish,
|
||||
ContinueWithMessages(Vec<crate::Item>),
|
||||
Paused,
|
||||
}
|
||||
|
||||
/// Outcome shared by the streaming hooks (`OnTextDelta`, `OnToolCallDelta`,
/// `OnStreamChunk`, `OnStreamComplete`).
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum StreamHookResult {
    /// Let streaming proceed.
    Continue,
    /// Abort, carrying a message.
    Abort(String),
    /// Pause.
    Pause,
}
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::tool::{Tool, ToolMeta};
|
||||
|
||||
/// Input context for PreToolCall
|
||||
pub struct ToolCallContext {
|
||||
/// Tool call information (modifiable)
|
||||
pub call: ToolCall,
|
||||
/// Tool meta information (immutable)
|
||||
pub meta: ToolMeta,
|
||||
/// Tool instance (for state access)
|
||||
pub tool: Arc<dyn Tool>,
|
||||
}
|
||||
|
||||
/// Input context for PostToolCall
|
||||
pub struct PostToolCallContext {
|
||||
/// Tool call information
|
||||
pub call: ToolCall,
|
||||
/// Tool execution result (modifiable)
|
||||
pub result: ToolResult,
|
||||
/// Tool meta information (immutable)
|
||||
pub meta: ToolMeta,
|
||||
/// Tool instance (for state access)
|
||||
pub tool: Arc<dyn Tool>,
|
||||
}
|
||||
|
||||
/// Input handed to `OnTextDelta` hooks.
#[derive(Clone, Debug)]
pub struct TextDeltaContext {
    /// Index of the block the delta belongs to.
    pub index: usize,
    /// Text carried by this delta.
    pub delta: String,
}

/// Input handed to `OnToolCallDelta` hooks.
#[derive(Clone, Debug)]
pub struct ToolCallDeltaContext {
    /// Index of the block the delta belongs to.
    pub index: usize,
    /// Partial JSON fragment carried by this delta.
    pub delta_json_fragment: String,
}
|
||||
|
||||
/// Input context for OnStreamChunk
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct StreamChunkContext {
|
||||
/// Public worker-level event
|
||||
pub event: crate::event::Event,
|
||||
}
|
||||
|
||||
/// Input handed to `OnStreamComplete` hooks.
#[derive(Clone, Debug)]
pub struct StreamCompleteContext {
    /// Current turn number.
    pub turn: usize,
    /// How many events were streamed in this request.
    pub event_count: usize,
}
|
||||
|
||||
impl HookEventKind for OnPromptSubmit {
|
||||
type Input = crate::Item;
|
||||
type Output = OnPromptSubmitResult;
|
||||
}
|
||||
|
||||
impl HookEventKind for PreLlmRequest {
|
||||
type Input = Vec<crate::Item>;
|
||||
type Output = PreLlmRequestResult;
|
||||
}
|
||||
|
||||
impl HookEventKind for PreToolCall {
|
||||
type Input = ToolCallContext;
|
||||
type Output = PreToolCallResult;
|
||||
}
|
||||
|
||||
impl HookEventKind for PostToolCall {
|
||||
type Input = PostToolCallContext;
|
||||
type Output = PostToolCallResult;
|
||||
}
|
||||
|
||||
impl HookEventKind for OnTurnEnd {
|
||||
type Input = Vec<crate::Item>;
|
||||
type Output = OnTurnEndResult;
|
||||
}
|
||||
|
||||
impl HookEventKind for OnAbort {
|
||||
type Input = String;
|
||||
type Output = ();
|
||||
}
|
||||
|
||||
impl HookEventKind for OnTextDelta {
|
||||
type Input = TextDeltaContext;
|
||||
type Output = StreamHookResult;
|
||||
}
|
||||
|
||||
impl HookEventKind for OnToolCallDelta {
|
||||
type Input = ToolCallDeltaContext;
|
||||
type Output = StreamHookResult;
|
||||
}
|
||||
|
||||
impl HookEventKind for OnStreamChunk {
|
||||
type Input = StreamChunkContext;
|
||||
type Output = StreamHookResult;
|
||||
}
|
||||
|
||||
impl HookEventKind for OnStreamComplete {
|
||||
type Input = StreamCompleteContext;
|
||||
type Output = StreamHookResult;
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Tool Call / Result Types
|
||||
// =============================================================================
|
||||
|
||||
/// Tool call information
|
||||
///
|
||||
/// Represents a ToolUse block from LLM, modifiable in Hook processing
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ToolCall {
|
||||
/// Tool call ID (used for linking with response)
|
||||
pub id: String,
|
||||
/// Tool name
|
||||
pub name: String,
|
||||
/// Input arguments (JSON)
|
||||
pub input: Value,
|
||||
}
|
||||
|
||||
/// Tool execution result
|
||||
///
|
||||
/// Represents the result after tool execution, modifiable in Hook processing
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ToolResult {
|
||||
/// Corresponding tool call ID
|
||||
pub tool_use_id: String,
|
||||
/// Result content
|
||||
pub content: String,
|
||||
/// Whether this is an error
|
||||
#[serde(default)]
|
||||
pub is_error: bool,
|
||||
}
|
||||
|
||||
impl ToolResult {
|
||||
/// Create a success result
|
||||
pub fn success(tool_use_id: impl Into<String>, content: impl Into<String>) -> Self {
|
||||
Self {
|
||||
tool_use_id: tool_use_id.into(),
|
||||
content: content.into(),
|
||||
is_error: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create an error result
|
||||
pub fn error(tool_use_id: impl Into<String>, content: impl Into<String>) -> Self {
|
||||
Self {
|
||||
tool_use_id: tool_use_id.into(),
|
||||
content: content.into(),
|
||||
is_error: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Hook Error
|
||||
// =============================================================================
|
||||
|
||||
/// Hook error
|
||||
#[derive(Debug, Error)]
|
||||
pub enum HookError {
|
||||
/// Processing was aborted
|
||||
#[error("Aborted: {0}")]
|
||||
Aborted(String),
|
||||
/// Internal error
|
||||
#[error("Hook error: {0}")]
|
||||
Internal(String),
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Hook Trait
|
||||
// =============================================================================
|
||||
|
||||
/// Trait for handling Hook events
|
||||
///
|
||||
/// Each event type has a different return type, constrained via `HookEventKind`.
|
||||
#[async_trait]
|
||||
pub trait Hook<E: HookEventKind>: Send + Sync {
|
||||
async fn call(&self, input: &mut E::Input) -> Result<E::Output, HookError>;
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Hook Registry
|
||||
// =============================================================================
|
||||
|
||||
/// Registry holding all Hooks
|
||||
///
|
||||
/// Used internally by Worker to manage all Hook types.
|
||||
pub struct HookRegistry {
|
||||
/// on_prompt_submit Hook
|
||||
pub(crate) on_prompt_submit: Vec<Box<dyn Hook<OnPromptSubmit>>>,
|
||||
/// pre_llm_request Hook
|
||||
pub(crate) pre_llm_request: Vec<Box<dyn Hook<PreLlmRequest>>>,
|
||||
/// pre_tool_call Hook
|
||||
pub(crate) pre_tool_call: Vec<Box<dyn Hook<PreToolCall>>>,
|
||||
/// post_tool_call Hook
|
||||
pub(crate) post_tool_call: Vec<Box<dyn Hook<PostToolCall>>>,
|
||||
/// on_turn_end Hook
|
||||
pub(crate) on_turn_end: Vec<Box<dyn Hook<OnTurnEnd>>>,
|
||||
/// on_abort Hook
|
||||
pub(crate) on_abort: Vec<Box<dyn Hook<OnAbort>>>,
|
||||
/// on_text_delta Hook
|
||||
pub(crate) on_text_delta: Vec<Box<dyn Hook<OnTextDelta>>>,
|
||||
/// on_tool_call_delta Hook
|
||||
pub(crate) on_tool_call_delta: Vec<Box<dyn Hook<OnToolCallDelta>>>,
|
||||
/// on_stream_chunk Hook
|
||||
pub(crate) on_stream_chunk: Vec<Box<dyn Hook<OnStreamChunk>>>,
|
||||
/// on_stream_complete Hook
|
||||
pub(crate) on_stream_complete: Vec<Box<dyn Hook<OnStreamComplete>>>,
|
||||
}
|
||||
|
||||
impl Default for HookRegistry {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl HookRegistry {
|
||||
/// Create an empty HookRegistry
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
on_prompt_submit: Vec::new(),
|
||||
pre_llm_request: Vec::new(),
|
||||
pre_tool_call: Vec::new(),
|
||||
post_tool_call: Vec::new(),
|
||||
on_turn_end: Vec::new(),
|
||||
on_abort: Vec::new(),
|
||||
on_text_delta: Vec::new(),
|
||||
on_tool_call_delta: Vec::new(),
|
||||
on_stream_chunk: Vec::new(),
|
||||
on_stream_complete: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
185
crates/llm-worker/src/interceptor.rs
Normal file
185
crates/llm-worker/src/interceptor.rs
Normal file
|
|
@ -0,0 +1,185 @@
|
|||
//! Interceptor - control flow delegation for the Worker execution loop
|
||||
//!
|
||||
//! Defines the [`Interceptor`] trait that upper layers (e.g. Pod) implement
|
||||
//! to inject orchestration decisions (approval, skip, pause, abort)
|
||||
//! into the Worker's turn loop without the Worker knowing about
|
||||
//! higher-level concepts.
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
|
||||
use crate::Item;
|
||||
use crate::tool::{Tool, ToolCall, ToolMeta, ToolResult};
|
||||
|
||||
// =============================================================================
|
||||
// Action Enums
|
||||
// =============================================================================
|
||||
|
||||
/// Action after prompt submission.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum PromptAction {
|
||||
/// Proceed normally.
|
||||
Continue,
|
||||
/// Cancel with a reason.
|
||||
Cancel(String),
|
||||
/// Proceed, and append these items to history right after the user
|
||||
/// message. Mirrors [`TurnEndAction::ContinueWithMessages`] for the
|
||||
/// submit edge: lets the upper layer attach resolver-produced
|
||||
/// system messages (e.g. `@<path>` file content) so they sit
|
||||
/// adjacent to the user message that referenced them.
|
||||
ContinueWith(Vec<Item>),
|
||||
}
|
||||
|
||||
/// Decision returned before an LLM request is sent.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum PreRequestAction {
    /// Proceed normally.
    Continue,
    /// Cancel with a reason (treated as an error).
    Cancel(String),
    /// Hand control back to the caller for external processing.
    ///
    /// The Worker leaves the turn loop cleanly with `WorkerResult::Yielded`,
    /// and the caller is expected to resume execution later.
    Yield,
}
|
||||
|
||||
/// Action before a tool call.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum PreToolAction {
|
||||
/// Proceed with execution.
|
||||
Continue,
|
||||
/// Skip this tool call (do not execute).
|
||||
Skip,
|
||||
/// Do not execute the tool call; commit this synthetic result instead.
|
||||
///
|
||||
/// This preserves provider-visible `tool_use` / `tool_result` pairing
|
||||
/// without aborting the whole turn.
|
||||
SyntheticResult(ToolResult),
|
||||
/// Abort the entire run.
|
||||
Abort(String),
|
||||
/// Pause execution (can be resumed later).
|
||||
Pause,
|
||||
}
|
||||
|
||||
/// Decision returned after a tool call has completed.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum PostToolAction {
    /// Proceed normally.
    Continue,
    /// Abort the entire run.
    Abort(String),
}
|
||||
|
||||
/// Action at the end of a turn (when LLM produces no tool calls).
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum TurnEndAction {
|
||||
/// Turn is finished, return to caller.
|
||||
Finish,
|
||||
/// Continue with additional messages injected into history.
|
||||
ContinueWithMessages(Vec<Item>),
|
||||
/// Pause execution (can be resumed later).
|
||||
Pause,
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Context Types
|
||||
// =============================================================================
|
||||
|
||||
/// Context for pre-tool-call decisions.
|
||||
pub struct ToolCallInfo {
|
||||
/// Tool call information (modifiable).
|
||||
pub call: ToolCall,
|
||||
/// Tool meta information.
|
||||
pub meta: ToolMeta,
|
||||
/// Tool instance (for state access).
|
||||
pub tool: Arc<dyn Tool>,
|
||||
}
|
||||
|
||||
/// Context for post-tool-call decisions.
|
||||
pub struct ToolResultInfo {
|
||||
/// Original tool call.
|
||||
pub call: ToolCall,
|
||||
/// Tool execution result (modifiable).
|
||||
pub result: ToolResult,
|
||||
/// Tool meta information.
|
||||
pub meta: ToolMeta,
|
||||
/// Tool instance (for state access).
|
||||
pub tool: Arc<dyn Tool>,
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Interceptor Trait
|
||||
// =============================================================================
|
||||
|
||||
/// Intercepts the Worker execution loop at key decision points.
|
||||
///
|
||||
/// All methods have default implementations that let the Worker
|
||||
/// proceed without intervention. Upper layers (e.g. Pod) provide
|
||||
/// richer implementations for approval flows, permission checks, etc.
|
||||
#[async_trait]
|
||||
pub trait Interceptor: Send + Sync {
|
||||
/// Called after receiving user input, before adding to history.
|
||||
async fn on_prompt_submit(&self, _item: &mut Item) -> PromptAction {
|
||||
PromptAction::Continue
|
||||
}
|
||||
|
||||
/// Items that should be **committed to `worker.history`** just
|
||||
/// before the next LLM request. Returned items are `extend`ed into
|
||||
/// the persistent history (and therefore picked up by the per-turn
|
||||
/// clone that backs the LLM request, plus the usual
|
||||
/// history-persistence path).
|
||||
///
|
||||
/// Use this for inputs that arrive from outside the LLM and need
|
||||
/// to be reflected in the on-disk history — notifications,
|
||||
/// cross-Pod events, system reminders. Do **not** use
|
||||
/// [`Self::pre_llm_request`] for that purpose: it mutates a
|
||||
/// per-request clone, so any committed assistant response that
|
||||
/// reacts to the injection would have no visible trigger on the
|
||||
/// next turn (or after resume / compaction).
|
||||
///
|
||||
/// `pre_llm_request` remains the right place for purely
|
||||
/// reproducible per-request transformations (pruning, content
|
||||
/// trimming, cache anchors) that depend only on the existing
|
||||
/// history.
|
||||
async fn pending_history_appends(&self) -> Vec<Item> {
|
||||
Vec::new()
|
||||
}
|
||||
|
||||
/// Called before each LLM request. The context starts as a clone
|
||||
/// of `worker.history` (after `pending_history_appends` and the
|
||||
/// Worker's own prune projection have been applied) and can be
|
||||
/// further modified for that single request only — mutations here
|
||||
/// are **not** persisted back to history. Use
|
||||
/// [`Self::pending_history_appends`] for inputs that need to land
|
||||
/// in history.
|
||||
async fn pre_llm_request(&self, _context: &mut Vec<Item>) -> PreRequestAction {
|
||||
PreRequestAction::Continue
|
||||
}
|
||||
|
||||
/// Called before each tool is executed.
|
||||
async fn pre_tool_call(&self, _info: &mut ToolCallInfo) -> PreToolAction {
|
||||
PreToolAction::Continue
|
||||
}
|
||||
|
||||
/// Called after each tool completes.
|
||||
async fn post_tool_call(&self, _info: &mut ToolResultInfo) -> PostToolAction {
|
||||
PostToolAction::Continue
|
||||
}
|
||||
|
||||
/// Called when a turn ends with no tool calls.
|
||||
async fn on_turn_end(&self, _history: &[Item]) -> TurnEndAction {
|
||||
TurnEndAction::Finish
|
||||
}
|
||||
|
||||
/// Called when execution is interrupted (abort or cancel).
|
||||
async fn on_abort(&self, _reason: &str) {}
|
||||
}
|
||||
|
||||
/// Default interceptor: no intervention. Worker proceeds through the loop
|
||||
/// without any external control flow decisions.
|
||||
pub(crate) struct DefaultInterceptor;
|
||||
|
||||
#[async_trait]
|
||||
impl Interceptor for DefaultInterceptor {}
|
||||
|
|
@ -6,8 +6,8 @@
|
|||
//!
|
||||
//! - [`Worker`] - Central component for managing LLM interactions
|
||||
//! - [`tool::Tool`] - Tools that can be invoked by the LLM
|
||||
//! - [`hook::Hook`] - Hooks for intercepting turn progression
|
||||
//! - [`subscriber::WorkerSubscriber`] - Subscribing to streaming events
|
||||
//! - [`interceptor::Interceptor`] - Control-flow delegation for the execution loop
|
||||
//! - Closure-based event callbacks via `Worker::on_text_block()`, `on_tool_use_block()`, etc.
|
||||
//!
|
||||
//! # Quick Start
|
||||
//!
|
||||
|
|
@ -27,26 +27,38 @@
|
|||
//!
|
||||
//! # Cache Protection
|
||||
//!
|
||||
//! To maximize KV cache hit rate, transition to the locked state
|
||||
//! with [`Worker::lock()`] before execution.
|
||||
//! `run()` automatically locks the cache. To edit state between turns,
|
||||
//! call `unlock_cache()` first; the next `run()` re-locks automatically.
|
||||
//!
|
||||
//! ```ignore
|
||||
//! let mut locked = worker.lock();
|
||||
//! locked.run("user input").await?;
|
||||
//! worker.run("user input").await?;
|
||||
//! worker.unlock_cache();
|
||||
//! worker.set_system_prompt("new prompt");
|
||||
//! worker.run("next input").await?;
|
||||
//! ```
|
||||
|
||||
mod handler;
|
||||
mod message;
|
||||
mod worker;
|
||||
|
||||
pub(crate) mod callback;
|
||||
pub mod event;
|
||||
pub mod hook;
|
||||
pub mod interceptor;
|
||||
pub mod llm_client;
|
||||
pub mod prune;
|
||||
pub mod state;
|
||||
pub mod subscriber;
|
||||
pub mod timeline;
|
||||
pub mod token_counter;
|
||||
pub mod tool;
|
||||
pub mod tool_server;
|
||||
pub mod usage_record;
|
||||
|
||||
pub use callback::{TextBlockScope, ThinkingBlockScope, ToolUseBlockScope};
|
||||
pub use handler::ToolUseBlockStart;
|
||||
pub use interceptor::Interceptor;
|
||||
pub use message::{ContentPart, Item, Message, Role};
|
||||
pub use worker::{ToolRegistryError, Worker, WorkerConfig, WorkerError, WorkerResult};
|
||||
pub use tool::{ToolCall, ToolOutputLimits, ToolResult};
|
||||
pub use usage_record::UsageRecord;
|
||||
pub use worker::{
|
||||
LlmRetryNotice, RunOutput, ToolRegistryError, Worker, WorkerConfig, WorkerError, WorkerResult,
|
||||
};
|
||||
|
|
|
|||
57
crates/llm-worker/src/llm_client/auth.rs
Normal file
57
crates/llm-worker/src/llm_client/auth.rs
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
//! `Scheme` 実装と通信層が要求する認証要件、および動的認証プロバイダ。
|
||||
//!
|
||||
//! マニフェスト側の型(`ModelConfig` / `SchemeKind` / `AuthRef`)は
|
||||
//! `crates/manifest` に置き、llm-worker はそれを知らずに済む。
|
||||
//! `AuthRequirement` は scheme が宣言する「この scheme はどんな認証を
|
||||
//! 期待するか」のランタイム記述で、manifest 側の `AuthRef` との
|
||||
//! 照合(`AuthRef → ResolvedAuth` 変換の適否)は `crates/provider`
|
||||
//! で行う。
|
||||
//!
|
||||
//! Codex OAuth のようにリクエスト毎にトークンが変わり得る認証は
|
||||
//! [`AuthProvider`] trait を `crates/provider` 側で実装し、
|
||||
//! [`super::transport::ResolvedAuth::Custom`] 経由で transport に渡す。
|
||||
|
||||
use async_trait::async_trait;
|
||||
use reqwest::header::{HeaderName, HeaderValue};
|
||||
|
||||
use super::error::ClientError;
|
||||
|
||||
/// Authentication requirement returned by `Scheme::required_auth()`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum AuthRequirement {
    /// No authentication (e.g. Ollama).
    None,
    /// `Authorization: Bearer <token>` header (the token is the equivalent
    /// of an API key).
    Bearer,
    /// `x-api-key: <token>` header (Anthropic style).
    XApiKey,
    /// Query parameter `?<name>=<token>` (Gemini style).
    QueryParam { name: &'static str },
    /// Composite headers (Codex OAuth and similar), resolved on the
    /// `crates/provider` side.
    Custom,
}
|
||||
|
||||
/// リクエスト毎に認証ヘッダを動的に組み立てるプロバイダ。
|
||||
///
|
||||
/// Codex OAuth のように access_token が refresh で更新されたり、
|
||||
/// `ChatGPT-Account-Id` / `X-OpenAI-Fedramp` のような複数ヘッダを
|
||||
/// 同時に注入する必要があるケースで使う。実体は `crates/provider`
|
||||
/// 側に置き、llm-worker は trait を知るだけ。
|
||||
///
|
||||
/// 返したヘッダはそのまま `HeaderMap` に挿入される。`Authorization`
|
||||
/// 含む scheme 既定の認証ヘッダは送出されないので、必要なら
|
||||
/// 実装側でセットすること。
|
||||
#[async_trait]
|
||||
pub trait AuthProvider: Send + Sync + std::fmt::Debug {
|
||||
/// 1 リクエスト分の認証ヘッダを返す。refresh が必要なら内部で行う。
|
||||
async fn headers(&self) -> Result<Vec<(HeaderName, HeaderValue)>, ClientError>;
|
||||
|
||||
/// ChatGPT Codex backend 向けの複合認証かどうか。
|
||||
///
|
||||
/// transport は provider crate の具象型を知らないため、この hook だけで
|
||||
/// Codex CLI 互換の wire behavior(conversation header / request compression 等)
|
||||
/// を切り替える。
|
||||
fn is_codex_backend(&self) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
||||
169
crates/llm-worker/src/llm_client/capability.rs
Normal file
169
crates/llm-worker/src/llm_client/capability.rs
Normal file
|
|
@ -0,0 +1,169 @@
|
|||
//! モデル能力メタデータ
|
||||
//!
|
||||
//! `ModelCapability` はモデルが持つ機能差を表現する。scheme は同じでも
|
||||
//! モデルごとに reasoning 可否や prompt caching 方式が違うため、scheme
|
||||
//! から分離して保持する。
|
||||
//!
|
||||
//! 値の供給経路は 2 通り:
|
||||
//! 1. scheme 実装側の `model_id → ModelCapability` 静的テーブル(既知モデル)
|
||||
//! 2. `ModelConfig::capability` での明示 override(未知モデル、または上書き)
|
||||
|
||||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||
|
||||
/// モデル能力メタデータ
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
pub struct ModelCapability {
|
||||
pub tool_calling: ToolCallingSupport,
|
||||
pub structured_output: StructuredOutput,
|
||||
#[serde(default)]
|
||||
pub reasoning: Option<ReasoningSupport>,
|
||||
#[serde(default)]
|
||||
pub vision: bool,
|
||||
pub prompt_caching: CacheStrategy,
|
||||
}
|
||||
|
||||
impl ModelCapability {
|
||||
/// 何もサポートしない安全側デフォルト。未知モデルのフォールバック用。
|
||||
pub const fn minimal() -> Self {
|
||||
Self {
|
||||
tool_calling: ToolCallingSupport::None,
|
||||
structured_output: StructuredOutput::None,
|
||||
reasoning: None,
|
||||
vision: false,
|
||||
prompt_caching: CacheStrategy::Auto,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// ツール呼び出しサポート
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ToolCallingSupport {
|
||||
/// 非サポート
|
||||
None,
|
||||
/// 1 回のレスポンスで 1 ツールのみ
|
||||
Sequential,
|
||||
/// 1 回のレスポンスで複数ツール並行
|
||||
Parallel,
|
||||
}
|
||||
|
||||
/// Structured output サポート
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum StructuredOutput {
|
||||
None,
|
||||
/// `json_object` モード(スキーマなし JSON 強制)
|
||||
JsonObject,
|
||||
/// JSON Schema 指定で構造化出力
|
||||
JsonSchema,
|
||||
}
|
||||
|
||||
/// Reasoning(extended thinking)サポート
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ReasoningSupport {
|
||||
/// OpenAI 形式: `reasoning.effort` (low/medium/high)
|
||||
Effort,
|
||||
/// Anthropic 形式: `thinking.budget_tokens`
|
||||
BudgetTokens,
|
||||
/// 両対応(内部では共通 `ReasoningControl` として扱い、各 scheme で投影)
|
||||
Both,
|
||||
}
|
||||
|
||||
/// Prompt caching 戦略
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
|
||||
#[serde(tag = "kind", rename_all = "snake_case")]
|
||||
pub enum CacheStrategy {
|
||||
/// Anthropic: `cache_control` マーカーを明示挿入
|
||||
Explicit { max_breakpoints: u8 },
|
||||
/// それ以外: サーバ側自動 prefix、または未サポート
|
||||
Auto,
|
||||
}
|
||||
|
||||
/// Reasoning 制御(共通型、scheme 側で各社形式に投影)。
|
||||
///
|
||||
/// 文字列は provider-native な effort label、数値は provider-native な
|
||||
/// thinking budget token として扱う。どちらか一方だけを型で表現する。
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
#[serde(untagged)]
|
||||
pub enum ReasoningControl {
|
||||
Effort(ReasoningEffort),
|
||||
BudgetTokens(i32),
|
||||
}
|
||||
|
||||
/// Reasoning effort label.
///
/// The five well-known labels have dedicated variants; any other label is
/// kept verbatim in [`ReasoningEffort::Other`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ReasoningEffort {
    Minimal,
    Low,
    Medium,
    High,
    XHigh,
    /// A label that is not one of the well-known ones, stored unchanged.
    Other(String),
}

impl ReasoningEffort {
    /// Returns the textual label for this effort level.
    ///
    /// Well-known variants map to their lowercase names; `Other` returns
    /// the stored string.
    pub fn as_str(&self) -> &str {
        match self {
            Self::Other(label) => label,
            Self::XHigh => "xhigh",
            Self::High => "high",
            Self::Medium => "medium",
            Self::Low => "low",
            Self::Minimal => "minimal",
        }
    }
}

impl From<String> for ReasoningEffort {
    /// Maps a label to its variant. The match is exact and case-sensitive;
    /// anything unrecognized becomes `Other` holding the original string.
    fn from(value: String) -> Self {
        let known = match &*value {
            "minimal" => Some(Self::Minimal),
            "low" => Some(Self::Low),
            "medium" => Some(Self::Medium),
            "high" => Some(Self::High),
            "xhigh" => Some(Self::XHigh),
            _ => None,
        };
        known.unwrap_or_else(|| Self::Other(value))
    }
}
|
||||
|
||||
impl Serialize for ReasoningEffort {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: Serializer,
|
||||
{
|
||||
serializer.serialize_str(self.as_str())
|
||||
}
|
||||
}
|
||||
|
||||
impl<'de> Deserialize<'de> for ReasoningEffort {
|
||||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
String::deserialize(deserializer).map(Self::from)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{ReasoningControl, ReasoningEffort};

    /// A JSON string becomes `Effort`, whether or not the label is well known.
    #[test]
    fn reasoning_control_deserializes_effort_labels() {
        let cases = [
            (r#""xhigh""#, ReasoningEffort::XHigh),
            (
                r#""provider-native""#,
                ReasoningEffort::Other("provider-native".into()),
            ),
        ];
        for (json, effort) in cases {
            let parsed: ReasoningControl = serde_json::from_str(json).unwrap();
            assert_eq!(parsed, ReasoningControl::Effort(effort));
        }
    }

    /// A JSON number, negative values included, becomes `BudgetTokens`.
    #[test]
    fn reasoning_control_deserializes_signed_budget() {
        let parsed = serde_json::from_str::<ReasoningControl>("-1").unwrap();
        assert_eq!(parsed, ReasoningControl::BudgetTokens(-1));
    }
}
|
||||
|
|
@ -36,6 +36,8 @@ impl std::fmt::Display for ConfigWarning {
|
|||
}
|
||||
}
|
||||
|
||||
/// Boxed, pinned, `Send` stream whose items are `Result<Event, ClientError>`.
pub type ResponseStream = Pin<Box<dyn Stream<Item = Result<Event, ClientError>> + Send>>;
|
||||
|
||||
/// LLMクライアントのtrait
|
||||
///
|
||||
/// 各プロバイダはこのtraitを実装し、統一されたインターフェースを提供する。
|
||||
|
|
@ -49,10 +51,13 @@ pub trait LlmClient: Send + Sync {
|
|||
/// # Returns
|
||||
/// * `Ok(Stream)` - イベントストリーム
|
||||
/// * `Err(ClientError)` - エラー
|
||||
async fn stream(
|
||||
&self,
|
||||
request: Request,
|
||||
) -> Result<Pin<Box<dyn Stream<Item = Result<Event, ClientError>> + Send>>, ClientError>;
|
||||
async fn stream(&self, request: Request) -> Result<ResponseStream, ClientError>;
|
||||
|
||||
/// Clone this client into a new `Box<dyn LlmClient>`.
|
||||
///
|
||||
/// Used when a second client instance is needed (e.g. for context
|
||||
/// compaction) without access to the original construction parameters.
|
||||
fn clone_boxed(&self) -> Box<dyn LlmClient>;
|
||||
|
||||
/// 設定をバリデーションし、未サポートの設定があれば警告を返す
|
||||
///
|
||||
|
|
@ -68,18 +73,25 @@ pub trait LlmClient: Send + Sync {
|
|||
}
|
||||
}
|
||||
|
||||
/// Makes boxed clients cloneable by delegating to `LlmClient::clone_boxed`.
impl Clone for Box<dyn LlmClient> {
    fn clone(&self) -> Self {
        <Self as LlmClient>::clone_boxed(self)
    }
}
|
||||
|
||||
/// `Box<dyn LlmClient>` に対する `LlmClient` の実装
|
||||
///
|
||||
/// これにより、動的ディスパッチを使用するクライアントも `Worker` で利用可能になる。
|
||||
#[async_trait]
|
||||
impl LlmClient for Box<dyn LlmClient> {
|
||||
async fn stream(
|
||||
&self,
|
||||
request: Request,
|
||||
) -> Result<Pin<Box<dyn Stream<Item = Result<Event, ClientError>> + Send>>, ClientError> {
|
||||
async fn stream(&self, request: Request) -> Result<ResponseStream, ClientError> {
|
||||
(**self).stream(request).await
|
||||
}
|
||||
|
||||
fn clone_boxed(&self) -> Box<dyn LlmClient> {
|
||||
(**self).clone_boxed()
|
||||
}
|
||||
|
||||
fn validate_config(&self, config: &RequestConfig) -> Vec<ConfigWarning> {
|
||||
(**self).validate_config(config)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
//! LLMクライアントエラー型
|
||||
|
||||
use std::fmt;
|
||||
use std::{fmt, time::Duration};
|
||||
|
||||
/// LLMクライアントのエラー
|
||||
#[derive(Debug)]
|
||||
|
|
@ -16,6 +16,12 @@ pub enum ClientError {
|
|||
status: Option<u16>,
|
||||
code: Option<String>,
|
||||
message: String,
|
||||
retry_after: Option<Duration>,
|
||||
},
|
||||
/// A request lifecycle phase exceeded its hard timeout.
|
||||
Timeout {
|
||||
phase: &'static str,
|
||||
timeout: Duration,
|
||||
},
|
||||
/// 設定エラー
|
||||
Config(String),
|
||||
|
|
@ -31,6 +37,7 @@ impl fmt::Display for ClientError {
|
|||
status,
|
||||
code,
|
||||
message,
|
||||
..
|
||||
} => {
|
||||
write!(f, "API error")?;
|
||||
if let Some(s) = status {
|
||||
|
|
@ -41,6 +48,9 @@ impl fmt::Display for ClientError {
|
|||
}
|
||||
write!(f, ": {}", message)
|
||||
}
|
||||
ClientError::Timeout { phase, timeout } => {
|
||||
write!(f, "{phase} timed out after {}s", timeout.as_secs())
|
||||
}
|
||||
ClientError::Config(msg) => write!(f, "Config error: {}", msg),
|
||||
}
|
||||
}
|
||||
|
|
@ -67,3 +77,96 @@ impl From<serde_json::Error> for ClientError {
|
|||
ClientError::Json(err)
|
||||
}
|
||||
}
|
||||
|
||||
impl ClientError {
|
||||
pub fn status(&self) -> Option<u16> {
|
||||
match self {
|
||||
ClientError::Api { status, .. } => *status,
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn retry_after(&self) -> Option<Duration> {
|
||||
match self {
|
||||
ClientError::Api { retry_after, .. } => *retry_after,
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// transient な失敗としてリトライ対象になるかを判定する。
|
||||
///
|
||||
/// 対象:
|
||||
/// - `Api { status }` のうち 408 / 425 / 429 / 500 / 502 / 503 / 504 / 529
|
||||
/// - `Http(reqwest::Error)` のうち `is_connect()` または `is_timeout()`
|
||||
/// - `Timeout { .. }` の lifecycle hard timeout
|
||||
///
|
||||
/// それ以外(Json、Sse、Config、上記以外の Api ステータス)は false。
|
||||
/// SSE 読み出し開始後の失敗は呼び出し側で `Sse` として上に流すため、
|
||||
/// ここで対象外にしておけば自動的に弾かれる。
|
||||
pub fn is_retryable(error: &ClientError) -> bool {
|
||||
match error {
|
||||
ClientError::Api {
|
||||
status: Some(code), ..
|
||||
} => matches!(*code, 408 | 425 | 429 | 500 | 502 | 503 | 504 | 529),
|
||||
ClientError::Api { status: None, .. } => false,
|
||||
ClientError::Timeout { .. } => true,
|
||||
ClientError::Http(e) => e.is_connect() || e.is_timeout(),
|
||||
ClientError::Json(_) | ClientError::Sse(_) | ClientError::Config(_) => false,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an `Api` error in which only the status is meaningful.
    fn api_err(status: Option<u16>) -> ClientError {
        let (code, message, retry_after) = (None, String::new(), None);
        ClientError::Api {
            status,
            code,
            message,
            retry_after,
        }
    }

    /// Every status on the retry list is reported as retryable.
    #[test]
    fn retryable_status_codes() {
        let retryable: [u16; 8] = [408, 425, 429, 500, 502, 503, 504, 529];
        for code in retryable {
            let err = api_err(Some(code));
            assert!(is_retryable(&err), "status {code} should be retryable");
        }
    }

    /// A sample of other statuses is reported as not retryable.
    #[test]
    fn non_retryable_status_codes() {
        let permanent: [u16; 8] = [400, 401, 403, 404, 409, 410, 422, 501];
        for code in permanent {
            let err = api_err(Some(code));
            assert!(!is_retryable(&err), "status {code} should not be retryable");
        }
    }

    /// An `Api` error without a status is not retried.
    #[test]
    fn api_without_status_not_retryable() {
        let err = api_err(None);
        assert!(!is_retryable(&err));
    }

    /// The lifecycle hard timeout is retried.
    #[test]
    fn lifecycle_timeout_is_retryable() {
        let err = ClientError::Timeout {
            phase: "stream_open",
            timeout: Duration::from_secs(30),
        };
        assert!(is_retryable(&err));
    }

    /// `Json`, `Sse` and `Config` errors are never retried.
    #[test]
    fn json_sse_config_not_retryable() {
        let json_err = serde_json::from_str::<serde_json::Value>("not json").unwrap_err();
        let errors = [
            ClientError::Json(json_err),
            ClientError::Sse("boom".into()),
            ClientError::Config("boom".into()),
        ];
        for err in &errors {
            assert!(!is_retryable(err));
        }
    }
}
|
||||
|
|
|
|||
|
|
@ -15,8 +15,11 @@ use serde::{Deserialize, Serialize};
|
|||
///
|
||||
/// # イベントの種類
|
||||
///
|
||||
/// - **メタイベント**: `Ping`, `Usage`, `Status`, `Error`
|
||||
/// - **メタイベント**: `Ping`, `Usage`, `Status`, `Error`, `UnhandledSse`
|
||||
/// - **ブロックイベント**: `BlockStart`, `BlockDelta`, `BlockStop`, `BlockAbort`
|
||||
/// - **永続化イベント**: `ReasoningItem` (history に commit すべき完成済み
|
||||
/// reasoning item。streaming 表示用の Thinking BlockStart/Delta/Stop と
|
||||
/// は別経路で発火する)
|
||||
///
|
||||
/// # ブロックのライフサイクル
|
||||
///
|
||||
|
|
@ -32,6 +35,10 @@ pub enum Event {
|
|||
Status(StatusEvent),
|
||||
/// エラー発生
|
||||
Error(ErrorEvent),
|
||||
/// Scheme が生成内容として解釈しない未対応 SSE イベント。
|
||||
///
|
||||
/// stream trace 用の観測イベントであり、timeline / history には反映しない。
|
||||
UnhandledSse(UnhandledSseEvent),
|
||||
|
||||
/// ブロック開始(テキスト、ツール使用等)
|
||||
BlockStart(BlockStart),
|
||||
|
|
@ -41,6 +48,18 @@ pub enum Event {
|
|||
BlockStop(BlockStop),
|
||||
/// ブロック中断
|
||||
BlockAbort(BlockAbort),
|
||||
|
||||
/// Reasoning item の完成。scheme が「次の request に送り返すための
|
||||
/// reasoning material が揃った」点で 1 度だけ発火する。
|
||||
///
|
||||
/// - Anthropic: 1 つの `thinking` content_block 完了ごと
|
||||
/// - OpenAI Responses: 1 つの reasoning output_item 完了ごと
|
||||
///
|
||||
/// 上位層(Worker / ReasoningItemCollector)はこれを `Item::Reasoning`
|
||||
/// として `worker.history` に append する。streaming 表示用の
|
||||
/// `BlockStart(Thinking)` / `BlockDelta(Thinking)` / `BlockStop(Thinking)`
|
||||
/// は依然として並行発火する(live display と round-trip persist の責務分離)。
|
||||
ReasoningItem(ReasoningItemEvent),
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
|
|
@ -54,17 +73,27 @@ pub struct PingEvent {
|
|||
}
|
||||
|
||||
/// 使用量イベント
|
||||
///
|
||||
/// プロバイダから受信した 1 LLM リクエスト分のトークン会計。
|
||||
/// 各 scheme で正規化され、フィールドの意味は全プロバイダ共通:
|
||||
///
|
||||
/// - `input_tokens` は **送信した prompt prefix 全体の占有量**(プロンプト全長)。
|
||||
/// キャッシュヒット分も含まれる。Anthropic は raw API では非キャッシュ分のみを
|
||||
/// `input_tokens` として返すため、`AnthropicScheme::convert_usage` で
|
||||
/// `cache_read + cache_creation` を加算してこの規約に揃えている。
|
||||
/// - `cache_read_input_tokens` / `cache_creation_input_tokens` は上記の内訳で、
|
||||
/// 料金会計用。占有量からは差し引かない。
|
||||
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
|
||||
pub struct UsageEvent {
|
||||
/// 入力トークン数
|
||||
/// 送信した prompt prefix の総トークン数(占有量、キャッシュ込み)
|
||||
pub input_tokens: Option<u64>,
|
||||
/// 出力トークン数
|
||||
/// このリクエストで生成された出力トークン数
|
||||
pub output_tokens: Option<u64>,
|
||||
/// 合計トークン数
|
||||
/// `input_tokens + output_tokens`
|
||||
pub total_tokens: Option<u64>,
|
||||
/// キャッシュ読み込みトークン数
|
||||
/// `input_tokens` のうちキャッシュから読まれた分(割引料金)
|
||||
pub cache_read_input_tokens: Option<u64>,
|
||||
/// キャッシュ作成トークン数
|
||||
/// `input_tokens` のうちこのリクエストでキャッシュに書かれた分(割増料金、Anthropic)
|
||||
pub cache_creation_input_tokens: Option<u64>,
|
||||
}
|
||||
|
||||
|
|
@ -94,6 +123,18 @@ pub struct ErrorEvent {
|
|||
pub message: String,
|
||||
}
|
||||
|
||||
/// 未対応 SSE イベントの観測用メタイベント。
|
||||
///
|
||||
/// `data_preview` は provider から受け取った raw SSE data の bounded preview、
|
||||
/// `data_len` は preview 前の raw data byte length。
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub struct UnhandledSseEvent {
|
||||
pub provider: String,
|
||||
pub event_type: String,
|
||||
pub data_preview: String,
|
||||
pub data_len: usize,
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Block Types
|
||||
// =============================================================================
|
||||
|
|
@ -202,6 +243,31 @@ impl BlockAbort {
|
|||
}
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Reasoning Item Event
|
||||
// =============================================================================
|
||||
|
||||
/// 完成済み reasoning item。scheme が round-trip に必要なすべての
|
||||
/// material(text, summary, encrypted_content, signature, id)を揃えて
|
||||
/// 1 度だけ発火する。
|
||||
///
|
||||
/// `Item::Reasoning` のフィールドを 1:1 に持つ。
|
||||
#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
|
||||
pub struct ReasoningItemEvent {
|
||||
/// scheme 側で観測した item id(OpenAI Responses の `id`)。
|
||||
pub id: Option<String>,
|
||||
/// reasoning 本体テキスト。Anthropic は `thinking` 累積、OpenAI は
|
||||
/// `reasoning_text` 累積。redacted_thinking では空。
|
||||
pub text: String,
|
||||
/// summary (OpenAI Responses の `summary_text[]`)。他 scheme は空。
|
||||
pub summary: Vec<String>,
|
||||
/// 暗号化された opaque blob(Anthropic `redacted_thinking.data` /
|
||||
/// OpenAI Responses `encrypted_content`)。
|
||||
pub encrypted_content: Option<String>,
|
||||
/// Anthropic extended thinking signature。round-trip 必須。
|
||||
pub signature: Option<String>,
|
||||
}
|
||||
|
||||
/// 停止理由
|
||||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
|
||||
pub enum StopReason {
|
||||
|
|
|
|||
|
|
@ -16,14 +16,19 @@
|
|||
//! - `providers`: プロバイダ固有のクライアント実装
|
||||
//! - `scheme`: APIスキーマ(リクエスト/レスポンス変換)
|
||||
|
||||
pub mod auth;
|
||||
pub mod capability;
|
||||
pub mod client;
|
||||
pub mod error;
|
||||
pub mod event;
|
||||
pub mod types;
|
||||
|
||||
pub mod providers;
|
||||
pub mod retry;
|
||||
pub mod scheme;
|
||||
pub mod transport;
|
||||
|
||||
pub use auth::*;
|
||||
pub use capability::*;
|
||||
pub use client::*;
|
||||
pub use error::*;
|
||||
pub use event::*;
|
||||
|
|
|
|||
|
|
@ -1,201 +0,0 @@
|
|||
//! Anthropic プロバイダ実装
|
||||
//!
|
||||
//! Anthropic Messages APIと通信し、Eventストリームを出力
|
||||
|
||||
use std::pin::Pin;
|
||||
|
||||
use crate::llm_client::{
|
||||
ClientError, LlmClient, Request, event::Event, scheme::anthropic::AnthropicScheme,
|
||||
};
|
||||
use async_trait::async_trait;
|
||||
use eventsource_stream::Eventsource;
|
||||
use futures::{Stream, StreamExt, TryStreamExt, future::ready};
|
||||
use reqwest::header::{CONTENT_TYPE, HeaderMap, HeaderValue};
|
||||
|
||||
/// Anthropic クライアント
|
||||
pub struct AnthropicClient {
|
||||
/// HTTPクライアント
|
||||
http_client: reqwest::Client,
|
||||
/// APIキー
|
||||
api_key: String,
|
||||
/// モデル名
|
||||
model: String,
|
||||
/// スキーマ
|
||||
scheme: AnthropicScheme,
|
||||
/// ベースURL
|
||||
base_url: String,
|
||||
}
|
||||
|
||||
impl AnthropicClient {
|
||||
/// 新しいAnthropicクライアントを作成
|
||||
pub fn new(api_key: impl Into<String>, model: impl Into<String>) -> Self {
|
||||
Self {
|
||||
http_client: reqwest::Client::new(),
|
||||
api_key: api_key.into(),
|
||||
model: model.into(),
|
||||
scheme: AnthropicScheme::default(),
|
||||
base_url: "https://api.anthropic.com".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
/// カスタムHTTPクライアントを設定
|
||||
pub fn with_http_client(mut self, client: reqwest::Client) -> Self {
|
||||
self.http_client = client;
|
||||
self
|
||||
}
|
||||
|
||||
/// スキーマを設定
|
||||
pub fn with_scheme(mut self, scheme: AnthropicScheme) -> Self {
|
||||
self.scheme = scheme;
|
||||
self
|
||||
}
|
||||
|
||||
/// ベースURLを設定
|
||||
pub fn with_base_url(mut self, url: impl Into<String>) -> Self {
|
||||
self.base_url = url.into();
|
||||
self
|
||||
}
|
||||
|
||||
/// リクエストヘッダーを構築
|
||||
fn build_headers(&self) -> Result<HeaderMap, ClientError> {
|
||||
let mut headers = HeaderMap::new();
|
||||
|
||||
headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
|
||||
headers.insert(
|
||||
"x-api-key",
|
||||
HeaderValue::from_str(&self.api_key)
|
||||
.map_err(|e| ClientError::Config(format!("Invalid API key: {}", e)))?,
|
||||
);
|
||||
headers.insert(
|
||||
"anthropic-version",
|
||||
HeaderValue::from_str(&self.scheme.api_version)
|
||||
.map_err(|e| ClientError::Config(format!("Invalid API version: {}", e)))?,
|
||||
);
|
||||
|
||||
// 細粒度ツールストリーミングを有効にする場合
|
||||
if self.scheme.fine_grained_tool_streaming {
|
||||
headers.insert(
|
||||
"anthropic-beta",
|
||||
HeaderValue::from_static("fine-grained-tool-streaming-2025-05-14"),
|
||||
);
|
||||
}
|
||||
|
||||
Ok(headers)
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl LlmClient for AnthropicClient {
|
||||
async fn stream(
|
||||
&self,
|
||||
request: Request,
|
||||
) -> Result<Pin<Box<dyn Stream<Item = Result<Event, ClientError>> + Send>>, ClientError> {
|
||||
let url = format!("{}/v1/messages", self.base_url);
|
||||
let headers = self.build_headers()?;
|
||||
let body = self.scheme.build_request(&self.model, &request);
|
||||
|
||||
let response = self
|
||||
.http_client
|
||||
.post(&url)
|
||||
.headers(headers)
|
||||
.json(&body)
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
// エラーレスポンスをチェック
|
||||
if !response.status().is_success() {
|
||||
let status = response.status().as_u16();
|
||||
let text = response.text().await.unwrap_or_default();
|
||||
|
||||
// JSONでエラーをパースしてみる
|
||||
if let Ok(json) = serde_json::from_str::<serde_json::Value>(&text) {
|
||||
let error = json.get("error").unwrap_or(&json);
|
||||
let code = error.get("type").and_then(|v| v.as_str()).map(String::from);
|
||||
let message = error
|
||||
.get("message")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or(&text)
|
||||
.to_string();
|
||||
return Err(ClientError::Api {
|
||||
status: Some(status),
|
||||
code,
|
||||
message,
|
||||
});
|
||||
}
|
||||
|
||||
return Err(ClientError::Api {
|
||||
status: Some(status),
|
||||
code: None,
|
||||
message: text,
|
||||
});
|
||||
}
|
||||
|
||||
// SSEストリームを構築
|
||||
let scheme = self.scheme.clone();
|
||||
let byte_stream = response
|
||||
.bytes_stream()
|
||||
.map_err(|e| std::io::Error::other(e));
|
||||
let event_stream = byte_stream.eventsource();
|
||||
|
||||
// AnthropicはBlockStopイベントに正しいblock_typeを含まないため、
|
||||
// クライアント側で状態を追跡して補完する
|
||||
let mut current_block_type = None;
|
||||
|
||||
let stream = event_stream.filter_map(move |result| {
|
||||
ready(match result {
|
||||
Ok(event) => {
|
||||
// SSEイベントをパース
|
||||
match scheme.parse_event(&event.event, &event.data) {
|
||||
Ok(Some(mut evt)) => {
|
||||
// ブロックタイプの追跡と修正
|
||||
match &evt {
|
||||
Event::BlockStart(start) => {
|
||||
current_block_type = Some(start.block_type);
|
||||
}
|
||||
Event::BlockStop(stop) => {
|
||||
if let Some(block_type) = current_block_type.take() {
|
||||
// 正しいブロックタイプで上書き
|
||||
// (Event::BlockStopの中身を置換)
|
||||
evt =
|
||||
Event::BlockStop(crate::llm_client::event::BlockStop {
|
||||
block_type,
|
||||
..stop.clone()
|
||||
});
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
Some(Ok(evt))
|
||||
}
|
||||
Ok(None) => None,
|
||||
Err(e) => Some(Err(e)),
|
||||
}
|
||||
}
|
||||
Err(e) => Some(Err(ClientError::Sse(e.to_string()))),
|
||||
})
|
||||
});
|
||||
|
||||
Ok(Box::pin(stream))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Model name shared by the tests below.
    const MODEL: &str = "claude-sonnet-4-20250514";

    /// Builds the client under test.
    fn client() -> AnthropicClient {
        AnthropicClient::new("test-key", MODEL)
    }

    #[test]
    fn test_client_creation() {
        assert_eq!(client().model, "claude-sonnet-4-20250514");
    }

    #[test]
    fn test_build_headers() {
        let headers = client().build_headers().unwrap();

        for name in ["x-api-key", "anthropic-version", "anthropic-beta"].iter() {
            assert!(headers.contains_key(*name));
        }
    }
}
|
||||
|
|
@ -1,185 +0,0 @@
|
|||
//! Gemini プロバイダ実装
|
||||
//!
|
||||
//! Google Gemini APIと通信し、Eventストリームを出力
|
||||
|
||||
use std::pin::Pin;
|
||||
|
||||
use crate::llm_client::{
|
||||
ClientError, LlmClient, Request, event::Event, scheme::gemini::GeminiScheme,
|
||||
};
|
||||
use async_trait::async_trait;
|
||||
use eventsource_stream::Eventsource;
|
||||
use futures::{Stream, StreamExt, TryStreamExt};
|
||||
use reqwest::header::{CONTENT_TYPE, HeaderMap, HeaderValue};
|
||||
|
||||
/// Gemini クライアント
|
||||
pub struct GeminiClient {
|
||||
/// HTTPクライアント
|
||||
http_client: reqwest::Client,
|
||||
/// APIキー
|
||||
api_key: String,
|
||||
/// モデル名
|
||||
model: String,
|
||||
/// スキーマ
|
||||
scheme: GeminiScheme,
|
||||
/// ベースURL
|
||||
base_url: String,
|
||||
}
|
||||
|
||||
impl GeminiClient {
|
||||
/// 新しいGeminiクライアントを作成
|
||||
pub fn new(api_key: impl Into<String>, model: impl Into<String>) -> Self {
|
||||
Self {
|
||||
http_client: reqwest::Client::new(),
|
||||
api_key: api_key.into(),
|
||||
model: model.into(),
|
||||
scheme: GeminiScheme::default(),
|
||||
base_url: "https://generativelanguage.googleapis.com".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
/// カスタムHTTPクライアントを設定
|
||||
pub fn with_http_client(mut self, client: reqwest::Client) -> Self {
|
||||
self.http_client = client;
|
||||
self
|
||||
}
|
||||
|
||||
/// スキーマを設定
|
||||
pub fn with_scheme(mut self, scheme: GeminiScheme) -> Self {
|
||||
self.scheme = scheme;
|
||||
self
|
||||
}
|
||||
|
||||
/// ベースURLを設定
|
||||
pub fn with_base_url(mut self, url: impl Into<String>) -> Self {
|
||||
self.base_url = url.into();
|
||||
self
|
||||
}
|
||||
|
||||
/// リクエストヘッダーを構築
|
||||
fn build_headers(&self) -> Result<HeaderMap, ClientError> {
|
||||
let mut headers = HeaderMap::new();
|
||||
|
||||
headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
|
||||
|
||||
Ok(headers)
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl LlmClient for GeminiClient {
|
||||
async fn stream(
|
||||
&self,
|
||||
request: Request,
|
||||
) -> Result<Pin<Box<dyn Stream<Item = Result<Event, ClientError>> + Send>>, ClientError> {
|
||||
// URL構築: base_url/v1beta/models/{model}:streamGenerateContent?alt=sse&key={api_key}
|
||||
let url = format!(
|
||||
"{}/v1beta/models/{}:streamGenerateContent?alt=sse&key={}",
|
||||
self.base_url, self.model, self.api_key
|
||||
);
|
||||
|
||||
let headers = self.build_headers()?;
|
||||
let body = self.scheme.build_request(&request);
|
||||
|
||||
let response = self
|
||||
.http_client
|
||||
.post(&url)
|
||||
.headers(headers)
|
||||
.json(&body)
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
// エラーレスポンスをチェック
|
||||
if !response.status().is_success() {
|
||||
let status = response.status().as_u16();
|
||||
let text = response.text().await.unwrap_or_default();
|
||||
|
||||
// JSONでエラーをパースしてみる
|
||||
if let Ok(json) = serde_json::from_str::<serde_json::Value>(&text) {
|
||||
// Gemini error format: { "error": { "code": xxx, "message": "...", "status": "..." } }
|
||||
let error = json.get("error").unwrap_or(&json);
|
||||
let code = error
|
||||
.get("status")
|
||||
.and_then(|v| v.as_str())
|
||||
.map(String::from);
|
||||
let message = error
|
||||
.get("message")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or(&text)
|
||||
.to_string();
|
||||
return Err(ClientError::Api {
|
||||
status: Some(status),
|
||||
code,
|
||||
message,
|
||||
});
|
||||
}
|
||||
|
||||
return Err(ClientError::Api {
|
||||
status: Some(status),
|
||||
code: None,
|
||||
message: text,
|
||||
});
|
||||
}
|
||||
|
||||
// SSEストリームを構築
|
||||
let scheme = self.scheme.clone();
|
||||
let byte_stream = response
|
||||
.bytes_stream()
|
||||
.map_err(|e| std::io::Error::other(e));
|
||||
let event_stream = byte_stream.eventsource();
|
||||
|
||||
let stream = event_stream
|
||||
.map(move |result| {
|
||||
match result {
|
||||
Ok(event) => {
|
||||
// SSEイベントをパース
|
||||
// Geminiは "data: {...}" 形式で送る
|
||||
match scheme.parse_event(&event.data) {
|
||||
Ok(Some(events)) => Ok(Some(events)),
|
||||
Ok(None) => Ok(None),
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
}
|
||||
Err(e) => Err(ClientError::Sse(e.to_string())),
|
||||
}
|
||||
})
|
||||
// flatten Option<Vec<Event>> stream to Stream<Event>
|
||||
.map(|res| {
|
||||
let s: Pin<Box<dyn Stream<Item = Result<Event, ClientError>> + Send>> = match res {
|
||||
Ok(Some(events)) => Box::pin(futures::stream::iter(events.into_iter().map(Ok))),
|
||||
Ok(None) => Box::pin(futures::stream::empty()),
|
||||
Err(e) => Box::pin(futures::stream::once(async move { Err(e) })),
|
||||
};
|
||||
s
|
||||
})
|
||||
.flatten();
|
||||
|
||||
Ok(Box::pin(stream))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the client under test.
    fn client() -> GeminiClient {
        GeminiClient::new("test-key", "gemini-2.0-flash")
    }

    #[test]
    fn test_client_creation() {
        let created = client();
        assert_eq!(created.model, "gemini-2.0-flash");
    }

    #[test]
    fn test_build_headers() {
        let headers = client().build_headers().unwrap();
        assert!(headers.contains_key("content-type"));
    }

    #[test]
    fn test_custom_base_url() {
        let custom = "https://custom.api.example.com";
        let redirected = client().with_base_url(custom);
        assert_eq!(redirected.base_url, "https://custom.api.example.com");
    }
}
|
||||
|
|
@ -1,8 +0,0 @@
|
|||
//! プロバイダ実装
|
||||
//!
|
||||
//! 各プロバイダ固有のHTTPクライアント実装
|
||||
|
||||
pub mod anthropic;
|
||||
pub mod gemini;
|
||||
pub mod ollama;
|
||||
pub mod openai;
|
||||
|
|
@ -1,62 +0,0 @@
|
|||
//! Ollama プロバイダ実装
|
||||
//!
|
||||
//! OllamaはOpenAI互換APIを提供するため、OpenAIクライアントと互換性がある。
|
||||
//! デフォルトのベースURLと認証設定が異なる。
|
||||
|
||||
use std::pin::Pin;
|
||||
|
||||
use crate::llm_client::{
|
||||
ClientError, LlmClient, Request, event::Event, providers::openai::OpenAIClient,
|
||||
scheme::openai::OpenAIScheme,
|
||||
};
|
||||
use async_trait::async_trait;
|
||||
use futures::Stream;
|
||||
|
||||
/// Ollama クライアント
|
||||
///
|
||||
/// 内部的にOpenAIClientを使用するラッパー、もしくはOpenAIClientと同様の実装を持つ。
|
||||
/// ここではOpenAIClient構成をカスタマイズして提供する。
|
||||
pub struct OllamaClient {
|
||||
inner: OpenAIClient,
|
||||
}
|
||||
|
||||
impl OllamaClient {
|
||||
/// 新しいOllamaクライアントを作成
|
||||
pub fn new(model: impl Into<String>) -> Self {
|
||||
// Ollama usually runs on localhost:11434/v1
|
||||
// API key is "ollama" or ignored
|
||||
let base_url = "http://localhost:11434";
|
||||
|
||||
let scheme = OpenAIScheme::new().with_legacy_max_tokens(true);
|
||||
|
||||
let client = OpenAIClient::new("ollama", model)
|
||||
.with_base_url(base_url)
|
||||
.with_scheme(scheme);
|
||||
// Currently OpenAIScheme sets include_usage: true. Ollama supports checks?
|
||||
// Assuming Ollama modern versions support usage.
|
||||
|
||||
Self { inner: client }
|
||||
}
|
||||
|
||||
/// ベースURLを設定
|
||||
pub fn with_base_url(mut self, url: impl Into<String>) -> Self {
|
||||
self.inner = self.inner.with_base_url(url);
|
||||
self
|
||||
}
|
||||
|
||||
/// カスタムHTTPクライアントを設定
|
||||
pub fn with_http_client(mut self, client: reqwest::Client) -> Self {
|
||||
self.inner = self.inner.with_http_client(client);
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl LlmClient for OllamaClient {
|
||||
async fn stream(
|
||||
&self,
|
||||
request: Request,
|
||||
) -> Result<Pin<Box<dyn Stream<Item = Result<Event, ClientError>> + Send>>, ClientError> {
|
||||
self.inner.stream(request).await
|
||||
}
|
||||
}
|
||||
|
|
@ -1,212 +0,0 @@
|
|||
//! OpenAI プロバイダ実装
|
||||
//!
|
||||
//! OpenAI Chat Completions APIと通信し、Eventストリームを出力
|
||||
|
||||
use std::pin::Pin;
|
||||
|
||||
use crate::llm_client::{
|
||||
ClientError, ConfigWarning, LlmClient, Request, RequestConfig, event::Event,
|
||||
scheme::openai::OpenAIScheme,
|
||||
};
|
||||
use async_trait::async_trait;
|
||||
use eventsource_stream::Eventsource;
|
||||
use futures::{Stream, StreamExt, TryStreamExt};
|
||||
use reqwest::header::{CONTENT_TYPE, HeaderMap, HeaderValue};
|
||||
|
||||
/// OpenAI クライアント
|
||||
pub struct OpenAIClient {
|
||||
/// HTTPクライアント
|
||||
http_client: reqwest::Client,
|
||||
/// APIキー
|
||||
api_key: String,
|
||||
/// モデル名
|
||||
model: String,
|
||||
/// スキーマ
|
||||
scheme: OpenAIScheme,
|
||||
/// ベースURL
|
||||
base_url: String,
|
||||
}
|
||||
|
||||
impl OpenAIClient {
|
||||
/// 新しいOpenAIクライアントを作成
|
||||
pub fn new(api_key: impl Into<String>, model: impl Into<String>) -> Self {
|
||||
Self {
|
||||
http_client: reqwest::Client::new(),
|
||||
api_key: api_key.into(),
|
||||
model: model.into(),
|
||||
scheme: OpenAIScheme::default(),
|
||||
base_url: "https://api.openai.com".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
/// カスタムHTTPクライアントを設定
|
||||
pub fn with_http_client(mut self, client: reqwest::Client) -> Self {
|
||||
self.http_client = client;
|
||||
self
|
||||
}
|
||||
|
||||
/// スキーマを設定
|
||||
pub fn with_scheme(mut self, scheme: OpenAIScheme) -> Self {
|
||||
self.scheme = scheme;
|
||||
self
|
||||
}
|
||||
|
||||
/// ベースURLを設定
|
||||
pub fn with_base_url(mut self, url: impl Into<String>) -> Self {
|
||||
self.base_url = url.into();
|
||||
self
|
||||
}
|
||||
|
||||
/// リクエストヘッダーを構築
|
||||
fn build_headers(&self) -> Result<HeaderMap, ClientError> {
|
||||
let mut headers = HeaderMap::new();
|
||||
|
||||
headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
|
||||
|
||||
let api_key_val = if self.api_key.is_empty() {
|
||||
// For providers like Ollama, API key might be empty/dummy.
|
||||
// But typical OpenAI requires it.
|
||||
// We'll allow empty if user intends it, but usually it's checked.
|
||||
HeaderValue::from_static("")
|
||||
} else {
|
||||
let mut val = HeaderValue::from_str(&format!("Bearer {}", self.api_key))
|
||||
.map_err(|e| ClientError::Config(format!("Invalid API key: {}", e)))?;
|
||||
val.set_sensitive(true);
|
||||
val
|
||||
};
|
||||
|
||||
if !api_key_val.is_empty() {
|
||||
headers.insert("Authorization", api_key_val);
|
||||
}
|
||||
|
||||
Ok(headers)
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl LlmClient for OpenAIClient {
|
||||
async fn stream(
|
||||
&self,
|
||||
request: Request,
|
||||
) -> Result<Pin<Box<dyn Stream<Item = Result<Event, ClientError>> + Send>>, ClientError> {
|
||||
// Construct the URL: base_url usually ends without slash, path starts with slash or vice versa.
|
||||
// Standard OpenAI base is "https://api.openai.com". Endpoint is "/v1/chat/completions".
|
||||
// If external base_url includes /v1, we should be careful.
|
||||
// Let's assume defaults. If user provides "http://localhost:11434/v1", we append "/chat/completions".
|
||||
// Or cleaner: user provides full base up to version?
|
||||
// Anthropic client uses "{}/v1/messages".
|
||||
// Let's stick to appending "/v1/chat/completions" if base is just host,
|
||||
// OR assume base includes /v1 if user overrides it?
|
||||
// Let's use robust joining or simple assumption matching Anthropic pattern:
|
||||
// Default: https://api.openai.com -> https://api.openai.com/v1/chat/completions
|
||||
|
||||
// However, Ollama default is http://localhost:11434/v1/chat/completions if using OpenAI compact.
|
||||
// If we configure base_url via `with_base_url`, it's flexible.
|
||||
// Let's try to detect if /v1 is present or just append consistently.
|
||||
// Ideally `base_url` should be the root passed to `new`.
|
||||
|
||||
let url = if self.base_url.ends_with("/v1") {
|
||||
format!("{}/chat/completions", self.base_url)
|
||||
} else if self.base_url.ends_with("/") {
|
||||
format!("{}v1/chat/completions", self.base_url)
|
||||
} else {
|
||||
format!("{}/v1/chat/completions", self.base_url)
|
||||
};
|
||||
|
||||
let headers = self.build_headers()?;
|
||||
let body = self.scheme.build_request(&self.model, &request);
|
||||
|
||||
let response = self
|
||||
.http_client
|
||||
.post(&url)
|
||||
.headers(headers)
|
||||
.json(&body)
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
// エラーレスポンスをチェック
|
||||
if !response.status().is_success() {
|
||||
let status = response.status().as_u16();
|
||||
let text = response.text().await.unwrap_or_default();
|
||||
|
||||
// JSONでエラーをパースしてみる
|
||||
if let Ok(json) = serde_json::from_str::<serde_json::Value>(&text) {
|
||||
// OpenAI error format: { "error": { "message": "...", "type": "...", ... } }
|
||||
let error = json.get("error").unwrap_or(&json);
|
||||
let code = error.get("type").and_then(|v| v.as_str()).map(String::from);
|
||||
let message = error
|
||||
.get("message")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or(&text)
|
||||
.to_string();
|
||||
return Err(ClientError::Api {
|
||||
status: Some(status),
|
||||
code,
|
||||
message,
|
||||
});
|
||||
}
|
||||
|
||||
return Err(ClientError::Api {
|
||||
status: Some(status),
|
||||
code: None,
|
||||
message: text,
|
||||
});
|
||||
}
|
||||
|
||||
// SSEストリームを構築
|
||||
let scheme = self.scheme.clone();
|
||||
let byte_stream = response
|
||||
.bytes_stream()
|
||||
.map_err(|e| std::io::Error::other(e));
|
||||
let event_stream = byte_stream.eventsource();
|
||||
|
||||
let stream = event_stream
|
||||
.map(move |result| {
|
||||
match result {
|
||||
Ok(event) => {
|
||||
// SSEイベントをパース
|
||||
// OpenAI stream events are "data: {...}"
|
||||
// event.event is usually "message" (default) or empty.
|
||||
// parse_event takes data string.
|
||||
|
||||
if event.data == "[DONE]" {
|
||||
// End of stream handled inside parse_event usually returning None
|
||||
Ok(None)
|
||||
} else {
|
||||
match scheme.parse_event(&event.data) {
|
||||
Ok(Some(events)) => Ok(Some(events)),
|
||||
Ok(None) => Ok(None),
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => Err(ClientError::Sse(e.to_string())),
|
||||
}
|
||||
})
|
||||
// flatten Option<Vec<Event>> stream to Stream<Event>
|
||||
// map returns Result<Option<Vec<Event>>, Error>
|
||||
// We want Stream<Item = Result<Event, Error>>
|
||||
.map(|res| {
|
||||
let s: Pin<Box<dyn Stream<Item = Result<Event, ClientError>> + Send>> = match res {
|
||||
Ok(Some(events)) => Box::pin(futures::stream::iter(events.into_iter().map(Ok))),
|
||||
Ok(None) => Box::pin(futures::stream::empty()),
|
||||
Err(e) => Box::pin(futures::stream::once(async move { Err(e) })),
|
||||
};
|
||||
s
|
||||
})
|
||||
.flatten();
|
||||
|
||||
Ok(Box::pin(stream))
|
||||
}
|
||||
|
||||
fn validate_config(&self, config: &RequestConfig) -> Vec<ConfigWarning> {
|
||||
let mut warnings = Vec::new();
|
||||
|
||||
// OpenAI does not support top_k
|
||||
if config.top_k.is_some() {
|
||||
warnings.push(ConfigWarning::unsupported("top_k", "OpenAI"));
|
||||
}
|
||||
|
||||
warnings
|
||||
}
|
||||
}
|
||||
104
crates/llm-worker/src/llm_client/retry.rs
Normal file
104
crates/llm-worker/src/llm_client/retry.rs
Normal file
|
|
@ -0,0 +1,104 @@
|
|||
//! LLM response stream を開く前の transient error 向けリトライポリシー。
|
||||
//!
|
||||
//! Worker が `LlmClient::stream` の open error に対して `is_retryable` を見て
|
||||
//! retry / backoff / TUI event / cancellation をまとめて管理する。
|
||||
//! SSE 読み出し開始後の失敗は対象外。
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
/// Retry policy combining exponential backoff, jitter and a cumulative timeout.
///
/// `Default` returns the fixed values used across llm-worker. Extend this if
/// overriding through the manifest ever becomes necessary (currently not needed;
/// see `tickets/llm-worker-transient-retry.md`).
#[derive(Debug, Clone)]
pub struct RetryPolicy {
    /// Base of the exponential. `base * 2^attempt`, clamped to `cap`, is the upper
    /// bound from which the actual wait is drawn with full jitter.
    pub base: Duration,
    /// Upper bound of a single wait.
    pub cap: Duration,
    /// Total number of attempts (first try + retries). `1` means no retry.
    pub max_attempts: u32,
    /// Cumulative timeout measured from the start of the first send. Waits that would
    /// go beyond it are cut off.
    pub total_timeout: Duration,
}

impl Default for RetryPolicy {
    fn default() -> Self {
        Self {
            base: Duration::from_millis(500),
            cap: Duration::from_secs(10),
            max_attempts: 4,
            total_timeout: Duration::from_secs(30),
        }
    }
}

impl RetryPolicy {
    /// Returns how long to wait after the `attempt`-th failure (0-indexed).
    ///
    /// The result is drawn from `[0, min(base * 2^attempt, cap)]`. To honour a
    /// `Retry-After` value instead, do not call this and use that value as is.
    pub fn backoff(&self, attempt: u32) -> Duration {
        // Clamp the exponent so the shift below cannot overflow.
        let shift = attempt.min(20);
        let exp_nanos = saturating_nanos(self.base).saturating_mul(1u64 << shift);
        let upper = exp_nanos.min(saturating_nanos(self.cap));
        Duration::from_nanos(jitter_nanos(upper))
    }
}

/// Converts `duration` to whole nanoseconds, saturating at `u64::MAX`.
///
/// A plain `as u64` cast would keep only the low 64 bits, turning a very long
/// duration into an arbitrary (possibly zero) value.
fn saturating_nanos(duration: Duration) -> u64 {
    duration.as_nanos().min(u128::from(u64::MAX)) as u64
}

/// Draws one pseudo-random value from `[0, max_nanos]`.
///
/// A lightweight implementation that only scrambles the low bits of `SystemTime`
/// with splitmix64. It has no cryptographic quality, but is sufficient for the
/// collision avoidance that full jitter is meant to provide.
fn jitter_nanos(max_nanos: u64) -> u64 {
    if max_nanos == 0 {
        return 0;
    }
    // Only the low bits matter for the seed, so truncation is intentional here.
    // A clock set before the epoch falls back to a seed of 0.
    let seed = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map(|d| d.as_nanos() as u64)
        .unwrap_or(0);
    let mut x = seed.wrapping_add(0x9E37_79B9_7F4A_7C15);
    x = (x ^ (x >> 30)).wrapping_mul(0xBF58_476D_1CE4_E5B9);
    x = (x ^ (x >> 27)).wrapping_mul(0x94D0_49BB_1331_11EB);
    x ^= x >> 31;
    match max_nanos.checked_add(1) {
        Some(modulus) => x % modulus,
        // `max_nanos == u64::MAX`: every u64 is already in range.
        None => x,
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn default_policy_values() {
        let RetryPolicy {
            base,
            cap,
            max_attempts,
            total_timeout,
        } = RetryPolicy::default();

        assert_eq!(base, Duration::from_millis(500));
        assert_eq!(cap, Duration::from_secs(10));
        assert_eq!(max_attempts, 4);
        assert_eq!(total_timeout, Duration::from_secs(30));
    }

    #[test]
    fn backoff_respects_cap() {
        let policy = RetryPolicy::default();
        // Goes past the internal exponent clamp to cover large attempt numbers too.
        for attempt in 0..30u32 {
            let wait = policy.backoff(attempt);
            assert!(wait <= policy.cap, "attempt {attempt} exceeded cap",);
        }
    }

    #[test]
    fn backoff_zero_when_base_zero() {
        let policy = RetryPolicy {
            base: Duration::ZERO,
            ..RetryPolicy::default()
        };
        // Guard the assumptions the struct-update shorthand relies on.
        assert_eq!(policy.cap, Duration::from_secs(10));
        assert_eq!(policy.max_attempts, 4);
        assert_eq!(policy.total_timeout, Duration::from_secs(30));

        for attempt in 0..5 {
            assert_eq!(policy.backoff(attempt), Duration::ZERO);
        }
    }
}
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
//! Anthropic scheme の wire-level 既定 capability。
|
||||
//!
|
||||
//! モデル ID 固有のテーブル(`claude-*` など)は高レベル構築層
|
||||
//! (`provider::capability`)の責務。ここでは未知モデルでも「この wire で
|
||||
//! 安全に送れる最小共通項」を返すだけに留める。
|
||||
|
||||
use crate::llm_client::capability::{
|
||||
CacheStrategy, ModelCapability, StructuredOutput, ToolCallingSupport,
|
||||
};
|
||||
|
||||
/// Scheme 既定の capability。
|
||||
///
|
||||
/// Ollama の `/v1/messages` 流用を想定して `cache_control` を送らない
|
||||
/// `CacheStrategy::Auto` にする。
|
||||
pub(crate) fn default_capability() -> ModelCapability {
|
||||
ModelCapability {
|
||||
tool_calling: ToolCallingSupport::Parallel,
|
||||
structured_output: StructuredOutput::JsonSchema,
|
||||
reasoning: None,
|
||||
vision: false,
|
||||
prompt_caching: CacheStrategy::Auto,
|
||||
}
|
||||
}
|
||||
|
|
@ -12,6 +12,7 @@ use crate::llm_client::{
|
|||
use serde::Deserialize;
|
||||
|
||||
use super::AnthropicScheme;
|
||||
use super::scheme_impl::{AnthropicState, PendingThinking};
|
||||
|
||||
/// Anthropic SSEイベントタイプ
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
|
|
@ -75,7 +76,21 @@ pub(crate) enum ContentBlock {
|
|||
#[serde(rename = "text")]
|
||||
Text { text: String },
|
||||
#[serde(rename = "thinking")]
|
||||
Thinking { thinking: String },
|
||||
Thinking {
|
||||
#[serde(default)]
|
||||
thinking: String,
|
||||
/// 非ストリーミングレスポンス由来の初期 signature(通常はストリームでは
|
||||
/// 空 → `signature_delta` で埋まる)。
|
||||
#[serde(default)]
|
||||
signature: Option<String>,
|
||||
},
|
||||
#[serde(rename = "redacted_thinking")]
|
||||
RedactedThinking {
|
||||
/// 暗号化された opaque blob。signature ではなく、まるごと
|
||||
/// `redacted_thinking.data` として送り返す必要がある。
|
||||
#[serde(default)]
|
||||
data: String,
|
||||
},
|
||||
#[serde(rename = "tool_use")]
|
||||
ToolUse {
|
||||
id: String,
|
||||
|
|
@ -228,7 +243,9 @@ impl AnthropicScheme {
|
|||
fn convert_block_start(&self, event: &ContentBlockStartEvent) -> Event {
|
||||
let (block_type, metadata) = match &event.content_block {
|
||||
ContentBlock::Text { .. } => (BlockType::Text, BlockMetadata::Text),
|
||||
ContentBlock::Thinking { .. } => (BlockType::Thinking, BlockMetadata::Thinking),
|
||||
ContentBlock::Thinking { .. } | ContentBlock::RedactedThinking { .. } => {
|
||||
(BlockType::Thinking, BlockMetadata::Thinking)
|
||||
}
|
||||
ContentBlock::ToolUse { id, name, .. } => (
|
||||
BlockType::ToolUse,
|
||||
BlockMetadata::ToolUse {
|
||||
|
|
@ -264,13 +281,139 @@ impl AnthropicScheme {
|
|||
}))
|
||||
}
|
||||
|
||||
/// state を持ち回す上位パース。
|
||||
///
|
||||
/// `parse_event` の単発 Event に加えて、以下を行う:
|
||||
/// - `content_block_stop` の `block_type` を直前の Start 値で書き戻す
|
||||
/// - `thinking` / `redacted_thinking` ブロックの本体・signature・data を
|
||||
/// `state.pending_thinking` に蓄積し、`content_block_stop` で
|
||||
/// `Event::ReasoningItem` を追加発火する
|
||||
/// - `signature_delta` を蓄積(Stream channel には流さず、reasoning event
|
||||
/// にだけ反映する)
|
||||
pub(crate) fn parse_with_state(
|
||||
&self,
|
||||
event_type: &str,
|
||||
data: &str,
|
||||
state: &mut AnthropicState,
|
||||
) -> Result<Vec<Event>, ClientError> {
|
||||
let Some(parsed_event_type) = AnthropicEventType::parse(event_type) else {
|
||||
return Ok(Vec::new());
|
||||
};
|
||||
|
||||
// signature_delta はストリーム表示には流さず、state にだけ蓄積。
|
||||
// それ以外は parse_event で標準 Event 化する。
|
||||
let mut emitted: Vec<Event> = Vec::new();
|
||||
|
||||
match parsed_event_type {
|
||||
AnthropicEventType::ContentBlockStart => {
|
||||
let raw: ContentBlockStartEvent = serde_json::from_str(data)?;
|
||||
state.current_block_type = Some(match &raw.content_block {
|
||||
ContentBlock::Text { .. } => BlockType::Text,
|
||||
ContentBlock::Thinking { .. } | ContentBlock::RedactedThinking { .. } => {
|
||||
BlockType::Thinking
|
||||
}
|
||||
ContentBlock::ToolUse { .. } => BlockType::ToolUse,
|
||||
});
|
||||
match &raw.content_block {
|
||||
ContentBlock::Thinking {
|
||||
thinking,
|
||||
signature,
|
||||
} => {
|
||||
state.pending_thinking = Some(PendingThinking {
|
||||
text: thinking.clone(),
|
||||
signature: signature.clone(),
|
||||
redacted_data: None,
|
||||
});
|
||||
}
|
||||
ContentBlock::RedactedThinking { data: blob } => {
|
||||
state.pending_thinking = Some(PendingThinking {
|
||||
text: String::new(),
|
||||
signature: None,
|
||||
redacted_data: Some(blob.clone()),
|
||||
});
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
emitted.push(self.convert_block_start(&raw));
|
||||
}
|
||||
AnthropicEventType::ContentBlockDelta => {
|
||||
let raw: ContentBlockDeltaEvent = serde_json::from_str(data)?;
|
||||
match &raw.delta {
|
||||
DeltaBlock::ThinkingDelta { thinking } => {
|
||||
if let Some(pending) = state.pending_thinking.as_mut() {
|
||||
pending.text.push_str(thinking);
|
||||
}
|
||||
emitted.push(Event::BlockDelta(BlockDelta {
|
||||
index: raw.index,
|
||||
delta: DeltaContent::Thinking(thinking.clone()),
|
||||
}));
|
||||
}
|
||||
DeltaBlock::SignatureDelta { signature } => {
|
||||
if let Some(pending) = state.pending_thinking.as_mut() {
|
||||
// 通常 1 回しか来ないが、複数 fragment 来ても連結しておく
|
||||
match &mut pending.signature {
|
||||
Some(acc) => acc.push_str(signature),
|
||||
None => pending.signature = Some(signature.clone()),
|
||||
}
|
||||
}
|
||||
}
|
||||
DeltaBlock::TextDelta { text } => {
|
||||
emitted.push(Event::BlockDelta(BlockDelta {
|
||||
index: raw.index,
|
||||
delta: DeltaContent::Text(text.clone()),
|
||||
}));
|
||||
}
|
||||
DeltaBlock::InputJsonDelta { partial_json } => {
|
||||
emitted.push(Event::BlockDelta(BlockDelta {
|
||||
index: raw.index,
|
||||
delta: DeltaContent::InputJson(partial_json.clone()),
|
||||
}));
|
||||
}
|
||||
}
|
||||
}
|
||||
AnthropicEventType::ContentBlockStop => {
|
||||
let raw: ContentBlockStopEvent = serde_json::from_str(data)?;
|
||||
let block_type = state.current_block_type.take().unwrap_or(BlockType::Text);
|
||||
emitted.push(Event::BlockStop(BlockStop {
|
||||
index: raw.index,
|
||||
block_type,
|
||||
stop_reason: None,
|
||||
}));
|
||||
if matches!(block_type, BlockType::Thinking) {
|
||||
if let Some(pending) = state.pending_thinking.take() {
|
||||
emitted.push(Event::ReasoningItem(pending.into_event()));
|
||||
}
|
||||
}
|
||||
}
|
||||
// 残りは state を必要としない。既存 parse_event に委譲。
|
||||
_ => {
|
||||
if let Some(event) = self.parse_event(event_type, data)? {
|
||||
emitted.push(event);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(emitted)
|
||||
}
|
||||
|
||||
fn convert_usage(&self, usage: &UsageData) -> UsageEvent {
|
||||
let input = usage.input_tokens.unwrap_or(0);
|
||||
// Anthropic の `input_tokens` は **キャッシュ外** の入力トークンのみで、
|
||||
// プロンプト全長は input_tokens + cache_read + cache_creation。
|
||||
// UsageEvent の `input_tokens` には「占有量(プロンプト全長)」を載せる
|
||||
// 規約に合わせて、ここでキャッシュ分を足し込む。
|
||||
// cache_read_input_tokens / cache_creation_input_tokens は内訳として
|
||||
// 別フィールドに残るので、料金計算側で `input - cache_read - cache_creation`
|
||||
// により非キャッシュ入力分は逆算可能。
|
||||
let raw_input = usage.input_tokens.unwrap_or(0);
|
||||
let cache_read = usage.cache_read_input_tokens.unwrap_or(0);
|
||||
let cache_creation = usage.cache_creation_input_tokens.unwrap_or(0);
|
||||
let input_total = raw_input + cache_read + cache_creation;
|
||||
let output = usage.output_tokens.unwrap_or(0);
|
||||
|
||||
UsageEvent {
|
||||
input_tokens: usage.input_tokens,
|
||||
input_tokens: usage.input_tokens.map(|_| input_total),
|
||||
output_tokens: usage.output_tokens,
|
||||
total_tokens: Some(input + output),
|
||||
total_tokens: Some(input_total + output),
|
||||
cache_read_input_tokens: usage.cache_read_input_tokens,
|
||||
cache_creation_input_tokens: usage.cache_creation_input_tokens,
|
||||
}
|
||||
|
|
@ -289,12 +432,33 @@ mod tests {
|
|||
let event = scheme.parse_event("message_start", data).unwrap().unwrap();
|
||||
match event {
|
||||
Event::Usage(u) => {
|
||||
// キャッシュなしなので input_total = raw_input = 10
|
||||
assert_eq!(u.input_tokens, Some(10));
|
||||
}
|
||||
_ => panic!("Expected Usage event"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
fn test_convert_usage_includes_cache_in_input_total() {
    // Anthropic's `input_tokens` counts only the non-cached input; the
    // occupied prompt size is input + cache_read + cache_creation.
    // `UsageEvent.input_tokens` is normalised to that occupied size.
    let raw_usage = UsageData {
        input_tokens: Some(100),
        output_tokens: Some(50),
        cache_read_input_tokens: Some(800),
        cache_creation_input_tokens: Some(200),
    };

    let converted = AnthropicScheme::new().convert_usage(&raw_usage);

    // 100 + 800 + 200 = 1100
    assert_eq!(converted.input_tokens, Some(1100));
    assert_eq!(converted.cache_read_input_tokens, Some(800));
    assert_eq!(converted.cache_creation_input_tokens, Some(200));
    // 1100 input + 50 output
    assert_eq!(converted.total_tokens, Some(1150));
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_content_block_start_text() {
|
||||
let scheme = AnthropicScheme::new();
|
||||
|
|
@ -359,6 +523,117 @@ mod tests {
|
|||
}
|
||||
}
|
||||
|
||||
#[test]
fn thinking_block_emits_reasoning_item_with_signature() {
    // Once a thinking block completes, the ReasoningItem must carry both the
    // accumulated text and the signature.
    let scheme = AnthropicScheme::new();
    let mut state = AnthropicState::default();

    let start_events = scheme
        .parse_with_state(
            "content_block_start",
            r#"{"type":"content_block_start","index":0,"content_block":{"type":"thinking","thinking":""}}"#,
            &mut state,
        )
        .unwrap();
    assert!(matches!(start_events[0], Event::BlockStart(_)));

    // Two text fragments followed by the signature, fed in stream order.
    let delta_frames = [
        r#"{"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":"hello "}}"#,
        r#"{"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":"world"}}"#,
        r#"{"type":"content_block_delta","index":0,"delta":{"type":"signature_delta","signature":"SIG-XYZ"}}"#,
    ];
    for frame in delta_frames {
        scheme
            .parse_with_state("content_block_delta", frame, &mut state)
            .unwrap();
    }

    let stop_events = scheme
        .parse_with_state(
            "content_block_stop",
            r#"{"type":"content_block_stop","index":0}"#,
            &mut state,
        )
        .unwrap();

    // BlockStop comes first, followed by the ReasoningItem.
    assert!(matches!(stop_events[0], Event::BlockStop(_)));
    match &stop_events[1] {
        Event::ReasoningItem(reasoning) => {
            assert_eq!(reasoning.text, "hello world");
            assert_eq!(reasoning.signature.as_deref(), Some("SIG-XYZ"));
            assert!(reasoning.encrypted_content.is_none());
        }
        other => panic!("expected ReasoningItem, got {:?}", other),
    }
}
|
||||
|
||||
#[test]
fn redacted_thinking_emits_reasoning_item_with_data() {
    // A redacted_thinking block carries only an opaque blob; it must come
    // back as `encrypted_content` with no text and no signature.
    let scheme = AnthropicScheme::new();
    let mut state = AnthropicState::default();

    scheme
        .parse_with_state(
            "content_block_start",
            r#"{"type":"content_block_start","index":0,"content_block":{"type":"redacted_thinking","data":"opaque-blob"}}"#,
            &mut state,
        )
        .unwrap();

    let stop_events = scheme
        .parse_with_state(
            "content_block_stop",
            r#"{"type":"content_block_stop","index":0}"#,
            &mut state,
        )
        .unwrap();

    // Index 1 is the event that follows the BlockStop.
    match &stop_events[1] {
        Event::ReasoningItem(reasoning) => {
            assert!(reasoning.text.is_empty());
            assert!(reasoning.signature.is_none());
            assert_eq!(reasoning.encrypted_content.as_deref(), Some("opaque-blob"));
        }
        _ => panic!("expected ReasoningItem"),
    }
}
|
||||
|
||||
#[test]
fn text_block_does_not_emit_reasoning_item() {
    // A plain text block must end with a lone BlockStop and nothing else.
    let scheme = AnthropicScheme::new();
    let mut state = AnthropicState::default();

    let lead_in = [
        (
            "content_block_start",
            r#"{"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}"#,
        ),
        (
            "content_block_delta",
            r#"{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"hi"}}"#,
        ),
    ];
    for (event_type, payload) in lead_in {
        scheme
            .parse_with_state(event_type, payload, &mut state)
            .unwrap();
    }

    let stop_events = scheme
        .parse_with_state(
            "content_block_stop",
            r#"{"type":"content_block_stop","index":0}"#,
            &mut state,
        )
        .unwrap();

    assert_eq!(stop_events.len(), 1);
    assert!(matches!(stop_events[0], Event::BlockStop(_)));
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_ping() {
|
||||
let scheme = AnthropicScheme::new();
|
||||
|
|
|
|||
|
|
@ -3,8 +3,12 @@
|
|||
//! - リクエストJSON生成
|
||||
//! - SSEイベントパース → Event変換
|
||||
|
||||
mod capability;
|
||||
mod events;
|
||||
mod request;
|
||||
mod scheme_impl;
|
||||
|
||||
pub use scheme_impl::AnthropicState;
|
||||
|
||||
/// Anthropicスキーマ
|
||||
///
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
107
crates/llm-worker/src/llm_client/scheme/anthropic/scheme_impl.rs
Normal file
107
crates/llm-worker/src/llm_client/scheme/anthropic/scheme_impl.rs
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
//! `impl Scheme for AnthropicScheme`
|
||||
//!
|
||||
//! Anthropic Messages API の wire 表現に必要な URL・ヘッダ・SSE パース・
|
||||
//! リクエスト body 生成を共通 `Scheme` trait にぶら下げる。
|
||||
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::llm_client::{
|
||||
ClientError,
|
||||
auth::AuthRequirement,
|
||||
capability::ModelCapability,
|
||||
event::{BlockType, Event, ReasoningItemEvent},
|
||||
scheme::Scheme,
|
||||
types::Request,
|
||||
};
|
||||
|
||||
use super::AnthropicScheme;
|
||||
|
||||
/// State required while parsing Anthropic SSE frames.
///
/// 1. `content_block_stop` events do not carry a `block_type`, so the
///    `block_type` observed at the preceding `content_block_start` is kept
///    here and written back into `BlockStop`.
/// 2. Accumulates the `thinking_delta` text and `signature_delta` signature
///    of a `thinking` block, as well as the `data` of a `redacted_thinking`
///    block, so that `Event::ReasoningItem` can be emitted at
///    `content_block_stop` (for round-trip persistence).
#[derive(Debug, Default)]
pub struct AnthropicState {
    /// Block type recorded at the most recent `content_block_start`.
    pub(crate) current_block_type: Option<BlockType>,
    /// Buffer for the reasoning block currently being accumulated, if any.
    pub(crate) pending_thinking: Option<PendingThinking>,
}
|
||||
|
||||
/// Accumulation buffer for a single `thinking` or `redacted_thinking`
/// content block.
#[derive(Debug, Default)]
pub(crate) struct PendingThinking {
    /// Reasoning text collected so far.
    pub(crate) text: String,
    /// Signature collected so far, if any.
    pub(crate) signature: Option<String>,
    /// Opaque `data` blob of a `redacted_thinking` block, if any.
    pub(crate) redacted_data: Option<String>,
}
|
||||
|
||||
impl PendingThinking {
|
||||
pub(crate) fn into_event(self) -> ReasoningItemEvent {
|
||||
ReasoningItemEvent {
|
||||
id: None,
|
||||
text: self.text,
|
||||
summary: Vec::new(),
|
||||
encrypted_content: self.redacted_data,
|
||||
signature: self.signature,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Scheme for AnthropicScheme {
|
||||
type State = AnthropicState;
|
||||
|
||||
fn default_base_url(&self) -> &'static str {
|
||||
"https://api.anthropic.com"
|
||||
}
|
||||
|
||||
fn path(&self, _model_id: &str) -> String {
|
||||
"/v1/messages".to_string()
|
||||
}
|
||||
|
||||
fn required_auth(&self) -> AuthRequirement {
|
||||
// Ollama の `/v1/messages` 互換では認証が要らないが、それは
|
||||
// `AuthRef::None` + `build_headers` 側の「ResolvedAuth::None
|
||||
// なら何もしない」分岐で吸収する(`accepts` 判定で弾かれない
|
||||
// よう、現状は XApiKey を要求しつつ、None 側でもパスするよう
|
||||
// にする戦略)。
|
||||
AuthRequirement::XApiKey
|
||||
}
|
||||
|
||||
fn additional_headers(&self) -> Vec<(&'static str, String)> {
|
||||
let mut headers = vec![("anthropic-version", self.api_version.clone())];
|
||||
if self.fine_grained_tool_streaming {
|
||||
headers.push((
|
||||
"anthropic-beta",
|
||||
"fine-grained-tool-streaming-2025-05-14".to_string(),
|
||||
));
|
||||
}
|
||||
headers
|
||||
}
|
||||
|
||||
fn build_request_body(
|
||||
&self,
|
||||
model_id: &str,
|
||||
request: &Request,
|
||||
capability: &ModelCapability,
|
||||
) -> Value {
|
||||
let req = self.build_request(model_id, request, capability);
|
||||
serde_json::to_value(&req).expect("AnthropicRequest is always serialisable")
|
||||
}
|
||||
|
||||
fn parse_sse(
|
||||
&self,
|
||||
event_type: &str,
|
||||
data: &str,
|
||||
state: &mut Self::State,
|
||||
) -> Result<Vec<Event>, ClientError> {
|
||||
self.parse_with_state(event_type, data, state)
|
||||
}
|
||||
|
||||
fn default_capability(&self) -> ModelCapability {
|
||||
super::capability::default_capability()
|
||||
}
|
||||
}
|
||||
20
crates/llm-worker/src/llm_client/scheme/gemini/capability.rs
Normal file
20
crates/llm-worker/src/llm_client/scheme/gemini/capability.rs
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
//! Gemini scheme の wire-level 既定 capability。
|
||||
//!
|
||||
//! モデル ID 固有のテーブル(`gemini-*` バージョン別の reasoning 有無)は
|
||||
//! 高レベル構築層(`provider::capability`)の責務。ここでは wire の
|
||||
//! 保守的 default のみ。
|
||||
|
||||
use crate::llm_client::capability::{
|
||||
CacheStrategy, ModelCapability, StructuredOutput, ToolCallingSupport,
|
||||
};
|
||||
|
||||
/// Scheme 既定の capability(未知モデル / 未明示モデル用)。
|
||||
pub(crate) fn default_capability() -> ModelCapability {
|
||||
ModelCapability {
|
||||
tool_calling: ToolCallingSupport::Parallel,
|
||||
structured_output: StructuredOutput::JsonSchema,
|
||||
reasoning: None,
|
||||
vision: true,
|
||||
prompt_caching: CacheStrategy::Auto,
|
||||
}
|
||||
}
|
||||
|
|
@ -131,6 +131,7 @@ impl GeminiScheme {
|
|||
status: None,
|
||||
code: Some("parse_error".to_string()),
|
||||
message: format!("Failed to parse Gemini SSE data: {} -> {}", e, data),
|
||||
retry_after: None,
|
||||
})?;
|
||||
|
||||
let mut events = Vec::new();
|
||||
|
|
|
|||
|
|
@ -3,8 +3,10 @@
|
|||
//! - リクエストJSON生成
|
||||
//! - SSEイベントパース → Event変換
|
||||
|
||||
mod capability;
|
||||
mod events;
|
||||
mod request;
|
||||
mod scheme_impl;
|
||||
|
||||
/// Geminiスキーマ
|
||||
///
|
||||
|
|
|
|||
|
|
@ -7,7 +7,8 @@ use serde_json::Value;
|
|||
|
||||
use crate::llm_client::{
|
||||
Request,
|
||||
types::{Item, Role, ToolDefinition},
|
||||
capability::{ModelCapability, ReasoningControl, ReasoningSupport},
|
||||
types::{Item, Role, ToolDefinition, parse_tool_arguments},
|
||||
};
|
||||
|
||||
use super::GeminiScheme;
|
||||
|
|
@ -139,11 +140,26 @@ pub(crate) struct GeminiGenerationConfig {
|
|||
/// Stop sequences
|
||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
pub stop_sequences: Vec<String>,
|
||||
/// Thinking / reasoning 設定(Gemini 2.5 以降)。
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub thinking_config: Option<GeminiThinkingConfig>,
|
||||
}
|
||||
|
||||
/// Gemini thinking config (Gemini 2.5 and later).
///
/// Serialised with camelCase field names.
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct GeminiThinkingConfig {
    /// Token budget for thinking. `-1` means dynamic.
    pub thinking_budget: i32,
}
|
||||
|
||||
impl GeminiScheme {
|
||||
/// Build Gemini request from Request
|
||||
pub(crate) fn build_request(&self, request: &Request) -> GeminiRequest {
|
||||
pub(crate) fn build_request(
|
||||
&self,
|
||||
request: &Request,
|
||||
capability: &ModelCapability,
|
||||
) -> GeminiRequest {
|
||||
let contents = self.convert_items_to_contents(&request.items);
|
||||
|
||||
// System prompt
|
||||
|
|
@ -177,6 +193,24 @@ impl GeminiScheme {
|
|||
None
|
||||
};
|
||||
|
||||
// Reasoning の投影: capability が BudgetTokens / Both をサポートし、
|
||||
// request 側で budget_tokens が指定されているときだけ thinking_config を付ける。
|
||||
let supports_budget = matches!(
|
||||
capability.reasoning,
|
||||
Some(ReasoningSupport::BudgetTokens | ReasoningSupport::Both),
|
||||
);
|
||||
let thinking_config = request
|
||||
.config
|
||||
.reasoning
|
||||
.as_ref()
|
||||
.filter(|_| supports_budget)
|
||||
.and_then(|rc| match rc {
|
||||
ReasoningControl::BudgetTokens(budget) => Some(GeminiThinkingConfig {
|
||||
thinking_budget: *budget,
|
||||
}),
|
||||
ReasoningControl::Effort(_) => None,
|
||||
});
|
||||
|
||||
// Generation config
|
||||
let generation_config = Some(GeminiGenerationConfig {
|
||||
max_output_tokens: request.config.max_tokens,
|
||||
|
|
@ -184,6 +218,7 @@ impl GeminiScheme {
|
|||
top_p: request.config.top_p,
|
||||
top_k: request.config.top_k,
|
||||
stop_sequences: request.config.stop_sequences.clone(),
|
||||
thinking_config,
|
||||
});
|
||||
|
||||
GeminiRequest {
|
||||
|
|
@ -216,9 +251,8 @@ impl GeminiScheme {
|
|||
);
|
||||
|
||||
let gemini_role = match role {
|
||||
Role::User => "user",
|
||||
Role::User | Role::System => "user",
|
||||
Role::Assistant => "model",
|
||||
Role::System => continue, // Skip system role items
|
||||
};
|
||||
|
||||
let parts: Vec<GeminiPart> = content
|
||||
|
|
@ -245,9 +279,8 @@ impl GeminiScheme {
|
|||
});
|
||||
}
|
||||
|
||||
// Parse arguments
|
||||
let args = serde_json::from_str(arguments)
|
||||
.unwrap_or_else(|_| Value::Object(serde_json::Map::new()));
|
||||
// Parse arguments (normalize non-object / legacy "null" payloads to {})
|
||||
let args = parse_tool_arguments(arguments);
|
||||
|
||||
pending_model_parts.push(GeminiPart::FunctionCall {
|
||||
function_call: GeminiFunctionCall {
|
||||
|
|
@ -258,7 +291,10 @@ impl GeminiScheme {
|
|||
}
|
||||
|
||||
Item::ToolResult {
|
||||
call_id, output, ..
|
||||
call_id,
|
||||
summary,
|
||||
content,
|
||||
..
|
||||
} => {
|
||||
// Flush pending model parts first
|
||||
if !pending_model_parts.is_empty() {
|
||||
|
|
@ -268,12 +304,16 @@ impl GeminiScheme {
|
|||
});
|
||||
}
|
||||
|
||||
let text = match content {
|
||||
Some(c) => format!("{summary}\n{c}"),
|
||||
None => summary.clone(),
|
||||
};
|
||||
pending_user_parts.push(GeminiPart::FunctionResponse {
|
||||
function_response: GeminiFunctionResponse {
|
||||
name: call_id.clone(),
|
||||
response: GeminiFunctionResponseContent {
|
||||
name: call_id.clone(),
|
||||
content: Value::String(output.clone()),
|
||||
content: Value::String(text),
|
||||
},
|
||||
},
|
||||
});
|
||||
|
|
@ -336,6 +376,26 @@ impl GeminiScheme {
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::llm_client::capability::{
|
||||
CacheStrategy, ReasoningEffort, StructuredOutput, ToolCallingSupport,
|
||||
};
|
||||
|
||||
fn cap() -> ModelCapability {
|
||||
ModelCapability {
|
||||
tool_calling: ToolCallingSupport::Parallel,
|
||||
structured_output: StructuredOutput::JsonSchema,
|
||||
reasoning: None,
|
||||
vision: true,
|
||||
prompt_caching: CacheStrategy::Auto,
|
||||
}
|
||||
}
|
||||
|
||||
fn cap_budget_reasoning() -> ModelCapability {
|
||||
ModelCapability {
|
||||
reasoning: Some(ReasoningSupport::BudgetTokens),
|
||||
..cap()
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_build_simple_request() {
|
||||
|
|
@ -344,7 +404,7 @@ mod tests {
|
|||
.system("You are a helpful assistant.")
|
||||
.user("Hello!");
|
||||
|
||||
let gemini_req = scheme.build_request(&request);
|
||||
let gemini_req = scheme.build_request(&request, &cap());
|
||||
|
||||
assert!(gemini_req.system_instruction.is_some());
|
||||
assert_eq!(gemini_req.contents.len(), 1);
|
||||
|
|
@ -366,7 +426,7 @@ mod tests {
|
|||
})),
|
||||
);
|
||||
|
||||
let gemini_req = scheme.build_request(&request);
|
||||
let gemini_req = scheme.build_request(&request, &cap());
|
||||
|
||||
assert_eq!(gemini_req.tools.len(), 1);
|
||||
assert_eq!(gemini_req.tools[0].function_declarations.len(), 1);
|
||||
|
|
@ -382,7 +442,7 @@ mod tests {
|
|||
let scheme = GeminiScheme::new();
|
||||
let request = Request::new().user("Hello").assistant("Hi there!");
|
||||
|
||||
let gemini_req = scheme.build_request(&request);
|
||||
let gemini_req = scheme.build_request(&request, &cap());
|
||||
|
||||
assert_eq!(gemini_req.contents.len(), 2);
|
||||
assert_eq!(gemini_req.contents[0].role, "user");
|
||||
|
|
@ -401,11 +461,36 @@ mod tests {
|
|||
))
|
||||
.item(Item::tool_result("call_123", "Sunny, 25°C"));
|
||||
|
||||
let gemini_req = scheme.build_request(&request);
|
||||
let gemini_req = scheme.build_request(&request, &cap());
|
||||
|
||||
assert_eq!(gemini_req.contents.len(), 3);
|
||||
assert_eq!(gemini_req.contents[0].role, "user");
|
||||
assert_eq!(gemini_req.contents[1].role, "model");
|
||||
assert_eq!(gemini_req.contents[2].role, "user");
|
||||
}
|
||||
|
||||
#[test]
fn thinking_budget_projected_when_supported() {
    // A budget-token request against a budget-capable model must surface
    // the budget in the generation config.
    let mut request = Request::new().user("think");
    request.config.reasoning = Some(ReasoningControl::BudgetTokens(-1));

    let budget = GeminiScheme::new()
        .build_request(&request, &cap_budget_reasoning())
        .generation_config
        .expect("generation config")
        .thinking_config
        .expect("thinking config")
        .thinking_budget;

    assert_eq!(budget, -1);
}
|
||||
|
||||
#[test]
fn effort_reasoning_not_projected_to_gemini() {
    // An effort-style reasoning request must not produce a thinking config,
    // even when the capability supports budget tokens.
    let mut request = Request::new().user("think");
    request.config.reasoning = Some(ReasoningControl::Effort(ReasoningEffort::Medium));

    let generation_config = GeminiScheme::new()
        .build_request(&request, &cap_budget_reasoning())
        .generation_config
        .expect("generation config");

    assert!(generation_config.thinking_config.is_none());
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,49 @@
|
|||
//! `impl Scheme for GeminiScheme`
|
||||
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::llm_client::{
|
||||
ClientError, auth::AuthRequirement, capability::ModelCapability, event::Event, scheme::Scheme,
|
||||
types::Request,
|
||||
};
|
||||
|
||||
use super::GeminiScheme;
|
||||
|
||||
impl Scheme for GeminiScheme {
|
||||
type State = ();
|
||||
|
||||
fn default_base_url(&self) -> &'static str {
|
||||
"https://generativelanguage.googleapis.com"
|
||||
}
|
||||
|
||||
fn path(&self, model_id: &str) -> String {
|
||||
format!("/v1beta/models/{model_id}:streamGenerateContent?alt=sse")
|
||||
}
|
||||
|
||||
fn required_auth(&self) -> AuthRequirement {
|
||||
AuthRequirement::QueryParam { name: "key" }
|
||||
}
|
||||
|
||||
fn build_request_body(
|
||||
&self,
|
||||
_model_id: &str,
|
||||
request: &Request,
|
||||
capability: &ModelCapability,
|
||||
) -> Value {
|
||||
let req = self.build_request(request, capability);
|
||||
serde_json::to_value(&req).expect("GeminiRequest is always serialisable")
|
||||
}
|
||||
|
||||
fn parse_sse(
|
||||
&self,
|
||||
_event_type: &str,
|
||||
data: &str,
|
||||
_state: &mut Self::State,
|
||||
) -> Result<Vec<Event>, ClientError> {
|
||||
Ok(self.parse_event(data)?.unwrap_or_default())
|
||||
}
|
||||
|
||||
fn default_capability(&self) -> ModelCapability {
|
||||
super::capability::default_capability()
|
||||
}
|
||||
}
|
||||
|
|
@ -3,7 +3,90 @@
|
|||
//! 各APIスキーマごとの変換ロジック
|
||||
//! - リクエスト変換: Request → プロバイダ固有JSON
|
||||
//! - レスポンス変換: SSEイベント → Event
|
||||
//!
|
||||
//! [`Scheme`] trait により `HttpTransport<S>` から scheme 固有の差分
|
||||
//! (パス、ヘッダ、認証要件、body 生成、SSE パース)をすべて委譲する。
|
||||
|
||||
pub mod anthropic;
|
||||
pub mod gemini;
|
||||
pub mod openai;
|
||||
pub mod openai_chat;
|
||||
pub mod openai_responses;
|
||||
|
||||
use serde_json::Value;
|
||||
|
||||
use super::auth::AuthRequirement;
|
||||
use super::capability::ModelCapability;
|
||||
use super::client::ConfigWarning;
|
||||
use super::error::ClientError;
|
||||
use super::event::Event;
|
||||
use super::types::{Request, RequestConfig};
|
||||
|
||||
/// Abstraction over a wire scheme. Implement it once per provider API
/// specification.
///
/// `HttpTransport<S: Scheme>` owns URL assembly, authentication header
/// insertion and the SSE parsing loop; a `Scheme` implementation supplies
/// only the parts that differ between specifications.
///
/// # State
///
/// A scheme that must keep state between SSE frames (for example, to
/// compensate for Anthropic's `BlockStop` not carrying a `block_type`)
/// puts a type describing that intermediate state in [`Scheme::State`].
/// A stateless scheme uses `type State = ()`.
pub trait Scheme: Clone + Send + Sync + 'static {
    /// State shared between frames during SSE parsing. `HttpTransport`
    /// creates it once with `Default::default()` when a stream starts and
    /// passes it as `&mut` for every frame.
    type State: Default + Send + 'static;

    /// Base URL of the scheme (the default used when
    /// `ModelConfig::base_url` is not specified).
    fn default_base_url(&self) -> &'static str;

    /// Relative path of the request target. It takes the model ID because
    /// some providers, such as Gemini, embed the model name in the path.
    fn path(&self, model_id: &str) -> String;

    /// Authentication form required by this scheme. It is checked against
    /// `manifest::AuthRef` at `build_client` time.
    fn required_auth(&self) -> AuthRequirement;

    /// Additional headers besides `Content-Type`, e.g. `anthropic-version`
    /// / `anthropic-beta`. Defaults to none.
    fn additional_headers(&self) -> Vec<(&'static str, String)> {
        Vec::new()
    }

    /// Builds the request body. `capability` is passed so the scheme can
    /// branch its behaviour on `CacheStrategy`, `ReasoningSupport` and the
    /// like.
    fn build_request_body(
        &self,
        model_id: &str,
        request: &Request,
        capability: &ModelCapability,
    ) -> Value;

    /// Converts one SSE event into zero or more [`Event`]s.
    ///
    /// `event_type` is the `event:` field of the SSE frame and `data` is
    /// its `data:` field. Terminal markers such as `[DONE]` are detected
    /// by the implementation. `state` is mutable state shared for the
    /// lifetime of one stream.
    fn parse_sse(
        &self,
        event_type: &str,
        data: &str,
        state: &mut Self::State,
    ) -> Result<Vec<Event>, ClientError>;

    /// Scheme-level default capability. Regardless of the model ID, it
    /// returns the lowest common denominator that is safe to send on this
    /// wire. Capability tables for known model IDs are the responsibility
    /// of `provider::capability::lookup` (the high-level construction
    /// layer); the scheme is not involved in them.
    fn default_capability(&self) -> ModelCapability;

    /// Returns, as warnings, the `RequestConfig` fields that the scheme
    /// does not support (e.g. OpenAI Chat does not support `top_k`).
    /// The default implementation returns an empty `Vec`.
    fn validate_config(&self, config: &RequestConfig) -> Vec<ConfigWarning> {
        // Explicitly mark the parameter as unused in the default body.
        let _ = config;
        Vec::new()
    }
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,20 @@
|
|||
//! OpenAI Chat Completions scheme の wire-level 既定 capability。
|
||||
//!
|
||||
//! モデル ID 固有のテーブル(`gpt-5` 系など)は高レベル構築層
|
||||
//! (`provider::capability`)の責務。ここでは wire の保守的 default のみ。
|
||||
|
||||
use crate::llm_client::capability::{
|
||||
CacheStrategy, ModelCapability, StructuredOutput, ToolCallingSupport,
|
||||
};
|
||||
|
||||
/// Scheme 既定の capability。OpenAI 互換ルーター系(xAI / Groq / OpenRouter 等)
|
||||
/// で未知モデル ID を受けたときのフォールバックに使う。
|
||||
pub(crate) fn default_capability() -> ModelCapability {
|
||||
ModelCapability {
|
||||
tool_calling: ToolCallingSupport::Parallel,
|
||||
structured_output: StructuredOutput::JsonSchema,
|
||||
reasoning: None,
|
||||
vision: false,
|
||||
prompt_caching: CacheStrategy::Auto,
|
||||
}
|
||||
}
|
||||
|
|
@ -75,6 +75,7 @@ impl OpenAIScheme {
|
|||
status: None,
|
||||
code: Some("parse_error".to_string()),
|
||||
message: format!("Failed to parse SSE data: {} -> {}", e, data),
|
||||
retry_after: None,
|
||||
})?;
|
||||
|
||||
let mut events = Vec::new();
|
||||
|
|
@ -3,8 +3,10 @@
|
|||
//! - リクエストJSON生成
|
||||
//! - SSEイベントパース → Event変換
|
||||
|
||||
pub(crate) mod capability;
|
||||
mod events;
|
||||
mod request;
|
||||
mod scheme_impl;
|
||||
|
||||
/// OpenAIスキーマ
|
||||
///
|
||||
|
|
@ -7,7 +7,8 @@ use serde_json::Value;
|
|||
|
||||
use crate::llm_client::{
|
||||
Request,
|
||||
types::{Item, Role, ToolDefinition},
|
||||
capability::{ModelCapability, ReasoningControl, ReasoningSupport},
|
||||
types::{Item, Role, ToolDefinition, parse_tool_arguments},
|
||||
};
|
||||
|
||||
use super::OpenAIScheme;
|
||||
|
|
@ -34,6 +35,9 @@ pub(crate) struct OpenAIRequest {
|
|||
pub tools: Vec<OpenAITool>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub tool_choice: Option<String>,
|
||||
/// Reasoning effort(o1 / o3 / o4 / gpt-5 系で有効)。
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub reasoning_effort: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
|
|
@ -110,7 +114,12 @@ pub(crate) struct OpenAIToolCallFunction {
|
|||
|
||||
impl OpenAIScheme {
|
||||
/// Build OpenAI request from Request
|
||||
pub(crate) fn build_request(&self, model: &str, request: &Request) -> OpenAIRequest {
|
||||
pub(crate) fn build_request(
|
||||
&self,
|
||||
model: &str,
|
||||
request: &Request,
|
||||
capability: &ModelCapability,
|
||||
) -> OpenAIRequest {
|
||||
let mut messages = Vec::new();
|
||||
|
||||
// Add system message if present
|
||||
|
|
@ -135,6 +144,22 @@ impl OpenAIScheme {
|
|||
(None, request.config.max_tokens)
|
||||
};
|
||||
|
||||
// Reasoning の投影: capability が Effort / Both をサポートし、
|
||||
// request 側で effort が指定されているときだけ reasoning_effort を付ける。
|
||||
let supports_effort = matches!(
|
||||
capability.reasoning,
|
||||
Some(ReasoningSupport::Effort | ReasoningSupport::Both),
|
||||
);
|
||||
let reasoning_effort = request
|
||||
.config
|
||||
.reasoning
|
||||
.as_ref()
|
||||
.filter(|_| supports_effort)
|
||||
.and_then(|rc| match rc {
|
||||
ReasoningControl::Effort(effort) => Some(effort.as_str().to_string()),
|
||||
ReasoningControl::BudgetTokens(_) => None,
|
||||
});
|
||||
|
||||
OpenAIRequest {
|
||||
model: model.to_string(),
|
||||
max_completion_tokens,
|
||||
|
|
@ -149,6 +174,7 @@ impl OpenAIScheme {
|
|||
messages,
|
||||
tools,
|
||||
tool_choice: None,
|
||||
reasoning_effort,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -201,18 +227,24 @@ impl OpenAIScheme {
|
|||
arguments,
|
||||
..
|
||||
} => {
|
||||
// Normalize non-object / legacy "null" payloads to "{}" so
|
||||
// OpenAI gets a valid JSON object string.
|
||||
let normalized_args = parse_tool_arguments(arguments).to_string();
|
||||
pending_tool_calls.push(OpenAIToolCall {
|
||||
id: call_id.clone(),
|
||||
r#type: "function".to_string(),
|
||||
function: OpenAIToolCallFunction {
|
||||
name: name.clone(),
|
||||
arguments: arguments.clone(),
|
||||
arguments: normalized_args,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
Item::ToolResult {
|
||||
call_id, output, ..
|
||||
call_id,
|
||||
summary,
|
||||
content,
|
||||
..
|
||||
} => {
|
||||
// Flush pending tool calls before tool result
|
||||
self.flush_pending_assistant(
|
||||
|
|
@ -221,9 +253,13 @@ impl OpenAIScheme {
|
|||
&mut pending_assistant_text,
|
||||
);
|
||||
|
||||
let text = match content {
|
||||
Some(c) => format!("{summary}\n{c}"),
|
||||
None => summary.clone(),
|
||||
};
|
||||
messages.push(OpenAIMessage {
|
||||
role: "tool".to_string(),
|
||||
content: Some(OpenAIContent::Text(output.clone())),
|
||||
content: Some(OpenAIContent::Text(text)),
|
||||
tool_calls: vec![],
|
||||
tool_call_id: Some(call_id.clone()),
|
||||
name: None,
|
||||
|
|
@ -284,13 +320,26 @@ impl OpenAIScheme {
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::llm_client::capability::{
|
||||
CacheStrategy, ReasoningEffort, StructuredOutput, ToolCallingSupport,
|
||||
};
|
||||
|
||||
fn cap() -> ModelCapability {
|
||||
ModelCapability {
|
||||
tool_calling: ToolCallingSupport::Parallel,
|
||||
structured_output: StructuredOutput::JsonSchema,
|
||||
reasoning: None,
|
||||
vision: false,
|
||||
prompt_caching: CacheStrategy::Auto,
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_build_simple_request() {
|
||||
let scheme = OpenAIScheme::new();
|
||||
let request = Request::new().system("System prompt").user("Hello");
|
||||
|
||||
let body = scheme.build_request("gpt-4o", &request);
|
||||
let body = scheme.build_request("gpt-4o", &request, &cap());
|
||||
|
||||
assert_eq!(body.model, "gpt-4o");
|
||||
assert_eq!(body.messages.len(), 2);
|
||||
|
|
@ -311,7 +360,7 @@ mod tests {
|
|||
.user("Check weather")
|
||||
.tool(ToolDefinition::new("weather").description("Get weather"));
|
||||
|
||||
let body = scheme.build_request("gpt-4o", &request);
|
||||
let body = scheme.build_request("gpt-4o", &request, &cap());
|
||||
assert_eq!(body.tools.len(), 1);
|
||||
assert_eq!(body.tools[0].function.name, "weather");
|
||||
}
|
||||
|
|
@ -321,7 +370,7 @@ mod tests {
|
|||
let scheme = OpenAIScheme::new().with_legacy_max_tokens(true);
|
||||
let request = Request::new().user("Hello").max_tokens(100);
|
||||
|
||||
let body = scheme.build_request("llama3", &request);
|
||||
let body = scheme.build_request("llama3", &request, &cap());
|
||||
|
||||
assert_eq!(body.max_tokens, Some(100));
|
||||
assert!(body.max_completion_tokens.is_none());
|
||||
|
|
@ -332,12 +381,44 @@ mod tests {
|
|||
let scheme = OpenAIScheme::new();
|
||||
let request = Request::new().user("Hello").max_tokens(100);
|
||||
|
||||
let body = scheme.build_request("gpt-4o", &request);
|
||||
let body = scheme.build_request("gpt-4o", &request, &cap());
|
||||
|
||||
assert_eq!(body.max_completion_tokens, Some(100));
|
||||
assert!(body.max_tokens.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn reasoning_effort_projected_when_supported() {
|
||||
let scheme = OpenAIScheme::new();
|
||||
let mut request = Request::new().user("Hello");
|
||||
request.config.reasoning = Some(ReasoningControl::Effort(ReasoningEffort::Other(
|
||||
"provider-native".into(),
|
||||
)));
|
||||
let capability = ModelCapability {
|
||||
reasoning: Some(ReasoningSupport::Effort),
|
||||
..cap()
|
||||
};
|
||||
|
||||
let body = scheme.build_request("gpt-5", &request, &capability);
|
||||
|
||||
assert_eq!(body.reasoning_effort.as_deref(), Some("provider-native"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn budget_reasoning_not_projected_to_openai_chat() {
|
||||
let scheme = OpenAIScheme::new();
|
||||
let mut request = Request::new().user("Hello");
|
||||
request.config.reasoning = Some(ReasoningControl::BudgetTokens(4096));
|
||||
let capability = ModelCapability {
|
||||
reasoning: Some(ReasoningSupport::Both),
|
||||
..cap()
|
||||
};
|
||||
|
||||
let body = scheme.build_request("gpt-5", &request, &capability);
|
||||
|
||||
assert!(body.reasoning_effort.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_tool_call_and_result() {
|
||||
let scheme = OpenAIScheme::new();
|
||||
|
|
@ -350,7 +431,7 @@ mod tests {
|
|||
))
|
||||
.item(Item::tool_result("call_123", "Sunny, 25°C"));
|
||||
|
||||
let body = scheme.build_request("gpt-4o", &request);
|
||||
let body = scheme.build_request("gpt-4o", &request, &cap());
|
||||
|
||||
assert_eq!(body.messages.len(), 3);
|
||||
assert_eq!(body.messages[0].role, "user");
|
||||
|
|
@ -0,0 +1,67 @@
|
|||
//! `impl Scheme for OpenAIScheme`
|
||||
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::llm_client::{
|
||||
ClientError,
|
||||
auth::AuthRequirement,
|
||||
capability::ModelCapability,
|
||||
client::ConfigWarning,
|
||||
event::Event,
|
||||
scheme::Scheme,
|
||||
types::{Request, RequestConfig},
|
||||
};
|
||||
|
||||
use super::OpenAIScheme;
|
||||
|
||||
impl Scheme for OpenAIScheme {
|
||||
type State = ();
|
||||
|
||||
fn default_base_url(&self) -> &'static str {
|
||||
"https://api.openai.com"
|
||||
}
|
||||
|
||||
fn path(&self, _model_id: &str) -> String {
|
||||
"/v1/chat/completions".to_string()
|
||||
}
|
||||
|
||||
fn required_auth(&self) -> AuthRequirement {
|
||||
AuthRequirement::Bearer
|
||||
}
|
||||
|
||||
fn build_request_body(
|
||||
&self,
|
||||
model_id: &str,
|
||||
request: &Request,
|
||||
capability: &ModelCapability,
|
||||
) -> Value {
|
||||
let req = self.build_request(model_id, request, capability);
|
||||
serde_json::to_value(&req).expect("OpenAIRequest is always serialisable")
|
||||
}
|
||||
|
||||
fn parse_sse(
|
||||
&self,
|
||||
_event_type: &str,
|
||||
data: &str,
|
||||
_state: &mut Self::State,
|
||||
) -> Result<Vec<Event>, ClientError> {
|
||||
// `data: [DONE]` は終端マーカー
|
||||
if data.trim() == "[DONE]" {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
Ok(self.parse_event(data)?.unwrap_or_default())
|
||||
}
|
||||
|
||||
fn default_capability(&self) -> ModelCapability {
|
||||
super::capability::default_capability()
|
||||
}
|
||||
|
||||
fn validate_config(&self, config: &RequestConfig) -> Vec<ConfigWarning> {
|
||||
let mut warnings = Vec::new();
|
||||
// OpenAI Chat Completions API は top_k を受け付けない
|
||||
if config.top_k.is_some() {
|
||||
warnings.push(ConfigWarning::unsupported("top_k", "OpenAI Chat"));
|
||||
}
|
||||
warnings
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,18 @@
|
|||
//! OpenAI Responses scheme の wire-level 既定 capability。
|
||||
//!
|
||||
//! モデル ID 固有のテーブル(`gpt-5` / `codex-` 系など)は高レベル構築層
|
||||
//! (`provider::capability`)の責務。ここでは wire の保守的 default のみ。
|
||||
|
||||
use crate::llm_client::capability::{
|
||||
CacheStrategy, ModelCapability, StructuredOutput, ToolCallingSupport,
|
||||
};
|
||||
|
||||
pub(crate) fn default_capability() -> ModelCapability {
|
||||
ModelCapability {
|
||||
tool_calling: ToolCallingSupport::Parallel,
|
||||
structured_output: StructuredOutput::JsonSchema,
|
||||
reasoning: None,
|
||||
vision: false,
|
||||
prompt_caching: CacheStrategy::Auto,
|
||||
}
|
||||
}
|
||||
1240
crates/llm-worker/src/llm_client/scheme/openai_responses/events.rs
Normal file
1240
crates/llm-worker/src/llm_client/scheme/openai_responses/events.rs
Normal file
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,84 @@
|
|||
//! OpenAI Responses API スキーマ (`/v1/responses`)
|
||||
//!
|
||||
//! Chat Completions とは別物の item-based wire format。reasoning item と
|
||||
//! function_call item が first-class で、SSE イベントも `response.*` 名前空間で
|
||||
//! 流れる。ChatGPT OAuth 経路 (codex) は本 scheme 必須。
|
||||
//!
|
||||
//! - リクエスト JSON 生成: [`request`]
|
||||
//! - SSE イベントパース → [`Event`](crate::llm_client::event::Event) 変換: [`events`]
|
||||
|
||||
mod capability;
|
||||
mod events;
|
||||
mod request;
|
||||
mod scheme_impl;
|
||||
|
||||
pub use scheme_impl::OpenAIResponsesState;
|
||||
|
||||
/// The OpenAI Responses scheme itself.
///
/// `store` / `include_encrypted_content` / `send_max_output_tokens` /
/// `send_sampling_params` are scheme-level wire settings. The defaults target
/// the official OpenAI Responses API (stateless + ZDR, with
/// `max_output_tokens` / `temperature` / `top_p` allowed on the wire). For
/// routes that accept only a subset of parameters, such as the ChatGPT backend
/// (codex-oauth), the provider layer overrides `send_max_output_tokens` and
/// `send_sampling_params` to `false`. These flags are deliberately not part of
/// `ModelCapability`: they are wire policy, not model ability.
#[derive(Debug, Clone)]
pub struct OpenAIResponsesScheme {
    /// Whether the server should store the response. `false` for ZDR/stateless
    /// operation.
    pub store: bool,
    /// Whether to send `include: ["reasoning.encrypted_content"]`.
    /// Required when reasoning is used with `store=false`.
    pub include_encrypted_content: bool,
    /// Whether to put `max_output_tokens` in the body. The official OpenAI
    /// Responses API accepts it, but the ChatGPT backend (codex-oauth) answers
    /// 400 `Unsupported parameter`, so that route sets this to `false`.
    pub send_max_output_tokens: bool,
    /// Whether to put `temperature` / `top_p` in the body. The official OpenAI
    /// Responses API accepts them, but the ChatGPT backend (codex-oauth)
    /// answers 400 `Unsupported parameter`, so that route sets this to `false`.
    pub send_sampling_params: bool,
}

impl Default for OpenAIResponsesScheme {
    /// Defaults for the official API: nothing stored, encrypted reasoning
    /// content requested, token limit and sampling parameters sent.
    fn default() -> Self {
        Self {
            store: false,
            include_encrypted_content: true,
            send_max_output_tokens: true,
            send_sampling_params: true,
        }
    }
}

impl OpenAIResponsesScheme {
    /// Creates a scheme with the default settings (`store=false`,
    /// `include=["reasoning.encrypted_content"]`,
    /// `send_max_output_tokens=true`, `send_sampling_params=true`).
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Overrides `store`.
    ///
    /// Consumes `self` and returns the updated scheme; the return value must be
    /// used, otherwise the override is lost.
    #[must_use]
    pub fn with_store(mut self, store: bool) -> Self {
        self.store = store;
        self
    }

    /// Overrides whether `include: ["reasoning.encrypted_content"]` is sent.
    ///
    /// Consumes `self` and returns the updated scheme.
    #[must_use]
    pub fn with_include_encrypted_content(mut self, include: bool) -> Self {
        self.include_encrypted_content = include;
        self
    }

    /// Overrides whether `max_output_tokens` is placed in the body.
    ///
    /// Consumes `self` and returns the updated scheme.
    #[must_use]
    pub fn with_send_max_output_tokens(mut self, send: bool) -> Self {
        self.send_max_output_tokens = send;
        self
    }

    /// Overrides whether `temperature` / `top_p` are placed in the body.
    ///
    /// Consumes `self` and returns the updated scheme.
    #[must_use]
    pub fn with_send_sampling_params(mut self, send: bool) -> Self {
        self.send_sampling_params = send;
        self
    }
}
|
||||
|
|
@ -0,0 +1,650 @@
|
|||
//! OpenAI Responses API リクエスト body 生成
|
||||
//!
|
||||
//! Chat Completions の `messages` と違い、Responses は `input[]` の
|
||||
//! item 配列で reasoning / function_call / function_call_output が
|
||||
//! first-class。`Item` を素に近い形で `input[]` に投影できる。
|
||||
|
||||
use serde::{Serialize, Serializer};
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::llm_client::{
|
||||
Request,
|
||||
capability::{ModelCapability, ReasoningControl, ReasoningSupport},
|
||||
types::{ContentPart, Item, Role, ToolDefinition, parse_tool_arguments},
|
||||
};
|
||||
|
||||
use super::OpenAIResponsesScheme;
|
||||
|
||||
/// `/v1/responses` のリクエスト body。
|
||||
#[derive(Debug, Serialize)]
|
||||
pub(crate) struct ResponsesRequest {
|
||||
pub model: String,
|
||||
/// システムプロンプト相当。`input[]` とは別フィールド。
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub instructions: Option<String>,
|
||||
pub input: Vec<InputItem>,
|
||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
pub tools: Vec<ResponseTool>,
|
||||
/// 常時 `"auto"` を送る。scheme 固定値。
|
||||
pub tool_choice: &'static str,
|
||||
/// 常時 `true` を送る。scheme 固定値。
|
||||
pub parallel_tool_calls: bool,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub reasoning: Option<ReasoningConfig>,
|
||||
/// ZDR / stateless 運用では `false`。
|
||||
pub store: bool,
|
||||
/// 常時 `true`。
|
||||
pub stream: bool,
|
||||
/// `["reasoning.encrypted_content"]` 等。
|
||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
pub include: Vec<&'static str>,
|
||||
/// 公式 OpenAI Responses API では受理されるが、ChatGPT backend
|
||||
/// (codex-oauth) は 400 で弾く。scheme の `send_max_output_tokens`
|
||||
/// が `false` のときは `None` のまま送る (skip_serializing_if で除外)。
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub max_output_tokens: Option<u32>,
|
||||
/// 公式 OpenAI Responses API では受理されるが、ChatGPT backend
|
||||
/// (codex-oauth) は `temperature` / `top_p` を 400 で弾く。scheme の
|
||||
/// `send_sampling_params` が `false` のときは `None` のまま送る。
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub temperature: Option<f32>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub top_p: Option<f32>,
|
||||
/// 会話単位の安定キー。ChatGPT backend (codex-oauth) は明示キーが
|
||||
/// 無いとプロンプトキャッシュがほぼ効かない。pod 側は `SegmentId`
|
||||
/// を渡す。`Request::cache_key` が `None` のときはキー自体を送らない。
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub prompt_cache_key: Option<String>,
|
||||
}
|
||||
|
||||
/// reasoning 制御。
|
||||
#[derive(Debug, Serialize)]
|
||||
pub(crate) struct ReasoningConfig {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub effort: Option<String>,
|
||||
/// summary の出力制御。`"auto"` 固定で summary_text を受け取る。
|
||||
pub summary: &'static str,
|
||||
}
|
||||
|
||||
/// `input[]` の 1 要素。
|
||||
///
|
||||
/// Responses API の item 型を素に近い形で投影する。未対応 type は
|
||||
/// 無視(reasoning 送信時に `content: []` の場合は `None` として弾く)。
|
||||
#[derive(Debug, Serialize)]
|
||||
#[serde(tag = "type", rename_all = "snake_case")]
|
||||
pub(crate) enum InputItem {
|
||||
/// 会話メッセージ。user / assistant / developer のいずれか。
|
||||
/// `Role::System` items は `developer` として投影する(ChatGPT
|
||||
/// backend が `role: "system"` を拒否するため。Codex CLI も
|
||||
/// system 相当の挿入には DeveloperInstructions = `role: "developer"`
|
||||
/// を使う)。
|
||||
Message {
|
||||
role: &'static str,
|
||||
content: Vec<InputContent>,
|
||||
},
|
||||
/// 過去の function tool 呼び出し(assistant 側)。
|
||||
FunctionCall {
|
||||
call_id: String,
|
||||
name: String,
|
||||
/// JSON 文字列(object でなくても正規化済み)。
|
||||
arguments: String,
|
||||
},
|
||||
/// function tool の結果(user 側)。
|
||||
FunctionCallOutput {
|
||||
call_id: String,
|
||||
/// Responses は文字列 or 構造化 output を許すが、ここでは
|
||||
/// `summary` + `content` を改行連結した文字列で送る。
|
||||
output: String,
|
||||
},
|
||||
/// reasoning item。`encrypted_content` があれば必ず添える。
|
||||
Reasoning {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
id: Option<String>,
|
||||
/// Responses API は reasoning item に `summary` フィールドを必須で
|
||||
/// 要求する(中身が空でも `[]` として送る必要がある)。GPT-5 など
|
||||
/// summary を返さないモデル + reasoning effort 指定なしのターンでは
|
||||
/// summary text が一切付かないので、ここを skip すると 400
|
||||
/// "Missing required parameter: 'input[N].summary'" で弾かれる。
|
||||
summary: Vec<ReasoningSummaryPart>,
|
||||
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||
content: Vec<ReasoningContentPart>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
encrypted_content: Option<String>,
|
||||
},
|
||||
}
|
||||
|
||||
/// メッセージ content_part。role で input/output を使い分ける。
|
||||
#[derive(Debug, Serialize)]
|
||||
#[serde(tag = "type", rename_all = "snake_case")]
|
||||
pub(crate) enum InputContent {
|
||||
/// user / developer 側のテキスト
|
||||
InputText { text: String },
|
||||
/// assistant 側のテキスト
|
||||
OutputText { text: String },
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
#[serde(tag = "type", rename_all = "snake_case")]
|
||||
pub(crate) enum ReasoningSummaryPart {
|
||||
SummaryText { text: String },
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
#[serde(tag = "type", rename_all = "snake_case")]
|
||||
pub(crate) enum ReasoningContentPart {
|
||||
ReasoningText { text: String },
|
||||
}
|
||||
|
||||
/// Responses 用 tool 定義。Chat と違い function キーでネストせず
|
||||
/// トップレベルに `name` / `parameters` が載る。
|
||||
#[derive(Debug, Serialize)]
|
||||
pub(crate) struct ResponseTool {
|
||||
#[serde(rename = "type")]
|
||||
pub r#type: &'static str,
|
||||
pub name: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub description: Option<String>,
|
||||
/// OpenAI Responses API は `type:"object"` のパラメータスキーマに
|
||||
/// `properties` が存在することを要求する。schemars は引数なし struct
|
||||
/// から `properties` を含まない最小スキーマを出すので、serialize
|
||||
/// 時に空オブジェクトを補う。
|
||||
#[serde(serialize_with = "serialize_parameters")]
|
||||
pub parameters: Value,
|
||||
/// Structured output モード制御。デフォルト false。
|
||||
pub strict: bool,
|
||||
}
|
||||
|
||||
fn serialize_parameters<S: Serializer>(value: &Value, s: S) -> Result<S::Ok, S::Error> {
|
||||
if let Some(obj) = value.as_object()
|
||||
&& obj.get("type").and_then(Value::as_str) == Some("object")
|
||||
&& !obj.contains_key("properties")
|
||||
{
|
||||
let mut patched = obj.clone();
|
||||
patched.insert("properties".to_string(), Value::Object(Default::default()));
|
||||
return Value::Object(patched).serialize(s);
|
||||
}
|
||||
value.serialize(s)
|
||||
}
|
||||
|
||||
impl OpenAIResponsesScheme {
|
||||
/// `Request` から wire 形式の body を組み立てる。
|
||||
pub(crate) fn build_request(
|
||||
&self,
|
||||
model: &str,
|
||||
request: &Request,
|
||||
capability: &ModelCapability,
|
||||
) -> ResponsesRequest {
|
||||
let input = convert_items_to_input(&request.items);
|
||||
let tools = request.tools.iter().map(convert_tool).collect();
|
||||
|
||||
// Reasoning 投影: capability が Effort / Both をサポートし、かつ
|
||||
// request 側で effort が指定されているときだけ reasoning を付ける。
|
||||
let supports_effort = matches!(
|
||||
capability.reasoning,
|
||||
Some(ReasoningSupport::Effort | ReasoningSupport::Both),
|
||||
);
|
||||
let reasoning = request
|
||||
.config
|
||||
.reasoning
|
||||
.as_ref()
|
||||
.filter(|_| supports_effort)
|
||||
.map(|effort| ReasoningConfig {
|
||||
effort: match effort {
|
||||
ReasoningControl::Effort(effort) => Some(effort.as_str().to_string()),
|
||||
ReasoningControl::BudgetTokens(_) => None,
|
||||
},
|
||||
summary: "auto",
|
||||
})
|
||||
.filter(|reasoning| reasoning.effort.is_some());
|
||||
|
||||
let include: Vec<&'static str> = if self.include_encrypted_content {
|
||||
vec!["reasoning.encrypted_content"]
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
|
||||
ResponsesRequest {
|
||||
model: model.to_string(),
|
||||
instructions: request.system_prompt.clone(),
|
||||
input,
|
||||
tools,
|
||||
tool_choice: "auto",
|
||||
parallel_tool_calls: true,
|
||||
reasoning,
|
||||
store: self.store,
|
||||
stream: true,
|
||||
include,
|
||||
max_output_tokens: if self.send_max_output_tokens {
|
||||
request.config.max_tokens
|
||||
} else {
|
||||
None
|
||||
},
|
||||
temperature: if self.send_sampling_params {
|
||||
request.config.temperature
|
||||
} else {
|
||||
None
|
||||
},
|
||||
top_p: if self.send_sampling_params {
|
||||
request.config.top_p
|
||||
} else {
|
||||
None
|
||||
},
|
||||
prompt_cache_key: request.cache_key.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// `Item` 列を `input[]` に変換する。
|
||||
fn convert_items_to_input(items: &[Item]) -> Vec<InputItem> {
|
||||
let mut out = Vec::with_capacity(items.len());
|
||||
for item in items {
|
||||
match item {
|
||||
Item::Message { role, content, .. } => {
|
||||
let (role_str, text_variant): (&'static str, fn(String) -> InputContent) =
|
||||
match role {
|
||||
Role::User => ("user", |t| InputContent::InputText { text: t }),
|
||||
Role::Assistant => ("assistant", |t| InputContent::OutputText { text: t }),
|
||||
Role::System => ("developer", |t| InputContent::InputText { text: t }),
|
||||
};
|
||||
let parts: Vec<InputContent> = content
|
||||
.iter()
|
||||
.map(|p| match p {
|
||||
ContentPart::Text { text } => text_variant(text.clone()),
|
||||
ContentPart::Refusal { refusal } => text_variant(refusal.clone()),
|
||||
})
|
||||
.collect();
|
||||
out.push(InputItem::Message {
|
||||
role: role_str,
|
||||
content: parts,
|
||||
});
|
||||
}
|
||||
Item::ToolCall {
|
||||
call_id,
|
||||
name,
|
||||
arguments,
|
||||
..
|
||||
} => {
|
||||
// 非 object / 旧形式の "null" を "{}" に正規化。
|
||||
let normalized = parse_tool_arguments(arguments).to_string();
|
||||
out.push(InputItem::FunctionCall {
|
||||
call_id: call_id.clone(),
|
||||
name: name.clone(),
|
||||
arguments: normalized,
|
||||
});
|
||||
}
|
||||
Item::ToolResult {
|
||||
call_id,
|
||||
summary,
|
||||
content,
|
||||
..
|
||||
} => {
|
||||
let text = match content {
|
||||
Some(c) => format!("{summary}\n{c}"),
|
||||
None => summary.clone(),
|
||||
};
|
||||
out.push(InputItem::FunctionCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
output: text,
|
||||
});
|
||||
}
|
||||
Item::Reasoning {
|
||||
id,
|
||||
text,
|
||||
summary,
|
||||
encrypted_content,
|
||||
..
|
||||
} => {
|
||||
let summary_parts = summary
|
||||
.iter()
|
||||
.filter(|s| !s.is_empty())
|
||||
.map(|s| ReasoningSummaryPart::SummaryText { text: s.clone() })
|
||||
.collect();
|
||||
let content_parts = if text.is_empty() {
|
||||
Vec::new()
|
||||
} else {
|
||||
vec![ReasoningContentPart::ReasoningText { text: text.clone() }]
|
||||
};
|
||||
out.push(InputItem::Reasoning {
|
||||
id: id.clone(),
|
||||
summary: summary_parts,
|
||||
content: content_parts,
|
||||
encrypted_content: encrypted_content.clone(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
fn convert_tool(tool: &ToolDefinition) -> ResponseTool {
|
||||
ResponseTool {
|
||||
r#type: "function",
|
||||
name: tool.name.clone(),
|
||||
description: tool.description.clone(),
|
||||
parameters: tool.input_schema.clone(),
|
||||
strict: false,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::llm_client::capability::{
|
||||
CacheStrategy, ModelCapability, ReasoningControl, ReasoningEffort, ReasoningSupport,
|
||||
StructuredOutput, ToolCallingSupport,
|
||||
};
|
||||
|
||||
fn cap_with_reasoning() -> ModelCapability {
|
||||
ModelCapability {
|
||||
tool_calling: ToolCallingSupport::Parallel,
|
||||
structured_output: StructuredOutput::JsonSchema,
|
||||
reasoning: Some(ReasoningSupport::Effort),
|
||||
vision: true,
|
||||
prompt_caching: CacheStrategy::Auto,
|
||||
}
|
||||
}
|
||||
|
||||
fn cap_no_reasoning() -> ModelCapability {
|
||||
ModelCapability {
|
||||
reasoning: None,
|
||||
..cap_with_reasoning()
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn scheme_defaults_to_stateless_zdr() {
|
||||
let s = OpenAIResponsesScheme::new();
|
||||
assert!(!s.store);
|
||||
assert!(s.include_encrypted_content);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn includes_encrypted_content_when_enabled() {
|
||||
let scheme = OpenAIResponsesScheme::new();
|
||||
let req = Request::new().user("hi");
|
||||
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
|
||||
assert_eq!(body.include, vec!["reasoning.encrypted_content"]);
|
||||
assert!(!body.store);
|
||||
assert!(body.stream);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn instructions_from_system_prompt() {
|
||||
let scheme = OpenAIResponsesScheme::new();
|
||||
let req = Request::new().system("be terse").user("hi");
|
||||
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
|
||||
assert_eq!(body.instructions.as_deref(), Some("be terse"));
|
||||
assert_eq!(body.input.len(), 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tool_choice_and_parallel_are_fixed() {
|
||||
let scheme = OpenAIResponsesScheme::new();
|
||||
let req = Request::new().user("hi");
|
||||
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
|
||||
assert_eq!(body.tool_choice, "auto");
|
||||
assert!(body.parallel_tool_calls);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn user_message_uses_input_text() {
|
||||
let scheme = OpenAIResponsesScheme::new();
|
||||
let req = Request::new().user("hi");
|
||||
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
|
||||
match &body.input[0] {
|
||||
InputItem::Message { role, content } => {
|
||||
assert_eq!(*role, "user");
|
||||
assert_eq!(content.len(), 1);
|
||||
assert!(matches!(&content[0], InputContent::InputText { text } if text == "hi"));
|
||||
}
|
||||
_ => panic!("expected message"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn system_role_item_is_projected_as_developer() {
|
||||
// ChatGPT backend (codex-oauth) は input[] の `role: "system"` を
|
||||
// "System messages are not allowed" で 400 拒否する。in-conversation
|
||||
// な system note (notify / fs_view auto-read / compaction summary) は
|
||||
// `role: "developer"` として投影し、両 backend で受理されるようにする。
|
||||
let scheme = OpenAIResponsesScheme::new();
|
||||
let req = Request::new()
|
||||
.user("hi")
|
||||
.item(Item::system_message("[notify] hello"));
|
||||
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
|
||||
match &body.input[1] {
|
||||
InputItem::Message { role, content } => {
|
||||
assert_eq!(*role, "developer");
|
||||
assert!(
|
||||
matches!(&content[0], InputContent::InputText { text } if text == "[notify] hello"),
|
||||
);
|
||||
}
|
||||
_ => panic!("expected message"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn assistant_message_uses_output_text() {
|
||||
let scheme = OpenAIResponsesScheme::new();
|
||||
let req = Request::new().user("hi").assistant("hello");
|
||||
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
|
||||
match &body.input[1] {
|
||||
InputItem::Message { role, content } => {
|
||||
assert_eq!(*role, "assistant");
|
||||
assert!(
|
||||
matches!(&content[0], InputContent::OutputText { text } if text == "hello")
|
||||
);
|
||||
}
|
||||
_ => panic!("expected message"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tool_call_and_result_become_function_items() {
|
||||
let scheme = OpenAIResponsesScheme::new();
|
||||
let req = Request::new()
|
||||
.user("run")
|
||||
.item(Item::tool_call("c1", "t", r#"{"a":1}"#))
|
||||
.item(Item::tool_result("c1", "ok"));
|
||||
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
|
||||
assert!(matches!(body.input[1], InputItem::FunctionCall { .. }));
|
||||
assert!(matches!(
|
||||
body.input[2],
|
||||
InputItem::FunctionCallOutput { .. }
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn reasoning_item_round_trips_encrypted_content() {
|
||||
let scheme = OpenAIResponsesScheme::new();
|
||||
let item = Item::reasoning("inner")
|
||||
.with_reasoning_summary(vec!["s1".into()])
|
||||
.with_encrypted_content("ENC");
|
||||
let req = Request::new().user("hi").item(item);
|
||||
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
|
||||
match &body.input[1] {
|
||||
InputItem::Reasoning {
|
||||
summary,
|
||||
content,
|
||||
encrypted_content,
|
||||
..
|
||||
} => {
|
||||
assert_eq!(summary.len(), 1);
|
||||
assert_eq!(content.len(), 1);
|
||||
assert_eq!(encrypted_content.as_deref(), Some("ENC"));
|
||||
}
|
||||
_ => panic!("expected reasoning"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn reasoning_summary_field_is_always_serialized() {
|
||||
// Responses API は reasoning item に `summary` を必須で要求する。
|
||||
// summary が空でも wire 上に `summary: []` として残らないと、
|
||||
// ChatGPT backend (codex-oauth) が
|
||||
// 400 invalid_request_error: Missing required parameter:
|
||||
// 'input[N].summary'.
|
||||
// で弾く。GPT-5 + reasoning effort 未指定のターンでは summary text
|
||||
// が付かないことがあるため、空のままでも skip しないこと。
|
||||
let scheme = OpenAIResponsesScheme::new();
|
||||
let item = Item::reasoning("").with_encrypted_content("ENC");
|
||||
let req = Request::new().user("hi").item(item);
|
||||
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
|
||||
let json = serde_json::to_value(&body).unwrap();
|
||||
let reasoning_item = &json["input"][1];
|
||||
assert_eq!(reasoning_item["type"], "reasoning");
|
||||
assert!(
|
||||
reasoning_item.get("summary").is_some(),
|
||||
"summary key must be present even when empty, got: {reasoning_item}"
|
||||
);
|
||||
assert_eq!(reasoning_item["summary"], serde_json::json!([]));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn reasoning_effort_projected_when_supported() {
|
||||
let scheme = OpenAIResponsesScheme::new();
|
||||
let mut req = Request::new().user("hi");
|
||||
req.config.reasoning = Some(ReasoningControl::Effort(ReasoningEffort::High));
|
||||
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
|
||||
let reasoning = body.reasoning.expect("reasoning should be set");
|
||||
assert_eq!(reasoning.effort.as_deref(), Some("high"));
|
||||
assert_eq!(reasoning.summary, "auto");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn reasoning_omitted_when_unsupported() {
|
||||
let scheme = OpenAIResponsesScheme::new();
|
||||
let mut req = Request::new().user("hi");
|
||||
req.config.reasoning = Some(ReasoningControl::Effort(ReasoningEffort::High));
|
||||
let body = scheme.build_request("gpt-4o", &req, &cap_no_reasoning());
|
||||
assert!(body.reasoning.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn max_output_tokens_passed_through_by_default() {
|
||||
let scheme = OpenAIResponsesScheme::new();
|
||||
let req = Request::new().user("hi").max_tokens(100);
|
||||
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
|
||||
assert_eq!(body.max_output_tokens, Some(100));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn max_output_tokens_dropped_when_send_disabled() {
|
||||
let scheme = OpenAIResponsesScheme::new().with_send_max_output_tokens(false);
|
||||
let req = Request::new().user("hi").max_tokens(100);
|
||||
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
|
||||
assert_eq!(body.max_output_tokens, None);
|
||||
let json = serde_json::to_value(&body).unwrap();
|
||||
assert!(
|
||||
json.get("max_output_tokens").is_none(),
|
||||
"max_output_tokens key must not appear in serialised body, got: {json}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sampling_params_passed_through_by_default() {
|
||||
let scheme = OpenAIResponsesScheme::new();
|
||||
let req = Request::new().user("hi").temperature(0.4).top_p(0.9);
|
||||
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
|
||||
assert_eq!(body.temperature, Some(0.4));
|
||||
assert_eq!(body.top_p, Some(0.9));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sampling_params_dropped_when_send_disabled() {
|
||||
let scheme = OpenAIResponsesScheme::new().with_send_sampling_params(false);
|
||||
let req = Request::new().user("hi").temperature(0.4).top_p(0.9);
|
||||
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
|
||||
assert_eq!(body.temperature, None);
|
||||
assert_eq!(body.top_p, None);
|
||||
let json = serde_json::to_value(&body).unwrap();
|
||||
assert!(
|
||||
json.get("temperature").is_none() && json.get("top_p").is_none(),
|
||||
"temperature/top_p keys must not appear in serialised body, got: {json}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn prompt_cache_key_passed_through_when_set() {
|
||||
let scheme = OpenAIResponsesScheme::new();
|
||||
let req = Request::new().user("hi").cache_key("session-abc");
|
||||
let body = scheme.build_request("gpt-5", &req, &cap_with_reasoning());
|
||||
assert_eq!(body.prompt_cache_key.as_deref(), Some("session-abc"));
|
||||
let json = serde_json::to_value(&body).unwrap();
|
||||
assert_eq!(json["prompt_cache_key"], "session-abc");
|
||||
}
|
||||
|
||||
/// Without a cache key on the request, `prompt_cache_key` is `None` on the
/// typed body and the key is not serialised at all.
#[test]
fn prompt_cache_key_omitted_when_none() {
    let request = Request::new().user("hi");
    let scheme = OpenAIResponsesScheme::new();
    let built = scheme.build_request("gpt-5", &request, &cap_with_reasoning());
    assert!(built.prompt_cache_key.is_none());

    let json = serde_json::to_value(&built).unwrap();
    let key_present = json.get("prompt_cache_key").is_some();
    assert!(
        !key_present,
        "prompt_cache_key key must not appear in serialised body, got: {json}"
    );
}
|
||||
|
||||
/// schemars emits a schema containing only `type:"object"` for a struct with
/// no arguments. OpenAI Responses rejects a missing `properties` with a 400,
/// so this verifies that an empty object is filled in right before sending.
#[test]
fn tool_schema_without_properties_is_normalized() {
    let bare_schema = serde_json::json!({ "type": "object" });
    let tool = ToolDefinition::new("empty")
        .description("no args")
        .input_schema(bare_schema);
    let request = Request::new().tool(tool);

    let scheme = OpenAIResponsesScheme::new();
    let built = scheme.build_request("gpt-5", &request, &cap_with_reasoning());
    let wire = serde_json::to_value(&built).unwrap();

    let parameters = &wire["tools"][0]["parameters"];
    assert_eq!(parameters["type"], "object");
    assert!(
        parameters["properties"].is_object(),
        "properties must be present as an object, got: {}",
        parameters
    );
}
|
||||
|
||||
/// A tool schema that already carries `properties` is serialised as
/// `parameters` without modification.
#[test]
fn tool_schema_with_properties_is_untouched() {
    let full_schema = serde_json::json!({
        "type": "object",
        "properties": { "path": { "type": "string" } },
        "required": ["path"]
    });
    let tool = ToolDefinition::new("t")
        .description("d")
        .input_schema(full_schema.clone());
    let request = Request::new().tool(tool);

    let scheme = OpenAIResponsesScheme::new();
    let built = scheme.build_request("gpt-5", &request, &cap_with_reasoning());
    let wire = serde_json::to_value(&built).unwrap();

    assert_eq!(wire["tools"][0]["parameters"], full_schema);
}
|
||||
|
||||
/// Smoke test that the wire format has not been broken.
#[test]
fn serialized_body_has_expected_shape() {
    let request = Request::new()
        .system("sys")
        .user("hi")
        .tool(ToolDefinition::new("t").description("d"));
    let scheme = OpenAIResponsesScheme::new();
    let built = scheme.build_request("gpt-5", &request, &cap_with_reasoning());
    let wire = serde_json::to_value(&built).unwrap();

    // Top-level scalar fields, checked in the same order as before.
    let top_level = [
        ("model", serde_json::json!("gpt-5")),
        ("instructions", serde_json::json!("sys")),
        ("tool_choice", serde_json::json!("auto")),
        ("parallel_tool_calls", serde_json::json!(true)),
        ("store", serde_json::json!(false)),
        ("stream", serde_json::json!(true)),
    ];
    for (key, expected) in top_level {
        assert_eq!(wire[key], expected);
    }

    assert_eq!(wire["include"][0], "reasoning.encrypted_content");

    let first_tool = &wire["tools"][0];
    assert_eq!(first_tool["type"], "function");
    assert_eq!(first_tool["name"], "t");
}
|
||||
}
|
||||
|
|
@ -0,0 +1,88 @@
|
|||
//! `impl Scheme for OpenAIResponsesScheme`
|
||||
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::llm_client::{
|
||||
ClientError,
|
||||
auth::AuthRequirement,
|
||||
capability::ModelCapability,
|
||||
client::ConfigWarning,
|
||||
event::Event,
|
||||
scheme::Scheme,
|
||||
types::{Request, RequestConfig},
|
||||
};
|
||||
|
||||
use super::OpenAIResponsesScheme;
|
||||
|
||||
pub use super::events::OpenAIResponsesState;
|
||||
|
||||
impl Scheme for OpenAIResponsesScheme {
|
||||
type State = OpenAIResponsesState;
|
||||
|
||||
fn default_base_url(&self) -> &'static str {
|
||||
// `/v1` は base_url 側に寄せる。ChatGPT OAuth 経由のときは
|
||||
// `https://chatgpt.com/backend-api/codex` を base にすれば同じ
|
||||
// `/responses` path で両系統を吸収できる(Codex CLI 準拠)。
|
||||
"https://api.openai.com/v1"
|
||||
}
|
||||
|
||||
fn path(&self, _model_id: &str) -> String {
|
||||
"/responses".to_string()
|
||||
}
|
||||
|
||||
fn required_auth(&self) -> AuthRequirement {
|
||||
AuthRequirement::Bearer
|
||||
}
|
||||
|
||||
fn build_request_body(
|
||||
&self,
|
||||
model_id: &str,
|
||||
request: &Request,
|
||||
capability: &ModelCapability,
|
||||
) -> Value {
|
||||
let body = self.build_request(model_id, request, capability);
|
||||
serde_json::to_value(&body).expect("ResponsesRequest is always serialisable")
|
||||
}
|
||||
|
||||
fn parse_sse(
|
||||
&self,
|
||||
event_type: &str,
|
||||
data: &str,
|
||||
state: &mut Self::State,
|
||||
) -> Result<Vec<Event>, ClientError> {
|
||||
super::events::parse_sse(event_type, data, state)
|
||||
}
|
||||
|
||||
fn default_capability(&self) -> ModelCapability {
|
||||
super::capability::default_capability()
|
||||
}
|
||||
|
||||
fn validate_config(&self, config: &RequestConfig) -> Vec<ConfigWarning> {
|
||||
let mut warnings = Vec::new();
|
||||
// ChatGPT backend (codex-oauth) は `max_output_tokens` を 400 で弾く。
|
||||
// scheme 構築時に `send_max_output_tokens=false` で組まれていれば
|
||||
// body 投影は止まっているので、ユーザの意図が落ちることだけを通知する。
|
||||
if !self.send_max_output_tokens && config.max_tokens.is_some() {
|
||||
warnings.push(ConfigWarning::unsupported(
|
||||
"max_tokens",
|
||||
"OpenAI Responses (ChatGPT backend)",
|
||||
));
|
||||
}
|
||||
// 同上、`temperature` / `top_p` も ChatGPT backend では 400 で弾かれる。
|
||||
if !self.send_sampling_params {
|
||||
if config.temperature.is_some() {
|
||||
warnings.push(ConfigWarning::unsupported(
|
||||
"temperature",
|
||||
"OpenAI Responses (ChatGPT backend)",
|
||||
));
|
||||
}
|
||||
if config.top_p.is_some() {
|
||||
warnings.push(ConfigWarning::unsupported(
|
||||
"top_p",
|
||||
"OpenAI Responses (ChatGPT backend)",
|
||||
));
|
||||
}
|
||||
}
|
||||
warnings
|
||||
}
|
||||
}
|
||||
485
crates/llm-worker/src/llm_client/transport.rs
Normal file
485
crates/llm-worker/src/llm_client/transport.rs
Normal file
|
|
@ -0,0 +1,485 @@
|
|||
//! `HttpTransport<S: Scheme>`: すべての LLM wire scheme を共通の 1 本の
|
||||
//! HTTP クライアントで扱う。
|
||||
//!
|
||||
//! 旧 `providers/{anthropic,openai,gemini,ollama}.rs` を置き換える。
|
||||
//! scheme 固有の差分は [`Scheme`] trait 実装に委譲する。
|
||||
|
||||
use std::pin::Pin;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use eventsource_stream::Eventsource;
|
||||
use futures::{Stream, StreamExt, TryStreamExt};
|
||||
use reqwest::header::{
|
||||
ACCEPT, CONTENT_ENCODING, CONTENT_TYPE, HeaderMap, HeaderName, HeaderValue, RETRY_AFTER,
|
||||
};
|
||||
|
||||
use super::auth::{AuthProvider, AuthRequirement};
|
||||
use super::capability::ModelCapability;
|
||||
use super::client::{ConfigWarning, LlmClient, ResponseStream};
|
||||
use super::error::ClientError;
|
||||
use super::event::Event;
|
||||
use super::scheme::Scheme;
|
||||
use super::types::{Request, RequestConfig};
|
||||
|
||||
/// Default time allowed for the request that opens the stream to produce a
/// response; `stream` passes it to `response_with_timeout` with the phase
/// label `"stream_open"`.
pub const DEFAULT_STREAM_OPEN_TIMEOUT: Duration = Duration::from_secs(30);
|
||||
/// Default timeout for the first stream event.
/// NOTE(review): not referenced by the code in this module — presumably
/// enforced by a caller; confirm.
pub const DEFAULT_FIRST_STREAM_EVENT_TIMEOUT: Duration = Duration::from_secs(30);
|
||||
|
||||
/// Runtime representation of a resolved `AuthRef`. Built by `crates/provider`.
|
||||
///
|
||||
/// - `None`: send no authentication header (opt-out, e.g. Ollama)
|
||||
/// - `ApiKey`: a static API key string
|
||||
/// - `Custom`: headers are assembled dynamically per request (e.g. Codex OAuth)
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum ResolvedAuth {
|
||||
None,
|
||||
ApiKey(String),
|
||||
Custom(Arc<dyn AuthProvider>),
|
||||
}
|
||||
|
||||
impl ResolvedAuth {
|
||||
/// 認証要件と実際の解決値が噛み合うか検査する。構築時検証用。
|
||||
///
|
||||
/// - `ResolvedAuth::None` は認証を付けない宣言なので、どの
|
||||
/// `AuthRequirement` でも受け入れる(Ollama の Anthropic scheme
|
||||
/// 流用は `required_auth = XApiKey` だが認証ヘッダなしで動く)
|
||||
/// - `ResolvedAuth::Custom` は「ヘッダ組立を全部こちらで行う」
|
||||
/// 宣言なので、scheme が要求する形式によらず受け入れる
|
||||
pub fn matches(&self, req: AuthRequirement) -> bool {
|
||||
match (self, req) {
|
||||
(Self::None, _) => true,
|
||||
(Self::Custom(_), _) => true,
|
||||
(
|
||||
Self::ApiKey(_),
|
||||
AuthRequirement::Bearer
|
||||
| AuthRequirement::XApiKey
|
||||
| AuthRequirement::QueryParam { .. },
|
||||
) => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// HTTP communication layer shared by all schemes.
|
||||
pub struct HttpTransport<S: Scheme> {
|
||||
/// Client used to send requests; replaceable via `with_http_client`.
http_client: reqwest::Client,
|
||||
/// Wire scheme that supplies the path, auth requirement, body and SSE parsing.
scheme: S,
|
||||
/// Model identifier handed to the scheme when building the path and body.
model_id: String,
|
||||
/// Base URL, stored with trailing slashes removed.
base_url: String,
|
||||
/// Resolved authentication applied to outgoing requests.
auth: ResolvedAuth,
|
||||
/// Capability handed to the scheme when building the request body.
capability: ModelCapability,
|
||||
}
|
||||
|
||||
impl<S: Scheme> HttpTransport<S> {
|
||||
/// 新しい transport を作る。`base_url` は末尾スラッシュの有無を
|
||||
/// どちらでも受け付ける(内部で正規化)。
|
||||
pub fn new(
|
||||
scheme: S,
|
||||
model_id: impl Into<String>,
|
||||
base_url: impl Into<String>,
|
||||
auth: ResolvedAuth,
|
||||
capability: ModelCapability,
|
||||
) -> Self {
|
||||
let base_url = base_url.into();
|
||||
let base_url = base_url.trim_end_matches('/').to_string();
|
||||
Self {
|
||||
http_client: reqwest::Client::new(),
|
||||
scheme,
|
||||
model_id: model_id.into(),
|
||||
base_url,
|
||||
auth,
|
||||
capability,
|
||||
}
|
||||
}
|
||||
|
||||
/// カスタム HTTP クライアントを差し込む(テスト等)。
|
||||
pub fn with_http_client(mut self, client: reqwest::Client) -> Self {
|
||||
self.http_client = client;
|
||||
self
|
||||
}
|
||||
|
||||
fn build_url(&self) -> String {
|
||||
let path = self.scheme.path(&self.model_id);
|
||||
let url = format!("{}{}", self.base_url, path);
|
||||
// Gemini のようにクエリパラメータで認証する場合は URL にキーを追記する
|
||||
if let (AuthRequirement::QueryParam { name }, ResolvedAuth::ApiKey(key)) =
|
||||
(self.scheme.required_auth(), &self.auth)
|
||||
{
|
||||
let sep = if url.contains('?') { '&' } else { '?' };
|
||||
format!("{url}{sep}{name}={key}")
|
||||
} else {
|
||||
url
|
||||
}
|
||||
}
|
||||
|
||||
async fn build_headers(&self) -> Result<HeaderMap, ClientError> {
|
||||
let mut headers = HeaderMap::new();
|
||||
headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
|
||||
|
||||
match (&self.auth, self.scheme.required_auth()) {
|
||||
(ResolvedAuth::None, _) | (_, AuthRequirement::None) => {}
|
||||
(ResolvedAuth::Custom(provider), _) => {
|
||||
for (name, mut value) in provider.headers().await? {
|
||||
value.set_sensitive(true);
|
||||
headers.insert(name, value);
|
||||
}
|
||||
}
|
||||
(ResolvedAuth::ApiKey(key), AuthRequirement::Bearer) => {
|
||||
let mut val = HeaderValue::from_str(&format!("Bearer {key}"))
|
||||
.map_err(|e| ClientError::Config(format!("invalid api key: {e}")))?;
|
||||
val.set_sensitive(true);
|
||||
headers.insert("Authorization", val);
|
||||
}
|
||||
(ResolvedAuth::ApiKey(key), AuthRequirement::XApiKey) => {
|
||||
let mut val = HeaderValue::from_str(key.as_str())
|
||||
.map_err(|e| ClientError::Config(format!("invalid api key: {e}")))?;
|
||||
val.set_sensitive(true);
|
||||
headers.insert("x-api-key", val);
|
||||
}
|
||||
(_, AuthRequirement::QueryParam { .. }) => {
|
||||
// クエリパラメータは `build_url` で付与済み
|
||||
}
|
||||
(ResolvedAuth::ApiKey(_), AuthRequirement::Custom) => {
|
||||
// scheme が Custom を要求する組合せに ApiKey は流れてこない想定
|
||||
// (`matches()` で弾かれる)。安全側で何もしない
|
||||
}
|
||||
}
|
||||
|
||||
for (name, value) in self.scheme.additional_headers() {
|
||||
let hv = HeaderValue::from_str(&value)
|
||||
.map_err(|e| ClientError::Config(format!("invalid header {name}: {e}")))?;
|
||||
headers.insert(name, hv);
|
||||
}
|
||||
|
||||
Ok(headers)
|
||||
}
|
||||
|
||||
fn is_codex_backend(&self) -> bool {
|
||||
match &self.auth {
|
||||
ResolvedAuth::Custom(provider) => provider.is_codex_backend(),
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn apply_stream_headers(
|
||||
&self,
|
||||
headers: &mut HeaderMap,
|
||||
request: &Request,
|
||||
) -> Result<(), ClientError> {
|
||||
headers.insert(ACCEPT, HeaderValue::from_static("text/event-stream"));
|
||||
|
||||
if self.is_codex_backend()
|
||||
&& let Some(cache_key) = request.cache_key.as_deref()
|
||||
{
|
||||
let value = HeaderValue::from_str(cache_key).map_err(|e| {
|
||||
ClientError::Config(format!("invalid Codex conversation header: {e}"))
|
||||
})?;
|
||||
headers.insert(HeaderName::from_static("session_id"), value.clone());
|
||||
headers.insert(HeaderName::from_static("x-client-request-id"), value);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn encode_request_body(
|
||||
&self,
|
||||
body: &serde_json::Value,
|
||||
headers: &mut HeaderMap,
|
||||
) -> Result<RequestBody, ClientError> {
|
||||
if !self.is_codex_backend() {
|
||||
return Ok(RequestBody::Json(body.clone()));
|
||||
}
|
||||
|
||||
let raw = serde_json::to_vec(body)?;
|
||||
let compressed = zstd::stream::encode_all(std::io::Cursor::new(raw), 3)
|
||||
.map_err(|e| ClientError::Config(format!("failed to zstd-compress request: {e}")))?;
|
||||
headers.insert(CONTENT_ENCODING, HeaderValue::from_static("zstd"));
|
||||
Ok(RequestBody::CompressedJson(compressed))
|
||||
}
|
||||
}
|
||||
|
||||
/// Encoded request body produced by `encode_request_body`.
enum RequestBody {
|
||||
/// Plain JSON value, sent via the request builder's `json` method.
Json(serde_json::Value),
|
||||
/// Serialised JSON compressed with zstd, sent as raw bytes.
CompressedJson(Vec<u8>),
|
||||
}
|
||||
|
||||
async fn response_with_timeout(
|
||||
future: impl std::future::Future<Output = Result<reqwest::Response, reqwest::Error>>,
|
||||
timeout: Duration,
|
||||
phase: &'static str,
|
||||
) -> Result<reqwest::Response, ClientError> {
|
||||
tokio::time::timeout(timeout, future)
|
||||
.await
|
||||
.map_err(|_| ClientError::Timeout { phase, timeout })?
|
||||
.map_err(ClientError::Http)
|
||||
}
|
||||
|
||||
/// Field-by-field clone; written by hand so the `Clone` bound on `S` lives on
/// this impl only.
impl<S: Scheme + Clone> Clone for HttpTransport<S> {
    fn clone(&self) -> Self {
        // Destructuring makes adding a field without cloning it a compile error.
        let Self {
            http_client,
            scheme,
            model_id,
            base_url,
            auth,
            capability,
        } = self;
        Self {
            http_client: http_client.clone(),
            scheme: scheme.clone(),
            model_id: model_id.clone(),
            base_url: base_url.clone(),
            auth: auth.clone(),
            capability: capability.clone(),
        }
    }
}
|
||||
|
||||
/// エラーレスポンスを `ClientError::Api` に変換する。
|
||||
async fn classify_error_response(resp: reqwest::Response) -> ClientError {
|
||||
let status = resp.status().as_u16();
|
||||
let retry_after = resp
|
||||
.headers()
|
||||
.get(RETRY_AFTER)
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.and_then(|s| s.trim().parse::<u64>().ok())
|
||||
.map(Duration::from_secs);
|
||||
let text = resp.text().await.unwrap_or_default();
|
||||
if let Ok(json) = serde_json::from_str::<serde_json::Value>(&text) {
|
||||
let error = json.get("error").unwrap_or(&json);
|
||||
let code = error.get("type").and_then(|v| v.as_str()).map(String::from);
|
||||
let message = error
|
||||
.get("message")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or(&text)
|
||||
.to_string();
|
||||
ClientError::Api {
|
||||
status: Some(status),
|
||||
code,
|
||||
message,
|
||||
retry_after,
|
||||
}
|
||||
} else {
|
||||
ClientError::Api {
|
||||
status: Some(status),
|
||||
code: None,
|
||||
message: text,
|
||||
retry_after,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl<S: Scheme + Clone + 'static> LlmClient for HttpTransport<S> {
|
||||
fn clone_boxed(&self) -> Box<dyn LlmClient> {
|
||||
Box::new(self.clone())
|
||||
}
|
||||
|
||||
fn validate_config(&self, config: &RequestConfig) -> Vec<ConfigWarning> {
|
||||
self.scheme.validate_config(config)
|
||||
}
|
||||
|
||||
async fn stream(&self, request: Request) -> Result<ResponseStream, ClientError> {
|
||||
let url = self.build_url();
|
||||
let mut headers = self.build_headers().await?;
|
||||
self.apply_stream_headers(&mut headers, &request)?;
|
||||
let body = self
|
||||
.scheme
|
||||
.build_request_body(&self.model_id, &request, &self.capability);
|
||||
let request_body = self.encode_request_body(&body, &mut headers)?;
|
||||
|
||||
let builder = self.http_client.post(&url).headers(headers);
|
||||
let builder = match request_body {
|
||||
RequestBody::Json(body) => builder.json(&body),
|
||||
RequestBody::CompressedJson(body) => builder.body(body),
|
||||
};
|
||||
let response =
|
||||
response_with_timeout(builder.send(), DEFAULT_STREAM_OPEN_TIMEOUT, "stream_open")
|
||||
.await?;
|
||||
|
||||
if !response.status().is_success() {
|
||||
return Err(classify_error_response(response).await);
|
||||
}
|
||||
|
||||
let scheme = self.scheme.clone();
|
||||
let byte_stream = response.bytes_stream().map_err(std::io::Error::other);
|
||||
let event_stream = byte_stream.eventsource();
|
||||
|
||||
// scheme 固有のパース状態をストリーム単位で保持する
|
||||
let mut state = <S::State as Default>::default();
|
||||
|
||||
let stream = event_stream
|
||||
.map(move |result| match result {
|
||||
Ok(frame) => match scheme.parse_sse(&frame.event, &frame.data, &mut state) {
|
||||
Ok(events) => Ok(events),
|
||||
Err(e) => Err(e),
|
||||
},
|
||||
Err(e) => Err(ClientError::Sse(e.to_string())),
|
||||
})
|
||||
.map(|res| {
|
||||
let s: Pin<Box<dyn Stream<Item = Result<Event, ClientError>> + Send>> = match res {
|
||||
Ok(events) => Box::pin(futures::stream::iter(events.into_iter().map(Ok))),
|
||||
Err(e) => Box::pin(futures::stream::once(async move { Err(e) })),
|
||||
};
|
||||
s
|
||||
})
|
||||
.flatten();
|
||||
|
||||
Ok(Box::pin(stream))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Auth provider stub that returns fixed headers; `codex` is what
    /// `is_codex_backend` reports.
    #[derive(Debug)]
    struct TestAuthProvider {
        codex: bool,
    }

    #[async_trait]
    impl AuthProvider for TestAuthProvider {
        async fn headers(&self) -> Result<Vec<(HeaderName, HeaderValue)>, ClientError> {
            let authorization = (
                HeaderName::from_static("authorization"),
                HeaderValue::from_static("Bearer test-token"),
            );
            let account = (
                HeaderName::from_static("chatgpt-account-id"),
                HeaderValue::from_static("account-1"),
            );
            Ok(vec![authorization, account])
        }

        fn is_codex_backend(&self) -> bool {
            self.codex
        }
    }

    /// Minimal scheme: bearer auth, `/responses` path, a body that echoes the
    /// model, item count and cache key, and an SSE parser yielding no events.
    #[derive(Clone)]
    struct TestScheme;

    impl Scheme for TestScheme {
        type State = ();

        fn default_base_url(&self) -> &'static str {
            "https://example.test"
        }

        fn path(&self, _model_id: &str) -> String {
            String::from("/responses")
        }

        fn required_auth(&self) -> AuthRequirement {
            AuthRequirement::Bearer
        }

        fn build_request_body(
            &self,
            model_id: &str,
            request: &Request,
            _capability: &ModelCapability,
        ) -> serde_json::Value {
            json!({
                "model": model_id,
                "input_len": request.items.len(),
                "prompt_cache_key": request.cache_key,
            })
        }

        fn parse_sse(
            &self,
            _event_type: &str,
            _data: &str,
            _state: &mut Self::State,
        ) -> Result<Vec<Event>, ClientError> {
            Ok(Vec::new())
        }

        fn default_capability(&self) -> ModelCapability {
            ModelCapability::minimal()
        }
    }

    /// Builds a transport around `TestScheme` with the given auth.
    fn transport(auth: ResolvedAuth) -> HttpTransport<TestScheme> {
        HttpTransport::new(
            TestScheme,
            "gpt-test",
            "https://example.test",
            auth,
            ModelCapability::minimal(),
        )
    }

    /// Runs the same header/body preparation steps as `stream`, without
    /// sending anything.
    async fn prepare(
        transport: &HttpTransport<TestScheme>,
        request: &Request,
    ) -> (HeaderMap, RequestBody) {
        let mut headers = transport.build_headers().await.unwrap();
        transport
            .apply_stream_headers(&mut headers, request)
            .unwrap();
        let body = transport.scheme.build_request_body(
            &transport.model_id,
            request,
            &transport.capability,
        );
        let encoded = transport.encode_request_body(&body, &mut headers).unwrap();
        (headers, encoded)
    }

    #[tokio::test]
    async fn response_timeout_returns_retryable_lifecycle_timeout() {
        let never = std::future::pending::<Result<reqwest::Response, reqwest::Error>>();
        let err = response_with_timeout(never, Duration::from_millis(5), "stream_open")
            .await
            .unwrap_err();

        assert!(crate::llm_client::error::is_retryable(&err));
        assert!(matches!(
            err,
            ClientError::Timeout {
                phase: "stream_open",
                ..
            }
        ));
    }

    #[tokio::test]
    async fn codex_backend_adds_conversation_headers_and_zstd_body() {
        let provider = TestAuthProvider { codex: true };
        let transport = transport(ResolvedAuth::Custom(Arc::new(provider)));
        let request = Request::new().user("hello").cache_key("segment-123");

        let (headers, encoded) = prepare(&transport, &request).await;

        assert_eq!(headers.get(ACCEPT).unwrap(), "text/event-stream");
        assert_eq!(headers.get("session_id").unwrap(), "segment-123");
        assert_eq!(headers.get("x-client-request-id").unwrap(), "segment-123");
        assert_eq!(headers.get(CONTENT_ENCODING).unwrap(), "zstd");

        let RequestBody::CompressedJson(compressed) = encoded else {
            panic!("Codex backend request body must be zstd-compressed");
        };
        let raw = zstd::stream::decode_all(std::io::Cursor::new(compressed)).unwrap();
        let decoded: serde_json::Value = serde_json::from_slice(&raw).unwrap();
        assert_eq!(decoded["prompt_cache_key"], "segment-123");
    }

    #[tokio::test]
    async fn non_codex_request_does_not_get_codex_only_headers_or_compression() {
        let transport = transport(ResolvedAuth::ApiKey("api-key".to_string()));
        let request = Request::new().user("hello").cache_key("segment-123");

        let (headers, encoded) = prepare(&transport, &request).await;

        assert_eq!(headers.get(ACCEPT).unwrap(), "text/event-stream");
        assert!(headers.get("session_id").is_none());
        assert!(headers.get("x-client-request-id").is_none());
        assert!(headers.get(CONTENT_ENCODING).is_none());

        let RequestBody::Json(decoded) = encoded else {
            panic!("non-Codex request body must remain normal JSON");
        };
        assert_eq!(decoded["prompt_cache_key"], "segment-123");
    }
}
|
||||
|
|
@ -9,6 +9,10 @@
|
|||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Predicate for serde's `skip_serializing_if`: returns `true` exactly when
/// the flag is `false`.
fn is_false(value: &bool) -> bool {
    let flag = *value;
    !flag
}
|
||||
|
||||
// ============================================================================
|
||||
// Item - The core unit of conversation
|
||||
// ============================================================================
|
||||
|
|
@ -74,8 +78,14 @@ pub enum Item {
|
|||
id: Option<ItemId>,
|
||||
/// Call ID linking to the tool call
|
||||
call_id: CallId,
|
||||
/// Output content
|
||||
output: String,
|
||||
/// Short summary (always kept in history, survives pruning)
|
||||
summary: String,
|
||||
/// Detailed output (removed by pruning when old enough)
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
content: Option<String>,
|
||||
/// Whether the tool result represents an execution error.
|
||||
#[serde(default, skip_serializing_if = "is_false")]
|
||||
is_error: bool,
|
||||
},
|
||||
|
||||
/// Reasoning/thinking item
|
||||
|
|
@ -83,8 +93,23 @@ pub enum Item {
|
|||
/// Optional item ID
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
id: Option<ItemId>,
|
||||
/// Reasoning text
|
||||
/// Reasoning text(reasoning body, `reasoning_text.delta` の累積)
|
||||
text: String,
|
||||
/// Reasoning summary(OpenAI Responses の `summary_text[]` を格納。
|
||||
/// 他 scheme は空)
|
||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
summary: Vec<String>,
|
||||
/// サーバから返された暗号化済み reasoning blob。ZDR / `store=false`
|
||||
/// 運用で stateless に再送するときそのまま添える必要がある。
|
||||
/// Anthropic の `redacted_thinking.data` もここに格納する。
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
encrypted_content: Option<String>,
|
||||
/// Anthropic extended thinking の `signature`。新世代 Claude
|
||||
/// (Opus 4.5+/Sonnet 4.6+) では同一論理ターン内の `thinking`
|
||||
/// ブロックを送り返す際に必須。改ざん検知に使われる。他 scheme
|
||||
/// では `None`。
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
signature: Option<String>,
|
||||
/// Item status
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
status: Option<ItemStatus>,
|
||||
|
|
@ -96,6 +121,20 @@ impl Item {
|
|||
// Message constructors
|
||||
// ========================================================================
|
||||
|
||||
/// Create a system message item with text content.
|
||||
///
|
||||
/// System items in history are sent as `role: "system"` on OpenAI,
|
||||
/// and as `role: "user"` on Anthropic/Gemini (which lack a system
|
||||
/// role in conversation items).
|
||||
pub fn system_message(text: impl Into<String>) -> Self {
|
||||
Self::Message {
|
||||
id: None,
|
||||
role: Role::System,
|
||||
content: vec![ContentPart::Text { text: text.into() }],
|
||||
status: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a user message item with text content
|
||||
pub fn user_message(text: impl Into<String>) -> Self {
|
||||
Self::Message {
|
||||
|
|
@ -164,15 +203,41 @@ impl Item {
|
|||
Self::tool_call(call_id, name, arguments.to_string())
|
||||
}
|
||||
|
||||
/// Create a tool result item
|
||||
pub fn tool_result(call_id: impl Into<String>, output: impl Into<String>) -> Self {
|
||||
/// Create a tool result item with summary only (no content).
|
||||
pub fn tool_result(call_id: impl Into<String>, summary: impl Into<String>) -> Self {
|
||||
Self::tool_result_item(call_id, summary, None, false)
|
||||
}
|
||||
|
||||
/// Create an error tool result item with summary only (no content).
|
||||
pub fn tool_result_error(call_id: impl Into<String>, summary: impl Into<String>) -> Self {
|
||||
Self::tool_result_item(call_id, summary, None, true)
|
||||
}
|
||||
|
||||
/// Create a tool result item with summary, optional content, and error flag.
|
||||
pub fn tool_result_item(
|
||||
call_id: impl Into<String>,
|
||||
summary: impl Into<String>,
|
||||
content: Option<String>,
|
||||
is_error: bool,
|
||||
) -> Self {
|
||||
Self::ToolResult {
|
||||
id: None,
|
||||
call_id: call_id.into(),
|
||||
output: output.into(),
|
||||
summary: summary.into(),
|
||||
content,
|
||||
is_error,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a tool result item with summary and content.
|
||||
pub fn tool_result_with_content(
|
||||
call_id: impl Into<String>,
|
||||
summary: impl Into<String>,
|
||||
content: impl Into<String>,
|
||||
) -> Self {
|
||||
Self::tool_result_item(call_id, summary, Some(content.into()), false)
|
||||
}
|
||||
|
||||
// ========================================================================
|
||||
// Reasoning constructors
|
||||
// ========================================================================
|
||||
|
|
@ -182,10 +247,40 @@ impl Item {
|
|||
Self::Reasoning {
|
||||
id: None,
|
||||
text: text.into(),
|
||||
summary: Vec::new(),
|
||||
encrypted_content: None,
|
||||
signature: None,
|
||||
status: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Set reasoning summary on a `Reasoning` item. No-op on other variants.
|
||||
pub fn with_reasoning_summary(mut self, new_summary: Vec<String>) -> Self {
|
||||
if let Self::Reasoning { summary, .. } = &mut self {
|
||||
*summary = new_summary;
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
/// Set `encrypted_content` on a `Reasoning` item. No-op on other variants.
|
||||
pub fn with_encrypted_content(mut self, content: impl Into<String>) -> Self {
|
||||
if let Self::Reasoning {
|
||||
encrypted_content, ..
|
||||
} = &mut self
|
||||
{
|
||||
*encrypted_content = Some(content.into());
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
/// Set Anthropic `signature` on a `Reasoning` item. No-op on other variants.
|
||||
pub fn with_signature(mut self, sig: impl Into<String>) -> Self {
|
||||
if let Self::Reasoning { signature, .. } = &mut self {
|
||||
*signature = Some(sig.into());
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
// ========================================================================
|
||||
// Builder methods
|
||||
// ========================================================================
|
||||
|
|
@ -285,6 +380,19 @@ impl Item {
|
|||
}
|
||||
}
|
||||
|
||||
/// Parse a ToolCall `arguments` string into a JSON object.
|
||||
///
|
||||
/// Tool call arguments must be a JSON object at the provider API level
|
||||
/// (Anthropic rejects non-object `tool_use.input`). This helper normalizes
|
||||
/// anything that is not a JSON object — empty string, the literal `"null"`,
|
||||
/// arrays, scalars, or parse failures — to an empty object `{}`.
|
||||
pub fn parse_tool_arguments(arguments: &str) -> serde_json::Value {
|
||||
match serde_json::from_str::<serde_json::Value>(arguments) {
|
||||
Ok(value) if value.is_object() => value,
|
||||
_ => serde_json::Value::Object(serde_json::Map::new()),
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Content Parts - Components within message items
|
||||
// ============================================================================
|
||||
|
|
@ -374,6 +482,21 @@ pub struct Request {
|
|||
pub tools: Vec<ToolDefinition>,
|
||||
/// Request configuration
|
||||
pub config: RequestConfig,
|
||||
/// Index into `items` marking the end of a stable, cacheable prefix.
|
||||
///
|
||||
/// Higher layers that know about durable prefix boundaries (e.g. a
|
||||
/// post-compaction summary) set this so that caching-aware providers
|
||||
/// (Anthropic today) can place a long-lived cache breakpoint there.
|
||||
/// Providers without prompt caching ignore the field.
|
||||
pub cache_anchor: Option<usize>,
|
||||
/// 会話単位の安定キー。`prompt_cache_key` として送られる
|
||||
/// (OpenAI Responses)。ChatGPT backend (codex-oauth) は明示キーが
|
||||
/// 無いと org/project ハッシュ衝突でプロンプトキャッシュが
|
||||
/// ほぼヒットしないため、pod 側で `SegmentId` を渡す運用を想定。
|
||||
/// `cache_anchor` と違い名前空間キーであり、`prefix anchor` とは
|
||||
/// 別の概念。`cache_anchor` を読まない provider と同じく、
|
||||
/// `prompt_cache_key` を持たない provider は無視する。
|
||||
pub cache_key: Option<String>,
|
||||
}
|
||||
|
||||
impl Request {
|
||||
|
|
@ -453,6 +576,14 @@ impl Request {
|
|||
self.config.stop_sequences.push(sequence.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the conversation cache key.
|
||||
///
|
||||
/// 詳細は [`Request::cache_key`] のフィールドコメント参照。
|
||||
pub fn cache_key(mut self, key: impl Into<String>) -> Self {
|
||||
self.cache_key = Some(key.into());
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
|
|
@ -513,6 +644,12 @@ pub struct RequestConfig {
|
|||
pub top_k: Option<u32>,
|
||||
/// Stop sequences
|
||||
pub stop_sequences: Vec<String>,
|
||||
/// Reasoning / extended-thinking 制御(共通型、scheme 側で各社形式に投影)。
|
||||
///
|
||||
/// `None` のときは何も送らない。`Some` でも scheme の
|
||||
/// `ModelCapability::reasoning` が `None` なら無視される。
|
||||
#[serde(default)]
|
||||
pub reasoning: Option<crate::llm_client::capability::ReasoningControl>,
|
||||
}
|
||||
|
||||
impl RequestConfig {
|
||||
|
|
@ -551,3 +688,54 @@ impl RequestConfig {
|
|||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod parse_tool_arguments_tests {
    use super::parse_tool_arguments;
    use serde_json::{Value, json};

    /// Asserts that `raw` is normalized to the empty JSON object.
    fn assert_becomes_empty_object(raw: &str) {
        assert_eq!(
            parse_tool_arguments(raw),
            Value::Object(serde_json::Map::new())
        );
    }

    #[test]
    fn empty_string_normalizes_to_object() {
        assert_becomes_empty_object("");
    }

    #[test]
    fn literal_null_normalizes_to_object() {
        // A literal "null" left over in existing sessions must be recoverable
        // on resume.
        assert_becomes_empty_object("null");
    }

    #[test]
    fn array_normalizes_to_object() {
        assert_becomes_empty_object("[1, 2, 3]");
    }

    #[test]
    fn scalar_normalizes_to_object() {
        for raw in ["42", "\"str\"", "true"] {
            assert_becomes_empty_object(raw);
        }
    }

    #[test]
    fn invalid_json_normalizes_to_object() {
        assert_becomes_empty_object("{not json");
    }

    #[test]
    fn valid_object_passes_through() {
        let parsed = parse_tool_arguments(r#"{"city":"Tokyo","days":3}"#);
        assert_eq!(parsed, json!({"city": "Tokyo", "days": 3}));
    }

    #[test]
    fn empty_object_passes_through() {
        assert_becomes_empty_object("{}");
    }
}
|
||||
|
|
|
|||
451
crates/llm-worker/src/prune.rs
Normal file
451
crates/llm-worker/src/prune.rs
Normal file
|
|
@ -0,0 +1,451 @@
|
|||
//! Prune — context projection for old tool-result content.
//!
//! Omits the `content` of old [`Item::ToolResult`] entries from the context
//! sent to the LLM in order to reclaim context-window tokens. The `summary`
//! is kept, so a trace of "what happened" is preserved.
//!
//! # Design
//!
//! Prune is a **context projection**, not a transformation of the history.
//! This crate only provides the pure candidate extraction
//! [`prunable_indices`]; applying the projection is done by upper layers
//! (e.g. `pod::prune_hook`) and only against the temporary context sent to
//! the LLM. The Worker's persistent history is never modified.
//!
//! The protection boundary is determined by a tail token budget, but this
//! crate does not own the usage history. Per-prefix token estimates and
//! savings estimates are injected by upper layers via callbacks.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::llm_client::types::Item;
|
||||
use crate::token_counter::{EstimateSource, TokenEstimate};
|
||||
|
||||
/// Callback that returns token estimates for every prefix boundary of the
|
||||
/// supplied request history.
|
||||
///
|
||||
/// The returned slice must have `history.len() + 1` entries where entry `i`
|
||||
/// estimates the token count of `history[..i]`. Returning a malformed vector,
|
||||
/// or estimates whose source is [`EstimateSource::NoData`], makes prune treat
|
||||
/// the request as having no candidates.
|
||||
pub type TokenEstimator = Box<dyn Fn(&[Item]) -> Vec<TokenEstimate> + Send + Sync>;
|
||||
|
||||
/// Callback that estimates the token savings for projecting the
|
||||
/// `ToolResult.content` out of `history[i]` for each `i` in `indices`.
|
||||
///
|
||||
/// Injected into [`crate::Worker`] via `set_savings_estimator` so the
|
||||
/// Worker can make `min_savings` decisions without knowing about usage
|
||||
/// measurement sources. Return `0` to signal "no data / refuse to prune".
|
||||
///
|
||||
/// 推定対象は「drop する範囲全体」ではなく「content を None にする差分」
|
||||
/// であることに注意。item 自体(summary 等)は残るので、この callback は
|
||||
/// 実際の projection と一致する savings を返す必要がある。
|
||||
pub type SavingsEstimator = Box<dyn Fn(&[Item], &[usize]) -> u64 + Send + Sync>;
|
||||
|
||||
/// Result of one prune evaluation pass, surfaced to the optional
|
||||
/// [`PruneObserver`] for instrumentation.
|
||||
///
|
||||
/// Worker は LLM リクエストごとに 1 回 prune の評価をし、その結果を
|
||||
/// (observer が登録されていれば)この値で通知する。fire/skip の判定
|
||||
/// 結果と、判定材料になった候補数 / 推定 savings / 保護領域の先頭 index を持つ。
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct PruneEvaluation {
|
||||
/// `prunable_indices` の長さ。`Skipped::NoCandidates` の時は 0。
|
||||
pub candidate_count: usize,
|
||||
/// 推定された savings (tokens)。`NoCandidates` の時は 0。
|
||||
pub estimated_savings: u64,
|
||||
/// Token budget で保護される suffix の先頭 item index。
|
||||
/// usage 推定が `NoData` で境界が決まらない場合は `None`。
|
||||
pub protected_start_index: Option<usize>,
|
||||
/// 判定結果。
|
||||
pub decision: PruneDecision,
|
||||
}
|
||||
|
||||
/// Outcome of one prune evaluation. Each variant is one branch of the
/// "fire vs skip" decision tree the Worker walks before each LLM request.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PruneDecision {
    /// `prunable_indices` was empty, so nothing is done.
    SkippedNoCandidates,
    /// There were candidates, but the estimated savings were below
    /// `min_savings`, so nothing is done.
    SkippedBelowMinSavings,
    /// There were candidates and savings >= `min_savings`, so the projection
    /// was applied. `pruned_count` is the number of items `project()`
    /// actually rewrote (candidates whose content was already `None` count
    /// as 0).
    Fired { pruned_count: usize },
}
|
||||
|
||||
/// Optional observer invoked after each prune evaluation, regardless of
|
||||
/// branch. Pod 等の上位層が install して metrics を発行する。
|
||||
pub type PruneObserver = Box<dyn Fn(&PruneEvaluation) + Send + Sync>;
|
||||
|
||||
/// Configuration for the Prune algorithm.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct PruneConfig {
|
||||
/// Token budget at the history tail protected from pruning.
|
||||
#[serde(default = "default_protected_tokens")]
|
||||
pub protected_tokens: u64,
|
||||
|
||||
/// Minimum token savings required to actually prune. If the prunable
|
||||
/// content is smaller than this, the caller should skip to avoid
|
||||
/// pointless KV-cache invalidation. The unit is tokens; the caller
|
||||
/// is responsible for measuring savings via a usage-history-aware
|
||||
/// estimator and comparing against this threshold.
|
||||
#[serde(default = "default_min_savings")]
|
||||
pub min_savings: u64,
|
||||
}
|
||||
|
||||
/// Default for [`PruneConfig::protected_tokens`]: 8000 tokens.
fn default_protected_tokens() -> u64 {
    8000
}
|
||||
/// Default for [`PruneConfig::min_savings`]: 4096 tokens.
fn default_min_savings() -> u64 {
    4096
}
|
||||
|
||||
impl Default for PruneConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
protected_tokens: default_protected_tokens(),
|
||||
min_savings: default_min_savings(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Set `content = None` on each `Item::ToolResult` at the given indices.
|
||||
///
|
||||
/// Returns the number of items that were actually modified — items that
|
||||
/// are already content-less are counted as 0. Intended for use on a
|
||||
/// request-context clone (never on a persistent history).
|
||||
pub fn project(items: &mut [Item], indices: &[usize]) -> usize {
|
||||
let mut count = 0;
|
||||
for &i in indices {
|
||||
if let Item::ToolResult { content, .. } = &mut items[i]
|
||||
&& content.is_some()
|
||||
{
|
||||
*content = None;
|
||||
count += 1;
|
||||
}
|
||||
}
|
||||
count
|
||||
}
|
||||
|
||||
/// Indices of `Item::ToolResult { content: Some(_), .. }` that lie before
|
||||
/// the suffix protected by `protected_tokens`. Pure: does not mutate `items`.
|
||||
///
|
||||
/// Returns an empty vector when token estimates are unavailable (`NoData`) or
|
||||
/// no prunable candidates exist.
|
||||
pub fn prunable_indices(
|
||||
items: &[Item],
|
||||
protected_tokens: u64,
|
||||
token_estimates: &[TokenEstimate],
|
||||
) -> Vec<usize> {
|
||||
evaluate_candidates(items, protected_tokens, token_estimates).0
|
||||
}
|
||||
|
||||
/// Same as [`prunable_indices`] but also returns the start index of the
|
||||
/// protected suffix. `None` means the token boundary could not be determined
|
||||
/// (currently because usage estimates were `NoData` or malformed).
|
||||
pub fn evaluate_candidates(
|
||||
items: &[Item],
|
||||
protected_tokens: u64,
|
||||
token_estimates: &[TokenEstimate],
|
||||
) -> (Vec<usize>, Option<usize>) {
|
||||
let Some(protected_start) = protected_start_index(items, protected_tokens, token_estimates)
|
||||
else {
|
||||
return (Vec::new(), None);
|
||||
};
|
||||
|
||||
let candidates = items[..protected_start]
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter_map(|(i, item)| match item {
|
||||
Item::ToolResult {
|
||||
content: Some(_), ..
|
||||
} => Some(i),
|
||||
_ => None,
|
||||
})
|
||||
.collect();
|
||||
(candidates, Some(protected_start))
|
||||
}
|
||||
|
||||
fn protected_start_index(
|
||||
items: &[Item],
|
||||
protected_tokens: u64,
|
||||
token_estimates: &[TokenEstimate],
|
||||
) -> Option<usize> {
|
||||
if token_estimates.len() != items.len() + 1 {
|
||||
return None;
|
||||
}
|
||||
let total = token_estimates[items.len()];
|
||||
if total.source == EstimateSource::NoData {
|
||||
return None;
|
||||
}
|
||||
if protected_tokens == 0 {
|
||||
return Some(items.len());
|
||||
}
|
||||
|
||||
let mut protected_start = items.len();
|
||||
for idx in (0..items.len()).rev() {
|
||||
let prefix = token_estimates[idx];
|
||||
if prefix.source == EstimateSource::NoData {
|
||||
return None;
|
||||
}
|
||||
protected_start = idx;
|
||||
let tail_tokens = total.tokens.saturating_sub(prefix.tokens);
|
||||
if tail_tokens >= protected_tokens {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Some(protected_start)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Helper: build a history with interleaved user messages and tool results.
    ///
    /// Each turn contributes a user message, an assistant message, and one
    /// tool call / tool result pair per entry; `None` content produces an
    /// already-pruned tool result.
    fn make_history(turns: &[(&str, Vec<(&str, Option<&str>)>)]) -> Vec<Item> {
        let mut items = Vec::new();
        for (user_msg, tool_results) in turns {
            items.push(Item::user_message(*user_msg));
            items.push(Item::assistant_message("ok"));
            for (i, (summary, content)) in tool_results.iter().enumerate() {
                let call_id = format!("call_{}", items.len() + i);
                items.push(Item::tool_call(&call_id, "some_tool", "{}"));
                match content {
                    Some(c) => items.push(Item::tool_result_with_content(&call_id, *summary, *c)),
                    None => items.push(Item::tool_result(&call_id, *summary)),
                }
            }
        }
        items
    }

    /// Wraps raw prefix token counts as `Measured` estimates.
    fn measured_prefix(tokens: &[u64]) -> Vec<TokenEstimate> {
        tokens
            .iter()
            .copied()
            .map(|tokens| TokenEstimate {
                tokens,
                source: EstimateSource::Measured,
            })
            .collect()
    }

    /// Prefix estimates where every item costs `item_tokens`.
    fn uniform_estimates(items: &[Item], item_tokens: u64) -> Vec<TokenEstimate> {
        let mut tokens = Vec::with_capacity(items.len() + 1);
        for i in 0..=items.len() {
            tokens.push(i as u64 * item_tokens);
        }
        measured_prefix(&tokens)
    }

    /// Prefix estimates built as the running sum of per-item token costs.
    fn estimates_from_item_tokens(item_tokens: &[u64]) -> Vec<TokenEstimate> {
        let mut prefix = Vec::with_capacity(item_tokens.len() + 1);
        let mut acc = 0;
        prefix.push(acc);
        for tokens in item_tokens {
            acc += tokens;
            prefix.push(acc);
        }
        measured_prefix(&prefix)
    }

    /// Correctly sized estimates where every entry except the empty prefix
    /// is `NoData`.
    fn no_data_estimates(items: &[Item]) -> Vec<TokenEstimate> {
        (0..=items.len())
            .map(|i| TokenEstimate {
                tokens: i as u64,
                source: if i == 0 {
                    EstimateSource::Measured
                } else {
                    EstimateSource::NoData
                },
            })
            .collect()
    }

    #[test]
    fn no_candidates_when_estimate_has_no_data() {
        let items = make_history(&[("turn1", vec![("summary1", Some("big content here"))])]);
        let estimates = no_data_estimates(&items);
        let (candidates, protected_start) = evaluate_candidates(&items, 10, &estimates);
        assert!(candidates.is_empty());
        assert_eq!(protected_start, None);
    }

    #[test]
    fn no_candidates_when_history_fits_in_protected_tokens() {
        let items = make_history(&[
            ("turn1", vec![("summary1", Some("big content here"))]),
            ("turn2", vec![("summary2", Some("more content"))]),
        ]);
        let estimates = uniform_estimates(&items, 10);
        assert!(prunable_indices(&items, 10_000, &estimates).is_empty());
    }

    #[test]
    fn candidates_before_token_protected_suffix() {
        let big = "x".repeat(4096 * 4);
        let items = make_history(&[
            ("turn1", vec![("s1", Some(&big))]),
            ("turn2", vec![("s2", Some(&big))]),
            ("turn3", vec![("s3", Some("keep me"))]),
            ("turn4", vec![("s4", Some("keep me too"))]),
        ]);
        let estimates = uniform_estimates(&items, 10);
        let candidates = prunable_indices(&items, 80, &estimates);
        assert_eq!(candidates.len(), 2);
        // suffix budget 80 tokens protects turn3+turn4 (8 items), so only s1/s2 are candidates.
        for &i in &candidates {
            if let Item::ToolResult { summary, .. } = &items[i] {
                assert!(summary == "s1" || summary == "s2");
            } else {
                panic!("non tool-result selected");
            }
        }
    }

    #[test]
    fn single_long_task_gets_candidates_without_multiple_user_turns() {
        let big = "x".repeat(4096 * 8);
        let items = make_history(&[(
            "one long task",
            vec![
                ("s1", Some(&big)),
                ("s2", Some(&big)),
                ("s3", Some(&big)),
                ("s4", Some(&big)),
            ],
        )]);
        // user + assistant are cheap; every ToolCall is cheap; every ToolResult is heavy.
        let item_tokens = vec![1, 1, 1, 5_000, 1, 5_000, 1, 5_000, 1, 5_000];
        let estimates = estimates_from_item_tokens(&item_tokens);

        let (candidates, protected_start) = evaluate_candidates(&items, 8_000, &estimates);

        assert_eq!(protected_start, Some(7));
        assert_eq!(candidates.len(), 2);
        for &i in &candidates {
            if let Item::ToolResult { summary, .. } = &items[i] {
                assert!(summary == "s1" || summary == "s2");
            } else {
                panic!("non tool-result selected");
            }
        }
    }

    #[test]
    fn already_pruned_items_excluded_from_candidates() {
        let items = make_history(&[
            ("turn1", vec![("s1", None)]), // already pruned (content=None)
            ("turn2", vec![]),
            ("turn3", vec![]),
            ("turn4", vec![]),
        ]);
        let estimates = uniform_estimates(&items, 10);
        assert!(prunable_indices(&items, 20, &estimates).is_empty());
    }

    #[test]
    fn project_drops_content_and_counts_modifications() {
        let big = "x".repeat(64);
        let mut items = make_history(&[
            ("turn1", vec![("s1", Some(&big))]),
            ("turn2", vec![("s2", Some(&big))]),
            ("turn3", vec![("s3", Some("keep me"))]),
            ("turn4", vec![("s4", Some("keep me too"))]),
        ]);
        let estimates = uniform_estimates(&items, 10);
        let candidates = prunable_indices(&items, 80, &estimates);
        let count = project(&mut items, &candidates);
        assert_eq!(count, 2);

        for item in &items {
            if let Item::ToolResult {
                summary, content, ..
            } = item
            {
                if summary == "s1" || summary == "s2" {
                    assert!(content.is_none(), "old content should be projected out");
                } else {
                    assert!(content.is_some(), "protected content should remain");
                }
            }
        }
    }

    #[test]
    fn project_skips_already_pruned_items() {
        // `indices` points at an item whose content is already None.
        // project() should count it as 0 modifications.
        let mut items = make_history(&[
            ("turn1", vec![("s1", None)]),
            ("turn2", vec![("s2", Some("hello"))]),
        ]);
        // Manually target s1 even though it's already None.
        let target = items
            .iter()
            .position(|it| matches!(it, Item::ToolResult { summary, .. } if summary == "s1"))
            .unwrap();
        let count = project(&mut items, &[target]);
        assert_eq!(count, 0);
    }

    #[test]
    fn project_is_idempotent() {
        let big = "x".repeat(64);
        let mut items = make_history(&[
            ("turn1", vec![("s1", Some(&big))]),
            ("turn2", vec![]),
            ("turn3", vec![]),
            ("turn4", vec![]),
        ]);
        let estimates = uniform_estimates(&items, 10);
        let candidates = prunable_indices(&items, 20, &estimates);
        assert_eq!(project(&mut items, &candidates), 1);
        // Second pass: reusing the same prunable_indices result modifies nothing.
        assert_eq!(project(&mut items, &candidates), 0);
    }

    #[test]
    fn evaluate_candidates_returns_protected_start_index() {
        let big = "x".repeat(64);
        let items = make_history(&[
            ("turn1", vec![("s1", Some(&big))]),
            ("turn2", vec![("s2", Some(&big))]),
            ("turn3", vec![("s3", Some("keep"))]),
            ("turn4", vec![("s4", Some("keep too"))]),
        ]);
        let estimates = uniform_estimates(&items, 10);
        let (candidates, protected_start) = evaluate_candidates(&items, 80, &estimates);
        assert_eq!(candidates.len(), 2);
        // protected_tokens=80 → protected suffix is turn3+turn4, starting at index 8.
        assert_eq!(protected_start, Some(8));
    }

    #[test]
    fn evaluate_candidates_reports_zero_start_when_everything_is_protected() {
        let items = make_history(&[("only", vec![("s", Some("x"))])]);
        let estimates = uniform_estimates(&items, 10);
        let (candidates, protected_start) = evaluate_candidates(&items, 10_000, &estimates);
        assert!(candidates.is_empty());
        assert_eq!(protected_start, Some(0));
    }

    #[test]
    fn zero_protected_tokens_allows_all_tool_results_as_candidates() {
        let big = "x".repeat(64);
        let items = make_history(&[("turn1", vec![("s1", Some(&big)), ("s2", Some(&big))])]);
        let estimates = uniform_estimates(&items, 10);
        let (candidates, protected_start) = evaluate_candidates(&items, 0, &estimates);
        assert_eq!(protected_start, Some(items.len()));
        assert_eq!(candidates.len(), 2);
    }

    #[test]
    fn malformed_estimate_vector_is_treated_as_no_boundary() {
        let items = make_history(&[("turn1", vec![("s1", Some("x"))])]);
        let (candidates, protected_start) = evaluate_candidates(&items, 10, &[]);
        assert!(candidates.is_empty());
        assert_eq!(protected_start, None);
    }
}
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
//! Worker State
|
||||
//!
|
||||
//! State marker types for cache protection using the Type-state pattern.
|
||||
//! Worker has state transitions from `Mutable` → `CacheLocked`.
|
||||
//! Worker has state transitions from `Mutable` → `Locked`.
|
||||
|
||||
/// Marker trait representing Worker state
|
||||
///
|
||||
|
|
@ -19,7 +19,7 @@ mod private {
|
|||
/// - Editing message history (add, delete, clear)
|
||||
/// - Registering tools and hooks
|
||||
///
|
||||
/// Can transition to [`CacheLocked`] state via `Worker::lock()`.
|
||||
/// Can transition to [`Locked`] state via `Worker::lock()`.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
|
|
@ -54,7 +54,7 @@ impl WorkerState for Mutable {}
|
|||
/// Can return to [`Mutable`] state via `Worker::unlock()`,
|
||||
/// but note that cache protection will be released.
|
||||
#[derive(Debug, Clone, Copy, Default)]
|
||||
pub struct CacheLocked;
|
||||
pub struct Locked;
|
||||
|
||||
impl private::Sealed for CacheLocked {}
|
||||
impl WorkerState for CacheLocked {}
|
||||
impl private::Sealed for Locked {}
|
||||
impl WorkerState for Locked {}
|
||||
|
|
|
|||
|
|
@ -1,371 +0,0 @@
|
|||
//! Event Subscription
|
||||
//!
|
||||
//! Trait for receiving streaming events from LLM in real-time.
|
||||
//! Used for stream display to UI and progress display.
|
||||
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use crate::{
|
||||
handler::{
|
||||
ErrorKind, Handler, StatusKind, TextBlockEvent, TextBlockKind, ToolUseBlockEvent,
|
||||
ToolUseBlockKind, UsageKind,
|
||||
},
|
||||
hook::ToolCall,
|
||||
timeline::event::{ErrorEvent, StatusEvent, UsageEvent},
|
||||
};
|
||||
|
||||
// =============================================================================
|
||||
// WorkerSubscriber Trait
|
||||
// =============================================================================
|
||||
|
||||
/// Trait for subscribing to streaming events from LLM
|
||||
///
|
||||
/// When registered with Worker, you can receive events from text generation
|
||||
/// and tool calls in real-time. Ideal for stream display to UI.
|
||||
///
|
||||
/// # Available Events
|
||||
///
|
||||
/// - **Block events**: Text, tool use (with scope)
|
||||
/// - **Meta events**: Usage, status, error
|
||||
/// - **Completion events**: Text complete, tool call complete
|
||||
/// - **Turn control**: Turn start, turn end
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```ignore
|
||||
/// use llm_worker::subscriber::WorkerSubscriber;
|
||||
/// use llm_worker::timeline::TextBlockEvent;
|
||||
///
|
||||
/// struct StreamPrinter;
|
||||
///
|
||||
/// impl WorkerSubscriber for StreamPrinter {
|
||||
/// type TextBlockScope = ();
|
||||
/// type ToolUseBlockScope = ();
|
||||
///
|
||||
/// fn on_text_block(&mut self, _: &mut (), event: &TextBlockEvent) {
|
||||
/// if let TextBlockEvent::Delta(text) = event {
|
||||
/// print!("{}", text); // Real-time output
|
||||
/// }
|
||||
/// }
|
||||
///
|
||||
/// fn on_text_complete(&mut self, text: &str) {
|
||||
/// println!("\n--- Complete: {} chars ---", text.len());
|
||||
/// }
|
||||
/// }
|
||||
///
|
||||
/// // Register with Worker
|
||||
/// worker.subscribe(StreamPrinter);
|
||||
/// ```
|
||||
pub trait WorkerSubscriber: Send {
|
||||
// =========================================================================
|
||||
// Scope Types (for block events)
|
||||
// =========================================================================
|
||||
|
||||
/// Scope type for text block processing
|
||||
///
|
||||
/// Generated with Default::default() at block start,
|
||||
/// destroyed at block end.
|
||||
type TextBlockScope: Default + Send + Sync;
|
||||
|
||||
/// Scope type for tool use block processing
|
||||
type ToolUseBlockScope: Default + Send + Sync;
|
||||
|
||||
// =========================================================================
|
||||
// Block Events (with scope management)
|
||||
// =========================================================================
|
||||
|
||||
/// Text block event
|
||||
///
|
||||
/// Has Start/Delta/Stop lifecycle.
|
||||
/// Scope is generated at block start and destroyed at end.
|
||||
#[allow(unused_variables)]
|
||||
fn on_text_block(&mut self, scope: &mut Self::TextBlockScope, event: &TextBlockEvent) {}
|
||||
|
||||
/// Tool use block event
|
||||
///
|
||||
/// Has Start/InputJsonDelta/Stop lifecycle.
|
||||
#[allow(unused_variables)]
|
||||
fn on_tool_use_block(
|
||||
&mut self,
|
||||
scope: &mut Self::ToolUseBlockScope,
|
||||
event: &ToolUseBlockEvent,
|
||||
) {
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Single Events (no scope needed)
|
||||
// =========================================================================
|
||||
|
||||
/// Usage event
|
||||
#[allow(unused_variables)]
|
||||
fn on_usage(&mut self, event: &UsageEvent) {}
|
||||
|
||||
/// Status event
|
||||
#[allow(unused_variables)]
|
||||
fn on_status(&mut self, event: &StatusEvent) {}
|
||||
|
||||
/// Error event
|
||||
#[allow(unused_variables)]
|
||||
fn on_error(&mut self, event: &ErrorEvent) {}
|
||||
|
||||
// =========================================================================
|
||||
// Accumulated Events (added in Worker layer)
|
||||
// =========================================================================
|
||||
|
||||
/// Text complete event
|
||||
///
|
||||
/// When a text block completes, the entire accumulated text is passed.
|
||||
/// Convenient for receiving the final result after block processing.
|
||||
#[allow(unused_variables)]
|
||||
fn on_text_complete(&mut self, text: &str) {}
|
||||
|
||||
/// Tool call complete event
|
||||
///
|
||||
/// When a tool use block completes, the complete ToolCall is passed.
|
||||
#[allow(unused_variables)]
|
||||
fn on_tool_call_complete(&mut self, call: &ToolCall) {}
|
||||
|
||||
// =========================================================================
|
||||
// Turn Control
|
||||
// =========================================================================
|
||||
|
||||
/// On turn start
|
||||
///
|
||||
/// `turn` is a 0-based turn number.
|
||||
#[allow(unused_variables)]
|
||||
fn on_turn_start(&mut self, turn: usize) {}
|
||||
|
||||
/// On turn end
|
||||
#[allow(unused_variables)]
|
||||
fn on_turn_end(&mut self, turn: usize) {}
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// SubscriberAdapter - Bridge WorkerSubscriber to Timeline handlers
|
||||
// =============================================================================
|
||||
|
||||
// =============================================================================
|
||||
// TextBlock Handler Adapter
|
||||
// =============================================================================
|
||||
|
||||
/// Subscriber adapter for TextBlockKind
|
||||
pub(crate) struct TextBlockSubscriberAdapter<S: WorkerSubscriber> {
|
||||
subscriber: Arc<Mutex<S>>,
|
||||
}
|
||||
|
||||
impl<S: WorkerSubscriber> TextBlockSubscriberAdapter<S> {
|
||||
pub fn new(subscriber: Arc<Mutex<S>>) -> Self {
|
||||
Self { subscriber }
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: WorkerSubscriber> Clone for TextBlockSubscriberAdapter<S> {
|
||||
fn clone(&self) -> Self {
|
||||
Self {
|
||||
subscriber: self.subscriber.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Wrapper for TextBlock scope
|
||||
pub struct TextBlockScopeWrapper<S: WorkerSubscriber> {
|
||||
inner: S::TextBlockScope,
|
||||
buffer: String, // Buffer for on_text_complete
|
||||
}
|
||||
|
||||
impl<S: WorkerSubscriber> Default for TextBlockScopeWrapper<S> {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
inner: S::TextBlockScope::default(),
|
||||
buffer: String::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: WorkerSubscriber + 'static> Handler<TextBlockKind> for TextBlockSubscriberAdapter<S> {
|
||||
type Scope = TextBlockScopeWrapper<S>;
|
||||
|
||||
fn on_event(&mut self, scope: &mut Self::Scope, event: &TextBlockEvent) {
|
||||
// Accumulate deltas into buffer
|
||||
if let TextBlockEvent::Delta(text) = event {
|
||||
scope.buffer.push_str(text);
|
||||
}
|
||||
|
||||
// Call Subscriber's TextBlock event handler
|
||||
if let Ok(mut subscriber) = self.subscriber.lock() {
|
||||
subscriber.on_text_block(&mut scope.inner, event);
|
||||
|
||||
// Also call on_text_complete on Stop
|
||||
if matches!(event, TextBlockEvent::Stop(_)) {
|
||||
subscriber.on_text_complete(&scope.buffer);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// ToolUseBlock Handler Adapter
|
||||
// =============================================================================
|
||||
|
||||
/// Subscriber adapter for ToolUseBlockKind
|
||||
pub(crate) struct ToolUseBlockSubscriberAdapter<S: WorkerSubscriber> {
|
||||
subscriber: Arc<Mutex<S>>,
|
||||
}
|
||||
|
||||
impl<S: WorkerSubscriber> ToolUseBlockSubscriberAdapter<S> {
|
||||
pub fn new(subscriber: Arc<Mutex<S>>) -> Self {
|
||||
Self { subscriber }
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: WorkerSubscriber> Clone for ToolUseBlockSubscriberAdapter<S> {
|
||||
fn clone(&self) -> Self {
|
||||
Self {
|
||||
subscriber: self.subscriber.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Wrapper for ToolUseBlock scope
|
||||
pub struct ToolUseBlockScopeWrapper<S: WorkerSubscriber> {
|
||||
inner: S::ToolUseBlockScope,
|
||||
id: String,
|
||||
name: String,
|
||||
input_json: String, // JSON accumulation
|
||||
}
|
||||
|
||||
impl<S: WorkerSubscriber> Default for ToolUseBlockScopeWrapper<S> {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
inner: S::ToolUseBlockScope::default(),
|
||||
id: String::new(),
|
||||
name: String::new(),
|
||||
input_json: String::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: WorkerSubscriber + 'static> Handler<ToolUseBlockKind> for ToolUseBlockSubscriberAdapter<S> {
|
||||
type Scope = ToolUseBlockScopeWrapper<S>;
|
||||
|
||||
fn on_event(&mut self, scope: &mut Self::Scope, event: &ToolUseBlockEvent) {
|
||||
// Save metadata on Start
|
||||
if let ToolUseBlockEvent::Start(start) = event {
|
||||
scope.id = start.id.clone();
|
||||
scope.name = start.name.clone();
|
||||
}
|
||||
|
||||
// Accumulate InputJsonDelta into buffer
|
||||
if let ToolUseBlockEvent::InputJsonDelta(json) = event {
|
||||
scope.input_json.push_str(json);
|
||||
}
|
||||
|
||||
// Call Subscriber's ToolUseBlock event handler
|
||||
if let Ok(mut subscriber) = self.subscriber.lock() {
|
||||
subscriber.on_tool_use_block(&mut scope.inner, event);
|
||||
|
||||
// Also call on_tool_call_complete on Stop
|
||||
if matches!(event, ToolUseBlockEvent::Stop(_)) {
|
||||
let input: serde_json::Value =
|
||||
serde_json::from_str(&scope.input_json).unwrap_or_default();
|
||||
let tool_call = ToolCall {
|
||||
id: scope.id.clone(),
|
||||
name: scope.name.clone(),
|
||||
input,
|
||||
};
|
||||
subscriber.on_tool_call_complete(&tool_call);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Meta Event Handler Adapters
|
||||
// =============================================================================
|
||||
|
||||
/// Subscriber adapter for UsageKind
|
||||
pub(crate) struct UsageSubscriberAdapter<S: WorkerSubscriber> {
|
||||
subscriber: Arc<Mutex<S>>,
|
||||
}
|
||||
|
||||
impl<S: WorkerSubscriber> UsageSubscriberAdapter<S> {
|
||||
pub fn new(subscriber: Arc<Mutex<S>>) -> Self {
|
||||
Self { subscriber }
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: WorkerSubscriber> Clone for UsageSubscriberAdapter<S> {
|
||||
fn clone(&self) -> Self {
|
||||
Self {
|
||||
subscriber: self.subscriber.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: WorkerSubscriber + 'static> Handler<UsageKind> for UsageSubscriberAdapter<S> {
|
||||
type Scope = ();
|
||||
|
||||
fn on_event(&mut self, _scope: &mut Self::Scope, event: &UsageEvent) {
|
||||
if let Ok(mut subscriber) = self.subscriber.lock() {
|
||||
subscriber.on_usage(event);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Subscriber adapter for StatusKind
|
||||
pub(crate) struct StatusSubscriberAdapter<S: WorkerSubscriber> {
|
||||
subscriber: Arc<Mutex<S>>,
|
||||
}
|
||||
|
||||
impl<S: WorkerSubscriber> StatusSubscriberAdapter<S> {
|
||||
pub fn new(subscriber: Arc<Mutex<S>>) -> Self {
|
||||
Self { subscriber }
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: WorkerSubscriber> Clone for StatusSubscriberAdapter<S> {
|
||||
fn clone(&self) -> Self {
|
||||
Self {
|
||||
subscriber: self.subscriber.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: WorkerSubscriber + 'static> Handler<StatusKind> for StatusSubscriberAdapter<S> {
|
||||
type Scope = ();
|
||||
|
||||
fn on_event(&mut self, _scope: &mut Self::Scope, event: &StatusEvent) {
|
||||
if let Ok(mut subscriber) = self.subscriber.lock() {
|
||||
subscriber.on_status(event);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Subscriber adapter for ErrorKind
|
||||
pub(crate) struct ErrorSubscriberAdapter<S: WorkerSubscriber> {
|
||||
subscriber: Arc<Mutex<S>>,
|
||||
}
|
||||
|
||||
impl<S: WorkerSubscriber> ErrorSubscriberAdapter<S> {
|
||||
pub fn new(subscriber: Arc<Mutex<S>>) -> Self {
|
||||
Self { subscriber }
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: WorkerSubscriber> Clone for ErrorSubscriberAdapter<S> {
|
||||
fn clone(&self) -> Self {
|
||||
Self {
|
||||
subscriber: self.subscriber.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: WorkerSubscriber + 'static> Handler<ErrorKind> for ErrorSubscriberAdapter<S> {
|
||||
type Scope = ();
|
||||
|
||||
fn on_event(&mut self, _scope: &mut Self::Scope, event: &ErrorEvent) {
|
||||
if let Ok(mut subscriber) = self.subscriber.lock() {
|
||||
subscriber.on_error(event);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -10,14 +10,16 @@
|
|||
//! - [`ToolCallCollector`] - ツール呼び出しを収集するHandler
|
||||
|
||||
pub mod event;
|
||||
mod reasoning_item_collector;
|
||||
mod text_block_collector;
|
||||
mod timeline;
|
||||
mod tool_call_collector;
|
||||
|
||||
// 公開API
|
||||
pub use event::*;
|
||||
pub use reasoning_item_collector::ReasoningItemCollector;
|
||||
pub use text_block_collector::TextBlockCollector;
|
||||
pub use timeline::{ErasedHandler, HandlerWrapper, Timeline};
|
||||
pub use timeline::Timeline;
|
||||
pub use tool_call_collector::ToolCallCollector;
|
||||
|
||||
// 型定義からのre-export
|
||||
|
|
@ -28,6 +30,7 @@ pub use crate::handler::{
|
|||
Handler,
|
||||
Kind,
|
||||
PingKind,
|
||||
ReasoningItemKind,
|
||||
StatusKind,
|
||||
// Block Events
|
||||
TextBlockEvent,
|
||||
|
|
|
|||
77
crates/llm-worker/src/timeline/reasoning_item_collector.rs
Normal file
77
crates/llm-worker/src/timeline/reasoning_item_collector.rs
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
//! `ReasoningItemCollector` - a Handler that collects completed reasoning items.
//!
//! It is registered as the Timeline's `ReasoningItemKind` handler and buffers
//! one entry each time the scheme side fires `Event::ReasoningItem`.
//! At the end of a turn the Worker drains it with `take_collected()` and
//! appends the entries to `worker.history` as `Item::Reasoning`.
|
||||
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use crate::handler::{Handler, ReasoningItemKind};
|
||||
use crate::llm_client::event::ReasoningItemEvent;
|
||||
|
||||
/// 収集された reasoning item の連列。
|
||||
#[derive(Clone, Default)]
|
||||
pub struct ReasoningItemCollector {
|
||||
collected: Arc<Mutex<Vec<ReasoningItemEvent>>>,
|
||||
}
|
||||
|
||||
impl ReasoningItemCollector {
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
/// 収集済み item を取り出してクリア
|
||||
pub fn take_collected(&self) -> Vec<ReasoningItemEvent> {
|
||||
let mut guard = self.collected.lock().unwrap();
|
||||
std::mem::take(&mut *guard)
|
||||
}
|
||||
|
||||
/// 収集をクリア
|
||||
pub fn clear(&self) {
|
||||
self.collected.lock().unwrap().clear();
|
||||
}
|
||||
}
|
||||
|
||||
impl Handler<ReasoningItemKind> for ReasoningItemCollector {
|
||||
type Scope = ();
|
||||
|
||||
fn on_event(&mut self, _scope: &mut Self::Scope, event: &ReasoningItemEvent) {
|
||||
self.collected.lock().unwrap().push(event.clone());
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::llm_client::event::Event;
    use crate::timeline::Timeline;

    /// Items dispatched through a `Timeline` are collected in arrival order,
    /// and `take_collected` drains the buffer.
    #[test]
    fn collects_in_order() {
        let collector = ReasoningItemCollector::new();
        let mut timeline = Timeline::new();
        timeline.on_reasoning_item(collector.clone());

        let first = Event::ReasoningItem(ReasoningItemEvent {
            id: Some("r1".into()),
            text: "first".into(),
            signature: Some("sig1".into()),
            ..Default::default()
        });
        let second = Event::ReasoningItem(ReasoningItemEvent {
            id: Some("r2".into()),
            text: "second".into(),
            ..Default::default()
        });
        timeline.dispatch(&first);
        timeline.dispatch(&second);

        let items = collector.take_collected();
        assert_eq!(items.len(), 2);
        assert_eq!(items[0].text, "first");
        assert_eq!(items[0].signature.as_deref(), Some("sig1"));
        assert_eq!(items[1].text, "second");

        // `take_collected` drains, so a second call yields nothing.
        let leftover = collector.take_collected();
        assert!(leftover.is_empty());
    }
}
|
||||
|
|
@ -8,6 +8,33 @@ use std::marker::PhantomData;
|
|||
use super::event::*;
|
||||
use crate::handler::*;
|
||||
|
||||
// =============================================================================
|
||||
// Helpers
|
||||
// =============================================================================
|
||||
|
||||
/// 1リクエスト内で受信した複数 UsageEvent をマージする。
|
||||
/// 各フィールドについて新しい値が `Some` ならそれで上書き。
|
||||
/// プロバイダによっては input/cache 系を最初の event だけに載せ、
|
||||
/// output_tokens を後続 event で更新するため、最後の値だけを取るのではなく
|
||||
/// フィールド単位で latest-non-None を取る。
|
||||
fn merge_usage(acc: &mut UsageEvent, new: &UsageEvent) {
|
||||
if new.input_tokens.is_some() {
|
||||
acc.input_tokens = new.input_tokens;
|
||||
}
|
||||
if new.output_tokens.is_some() {
|
||||
acc.output_tokens = new.output_tokens;
|
||||
}
|
||||
if new.total_tokens.is_some() {
|
||||
acc.total_tokens = new.total_tokens;
|
||||
}
|
||||
if new.cache_read_input_tokens.is_some() {
|
||||
acc.cache_read_input_tokens = new.cache_read_input_tokens;
|
||||
}
|
||||
if new.cache_creation_input_tokens.is_some() {
|
||||
acc.cache_creation_input_tokens = new.cache_creation_input_tokens;
|
||||
}
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Type-erased Handler
|
||||
// =============================================================================
|
||||
|
|
@ -354,6 +381,7 @@ pub struct Timeline {
|
|||
ping_handlers: Vec<Box<dyn ErasedHandler<PingKind>>>,
|
||||
status_handlers: Vec<Box<dyn ErasedHandler<StatusKind>>>,
|
||||
error_handlers: Vec<Box<dyn ErasedHandler<ErrorKind>>>,
|
||||
reasoning_item_handlers: Vec<Box<dyn ErasedHandler<ReasoningItemKind>>>,
|
||||
|
||||
// Block系ハンドラー(BlockTypeごとにグループ化)
|
||||
text_block_handlers: Vec<Box<dyn ErasedBlockHandler>>,
|
||||
|
|
@ -362,6 +390,12 @@ pub struct Timeline {
|
|||
|
||||
// 現在アクティブなブロック
|
||||
current_block: Option<BlockType>,
|
||||
|
||||
// 1リクエスト内で受信した Usage event の集約バッファ。
|
||||
// Anthropic は message_start と message_delta、Gemini は各チャンクと、
|
||||
// 多くのプロバイダが複数 Usage を発行するため、リクエスト境界で
|
||||
// 1度だけ発火するためにここでマージする。flush_usage() で発火する。
|
||||
pending_usage: Option<UsageEvent>,
|
||||
}
|
||||
|
||||
impl Default for Timeline {
|
||||
|
|
@ -377,10 +411,12 @@ impl Timeline {
|
|||
ping_handlers: Vec::new(),
|
||||
status_handlers: Vec::new(),
|
||||
error_handlers: Vec::new(),
|
||||
reasoning_item_handlers: Vec::new(),
|
||||
text_block_handlers: Vec::new(),
|
||||
thinking_block_handlers: Vec::new(),
|
||||
tool_use_block_handlers: Vec::new(),
|
||||
current_block: None,
|
||||
pending_usage: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -437,6 +473,18 @@ impl Timeline {
|
|||
self
|
||||
}
|
||||
|
||||
/// `ReasoningItemKind` 用 Handler を登録
|
||||
pub fn on_reasoning_item<H>(&mut self, handler: H) -> &mut Self
|
||||
where
|
||||
H: Handler<ReasoningItemKind> + Send + Sync + 'static,
|
||||
H::Scope: Send + Sync,
|
||||
{
|
||||
let mut wrapper = HandlerWrapper::new(handler);
|
||||
wrapper.start_scope();
|
||||
self.reasoning_item_handlers.push(Box::new(wrapper));
|
||||
self
|
||||
}
|
||||
|
||||
/// TextBlockKind用のHandlerを登録
|
||||
pub fn on_text_block<H>(&mut self, handler: H) -> &mut Self
|
||||
where
|
||||
|
|
@ -482,18 +530,38 @@ impl Timeline {
|
|||
Event::Ping(p) => self.dispatch_ping(p),
|
||||
Event::Status(s) => self.dispatch_status(s),
|
||||
Event::Error(e) => self.dispatch_error(e),
|
||||
// Observability-only event: stream trace records it before timeline dispatch.
|
||||
Event::UnhandledSse(_) => {}
|
||||
|
||||
// Block系: スコープ管理しながらディスパッチ
|
||||
Event::BlockStart(s) => self.handle_block_start(s),
|
||||
Event::BlockDelta(d) => self.handle_block_delta(d),
|
||||
Event::BlockStop(s) => self.handle_block_stop(s),
|
||||
Event::BlockAbort(a) => self.handle_block_abort(a),
|
||||
|
||||
// 完成済み reasoning item: 即時ディスパッチ
|
||||
Event::ReasoningItem(r) => self.dispatch_reasoning_item(r),
|
||||
}
|
||||
}
|
||||
|
||||
/// Usage event を即時には dispatch せず、pending_usage にマージする。
|
||||
/// 1リクエスト内で複数の Usage event が来ても、ハンドラには 1 度だけ
|
||||
/// 最終値を渡したいため。flush_usage() で発火する。
|
||||
fn dispatch_usage(&mut self, event: &UsageEvent) {
|
||||
for handler in &mut self.usage_handlers {
|
||||
handler.dispatch(event);
|
||||
match &mut self.pending_usage {
|
||||
Some(acc) => merge_usage(acc, event),
|
||||
None => self.pending_usage = Some(event.clone()),
|
||||
}
|
||||
}
|
||||
|
||||
/// pending_usage を usage_handlers に発火し、バッファをクリアする。
|
||||
/// 1リクエスト分のストリーム終了時に1回だけ呼ぶ想定。
|
||||
/// pending_usage が空ならば何もしない。
|
||||
pub fn flush_usage(&mut self) {
|
||||
if let Some(event) = self.pending_usage.take() {
|
||||
for handler in &mut self.usage_handlers {
|
||||
handler.dispatch(&event);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -515,6 +583,12 @@ impl Timeline {
|
|||
}
|
||||
}
|
||||
|
||||
/// Passes a reasoning item to every registered reasoning-item handler,
/// in registration order.
fn dispatch_reasoning_item(&mut self, event: &ReasoningItemEvent) {
    self.reasoning_item_handlers
        .iter_mut()
        .for_each(|handler| handler.dispatch(event));
}
|
||||
|
||||
fn handle_block_start(&mut self, start: &BlockStart) {
|
||||
self.current_block = Some(start.block_type);
|
||||
|
||||
|
|
@ -606,6 +680,36 @@ mod tests {
|
|||
assert!(timeline.current_block().is_none());
|
||||
}
|
||||
|
||||
/// An `UnhandledSse` event must not open a block and must not reach
/// text-block handlers.
#[test]
fn unhandled_sse_is_ignored_by_timeline_handlers() {
    // Records every text-block event it receives.
    struct TestTextHandler {
        calls: Arc<Mutex<Vec<TextBlockEvent>>>,
    }

    impl Handler<TextBlockKind> for TestTextHandler {
        type Scope = ();

        fn on_event(&mut self, _scope: &mut (), event: &TextBlockEvent) {
            let mut log = self.calls.lock().unwrap();
            log.push(event.clone());
        }
    }

    let calls = Arc::new(Mutex::new(Vec::new()));
    let recorder = TestTextHandler {
        calls: Arc::clone(&calls),
    };
    let mut timeline = Timeline::new();
    timeline.on_text_block(recorder);

    let unknown = Event::UnhandledSse(UnhandledSseEvent {
        provider: "openai_responses".to_string(),
        event_type: "response.mystery".to_string(),
        data_preview: "{}".to_string(),
        data_len: 2,
    });
    timeline.dispatch(&unknown);

    assert!(timeline.current_block().is_none());
    assert!(calls.lock().unwrap().is_empty());
}
|
||||
|
||||
#[test]
|
||||
fn test_meta_event_dispatch() {
|
||||
// シンプルなテスト用構造体
|
||||
|
|
@ -629,9 +733,63 @@ mod tests {
|
|||
timeline.on_usage(handler);
|
||||
|
||||
timeline.dispatch(&Event::usage(100, 50));
|
||||
// pending_usage に積まれているだけなのでまだ未発火
|
||||
assert_eq!(calls.lock().unwrap().len(), 0);
|
||||
|
||||
// flush で 1 度だけ発火
|
||||
timeline.flush_usage();
|
||||
let recorded = calls.lock().unwrap();
|
||||
assert_eq!(recorded.len(), 1);
|
||||
assert_eq!(recorded[0].input_tokens, Some(100));
|
||||
}
|
||||
|
||||
/// Usage events are merged while pending and delivered exactly once per
/// `flush_usage` call.
#[test]
fn test_usage_aggregation_and_flush() {
    // Records every usage event it receives.
    struct TestUsageHandler {
        calls: Arc<Mutex<Vec<UsageEvent>>>,
    }

    impl Handler<UsageKind> for TestUsageHandler {
        type Scope = ();

        fn on_event(&mut self, _scope: &mut (), event: &UsageEvent) {
            let mut log = self.calls.lock().unwrap();
            log.push(event.clone());
        }
    }

    let calls = Arc::new(Mutex::new(Vec::new()));
    let recorder = TestUsageHandler {
        calls: Arc::clone(&calls),
    };
    let mut timeline = Timeline::new();
    timeline.on_usage(recorder);

    // Anthropic-style: message_start carries input plus a provisional output.
    let message_start = Event::Usage(UsageEvent {
        input_tokens: Some(409),
        output_tokens: Some(1),
        total_tokens: Some(410),
        cache_read_input_tokens: Some(0),
        cache_creation_input_tokens: Some(0),
    });
    // message_delta carries the final output count.
    let message_delta = Event::Usage(UsageEvent {
        input_tokens: Some(409),
        output_tokens: Some(71),
        total_tokens: Some(480),
        cache_read_input_tokens: Some(0),
        cache_creation_input_tokens: Some(0),
    });
    timeline.dispatch(&message_start);
    timeline.dispatch(&message_delta);

    // Nothing reaches the handler before the flush.
    assert_eq!(calls.lock().unwrap().len(), 0);

    timeline.flush_usage();
    {
        let recorded = calls.lock().unwrap();
        assert_eq!(recorded.len(), 1);
        assert_eq!(recorded[0].input_tokens, Some(409));
        assert_eq!(recorded[0].output_tokens, Some(71));
    }

    // A second flush with nothing pending must not fire again.
    timeline.flush_usage();
    assert_eq!(calls.lock().unwrap().len(), 1);
}
|
||||
}
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user