feat: tuiのトークン集計表示の修正

2026-05-04 00:01:37 +09:00 · 2026-05-04 00:01:37 +09:00 · 5efe0e4910
commit 5efe0e4910
parent 6168e3f924
6 changed files with 39 additions and 11 deletions
--- a/crates/pod/examples/pod_protocol.rs
+++ b/crates/pod/examples/pod_protocol.rs
@@ -76,10 +76,12 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
                Event::Usage {
                    input_tokens,
                    output_tokens,
+                    cache_read_input_tokens,
                } => {
                    println!(
-                        "[usage] in={} out={}",
+                        "[usage] in={} (cache_read={}) out={}",
                        input_tokens.unwrap_or(0),
+                        cache_read_input_tokens.unwrap_or(0),
                        output_tokens.unwrap_or(0)
                    );
                }
--- a/crates/pod/src/controller.rs
+++ b/crates/pod/src/controller.rs
@@ -229,6 +229,7 @@ impl PodController {
                let _ = tx.send(Event::Usage {
                    input_tokens: event.input_tokens,
                    output_tokens: event.output_tokens,
+                    cache_read_input_tokens: event.cache_read_input_tokens,
                });
            });

--- a/crates/protocol/src/lib.rs
+++ b/crates/protocol/src/lib.rs
@@ -281,9 +281,19 @@ pub enum Event {
        #[serde(default)]
        is_error: bool,
    },
+    /// Token accounting for one LLM request.
+    ///
+    /// `input_tokens` is the prompt prefix occupancy (cache reads /
+    /// cache writes included), as normalised by the worker layer.
+    /// `cache_read_input_tokens` is the cache-hit subset of that
+    /// occupancy; subtracting it yields the "net upload" the client
+    /// actually paid full price to send on this request, which is what
+    /// the TUI status line accumulates per turn.
    Usage {
        input_tokens: Option<u64>,
        output_tokens: Option<u64>,
+        #[serde(default, skip_serializing_if = "Option::is_none")]
+        cache_read_input_tokens: Option<u64>,
    },
    RunEnd {
        result: RunResult,
--- a/crates/tui/src/app.rs
+++ b/crates/tui/src/app.rs
@@ -47,7 +47,11 @@ pub struct App {
    /// `Resume` or a fresh `Run`).
    pub paused: bool,
    pub run_requests: usize,
-    pub run_input_tokens: u64,
+    /// Sum of `input_tokens - cache_read_input_tokens` across the
+    /// current turn's LLM requests — i.e. the net tokens this turn
+    /// actually had to upload at full price (cache writes included,
+    /// cache reads excluded). Reset on `RunEnd`.
+    pub run_upload_tokens: u64,
    pub run_output_tokens: u64,
    pub turn_index: usize,
    pub current_tool: Option<String>,
@@ -79,7 +83,7 @@ impl App {
            running: false,
            paused: false,
            run_requests: 0,
-            run_input_tokens: 0,
+            run_upload_tokens: 0,
            run_output_tokens: 0,
            turn_index: 0,
            current_tool: None,
@@ -465,8 +469,16 @@ impl App {
            Event::Usage {
                input_tokens,
                output_tokens,
+                cache_read_input_tokens,
            } => {
-                self.run_input_tokens += input_tokens.unwrap_or(0);
+                // Subtract the cache-hit portion so a tool loop that
+                // re-sends the same prefix on every request doesn't
+                // re-count it. cache_creation stays in (it is full
+                // price on this request).
+                let net_input = input_tokens
+                    .unwrap_or(0)
+                    .saturating_sub(cache_read_input_tokens.unwrap_or(0));
+                self.run_upload_tokens += net_input;
                self.run_output_tokens += output_tokens.unwrap_or(0);
            }
            Event::Error { code, message } => {
@@ -475,13 +487,13 @@ impl App {
            Event::RunEnd { result } => {
                self.blocks.push(Block::TurnStats {
                    requests: self.run_requests,
-                    input_tokens: self.run_input_tokens,
+                    upload_tokens: self.run_upload_tokens,
                    output_tokens: self.run_output_tokens,
                });
                self.running = false;
                self.paused = matches!(result, RunResult::Paused);
                self.run_requests = 0;
-                self.run_input_tokens = 0;
+                self.run_upload_tokens = 0;
                self.run_output_tokens = 0;
                self.current_tool = None;
                self.assistant_streaming = false;
--- a/crates/tui/src/block.rs
+++ b/crates/tui/src/block.rs
@@ -43,7 +43,10 @@ pub enum Block {
    Compact(CompactEvent),
    TurnStats {
        requests: usize,
-        input_tokens: u64,
+        /// Net tokens uploaded across the turn's LLM requests
+        /// (cache reads excluded; cache writes included). Same value
+        /// the status line accumulates while the turn is in flight.
+        upload_tokens: u64,
        output_tokens: u64,
    },
 }
--- a/crates/tui/src/ui.rs
+++ b/crates/tui/src/ui.rs
@@ -406,13 +406,13 @@ fn render_block_into(lines: &mut Vec<Line<'static>>, block: &Block, width: u16,
        Block::Compact(evt) => render_compact(lines, evt, width, mode),
        Block::TurnStats {
            requests,
-            input_tokens,
+            upload_tokens,
            output_tokens,
        } => {
            let text = format!(
                "{} reqs ↑{}/↓{}",
                requests,
-                fmt_tokens(*input_tokens),
+                fmt_tokens(*upload_tokens),
                fmt_tokens(*output_tokens),
            );
            lines.push(
@@ -780,14 +780,14 @@ fn draw_status(frame: &mut Frame, app: &App, area: Rect) {
            format!(
                "request: {} | ↑{}/↓{} | tool: {tool}",
                app.run_requests,
-                fmt_tokens(app.run_input_tokens),
+                fmt_tokens(app.run_upload_tokens),
                fmt_tokens(app.run_output_tokens),
            )
        } else {
            format!(
                "request: {} | ↑{}/↓{}",
                app.run_requests,
-                fmt_tokens(app.run_input_tokens),
+                fmt_tokens(app.run_upload_tokens),
                fmt_tokens(app.run_output_tokens),
            )
        };