From 8be579dc3c883572c256fc6cb59b8b88396d299d Mon Sep 17 00:00:00 2001 From: Hare Date: Mon, 4 May 2026 00:01:37 +0900 Subject: [PATCH] =?UTF-8?q?feat:=20tui=E3=81=AE=E3=83=88=E3=83=BC=E3=82=AF?= =?UTF-8?q?=E3=83=B3=E9=9B=86=E8=A8=88=E8=A1=A8=E7=A4=BA=E3=81=AE=E4=BF=AE?= =?UTF-8?q?=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/pod/examples/pod_protocol.rs | 4 +++- crates/pod/src/controller.rs | 1 + crates/protocol/src/lib.rs | 10 ++++++++++ crates/tui/src/app.rs | 22 +++++++++++++++++----- crates/tui/src/block.rs | 5 ++++- crates/tui/src/ui.rs | 8 ++++---- 6 files changed, 39 insertions(+), 11 deletions(-) diff --git a/crates/pod/examples/pod_protocol.rs b/crates/pod/examples/pod_protocol.rs index d03633da..6e4a1801 100644 --- a/crates/pod/examples/pod_protocol.rs +++ b/crates/pod/examples/pod_protocol.rs @@ -76,10 +76,12 @@ async fn main() -> Result<(), Box> { Event::Usage { input_tokens, output_tokens, + cache_read_input_tokens, } => { println!( - "[usage] in={} out={}", + "[usage] in={} (cache_read={}) out={}", input_tokens.unwrap_or(0), + cache_read_input_tokens.unwrap_or(0), output_tokens.unwrap_or(0) ); } diff --git a/crates/pod/src/controller.rs b/crates/pod/src/controller.rs index 4ef0c4e0..13d8e278 100644 --- a/crates/pod/src/controller.rs +++ b/crates/pod/src/controller.rs @@ -229,6 +229,7 @@ impl PodController { let _ = tx.send(Event::Usage { input_tokens: event.input_tokens, output_tokens: event.output_tokens, + cache_read_input_tokens: event.cache_read_input_tokens, }); }); diff --git a/crates/protocol/src/lib.rs b/crates/protocol/src/lib.rs index c70a9ed3..aeedb69b 100644 --- a/crates/protocol/src/lib.rs +++ b/crates/protocol/src/lib.rs @@ -281,9 +281,19 @@ pub enum Event { #[serde(default)] is_error: bool, }, + /// Token accounting for one LLM request. + /// + /// `input_tokens` is the prompt prefix occupancy (cache reads / + /// cache writes included), as normalised by the worker layer. + /// `cache_read_input_tokens` is the cache-hit subset of that + /// occupancy; subtracting it yields the "net upload" the client + /// actually paid full price to send on this request, which is what + /// the TUI status line accumulates per turn. Usage { input_tokens: Option, output_tokens: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + cache_read_input_tokens: Option, }, RunEnd { result: RunResult, diff --git a/crates/tui/src/app.rs b/crates/tui/src/app.rs index d7cb9f6b..e7810874 100644 --- a/crates/tui/src/app.rs +++ b/crates/tui/src/app.rs @@ -47,7 +47,11 @@ pub struct App { /// `Resume` or a fresh `Run`). pub paused: bool, pub run_requests: usize, - pub run_input_tokens: u64, + /// Sum of `input_tokens - cache_read_input_tokens` across the + /// current turn's LLM requests — i.e. the net tokens this turn + /// actually had to upload at full price (cache writes included, + /// cache reads excluded). Reset on `RunEnd`. + pub run_upload_tokens: u64, pub run_output_tokens: u64, pub turn_index: usize, pub current_tool: Option, @@ -79,7 +83,7 @@ impl App { running: false, paused: false, run_requests: 0, - run_input_tokens: 0, + run_upload_tokens: 0, run_output_tokens: 0, turn_index: 0, current_tool: None, @@ -465,8 +469,16 @@ impl App { Event::Usage { input_tokens, output_tokens, + cache_read_input_tokens, } => { - self.run_input_tokens += input_tokens.unwrap_or(0); + // Subtract the cache-hit portion so a tool loop that + // re-sends the same prefix on every request doesn't + // re-count it. cache_creation stays in (it is full + // price on this request). + let net_input = input_tokens + .unwrap_or(0) + .saturating_sub(cache_read_input_tokens.unwrap_or(0)); + self.run_upload_tokens += net_input; self.run_output_tokens += output_tokens.unwrap_or(0); } Event::Error { code, message } => { @@ -475,13 +487,13 @@ impl App { Event::RunEnd { result } => { self.blocks.push(Block::TurnStats { requests: self.run_requests, - input_tokens: self.run_input_tokens, + upload_tokens: self.run_upload_tokens, output_tokens: self.run_output_tokens, }); self.running = false; self.paused = matches!(result, RunResult::Paused); self.run_requests = 0; - self.run_input_tokens = 0; + self.run_upload_tokens = 0; self.run_output_tokens = 0; self.current_tool = None; self.assistant_streaming = false; diff --git a/crates/tui/src/block.rs b/crates/tui/src/block.rs index 1692c0a4..c1a34d0a 100644 --- a/crates/tui/src/block.rs +++ b/crates/tui/src/block.rs @@ -43,7 +43,10 @@ pub enum Block { Compact(CompactEvent), TurnStats { requests: usize, - input_tokens: u64, + /// Net tokens uploaded across the turn's LLM requests + /// (cache reads excluded; cache writes included). Same value + /// the status line accumulates while the turn is in flight. + upload_tokens: u64, output_tokens: u64, }, } diff --git a/crates/tui/src/ui.rs b/crates/tui/src/ui.rs index e4497828..f64ebbc6 100644 --- a/crates/tui/src/ui.rs +++ b/crates/tui/src/ui.rs @@ -406,13 +406,13 @@ fn render_block_into(lines: &mut Vec>, block: &Block, width: u16, Block::Compact(evt) => render_compact(lines, evt, width, mode), Block::TurnStats { requests, - input_tokens, + upload_tokens, output_tokens, } => { let text = format!( "{} reqs ↑{}/↓{}", requests, - fmt_tokens(*input_tokens), + fmt_tokens(*upload_tokens), fmt_tokens(*output_tokens), ); lines.push( @@ -780,14 +780,14 @@ fn draw_status(frame: &mut Frame, app: &App, area: Rect) { format!( "request: {} | ↑{}/↓{} | tool: {tool}", app.run_requests, - fmt_tokens(app.run_input_tokens), + fmt_tokens(app.run_upload_tokens), fmt_tokens(app.run_output_tokens), ) } else { format!( "request: {} | ↑{}/↓{}", app.run_requests, - fmt_tokens(app.run_input_tokens), + fmt_tokens(app.run_upload_tokens), fmt_tokens(app.run_output_tokens), ) };