feat: tuiのトークン集計表示の修正

This commit is contained in:
Keisuke Hirata 2026-05-04 00:01:37 +09:00
parent ffd59b05a1
commit 8be579dc3c
No known key found for this signature in database
6 changed files with 39 additions and 11 deletions

View File

@@ -76,10 +76,12 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
Event::Usage { Event::Usage {
input_tokens, input_tokens,
output_tokens, output_tokens,
cache_read_input_tokens,
} => { } => {
println!( println!(
"[usage] in={} out={}", "[usage] in={} (cache_read={}) out={}",
input_tokens.unwrap_or(0), input_tokens.unwrap_or(0),
cache_read_input_tokens.unwrap_or(0),
output_tokens.unwrap_or(0) output_tokens.unwrap_or(0)
); );
} }

View File

@@ -229,6 +229,7 @@ impl PodController {
let _ = tx.send(Event::Usage { let _ = tx.send(Event::Usage {
input_tokens: event.input_tokens, input_tokens: event.input_tokens,
output_tokens: event.output_tokens, output_tokens: event.output_tokens,
cache_read_input_tokens: event.cache_read_input_tokens,
}); });
}); });

View File

@@ -281,9 +281,19 @@ pub enum Event {
#[serde(default)] #[serde(default)]
is_error: bool, is_error: bool,
}, },
/// Token accounting for one LLM request.
///
/// `input_tokens` is the prompt prefix occupancy (cache reads /
/// cache writes included), as normalised by the worker layer.
/// `cache_read_input_tokens` is the cache-hit subset of that
/// occupancy; subtracting it yields the "net upload" the client
/// actually paid full price to send on this request, which is what
/// the TUI status line accumulates per turn.
Usage { Usage {
input_tokens: Option<u64>, input_tokens: Option<u64>,
output_tokens: Option<u64>, output_tokens: Option<u64>,
#[serde(default, skip_serializing_if = "Option::is_none")]
cache_read_input_tokens: Option<u64>,
}, },
RunEnd { RunEnd {
result: RunResult, result: RunResult,

View File

@@ -47,7 +47,11 @@ pub struct App {
/// `Resume` or a fresh `Run`). /// `Resume` or a fresh `Run`).
pub paused: bool, pub paused: bool,
pub run_requests: usize, pub run_requests: usize,
pub run_input_tokens: u64, /// Sum of `input_tokens - cache_read_input_tokens` across the
/// current turn's LLM requests — i.e. the net tokens this turn
/// actually had to upload at full price (cache writes included,
/// cache reads excluded). Reset on `RunEnd`.
pub run_upload_tokens: u64,
pub run_output_tokens: u64, pub run_output_tokens: u64,
pub turn_index: usize, pub turn_index: usize,
pub current_tool: Option<String>, pub current_tool: Option<String>,
@@ -79,7 +83,7 @@ impl App {
running: false, running: false,
paused: false, paused: false,
run_requests: 0, run_requests: 0,
run_input_tokens: 0, run_upload_tokens: 0,
run_output_tokens: 0, run_output_tokens: 0,
turn_index: 0, turn_index: 0,
current_tool: None, current_tool: None,
@@ -465,8 +469,16 @@ impl App {
Event::Usage { Event::Usage {
input_tokens, input_tokens,
output_tokens, output_tokens,
cache_read_input_tokens,
} => { } => {
self.run_input_tokens += input_tokens.unwrap_or(0); // Subtract the cache-hit portion so a tool loop that
// re-sends the same prefix on every request doesn't
// re-count it. cache_creation stays in (it is full
// price on this request).
let net_input = input_tokens
.unwrap_or(0)
.saturating_sub(cache_read_input_tokens.unwrap_or(0));
self.run_upload_tokens += net_input;
self.run_output_tokens += output_tokens.unwrap_or(0); self.run_output_tokens += output_tokens.unwrap_or(0);
} }
Event::Error { code, message } => { Event::Error { code, message } => {
@@ -475,13 +487,13 @@ impl App {
Event::RunEnd { result } => { Event::RunEnd { result } => {
self.blocks.push(Block::TurnStats { self.blocks.push(Block::TurnStats {
requests: self.run_requests, requests: self.run_requests,
input_tokens: self.run_input_tokens, upload_tokens: self.run_upload_tokens,
output_tokens: self.run_output_tokens, output_tokens: self.run_output_tokens,
}); });
self.running = false; self.running = false;
self.paused = matches!(result, RunResult::Paused); self.paused = matches!(result, RunResult::Paused);
self.run_requests = 0; self.run_requests = 0;
self.run_input_tokens = 0; self.run_upload_tokens = 0;
self.run_output_tokens = 0; self.run_output_tokens = 0;
self.current_tool = None; self.current_tool = None;
self.assistant_streaming = false; self.assistant_streaming = false;

View File

@@ -43,7 +43,10 @@ pub enum Block {
Compact(CompactEvent), Compact(CompactEvent),
TurnStats { TurnStats {
requests: usize, requests: usize,
input_tokens: u64, /// Net tokens uploaded across the turn's LLM requests
/// (cache reads excluded; cache writes included). Same value
/// the status line accumulates while the turn is in flight.
upload_tokens: u64,
output_tokens: u64, output_tokens: u64,
}, },
} }

View File

@@ -406,13 +406,13 @@ fn render_block_into(lines: &mut Vec<Line<'static>>, block: &Block, width: u16,
Block::Compact(evt) => render_compact(lines, evt, width, mode), Block::Compact(evt) => render_compact(lines, evt, width, mode),
Block::TurnStats { Block::TurnStats {
requests, requests,
input_tokens, upload_tokens,
output_tokens, output_tokens,
} => { } => {
let text = format!( let text = format!(
"{} reqs ↑{}/↓{}", "{} reqs ↑{}/↓{}",
requests, requests,
fmt_tokens(*input_tokens), fmt_tokens(*upload_tokens),
fmt_tokens(*output_tokens), fmt_tokens(*output_tokens),
); );
lines.push( lines.push(
@@ -780,14 +780,14 @@ fn draw_status(frame: &mut Frame, app: &App, area: Rect) {
format!( format!(
"request: {} | ↑{}/↓{} | tool: {tool}", "request: {} | ↑{}/↓{} | tool: {tool}",
app.run_requests, app.run_requests,
fmt_tokens(app.run_input_tokens), fmt_tokens(app.run_upload_tokens),
fmt_tokens(app.run_output_tokens), fmt_tokens(app.run_output_tokens),
) )
} else { } else {
format!( format!(
"request: {} | ↑{}/↓{}", "request: {} | ↑{}/↓{}",
app.run_requests, app.run_requests,
fmt_tokens(app.run_input_tokens), fmt_tokens(app.run_upload_tokens),
fmt_tokens(app.run_output_tokens), fmt_tokens(app.run_output_tokens),
) )
}; };