feat: tuiのトークン集計表示の修正
This commit is contained in:
parent
ffd59b05a1
commit
8be579dc3c
|
|
@ -76,10 +76,12 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
Event::Usage {
|
Event::Usage {
|
||||||
input_tokens,
|
input_tokens,
|
||||||
output_tokens,
|
output_tokens,
|
||||||
|
cache_read_input_tokens,
|
||||||
} => {
|
} => {
|
||||||
println!(
|
println!(
|
||||||
"[usage] in={} out={}",
|
"[usage] in={} (cache_read={}) out={}",
|
||||||
input_tokens.unwrap_or(0),
|
input_tokens.unwrap_or(0),
|
||||||
|
cache_read_input_tokens.unwrap_or(0),
|
||||||
output_tokens.unwrap_or(0)
|
output_tokens.unwrap_or(0)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -229,6 +229,7 @@ impl PodController {
|
||||||
let _ = tx.send(Event::Usage {
|
let _ = tx.send(Event::Usage {
|
||||||
input_tokens: event.input_tokens,
|
input_tokens: event.input_tokens,
|
||||||
output_tokens: event.output_tokens,
|
output_tokens: event.output_tokens,
|
||||||
|
cache_read_input_tokens: event.cache_read_input_tokens,
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -281,9 +281,19 @@ pub enum Event {
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
is_error: bool,
|
is_error: bool,
|
||||||
},
|
},
|
||||||
|
/// Token accounting for one LLM request.
|
||||||
|
///
|
||||||
|
/// `input_tokens` is the prompt prefix occupancy (cache reads /
|
||||||
|
/// cache writes included), as normalised by the worker layer.
|
||||||
|
/// `cache_read_input_tokens` is the cache-hit subset of that
|
||||||
|
/// occupancy; subtracting it yields the "net upload" the client
|
||||||
|
/// actually paid full price to send on this request, which is what
|
||||||
|
/// the TUI status line accumulates per turn.
|
||||||
Usage {
|
Usage {
|
||||||
input_tokens: Option<u64>,
|
input_tokens: Option<u64>,
|
||||||
output_tokens: Option<u64>,
|
output_tokens: Option<u64>,
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
cache_read_input_tokens: Option<u64>,
|
||||||
},
|
},
|
||||||
RunEnd {
|
RunEnd {
|
||||||
result: RunResult,
|
result: RunResult,
|
||||||
|
|
|
||||||
|
|
@ -47,7 +47,11 @@ pub struct App {
|
||||||
/// `Resume` or a fresh `Run`).
|
/// `Resume` or a fresh `Run`).
|
||||||
pub paused: bool,
|
pub paused: bool,
|
||||||
pub run_requests: usize,
|
pub run_requests: usize,
|
||||||
pub run_input_tokens: u64,
|
/// Sum of `input_tokens - cache_read_input_tokens` across the
|
||||||
|
/// current turn's LLM requests — i.e. the net tokens this turn
|
||||||
|
/// actually had to upload at full price (cache writes included,
|
||||||
|
/// cache reads excluded). Reset on `RunEnd`.
|
||||||
|
pub run_upload_tokens: u64,
|
||||||
pub run_output_tokens: u64,
|
pub run_output_tokens: u64,
|
||||||
pub turn_index: usize,
|
pub turn_index: usize,
|
||||||
pub current_tool: Option<String>,
|
pub current_tool: Option<String>,
|
||||||
|
|
@ -79,7 +83,7 @@ impl App {
|
||||||
running: false,
|
running: false,
|
||||||
paused: false,
|
paused: false,
|
||||||
run_requests: 0,
|
run_requests: 0,
|
||||||
run_input_tokens: 0,
|
run_upload_tokens: 0,
|
||||||
run_output_tokens: 0,
|
run_output_tokens: 0,
|
||||||
turn_index: 0,
|
turn_index: 0,
|
||||||
current_tool: None,
|
current_tool: None,
|
||||||
|
|
@ -465,8 +469,16 @@ impl App {
|
||||||
Event::Usage {
|
Event::Usage {
|
||||||
input_tokens,
|
input_tokens,
|
||||||
output_tokens,
|
output_tokens,
|
||||||
|
cache_read_input_tokens,
|
||||||
} => {
|
} => {
|
||||||
self.run_input_tokens += input_tokens.unwrap_or(0);
|
// Subtract the cache-hit portion so a tool loop that
|
||||||
|
// re-sends the same prefix on every request doesn't
|
||||||
|
// re-count it. Cache-creation (cache-write) tokens stay in,
|
||||||
|
// since they are sent at full price on this request.
|
||||||
|
let net_input = input_tokens
|
||||||
|
.unwrap_or(0)
|
||||||
|
.saturating_sub(cache_read_input_tokens.unwrap_or(0));
|
||||||
|
self.run_upload_tokens += net_input;
|
||||||
self.run_output_tokens += output_tokens.unwrap_or(0);
|
self.run_output_tokens += output_tokens.unwrap_or(0);
|
||||||
}
|
}
|
||||||
Event::Error { code, message } => {
|
Event::Error { code, message } => {
|
||||||
|
|
@ -475,13 +487,13 @@ impl App {
|
||||||
Event::RunEnd { result } => {
|
Event::RunEnd { result } => {
|
||||||
self.blocks.push(Block::TurnStats {
|
self.blocks.push(Block::TurnStats {
|
||||||
requests: self.run_requests,
|
requests: self.run_requests,
|
||||||
input_tokens: self.run_input_tokens,
|
upload_tokens: self.run_upload_tokens,
|
||||||
output_tokens: self.run_output_tokens,
|
output_tokens: self.run_output_tokens,
|
||||||
});
|
});
|
||||||
self.running = false;
|
self.running = false;
|
||||||
self.paused = matches!(result, RunResult::Paused);
|
self.paused = matches!(result, RunResult::Paused);
|
||||||
self.run_requests = 0;
|
self.run_requests = 0;
|
||||||
self.run_input_tokens = 0;
|
self.run_upload_tokens = 0;
|
||||||
self.run_output_tokens = 0;
|
self.run_output_tokens = 0;
|
||||||
self.current_tool = None;
|
self.current_tool = None;
|
||||||
self.assistant_streaming = false;
|
self.assistant_streaming = false;
|
||||||
|
|
|
||||||
|
|
@ -43,7 +43,10 @@ pub enum Block {
|
||||||
Compact(CompactEvent),
|
Compact(CompactEvent),
|
||||||
TurnStats {
|
TurnStats {
|
||||||
requests: usize,
|
requests: usize,
|
||||||
input_tokens: u64,
|
/// Net tokens uploaded across the turn's LLM requests
|
||||||
|
/// (cache reads excluded; cache writes included). Same value
|
||||||
|
/// the status line accumulates while the turn is in flight.
|
||||||
|
upload_tokens: u64,
|
||||||
output_tokens: u64,
|
output_tokens: u64,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -406,13 +406,13 @@ fn render_block_into(lines: &mut Vec<Line<'static>>, block: &Block, width: u16,
|
||||||
Block::Compact(evt) => render_compact(lines, evt, width, mode),
|
Block::Compact(evt) => render_compact(lines, evt, width, mode),
|
||||||
Block::TurnStats {
|
Block::TurnStats {
|
||||||
requests,
|
requests,
|
||||||
input_tokens,
|
upload_tokens,
|
||||||
output_tokens,
|
output_tokens,
|
||||||
} => {
|
} => {
|
||||||
let text = format!(
|
let text = format!(
|
||||||
"{} reqs ↑{}/↓{}",
|
"{} reqs ↑{}/↓{}",
|
||||||
requests,
|
requests,
|
||||||
fmt_tokens(*input_tokens),
|
fmt_tokens(*upload_tokens),
|
||||||
fmt_tokens(*output_tokens),
|
fmt_tokens(*output_tokens),
|
||||||
);
|
);
|
||||||
lines.push(
|
lines.push(
|
||||||
|
|
@ -780,14 +780,14 @@ fn draw_status(frame: &mut Frame, app: &App, area: Rect) {
|
||||||
format!(
|
format!(
|
||||||
"request: {} | ↑{}/↓{} | tool: {tool}",
|
"request: {} | ↑{}/↓{} | tool: {tool}",
|
||||||
app.run_requests,
|
app.run_requests,
|
||||||
fmt_tokens(app.run_input_tokens),
|
fmt_tokens(app.run_upload_tokens),
|
||||||
fmt_tokens(app.run_output_tokens),
|
fmt_tokens(app.run_output_tokens),
|
||||||
)
|
)
|
||||||
} else {
|
} else {
|
||||||
format!(
|
format!(
|
||||||
"request: {} | ↑{}/↓{}",
|
"request: {} | ↑{}/↓{}",
|
||||||
app.run_requests,
|
app.run_requests,
|
||||||
fmt_tokens(app.run_input_tokens),
|
fmt_tokens(app.run_upload_tokens),
|
||||||
fmt_tokens(app.run_output_tokens),
|
fmt_tokens(app.run_output_tokens),
|
||||||
)
|
)
|
||||||
};
|
};
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user