137 lines
5.0 KiB
Rust
137 lines
5.0 KiB
Rust
//! Tracks per-LLM-request Usage measurements within a Pod run.
//!
//! Bridge between two sync touchpoints in the Worker lifecycle:
//!
//! - **`pre_llm_request` hook** (async, but synchronously accessed via the
//!   tracker): captures `history.len()` at the moment a request goes out.
//! - **`on_usage` callback** (sync closure): receives the aggregated final
//!   `UsageEvent` for that request after the stream completes.
//!
//! Pairing the two yields one `UsageRecord` per LLM call. Pod drains them
//! in `persist_turn` and writes them as `LogEntry::LlmUsage` entries.
//!
//! Multiple LLM calls per Pod run (tool loop) are supported: each call
//! produces its own `(history_len, UsageEvent)` pair, and the records are
//! buffered in chronological order.
|
||
|
||
use std::sync::Mutex;
|
||
|
||
use llm_worker::UsageRecord;
|
||
use llm_worker::timeline::event::UsageEvent;
|
||
|
||
/// Shared between the pre-request hook, the `on_usage` callback, and Pod.
|
||
pub(crate) struct UsageTracker {
|
||
/// `history.len()` captured at the most recent `pre_llm_request`.
|
||
/// Cleared when paired with an incoming `on_usage` event.
|
||
pending_history_len: Mutex<Option<usize>>,
|
||
/// Records accumulated during the current run; drained by Pod.
|
||
pending_records: Mutex<Vec<UsageRecord>>,
|
||
}
|
||
|
||
impl UsageTracker {
|
||
pub(crate) fn new() -> Self {
|
||
Self {
|
||
pending_history_len: Mutex::new(None),
|
||
pending_records: Mutex::new(Vec::new()),
|
||
}
|
||
}
|
||
|
||
/// Called from a `pre_llm_request` hook with the current history length.
|
||
pub(crate) fn note_request(&self, history_len: usize) {
|
||
*self.pending_history_len.lock().unwrap() = Some(history_len);
|
||
}
|
||
|
||
/// Called from the `on_usage` callback with the aggregated final
|
||
/// UsageEvent. If a `history_len` was previously stashed via
|
||
/// `note_request`, builds a `UsageRecord` and pushes it onto the buffer.
|
||
/// If not (e.g. test code that fires Usage outside a request), drops
|
||
/// the event.
|
||
pub(crate) fn record_usage(&self, event: &UsageEvent) {
|
||
let history_len = match self.pending_history_len.lock().unwrap().take() {
|
||
Some(n) => n,
|
||
None => return,
|
||
};
|
||
// UsageEvent.input_tokens は scheme 層で「占有量(プロンプト全長)」に
|
||
// 正規化済みである前提(Anthropic は cache_read + cache_creation を
|
||
// 加算して emit する)。
|
||
let input_total = event.input_tokens.unwrap_or(0);
|
||
let cache_read = event.cache_read_input_tokens.unwrap_or(0);
|
||
let cache_write = event.cache_creation_input_tokens.unwrap_or(0);
|
||
let output = event.output_tokens.unwrap_or(0);
|
||
self.pending_records.lock().unwrap().push(UsageRecord {
|
||
history_len,
|
||
input_total_tokens: input_total,
|
||
cache_read_tokens: cache_read,
|
||
cache_write_tokens: cache_write,
|
||
output_tokens: output,
|
||
});
|
||
}
|
||
|
||
/// Drain accumulated records. Called by Pod after a run completes,
|
||
/// before persisting the turn.
|
||
pub(crate) fn drain(&self) -> Vec<UsageRecord> {
|
||
std::mem::take(&mut *self.pending_records.lock().unwrap())
|
||
}
|
||
}
|
||
|
||
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a fully populated `UsageEvent`; `total_tokens` is set to
    /// `input + output`.
    fn make_event(input: u64, cache_read: u64, cache_write: u64, output: u64) -> UsageEvent {
        let total = input + output;
        UsageEvent {
            input_tokens: Some(input),
            output_tokens: Some(output),
            total_tokens: Some(total),
            cache_read_input_tokens: Some(cache_read),
            cache_creation_input_tokens: Some(cache_write),
        }
    }

    /// One noted request followed by one usage event yields one record that
    /// carries both the history length and the token counts.
    #[test]
    fn pairs_history_len_with_usage_event() {
        let tracker = UsageTracker::new();
        let event = make_event(1000, 800, 100, 42);
        tracker.note_request(5);
        tracker.record_usage(&event);

        let drained = tracker.drain();
        assert_eq!(drained.len(), 1);

        let record = &drained[0];
        assert_eq!(record.history_len, 5);
        assert_eq!(record.input_total_tokens, 1000);
        assert_eq!(record.cache_read_tokens, 800);
        assert_eq!(record.cache_write_tokens, 100);
        assert_eq!(record.output_tokens, 42);
    }

    /// A second drain returns nothing because the first one emptied the
    /// buffer.
    #[test]
    fn drain_clears_buffer() {
        let tracker = UsageTracker::new();
        tracker.note_request(1);
        tracker.record_usage(&make_event(10, 0, 0, 5));

        let first = tracker.drain();
        assert_eq!(first.len(), 1);

        let second = tracker.drain();
        assert_eq!(second.len(), 0);
    }

    /// A usage event with no preceding `note_request` leaves no record.
    #[test]
    fn usage_without_pending_history_len_is_dropped() {
        let tracker = UsageTracker::new();
        let orphan = make_event(10, 0, 0, 5);
        tracker.record_usage(&orphan);

        let drained = tracker.drain();
        assert_eq!(drained.len(), 0);
    }

    /// Two request/usage pairs in one run come back in the order they were
    /// recorded.
    #[test]
    fn multiple_requests_in_one_run() {
        let tracker = UsageTracker::new();
        let calls = [
            (5, make_event(100, 0, 0, 20)),
            (10, make_event(200, 50, 0, 30)),
        ];
        for (history_len, event) in &calls {
            tracker.note_request(*history_len);
            tracker.record_usage(event);
        }

        let drained = tracker.drain();
        assert_eq!(drained.len(), 2);

        let (first, second) = (&drained[0], &drained[1]);
        assert_eq!(first.history_len, 5);
        assert_eq!(second.history_len, 10);
        assert_eq!(second.cache_read_tokens, 50);
    }
}
|