//! Tracks per-LLM-request Usage measurements within a Pod run. //! //! Bridge between two sync touchpoints in the Worker lifecycle: //! //! - **`pre_llm_request` hook** (async, but synchronously accessed via the //! tracker): captures `history.len()` at the moment a request goes out. //! - **`on_usage` callback** (sync closure): receives the aggregated final //! `UsageEvent` for that request after the stream completes. //! //! Pairing the two yields one `UsageRecord` per LLM call. Pod drains them //! in `persist_turn` and writes them as `LogEntry::LlmUsage` entries. //! //! Multiple LLM calls per Pod run (tool loop) are supported: each call //! produces its own `(history_len, UsageEvent)` pair, and the records are //! buffered in chronological order. use std::sync::Mutex; use llm_worker::UsageRecord; use llm_worker::timeline::event::UsageEvent; /// One drained measurement: the underlying `UsageRecord` plus an optional /// `correlation_id` stamped by the prune projection (or any other future /// upstream observer) so that downstream metrics emitted alongside this /// record can be joined to it after the fact. #[derive(Debug, Clone)] pub(crate) struct RecordedUsage { pub(crate) record: UsageRecord, pub(crate) correlation_id: Option, } /// Shared between the pre-request hook, the `on_usage` callback, and Pod. pub(crate) struct UsageTracker { /// `history.len()` captured at the most recent `pre_llm_request`. /// Cleared when paired with an incoming `on_usage` event. pending_history_len: Mutex>, /// Optional `correlation_id` set by an upstream observer (currently /// the prune projection on `Fired`). Paired into the next /// `RecordedUsage` and cleared. Skips that don't fire leave this /// `None`, so the resulting record carries no correlation. pending_correlation_id: Mutex>, /// Records accumulated during the current run; drained by Pod. pending_records: Mutex>, } impl UsageTracker { pub(crate) fn new() -> Self { Self { pending_history_len: Mutex::new(None), pending_correlation_id: Mutex::new(None), pending_records: Mutex::new(Vec::new()), } } /// Called from a `pre_llm_request` hook with the current history length. pub(crate) fn note_request(&self, history_len: usize) { *self.pending_history_len.lock().unwrap() = Some(history_len); } /// Stash a `correlation_id` to be paired into the next `RecordedUsage`. /// Currently invoked by the prune observer on `Fired` so that the /// `prune.fire` metric and the `prune.post_request` metric (emitted /// alongside the resulting `LlmUsage`) carry the same join key. /// /// Overwrites any previous unconsumed value — by construction the /// observer fires at most once per outgoing LLM request, immediately /// before the pre-request hook captures `history_len`. pub(crate) fn note_correlation_id(&self, id: String) { *self.pending_correlation_id.lock().unwrap() = Some(id); } /// Called from the `on_usage` callback with the aggregated final /// UsageEvent. If a `history_len` was previously stashed via /// `note_request`, builds a `RecordedUsage` and pushes it onto the /// buffer. If not (e.g. test code that fires Usage outside a request), /// drops the event. pub(crate) fn record_usage(&self, event: &UsageEvent) { let history_len = match self.pending_history_len.lock().unwrap().take() { Some(n) => n, None => return, }; let correlation_id = self.pending_correlation_id.lock().unwrap().take(); // UsageEvent.input_tokens は scheme 層で「占有量(プロンプト全長)」に // 正規化済みである前提(Anthropic は cache_read + cache_creation を // 加算して emit する)。 let input_total = event.input_tokens.unwrap_or(0); let cache_read = event.cache_read_input_tokens.unwrap_or(0); let cache_write = event.cache_creation_input_tokens.unwrap_or(0); let output = event.output_tokens.unwrap_or(0); self.pending_records.lock().unwrap().push(RecordedUsage { record: UsageRecord { history_len, input_total_tokens: input_total, cache_read_tokens: cache_read, cache_write_tokens: cache_write, output_tokens: output, }, correlation_id, }); } /// Return a clone of the accumulated `UsageRecord`s without clearing them. /// Used by request-time circuit breakers that need the same occupancy /// projection as Pod persistence while the run is still active. pub(crate) fn records(&self) -> Vec { self.pending_records .lock() .unwrap() .iter() .map(|r| r.record.clone()) .collect() } /// Drain accumulated records. Called by Pod after a run completes, /// before persisting the turn. pub(crate) fn drain(&self) -> Vec { std::mem::take(&mut *self.pending_records.lock().unwrap()) } } #[cfg(test)] mod tests { use super::*; fn make_event(input: u64, cache_read: u64, cache_write: u64, output: u64) -> UsageEvent { UsageEvent { input_tokens: Some(input), output_tokens: Some(output), total_tokens: Some(input + output), cache_read_input_tokens: Some(cache_read), cache_creation_input_tokens: Some(cache_write), } } #[test] fn pairs_history_len_with_usage_event() { let tracker = UsageTracker::new(); tracker.note_request(5); tracker.record_usage(&make_event(1000, 800, 100, 42)); let records = tracker.drain(); assert_eq!(records.len(), 1); assert_eq!(records[0].record.history_len, 5); assert_eq!(records[0].record.input_total_tokens, 1000); assert_eq!(records[0].record.cache_read_tokens, 800); assert_eq!(records[0].record.cache_write_tokens, 100); assert_eq!(records[0].record.output_tokens, 42); assert!(records[0].correlation_id.is_none()); } #[test] fn records_clones_without_clearing() { let tracker = UsageTracker::new(); tracker.note_request(1); tracker.record_usage(&make_event(10, 0, 0, 5)); let records = tracker.records(); assert_eq!(records.len(), 1); assert_eq!(records[0].history_len, 1); assert_eq!(records[0].input_total_tokens, 10); assert_eq!(tracker.records().len(), 1); } #[test] fn drain_clears_buffer() { let tracker = UsageTracker::new(); tracker.note_request(1); tracker.record_usage(&make_event(10, 0, 0, 5)); assert_eq!(tracker.drain().len(), 1); assert_eq!(tracker.drain().len(), 0); } #[test] fn usage_without_pending_history_len_is_dropped() { let tracker = UsageTracker::new(); tracker.record_usage(&make_event(10, 0, 0, 5)); assert_eq!(tracker.drain().len(), 0); } #[test] fn multiple_requests_in_one_run() { let tracker = UsageTracker::new(); tracker.note_request(5); tracker.record_usage(&make_event(100, 0, 0, 20)); tracker.note_request(10); tracker.record_usage(&make_event(200, 50, 0, 30)); let records = tracker.drain(); assert_eq!(records.len(), 2); assert_eq!(records[0].record.history_len, 5); assert_eq!(records[1].record.history_len, 10); assert_eq!(records[1].record.cache_read_tokens, 50); } #[test] fn correlation_id_pairs_with_next_record_only() { let tracker = UsageTracker::new(); // Stash an ID, then run a request → the ID should land on this record. tracker.note_correlation_id("abc".into()); tracker.note_request(5); tracker.record_usage(&make_event(100, 0, 0, 20)); // Next request without a fresh stash → no correlation_id. tracker.note_request(10); tracker.record_usage(&make_event(200, 50, 0, 30)); let records = tracker.drain(); assert_eq!(records.len(), 2); assert_eq!(records[0].correlation_id.as_deref(), Some("abc")); assert!(records[1].correlation_id.is_none()); } }