diff --git a/crates/manifest/src/model.rs b/crates/manifest/src/model.rs index 2b06a128..cb6d4d6c 100644 --- a/crates/manifest/src/model.rs +++ b/crates/manifest/src/model.rs @@ -52,6 +52,10 @@ pub struct ModelManifest { /// `default_capability` → scheme 既定の順で解決される。 #[serde(default, skip_serializing_if = "Option::is_none")] pub capability: Option, + /// モデルのコンテキストウィンドウ上限(tokens)。カタログ未掲載 / inline + /// モデルでもここで明示 override できる。 + #[serde(default, skip_serializing_if = "Option::is_none")] + pub context_window: Option, } impl ModelManifest { @@ -65,6 +69,7 @@ impl ModelManifest { model_id: upper.model_id.or(self.model_id), auth: upper.auth.or(self.auth), capability: upper.capability.or(self.capability), + context_window: upper.context_window.or(self.context_window), } } } diff --git a/crates/pod/src/controller.rs b/crates/pod/src/controller.rs index 2c033719..29f6f301 100644 --- a/crates/pod/src/controller.rs +++ b/crates/pod/src/controller.rs @@ -896,6 +896,10 @@ where // `build_client` がここに到達する前に同じマニフェストで成功している // ため、カタログ解決も必ず通る。念のため失敗時は "unknown" に落とす。 let resolved = provider::catalog::resolve_model_manifest(&manifest.model).ok(); + let context_window = resolved + .as_ref() + .map(|cfg| cfg.context_window) + .unwrap_or(provider::catalog::DEFAULT_CONTEXT_WINDOW); let (provider_name, model_id) = match resolved { Some(cfg) => { let name = match cfg.scheme { @@ -933,6 +937,8 @@ where model: model_id, scope_summary: pod.scope_snapshot().summary(), tools: tool_names, + context_window, + context_tokens: pod.total_tokens().tokens, } } @@ -1004,6 +1010,8 @@ mod tests { model: String::new(), scope_summary: String::new(), tools: Vec::new(), + context_window: 200_000, + context_tokens: 0, }, )); let notify_buffer = NotifyBuffer::new(); @@ -1043,6 +1051,8 @@ mod tests { model: "test".into(), scope_summary: String::new(), tools: Vec::new(), + context_window: 200_000, + context_tokens: 0, }, status: PodStatus::Idle, }) diff --git a/crates/pod/src/runtime/dir.rs b/crates/pod/src/runtime/dir.rs index 21259981..bfcc70df 100644 --- a/crates/pod/src/runtime/dir.rs +++ b/crates/pod/src/runtime/dir.rs @@ -140,6 +140,8 @@ mod tests { model: "claude".into(), scope_summary: String::new(), tools: Vec::new(), + context_window: 200_000, + context_tokens: 0, }, ) } diff --git a/crates/pod/src/shared_state.rs b/crates/pod/src/shared_state.rs index b363d911..61ada971 100644 --- a/crates/pod/src/shared_state.rs +++ b/crates/pod/src/shared_state.rs @@ -151,6 +151,8 @@ mod tests { model: "claude".into(), scope_summary: String::new(), tools: Vec::new(), + context_window: 200_000, + context_tokens: 0, } } diff --git a/crates/pod/src/spawn/comm_tools.rs b/crates/pod/src/spawn/comm_tools.rs index ace7b282..c52fe68c 100644 --- a/crates/pod/src/spawn/comm_tools.rs +++ b/crates/pod/src/spawn/comm_tools.rs @@ -549,6 +549,8 @@ mod tests { model: "test".into(), scope_summary: String::new(), tools: Vec::new(), + context_window: 200_000, + context_tokens: 0, }, status: PodStatus::Idle, } diff --git a/crates/pod/tests/pod_comm_tools_test.rs b/crates/pod/tests/pod_comm_tools_test.rs index f00f0a74..d949f5f9 100644 --- a/crates/pod/tests/pod_comm_tools_test.rs +++ b/crates/pod/tests/pod_comm_tools_test.rs @@ -126,6 +126,8 @@ fn empty_snapshot() -> Event { model: "x".into(), scope_summary: String::new(), tools: Vec::new(), + context_window: 200_000, + context_tokens: 0, }, status: protocol::PodStatus::Idle, } @@ -198,6 +200,8 @@ fn serve_history(listener: UnixListener, items: Vec) -> JoinHandle<()> { model: "x".into(), scope_summary: String::new(), tools: Vec::new(), + context_window: 200_000, + context_tokens: 0, }, status: protocol::PodStatus::Idle, }; diff --git a/crates/pod/tests/pod_events_test.rs b/crates/pod/tests/pod_events_test.rs index f305ef9a..242f801f 100644 --- a/crates/pod/tests/pod_events_test.rs +++ b/crates/pod/tests/pod_events_test.rs @@ -87,6 +87,8 @@ fn empty_snapshot() -> Event { model: "test".into(), scope_summary: String::new(), tools: Vec::new(), + context_window: 200_000, + context_tokens: 0, }, status: PodStatus::Idle, } diff --git a/crates/protocol/src/lib.rs b/crates/protocol/src/lib.rs index df8e3af9..b29d4a8e 100644 --- a/crates/protocol/src/lib.rs +++ b/crates/protocol/src/lib.rs @@ -486,6 +486,12 @@ pub struct Greeting { pub model: String, pub scope_summary: String, pub tools: Vec, + /// Model context window in tokens. Always filled by the Pod greeting. + #[serde(default)] + pub context_window: u64, + /// Estimated current session context tokens at connect time. + #[serde(default)] + pub context_tokens: u64, } // --------------------------------------------------------------------------- @@ -873,6 +879,8 @@ mod tests { model: "claude".into(), scope_summary: "Writable:\n - /tmp".into(), tools: vec!["Read".into()], + context_window: 200_000, + context_tokens: 42_000, }, status: PodStatus::Paused, }; @@ -883,6 +891,8 @@ mod tests { assert_eq!(parsed["data"]["entries"][0]["kind"], "user_input"); assert_eq!(parsed["data"]["greeting"]["pod_name"], "test"); assert_eq!(parsed["data"]["greeting"]["tools"][0], "Read"); + assert_eq!(parsed["data"]["greeting"]["context_window"], 200_000); + assert_eq!(parsed["data"]["greeting"]["context_tokens"], 42_000); assert_eq!(parsed["data"]["status"], "paused"); } @@ -942,7 +952,13 @@ mod tests { let json = r#"{"event":"snapshot","data":{"entries":[],"greeting":{"pod_name":"test","cwd":"/tmp","provider":"anthropic","model":"claude","scope_summary":"","tools":[]}}}"#; let decoded: Event = serde_json::from_str(json).unwrap(); match decoded { - Event::Snapshot { status, .. } => assert_eq!(status, PodStatus::Idle), + Event::Snapshot { + status, greeting, .. + } => { + assert_eq!(status, PodStatus::Idle); + assert_eq!(greeting.context_window, 0); + assert_eq!(greeting.context_tokens, 0); + } other => panic!("expected Snapshot, got {other:?}"), } } diff --git a/crates/provider/README.md b/crates/provider/README.md index a805b0fb..42edaa19 100644 --- a/crates/provider/README.md +++ b/crates/provider/README.md @@ -18,3 +18,4 @@ - `AuthRef::None` / `AuthRef::CodexOAuth` の解決 - `Scheme::required_auth()` と `ResolvedAuth` の妥当性検証(非対応組合せは構築エラー) - capability は manifest 明示 > model catalog > provider.default_capability > `Scheme::default_capability()` の順で解決 +- context window は manifest 明示 > model catalog > provider.default_context_window > builtin fallback の順で解決し、inline model でも `context_window` で override できる diff --git a/crates/provider/src/catalog.rs b/crates/provider/src/catalog.rs index 3a7206e7..f34890cb 100644 --- a/crates/provider/src/catalog.rs +++ b/crates/provider/src/catalog.rs @@ -22,6 +22,11 @@ use serde::{Deserialize, Serialize}; const BUILTIN_PROVIDERS: &str = include_str!("../../../resources/providers/builtin.toml"); const BUILTIN_MODELS: &str = include_str!("../../../resources/models/builtin.toml"); +/// Conservative fallback used when neither the manifest nor catalogs specify +/// a model context window. Greeting still carries a concrete number, while +/// catalog / manifest metadata can override unknown or inline models. +pub const DEFAULT_CONTEXT_WINDOW: u64 = 200_000; + #[derive(Debug, thiserror::Error)] pub enum CatalogError { #[error("failed to read catalog at {path}: {source}")] @@ -92,6 +97,10 @@ pub struct ProviderEntry { /// 使う。 #[serde(default)] pub default_capability: Option, + /// モデルカタログ未登録モデルで使う既定の context window。省略時は + /// [`DEFAULT_CONTEXT_WINDOW`] を使う。 + #[serde(default)] + pub default_context_window: Option, } /// モデルカタログの 1 エントリ。 @@ -107,6 +116,10 @@ pub struct ModelEntry { /// `ProviderEntry::default_capability` にフォールバックする。 #[serde(default)] pub capability: Option, + /// モデル単位の context window。省略時は provider default → builtin + /// fallback にフォールバックする。 + #[serde(default)] + pub context_window: Option, } /// 解決済みモデル設定。`build_client` が消費する完成形。 @@ -117,6 +130,7 @@ pub struct ModelConfig { pub model_id: String, pub auth: AuthRef, pub capability: Option, + pub context_window: u64, } #[derive(Debug, Deserialize)] @@ -244,6 +258,8 @@ fn split_ref(s: &str) -> Option<(&str, &str)> { /// auth は manifest 明示 > provider.auth_hint 由来、capability は /// manifest 明示 > model catalog > provider.default_capability > /// (`build_client` 側で)`Scheme::default_capability()`。 +/// context_window は manifest 明示 > model catalog > provider default > +/// [`DEFAULT_CONTEXT_WINDOW`]。 pub fn resolve_model_manifest(manifest: &ModelManifest) -> Result { let providers = load_providers().map_err(ResolveError::LoadProviders)?; let models = load_models().map_err(ResolveError::LoadModels)?; @@ -294,12 +310,18 @@ pub fn resolve_with_catalogs( .and_then(|m| m.capability.clone()) .or_else(|| provider.default_capability.clone()) }); + let context_window = manifest + .context_window + .or_else(|| model_entry.and_then(|m| m.context_window)) + .or(provider.default_context_window) + .unwrap_or(DEFAULT_CONTEXT_WINDOW); Ok(ModelConfig { scheme, base_url, model_id, auth, capability, + context_window, }) } else { let scheme = manifest @@ -319,6 +341,7 @@ pub fn resolve_with_catalogs( model_id, auth, capability: manifest.capability.clone(), + context_window: manifest.context_window.unwrap_or(DEFAULT_CONTEXT_WINDOW), }) } } @@ -381,6 +404,20 @@ mod tests { cfg.capability.is_some(), "should fall back to provider.default_capability" ); + assert_eq!(cfg.context_window, 200_000); + } + + #[test] + fn context_window_manifest_overrides_catalog() { + let providers = load_builtin_providers().unwrap(); + let models = load_builtin_models().unwrap(); + let manifest = ModelManifest { + ref_: Some("anthropic/claude-sonnet-4-6".into()), + context_window: Some(123_456), + ..Default::default() + }; + let cfg = resolve_with_catalogs(&manifest, &providers, &models).unwrap(); + assert_eq!(cfg.context_window, 123_456); } #[test] @@ -461,6 +498,25 @@ mod tests { assert_eq!(cfg.scheme, SchemeKind::Anthropic); assert_eq!(cfg.model_id, "claude-sonnet-4-6"); assert!(cfg.capability.is_none(), "no catalog hit for inline-only"); + assert_eq!(cfg.context_window, DEFAULT_CONTEXT_WINDOW); + } + + #[test] + fn resolve_inline_context_window_override() { + let providers = load_builtin_providers().unwrap(); + let models = load_builtin_models().unwrap(); + let manifest = ModelManifest { + scheme: Some(SchemeKind::Anthropic), + model_id: Some("claude-sonnet-4-6".into()), + auth: Some(AuthRef::ApiKey { + env: None, + file: Some(PathBuf::from("/tmp/sk")), + }), + context_window: Some(777_000), + ..Default::default() + }; + let cfg = resolve_with_catalogs(&manifest, &providers, &models).unwrap(); + assert_eq!(cfg.context_window, 777_000); } #[test] diff --git a/crates/provider/src/lib.rs b/crates/provider/src/lib.rs index b5eb965f..6bd3ea5b 100644 --- a/crates/provider/src/lib.rs +++ b/crates/provider/src/lib.rs @@ -186,6 +186,7 @@ mod tests { file: None, }, capability: None, + context_window: 200_000, } } @@ -313,6 +314,7 @@ mod tests { model_id: "llama3".into(), auth: AuthRef::None, capability: None, + context_window: 200_000, }; assert!(build_client_from_config(&config).is_ok()); } diff --git a/crates/tui/src/app.rs b/crates/tui/src/app.rs index d644c538..23d1f98c 100644 --- a/crates/tui/src/app.rs +++ b/crates/tui/src/app.rs @@ -57,6 +57,10 @@ pub struct App { /// cache reads excluded). Reset on `RunEnd`. pub run_upload_tokens: u64, pub run_output_tokens: u64, + /// Latest session context tokens reported by the Pod. This is the raw + /// `input_tokens` value and is independent from per-run upload totals. + pub session_context_tokens: u64, + pub context_window: u64, pub turn_index: usize, pub current_tool: Option, pub input: InputBuffer, @@ -100,6 +104,8 @@ impl App { run_requests: 0, run_upload_tokens: 0, run_output_tokens: 0, + session_context_tokens: 0, + context_window: 0, turn_index: 0, current_tool: None, input: InputBuffer::new(), @@ -649,6 +655,7 @@ impl App { output_tokens, cache_read_input_tokens, } => { + self.session_context_tokens = input_tokens.unwrap_or(0); // Subtract the cache-hit portion so a tool loop that // re-sends the same prefix on every request doesn't // re-count it. cache_creation stays in (it is full @@ -684,6 +691,7 @@ impl App { })); } Event::CompactDone { new_segment_id } => { + self.session_context_tokens = 0; if let Some(evt) = self.last_streaming_compact_mut() { let elapsed_secs = match evt { CompactEvent::Streaming { started_at } => { @@ -914,6 +922,8 @@ impl App { /// produced. Followed by `Event::Entry` updates for anything /// committed after the snapshot. fn restore_snapshot(&mut self, entries: &[serde_json::Value], greeting: protocol::Greeting) { + self.context_window = greeting.context_window; + self.session_context_tokens = greeting.context_tokens; self.turn_index = 0; self.blocks.clear(); self.cache = FileCache::new(); @@ -1570,9 +1580,68 @@ mod completion_flow_tests { model: "test-model".into(), scope_summary: String::new(), tools: Vec::new(), + context_window: 200_000, + context_tokens: 0, } } + #[test] + fn snapshot_initializes_context_usage() { + let mut app = App::new("test".into()); + let mut greeting = test_greeting(); + greeting.context_window = 123_000; + greeting.context_tokens = 45_000; + + app.handle_pod_event(Event::Snapshot { + entries: Vec::new(), + greeting, + status: PodStatus::Idle, + }); + + assert_eq!(app.context_window, 123_000); + assert_eq!(app.session_context_tokens, 45_000); + } + + #[test] + fn usage_updates_session_context_tokens_without_cache_discount() { + let mut app = App::new("test".into()); + + app.handle_pod_event(Event::Usage { + input_tokens: Some(42_000), + output_tokens: Some(9), + cache_read_input_tokens: Some(40_000), + }); + + assert_eq!(app.session_context_tokens, 42_000); + assert_eq!(app.run_upload_tokens, 2_000); + assert_eq!(app.run_output_tokens, 9); + } + + #[test] + fn compact_done_resets_session_context_tokens() { + let mut app = App::new("test".into()); + app.session_context_tokens = 42_000; + + app.handle_pod_event(Event::CompactDone { + new_segment_id: uuid::Uuid::nil(), + }); + + assert_eq!(app.session_context_tokens, 0); + } + + #[test] + fn turn_start_and_run_end_do_not_reset_session_context_tokens() { + let mut app = App::new("test".into()); + app.session_context_tokens = 42_000; + + app.handle_pod_event(Event::TurnStart { turn: 1 }); + app.handle_pod_event(Event::RunEnd { + result: RunResult::Finished, + }); + + assert_eq!(app.session_context_tokens, 42_000); + } + #[test] fn live_task_create_updates_task_store() { let mut app = App::new("test".into()); diff --git a/crates/tui/src/ui.rs b/crates/tui/src/ui.rs index f6f923ad..c81c3543 100644 --- a/crates/tui/src/ui.rs +++ b/crates/tui/src/ui.rs @@ -7,6 +7,7 @@ //! ──────────── separator ────────── //! status line (1 row) //! > input area (1 row in Phase 1) +//! actionbar (1 row) //! ``` //! //! Every frame we walk the entire `App::blocks` vector, produce styled @@ -78,6 +79,7 @@ pub fn draw(frame: &mut Frame, app: &mut App) { Constraint::Length(1), // separator Constraint::Length(1), // status Constraint::Length(input_height), // input area + Constraint::Length(1), // actionbar ]) .split(area); @@ -88,6 +90,7 @@ pub fn draw(frame: &mut Frame, app: &mut App) { draw_separator(frame, chunks[3]); draw_status(frame, app, chunks[4]); draw_input(frame, &input_render, chunks[5]); + draw_actionbar(frame, app, chunks[6]); if let Some(state) = app.completion.as_ref().filter(|c| c.is_active()) { draw_completion_popup(frame, state, chunks[5]); } @@ -1074,6 +1077,20 @@ fn draw_separator(frame: &mut Frame, area: Rect) { ); } +fn context_usage_text(app: &App) -> String { + let pct = if app.context_window == 0 { + 0 + } else { + ((app.session_context_tokens as f64 / app.context_window as f64) * 100.0).round() as u64 + }; + format!( + "{} / {} ({}%)", + fmt_tokens(app.session_context_tokens), + fmt_tokens(app.context_window), + pct + ) +} + fn draw_status(frame: &mut Frame, app: &App, area: Rect) { let conn = if app.connected { Span::styled("●", Style::default().fg(Color::Green)) @@ -1124,7 +1141,15 @@ fn draw_status(frame: &mut Frame, app: &App, area: Rect) { spans.push(Span::styled(" idle", Style::default().fg(Color::DarkGray))); } - // Right-aligned mode / scroll indicator. + let right_text = context_usage_text(app); + let right_line = Line::from(Span::styled(right_text, Style::default().fg(Color::Gray))) + .alignment(ratatui::layout::Alignment::Right); + + frame.render_widget(Paragraph::new(Line::from(spans)), area); + frame.render_widget(Paragraph::new(right_line), area); +} + +fn draw_actionbar(frame: &mut Frame, app: &App, area: Rect) { let mut right: Vec> = Vec::new(); if !app.scroll.follow_tail { right.push(Span::styled( @@ -1137,8 +1162,6 @@ fn draw_status(frame: &mut Frame, app: &App, area: Rect) { Style::default().fg(Color::DarkGray), )); let right_line = Line::from(right).alignment(ratatui::layout::Alignment::Right); - - frame.render_widget(Paragraph::new(Line::from(spans)), area); frame.render_widget(Paragraph::new(right_line), area); } diff --git a/docs/tui-parts.md b/docs/tui-parts.md index bf55e3ab..75cf0368 100644 --- a/docs/tui-parts.md +++ b/docs/tui-parts.md @@ -6,3 +6,5 @@ status |● insomnia idle 42.1k / 200k (21%) input |> actionbar | ↑ scrolled [normal] ``` + +status 右端は常に session context usage を ` / (%)` 形式で表示する。mode / scrolled などの操作状態は actionbar に寄せる。 diff --git a/resources/models/builtin.toml b/resources/models/builtin.toml index 51271ca5..562c1d9e 100644 --- a/resources/models/builtin.toml +++ b/resources/models/builtin.toml @@ -2,42 +2,51 @@ [[model]] id = "claude-sonnet-4-6" provider = "anthropic" +context_window = 200000 [[model]] id = "claude-sonnet-4-5" provider = "anthropic" +context_window = 200000 [[model]] id = "claude-opus-4-1" provider = "anthropic" +context_window = 200000 # Ollama local (capability is router-ish / ollama handles its own models) [[model]] id = "llama3.1" provider = "ollama-local" +context_window = 128000 [[model]] id = "qwen2.5-coder" provider = "ollama-local" +context_window = 128000 # Codex OAuth (ChatGPT backend via Responses API) [[model]] id = "gpt-5-codex" provider = "codex-oauth" +context_window = 400000 capability = { tool_calling = "parallel", structured_output = "json_schema", reasoning = "effort", vision = true, prompt_caching = { kind = "auto" } } [[model]] id = "gpt-5" provider = "codex-oauth" +context_window = 400000 capability = { tool_calling = "parallel", structured_output = "json_schema", reasoning = "effort", vision = true, prompt_caching = { kind = "auto" } } # OpenRouter [[model]] id = "anthropic/claude-sonnet-4" provider = "openrouter" +context_window = 200000 capability = { tool_calling = "parallel", structured_output = "json_schema", reasoning = "budget_tokens", vision = true, prompt_caching = { kind = "auto" } } [[model]] id = "openai/gpt-5" provider = "openrouter" +context_window = 400000 capability = { tool_calling = "parallel", structured_output = "json_schema", reasoning = "effort", vision = true, prompt_caching = { kind = "auto" } } diff --git a/resources/providers/builtin.toml b/resources/providers/builtin.toml index faa6d353..b79811bb 100644 --- a/resources/providers/builtin.toml +++ b/resources/providers/builtin.toml @@ -5,6 +5,7 @@ scheme = "anthropic" base_url = "https://api.anthropic.com" auth_hint = { kind = "api_key", env = "INSOMNIA_API_KEY_ANTHROPIC" } default_capability = { tool_calling = "parallel", structured_output = "json_schema", reasoning = "budget_tokens", vision = true, prompt_caching = { kind = "explicit", max_breakpoints = 4 } } +default_context_window = 200000 [[provider]] id = "ollama-local" @@ -13,6 +14,7 @@ scheme = "anthropic" base_url = "http://localhost:11434" auth_hint = { kind = "none" } default_capability = { tool_calling = "parallel", structured_output = "json_schema", vision = false, prompt_caching = { kind = "auto" } } +default_context_window = 128000 [[provider]] id = "codex-oauth" @@ -20,6 +22,7 @@ display_name = "ChatGPT (Codex OAuth)" scheme = "openai_responses" auth_hint = { kind = "codex_oauth" } default_capability = { tool_calling = "parallel", structured_output = "json_schema", reasoning = "effort", vision = true, prompt_caching = { kind = "auto" } } +default_context_window = 400000 [[provider]] id = "openrouter" @@ -28,3 +31,4 @@ scheme = "openai_chat" base_url = "https://openrouter.ai/api/v1" auth_hint = { kind = "api_key", env = "INSOMNIA_API_KEY_OPENROUTER" } default_capability = { tool_calling = "parallel", structured_output = "json_schema", vision = true, prompt_caching = { kind = "auto" } } +default_context_window = 200000