close: multi pod status probes

merge: multi pod status probes
review: multi pod status probes
2026-05-30 14:45:39 +09:00 · 2026-05-30 14:45:14 +09:00 · 2026-05-30 14:45:14 +09:00 · 2026-05-30 14:40:53 +09:00 · 2026-05-30 14:38:21 +09:00 · 2026-05-30 14:38:11 +09:00
15 changed files with 692 additions and 48 deletions
--- a/crates/pod/src/controller.rs
+++ b/crates/pod/src/controller.rs
@ -900,28 +900,15 @@ async fn controller_loop<C, St>(
            Method::ListCompletions { .. } => {}

            Method::PodEvent(event) => {
-                // For agent-visible PodEvents, live echo travels through the
-                // SystemItem lane: once the interceptor drains the notify buffer,
-                // the typed `SystemItem::PodEvent` lands as a
-                // `LogEntry::SystemItem` entry and the sink forwards it
-                // to clients as `Event::SystemItem`. Control-plane-only
-                // PodEvents use this same receive path only for side effects.
-                //
-                // (1) system side effects — idempotent and tolerant of
-                // out-of-order delivery (e.g. `TurnEnded` arriving
-                // after `ShutDown`).
-                crate::ipc::event::apply_event_side_effects(
-                    &event,
+                if handle_inbound_pod_event(
+                    event,
                    &spawned_registry,
                    &spawner_name,
-                    &self_parent_socket,
+                    self_parent_socket.as_ref(),
+                    &notify_buffer,
                )
-                .await;
-                // (2) agent-visible events enter the notification/history lane.
-                // Control-plane-only events (currently ScopeSubDelegated)
-                // stop after side effects so they do not wake or notify the LLM.
-                if event.should_notify_agent() {
-                    pod.push_pod_event_notify(event);
+                .await
+                {
                    // Auto-kick a turn if the Pod is idle so the
                    // notification is not stranded. Matches the
                    // `Method::Notify` idle path.
@ -961,6 +948,35 @@ async fn controller_loop<C, St>(
    let _ = shutdown_tx.send(());
 }

+/// Apply an inbound child `PodEvent` exactly once.
+///
+/// Side effects (control-plane state updates and upward propagation) run for
+/// every event; only agent-visible events are staged on the notify buffer.
+/// Returns `true` when the event was staged, so the caller can decide on
+/// lifecycle-dependent follow-up such as the idle `RunForNotification` auto-kick.
+async fn handle_inbound_pod_event(
+    event: protocol::PodEvent,
+    spawned_registry: &Arc<SpawnedPodRegistry>,
+    self_name: &str,
+    parent_socket: Option<&PathBuf>,
+    notify_buffer: &NotifyBuffer,
+) -> bool {
+    let self_parent_socket = parent_socket.cloned();
+    crate::ipc::event::apply_event_side_effects(
+        &event,
+        spawned_registry,
+        self_name,
+        &self_parent_socket,
+    )
+    .await;
+
+    let notify_agent = event.should_notify_agent();
+    if notify_agent {
+        notify_buffer.push_pod_event(event);
+    }
+    notify_agent
+}
+
 /// Drives a Pod future (one in-flight turn) while concurrently
 /// processing incoming methods through an inner select! arm. Returns
 /// `(final_status, shutdown_requested)`.
@ -1095,23 +1111,17 @@ where
                        // mpsc is consume-once, so we cannot defer this
                        // to the next main-loop iteration — drop here
                        // would lose the event entirely (children fire
-                        // and forget). Apply the side effects inline
-                        // and, for agent-visible variants, stage the typed
-                        // event on the notification buffer so the in-flight
-                        // turn's next `pending_history_appends` surfaces it
-                        // as a typed `SystemItem::PodEvent`. Control-plane-only
-                        // variants stop after side effects.
-                        let self_parent_socket = parent_socket.cloned();
-                        crate::ipc::event::apply_event_side_effects(
-                            &event,
+                        // and forget). Auto-kick remains unnecessary here:
+                        // the in-flight turn will drain agent-visible events
+                        // from the notify buffer on its next history append.
+                        handle_inbound_pod_event(
+                            event,
                            spawned_registry,
                            self_name,
-                            &self_parent_socket,
+                            parent_socket,
+                            notify_buffer,
                        )
                        .await;
-                        if event.should_notify_agent() {
-                            notify_buffer.push_pod_event(event);
-                        }
                    }
                    None => {
                        let _ = cancel_tx.try_send(());
--- a/crates/tui/src/multi_pod.rs
+++ b/crates/tui/src/multi_pod.rs
@ -656,7 +656,7 @@ fn row_status_label(entry: &PodListEntry) -> (&'static str, Style) {
                    .fg(Color::Cyan)
                    .add_modifier(Modifier::BOLD),
            ),
-            None => ("live unknown", Style::default().fg(Color::DarkGray)),
+            None => ("live", Style::default().fg(Color::DarkGray)),
        };
    }
    if entry
@ -1194,6 +1194,31 @@ mod tests {
        assert!(app.selected_send_disabled_reason().is_none());
    }

+    #[test]
+    fn multi_status_label_for_live_without_reported_status_is_softened() {
+        let mut live = live_info("probing", PodStatus::Idle);
+        live.status = None;
+        let app = test_app(vec![live]);
+
+        let (label, _) = row_status_label(app.list.selected_entry().unwrap());
+
+        assert_eq!(label, "live");
+    }
+
+    #[test]
+    fn multi_status_labels_preserve_explicit_live_statuses() {
+        for (status, expected_label) in [
+            (PodStatus::Idle, "live idle"),
+            (PodStatus::Running, "live running"),
+            (PodStatus::Paused, "live paused"),
+        ] {
+            let app = test_app(vec![live_info("pod", status)]);
+            let (label, _) = row_status_label(app.list.selected_entry().unwrap());
+
+            assert_eq!(label, expected_label);
+        }
+    }
+
    #[test]
    fn multi_running_paused_and_stopped_targets_are_direct_send_disabled() {
        let mut app = test_app(vec![
--- a/crates/tui/src/pod_list.rs
+++ b/crates/tui/src/pod_list.rs
@ -291,19 +291,39 @@ pub(crate) async fn read_reachable_live_pod_infos(
    store: &FsStore,
 ) -> Result<Vec<LivePodInfo>, io::Error> {
    let records = read_live_pod_infos()?;
-    let mut reachable = Vec::new();
-    for mut record in records {
-        let Ok(status) = probe_live_status(&record.socket_path).await else {
+    probe_reachable_live_pod_infos(store, records).await
+}
+
+async fn probe_reachable_live_pod_infos(
+    store: &FsStore,
+    records: Vec<LivePodInfo>,
+) -> Result<Vec<LivePodInfo>, io::Error> {
+    let mut handles = Vec::with_capacity(records.len());
+    for record in records {
+        handles.push(tokio::spawn(probe_live_pod_info(record)));
+    }
+
+    let mut reachable = Vec::with_capacity(handles.len());
+    for handle in handles {
+        let result = handle
+            .await
+            .map_err(|e| io::Error::other(format!("live status probe task failed: {e}")))?;
+        let Ok(mut record) = result else {
            continue;
        };
-        record.reachable = true;
-        record.status = status;
        record.summary = summarize_live_pod(store, &record);
        reachable.push(record);
    }
    Ok(reachable)
 }

+async fn probe_live_pod_info(mut record: LivePodInfo) -> Result<LivePodInfo, io::Error> {
+    let status = probe_live_status(&record.socket_path).await?;
+    record.reachable = true;
+    record.status = status;
+    Ok(record)
+}
+
 pub(crate) fn live_socket_for_pod(pod_name: &str) -> Option<PathBuf> {
    read_live_pod_infos()
        .ok()?
@ -343,7 +363,7 @@ fn corrupt_stored_info(pod_name: String, message: String) -> StoredPodInfo {
    }
 }

-const LIVE_STATUS_PROBE_TIMEOUT: Duration = Duration::from_millis(25);
+const LIVE_STATUS_PROBE_TIMEOUT: Duration = Duration::from_millis(200);

 async fn probe_live_status(socket_path: &Path) -> Result<Option<PodStatus>, io::Error> {
    let mut client = PodClient::connect(socket_path).await?;
@ -561,11 +581,16 @@ fn trim_one_line(s: &str, max_chars: usize) -> String {
 #[cfg(test)]
 mod tests {
    use super::*;
+    use std::sync::Arc;
+
    use llm_worker::llm_client::types::RequestConfig;
    use pod_store::FsPodStore;
    use pod_store::{PodActiveSegmentRef, PodMetadataStore};
+    use protocol::stream::JsonLineWriter;
    use session_store::{new_segment_id, new_session_id};
    use tempfile::tempdir;
+    use tokio::net::UnixListener;
+    use tokio::sync::Barrier;

    const SOURCE: PodVisibilitySource = PodVisibilitySource::ResumePicker;

@ -752,6 +777,30 @@ mod tests {
        );
    }

+    #[test]
+    fn live_reachable_row_without_reported_status_can_open_but_not_send_now() {
+        let mut live = live_info("live", PodStatus::Idle);
+        live.status = None;
+        live.reachable = true;
+
+        let entry = single_entry(PodList::from_sources(SOURCE, vec![], vec![live], None, 10));
+
+        assert!(entry.actions.can_open);
+        assert!(!entry.actions.can_restore);
+        assert!(!entry.actions.can_send_now);
+        assert!(!entry.actions.can_queue_send);
+        assert_eq!(
+            entry.attach_socket_path(),
+            Some(Path::new("/tmp/live.sock"))
+        );
+        assert!(
+            !entry
+                .diagnostics
+                .iter()
+                .any(|diagnostic| diagnostic.kind == PodEntryDiagnosticKind::LiveUnreachable)
+        );
+    }
+
    #[test]
    fn live_running_reachable_row_can_open_but_not_send_now() {
        let entry = single_entry(PodList::from_sources(
@ -811,6 +860,82 @@ mod tests {
        assert_eq!(status, Some(PodStatus::Idle));
    }

+    #[tokio::test]
+    async fn live_status_probes_run_concurrently() {
+        let store_dir = tempdir().unwrap();
+        let store = FsStore::new(store_dir.path()).unwrap();
+        let socket_dir = tempdir().unwrap();
+        let probe_count = 3;
+        let barrier = Arc::new(Barrier::new(probe_count));
+        let mut records = Vec::new();
+        let mut servers = Vec::new();
+
+        for index in 0..probe_count {
+            let pod_name = format!("pod-{index}");
+            let socket_path = socket_dir.path().join(format!("{pod_name}.sock"));
+            let listener = UnixListener::bind(&socket_path).unwrap();
+            let barrier = Arc::clone(&barrier);
+            servers.push(tokio::spawn(async move {
+                let (stream, _) = listener.accept().await.unwrap();
+                barrier.wait().await;
+                let mut writer = JsonLineWriter::new(stream);
+                writer
+                    .write(&Event::Status {
+                        status: PodStatus::Idle,
+                    })
+                    .await
+                    .unwrap();
+            }));
+            records.push(live_probe_record(&pod_name, socket_path));
+        }
+
+        let records = tokio::time::timeout(
+            LIVE_STATUS_PROBE_TIMEOUT * 3,
+            probe_reachable_live_pod_infos(&store, records),
+        )
+        .await
+        .expect("status probes should complete")
+        .unwrap();
+
+        assert_eq!(records.len(), probe_count);
+        assert!(records.iter().all(|record| record.reachable));
+        assert!(
+            records
+                .iter()
+                .all(|record| record.status == Some(PodStatus::Idle))
+        );
+        for server in servers {
+            server.await.unwrap();
+        }
+    }
+
+    #[tokio::test]
+    async fn live_status_probe_timeout_still_marks_socket_reachable() {
+        let store_dir = tempdir().unwrap();
+        let store = FsStore::new(store_dir.path()).unwrap();
+        let socket_dir = tempdir().unwrap();
+        let socket_path = socket_dir.path().join("silent.sock");
+        let listener = UnixListener::bind(&socket_path).unwrap();
+        let server = tokio::spawn(async move {
+            let (_stream, _) = listener.accept().await.unwrap();
+            std::future::pending::<()>().await;
+        });
+
+        let records = probe_reachable_live_pod_infos(
+            &store,
+            vec![live_probe_record("silent", socket_path.clone())],
+        )
+        .await
+        .unwrap();
+
+        assert_eq!(records.len(), 1);
+        assert_eq!(records[0].pod_name, "silent");
+        assert!(records[0].reachable);
+        assert_eq!(records[0].status, None);
+        assert_eq!(records[0].socket_path, socket_path);
+        server.abort();
+    }
+
    #[test]
    fn corrupt_stored_metadata_has_diagnostic() {
        let entry = single_entry(PodList::from_sources(
@ -985,6 +1110,17 @@ mod tests {
        }
    }

+    fn live_probe_record(pod_name: &str, socket_path: PathBuf) -> LivePodInfo {
+        LivePodInfo {
+            pod_name: pod_name.to_string(),
+            socket_path,
+            status: None,
+            reachable: false,
+            segment_id: None,
+            summary: PodEntrySummary::default(),
+        }
+    }
+
    fn test_greeting() -> protocol::Greeting {
        protocol::Greeting {
            pod_name: "live".to_string(),
--- a/work-items/closed/20260527-000007-pod-inbound-pod-event-dedup/artifacts/.gitkeep
+++ b/work-items/closed/20260527-000007-pod-inbound-pod-event-dedup/artifacts/.gitkeep
--- a/work-items/closed/20260527-000007-pod-inbound-pod-event-dedup/item.md
+++ b/work-items/closed/20260527-000007-pod-inbound-pod-event-dedup/item.md
@ -2,12 +2,12 @@
 id: 20260527-000007-pod-inbound-pod-event-dedup
 slug: pod-inbound-pod-event-dedup
 title: Inbound PodEvent ハンドリングの重複を統合する
-status: open
+status: closed
 kind: task
 priority: P2
 labels: [migrated]
 created_at: 2026-05-27T00:00:07Z
-updated_at: 2026-05-27T00:00:07Z
+updated_at: 2026-05-30T05:37:00Z
 assignee: null
 legacy_ticket: tickets/pod-inbound-pod-event-dedup.md
 ---
--- a/work-items/closed/20260527-000007-pod-inbound-pod-event-dedup/resolution.md
+++ b/work-items/closed/20260527-000007-pod-inbound-pod-event-dedup/resolution.md
@ -0,0 +1,76 @@
+---
+id: 20260527-000007-pod-inbound-pod-event-dedup
+slug: pod-inbound-pod-event-dedup
+title: Inbound PodEvent ハンドリングの重複を統合する
+status: closed
+kind: task
+priority: P2
+labels: [migrated]
+created_at: 2026-05-27T00:00:07Z
+updated_at: 2026-05-30T05:37:00Z
+assignee: null
+legacy_ticket: tickets/pod-inbound-pod-event-dedup.md
+---
+
+## Migration reference
+
+- legacy_ticket: tickets/pod-inbound-pod-event-dedup.md
+- migrated_from: TODO.md / tickets directory migration on 2026-05-27
+
+# Inbound PodEvent ハンドリングの重複を統合する
+
+## 背景
+
+子 Pod から `Method::PodEvent(event)` を受けたときの処理が `controller_loop` と `drive_turn` の 2 箇所にコピーされている。
+
+`controller.rs:693-720`（idle / paused 中）:
+
+```rust
+Method::PodEvent(event) => {
+    crate::ipc::event::apply_event_side_effects(
+        &event, &spawned_registry, &spawner_name, &self_parent_socket,
+    ).await;
+    pod.push_pod_event_notify(event);
+    if shared_state.get_status() == PodStatus::Idle {
+        pending = Some(PendingRun::RunForNotification);
+    }
+}
+```
+
+`controller.rs:861-879`（in-flight turn 中）:
+
+```rust
+Some(Method::PodEvent(event)) => {
+    let self_parent_socket = parent_socket.cloned();
+    crate::ipc::event::apply_event_side_effects(
+        &event, spawned_registry, self_name, &self_parent_socket,
+    ).await;
+    notify_buffer.push_pod_event(event);
+}
+```
+
+差分は 2 点:
+
+1. **buffer への push 経路**: `pod.push_pod_event_notify(event)` vs `notify_buffer.push_pod_event(event)`。両者は同じ `NotifyBuffer` を叩く（`pod.rs:845-846` は `self.pending_notifies.push_pod_event(event)` を呼ぶだけで、`notify_buffer_handle()` はその `pending_notifies.clone()` を返す）。**完全に等価**。
+2. **auto-kick**: idle 経路だけ `PendingRun::RunForNotification` を stage する。in-flight 経路は in-flight 自体が消化するので不要。
+
+つまり「event の処理本体」（side-effects + notify buffer への push）は同一で、後段の auto-kick だけが state-dependent な分岐。にもかかわらず関数化されておらず、片方をいじってもう片方を忘れると挙動が割れる。
+
+## 要件
+
+- side-effects 適用 + NotifyBuffer への typed push の流れを単一関数 `handle_inbound_pod_event` に切り出す。
+- `controller_loop` / `drive_turn` の両方からこのヘルパーを呼ぶ形に置き換える。
+- auto-kick (`PendingRun::RunForNotification` の stage) は呼び出し側の責務として残す。これは Pod のライフサイクル状態に依存した判断で、ヘルパー内には押し込めない。
+- 関数シグネチャは引数を最小化する。`event`、`spawned_registry`、`self_name: &str`、`self_parent_socket: &Option<PathBuf>` または `Option<&PathBuf>`、`notify_buffer: &NotifyBuffer` の 5 つで足りる前提。`Pod` への可変参照は不要（`notify_buffer` で代用可能）。
+- 動作変化なし。既存の `Method::PodEvent` 挙動（in-flight / idle 両方）が完全に同一で続行すること。
+
+## 完了条件
+
+- `controller.rs` 内に `apply_event_side_effects` 呼び出しが 1 箇所だけ残り、`controller_loop` と `drive_turn` の `Method::PodEvent` アームはどちらも `handle_inbound_pod_event(...)` 呼び出し + idle 経路のみ auto-kick stage、という形になる。
+- 既存の inbound PodEvent 関連テスト（特に `apply_event_side_effects` の idempotency や `notify_buffer` への typed push）が通る。
+
+## 範囲外
+
+- `apply_event_side_effects` 自体の中身変更。
+- `NotifyBuffer` API のリネーム / 統合。
+- `pod.push_pod_event_notify` の削除（[[pod-interrupt-prep-internalize]] と同じく将来の整理対象だが、本チケットでは外向き API は触らない）。
--- a/work-items/closed/20260527-000007-pod-inbound-pod-event-dedup/thread.md
+++ b/work-items/closed/20260527-000007-pod-inbound-pod-event-dedup/thread.md
@ -0,0 +1,91 @@
+<!-- event: migration author: tickets.sh-migration at: 2026-05-27T00:00:07Z -->
+
+## Migrated
+
+Migrated from tickets/pod-inbound-pod-event-dedup.md. No legacy review file was present at migration time.
+
+---
+
+<!-- event: close author: hare at: 2026-05-30T05:37:00Z status: closed -->
+
+## Closed
+
+---
+id: 20260527-000007-pod-inbound-pod-event-dedup
+slug: pod-inbound-pod-event-dedup
+title: Inbound PodEvent ハンドリングの重複を統合する
+status: closed
+kind: task
+priority: P2
+labels: [migrated]
+created_at: 2026-05-27T00:00:07Z
+updated_at: 2026-05-30T05:37:00Z
+assignee: null
+legacy_ticket: tickets/pod-inbound-pod-event-dedup.md
+---
+
+## Migration reference
+
+- legacy_ticket: tickets/pod-inbound-pod-event-dedup.md
+- migrated_from: TODO.md / tickets directory migration on 2026-05-27
+
+# Inbound PodEvent ハンドリングの重複を統合する
+
+## 背景
+
+子 Pod から `Method::PodEvent(event)` を受けたときの処理が `controller_loop` と `drive_turn` の 2 箇所にコピーされている。
+
+`controller.rs:693-720`（idle / paused 中）:
+
+```rust
+Method::PodEvent(event) => {
+    crate::ipc::event::apply_event_side_effects(
+        &event, &spawned_registry, &spawner_name, &self_parent_socket,
+    ).await;
+    pod.push_pod_event_notify(event);
+    if shared_state.get_status() == PodStatus::Idle {
+        pending = Some(PendingRun::RunForNotification);
+    }
+}
+```
+
+`controller.rs:861-879`（in-flight turn 中）:
+
+```rust
+Some(Method::PodEvent(event)) => {
+    let self_parent_socket = parent_socket.cloned();
+    crate::ipc::event::apply_event_side_effects(
+        &event, spawned_registry, self_name, &self_parent_socket,
+    ).await;
+    notify_buffer.push_pod_event(event);
+}
+```
+
+差分は 2 点:
+
+1. **buffer への push 経路**: `pod.push_pod_event_notify(event)` vs `notify_buffer.push_pod_event(event)`。両者は同じ `NotifyBuffer` を叩く（`pod.rs:845-846` は `self.pending_notifies.push_pod_event(event)` を呼ぶだけで、`notify_buffer_handle()` はその `pending_notifies.clone()` を返す）。**完全に等価**。
+2. **auto-kick**: idle 経路だけ `PendingRun::RunForNotification` を stage する。in-flight 経路は in-flight 自体が消化するので不要。
+
+つまり「event の処理本体」（side-effects + notify buffer への push）は同一で、後段の auto-kick だけが state-dependent な分岐。にもかかわらず関数化されておらず、片方をいじってもう片方を忘れると挙動が割れる。
+
+## 要件
+
+- side-effects 適用 + NotifyBuffer への typed push の流れを単一関数 `handle_inbound_pod_event` に切り出す。
+- `controller_loop` / `drive_turn` の両方からこのヘルパーを呼ぶ形に置き換える。
+- auto-kick (`PendingRun::RunForNotification` の stage) は呼び出し側の責務として残す。これは Pod のライフサイクル状態に依存した判断で、ヘルパー内には押し込めない。
+- 関数シグネチャは引数を最小化する。`event`、`spawned_registry`、`self_name: &str`、`self_parent_socket: &Option<PathBuf>` または `Option<&PathBuf>`、`notify_buffer: &NotifyBuffer` の 5 つで足りる前提。`Pod` への可変参照は不要（`notify_buffer` で代用可能）。
+- 動作変化なし。既存の `Method::PodEvent` 挙動（in-flight / idle 両方）が完全に同一で続行すること。
+
+## 完了条件
+
+- `controller.rs` 内に `apply_event_side_effects` 呼び出しが 1 箇所だけ残り、`controller_loop` と `drive_turn` の `Method::PodEvent` アームはどちらも `handle_inbound_pod_event(...)` 呼び出し + idle 経路のみ auto-kick stage、という形になる。
+- 既存の inbound PodEvent 関連テスト（特に `apply_event_side_effects` の idempotency や `notify_buffer` への typed push）が通る。
+
+## 範囲外
+
+- `apply_event_side_effects` 自体の中身変更。
+- `NotifyBuffer` API のリネーム / 統合。
+- `pod.push_pod_event_notify` の削除（[[pod-interrupt-prep-internalize]] と同じく将来の整理対象だが、本チケットでは外向き API は触らない）。
+
+
+---
--- a/work-items/closed/20260530-053259-multi-pod-parallel-status-probes/artifacts/.gitkeep
+++ b/work-items/closed/20260530-053259-multi-pod-parallel-status-probes/artifacts/.gitkeep
--- a/work-items/closed/20260530-053259-multi-pod-parallel-status-probes/item.md
+++ b/work-items/closed/20260530-053259-multi-pod-parallel-status-probes/item.md
@ -0,0 +1,45 @@
+---
+id: 20260530-053259-multi-pod-parallel-status-probes
+slug: multi-pod-parallel-status-probes
+title: Parallelize multi-Pod live status probes
+status: closed
+kind: task
+priority: P2
+labels: [tui, pod-dashboard, performance]
+created_at: 2026-05-30T05:32:59Z
+updated_at: 2026-05-30T05:45:37Z
+assignee: null
+legacy_ticket: null
+---
+
+## Background
+
+The `--multi` dashboard frequently shows `[live unknown]` for reachable Pods. Current code probes each runtime-registry socket with a very short `LIVE_STATUS_PROBE_TIMEOUT` of 25ms in `crates/tui/src/pod_list.rs`. A live row becomes `status = None` when the socket connects but no `Event::Snapshot` / `Event::Status` is read before that deadline.
+
+That label is misleading: the Pod is reachable, but status probing timed out or did not receive a status event quickly enough. Raising the timeout alone risks making dashboard reload latency scale linearly with the number of live Pods, because status probes are currently performed sequentially.
+
+## Requirements
+
+- Increase the live status probe timeout to a more realistic value, likely in the 150ms–250ms range.
+- Run live status probes concurrently so reload latency does not become the sum of all per-Pod timeouts.
+- Keep reachable Pods with missing status as live/attachable; do not treat status timeout as unreachable.
+- Keep restoreability separate from live attachability; this ticket must not make runtime-only Pods restorable.
+- Replace or soften the `live unknown` label in `--multi` so it communicates reachable-live-with-unreported-status rather than broken state. Candidate labels: `live`, `live probing`, or similar.
+- Keep the implementation in shared `PodList` / live probe code where possible; avoid duplicating dashboard-specific discovery logic.
+- Preserve existing behavior for explicitly reported `Idle`, `Running`, and `Paused` statuses.
+
+## Non-goals
+
+- Do not redesign Pod notification or run completion delivery.
+- Do not persist last-known status in pod-store.
+- Do not change `AttachOrRestorePod` or restore semantics.
+- Do not make unreachable registry allocations appear attachable.
+
+## Acceptance criteria
+
+- Multiple live Pod status probes wait concurrently, not strictly one after another.
+- The per-Pod timeout is long enough to significantly reduce false `status = None` cases compared to 25ms.
+- A reachable Pod whose status probe times out remains displayed as live and openable/attachable.
+- The multi-Pod row label for `status = None` is less misleading than `live unknown`.
+- Tests cover concurrent probing behavior, timeout/none-status handling, and label rendering.
+- `cargo test -p tui pod_list`, `cargo test -p tui multi_pod`, `cargo test -p tui`, `cargo fmt --check`, and `./tickets.sh doctor` pass.
--- a/work-items/closed/20260530-053259-multi-pod-parallel-status-probes/resolution.md
+++ b/work-items/closed/20260530-053259-multi-pod-parallel-status-probes/resolution.md
@ -0,0 +1,45 @@
+---
+id: 20260530-053259-multi-pod-parallel-status-probes
+slug: multi-pod-parallel-status-probes
+title: Parallelize multi-Pod live status probes
+status: closed
+kind: task
+priority: P2
+labels: [tui, pod-dashboard, performance]
+created_at: 2026-05-30T05:32:59Z
+updated_at: 2026-05-30T05:45:37Z
+assignee: null
+legacy_ticket: null
+---
+
+## Background
+
+The `--multi` dashboard frequently shows `[live unknown]` for reachable Pods. Current code probes each runtime-registry socket with a very short `LIVE_STATUS_PROBE_TIMEOUT` of 25ms in `crates/tui/src/pod_list.rs`. A live row becomes `status = None` when the socket connects but no `Event::Snapshot` / `Event::Status` is read before that deadline.
+
+That label is misleading: the Pod is reachable, but status probing timed out or did not receive a status event quickly enough. Raising the timeout alone risks making dashboard reload latency scale linearly with the number of live Pods, because status probes are currently performed sequentially.
+
+## Requirements
+
+- Increase the live status probe timeout to a more realistic value, likely in the 150ms–250ms range.
+- Run live status probes concurrently so reload latency does not become the sum of all per-Pod timeouts.
+- Keep reachable Pods with missing status as live/attachable; do not treat status timeout as unreachable.
+- Keep restoreability separate from live attachability; this ticket must not make runtime-only Pods restorable.
+- Replace or soften the `live unknown` label in `--multi` so it communicates reachable-live-with-unreported-status rather than broken state. Candidate labels: `live`, `live probing`, or similar.
+- Keep the implementation in shared `PodList` / live probe code where possible; avoid duplicating dashboard-specific discovery logic.
+- Preserve existing behavior for explicitly reported `Idle`, `Running`, and `Paused` statuses.
+
+## Non-goals
+
+- Do not redesign Pod notification or run completion delivery.
+- Do not persist last-known status in pod-store.
+- Do not change `AttachOrRestorePod` or restore semantics.
+- Do not make unreachable registry allocations appear attachable.
+
+## Acceptance criteria
+
+- Multiple live Pod status probes wait concurrently, not strictly one after another.
+- The per-Pod timeout is long enough to significantly reduce false `status = None` cases compared to 25ms.
+- A reachable Pod whose status probe times out remains displayed as live and openable/attachable.
+- The multi-Pod row label for `status = None` is less misleading than `live unknown`.
+- Tests cover concurrent probing behavior, timeout/none-status handling, and label rendering.
+- `cargo test -p tui pod_list`, `cargo test -p tui multi_pod`, `cargo test -p tui`, `cargo fmt --check`, and `./tickets.sh doctor` pass.
--- a/work-items/closed/20260530-053259-multi-pod-parallel-status-probes/thread.md
+++ b/work-items/closed/20260530-053259-multi-pod-parallel-status-probes/thread.md
@ -0,0 +1,116 @@
+<!-- event: create author: tickets.sh at: 2026-05-30T05:32:59Z -->
+
+## Created
+
+Created by tickets.sh create.
+
+---
+
+<!-- event: plan author: hare at: 2026-05-30T05:33:43Z -->
+
+## Plan
+
+## Preflight
+
+Classification: implementation-ready.
+
+The issue is localized to TUI live status probing and dashboard labeling. The product decision is settled: reachable Pods with missing status remain live/attachable, while status probe timeout should be less frequent and less alarming. No restore semantics or Pod runtime protocol changes are required.
+
+Current code map:
+- `crates/tui/src/pod_list.rs`: `LIVE_STATUS_PROBE_TIMEOUT`, `load_live_pods`, `probe_live_status`, `LivePodInfo`, `PodList` merge/sort tests.
+- `crates/tui/src/multi_pod.rs`: row label rendering for live status, currently `live unknown` when `LivePodInfo.status` is `None`.
+
+Critical risks:
+- Do not make reload latency scale as N * timeout.
+- Do not treat `status = None` as unreachable.
+- Do not change restoreability/attachability semantics.
+- Avoid duplicating discovery logic outside shared `PodList`.
+
+
+---
+
+<!-- event: review author: hare at: 2026-05-30T05:45:14Z status: approve -->
+
+## Review: approve
+
+Approve.
+
+The implementation addresses the ticket in the shared `PodList` live-probe path. Live status probes now run concurrently with a more realistic per-Pod timeout, reachable sockets without reported status remain live/attachable, and the multi-Pod label is softened from `live unknown` to `live` while explicit statuses remain unchanged.
+
+Blocker findings: none.
+
+Requirement coverage:
+- Per-Pod timeout increased to 200ms.
+- Probes run concurrently, so reload latency is bounded by the slowest individual probe rather than the sum `N * timeout`.
+- Reachable sockets with no status remain reachable/live/attachable.
+- Restoreability remains separate and unchanged.
+- No last-known status is persisted to pod-store.
+- Explicit `live idle`, `live running`, and `live paused` labels remain.
+- `status = None` renders as `live`.
+- The implementation changes shared `PodList` live-probe logic rather than duplicating dashboard-only logic.
+- Tests cover concurrency, timeout/no-status reachable handling, and label rendering.
+
+Validation reviewed:
+- `cargo fmt --check` — passed.
+- `cargo test -p tui pod_list` — passed.
+- `cargo test -p tui multi_pod` — passed.
+- `cargo test -p tui` — passed.
+- Reviewer additionally ran `./tickets.sh doctor` — passed.
+
+Final verdict: approve.
+
+
+---
+
+<!-- event: close author: hare at: 2026-05-30T05:45:37Z status: closed -->
+
+## Closed
+
+---
+id: 20260530-053259-multi-pod-parallel-status-probes
+slug: multi-pod-parallel-status-probes
+title: Parallelize multi-Pod live status probes
+status: closed
+kind: task
+priority: P2
+labels: [tui, pod-dashboard, performance]
+created_at: 2026-05-30T05:32:59Z
+updated_at: 2026-05-30T05:45:37Z
+assignee: null
+legacy_ticket: null
+---
+
+## Background
+
+The `--multi` dashboard frequently shows `[live unknown]` for reachable Pods. Current code probes each runtime-registry socket with a very short `LIVE_STATUS_PROBE_TIMEOUT` of 25ms in `crates/tui/src/pod_list.rs`. A live row becomes `status = None` when the socket connects but no `Event::Snapshot` / `Event::Status` is read before that deadline.
+
+That label is misleading: the Pod is reachable, but status probing timed out or did not receive a status event quickly enough. Raising the timeout alone risks making dashboard reload latency scale linearly with the number of live Pods, because status probes are currently performed sequentially.
+
+## Requirements
+
+- Increase the live status probe timeout to a more realistic value, likely in the 150ms–250ms range.
+- Run live status probes concurrently so reload latency does not become the sum of all per-Pod timeouts.
+- Keep reachable Pods with missing status as live/attachable; do not treat status timeout as unreachable.
+- Keep restoreability separate from live attachability; this ticket must not make runtime-only Pods restorable.
+- Replace or soften the `live unknown` label in `--multi` so it communicates reachable-live-with-unreported-status rather than broken state. Candidate labels: `live`, `live probing`, or similar.
+- Keep the implementation in shared `PodList` / live probe code where possible; avoid duplicating dashboard-specific discovery logic.
+- Preserve existing behavior for explicitly reported `Idle`, `Running`, and `Paused` statuses.
+
+## Non-goals
+
+- Do not redesign Pod notification or run completion delivery.
+- Do not persist last-known status in pod-store.
+- Do not change `AttachOrRestorePod` or restore semantics.
+- Do not make unreachable registry allocations appear attachable.
+
+## Acceptance criteria
+
+- Multiple live Pod status probes wait concurrently, not strictly one after another.
+- The per-Pod timeout is long enough to significantly reduce false `status = None` cases compared to 25ms.
+- A reachable Pod whose status probe times out remains displayed as live and openable/attachable.
+- The multi-Pod row label for `status = None` is less misleading than `live unknown`.
+- Tests cover concurrent probing behavior, timeout/none-status handling, and label rendering.
+- `cargo test -p tui pod_list`, `cargo test -p tui multi_pod`, `cargo test -p tui`, `cargo fmt --check`, and `./tickets.sh doctor` pass.
+
+
+---
--- a/work-items/open/20260527-000007-pod-inbound-pod-event-dedup/thread.md
+++ b/work-items/open/20260527-000007-pod-inbound-pod-event-dedup/thread.md
@ -1,7 +0,0 @@
-<!-- event: migration author: tickets.sh-migration at: 2026-05-27T00:00:07Z -->
-
-## Migrated
-
-Migrated from tickets/pod-inbound-pod-event-dedup.md. No legacy review file was present at migration time.
-
---
--- a/work-items/open/20260530-053721-tui-inflight-composer-injection/artifacts/.gitkeep
+++ b/work-items/open/20260530-053721-tui-inflight-composer-injection/artifacts/.gitkeep
--- a/work-items/open/20260530-053721-tui-inflight-composer-injection/item.md
+++ b/work-items/open/20260530-053721-tui-inflight-composer-injection/item.md
@ -0,0 +1,74 @@
+---
+id: 20260530-053721-tui-inflight-composer-injection
+slug: tui-inflight-composer-injection
+title: Support immediate in-flight TUI composer injection
+status: open
+kind: feature
+priority: P2
+labels: [tui, worker, interrupt, ux]
+created_at: 2026-05-30T05:37:21Z
+updated_at: 2026-05-30T05:38:11Z
+assignee: null
+legacy_ticket: null
+---
+
+## Background
+
+The TUI currently lets the user press Enter while a Pod is executing, but that input is queued for the next turn. This is useful when the user wants to continue the task after the current run finishes.
+
+There is a separate UX need: while the model is in the middle of a long run with tool calls, the user may want to send urgent supplemental context that should be seen as soon as possible, ideally between tool calls / LLM calls during the current run. This is different from ordinary queued input.
+
+We want both modes:
+
+- **after-run queue**: “when this task finishes, continue with this next request.”
+- **in-flight injection**: “while you are still working, please incorporate this additional context as soon as safe.”
+
+This ticket is for designing and implementing an explicit TUI path for the second mode without breaking the existing queued-input behavior.
+
+## Requirements
+
+- Preserve the current Enter-while-running behavior as the after-run queue.
+- Add an explicit user action / keybinding / command for immediate in-flight injection while a run is active.
+- In-flight injected text must be delivered through the Pod/Worker history path, not as hidden context-only injection. It must satisfy the project principle that new input placed into LLM context is first appended to `worker.history` / persisted history.
+- In-flight injection should be consumed at safe boundaries, such as before the next LLM request or between tool-call cycles, not by mutating an already-open provider stream.
+- The UI must make the distinction visible: queued-for-next-turn vs injected-into-current-run.
+- If no run is active, the immediate-injection action should either behave like a normal submit or clearly report that there is no in-flight run to inject into.
+- If the current turn cannot accept in-flight input at a safe boundary, the UI should fail closed or fall back to explicit queued mode with a visible notice; do not silently drop input.
+- Preserve TUI-local input history behavior for submitted/queued text.
+
+## Non-goals
+
+- Do not interrupt/cancel the current run as part of this ticket.
+- Do not mutate provider streams already in progress.
+- Do not introduce hidden system-reminder/context-only messages that are not recorded in history.
+- Do not remove the existing queued composer behavior.
+- Do not redesign the entire Pod notification/input protocol unless a small typed Method/Event extension is required.
+
+## Open design questions
+
+- What should the TUI action be?
+  - Separate command such as `:inject`?
+  - Modified Enter keybinding such as Ctrl+Enter / Alt+Enter?
+  - Action menu entry?
+- What Pod protocol shape is best?
+  - Existing `Method::Notify` may already represent in-flight user-visible context, but its semantics must be checked.
+  - A new typed method such as `Method::InjectInput` may be clearer if `Notify` is too generic.
+- What history item should represent the injected text?
+  - User item?
+  - System item with user-originated note?
+  - Existing Notify / PodEvent item?
+- What exact safe boundaries are supported in `Worker` / controller today?
+  - before the next LLM request;
+  - before resuming after tool results;
+  - while a tool call is running;
+  - while a provider stream is open.
+- How should the UI display pending in-flight injection versus after-run queue?
+
+## Acceptance criteria
+
+- TUI users can choose between after-run queued submit and immediate in-flight injection while a Pod is running.
+- In-flight injected input is recorded in history before it can influence an LLM request.
+- In-flight injection is consumed only at safe boundaries and never mutates an active provider stream.
+- The TUI visibly distinguishes queued-next-turn input from injected-current-run input.
+- Existing queued Enter behavior remains intact.
+- Tests cover TUI input routing, protocol/controller handling, worker history append behavior, and safe-boundary behavior.
--- a/work-items/open/20260530-053721-tui-inflight-composer-injection/thread.md
+++ b/work-items/open/20260530-053721-tui-inflight-composer-injection/thread.md
@ -0,0 +1,33 @@
+<!-- event: create author: tickets.sh at: 2026-05-30T05:37:21Z -->
+
+## Created
+
+Created by tickets.sh create.
+
+---
+
+<!-- event: plan author: hare at: 2026-05-30T05:38:11Z -->
+
+## Plan
+
+### Initial preflight
+
+Classification: requirements-sync-needed.
+
+The user requirement is clear at the UX level: Enter while running remains an after-run queue, and a separate action should inject supplemental context into the current in-flight run as soon as safe. The exact protocol/history representation is not decided yet and must be designed before implementation.
+
+Critical constraints:
+- Do not place injected text into LLM context unless it has first been appended to Worker history / persisted history.
+- Do not mutate an active provider stream.
+- Consume injected text only at safe boundaries such as before a later LLM request or between tool-call cycles.
+- Do not silently drop text; if the active turn cannot accept injection, report/fail closed or explicitly queue.
+
+Design questions to settle before coding:
+- TUI action/keybinding/command name.
+- Whether existing `Method::Notify` is semantically sufficient or a new typed method is needed.
+- Which history item represents user-originated in-flight supplemental context.
+- Which Worker/controller boundaries can actually observe injected input before the next LLM call.
+- How queued-next-turn vs injected-current-run is displayed.
+
+
+---
Author	SHA1	Message	Date
Hare	97df1a4086	close: multi pod status probes	2026-05-30 14:45:39 +09:00
Hare	d8051af226	merge: multi pod status probes	2026-05-30 14:45:14 +09:00
Hare	f74cf78187	review: multi pod status probes	2026-05-30 14:45:14 +09:00
Hare	1ba99cdf8a	tui: probe multi-pod statuses concurrently	2026-05-30 14:40:53 +09:00
Hare	6899b446c7	ticket: add tui inflight injection artifacts dir	2026-05-30 14:38:21 +09:00
Hare	5b7b8fa37c	ticket: add tui inflight injection	2026-05-30 14:38:11 +09:00
Hare	205eb7bacb	refactor: deduplicate inbound pod events	2026-05-30 14:37:02 +09:00
Hare	45d2c67689	ticket: add multi pod status artifacts dir	2026-05-30 14:33:53 +09:00
Hare	17c0b8d0fe	ticket: add multi pod status probe task	2026-05-30 14:33:43 +09:00