Compare commits
9 Commits
7c387d60a8
...
97df1a4086
| Author | SHA1 | Date | |
|---|---|---|---|
| 97df1a4086 | |||
| d8051af226 | |||
| f74cf78187 | |||
| 1ba99cdf8a | |||
| 6899b446c7 | |||
| 5b7b8fa37c | |||
| 205eb7bacb | |||
| 45d2c67689 | |||
| 17c0b8d0fe |
|
|
@ -900,28 +900,15 @@ async fn controller_loop<C, St>(
|
|||
Method::ListCompletions { .. } => {}
|
||||
|
||||
Method::PodEvent(event) => {
|
||||
// For agent-visible PodEvents, live echo travels through the
|
||||
// SystemItem lane: once the interceptor drains the notify buffer,
|
||||
// the typed `SystemItem::PodEvent` lands as a
|
||||
// `LogEntry::SystemItem` entry and the sink forwards it
|
||||
// to clients as `Event::SystemItem`. Control-plane-only
|
||||
// PodEvents use this same receive path only for side effects.
|
||||
//
|
||||
// (1) system side effects — idempotent and tolerant of
|
||||
// out-of-order delivery (e.g. `TurnEnded` arriving
|
||||
// after `ShutDown`).
|
||||
crate::ipc::event::apply_event_side_effects(
|
||||
&event,
|
||||
if handle_inbound_pod_event(
|
||||
event,
|
||||
&spawned_registry,
|
||||
&spawner_name,
|
||||
&self_parent_socket,
|
||||
self_parent_socket.as_ref(),
|
||||
&notify_buffer,
|
||||
)
|
||||
.await;
|
||||
// (2) agent-visible events enter the notification/history lane.
|
||||
// Control-plane-only events (currently ScopeSubDelegated)
|
||||
// stop after side effects so they do not wake or notify the LLM.
|
||||
if event.should_notify_agent() {
|
||||
pod.push_pod_event_notify(event);
|
||||
.await
|
||||
{
|
||||
// Auto-kick a turn if the Pod is idle so the
|
||||
// notification is not stranded. Matches the
|
||||
// `Method::Notify` idle path.
|
||||
|
|
@ -961,6 +948,35 @@ async fn controller_loop<C, St>(
|
|||
let _ = shutdown_tx.send(());
|
||||
}
|
||||
|
||||
/// Apply an inbound child `PodEvent` exactly once.
|
||||
///
|
||||
/// Side effects are control-plane state updates and upward propagation; they
|
||||
/// run for every event. Only agent-visible events are staged on the notify
|
||||
/// buffer. The caller owns lifecycle-dependent follow-up such as idle
|
||||
/// `RunForNotification` auto-kick.
|
||||
async fn handle_inbound_pod_event(
|
||||
event: protocol::PodEvent,
|
||||
spawned_registry: &Arc<SpawnedPodRegistry>,
|
||||
self_name: &str,
|
||||
parent_socket: Option<&PathBuf>,
|
||||
notify_buffer: &NotifyBuffer,
|
||||
) -> bool {
|
||||
let self_parent_socket = parent_socket.cloned();
|
||||
crate::ipc::event::apply_event_side_effects(
|
||||
&event,
|
||||
spawned_registry,
|
||||
self_name,
|
||||
&self_parent_socket,
|
||||
)
|
||||
.await;
|
||||
|
||||
let notify_agent = event.should_notify_agent();
|
||||
if notify_agent {
|
||||
notify_buffer.push_pod_event(event);
|
||||
}
|
||||
notify_agent
|
||||
}
|
||||
|
||||
/// Drives a Pod future (one in-flight turn) while concurrently
|
||||
/// processing incoming methods through an inner select! arm. Returns
|
||||
/// `(final_status, shutdown_requested)`.
|
||||
|
|
@ -1095,23 +1111,17 @@ where
|
|||
// mpsc is consume-once, so we cannot defer this
|
||||
// to the next main-loop iteration — drop here
|
||||
// would lose the event entirely (children fire
|
||||
// and forget). Apply the side effects inline
|
||||
// and, for agent-visible variants, stage the typed
|
||||
// event on the notification buffer so the in-flight
|
||||
// turn's next `pending_history_appends` surfaces it
|
||||
// as a typed `SystemItem::PodEvent`. Control-plane-only
|
||||
// variants stop after side effects.
|
||||
let self_parent_socket = parent_socket.cloned();
|
||||
crate::ipc::event::apply_event_side_effects(
|
||||
&event,
|
||||
// and forget). Auto-kick remains unnecessary here:
|
||||
// the in-flight turn will drain agent-visible events
|
||||
// from the notify buffer on its next history append.
|
||||
handle_inbound_pod_event(
|
||||
event,
|
||||
spawned_registry,
|
||||
self_name,
|
||||
&self_parent_socket,
|
||||
parent_socket,
|
||||
notify_buffer,
|
||||
)
|
||||
.await;
|
||||
if event.should_notify_agent() {
|
||||
notify_buffer.push_pod_event(event);
|
||||
}
|
||||
}
|
||||
None => {
|
||||
let _ = cancel_tx.try_send(());
|
||||
|
|
|
|||
|
|
@ -656,7 +656,7 @@ fn row_status_label(entry: &PodListEntry) -> (&'static str, Style) {
|
|||
.fg(Color::Cyan)
|
||||
.add_modifier(Modifier::BOLD),
|
||||
),
|
||||
None => ("live unknown", Style::default().fg(Color::DarkGray)),
|
||||
None => ("live", Style::default().fg(Color::DarkGray)),
|
||||
};
|
||||
}
|
||||
if entry
|
||||
|
|
@ -1194,6 +1194,31 @@ mod tests {
|
|||
assert!(app.selected_send_disabled_reason().is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn multi_status_label_for_live_without_reported_status_is_softened() {
|
||||
let mut live = live_info("probing", PodStatus::Idle);
|
||||
live.status = None;
|
||||
let app = test_app(vec![live]);
|
||||
|
||||
let (label, _) = row_status_label(app.list.selected_entry().unwrap());
|
||||
|
||||
assert_eq!(label, "live");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn multi_status_labels_preserve_explicit_live_statuses() {
|
||||
for (status, expected_label) in [
|
||||
(PodStatus::Idle, "live idle"),
|
||||
(PodStatus::Running, "live running"),
|
||||
(PodStatus::Paused, "live paused"),
|
||||
] {
|
||||
let app = test_app(vec![live_info("pod", status)]);
|
||||
let (label, _) = row_status_label(app.list.selected_entry().unwrap());
|
||||
|
||||
assert_eq!(label, expected_label);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn multi_running_paused_and_stopped_targets_are_direct_send_disabled() {
|
||||
let mut app = test_app(vec![
|
||||
|
|
|
|||
|
|
@ -291,19 +291,39 @@ pub(crate) async fn read_reachable_live_pod_infos(
|
|||
store: &FsStore,
|
||||
) -> Result<Vec<LivePodInfo>, io::Error> {
|
||||
let records = read_live_pod_infos()?;
|
||||
let mut reachable = Vec::new();
|
||||
for mut record in records {
|
||||
let Ok(status) = probe_live_status(&record.socket_path).await else {
|
||||
probe_reachable_live_pod_infos(store, records).await
|
||||
}
|
||||
|
||||
async fn probe_reachable_live_pod_infos(
|
||||
store: &FsStore,
|
||||
records: Vec<LivePodInfo>,
|
||||
) -> Result<Vec<LivePodInfo>, io::Error> {
|
||||
let mut handles = Vec::with_capacity(records.len());
|
||||
for record in records {
|
||||
handles.push(tokio::spawn(probe_live_pod_info(record)));
|
||||
}
|
||||
|
||||
let mut reachable = Vec::with_capacity(handles.len());
|
||||
for handle in handles {
|
||||
let result = handle
|
||||
.await
|
||||
.map_err(|e| io::Error::other(format!("live status probe task failed: {e}")))?;
|
||||
let Ok(mut record) = result else {
|
||||
continue;
|
||||
};
|
||||
record.reachable = true;
|
||||
record.status = status;
|
||||
record.summary = summarize_live_pod(store, &record);
|
||||
reachable.push(record);
|
||||
}
|
||||
Ok(reachable)
|
||||
}
|
||||
|
||||
async fn probe_live_pod_info(mut record: LivePodInfo) -> Result<LivePodInfo, io::Error> {
|
||||
let status = probe_live_status(&record.socket_path).await?;
|
||||
record.reachable = true;
|
||||
record.status = status;
|
||||
Ok(record)
|
||||
}
|
||||
|
||||
pub(crate) fn live_socket_for_pod(pod_name: &str) -> Option<PathBuf> {
|
||||
read_live_pod_infos()
|
||||
.ok()?
|
||||
|
|
@ -343,7 +363,7 @@ fn corrupt_stored_info(pod_name: String, message: String) -> StoredPodInfo {
|
|||
}
|
||||
}
|
||||
|
||||
const LIVE_STATUS_PROBE_TIMEOUT: Duration = Duration::from_millis(25);
|
||||
const LIVE_STATUS_PROBE_TIMEOUT: Duration = Duration::from_millis(200);
|
||||
|
||||
async fn probe_live_status(socket_path: &Path) -> Result<Option<PodStatus>, io::Error> {
|
||||
let mut client = PodClient::connect(socket_path).await?;
|
||||
|
|
@ -561,11 +581,16 @@ fn trim_one_line(s: &str, max_chars: usize) -> String {
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::sync::Arc;
|
||||
|
||||
use llm_worker::llm_client::types::RequestConfig;
|
||||
use pod_store::FsPodStore;
|
||||
use pod_store::{PodActiveSegmentRef, PodMetadataStore};
|
||||
use protocol::stream::JsonLineWriter;
|
||||
use session_store::{new_segment_id, new_session_id};
|
||||
use tempfile::tempdir;
|
||||
use tokio::net::UnixListener;
|
||||
use tokio::sync::Barrier;
|
||||
|
||||
const SOURCE: PodVisibilitySource = PodVisibilitySource::ResumePicker;
|
||||
|
||||
|
|
@ -752,6 +777,30 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn live_reachable_row_without_reported_status_can_open_but_not_send_now() {
|
||||
let mut live = live_info("live", PodStatus::Idle);
|
||||
live.status = None;
|
||||
live.reachable = true;
|
||||
|
||||
let entry = single_entry(PodList::from_sources(SOURCE, vec![], vec![live], None, 10));
|
||||
|
||||
assert!(entry.actions.can_open);
|
||||
assert!(!entry.actions.can_restore);
|
||||
assert!(!entry.actions.can_send_now);
|
||||
assert!(!entry.actions.can_queue_send);
|
||||
assert_eq!(
|
||||
entry.attach_socket_path(),
|
||||
Some(Path::new("/tmp/live.sock"))
|
||||
);
|
||||
assert!(
|
||||
!entry
|
||||
.diagnostics
|
||||
.iter()
|
||||
.any(|diagnostic| diagnostic.kind == PodEntryDiagnosticKind::LiveUnreachable)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn live_running_reachable_row_can_open_but_not_send_now() {
|
||||
let entry = single_entry(PodList::from_sources(
|
||||
|
|
@ -811,6 +860,82 @@ mod tests {
|
|||
assert_eq!(status, Some(PodStatus::Idle));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn live_status_probes_run_concurrently() {
|
||||
let store_dir = tempdir().unwrap();
|
||||
let store = FsStore::new(store_dir.path()).unwrap();
|
||||
let socket_dir = tempdir().unwrap();
|
||||
let probe_count = 3;
|
||||
let barrier = Arc::new(Barrier::new(probe_count));
|
||||
let mut records = Vec::new();
|
||||
let mut servers = Vec::new();
|
||||
|
||||
for index in 0..probe_count {
|
||||
let pod_name = format!("pod-{index}");
|
||||
let socket_path = socket_dir.path().join(format!("{pod_name}.sock"));
|
||||
let listener = UnixListener::bind(&socket_path).unwrap();
|
||||
let barrier = Arc::clone(&barrier);
|
||||
servers.push(tokio::spawn(async move {
|
||||
let (stream, _) = listener.accept().await.unwrap();
|
||||
barrier.wait().await;
|
||||
let mut writer = JsonLineWriter::new(stream);
|
||||
writer
|
||||
.write(&Event::Status {
|
||||
status: PodStatus::Idle,
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
}));
|
||||
records.push(live_probe_record(&pod_name, socket_path));
|
||||
}
|
||||
|
||||
let records = tokio::time::timeout(
|
||||
LIVE_STATUS_PROBE_TIMEOUT * 3,
|
||||
probe_reachable_live_pod_infos(&store, records),
|
||||
)
|
||||
.await
|
||||
.expect("status probes should complete")
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(records.len(), probe_count);
|
||||
assert!(records.iter().all(|record| record.reachable));
|
||||
assert!(
|
||||
records
|
||||
.iter()
|
||||
.all(|record| record.status == Some(PodStatus::Idle))
|
||||
);
|
||||
for server in servers {
|
||||
server.await.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn live_status_probe_timeout_still_marks_socket_reachable() {
|
||||
let store_dir = tempdir().unwrap();
|
||||
let store = FsStore::new(store_dir.path()).unwrap();
|
||||
let socket_dir = tempdir().unwrap();
|
||||
let socket_path = socket_dir.path().join("silent.sock");
|
||||
let listener = UnixListener::bind(&socket_path).unwrap();
|
||||
let server = tokio::spawn(async move {
|
||||
let (_stream, _) = listener.accept().await.unwrap();
|
||||
std::future::pending::<()>().await;
|
||||
});
|
||||
|
||||
let records = probe_reachable_live_pod_infos(
|
||||
&store,
|
||||
vec![live_probe_record("silent", socket_path.clone())],
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(records.len(), 1);
|
||||
assert_eq!(records[0].pod_name, "silent");
|
||||
assert!(records[0].reachable);
|
||||
assert_eq!(records[0].status, None);
|
||||
assert_eq!(records[0].socket_path, socket_path);
|
||||
server.abort();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn corrupt_stored_metadata_has_diagnostic() {
|
||||
let entry = single_entry(PodList::from_sources(
|
||||
|
|
@ -985,6 +1110,17 @@ mod tests {
|
|||
}
|
||||
}
|
||||
|
||||
fn live_probe_record(pod_name: &str, socket_path: PathBuf) -> LivePodInfo {
|
||||
LivePodInfo {
|
||||
pod_name: pod_name.to_string(),
|
||||
socket_path,
|
||||
status: None,
|
||||
reachable: false,
|
||||
segment_id: None,
|
||||
summary: PodEntrySummary::default(),
|
||||
}
|
||||
}
|
||||
|
||||
fn test_greeting() -> protocol::Greeting {
|
||||
protocol::Greeting {
|
||||
pod_name: "live".to_string(),
|
||||
|
|
|
|||
|
|
@ -2,12 +2,12 @@
|
|||
id: 20260527-000007-pod-inbound-pod-event-dedup
|
||||
slug: pod-inbound-pod-event-dedup
|
||||
title: Inbound PodEvent ハンドリングの重複を統合する
|
||||
status: open
|
||||
status: closed
|
||||
kind: task
|
||||
priority: P2
|
||||
labels: [migrated]
|
||||
created_at: 2026-05-27T00:00:07Z
|
||||
updated_at: 2026-05-27T00:00:07Z
|
||||
updated_at: 2026-05-30T05:37:00Z
|
||||
assignee: null
|
||||
legacy_ticket: tickets/pod-inbound-pod-event-dedup.md
|
||||
---
|
||||
|
|
@ -0,0 +1,76 @@
|
|||
---
|
||||
id: 20260527-000007-pod-inbound-pod-event-dedup
|
||||
slug: pod-inbound-pod-event-dedup
|
||||
title: Inbound PodEvent ハンドリングの重複を統合する
|
||||
status: closed
|
||||
kind: task
|
||||
priority: P2
|
||||
labels: [migrated]
|
||||
created_at: 2026-05-27T00:00:07Z
|
||||
updated_at: 2026-05-30T05:37:00Z
|
||||
assignee: null
|
||||
legacy_ticket: tickets/pod-inbound-pod-event-dedup.md
|
||||
---
|
||||
|
||||
## Migration reference
|
||||
|
||||
- legacy_ticket: tickets/pod-inbound-pod-event-dedup.md
|
||||
- migrated_from: TODO.md / tickets directory migration on 2026-05-27
|
||||
|
||||
# Inbound PodEvent ハンドリングの重複を統合する
|
||||
|
||||
## 背景
|
||||
|
||||
子 Pod から `Method::PodEvent(event)` を受けたときの処理が `controller_loop` と `drive_turn` の 2 箇所にコピーされている。
|
||||
|
||||
`controller.rs:693-720`(idle / paused 中):
|
||||
|
||||
```rust
|
||||
Method::PodEvent(event) => {
|
||||
crate::ipc::event::apply_event_side_effects(
|
||||
&event, &spawned_registry, &spawner_name, &self_parent_socket,
|
||||
).await;
|
||||
pod.push_pod_event_notify(event);
|
||||
if shared_state.get_status() == PodStatus::Idle {
|
||||
pending = Some(PendingRun::RunForNotification);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
`controller.rs:861-879`(in-flight turn 中):
|
||||
|
||||
```rust
|
||||
Some(Method::PodEvent(event)) => {
|
||||
let self_parent_socket = parent_socket.cloned();
|
||||
crate::ipc::event::apply_event_side_effects(
|
||||
&event, spawned_registry, self_name, &self_parent_socket,
|
||||
).await;
|
||||
notify_buffer.push_pod_event(event);
|
||||
}
|
||||
```
|
||||
|
||||
差分は 2 点:
|
||||
|
||||
1. **buffer への push 経路**: `pod.push_pod_event_notify(event)` vs `notify_buffer.push_pod_event(event)`。両者は同じ `NotifyBuffer` を叩く(`pod.rs:845-846` は `self.pending_notifies.push_pod_event(event)` を呼ぶだけで、`notify_buffer_handle()` はその `pending_notifies.clone()` を返す)。**完全に等価**。
|
||||
2. **auto-kick**: idle 経路だけ `PendingRun::RunForNotification` を stage する。in-flight 経路は in-flight 自体が消化するので不要。
|
||||
|
||||
つまり「event の処理本体」(side-effects + notify buffer への push)は同一で、後段の auto-kick だけが state-dependent な分岐。にもかかわらず関数化されておらず、片方をいじってもう片方を忘れると挙動が割れる。
|
||||
|
||||
## 要件
|
||||
|
||||
- side-effects 適用 + NotifyBuffer への typed push の流れを単一関数 `handle_inbound_pod_event` に切り出す。
|
||||
- `controller_loop` / `drive_turn` の両方からこのヘルパーを呼ぶ形に置き換える。
|
||||
- auto-kick (`PendingRun::RunForNotification` の stage) は呼び出し側の責務として残す。これは Pod のライフサイクル状態に依存した判断で、ヘルパー内には押し込めない。
|
||||
- 関数シグネチャは引数を最小化する。`event`、`spawned_registry`、`self_name: &str`、`self_parent_socket: &Option<PathBuf>` または `Option<&PathBuf>`、`notify_buffer: &NotifyBuffer` の 5 つで足りる前提。`Pod` への可変参照は不要(`notify_buffer` で代用可能)。
|
||||
- 動作変化なし。既存の `Method::PodEvent` 挙動(in-flight / idle 両方)が完全に同一で続行すること。
|
||||
|
||||
## 完了条件
|
||||
|
||||
- `controller.rs` 内に `apply_event_side_effects` 呼び出しが 1 箇所だけ残り、`controller_loop` と `drive_turn` の `Method::PodEvent` アームはどちらも `handle_inbound_pod_event(...)` 呼び出し + idle 経路のみ auto-kick stage、という形になる。
|
||||
- 既存の inbound PodEvent 関連テスト(特に `apply_event_side_effects` の idempotency や `notify_buffer` への typed push)が通る。
|
||||
|
||||
## 範囲外
|
||||
|
||||
- `apply_event_side_effects` 自体の中身変更。
|
||||
- `NotifyBuffer` API のリネーム / 統合。
|
||||
- `pod.push_pod_event_notify` の削除([[pod-interrupt-prep-internalize]] と同じく将来の整理対象だが、本チケットでは外向き API は触らない)。
|
||||
|
|
@ -0,0 +1,91 @@
|
|||
<!-- event: migration author: tickets.sh-migration at: 2026-05-27T00:00:07Z -->
|
||||
|
||||
## Migrated
|
||||
|
||||
Migrated from tickets/pod-inbound-pod-event-dedup.md. No legacy review file was present at migration time.
|
||||
|
||||
---
|
||||
|
||||
<!-- event: close author: hare at: 2026-05-30T05:37:00Z status: closed -->
|
||||
|
||||
## Closed
|
||||
|
||||
---
|
||||
id: 20260527-000007-pod-inbound-pod-event-dedup
|
||||
slug: pod-inbound-pod-event-dedup
|
||||
title: Inbound PodEvent ハンドリングの重複を統合する
|
||||
status: closed
|
||||
kind: task
|
||||
priority: P2
|
||||
labels: [migrated]
|
||||
created_at: 2026-05-27T00:00:07Z
|
||||
updated_at: 2026-05-30T05:37:00Z
|
||||
assignee: null
|
||||
legacy_ticket: tickets/pod-inbound-pod-event-dedup.md
|
||||
---
|
||||
|
||||
## Migration reference
|
||||
|
||||
- legacy_ticket: tickets/pod-inbound-pod-event-dedup.md
|
||||
- migrated_from: TODO.md / tickets directory migration on 2026-05-27
|
||||
|
||||
# Inbound PodEvent ハンドリングの重複を統合する
|
||||
|
||||
## 背景
|
||||
|
||||
子 Pod から `Method::PodEvent(event)` を受けたときの処理が `controller_loop` と `drive_turn` の 2 箇所にコピーされている。
|
||||
|
||||
`controller.rs:693-720`(idle / paused 中):
|
||||
|
||||
```rust
|
||||
Method::PodEvent(event) => {
|
||||
crate::ipc::event::apply_event_side_effects(
|
||||
&event, &spawned_registry, &spawner_name, &self_parent_socket,
|
||||
).await;
|
||||
pod.push_pod_event_notify(event);
|
||||
if shared_state.get_status() == PodStatus::Idle {
|
||||
pending = Some(PendingRun::RunForNotification);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
`controller.rs:861-879`(in-flight turn 中):
|
||||
|
||||
```rust
|
||||
Some(Method::PodEvent(event)) => {
|
||||
let self_parent_socket = parent_socket.cloned();
|
||||
crate::ipc::event::apply_event_side_effects(
|
||||
&event, spawned_registry, self_name, &self_parent_socket,
|
||||
).await;
|
||||
notify_buffer.push_pod_event(event);
|
||||
}
|
||||
```
|
||||
|
||||
差分は 2 点:
|
||||
|
||||
1. **buffer への push 経路**: `pod.push_pod_event_notify(event)` vs `notify_buffer.push_pod_event(event)`。両者は同じ `NotifyBuffer` を叩く(`pod.rs:845-846` は `self.pending_notifies.push_pod_event(event)` を呼ぶだけで、`notify_buffer_handle()` はその `pending_notifies.clone()` を返す)。**完全に等価**。
|
||||
2. **auto-kick**: idle 経路だけ `PendingRun::RunForNotification` を stage する。in-flight 経路は in-flight 自体が消化するので不要。
|
||||
|
||||
つまり「event の処理本体」(side-effects + notify buffer への push)は同一で、後段の auto-kick だけが state-dependent な分岐。にもかかわらず関数化されておらず、片方をいじってもう片方を忘れると挙動が割れる。
|
||||
|
||||
## 要件
|
||||
|
||||
- side-effects 適用 + NotifyBuffer への typed push の流れを単一関数 `handle_inbound_pod_event` に切り出す。
|
||||
- `controller_loop` / `drive_turn` の両方からこのヘルパーを呼ぶ形に置き換える。
|
||||
- auto-kick (`PendingRun::RunForNotification` の stage) は呼び出し側の責務として残す。これは Pod のライフサイクル状態に依存した判断で、ヘルパー内には押し込めない。
|
||||
- 関数シグネチャは引数を最小化する。`event`、`spawned_registry`、`self_name: &str`、`self_parent_socket: &Option<PathBuf>` または `Option<&PathBuf>`、`notify_buffer: &NotifyBuffer` の 5 つで足りる前提。`Pod` への可変参照は不要(`notify_buffer` で代用可能)。
|
||||
- 動作変化なし。既存の `Method::PodEvent` 挙動(in-flight / idle 両方)が完全に同一で続行すること。
|
||||
|
||||
## 完了条件
|
||||
|
||||
- `controller.rs` 内に `apply_event_side_effects` 呼び出しが 1 箇所だけ残り、`controller_loop` と `drive_turn` の `Method::PodEvent` アームはどちらも `handle_inbound_pod_event(...)` 呼び出し + idle 経路のみ auto-kick stage、という形になる。
|
||||
- 既存の inbound PodEvent 関連テスト(特に `apply_event_side_effects` の idempotency や `notify_buffer` への typed push)が通る。
|
||||
|
||||
## 範囲外
|
||||
|
||||
- `apply_event_side_effects` 自体の中身変更。
|
||||
- `NotifyBuffer` API のリネーム / 統合。
|
||||
- `pod.push_pod_event_notify` の削除([[pod-interrupt-prep-internalize]] と同じく将来の整理対象だが、本チケットでは外向き API は触らない)。
|
||||
|
||||
|
||||
---
|
||||
|
|
@ -0,0 +1,45 @@
|
|||
---
|
||||
id: 20260530-053259-multi-pod-parallel-status-probes
|
||||
slug: multi-pod-parallel-status-probes
|
||||
title: Parallelize multi-Pod live status probes
|
||||
status: closed
|
||||
kind: task
|
||||
priority: P2
|
||||
labels: [tui, pod-dashboard, performance]
|
||||
created_at: 2026-05-30T05:32:59Z
|
||||
updated_at: 2026-05-30T05:45:37Z
|
||||
assignee: null
|
||||
legacy_ticket: null
|
||||
---
|
||||
|
||||
## Background
|
||||
|
||||
The `--multi` dashboard frequently shows `[live unknown]` for reachable Pods. Current code probes each runtime-registry socket with a very short `LIVE_STATUS_PROBE_TIMEOUT` of 25ms in `crates/tui/src/pod_list.rs`. A live row becomes `status = None` when the socket connects but no `Event::Snapshot` / `Event::Status` is read before that deadline.
|
||||
|
||||
That label is misleading: the Pod is reachable, but status probing timed out or did not receive a status event quickly enough. Raising the timeout alone risks making dashboard reload latency scale linearly with the number of live Pods, because status probes are currently performed sequentially.
|
||||
|
||||
## Requirements
|
||||
|
||||
- Increase the live status probe timeout to a more realistic value, likely in the 150ms–250ms range.
|
||||
- Run live status probes concurrently so reload latency does not become the sum of all per-Pod timeouts.
|
||||
- Keep reachable Pods with missing status as live/attachable; do not treat status timeout as unreachable.
|
||||
- Keep restoreability separate from live attachability; this ticket must not make runtime-only Pods restorable.
|
||||
- Replace or soften the `live unknown` label in `--multi` so it communicates reachable-live-with-unreported-status rather than broken state. Candidate labels: `live`, `live probing`, or similar.
|
||||
- Keep the implementation in shared `PodList` / live probe code where possible; avoid duplicating dashboard-specific discovery logic.
|
||||
- Preserve existing behavior for explicitly reported `Idle`, `Running`, and `Paused` statuses.
|
||||
|
||||
## Non-goals
|
||||
|
||||
- Do not redesign Pod notification or run completion delivery.
|
||||
- Do not persist last-known status in pod-store.
|
||||
- Do not change `AttachOrRestorePod` or restore semantics.
|
||||
- Do not make unreachable registry allocations appear attachable.
|
||||
|
||||
## Acceptance criteria
|
||||
|
||||
- Multiple live Pod status probes wait concurrently, not strictly one after another.
|
||||
- The per-Pod timeout is long enough to significantly reduce false `status = None` cases compared to 25ms.
|
||||
- A reachable Pod whose status probe times out remains displayed as live and openable/attachable.
|
||||
- The multi-Pod row label for `status = None` is less misleading than `live unknown`.
|
||||
- Tests cover concurrent probing behavior, timeout/none-status handling, and label rendering.
|
||||
- `cargo test -p tui pod_list`, `cargo test -p tui multi_pod`, `cargo test -p tui`, `cargo fmt --check`, and `./tickets.sh doctor` pass.
|
||||
|
|
@ -0,0 +1,45 @@
|
|||
---
|
||||
id: 20260530-053259-multi-pod-parallel-status-probes
|
||||
slug: multi-pod-parallel-status-probes
|
||||
title: Parallelize multi-Pod live status probes
|
||||
status: closed
|
||||
kind: task
|
||||
priority: P2
|
||||
labels: [tui, pod-dashboard, performance]
|
||||
created_at: 2026-05-30T05:32:59Z
|
||||
updated_at: 2026-05-30T05:45:37Z
|
||||
assignee: null
|
||||
legacy_ticket: null
|
||||
---
|
||||
|
||||
## Background
|
||||
|
||||
The `--multi` dashboard frequently shows `[live unknown]` for reachable Pods. Current code probes each runtime-registry socket with a very short `LIVE_STATUS_PROBE_TIMEOUT` of 25ms in `crates/tui/src/pod_list.rs`. A live row becomes `status = None` when the socket connects but no `Event::Snapshot` / `Event::Status` is read before that deadline.
|
||||
|
||||
That label is misleading: the Pod is reachable, but status probing timed out or did not receive a status event quickly enough. Raising the timeout alone risks making dashboard reload latency scale linearly with the number of live Pods, because status probes are currently performed sequentially.
|
||||
|
||||
## Requirements
|
||||
|
||||
- Increase the live status probe timeout to a more realistic value, likely in the 150ms–250ms range.
|
||||
- Run live status probes concurrently so reload latency does not become the sum of all per-Pod timeouts.
|
||||
- Keep reachable Pods with missing status as live/attachable; do not treat status timeout as unreachable.
|
||||
- Keep restoreability separate from live attachability; this ticket must not make runtime-only Pods restorable.
|
||||
- Replace or soften the `live unknown` label in `--multi` so it communicates reachable-live-with-unreported-status rather than broken state. Candidate labels: `live`, `live probing`, or similar.
|
||||
- Keep the implementation in shared `PodList` / live probe code where possible; avoid duplicating dashboard-specific discovery logic.
|
||||
- Preserve existing behavior for explicitly reported `Idle`, `Running`, and `Paused` statuses.
|
||||
|
||||
## Non-goals
|
||||
|
||||
- Do not redesign Pod notification or run completion delivery.
|
||||
- Do not persist last-known status in pod-store.
|
||||
- Do not change `AttachOrRestorePod` or restore semantics.
|
||||
- Do not make unreachable registry allocations appear attachable.
|
||||
|
||||
## Acceptance criteria
|
||||
|
||||
- Multiple live Pod status probes wait concurrently, not strictly one after another.
|
||||
- The per-Pod timeout is long enough to significantly reduce false `status = None` cases compared to 25ms.
|
||||
- A reachable Pod whose status probe times out remains displayed as live and openable/attachable.
|
||||
- The multi-Pod row label for `status = None` is less misleading than `live unknown`.
|
||||
- Tests cover concurrent probing behavior, timeout/none-status handling, and label rendering.
|
||||
- `cargo test -p tui pod_list`, `cargo test -p tui multi_pod`, `cargo test -p tui`, `cargo fmt --check`, and `./tickets.sh doctor` pass.
|
||||
|
|
@ -0,0 +1,116 @@
|
|||
<!-- event: create author: tickets.sh at: 2026-05-30T05:32:59Z -->
|
||||
|
||||
## Created
|
||||
|
||||
Created by tickets.sh create.
|
||||
|
||||
---
|
||||
|
||||
<!-- event: plan author: hare at: 2026-05-30T05:33:43Z -->
|
||||
|
||||
## Plan
|
||||
|
||||
## Preflight
|
||||
|
||||
Classification: implementation-ready.
|
||||
|
||||
The issue is localized to TUI live status probing and dashboard labeling. The product decision is settled: reachable Pods with missing status remain live/attachable, while status probe timeout should be less frequent and less alarming. No restore semantics or Pod runtime protocol changes are required.
|
||||
|
||||
Current code map:
|
||||
- `crates/tui/src/pod_list.rs`: `LIVE_STATUS_PROBE_TIMEOUT`, `load_live_pods`, `probe_live_status`, `LivePodInfo`, `PodList` merge/sort tests.
|
||||
- `crates/tui/src/multi_pod.rs`: row label rendering for live status, currently `live unknown` when `LivePodInfo.status` is `None`.
|
||||
|
||||
Critical risks:
|
||||
- Do not make reload latency scale as N * timeout.
|
||||
- Do not treat `status = None` as unreachable.
|
||||
- Do not change restoreability/attachability semantics.
|
||||
- Avoid duplicating discovery logic outside shared `PodList`.
|
||||
|
||||
|
||||
---
|
||||
|
||||
<!-- event: review author: hare at: 2026-05-30T05:45:14Z status: approve -->
|
||||
|
||||
## Review: approve
|
||||
|
||||
Approve.
|
||||
|
||||
The implementation addresses the ticket in the shared `PodList` live-probe path. Live status probes now run concurrently with a more realistic per-Pod timeout, reachable sockets without reported status remain live/attachable, and the multi-Pod label is softened from `live unknown` to `live` while explicit statuses remain unchanged.
|
||||
|
||||
Blocker findings: none.
|
||||
|
||||
Requirement coverage:
|
||||
- Per-Pod timeout increased to 200ms.
|
||||
- Probes run concurrently, so reload latency is bounded by the slowest single probe (each probe being individually time-bounded) rather than `N * timeout`.
|
||||
- Reachable sockets with no status remain reachable/live/attachable.
|
||||
- Restoreability remains separate and unchanged.
|
||||
- No last-known status is persisted to pod-store.
|
||||
- Explicit `live idle`, `live running`, and `live paused` labels remain.
|
||||
- `status = None` renders as `live`.
|
||||
- The implementation changes shared `PodList` live-probe logic rather than duplicating dashboard-only logic.
|
||||
- Tests cover concurrency, timeout/no-status reachable handling, and label rendering.
|
||||
|
||||
Validation reviewed:
|
||||
- `cargo fmt --check` — passed.
|
||||
- `cargo test -p tui pod_list` — passed.
|
||||
- `cargo test -p tui multi_pod` — passed.
|
||||
- `cargo test -p tui` — passed.
|
||||
- Reviewer additionally ran `./tickets.sh doctor` — passed.
|
||||
|
||||
Final verdict: approve.
|
||||
|
||||
|
||||
---
|
||||
|
||||
<!-- event: close author: hare at: 2026-05-30T05:45:37Z status: closed -->
|
||||
|
||||
## Closed
|
||||
|
||||
---
|
||||
id: 20260530-053259-multi-pod-parallel-status-probes
|
||||
slug: multi-pod-parallel-status-probes
|
||||
title: Parallelize multi-Pod live status probes
|
||||
status: closed
|
||||
kind: task
|
||||
priority: P2
|
||||
labels: [tui, pod-dashboard, performance]
|
||||
created_at: 2026-05-30T05:32:59Z
|
||||
updated_at: 2026-05-30T05:45:37Z
|
||||
assignee: null
|
||||
legacy_ticket: null
|
||||
---
|
||||
|
||||
## Background
|
||||
|
||||
The `--multi` dashboard frequently shows `[live unknown]` for reachable Pods. Current code probes each runtime-registry socket with a very short `LIVE_STATUS_PROBE_TIMEOUT` of 25ms in `crates/tui/src/pod_list.rs`. A live row becomes `status = None` when the socket connects but no `Event::Snapshot` / `Event::Status` is read before that deadline.
|
||||
|
||||
That label is misleading: the Pod is reachable, but status probing timed out or did not receive a status event quickly enough. Raising the timeout alone risks making dashboard reload latency scale linearly with the number of live Pods, because status probes are currently performed sequentially.
|
||||
|
||||
## Requirements
|
||||
|
||||
- Increase the live status probe timeout to a more realistic value, likely in the 150ms–250ms range.
|
||||
- Run live status probes concurrently so reload latency does not become the sum of all per-Pod timeouts.
|
||||
- Keep reachable Pods with missing status as live/attachable; do not treat status timeout as unreachable.
|
||||
- Keep restoreability separate from live attachability; this ticket must not make runtime-only Pods restorable.
|
||||
- Replace or soften the `live unknown` label in `--multi` so it communicates reachable-live-with-unreported-status rather than broken state. Candidate labels: `live`, `live probing`, or similar.
|
||||
- Keep the implementation in shared `PodList` / live probe code where possible; avoid duplicating dashboard-specific discovery logic.
|
||||
- Preserve existing behavior for explicitly reported `Idle`, `Running`, and `Paused` statuses.
|
||||
|
||||
## Non-goals
|
||||
|
||||
- Do not redesign Pod notification or run completion delivery.
|
||||
- Do not persist last-known status in pod-store.
|
||||
- Do not change `AttachOrRestorePod` or restore semantics.
|
||||
- Do not make unreachable registry allocations appear attachable.
|
||||
|
||||
## Acceptance criteria
|
||||
|
||||
- Multiple live Pod status probes wait concurrently, not strictly one after another.
|
||||
- The per-Pod timeout is long enough to significantly reduce false `status = None` cases compared to 25ms.
|
||||
- A reachable Pod whose status probe times out remains displayed as live and openable/attachable.
|
||||
- The multi-Pod row label for `status = None` is less misleading than `live unknown`.
|
||||
- Tests cover concurrent probing behavior, timeout/none-status handling, and label rendering.
|
||||
- `cargo test -p tui pod_list`, `cargo test -p tui multi_pod`, `cargo test -p tui`, `cargo fmt --check`, and `./tickets.sh doctor` pass.
|
||||
|
||||
|
||||
---
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
<!-- event: migration author: tickets.sh-migration at: 2026-05-27T00:00:07Z -->
|
||||
|
||||
## Migrated
|
||||
|
||||
Migrated from tickets/pod-inbound-pod-event-dedup.md. No legacy review file was present at migration time.
|
||||
|
||||
---
|
||||
|
|
@ -0,0 +1,74 @@
|
|||
---
|
||||
id: 20260530-053721-tui-inflight-composer-injection
|
||||
slug: tui-inflight-composer-injection
|
||||
title: Support immediate in-flight TUI composer injection
|
||||
status: open
|
||||
kind: feature
|
||||
priority: P2
|
||||
labels: [tui, worker, interrupt, ux]
|
||||
created_at: 2026-05-30T05:37:21Z
|
||||
updated_at: 2026-05-30T05:38:11Z
|
||||
assignee: null
|
||||
legacy_ticket: null
|
||||
---
|
||||
|
||||
## Background
|
||||
|
||||
The TUI currently lets the user press Enter while a Pod is executing, but that input is queued for the next turn. This is useful when the user wants to continue the task after the current run finishes.
|
||||
|
||||
There is a separate UX need: while the model is in the middle of a long run with tool calls, the user may want to send urgent supplemental context that should be seen as soon as possible, ideally between tool calls / LLM calls during the current run. This is different from ordinary queued input.
|
||||
|
||||
We want both modes:
|
||||
|
||||
- **after-run queue**: “when this task finishes, continue with this next request.”
|
||||
- **in-flight injection**: “while you are still working, please incorporate this additional context as soon as safe.”
|
||||
|
||||
This ticket is for designing and implementing an explicit TUI path for the second mode without breaking the existing queued-input behavior.
|
||||
|
||||
## Requirements
|
||||
|
||||
- Preserve the current Enter-while-running behavior as the after-run queue.
|
||||
- Add an explicit user action / keybinding / command for immediate in-flight injection while a run is active.
|
||||
- In-flight injected text must be delivered through the Pod/Worker history path, not as hidden context-only injection. It must satisfy the project principle that new input placed into LLM context is first appended to `worker.history` / persisted history.
|
||||
- In-flight injection should be consumed at safe boundaries, such as before the next LLM request or between tool-call cycles, not by mutating an already-open provider stream.
|
||||
- The UI must make the distinction visible: queued-for-next-turn vs injected-into-current-run.
|
||||
- If no run is active, the immediate-injection action should either behave like a normal submit or clearly report that there is no in-flight run to inject into.
|
||||
- If the current turn cannot accept in-flight input at a safe boundary, the UI should fail closed or fall back to explicit queued mode with a visible notice; do not silently drop input.
|
||||
- Preserve TUI-local input history behavior for submitted/queued text.
|
||||
|
||||
## Non-goals
|
||||
|
||||
- Do not interrupt/cancel the current run as part of this ticket.
|
||||
- Do not mutate provider streams already in progress.
|
||||
- Do not introduce hidden system-reminder/context-only messages that are not recorded in history.
|
||||
- Do not remove the existing queued composer behavior.
|
||||
- Do not redesign the entire Pod notification/input protocol unless a small typed Method/Event extension is required.
|
||||
|
||||
## Open design questions
|
||||
|
||||
- What should the TUI action be?
|
||||
  - Separate command such as `:inject`?
|
||||
  - Modified Enter keybinding such as Ctrl+Enter / Alt+Enter?
|
||||
  - Action menu entry?
|
||||
- What Pod protocol shape is best?
|
||||
  - Existing `Method::Notify` may already represent in-flight user-visible context, but semantics must be checked.
|
||||
  - A new typed method such as `Method::InjectInput` may be clearer if `Notify` is too generic.
|
||||
- What history item should represent the injected text?
|
||||
  - User item?
|
||||
  - System item with user-originated note?
|
||||
  - Existing Notify / PodEvent item?
|
||||
- What exact safe boundaries are supported in `Worker` / controller today?
|
||||
  - before the next LLM request;
|
||||
  - before resuming after tool results;
|
||||
  - while a tool call is running;
|
||||
  - while provider stream is open.
|
||||
- How should the UI display a pending in-flight injection versus the after-run queue?
|
||||
|
||||
## Acceptance criteria
|
||||
|
||||
- TUI users can choose between after-run queued submit and immediate in-flight injection while a Pod is running.
|
||||
- In-flight injected input is recorded in history before it can influence an LLM request.
|
||||
- In-flight injection is consumed only at safe boundaries and never mutates an active provider stream.
|
||||
- The TUI visibly distinguishes queued-next-turn input from injected-current-run input.
|
||||
- Existing queued Enter behavior remains intact.
|
||||
- Tests cover TUI input routing, protocol/controller handling, worker history append behavior, and safe-boundary behavior.
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
<!-- event: create author: tickets.sh at: 2026-05-30T05:37:21Z -->
|
||||
|
||||
## Created
|
||||
|
||||
Created by tickets.sh create.
|
||||
|
||||
---
|
||||
|
||||
<!-- event: plan author: hare at: 2026-05-30T05:38:11Z -->
|
||||
|
||||
## Plan
|
||||
|
||||
### Initial preflight
|
||||
|
||||
Classification: requirements-sync-needed.
|
||||
|
||||
The user requirement is clear at the UX level: Enter while running remains an after-run queue, and a separate action should inject supplemental context into the current in-flight run as soon as safe. The exact protocol/history representation is not decided yet and must be designed before implementation.
|
||||
|
||||
Critical constraints:
|
||||
- Do not place injected text into LLM context unless it has first been appended to Worker history / persisted history.
|
||||
- Do not mutate an active provider stream.
|
||||
- Consume injected text only at safe boundaries such as before a later LLM request or between tool-call cycles.
|
||||
- Do not silently drop text; if the active turn cannot accept injection, report/fail closed or explicitly queue.
|
||||
|
||||
Design questions to settle before coding:
|
||||
- TUI action/keybinding/command name.
|
||||
- Whether existing `Method::Notify` is semantically sufficient or a new typed method is needed.
|
||||
- Which history item represents user-originated in-flight supplemental context.
|
||||
- Which Worker/controller boundaries can actually observe injected input before the next LLM call.
|
||||
- How queued-next-turn vs injected-current-run is displayed.
|
||||
|
||||
|
||||
---
|
||||
Loading…
Reference in New Issue
Block a user