diff --git a/Cargo.lock b/Cargo.lock index bb141e52..e25bc661 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2130,6 +2130,7 @@ dependencies = [ "protocol", "provider", "schemars", + "scope-lock", "serde", "serde_json", "session-store", @@ -2746,6 +2747,20 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "scope-lock" +version = "0.1.0" +dependencies = [ + "fs4", + "libc", + "manifest", + "serde", + "serde_json", + "session-store", + "tempfile", + "thiserror 2.0.18", +] + [[package]] name = "scopeguard" version = "1.2.0" @@ -3574,6 +3589,7 @@ dependencies = [ "manifest", "protocol", "ratatui", + "scope-lock", "serde_json", "session-store", "tokio", diff --git a/Cargo.toml b/Cargo.toml index 0d8a0195..f871a5ec 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,7 @@ members = [ "crates/pod", "crates/protocol", "crates/provider", + "crates/scope-lock", "crates/tools", "crates/tui", "crates/memory", ] diff --git a/crates/pod/Cargo.toml b/crates/pod/Cargo.toml index 8115afbf..ff4e2783 100644 --- a/crates/pod/Cargo.toml +++ b/crates/pod/Cargo.toml @@ -12,6 +12,7 @@ session-store = { version = "0.1.0", path = "../session-store" } manifest = { version = "0.1.0", path = "../manifest" } protocol = { version = "0.1.0", path = "../protocol" } provider = { version = "0.1.0", path = "../provider" } +scope-lock = { version = "0.1.0", path = "../scope-lock" } serde = { version = "1.0.228", features = ["derive"] } serde_json = "1.0.149" thiserror = "2.0" diff --git a/crates/pod/src/main.rs b/crates/pod/src/main.rs index ba07d6d9..0aa85d5e 100644 --- a/crates/pod/src/main.rs +++ b/crates/pod/src/main.rs @@ -44,9 +44,9 @@ struct Cli { #[arg(long, value_name = "PATH", requires = "adopt")] callback: Option, - /// Restore a Pod from an existing session. The source session log - /// is forked at its head into a new session id, so the original - /// jsonl is left untouched and double-write races are impossible. + /// Restore a Pod from an existing session. The Pod re-uses the + /// given session id and appends new turns to the same jsonl; + /// concurrent writers are prevented by the `scope.lock` registry. /// Mutually exclusive with `--adopt` (spawned children always start /// fresh). #[arg(long, value_name = "UUID", conflicts_with = "adopt")] diff --git a/crates/pod/src/pod.rs b/crates/pod/src/pod.rs index 0678ace7..9de25981 100644 --- a/crates/pod/src/pod.rs +++ b/crates/pod/src/pod.rs @@ -100,10 +100,11 @@ pub struct Pod { /// PodInterceptor installed in `ensure_interceptor_installed`. pending_notifies: NotifyBuffer, /// Scope allocation in the machine-wide lock file. `Some` for - /// Pods built via `from_manifest` (production path); `None` for - /// lower-level constructors (`Pod::new`, `Pod::restore`) that - /// bypass the registry. Kept purely for its `Drop` impl, which - /// releases the allocation when the Pod is dropped. + /// Pods built via `from_manifest` / `from_manifest_spawned` / + /// `restore_from_manifest` (production paths); `None` for the + /// low-level `Pod::new` constructor used in tests, which bypasses + /// the registry. Kept purely for its `Drop` impl, which releases + /// the allocation when the Pod is dropped. #[allow(dead_code)] scope_allocation: Option, /// Socket path of the spawning Pod. `Some` only for Pods built via @@ -717,6 +718,7 @@ impl Pod { self.head_hash = Some(hash); return Ok(()); } + let prev_session_id = self.session_id; session_store::ensure_head_or_fork( &self.store, &mut self.session_id, @@ -724,6 +726,13 @@ impl Pod { state, ) .await?; + // ensure_head_or_fork mints a fresh session_id when it auto- + // forks. Sync that to scope.lock so a concurrent + // restore_from_manifest can't see "no live writer" for the new + // session and grab it. + if self.session_id != prev_session_id && self.scope_allocation.is_some() { + scope_lock::update_session(&self.manifest.pod.name, self.session_id)?; + } Ok(()) } @@ -1155,6 +1164,15 @@ impl Pod { // until its first LLM call. self.session_id = new_session_id; self.head_hash = Some(new_head_hash); + // Keep scope.lock pointing at the live session_id. Without this + // a concurrent `restore_from_manifest(new_session_id)` would + // see no live writer and grab the session this Pod just moved + // into, causing two writers to race on the same jsonl. Skipped + // when no allocation is installed (e.g. compact under + // `Pod::new` in tests). + if self.scope_allocation.is_some() { + scope_lock::update_session(&self.manifest.pod.name, new_session_id)?; + } let worker = self.worker.as_mut().unwrap(); worker.set_history(new_history); // Anchor the prompt cache at the summary item so that Anthropic @@ -1602,15 +1620,6 @@ impl Pod, St> { store: St, loader: PromptLoader, ) -> Result { - // Refuse to resume into a session that's already being written. - if let Some(info) = scope_lock::lookup_session(session_id)? { - return Err(PodError::SessionInUse { - session_id, - pod_name: info.pod_name, - socket: info.socket, - }); - } - let state = session_store::restore(&store, session_id).await?; if state.head_hash.is_none() { return Err(PodError::SessionEmpty { session_id }); @@ -1618,6 +1627,11 @@ impl Pod, St> { let common = prepare_pod_common(&manifest, &loader, /* parse_template */ false)?; + // Atomic: register_pod inside install_top_level rejects when + // another live allocation already holds `session_id`. Wrapping + // the lookup + install inside a single `LockFileGuard` is what + // makes "no two live Pods write to the same session log" + // actually structural rather than a hopeful pre-check. let socket_path = dir::default_base() .map_err(ScopeLockError::from)? .join(&manifest.pod.name) @@ -1883,16 +1897,6 @@ pub enum PodError { #[error("memory Phase 1 staging write failed: {0}")] ExtractStaging(#[source] memory::extract::StagingError), - #[error( - "session {session_id} is currently in use by pod `{pod_name}` at {}", - .socket.display() - )] - SessionInUse { - session_id: SessionId, - pod_name: String, - socket: PathBuf, - }, - #[error("session {session_id} has no entries to restore")] SessionEmpty { session_id: SessionId }, } diff --git a/crates/pod/src/runtime/mod.rs b/crates/pod/src/runtime/mod.rs index d47100ae..6d331d14 100644 --- a/crates/pod/src/runtime/mod.rs +++ b/crates/pod/src/runtime/mod.rs @@ -1,2 +1,2 @@ pub mod dir; -pub mod scope_lock; +pub use ::scope_lock; diff --git a/crates/pod/src/spawn/tool.rs b/crates/pod/src/spawn/tool.rs index 9eefd2af..530ba48e 100644 --- a/crates/pod/src/spawn/tool.rs +++ b/crates/pod/src/spawn/tool.rs @@ -441,7 +441,8 @@ fn scope_lock_err_to_tool(e: ScopeLockError) -> ToolError { ScopeLockError::NotSubset { .. } | ScopeLockError::WriteConflict { .. } | ScopeLockError::DuplicatePodName(_) - | ScopeLockError::UnknownPod(_) => ToolError::InvalidArgument(e.to_string()), + | ScopeLockError::UnknownPod(_) + | ScopeLockError::SessionConflict { .. } => ToolError::InvalidArgument(e.to_string()), ScopeLockError::Io(_) => ToolError::ExecutionFailed(e.to_string()), } } diff --git a/crates/pod/tests/restore_test.rs b/crates/pod/tests/restore_test.rs new file mode 100644 index 00000000..e82ffd65 --- /dev/null +++ b/crates/pod/tests/restore_test.rs @@ -0,0 +1,86 @@ +//! Integration tests for `Pod::restore_from_manifest`'s pre-build +//! validation paths. +//! +//! These cases all return before `prepare_pod_common` runs, so they +//! do not need a real LLM client or scope-lock environment — only the +//! session store needs to be present. + +use std::sync::{LazyLock, Mutex}; + +use pod::{Pod, PodError}; +use session_store::{FsStore, SessionId, StoreError}; + +const MINIMAL_MANIFEST_TOML: &str = r#" +[pod] +name = "restore-test" +pwd = "./" + +[model] +scheme = "anthropic" +model_id = "test-model" + +[worker] +max_tokens = 100 + +[[scope.allow]] +target = "./" +permission = "write" +"#; + +/// Serialises tests that mutate runtime-dir env vars, mirroring the +/// pattern used by other integration tests in this crate. +static ENV_LOCK: LazyLock> = LazyLock::new(|| Mutex::new(())); + +#[tokio::test] +async fn restore_from_manifest_rejects_unknown_session() { + let _lock = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); + + let store_tmp = tempfile::tempdir().unwrap(); + let store = FsStore::new(store_tmp.path()).await.unwrap(); + let manifest = pod::PodManifest::from_toml(MINIMAL_MANIFEST_TOML).unwrap(); + + // A freshly-minted id with no jsonl file at all → store returns + // NotFound, which `Pod::restore_from_manifest` surfaces verbatim + // as `PodError::Store`. + let unknown = session_store::new_session_id(); + let result = Pod::restore_from_manifest( + unknown, + manifest, + store, + pod::PromptLoader::builtins_only(), + ) + .await; + + match result { + Err(PodError::Store(StoreError::NotFound(id))) => assert_eq!(id, unknown), + Err(other) => panic!("expected Store(NotFound), got {other:?}"), + Ok(_) => panic!("expected unknown session to fail"), + } +} + +#[tokio::test] +async fn restore_from_manifest_rejects_empty_session_log() { + let _lock = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); + + let store_tmp = tempfile::tempdir().unwrap(); + let store = FsStore::new(store_tmp.path()).await.unwrap(); + let manifest = pod::PodManifest::from_toml(MINIMAL_MANIFEST_TOML).unwrap(); + + // Pre-create an empty `.jsonl` so `read_all` succeeds with no + // entries. `collect_state` returns `head_hash = None`, which + // `restore_from_manifest` rejects with `SessionEmpty` *before* it + // gets as far as building the LLM client — so the test does not + // need credentials or a runtime sandbox. + let id: SessionId = session_store::new_session_id(); + let path = store_tmp.path().join(format!("{id}.jsonl")); + std::fs::write(&path, b"").unwrap(); + + let result = + Pod::restore_from_manifest(id, manifest, store, pod::PromptLoader::builtins_only()).await; + + match result { + Err(PodError::SessionEmpty { session_id }) => assert_eq!(session_id, id), + Err(other) => panic!("expected SessionEmpty, got {other:?}"), + Ok(_) => panic!("expected empty session log to fail"), + } +} diff --git a/crates/scope-lock/Cargo.toml b/crates/scope-lock/Cargo.toml new file mode 100644 index 00000000..3c88c03b --- /dev/null +++ b/crates/scope-lock/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "scope-lock" +version = "0.1.0" +edition.workspace = true +license.workspace = true + +[dependencies] +fs4 = { version = "0.13.1", features = ["sync"] } +libc = "0.2.185" +manifest = { version = "0.1.0", path = "../manifest" } +serde = { version = "1.0.228", features = ["derive"] } +serde_json = "1.0.149" +session-store = { version = "0.1.0", path = "../session-store" } +thiserror = "2.0" + +[dev-dependencies] +tempfile = "3.27.0" diff --git a/crates/pod/src/runtime/scope_lock.rs b/crates/scope-lock/src/lib.rs similarity index 83% rename from crates/pod/src/runtime/scope_lock.rs rename to crates/scope-lock/src/lib.rs index 27416d6e..5e7a5187 100644 --- a/crates/pod/src/runtime/scope_lock.rs +++ b/crates/scope-lock/src/lib.rs @@ -303,6 +303,10 @@ fn find_conflict_in_subtree( /// Register a top-level Pod (started directly by a human, no /// delegation parent). Reclaims stale entries before checking /// conflicts so a crashed Pod's allocation doesn't block the new one. +/// +/// Rejects when another live allocation is already writing to +/// `session_id`, so two `restore_from_manifest` calls under different +/// `pod_name`s cannot both grab the same session log. pub fn register_pod( guard: &mut LockFileGuard, pod_name: String, @@ -315,6 +319,13 @@ pub fn register_pod( if guard.data().find(&pod_name).is_some() { return Err(ScopeLockError::DuplicatePodName(pod_name)); } + if let Some(existing) = guard.data().find_by_session(session_id) { + return Err(ScopeLockError::SessionConflict { + session_id, + pod_name: existing.pod_name.clone(), + socket: existing.socket.clone(), + }); + } for rule in scope_allow .iter() .filter(|r| r.permission == Permission::Write) @@ -550,6 +561,46 @@ pub fn adopt_allocation( }) } +/// Rewrite the `session_id` recorded for `pod_name` to +/// `new_session_id`. +/// +/// The Pod's in-memory `session_id` can change underneath the +/// allocation in two normal places: +/// +/// - `Pod::compact` mints a fresh session and swaps it in. +/// - `session_store::ensure_head_or_fork` auto-forks when another +/// writer has advanced the store head behind our back. +/// +/// Both paths must call this so subsequent `lookup_session` queries +/// find the live session id, not the old one. Without this update a +/// concurrent `restore_from_manifest(new_id)` would see "no live +/// writer" and proceed to register a competing allocation on the +/// session this Pod just moved into. +/// +/// The lock is opened once and the allocation is rewritten inside the +/// guard, so the session_id collision check is atomic with the +/// rewrite. +pub fn update_session(pod_name: &str, new_session_id: SessionId) -> Result<(), ScopeLockError> { + let lock_path = default_lock_path()?; + let mut guard = LockFileGuard::open(&lock_path)?; + if let Some(other) = guard.data().find_by_session(new_session_id) { + if other.pod_name != pod_name { + return Err(ScopeLockError::SessionConflict { + session_id: new_session_id, + pod_name: other.pod_name.clone(), + socket: other.socket.clone(), + }); + } + } + let alloc = guard + .data_mut() + .find_mut(pod_name) + .ok_or_else(|| ScopeLockError::UnknownPod(pod_name.into()))?; + alloc.session_id = Some(new_session_id); + guard.save()?; + Ok(()) +} + /// Information about a Pod that currently holds an allocation for a /// given session. #[derive(Debug, Clone)] @@ -593,6 +644,15 @@ pub enum ScopeLockError { NotSubset { spawner: String, rule: ScopeRule }, #[error("pod `{0}` is not registered")] UnknownPod(String), + #[error( + "session {session_id} is already held by pod `{pod_name}` at {}", + .socket.display() + )] + SessionConflict { + session_id: SessionId, + pod_name: String, + socket: PathBuf, + }, } #[cfg(test)] @@ -1200,4 +1260,164 @@ mod tests { other => panic!("expected WriteConflict, got {other:?}"), } } + + #[test] + fn find_by_session_skips_none_placeholders() { + let dir = TempDir::new().unwrap(); + let path = dir.path().join("scope.lock"); + let mut g = open_empty(&path); + // Pre-reservation: delegate_scope leaves session_id = None + // until adopt_allocation rewrites it. find_by_session must not + // match those placeholders, otherwise a freshly-spawning child + // would shadow itself before it has even chosen a session. + register_pod( + &mut g, + "parent".into(), + std::process::id(), + sock("parent"), + vec![write_rule("/p", true)], + sid(), + ) + .unwrap(); + delegate_scope( + &mut g, + "parent", + "child".into(), + std::process::id(), + sock("child"), + vec![write_rule("/p/sub", true)], + ) + .unwrap(); + + let target_session = sid(); + // The placeholder allocation has session_id = None and must + // not be returned for any lookup. + assert!(g.data().find_by_session(target_session).is_none()); + + // After adopt-style rewrite, the same allocation is now found. + g.data_mut() + .find_mut("child") + .unwrap() + .session_id = Some(target_session); + let found = g.data().find_by_session(target_session).unwrap(); + assert_eq!(found.pod_name, "child"); + } + + #[test] + fn register_pod_rejects_session_id_collision() { + let dir = TempDir::new().unwrap(); + let path = dir.path().join("scope.lock"); + let mut g = open_empty(&path); + let shared_session = sid(); + register_pod( + &mut g, + "first".into(), + std::process::id(), + sock("first"), + vec![write_rule("/work/a", true)], + shared_session, + ) + .unwrap(); + // Second registration tries to grab the same session_id under + // a different pod_name. Without the SessionConflict check both + // would succeed and race on the same jsonl. + let err = register_pod( + &mut g, + "second".into(), + std::process::id(), + sock("second"), + vec![write_rule("/work/b", true)], + shared_session, + ) + .unwrap_err(); + match err { + ScopeLockError::SessionConflict { + session_id, + pod_name, + .. + } => { + assert_eq!(session_id, shared_session); + assert_eq!(pod_name, "first"); + } + other => panic!("expected SessionConflict, got {other:?}"), + } + } + + #[test] + fn lookup_session_returns_live_writer_info() { + let dir = TempDir::new().unwrap(); + let _sandbox = RuntimeDirSandbox::new(dir.path()); + let s = sid(); + let guard = install_top_level( + "live".into(), + std::process::id(), + sock("live"), + vec![write_rule("/work", true)], + s, + ) + .unwrap(); + let info = lookup_session(s).unwrap().expect("expected live writer"); + assert_eq!(info.pod_name, "live"); + assert_eq!(info.socket, sock("live")); + drop(guard); + // After the guard's release, the lookup goes back to None. + assert!(lookup_session(s).unwrap().is_none()); + } + + #[test] + fn update_session_rewrites_allocation_session_id() { + let dir = TempDir::new().unwrap(); + let _sandbox = RuntimeDirSandbox::new(dir.path()); + let original = sid(); + let updated = sid(); + let _guard = install_top_level( + "p".into(), + std::process::id(), + sock("p"), + vec![write_rule("/work", true)], + original, + ) + .unwrap(); + update_session("p", updated).unwrap(); + // lookup against the original is now empty, the updated id wins. + assert!(lookup_session(original).unwrap().is_none()); + assert_eq!(lookup_session(updated).unwrap().unwrap().pod_name, "p"); + } + + #[test] + fn update_session_rejects_when_target_already_held() { + let dir = TempDir::new().unwrap(); + let _sandbox = RuntimeDirSandbox::new(dir.path()); + let s_a = sid(); + let s_b = sid(); + let _g_a = install_top_level( + "a".into(), + std::process::id(), + sock("a"), + vec![write_rule("/work/a", true)], + s_a, + ) + .unwrap(); + let _g_b = install_top_level( + "b".into(), + std::process::id(), + sock("b"), + vec![write_rule("/work/b", true)], + s_b, + ) + .unwrap(); + // `a` cannot adopt b's live session id. + let err = update_session("a", s_b).unwrap_err(); + match err { + ScopeLockError::SessionConflict { + pod_name, + session_id, + .. + } => { + assert_eq!(pod_name, "b"); + assert_eq!(session_id, s_b); + } + other => panic!("expected SessionConflict, got {other:?}"), + } + } } diff --git a/crates/tui/Cargo.toml b/crates/tui/Cargo.toml index 06c24bba..4e795838 100644 --- a/crates/tui/Cargo.toml +++ b/crates/tui/Cargo.toml @@ -15,3 +15,4 @@ uuid = "1.23" toml = "1.1.2" manifest = { version = "0.1.0", path = "../manifest" } session-store = { version = "0.1.0", path = "../session-store" } +scope-lock = { version = "0.1.0", path = "../scope-lock" } diff --git a/crates/tui/src/picker.rs b/crates/tui/src/picker.rs index 3ba7a694..e0726811 100644 --- a/crates/tui/src/picker.rs +++ b/crates/tui/src/picker.rs @@ -19,6 +19,7 @@ use ratatui::style::{Color, Modifier, Style}; use ratatui::text::{Line, Span}; use ratatui::widgets::Paragraph; use ratatui::{Frame, TerminalOptions, Viewport}; +use scope_lock::lookup_session; use session_store::{ ContentPart, FsStore, HashedEntry, Item, LogEntry, SessionId, Store, }; @@ -71,6 +72,11 @@ struct Row { id: SessionId, /// Last user / assistant snippet, or a `[corrupt]` placeholder. preview: String, + /// `Some(pod_name)` when a live Pod currently holds an allocation + /// for this session in `scope.lock`. Picking such a row launches + /// `pod --session ` which will fail with `SessionConflict` — + /// the badge warns the user up-front. + live_pod: Option, } pub async fn run() -> Result { @@ -82,7 +88,15 @@ pub async fn run() -> Result { let mut rows: Vec = Vec::with_capacity(MAX_ROWS); for id in ids.into_iter().take(MAX_ROWS) { let preview = build_preview(&store, id).await; - rows.push(Row { id, preview }); + // Best-effort live check. A scope.lock I/O hiccup downgrades + // the row to "no badge" rather than killing the picker — the + // user still gets to see the listing. + let live_pod = lookup_session(id).ok().flatten().map(|info| info.pod_name); + rows.push(Row { + id, + preview, + live_pod, + }); } let mut selected = 0usize; @@ -291,12 +305,19 @@ fn row_line(row: &Row, selected: bool) -> Line<'_> { } else { Style::default().fg(Color::DarkGray) }; - Line::from(vec![ + let mut spans = vec![ Span::raw(marker), Span::styled(short_session(row.id), id_style), Span::raw(" "), - Span::styled(row.preview.clone(), preview_style), - ]) + ]; + if let Some(ref pod_name) = row.live_pod { + spans.push(Span::styled( + format!("[live: {pod_name}] "), + Style::default().fg(Color::Red).add_modifier(Modifier::BOLD), + )); + } + spans.push(Span::styled(row.preview.clone(), preview_style)); + Line::from(spans) } fn short_session(id: SessionId) -> String { diff --git a/crates/tui/src/spawn.rs b/crates/tui/src/spawn.rs index 10de2e68..1b522b97 100644 --- a/crates/tui/src/spawn.rs +++ b/crates/tui/src/spawn.rs @@ -447,8 +447,8 @@ struct Form { editing: bool, /// `Some(id)` flips the dialog into "Resume Pod" mode: the title /// switches, the source session is shown to the user, and the - /// child pod is launched with `--session ` so it forks and - /// restores `id`. + /// child pod is launched with `--session ` so it restores + /// from `id` and appends to the same session log. resume_from: Option, } diff --git a/tickets/tui-session-restore.md b/tickets/tui-session-restore.md index 54d99c80..93002733 100644 --- a/tickets/tui-session-restore.md +++ b/tickets/tui-session-restore.md @@ -100,3 +100,8 @@ TUI には既に新規 Pod 起動用の spawn UI があるため、同じよう - 別マシンから転送された session store の import UI - `tui` での picker 復帰や自動 attach 切替(live セッション選択時はエラー終了) - 任意位置からの fork 起動(`fork_at` を resume 経路に組み込まない。将来別フローとして扱う) + +## Review +- 状態: Approve with follow-up +- レビュー詳細: [./tui-session-restore.review.md](./tui-session-restore.review.md) +- 日付: 2026-04-28 diff --git a/tickets/tui-session-restore.review.md b/tickets/tui-session-restore.review.md new file mode 100644 index 00000000..e17574ab --- /dev/null +++ b/tickets/tui-session-restore.review.md @@ -0,0 +1,105 @@ +# Review: TUI: 既存セッションからの Pod 復帰 + +レビュー対象は `3c90729` 以降のワーキングツリー (`e98a596` までの 2 commit) と、それに伴う `tickets/tui-session-restore.md` の更新版。 + +## 前提・要件の確認 + +### 起動導線 +- `tui`(引数なし)/ `tui ` の既存挙動は `Mode::Spawn` / `Mode::Attach` として温存(`crates/tui/src/main.rs:130-136`)。OK。 +- `tui -r` / `tui --resume`(`Mode::Resume`)は picker → `run_spawn(Some(id))` の二段構成で実装(`crates/tui/src/main.rs:202-211`)。OK。 +- `tui --session ` は picker をスキップして `run_spawn(Some(id))` 直行(`crates/tui/src/main.rs:166`)。OK。 +- `--resume` と `--session` の排他は `parse_args` で明示チェック(`crates/tui/src/main.rs:120-122`)。OK。 +- pod CLI に `--session` を追加し `--adopt` と排他(`crates/pod/src/main.rs:52`)。OK。 + +### セッション一覧 +- `manifest::paths::sessions_dir()` 経由で store を開き直近 10 件を表示(`crates/tui/src/picker.rs:76-86`)。OK。 +- `SessionId` 短縮表記、最後の user/assistant メッセージプレビュー(同 `:259-300`, `last_message_preview`)。OK。 +- 並び順は `Store::list_sessions` の UUIDv7 順(=作成時刻順、新しい順)。OK。 +- session が 0 件のときは `PickerError::NoSessions` を返してエラー終了(`:79-81`)。OK。 +- ログ破損時は `[corrupt]` プレビューで行を残す(`build_preview` の `Err` 分岐, `:154`)。スキップではないが「エラー表示して」の文言を満たす。OK。 +- **未実装: 「その session が今 live かどうか」の表示** — `scope_lock` 連携が picker に入っていない(`crates/tui/src/picker.rs` 内に `scope_lock` 参照なし)。要件に明記された情報項目なので blocking 寄り。後述。 + +### 復元 Pod の構築 +- 同じ `session_id` を引き継ぎ、追記する設計に変更されている(`crates/pod/src/pod.rs:1599-1690`)。fork は行わない。OK。 +- `system_prompt` は session 保存値そのまま、`SystemPromptTemplate` 再レンダなし(`prepare_pod_common(.., parse_template = false)`、`system_prompt_template = None`)。OK。 +- `head_hash` は `RestoredState.head_hash` をそのまま保持(`:1666`)。OK。 +- 履歴・request_config・turn_count・usage_history・extract_pointer・interrupted フラグ復元は揃っている(`:1651-1686`)。OK。 +- 圧縮アンカー: 先頭が `Role::System` のとき `cache_anchor=Some(0)` にする処理(`:1644-1657`)。compact 後の resume を想定した妥当な追加。OK。 + +### Pod / 高レベル API の整理 +- `Pod::restore_from_manifest(session_id, manifest, store, loader)` 新設(`:1599`)。OK。 +- 旧 `Pod::restore` は削除済み(grep で呼出元なし、`Pod::new` のドキュメントコメントだけ「Pod::restore」表記が残存=軽微)。OK。 +- `prepare_pod_common` で `from_manifest` / `from_manifest_spawned` / `restore_from_manifest` の共通部分を集約(`:1922`)。粒度妥当。OK。 + +### scope.lock への session_id 追加と live 検出 +- `Allocation::session_id: Option` を追加(`crates/pod/src/runtime/scope_lock.rs:58-60`)。`#[serde(default)]` で旧フォーマット読込時は `None` 扱いになる(dev 期間互換不要だが副作用としては安全)。OK。 +- `register_pod` / `install_top_level` / `adopt_allocation` のシグネチャに `session_id` を追加(`:306, :501, :533`)。OK。 +- `delegate_scope` のシグネチャは変更せず `session_id = None` のまま事前予約し、子側 `adopt_allocation` で確定する設計(`:344-387, :533-551`)。チケット文面(57 行)は `delegate_scope` も含めて挙げているが、子の session_id が spawner 時点で未確定であるため `Option` 化が技術的に妥当で、ユーザの設計判断 (3) として明確に記録されている。**ticket 本文と乖離があるが、コメント (`:55-60, :381-384`) で意図が明記されているため許容**。 +- `LockFile::find_by_session` 提供(`:73-77`)。OK。 +- `scope_lock::lookup_session(id) -> Option` 提供(`:567-578`)、`SessionLockInfo { pod_name, socket, pid }` も要件通り(`:556-560`)。OK。 +- 既存 `scope.lock` の互換性は `#[serde(default)]` で旧形式も壊れずに読める。OK。 + +### 二重起動の扱い +- TUI 経路では `pod` 子プロセスが `PodError::SessionInUse` で非ゼロ終了 → spawn が `PodExitedEarly` でスタブ末尾を viewport に表示してそのまま終了(`crates/tui/src/spawn.rs:329-336`)。エラーメッセージ本体(`session_id` / `pod_name` / `socket`)も `PodError::SessionInUse` の `#[error]` に含まれて stderr 経由で通る(`crates/pod/src/pod.rs:1886-1894`)。要件通り picker 復帰や自動 attach 切替なし。OK。 + +### UI / 操作 +- 上下キー / `j` `k` / Enter / Esc / Ctrl-C で picker 操作(`crates/tui/src/picker.rs:223-232`)。OK。 +- 直近 10 件のみのためスクロール UI なし。OK。 +- 決定後 name 入力ダイアログを別 inline viewport として描画。`close_viewport` で改行を 1 行押し出して name 入力との重なりを防止(`:127-138`)。OK。 +- title `resume pod session: ` で復帰モード明示(`crates/tui/src/spawn.rs:516-519`)。OK。 +- name のデフォルトは cwd 由来でこれまでと同じ。OK。 +- 検索フィルタは将来拡張余地のある state で、必須にしない。OK。 + +### 完了条件 +- 「`pod --session ` で fork 由来の空 session が生成されない」: 実装上 `Pod::restore_from_manifest` は新 session を作らず元の `session_id` を再利用するので OK。 +- 「復帰直後に過去履歴が表示される」: `RestoredState.history` を `worker.set_history` 経由で流し込んでおり、Controller の history.json/Event::History 経路は変更不要。OK(手動動作確認は範囲外)。 +- 「同一 source session に対する live Pod が存在する場合、復元起動はエラーで終了し、`pod_name` / socket がメッセージに表示」: 静的状態に対しては成立。ただし live 写し書き入れ替わり時の整合性に注意点あり(後述 Blocking)。 + +## アーキテクチャ・スコープ +- 新規クレートなし。`tui` 側に `session-store` を依存追加したのみ(`crates/tui/Cargo.toml:17`)。`session-store` が picker のために `ContentPart` / `Item` / `Role` を `pub use` で再公開(`crates/session-store/src/lib.rs:43`)。`llm-worker` のレイヤを跨いで TUI が直接型を持ち込まずに済むので妥当。 +- `prepare_pod_common` の抽出は 3 系統の共通部分(pwd / scope / client / catalog / template parse 切替)の自然な集約で、過剰抽象化していない。`PodCommon` 構造体も local。OK。 +- `PodError` への `SessionInUse` / `SessionEmpty` 追加は `restore` 固有の正当な失敗モード。`thiserror` の表現に揃っている。OK。 +- picker の preview 実装は素朴(`read_all` で全件読む)だが、大セッション時の重さは本チケット範囲外と user note (5) で明記されており妥当。OK。 +- spawn.rs と picker.rs の境界が「picker は選択のみ、scope-lock チェックや fork は後段」で揃っており、低レベル基盤と UI が混ざっていない。OK。 + +## 指摘事項 + +### Blocking + +- **scope.lock の session_id による「構造的」二重書き込み防止には穴がある** + - 要件 / 設計判断 (1) は「同一 session への同時書き込みは `scope.lock` の `session_id` で構造的に防止する」が前提。実装は (a) `restore_from_manifest` 入口の `lookup_session` チェックと (b) `adopt_allocation`/`install_top_level` 時の `session_id` 記録、の二点のみ。次の経路で構造保証が崩れる: + 1. **compaction 後**(`crates/pod/src/pod.rs:1133-1156`): `self.session_id = new_session_id` に差し替わるが、scope.lock 上の `Allocation.session_id` は旧 session_id のまま更新されない。結果として `lookup_session(new_session_id)` は `None` を返し、live Pod が新セッションへ書いている最中でも別プロセスが `restore_from_manifest(new_session_id)` を成立させてしまう。 + 2. **`ensure_head_or_fork` 後**(`crates/session-store/src/session.rs:109-138`): `self.session_id` が auto-fork で書き換わるが scope.lock は同じく置き去り。 + 3. **TOCTOU**: `lookup_session` の `LockFileGuard` を閉じてから後段の `install_top_level` で再オープンするまでの間に、別プロセスが同じ流れで通り抜ける可能性。`register_pod` 内では `session_id` の重複チェックを行っていない(`pod_name` と scope のみ)ので、別 `pod_name` を指定する 2 つの resume が両方成功し、両方が同じ session jsonl に append しに行ける。 + - 実害は `ensure_head_or_fork` の auto-fork が後段で救うとはいえ、それ自体「fork しない」設計の前提を破る挙動になる。少なくとも以下のいずれかが必要: + - `register_pod` に session_id 重複チェックを追加し、`lookup_session` と install を同一 `LockFileGuard` 配下で行う API を提供する。 + - compaction 完了時 / `ensure_head_or_fork` 後に scope.lock の自分のエントリの `session_id` を書き換える明示的な hook を入れる。 + - 該当箇所: `crates/pod/src/pod.rs:1156`、`crates/pod/src/runtime/scope_lock.rs:306-339`(`register_pod` に session_id 衝突チェックなし)、`crates/pod/src/pod.rs:1599-1631`(lookup と install の分離)。 + +### Non-blocking / Follow-up + +- **picker に live セッションの可視化が無い**(要件「その session が今 live かどうか(`scope.lock` を引いて判定)」)。`crates/tui/src/picker.rs` 全体に `scope_lock` 参照なし。`Row` に `live: bool` を持たせ、`scope_lock::lookup_session(id)` で判定して表示色を変える程度で完結する。要件の文言上は blocking 級だが、live セッションを picker から選んだ場合でも `Pod::restore_from_manifest` 側で `SessionInUse` で弾かれて pod が落ち、TUI もそのまま終了するという完了条件は満たせるため follow-up とする。次回の TUI 改善で対処を推奨。 + +- **`pod` CLI および TUI の `Form.resume_from` ドキュメントコメントが旧 fork 設計のまま**。 + - `crates/pod/src/main.rs:47-51`: 「The source session log is forked at its head into a new session id, so the original jsonl is left untouched and double-write races are impossible.」 → 実装は fork しないし、二重書き込み防止は scope.lock 経由の前提に変わっている。 + - `crates/tui/src/spawn.rs:448-452`: 「the child pod is launched with `--session ` so it forks and restores `id`」 → 同上。 + - 動作には影響しないが、レビュー観点(コードベースを歪めていないか)で読者を誤導するため早めに直したい。 + +- **`Pod::new` のドキュメントコメントに削除済みの `Pod::restore` 表記が残存**(`crates/pod/src/pod.rs:104`)。「lower-level constructors (`Pod::new`, `Pod::restore`)」の `Pod::restore` を削るか、低レベル API の用途記述を `Pod::new` 単独に書き換えれば足りる。 + +- **`restore_from_manifest` / `lookup_session` / `find_by_session` / `SessionInUse` / `SessionEmpty` のテストが未追加**。`scope_lock.rs` 既存テストは register/delegate/adopt 系のみ。`find_by_session` の挙動(`session_id == Some(_)` のみマッチし `None` の placeholder をスキップする)と、empty session に対する `restore_from_manifest` の `SessionEmpty` 経路は意図そのものが今回新規なので、ユニットテスト 1〜2 本でも欲しい。 + +- **picker の preview が重い場合**(user note (5) 明記)の挙動は範囲外で良いが、`read_all` を 10 セッション分逐次(`for id in ids ... build_preview(...).await`)で行っているため大規模 store ではブロッキング感が出る。将来 `tokio::join_all` 並走化で済む程度なので、follow-up 記録のみ。 + +### Nits + +- `PickerError` の `Display` が `unable` 系の標準形式を踏襲していて良いが、`StoreError` を `Box` 経由で渡す経路で `From` の方向(`From` for `PickerError`)と main.rs 側の `Box` の整合は保たれている。問題なし。 +- `crates/tui/src/picker.rs:23` の `pub use llm_worker::llm_client::types::{ContentPart, Item, Role}` は本来 picker のためだけに `session-store` 経由で入っている。将来 picker 以外も使うようなら `session-store` が「保存対象の構造を再公開する」責務として正規化する余地あり。 + +## 判断 + +**Approve with follow-up** — チケットのコア要件(`tui --resume` / `--session` 起点の復帰経路、同 session_id 再利用、`scope.lock` への session_id 拡張、二重起動時のエラー終了、共通部分の抽出と低レベル `Pod::restore` 削除)はすべて達成されており、build / `cargo test -p pod` も緑。 + +ただし設計判断 (1) で「scope.lock で構造的に二重書き込み防止」と打ち出している割に、(a) compaction 後 (b) `ensure_head_or_fork` 後 (c) lookup→install の TOCTOU の 3 経路で構造保証が崩れる点は要承知。ticket の「fork しない」前提を厳密に守るには上記 Blocking のいずれかの補強が必要だが、現実には `ensure_head_or_fork` の auto-fork で実害が起きにくいため、本チケット完了は Non-blocking として現行 PR を許容しつつ、scope.lock 二重書き防止の補強を別チケットで切り出す運用が妥当。 + +加えて、要件文に明記されている picker の live 表示と、stale ドキュメント(pod CLI / spawn.rs Form / Pod::new)の刷新は早めに follow-up で潰してほしい。