tuiからセッションを復帰する経路の実装

This commit is contained in:
Keisuke Hirata 2026-04-29 19:03:03 +09:00
parent 87768c2e2d
commit fd96a517bb
15 changed files with 512 additions and 34 deletions

16
Cargo.lock generated
View File

@ -2130,6 +2130,7 @@ dependencies = [
"protocol", "protocol",
"provider", "provider",
"schemars", "schemars",
"scope-lock",
"serde", "serde",
"serde_json", "serde_json",
"session-store", "session-store",
@ -2746,6 +2747,20 @@ dependencies = [
"syn 2.0.117", "syn 2.0.117",
] ]
[[package]]
name = "scope-lock"
version = "0.1.0"
dependencies = [
"fs4",
"libc",
"manifest",
"serde",
"serde_json",
"session-store",
"tempfile",
"thiserror 2.0.18",
]
[[package]] [[package]]
name = "scopeguard" name = "scopeguard"
version = "1.2.0" version = "1.2.0"
@ -3574,6 +3589,7 @@ dependencies = [
"manifest", "manifest",
"protocol", "protocol",
"ratatui", "ratatui",
"scope-lock",
"serde_json", "serde_json",
"session-store", "session-store",
"tokio", "tokio",

View File

@ -9,6 +9,7 @@ members = [
"crates/pod", "crates/pod",
"crates/protocol", "crates/protocol",
"crates/provider", "crates/provider",
"crates/scope-lock",
"crates/tools", "crates/tools",
"crates/tui", "crates/memory", "crates/tui", "crates/memory",
] ]

View File

@ -12,6 +12,7 @@ session-store = { version = "0.1.0", path = "../session-store" }
manifest = { version = "0.1.0", path = "../manifest" } manifest = { version = "0.1.0", path = "../manifest" }
protocol = { version = "0.1.0", path = "../protocol" } protocol = { version = "0.1.0", path = "../protocol" }
provider = { version = "0.1.0", path = "../provider" } provider = { version = "0.1.0", path = "../provider" }
scope-lock = { version = "0.1.0", path = "../scope-lock" }
serde = { version = "1.0.228", features = ["derive"] } serde = { version = "1.0.228", features = ["derive"] }
serde_json = "1.0.149" serde_json = "1.0.149"
thiserror = "2.0" thiserror = "2.0"

View File

@ -44,9 +44,9 @@ struct Cli {
#[arg(long, value_name = "PATH", requires = "adopt")] #[arg(long, value_name = "PATH", requires = "adopt")]
callback: Option<PathBuf>, callback: Option<PathBuf>,
/// Restore a Pod from an existing session. The source session log /// Restore a Pod from an existing session. The Pod re-uses the
/// is forked at its head into a new session id, so the original /// given session id and appends new turns to the same jsonl;
/// jsonl is left untouched and double-write races are impossible. /// concurrent writers are prevented by the `scope.lock` registry.
/// Mutually exclusive with `--adopt` (spawned children always start /// Mutually exclusive with `--adopt` (spawned children always start
/// fresh). /// fresh).
#[arg(long, value_name = "UUID", conflicts_with = "adopt")] #[arg(long, value_name = "UUID", conflicts_with = "adopt")]

View File

@ -100,10 +100,11 @@ pub struct Pod<C: LlmClient, St: Store> {
/// PodInterceptor installed in `ensure_interceptor_installed`. /// PodInterceptor installed in `ensure_interceptor_installed`.
pending_notifies: NotifyBuffer, pending_notifies: NotifyBuffer,
/// Scope allocation in the machine-wide lock file. `Some` for /// Scope allocation in the machine-wide lock file. `Some` for
/// Pods built via `from_manifest` (production path); `None` for /// Pods built via `from_manifest` / `from_manifest_spawned` /
/// lower-level constructors (`Pod::new`, `Pod::restore`) that /// `restore_from_manifest` (production paths); `None` for the
/// bypass the registry. Kept purely for its `Drop` impl, which /// low-level `Pod::new` constructor used in tests, which bypasses
/// releases the allocation when the Pod is dropped. /// the registry. Kept purely for its `Drop` impl, which releases
/// the allocation when the Pod is dropped.
#[allow(dead_code)] #[allow(dead_code)]
scope_allocation: Option<ScopeAllocationGuard>, scope_allocation: Option<ScopeAllocationGuard>,
/// Socket path of the spawning Pod. `Some` only for Pods built via /// Socket path of the spawning Pod. `Some` only for Pods built via
@ -717,6 +718,7 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
self.head_hash = Some(hash); self.head_hash = Some(hash);
return Ok(()); return Ok(());
} }
let prev_session_id = self.session_id;
session_store::ensure_head_or_fork( session_store::ensure_head_or_fork(
&self.store, &self.store,
&mut self.session_id, &mut self.session_id,
@ -724,6 +726,13 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
state, state,
) )
.await?; .await?;
// ensure_head_or_fork mints a fresh session_id when it auto-
// forks. Sync that to scope.lock so a concurrent
// restore_from_manifest can't see "no live writer" for the new
// session and grab it.
if self.session_id != prev_session_id && self.scope_allocation.is_some() {
scope_lock::update_session(&self.manifest.pod.name, self.session_id)?;
}
Ok(()) Ok(())
} }
@ -1155,6 +1164,15 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
// until its first LLM call. // until its first LLM call.
self.session_id = new_session_id; self.session_id = new_session_id;
self.head_hash = Some(new_head_hash); self.head_hash = Some(new_head_hash);
// Keep scope.lock pointing at the live session_id. Without this
// a concurrent `restore_from_manifest(new_session_id)` would
// see no live writer and grab the session this Pod just moved
// into, causing two writers to race on the same jsonl. Skipped
// when no allocation is installed (e.g. compact under
// `Pod::new` in tests).
if self.scope_allocation.is_some() {
scope_lock::update_session(&self.manifest.pod.name, new_session_id)?;
}
let worker = self.worker.as_mut().unwrap(); let worker = self.worker.as_mut().unwrap();
worker.set_history(new_history); worker.set_history(new_history);
// Anchor the prompt cache at the summary item so that Anthropic // Anchor the prompt cache at the summary item so that Anthropic
@ -1602,15 +1620,6 @@ impl<St: Store> Pod<Box<dyn LlmClient>, St> {
store: St, store: St,
loader: PromptLoader, loader: PromptLoader,
) -> Result<Self, PodError> { ) -> Result<Self, PodError> {
// Refuse to resume into a session that's already being written.
if let Some(info) = scope_lock::lookup_session(session_id)? {
return Err(PodError::SessionInUse {
session_id,
pod_name: info.pod_name,
socket: info.socket,
});
}
let state = session_store::restore(&store, session_id).await?; let state = session_store::restore(&store, session_id).await?;
if state.head_hash.is_none() { if state.head_hash.is_none() {
return Err(PodError::SessionEmpty { session_id }); return Err(PodError::SessionEmpty { session_id });
@ -1618,6 +1627,11 @@ impl<St: Store> Pod<Box<dyn LlmClient>, St> {
let common = prepare_pod_common(&manifest, &loader, /* parse_template */ false)?; let common = prepare_pod_common(&manifest, &loader, /* parse_template */ false)?;
// Atomic: register_pod inside install_top_level rejects when
// another live allocation already holds `session_id`. Wrapping
// the lookup + install inside a single `LockFileGuard` is what
// makes "no two live Pods write to the same session log"
// actually structural rather than a hopeful pre-check.
let socket_path = dir::default_base() let socket_path = dir::default_base()
.map_err(ScopeLockError::from)? .map_err(ScopeLockError::from)?
.join(&manifest.pod.name) .join(&manifest.pod.name)
@ -1883,16 +1897,6 @@ pub enum PodError {
#[error("memory Phase 1 staging write failed: {0}")] #[error("memory Phase 1 staging write failed: {0}")]
ExtractStaging(#[source] memory::extract::StagingError), ExtractStaging(#[source] memory::extract::StagingError),
#[error(
"session {session_id} is currently in use by pod `{pod_name}` at {}",
.socket.display()
)]
SessionInUse {
session_id: SessionId,
pod_name: String,
socket: PathBuf,
},
#[error("session {session_id} has no entries to restore")] #[error("session {session_id} has no entries to restore")]
SessionEmpty { session_id: SessionId }, SessionEmpty { session_id: SessionId },
} }

View File

@ -1,2 +1,2 @@
pub mod dir; pub mod dir;
pub mod scope_lock; pub use ::scope_lock;

View File

@ -441,7 +441,8 @@ fn scope_lock_err_to_tool(e: ScopeLockError) -> ToolError {
ScopeLockError::NotSubset { .. } ScopeLockError::NotSubset { .. }
| ScopeLockError::WriteConflict { .. } | ScopeLockError::WriteConflict { .. }
| ScopeLockError::DuplicatePodName(_) | ScopeLockError::DuplicatePodName(_)
| ScopeLockError::UnknownPod(_) => ToolError::InvalidArgument(e.to_string()), | ScopeLockError::UnknownPod(_)
| ScopeLockError::SessionConflict { .. } => ToolError::InvalidArgument(e.to_string()),
ScopeLockError::Io(_) => ToolError::ExecutionFailed(e.to_string()), ScopeLockError::Io(_) => ToolError::ExecutionFailed(e.to_string()),
} }
} }

View File

@ -0,0 +1,86 @@
//! Integration tests for `Pod::restore_from_manifest`'s pre-build
//! validation paths.
//!
//! These cases all return before `prepare_pod_common` runs, so they
//! do not need a real LLM client or scope-lock environment — only the
//! session store needs to be present.
use std::sync::{LazyLock, Mutex};
use pod::{Pod, PodError};
use session_store::{FsStore, SessionId, StoreError};
const MINIMAL_MANIFEST_TOML: &str = r#"
[pod]
name = "restore-test"
pwd = "./"
[model]
scheme = "anthropic"
model_id = "test-model"
[worker]
max_tokens = 100
[[scope.allow]]
target = "./"
permission = "write"
"#;
/// Serialises tests that mutate runtime-dir env vars, mirroring the
/// pattern used by other integration tests in this crate.
static ENV_LOCK: LazyLock<Mutex<()>> = LazyLock::new(|| Mutex::new(()));
#[tokio::test]
async fn restore_from_manifest_rejects_unknown_session() {
let _lock = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
let store_tmp = tempfile::tempdir().unwrap();
let store = FsStore::new(store_tmp.path()).await.unwrap();
let manifest = pod::PodManifest::from_toml(MINIMAL_MANIFEST_TOML).unwrap();
// A freshly-minted id with no jsonl file at all → store returns
// NotFound, which `Pod::restore_from_manifest` surfaces verbatim
// as `PodError::Store`.
let unknown = session_store::new_session_id();
let result = Pod::restore_from_manifest(
unknown,
manifest,
store,
pod::PromptLoader::builtins_only(),
)
.await;
match result {
Err(PodError::Store(StoreError::NotFound(id))) => assert_eq!(id, unknown),
Err(other) => panic!("expected Store(NotFound), got {other:?}"),
Ok(_) => panic!("expected unknown session to fail"),
}
}
#[tokio::test]
async fn restore_from_manifest_rejects_empty_session_log() {
let _lock = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
let store_tmp = tempfile::tempdir().unwrap();
let store = FsStore::new(store_tmp.path()).await.unwrap();
let manifest = pod::PodManifest::from_toml(MINIMAL_MANIFEST_TOML).unwrap();
// Pre-create an empty `<id>.jsonl` so `read_all` succeeds with no
// entries. `collect_state` returns `head_hash = None`, which
// `restore_from_manifest` rejects with `SessionEmpty` *before* it
// gets as far as building the LLM client — so the test does not
// need credentials or a runtime sandbox.
let id: SessionId = session_store::new_session_id();
let path = store_tmp.path().join(format!("{id}.jsonl"));
std::fs::write(&path, b"").unwrap();
let result =
Pod::restore_from_manifest(id, manifest, store, pod::PromptLoader::builtins_only()).await;
match result {
Err(PodError::SessionEmpty { session_id }) => assert_eq!(session_id, id),
Err(other) => panic!("expected SessionEmpty, got {other:?}"),
Ok(_) => panic!("expected empty session log to fail"),
}
}

View File

@ -0,0 +1,17 @@
[package]
name = "scope-lock"
version = "0.1.0"
edition.workspace = true
license.workspace = true
[dependencies]
fs4 = { version = "0.13.1", features = ["sync"] }
libc = "0.2.185"
manifest = { version = "0.1.0", path = "../manifest" }
serde = { version = "1.0.228", features = ["derive"] }
serde_json = "1.0.149"
session-store = { version = "0.1.0", path = "../session-store" }
thiserror = "2.0"
[dev-dependencies]
tempfile = "3.27.0"

View File

@ -303,6 +303,10 @@ fn find_conflict_in_subtree(
/// Register a top-level Pod (started directly by a human, no /// Register a top-level Pod (started directly by a human, no
/// delegation parent). Reclaims stale entries before checking /// delegation parent). Reclaims stale entries before checking
/// conflicts so a crashed Pod's allocation doesn't block the new one. /// conflicts so a crashed Pod's allocation doesn't block the new one.
///
/// Rejects when another live allocation is already writing to
/// `session_id`, so two `restore_from_manifest` calls under different
/// `pod_name`s cannot both grab the same session log.
pub fn register_pod( pub fn register_pod(
guard: &mut LockFileGuard, guard: &mut LockFileGuard,
pod_name: String, pod_name: String,
@ -315,6 +319,13 @@ pub fn register_pod(
if guard.data().find(&pod_name).is_some() { if guard.data().find(&pod_name).is_some() {
return Err(ScopeLockError::DuplicatePodName(pod_name)); return Err(ScopeLockError::DuplicatePodName(pod_name));
} }
if let Some(existing) = guard.data().find_by_session(session_id) {
return Err(ScopeLockError::SessionConflict {
session_id,
pod_name: existing.pod_name.clone(),
socket: existing.socket.clone(),
});
}
for rule in scope_allow for rule in scope_allow
.iter() .iter()
.filter(|r| r.permission == Permission::Write) .filter(|r| r.permission == Permission::Write)
@ -550,6 +561,46 @@ pub fn adopt_allocation(
}) })
} }
/// Rewrite the `session_id` recorded for `pod_name` to
/// `new_session_id`.
///
/// The Pod's in-memory `session_id` can change underneath the
/// allocation in two normal places:
///
/// - `Pod::compact` mints a fresh session and swaps it in.
/// - `session_store::ensure_head_or_fork` auto-forks when another
/// writer has advanced the store head behind our back.
///
/// Both paths must call this so subsequent `lookup_session` queries
/// find the live session id, not the old one. Without this update a
/// concurrent `restore_from_manifest(new_id)` would see "no live
/// writer" and proceed to register a competing allocation on the
/// session this Pod just moved into.
///
/// The lock is opened once and the allocation is rewritten inside the
/// guard, so the session_id collision check is atomic with the
/// rewrite.
pub fn update_session(pod_name: &str, new_session_id: SessionId) -> Result<(), ScopeLockError> {
let lock_path = default_lock_path()?;
let mut guard = LockFileGuard::open(&lock_path)?;
if let Some(other) = guard.data().find_by_session(new_session_id) {
if other.pod_name != pod_name {
return Err(ScopeLockError::SessionConflict {
session_id: new_session_id,
pod_name: other.pod_name.clone(),
socket: other.socket.clone(),
});
}
}
let alloc = guard
.data_mut()
.find_mut(pod_name)
.ok_or_else(|| ScopeLockError::UnknownPod(pod_name.into()))?;
alloc.session_id = Some(new_session_id);
guard.save()?;
Ok(())
}
/// Information about a Pod that currently holds an allocation for a /// Information about a Pod that currently holds an allocation for a
/// given session. /// given session.
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
@ -593,6 +644,15 @@ pub enum ScopeLockError {
NotSubset { spawner: String, rule: ScopeRule }, NotSubset { spawner: String, rule: ScopeRule },
#[error("pod `{0}` is not registered")] #[error("pod `{0}` is not registered")]
UnknownPod(String), UnknownPod(String),
#[error(
"session {session_id} is already held by pod `{pod_name}` at {}",
.socket.display()
)]
SessionConflict {
session_id: SessionId,
pod_name: String,
socket: PathBuf,
},
} }
#[cfg(test)] #[cfg(test)]
@ -1200,4 +1260,164 @@ mod tests {
other => panic!("expected WriteConflict, got {other:?}"), other => panic!("expected WriteConflict, got {other:?}"),
} }
} }
#[test]
fn find_by_session_skips_none_placeholders() {
let dir = TempDir::new().unwrap();
let path = dir.path().join("scope.lock");
let mut g = open_empty(&path);
// Pre-reservation: delegate_scope leaves session_id = None
// until adopt_allocation rewrites it. find_by_session must not
// match those placeholders, otherwise a freshly-spawning child
// would shadow itself before it has even chosen a session.
register_pod(
&mut g,
"parent".into(),
std::process::id(),
sock("parent"),
vec![write_rule("/p", true)],
sid(),
)
.unwrap();
delegate_scope(
&mut g,
"parent",
"child".into(),
std::process::id(),
sock("child"),
vec![write_rule("/p/sub", true)],
)
.unwrap();
let target_session = sid();
// The placeholder allocation has session_id = None and must
// not be returned for any lookup.
assert!(g.data().find_by_session(target_session).is_none());
// After adopt-style rewrite, the same allocation is now found.
g.data_mut()
.find_mut("child")
.unwrap()
.session_id = Some(target_session);
let found = g.data().find_by_session(target_session).unwrap();
assert_eq!(found.pod_name, "child");
}
#[test]
fn register_pod_rejects_session_id_collision() {
let dir = TempDir::new().unwrap();
let path = dir.path().join("scope.lock");
let mut g = open_empty(&path);
let shared_session = sid();
register_pod(
&mut g,
"first".into(),
std::process::id(),
sock("first"),
vec![write_rule("/work/a", true)],
shared_session,
)
.unwrap();
// Second registration tries to grab the same session_id under
// a different pod_name. Without the SessionConflict check both
// would succeed and race on the same jsonl.
let err = register_pod(
&mut g,
"second".into(),
std::process::id(),
sock("second"),
vec![write_rule("/work/b", true)],
shared_session,
)
.unwrap_err();
match err {
ScopeLockError::SessionConflict {
session_id,
pod_name,
..
} => {
assert_eq!(session_id, shared_session);
assert_eq!(pod_name, "first");
}
other => panic!("expected SessionConflict, got {other:?}"),
}
}
#[test]
fn lookup_session_returns_live_writer_info() {
let dir = TempDir::new().unwrap();
let _sandbox = RuntimeDirSandbox::new(dir.path());
let s = sid();
let guard = install_top_level(
"live".into(),
std::process::id(),
sock("live"),
vec![write_rule("/work", true)],
s,
)
.unwrap();
let info = lookup_session(s).unwrap().expect("expected live writer");
assert_eq!(info.pod_name, "live");
assert_eq!(info.socket, sock("live"));
drop(guard);
// After the guard's release, the lookup goes back to None.
assert!(lookup_session(s).unwrap().is_none());
}
#[test]
fn update_session_rewrites_allocation_session_id() {
let dir = TempDir::new().unwrap();
let _sandbox = RuntimeDirSandbox::new(dir.path());
let original = sid();
let updated = sid();
let _guard = install_top_level(
"p".into(),
std::process::id(),
sock("p"),
vec![write_rule("/work", true)],
original,
)
.unwrap();
update_session("p", updated).unwrap();
// lookup against the original is now empty, the updated id wins.
assert!(lookup_session(original).unwrap().is_none());
assert_eq!(lookup_session(updated).unwrap().unwrap().pod_name, "p");
}
#[test]
fn update_session_rejects_when_target_already_held() {
let dir = TempDir::new().unwrap();
let _sandbox = RuntimeDirSandbox::new(dir.path());
let s_a = sid();
let s_b = sid();
let _g_a = install_top_level(
"a".into(),
std::process::id(),
sock("a"),
vec![write_rule("/work/a", true)],
s_a,
)
.unwrap();
let _g_b = install_top_level(
"b".into(),
std::process::id(),
sock("b"),
vec![write_rule("/work/b", true)],
s_b,
)
.unwrap();
// `a` cannot adopt b's live session id.
let err = update_session("a", s_b).unwrap_err();
match err {
ScopeLockError::SessionConflict {
pod_name,
session_id,
..
} => {
assert_eq!(pod_name, "b");
assert_eq!(session_id, s_b);
}
other => panic!("expected SessionConflict, got {other:?}"),
}
}
} }

View File

@ -15,3 +15,4 @@ uuid = "1.23"
toml = "1.1.2" toml = "1.1.2"
manifest = { version = "0.1.0", path = "../manifest" } manifest = { version = "0.1.0", path = "../manifest" }
session-store = { version = "0.1.0", path = "../session-store" } session-store = { version = "0.1.0", path = "../session-store" }
scope-lock = { version = "0.1.0", path = "../scope-lock" }

View File

@ -19,6 +19,7 @@ use ratatui::style::{Color, Modifier, Style};
use ratatui::text::{Line, Span}; use ratatui::text::{Line, Span};
use ratatui::widgets::Paragraph; use ratatui::widgets::Paragraph;
use ratatui::{Frame, TerminalOptions, Viewport}; use ratatui::{Frame, TerminalOptions, Viewport};
use scope_lock::lookup_session;
use session_store::{ use session_store::{
ContentPart, FsStore, HashedEntry, Item, LogEntry, SessionId, Store, ContentPart, FsStore, HashedEntry, Item, LogEntry, SessionId, Store,
}; };
@ -71,6 +72,11 @@ struct Row {
id: SessionId, id: SessionId,
/// Last user / assistant snippet, or a `[corrupt]` placeholder. /// Last user / assistant snippet, or a `[corrupt]` placeholder.
preview: String, preview: String,
/// `Some(pod_name)` when a live Pod currently holds an allocation
/// for this session in `scope.lock`. Picking such a row launches
/// `pod --session <UUID>` which will fail with `SessionConflict` —
/// the badge warns the user up-front.
live_pod: Option<String>,
} }
pub async fn run() -> Result<PickerOutcome, PickerError> { pub async fn run() -> Result<PickerOutcome, PickerError> {
@ -82,7 +88,15 @@ pub async fn run() -> Result<PickerOutcome, PickerError> {
let mut rows: Vec<Row> = Vec::with_capacity(MAX_ROWS); let mut rows: Vec<Row> = Vec::with_capacity(MAX_ROWS);
for id in ids.into_iter().take(MAX_ROWS) { for id in ids.into_iter().take(MAX_ROWS) {
let preview = build_preview(&store, id).await; let preview = build_preview(&store, id).await;
rows.push(Row { id, preview }); // Best-effort live check. A scope.lock I/O hiccup downgrades
// the row to "no badge" rather than killing the picker — the
// user still gets to see the listing.
let live_pod = lookup_session(id).ok().flatten().map(|info| info.pod_name);
rows.push(Row {
id,
preview,
live_pod,
});
} }
let mut selected = 0usize; let mut selected = 0usize;
@ -291,12 +305,19 @@ fn row_line(row: &Row, selected: bool) -> Line<'_> {
} else { } else {
Style::default().fg(Color::DarkGray) Style::default().fg(Color::DarkGray)
}; };
Line::from(vec![ let mut spans = vec![
Span::raw(marker), Span::raw(marker),
Span::styled(short_session(row.id), id_style), Span::styled(short_session(row.id), id_style),
Span::raw(" "), Span::raw(" "),
Span::styled(row.preview.clone(), preview_style), ];
]) if let Some(ref pod_name) = row.live_pod {
spans.push(Span::styled(
format!("[live: {pod_name}] "),
Style::default().fg(Color::Red).add_modifier(Modifier::BOLD),
));
}
spans.push(Span::styled(row.preview.clone(), preview_style));
Line::from(spans)
} }
fn short_session(id: SessionId) -> String { fn short_session(id: SessionId) -> String {

View File

@ -447,8 +447,8 @@ struct Form {
editing: bool, editing: bool,
/// `Some(id)` flips the dialog into "Resume Pod" mode: the title /// `Some(id)` flips the dialog into "Resume Pod" mode: the title
/// switches, the source session is shown to the user, and the /// switches, the source session is shown to the user, and the
/// child pod is launched with `--session <id>` so it forks and /// child pod is launched with `--session <id>` so it restores
/// restores `id`. /// from `id` and appends to the same session log.
resume_from: Option<SessionId>, resume_from: Option<SessionId>,
} }

View File

@ -100,3 +100,8 @@ TUI には既に新規 Pod 起動用の spawn UI があるため、同じよう
- 別マシンから転送された session store の import UI - 別マシンから転送された session store の import UI
- `tui` での picker 復帰や自動 attach 切替live セッション選択時はエラー終了) - `tui` での picker 復帰や自動 attach 切替live セッション選択時はエラー終了)
- 任意位置からの fork 起動(`fork_at` を resume 経路に組み込まない。将来別フローとして扱う) - 任意位置からの fork 起動(`fork_at` を resume 経路に組み込まない。将来別フローとして扱う)
## Review
- 状態: Approve with follow-up
- レビュー詳細: [./tui-session-restore.review.md](./tui-session-restore.review.md)
- 日付: 2026-04-28

View File

@ -0,0 +1,105 @@
# Review: TUI: 既存セッションからの Pod 復帰
レビュー対象は `3c90729` 以降のワーキングツリー (`e98a596` までの 2 commit) と、それに伴う `tickets/tui-session-restore.md` の更新版。
## 前提・要件の確認
### 起動導線
- `tui`(引数なし)/ `tui <pod-name>` の既存挙動は `Mode::Spawn` / `Mode::Attach` として温存(`crates/tui/src/main.rs:130-136`。OK。
- `tui -r` / `tui --resume``Mode::Resume`)は picker → `run_spawn(Some(id))` の二段構成で実装(`crates/tui/src/main.rs:202-211`。OK。
- `tui --session <UUID>` は picker をスキップして `run_spawn(Some(id))` 直行(`crates/tui/src/main.rs:166`。OK。
- `--resume``--session` の排他は `parse_args` で明示チェック(`crates/tui/src/main.rs:120-122`。OK。
- pod CLI に `--session` を追加し `--adopt` と排他(`crates/pod/src/main.rs:52`。OK。
### セッション一覧
- `manifest::paths::sessions_dir()` 経由で store を開き直近 10 件を表示(`crates/tui/src/picker.rs:76-86`。OK。
- `SessionId` 短縮表記、最後の user/assistant メッセージプレビュー(同 `:259-300`, `last_message_preview`。OK。
- 並び順は `Store::list_sessions` の UUIDv7 順(=作成時刻順、新しい順。OK。
- session が 0 件のときは `PickerError::NoSessions` を返してエラー終了(`:79-81`。OK。
- ログ破損時は `[corrupt]` プレビューで行を残す(`build_preview` の `Err` 分岐, `:154`。スキップではないが「エラー表示して」の文言を満たす。OK。
- **未実装: 「その session が今 live かどうか」の表示**`scope_lock` 連携が picker に入っていない(`crates/tui/src/picker.rs` 内に `scope_lock` 参照なし)。要件に明記された情報項目なので blocking 寄り。後述。
### 復元 Pod の構築
- 同じ `session_id` を引き継ぎ、追記する設計に変更されている(`crates/pod/src/pod.rs:1599-1690`。fork は行わない。OK。
- `system_prompt` は session 保存値そのまま、`SystemPromptTemplate` 再レンダなし(`prepare_pod_common(.., parse_template = false)`、`system_prompt_template = None`。OK。
- `head_hash``RestoredState.head_hash` をそのまま保持(`:1666`。OK。
- 履歴・request_config・turn_count・usage_history・extract_pointer・interrupted フラグ復元は揃っている(`:1651-1686`。OK。
- 圧縮アンカー: 先頭が `Role::System` のとき `cache_anchor=Some(0)` にする処理(`:1644-1657`。compact 後の resume を想定した妥当な追加。OK。
### Pod / 高レベル API の整理
- `Pod::restore_from_manifest(session_id, manifest, store, loader)` 新設(`:1599`。OK。
- 旧 `Pod::restore` は削除済みgrep で呼出元なし、`Pod::new` のドキュメントコメントだけ「Pod::restore」表記が残存=軽微。OK。
- `prepare_pod_common``from_manifest` / `from_manifest_spawned` / `restore_from_manifest` の共通部分を集約(`:1922`。粒度妥当。OK。
### scope.lock への session_id 追加と live 検出
- `Allocation::session_id: Option<SessionId>` を追加(`crates/pod/src/runtime/scope_lock.rs:58-60`)。`#[serde(default)]` で旧フォーマット読込時は `None` 扱いになるdev 期間互換不要だが副作用としては安全。OK。
- `register_pod` / `install_top_level` / `adopt_allocation` のシグネチャに `session_id` を追加(`:306, :501, :533`。OK。
- `delegate_scope` のシグネチャは変更せず `session_id = None` のまま事前予約し、子側 `adopt_allocation` で確定する設計(`:344-387, :533-551`。チケット文面57 行)は `delegate_scope` も含めて挙げているが、子の session_id が spawner 時点で未確定であるため `Option` 化が技術的に妥当で、ユーザの設計判断 (3) として明確に記録されている。**ticket 本文と乖離があるが、コメント (`:55-60, :381-384`) で意図が明記されているため許容**。
- `LockFile::find_by_session` 提供(`:73-77`。OK。
- `scope_lock::lookup_session(id) -> Option<SessionLockInfo>` 提供(`:567-578`)、`SessionLockInfo { pod_name, socket, pid }` も要件通り(`:556-560`。OK。
- 既存 `scope.lock` の互換性は `#[serde(default)]` で旧形式も壊れずに読める。OK。
### 二重起動の扱い
- TUI 経路では `pod` 子プロセスが `PodError::SessionInUse` で非ゼロ終了 → spawn が `PodExitedEarly` でスタブ末尾を viewport に表示してそのまま終了(`crates/tui/src/spawn.rs:329-336`)。エラーメッセージ本体(`session_id` / `pod_name` / `socket`)も `PodError::SessionInUse``#[error]` に含まれて stderr 経由で通る(`crates/pod/src/pod.rs:1886-1894`)。要件通り picker 復帰や自動 attach 切替なし。OK。
### UI / 操作
- 上下キー / `j` `k` / Enter / Esc / Ctrl-C で picker 操作(`crates/tui/src/picker.rs:223-232`。OK。
- 直近 10 件のみのためスクロール UI なし。OK。
- 決定後 name 入力ダイアログを別 inline viewport として描画。`close_viewport` で改行を 1 行押し出して name 入力との重なりを防止(`:127-138`。OK。
- title `resume pod session: <short-id>` で復帰モード明示(`crates/tui/src/spawn.rs:516-519`。OK。
- name のデフォルトは cwd 由来でこれまでと同じ。OK。
- 検索フィルタは将来拡張余地のある state で、必須にしない。OK。
### 完了条件
- 「`pod --session <UUID>` で fork 由来の空 session が生成されない」: 実装上 `Pod::restore_from_manifest` は新 session を作らず元の `session_id` を再利用するので OK。
- 「復帰直後に過去履歴が表示される」: `RestoredState.history``worker.set_history` 経由で流し込んでおり、Controller の history.json/Event::History 経路は変更不要。OK手動動作確認は範囲外
- 「同一 source session に対する live Pod が存在する場合、復元起動はエラーで終了し、`pod_name` / socket がメッセージに表示」: 静的状態に対しては成立。ただし live 写し書き入れ替わり時の整合性に注意点あり(後述 Blocking
## アーキテクチャ・スコープ
- 新規クレートなし。`tui` 側に `session-store` を依存追加したのみ(`crates/tui/Cargo.toml:17`)。`session-store` が picker のために `ContentPart` / `Item` / `Role``pub use` で再公開(`crates/session-store/src/lib.rs:43`)。`llm-worker` のレイヤを跨いで TUI が直接型を持ち込まずに済むので妥当。
- `prepare_pod_common` の抽出は 3 系統の共通部分pwd / scope / client / catalog / template parse 切替)の自然な集約で、過剰抽象化していない。`PodCommon` 構造体も local。OK。
- `PodError` への `SessionInUse` / `SessionEmpty` 追加は `restore` 固有の正当な失敗モード。`thiserror` の表現に揃っている。OK。
- picker の preview 実装は素朴(`read_all` で全件読む)だが、大セッション時の重さは本チケット範囲外と user note (5) で明記されており妥当。OK。
- spawn.rs と picker.rs の境界が「picker は選択のみ、scope-lock チェックや fork は後段」で揃っており、低レベル基盤と UI が混ざっていない。OK。
## 指摘事項
### Blocking
- **scope.lock の session_id による「構造的」二重書き込み防止には穴がある**
- 要件 / 設計判断 (1) は「同一 session への同時書き込みは `scope.lock``session_id` で構造的に防止する」が前提。実装は (a) `restore_from_manifest` 入口の `lookup_session` チェックと (b) `adopt_allocation`/`install_top_level` 時の `session_id` 記録、の二点のみ。次の経路で構造保証が崩れる:
1. **compaction 後**`crates/pod/src/pod.rs:1133-1156`: `self.session_id = new_session_id` に差し替わるが、scope.lock 上の `Allocation.session_id` は旧 session_id のまま更新されない。結果として `lookup_session(new_session_id)``None` を返し、live Pod が新セッションへ書いている最中でも別プロセスが `restore_from_manifest(new_session_id)` を成立させてしまう。
2. **`ensure_head_or_fork` 後**`crates/session-store/src/session.rs:109-138`: `self.session_id` が auto-fork で書き換わるが scope.lock は同じく置き去り。
3. **TOCTOU**: `lookup_session``LockFileGuard` を閉じてから後段の `install_top_level` で再オープンするまでの間に、別プロセスが同じ流れで通り抜ける可能性。`register_pod` 内では `session_id` の重複チェックを行っていない(`pod_name` と scope のみ)ので、別 `pod_name` を指定する 2 つの resume が両方成功し、両方が同じ session jsonl に append しに行ける。
- 実害は `ensure_head_or_fork` の auto-fork が後段で救うとはいえ、それ自体「fork しない」設計の前提を破る挙動になる。少なくとも以下のいずれかが必要:
- `register_pod` に session_id 重複チェックを追加し、`lookup_session` と install を同一 `LockFileGuard` 配下で行う API を提供する。
- compaction 完了時 / `ensure_head_or_fork` 後に scope.lock の自分のエントリの `session_id` を書き換える明示的な hook を入れる。
- 該当箇所: `crates/pod/src/pod.rs:1156`、`crates/pod/src/runtime/scope_lock.rs:306-339``register_pod` に session_id 衝突チェックなし)、`crates/pod/src/pod.rs:1599-1631`lookup と install の分離)。
### Non-blocking / Follow-up
- **picker に live セッションの可視化が無い**(要件「その session が今 live かどうか(`scope.lock` を引いて判定)」)。`crates/tui/src/picker.rs` 全体に `scope_lock` 参照なし。`Row` に `live: bool` を持たせ、`scope_lock::lookup_session(id)` で判定して表示色を変える程度で完結する。要件の文言上は blocking 級だが、live セッションを picker から選んだ場合でも `Pod::restore_from_manifest` 側で `SessionInUse` で弾かれて pod が落ち、TUI もそのまま終了するという完了条件は満たせるため follow-up とする。次回の TUI 改善で対処を推奨。
- **`pod` CLI および TUI の `Form.resume_from` ドキュメントコメントが旧 fork 設計のまま**。
- `crates/pod/src/main.rs:47-51`: 「The source session log is forked at its head into a new session id, so the original jsonl is left untouched and double-write races are impossible.」 → 実装は fork しないし、二重書き込み防止は scope.lock 経由の前提に変わっている。
- `crates/tui/src/spawn.rs:448-452`: 「the child pod is launched with `--session <id>` so it forks and restores `id`」 → 同上。
- 動作には影響しないが、レビュー観点(コードベースを歪めていないか)で読者を誤導するため早めに直したい。
- **`Pod::new` のドキュメントコメントに削除済みの `Pod::restore` 表記が残存**`crates/pod/src/pod.rs:104`。「lower-level constructors (`Pod::new`, `Pod::restore`)」の `Pod::restore` を削るか、低レベル API の用途記述を `Pod::new` 単独に書き換えれば足りる。
- **`restore_from_manifest` / `lookup_session` / `find_by_session` / `SessionInUse` / `SessionEmpty` のテストが未追加**。`scope_lock.rs` 既存テストは register/delegate/adopt 系のみ。`find_by_session` の挙動(`session_id == Some(_)` のみマッチし `None` の placeholder をスキップすると、empty session に対する `restore_from_manifest``SessionEmpty` 経路は意図そのものが今回新規なので、ユニットテスト 1〜2 本でも欲しい。
- **picker の preview が重い場合**user note (5) 明記)の挙動は範囲外で良いが、`read_all` を 10 セッション分逐次(`for id in ids ... build_preview(...).await`)で行っているため大規模 store ではブロッキング感が出る。将来 `tokio::join_all` 並走化で済む程度なので、follow-up 記録のみ。
### Nits
- `PickerError``Display``unable` 系の標準形式を踏襲していて良いが、`StoreError` を `Box<dyn Error>` 経由で渡す経路で `From` の方向(`From<StoreError>` for `PickerError`)と main.rs 側の `Box<dyn Error>` の整合は保たれている。問題なし。
- `crates/tui/src/picker.rs:23``pub use llm_worker::llm_client::types::{ContentPart, Item, Role}` は本来 picker のためだけに `session-store` 経由で入っている。将来 picker 以外も使うようなら `session-store` が「保存対象の構造を再公開する」責務として正規化する余地あり。
## 判断
**Approve with follow-up** — チケットのコア要件(`tui --resume` / `--session` 起点の復帰経路、同 session_id 再利用、`scope.lock` への session_id 拡張、二重起動時のエラー終了、共通部分の抽出と低レベル `Pod::restore` 削除はすべて達成されており、build / `cargo test -p pod` も緑。
ただし設計判断 (1) で「scope.lock で構造的に二重書き込み防止」と打ち出している割に、(a) compaction 後 (b) `ensure_head_or_fork` 後 (c) lookup→install の TOCTOU の 3 経路で構造保証が崩れる点は要承知。ticket の「fork しない」前提を厳密に守るには上記 Blocking のいずれかの補強が必要だが、現実には `ensure_head_or_fork` の auto-fork で実害が起きにくいため、本チケット完了は Non-blocking として現行 PR を許容しつつ、scope.lock 二重書き防止の補強を別チケットで切り出す運用が妥当。
加えて、要件文に明記されている picker の live 表示と、stale ドキュメントpod CLI / spawn.rs Form / Pod::newの刷新は早めに follow-up で潰してほしい。