diff --git a/Cargo.lock b/Cargo.lock index 9c5e3c92..2d18267b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2951,6 +2951,7 @@ dependencies = [ "tempfile", "thiserror 2.0.18", "tokio", + "tracing", "uuid", ] diff --git a/TODO.md b/TODO.md index 13b0d465..110beb42 100644 --- a/TODO.md +++ b/TODO.md @@ -2,7 +2,6 @@ - 内部 Worker / 内部 Pod の Workflow 化 → [tickets/internal-worker-workflow.md](tickets/internal-worker-workflow.md) - Agent Skills を Workflow として ingest → [tickets/agent-skills.md](tickets/agent-skills.md) - パーミッション: パターンベースのツール実行制御 → [tickets/permission-extension-point.md](tickets/permission-extension-point.md) -- Resume 時の Scope claim の改善 → [tickets/resume-scope-claim.md](tickets/resume-scope-claim.md) - Pod CLI: マニフェスト関連フラグの整理 → [tickets/pod-cli-manifest-flags.md](tickets/pod-cli-manifest-flags.md) - llm-worker のエラー耐性 - HTTP transient リトライ → [tickets/llm-worker-transient-retry.md](tickets/llm-worker-transient-retry.md) diff --git a/crates/pod-registry/src/conflict.rs b/crates/pod-registry/src/conflict.rs index 60ee1e27..ba63054f 100644 --- a/crates/pod-registry/src/conflict.rs +++ b/crates/pod-registry/src/conflict.rs @@ -32,7 +32,7 @@ pub(crate) fn rules_overlap(a: &ScopeRule, b: &ScopeRule) -> bool { } /// Does `cover` fully contain `inner`'s claimed paths? -fn covers_fully(cover: &ScopeRule, inner: &ScopeRule) -> bool { +pub(crate) fn covers_fully(cover: &ScopeRule, inner: &ScopeRule) -> bool { if cover.permission < inner.permission { return false; } @@ -44,8 +44,9 @@ fn covers_fully(cover: &ScopeRule, inner: &ScopeRule) -> bool { } /// Check whether `rule` is contained in `parent`'s effective write -/// scope: its allow set covers `rule`, and no child of `parent` has -/// already taken a piece that would overlap `rule`. +/// scope: its allow set covers `rule`, no deny rule caps it, and no +/// child of `parent` has already taken a piece that would overlap +/// `rule`. pub fn is_within_effective_write(lock: &LockFile, parent: &str, rule: &ScopeRule) -> bool { let Some(alloc) = lock.find(parent) else { return false; @@ -61,6 +62,14 @@ pub fn is_within_effective_write(lock: &LockFile, parent: &str, rule: &ScopeRule if !covered { return false; } + let denied = alloc + .scope_deny + .iter() + .filter(|r| r.permission == Permission::Write) + .any(|r| rules_overlap(r, rule)); + if denied { + return false; + } let child_conflict = lock .allocations .iter() @@ -71,7 +80,14 @@ pub fn is_within_effective_write(lock: &LockFile, parent: &str, rule: &ScopeRule !child_conflict } -/// Find the Pod that actually owns a write scope overlapping `rule`. +/// The Pod and rule that actually own a conflicting write scope. +#[derive(Debug, Clone)] +pub struct ConflictOwner { + pub pod_name: String, + pub rule: ScopeRule, +} + +/// Find the Pod/rule that actually owns a write scope overlapping `rule`. /// /// Walks the delegation tree: if an allocation overlaps `rule`, we /// descend into its children and return the deepest overlapping node @@ -82,38 +98,47 @@ pub fn find_conflict_owner( lock: &LockFile, rule: &ScopeRule, exempt: Option<&str>, -) -> Option { +) -> Option { + find_conflict_owners(lock, rule, exempt).into_iter().next() +} + +/// Find every top-level delegation tree owner that conflicts with `rule`. +pub fn find_conflict_owners( + lock: &LockFile, + rule: &ScopeRule, + exempt: Option<&str>, +) -> Vec { if rule.permission != Permission::Write { - return None; + return Vec::new(); } - for alloc in lock - .allocations + lock.allocations .iter() .filter(|a| a.delegated_from.is_none()) - { - if let Some(owner) = find_conflict_in_subtree(lock, alloc, rule) { - if Some(owner.as_str()) == exempt { - continue; - } - return Some(owner); - } - } - None + .filter_map(|alloc| find_conflict_in_subtree(lock, alloc, rule)) + .filter(|owner| Some(owner.pod_name.as_str()) != exempt) + .collect() } fn find_conflict_in_subtree( lock: &LockFile, alloc: &Allocation, rule: &ScopeRule, -) -> Option { - let overlaps_here = alloc +) -> Option { + let overlapping_rule = alloc .scope_allow .iter() .filter(|r| r.permission == Permission::Write) - .any(|r| rules_overlap(r, rule)); - if !overlaps_here { + .find(|r| rules_overlap(r, rule))?; + + let fully_denied_here = alloc + .scope_deny + .iter() + .filter(|r| r.permission == Permission::Write) + .any(|r| covers_fully(r, rule)); + if fully_denied_here { return None; } + for child in lock .allocations .iter() @@ -123,14 +148,17 @@ fn find_conflict_in_subtree( return Some(owner); } } - Some(alloc.pod_name.clone()) + Some(ConflictOwner { + pod_name: alloc.pod_name.clone(), + rule: overlapping_rule.clone(), + }) } #[cfg(test)] mod tests { use super::*; use crate::test_util::*; - use crate::{ScopeLockError, delegate_scope, register_pod}; + use crate::{ScopeLockError, delegate_scope, register_pod, register_pod_with_deny}; use tempfile::TempDir; #[test] @@ -200,4 +228,69 @@ mod tests { other => panic!("expected WriteConflict, got {other:?}"), } } + + #[test] + fn denied_write_region_is_not_claimed_by_restored_parent() { + let dir = TempDir::new().unwrap(); + let path = dir.path().join("pods.json"); + let mut g = open_empty(&path); + register_pod_with_deny( + &mut g, + "parent".into(), + std::process::id(), + sock("parent"), + vec![write_rule("/src", true)], + vec![write_rule("/src/core", true)], + sid(), + ) + .unwrap(); + register_pod( + &mut g, + "child".into(), + std::process::id(), + sock("child"), + vec![write_rule("/src/core", true)], + sid(), + ) + .unwrap(); + } + + #[test] + fn partial_deny_does_not_hide_parent_conflict() { + let dir = TempDir::new().unwrap(); + let path = dir.path().join("pods.json"); + let mut g = open_empty(&path); + register_pod_with_deny( + &mut g, + "parent".into(), + std::process::id(), + sock("parent"), + vec![write_rule("/src", true)], + vec![write_rule("/src/core", true)], + sid(), + ) + .unwrap(); + + let err = register_pod( + &mut g, + "other".into(), + std::process::id(), + sock("other"), + vec![write_rule("/src", true)], + sid(), + ) + .unwrap_err(); + + match err { + ScopeLockError::WriteConflict { + competitor, + competitor_rule, + .. + } => { + assert_eq!(competitor, "parent"); + assert_eq!(competitor_rule.target, std::path::PathBuf::from("/src")); + } + other => panic!("expected WriteConflict, got {other:?}"), + } + } } diff --git a/crates/pod-registry/src/error.rs b/crates/pod-registry/src/error.rs index 05807ab6..b685a82e 100644 --- a/crates/pod-registry/src/error.rs +++ b/crates/pod-registry/src/error.rs @@ -13,8 +13,12 @@ pub enum ScopeLockError { Io(#[from] io::Error), #[error("pod name `{0}` is already registered")] DuplicatePodName(String), - #[error("requested scope `{}` conflicts with pod `{competitor}`", .rule.target.display())] - WriteConflict { competitor: String, rule: ScopeRule }, + #[error("requested scope `{}` conflicts with pod `{competitor}` rule `{}`", .rule.target.display(), .competitor_rule.target.display())] + WriteConflict { + competitor: String, + rule: ScopeRule, + competitor_rule: ScopeRule, + }, #[error( "requested scope `{}` is not within spawner `{spawner}`'s effective scope", .rule.target.display() diff --git a/crates/pod-registry/src/lib.rs b/crates/pod-registry/src/lib.rs index ede20133..6c92d099 100644 --- a/crates/pod-registry/src/lib.rs +++ b/crates/pod-registry/src/lib.rs @@ -22,11 +22,16 @@ mod table; #[cfg(test)] mod test_util; -pub use conflict::{find_conflict_owner, is_within_effective_write}; +pub use conflict::{ + ConflictOwner, find_conflict_owner, find_conflict_owners, is_within_effective_write, +}; pub use error::ScopeLockError; pub use lifecycle::{ - ScopeAllocationGuard, SessionLockInfo, adopt_allocation, install_top_level, lookup_session, - update_session, + ScopeAllocationGuard, SessionLockInfo, adopt_allocation, install_top_level, + install_top_level_with_deny, lookup_session, update_session, +}; +pub use mutate::{ + delegate_scope, reclaim_stale, reclaim_stale_with, register_pod, register_pod_with_deny, + release_pod, }; -pub use mutate::{delegate_scope, reclaim_stale, reclaim_stale_with, register_pod, release_pod}; pub use table::{Allocation, LockFile, LockFileGuard, default_registry_path}; diff --git a/crates/pod-registry/src/lifecycle.rs b/crates/pod-registry/src/lifecycle.rs index 0c915d3f..05572179 100644 --- a/crates/pod-registry/src/lifecycle.rs +++ b/crates/pod-registry/src/lifecycle.rs @@ -8,7 +8,7 @@ use manifest::ScopeRule; use session_store::SessionId; use crate::error::ScopeLockError; -use crate::mutate::{register_pod, release_pod}; +use crate::mutate::release_pod; use crate::table::{LockFileGuard, default_registry_path}; /// Owned allocation: on drop, opens the lock file and releases this @@ -46,15 +46,30 @@ pub fn install_top_level( socket: PathBuf, scope_allow: Vec, session_id: SessionId, +) -> Result { + install_top_level_with_deny(pod_name, pid, socket, scope_allow, Vec::new(), session_id) +} + +/// Open the default lock file, register a top-level Pod with explicit +/// deny rules, and return a guard that will release the allocation on +/// drop. +pub fn install_top_level_with_deny( + pod_name: String, + pid: u32, + socket: PathBuf, + scope_allow: Vec, + scope_deny: Vec, + session_id: SessionId, ) -> Result { let lock_path = default_registry_path()?; let mut guard = LockFileGuard::open(&lock_path)?; - register_pod( + crate::mutate::register_pod_with_deny( &mut guard, pod_name.clone(), pid, socket, scope_allow, + scope_deny, session_id, )?; Ok(ScopeAllocationGuard { @@ -176,6 +191,7 @@ mod tests { pid: placeholder_pid, socket: sock(pod_name), scope_allow: vec![write_rule("/tmp/child", true)], + scope_deny: Vec::new(), delegated_from: None, session_id: None, }); diff --git a/crates/pod-registry/src/mutate.rs b/crates/pod-registry/src/mutate.rs index 643adc95..1879a16a 100644 --- a/crates/pod-registry/src/mutate.rs +++ b/crates/pod-registry/src/mutate.rs @@ -7,7 +7,7 @@ use std::path::PathBuf; use manifest::{Permission, ScopeRule}; use session_store::SessionId; -use crate::conflict::{find_conflict_owner, is_within_effective_write}; +use crate::conflict::{find_conflict_owner, find_conflict_owners, is_within_effective_write}; use crate::error::ScopeLockError; use crate::table::{Allocation, LockFileGuard}; @@ -25,6 +25,38 @@ pub fn register_pod( socket: PathBuf, scope_allow: Vec, session_id: SessionId, +) -> Result<(), ScopeLockError> { + register_pod_with_deny( + guard, + pod_name, + pid, + socket, + scope_allow, + Vec::new(), + session_id, + ) +} + +/// Register a top-level Pod with explicit deny rules that reduce the +/// claimed effective write scope. +/// +/// Conflict semantics: if every Pod overlapping a requested allow rule +/// is fully covered by one of `scope_deny`, the conflict is suppressed +/// and the registration proceeds. The check is structural (deny ⊇ +/// competitor.rule), not relational — it does not verify that the +/// competitor actually descends from this Pod's prior delegations. +/// In practice this is safe because the canonical caller is `restore`, +/// which derives `scope_deny` from the session's own snapshot, so any +/// covered competitor is guaranteed to be a descendant of the original +/// allocation. Direct callers must uphold the same invariant. +pub fn register_pod_with_deny( + guard: &mut LockFileGuard, + pod_name: String, + pid: u32, + socket: PathBuf, + scope_allow: Vec, + scope_deny: Vec, + session_id: SessionId, ) -> Result<(), ScopeLockError> { reclaim_stale(guard); if guard.data().find(&pod_name).is_some() { @@ -41,10 +73,22 @@ pub fn register_pod( .iter() .filter(|r| r.permission == Permission::Write) { - if let Some(competitor) = find_conflict_owner(guard.data(), rule, None) { + let conflicts = find_conflict_owners(guard.data(), rule, None); + let all_denied = !conflicts.is_empty() + && conflicts.iter().all(|owner| { + scope_deny + .iter() + .filter(|r| r.permission == Permission::Write) + .any(|deny| crate::conflict::covers_fully(deny, &owner.rule)) + }); + if all_denied { + continue; + } + if let Some(competitor) = conflicts.into_iter().next() { return Err(ScopeLockError::WriteConflict { - competitor, + competitor: competitor.pod_name, rule: rule.clone(), + competitor_rule: competitor.rule, }); } } @@ -53,6 +97,7 @@ pub fn register_pod( pid, socket, scope_allow, + scope_deny, delegated_from: None, session_id: Some(session_id), }); @@ -88,8 +133,9 @@ pub fn delegate_scope( if rule.permission == Permission::Write { if let Some(competitor) = find_conflict_owner(guard.data(), rule, Some(spawner)) { return Err(ScopeLockError::WriteConflict { - competitor, + competitor: competitor.pod_name, rule: rule.clone(), + competitor_rule: competitor.rule, }); } } @@ -99,6 +145,7 @@ pub fn delegate_scope( pid, socket, scope_allow, + scope_deny: Vec::new(), delegated_from: Some(spawner.into()), // Pre-reservation. The child fills in its own session_id when // it calls `adopt_allocation` after the worker is built. diff --git a/crates/pod-registry/src/table.rs b/crates/pod-registry/src/table.rs index b2cbfe0f..4c9090b4 100644 --- a/crates/pod-registry/src/table.rs +++ b/crates/pod-registry/src/table.rs @@ -35,6 +35,11 @@ pub struct Allocation { pub socket: PathBuf, /// Allow rules granted to this Pod (write + read). pub scope_allow: Vec, + /// Deny rules that cap this Pod's effective scope. Normally empty for + /// fresh allocations; restored Pods use this to avoid reclaiming + /// previously delegated write regions. + #[serde(default)] + pub scope_deny: Vec, /// Name of the Pod that delegated scope to this one, or `None` for /// a top-level Pod started directly by a human. pub delegated_from: Option, diff --git a/crates/pod/src/controller.rs b/crates/pod/src/controller.rs index 4056c260..b92150ca 100644 --- a/crates/pod/src/controller.rs +++ b/crates/pod/src/controller.rs @@ -134,6 +134,8 @@ impl PodController { // `PodFsView` to the shared state once the latter exists. let fs_for_view: tools::ScopedFs; + let scope_change_sink = pod.scope_change_sink(); + // Register event bridge callbacks on the worker { let worker = pod.worker_mut(); @@ -257,7 +259,8 @@ impl PodController { // worker) reads from it, and any future scope mutation // (SpawnPod-style revoke, future GrantScope) propagates // through it. - let fs = tools::ScopedFs::with_shared_scope(scope_handle.clone(), pwd_for_tools.clone()); + let fs = + tools::ScopedFs::with_shared_scope(scope_handle.clone(), pwd_for_tools.clone()); let tracker = tools::Tracker::new(); // The same ScopedFs also powers the IPC `ListCompletions` // query — keep a clone for the FS view we attach below, @@ -293,6 +296,7 @@ impl PodController { self_parent_socket.clone(), spawner_model.clone(), scope_handle.clone(), + scope_change_sink.clone(), )); worker.register_tool(send_to_pod_tool(spawned_registry.clone())); worker.register_tool(read_pod_output_tool(spawned_registry.clone())); diff --git a/crates/pod/src/pod.rs b/crates/pod/src/pod.rs index b3a4c073..a291885b 100644 --- a/crates/pod/src/pod.rs +++ b/crates/pod/src/pod.rs @@ -7,12 +7,12 @@ use llm_worker::llm_client::RequestConfig; use llm_worker::llm_client::client::LlmClient; use llm_worker::state::Mutable; use llm_worker::{ToolOutputLimits, UsageRecord, Worker, WorkerError, WorkerResult}; -use session_store::{EntryHash, SessionId, SessionStartState, Store, StoreError}; +use session_store::{EntryHash, PodScopeSnapshot, SessionId, SessionStartState, Store, StoreError}; use tracing::{info, warn}; use manifest::{ - PodManifest, PodManifestConfig, ResolveError, Scope, ScopeError, ScopeRule, SharedScope, - WorkerManifest, + PodManifest, PodManifestConfig, ResolveError, Scope, ScopeConfig, ScopeError, ScopeRule, + SharedScope, WorkerManifest, }; use crate::compact::state::CompactState; @@ -148,6 +148,10 @@ pub struct Pod { /// Phase 2 (consolidation) workers set this to false so the /// agentic worker pulls knowledge through the search tools instead. inject_resident_knowledge: bool, + /// Latest runtime scope snapshot queued by dynamic scope changes. + /// Drained into the session log before the next turn result is + /// persisted, so resume never silently reclaims delegated writes. + pending_scope_snapshot: Arc>>, /// Phase 1 (memory.extract) reentry guard. `true` while an extract /// worker is running; subsequent triggers are skipped per spec /// (`docs/plan/memory.md` §Phase 1 並走防止). `Arc` so @@ -222,6 +226,7 @@ impl Pod { workflow_registry: memory::WorkflowRegistry::empty(), memory_layout: None, inject_resident_knowledge: true, + pending_scope_snapshot: Arc::new(Mutex::new(None)), extract_in_flight: Arc::new(AtomicBool::new(false)), consolidation_in_flight: Arc::new(AtomicBool::new(false)), extract_pointer: Mutex::new(None), @@ -313,6 +318,55 @@ impl Pod { .update(|cur| cur.with_added_deny_rules(revoke.clone())) } + /// Snapshot the current runtime scope in the session log. The entry + /// is intentionally appended as soon as a session head exists: if the + /// process later exits while children keep their allocations, resume + /// can restore the narrowed scope instead of reclaiming delegated + /// writes. + pub async fn persist_scope_snapshot(&mut self) -> Result<(), StoreError> { + if self.head_hash.is_none() { + return Ok(()); + } + let snapshot = { + let scope = self.scope.snapshot(); + PodScopeSnapshot { + allow: scope.allow_rules(), + deny: scope.deny_rules(), + } + }; + session_store::save_pod_scope(&self.store, self.session_id, &mut self.head_hash, &snapshot) + .await + } + + /// Cloneable callback handed to dynamic-scope tools. It cannot append + /// directly to the async store from a sync tool callback, so it records + /// the latest snapshot and the controller flushes it after the tool + /// turn completes. + pub fn scope_change_sink(&self) -> Arc { + let pending = self.pending_scope_snapshot.clone(); + Arc::new(move |snapshot| { + *pending.lock().expect("pending_scope_snapshot poisoned") = Some(snapshot); + }) + } + + async fn flush_pending_scope_snapshot(&mut self) -> Result<(), StoreError> { + let snapshot = self + .pending_scope_snapshot + .lock() + .expect("pending_scope_snapshot poisoned") + .take(); + if let Some(snapshot) = snapshot { + session_store::save_pod_scope( + &self.store, + self.session_id, + &mut self.head_hash, + &snapshot, + ) + .await?; + } + Ok(()) + } + /// Direct access to the underlying Worker. pub fn worker(&self) -> &Worker { self.worker.as_ref().expect("worker taken during run") @@ -951,6 +1005,7 @@ impl Pod { let hash = session_store::create_session_with_id(&self.store, self.session_id, state).await?; self.head_hash = Some(hash); + self.persist_scope_snapshot().await?; return Ok(()); } let prev_session_id = self.session_id; @@ -1107,6 +1162,8 @@ impl Pod { session_store::save_delta(&self.store, self.session_id, &mut self.head_hash, new_items) .await?; + self.flush_pending_scope_snapshot().await?; + let turn_count = self.worker.as_ref().unwrap().turn_count(); session_store::save_turn_end( &self.store, @@ -1445,6 +1502,7 @@ impl Pod { .lock() .expect("usage_history poisoned") .clear(); + self.persist_scope_snapshot().await?; // Reset Phase 1 pointer alongside usage_history: the compacted // session has a fresh log with no `LogEntry::Extension` entries // yet, so a cold restore here would set extract_pointer to None @@ -1989,6 +2047,7 @@ impl Pod, St> { workflow_registry: common.workflow_registry, memory_layout: common.memory_layout, inject_resident_knowledge: true, + pending_scope_snapshot: Arc::new(Mutex::new(None)), extract_in_flight: Arc::new(AtomicBool::new(false)), consolidation_in_flight: Arc::new(AtomicBool::new(false)), extract_pointer: Mutex::new(None), @@ -2051,6 +2110,7 @@ impl Pod, St> { workflow_registry: common.workflow_registry, memory_layout: common.memory_layout, inject_resident_knowledge: true, + pending_scope_snapshot: Arc::new(Mutex::new(None)), extract_in_flight: Arc::new(AtomicBool::new(false)), consolidation_in_flight: Arc::new(AtomicBool::new(false)), extract_pointer: Mutex::new(None), @@ -2088,8 +2148,20 @@ impl Pod, St> { if state.head_hash.is_none() { return Err(PodError::SessionEmpty { session_id }); } + let scope_snapshot = state + .pod_scope + .clone() + .ok_or(PodError::SessionScopeMissing { session_id })?; - let common = prepare_pod_common(&manifest, &loader, /* parse_template */ false)?; + let common = prepare_pod_common_with_scope( + &manifest, + &loader, + /* parse_template */ false, + ScopeConfig { + allow: scope_snapshot.allow, + deny: scope_snapshot.deny, + }, + )?; // Atomic: register_pod inside install_top_level rejects when // another live allocation already holds `session_id`. Wrapping @@ -2100,11 +2172,12 @@ impl Pod, St> { .map_err(ScopeLockError::from)? .join(&manifest.pod.name) .join("sock"); - let scope_allocation = pod_registry::install_top_level( + let scope_allocation = pod_registry::install_top_level_with_deny( manifest.pod.name.clone(), std::process::id(), socket_path, common.scope.allow_rules(), + common.scope.deny_rules(), session_id, )?; @@ -2165,6 +2238,7 @@ impl Pod, St> { workflow_registry: common.workflow_registry, memory_layout: common.memory_layout, inject_resident_knowledge: true, + pending_scope_snapshot: Arc::new(Mutex::new(None)), extract_in_flight: Arc::new(AtomicBool::new(false)), consolidation_in_flight: Arc::new(AtomicBool::new(false)), extract_pointer: Mutex::new(extract_pointer), @@ -2379,6 +2453,11 @@ pub enum PodError { #[error("session {session_id} has no entries to restore")] SessionEmpty { session_id: SessionId }, + + #[error( + "session {session_id} has no persisted scope snapshot; refusing resume without explicit scope" + )] + SessionScopeMissing { session_id: SessionId }, } /// Bundle of resources that every high-level Pod constructor needs: @@ -2412,6 +2491,27 @@ fn prepare_pod_common( ) -> Result { let pwd = current_pwd()?; let scope = build_scope_with_memory(manifest, &pwd)?; + prepare_pod_common_from_scope(manifest, loader, parse_template, pwd, scope) +} + +fn prepare_pod_common_with_scope( + manifest: &PodManifest, + loader: &PromptLoader, + parse_template: bool, + scope_config: ScopeConfig, +) -> Result { + let pwd = current_pwd()?; + let scope = Scope::from_config(&scope_config).map_err(PodError::Scope)?; + prepare_pod_common_from_scope(manifest, loader, parse_template, pwd, scope) +} + +fn prepare_pod_common_from_scope( + manifest: &PodManifest, + loader: &PromptLoader, + parse_template: bool, + pwd: PathBuf, + scope: Scope, +) -> Result { if !scope.is_readable(&pwd) { return Err(PodError::PwdOutsideScope { pwd }); } diff --git a/crates/pod/src/spawn/tool.rs b/crates/pod/src/spawn/tool.rs index 1246318e..00acaca5 100644 --- a/crates/pod/src/spawn/tool.rs +++ b/crates/pod/src/spawn/tool.rs @@ -20,6 +20,7 @@ use manifest::{ use protocol::Method; use protocol::stream::JsonLineWriter; use serde::Deserialize; +use session_store::PodScopeSnapshot; use tokio::net::UnixStream; use tokio::process::Command; use tokio::time::sleep; @@ -127,6 +128,9 @@ pub struct SpawnPodTool { /// `effective_write` semantics: Write is the only permission /// tracked across Pods, so revocation only touches Write. spawner_scope: SharedScope, + /// Called after the spawner scope has been updated so the new + /// effective scope can be persisted to the session log. + scope_changed: Arc, } impl SpawnPodTool { @@ -139,6 +143,7 @@ impl SpawnPodTool { parent_socket: Option, spawner_model: ModelManifest, spawner_scope: SharedScope, + scope_changed: Arc, ) -> Self { Self { spawner_name, @@ -149,6 +154,7 @@ impl SpawnPodTool { parent_socket, spawner_model, spawner_scope, + scope_changed, } } } @@ -243,9 +249,12 @@ impl Tool for SpawnPodTool { if !revoke_write.is_empty() { self.spawner_scope .update(|cur| cur.with_added_deny_rules(revoke_write.clone())) - .map_err(|e| { - ToolError::ExecutionFailed(format!("revoke spawner scope: {e}")) - })?; + .map_err(|e| ToolError::ExecutionFailed(format!("revoke spawner scope: {e}")))?; + let current = self.spawner_scope.snapshot(); + (self.scope_changed)(PodScopeSnapshot { + allow: current.allow_rules(), + deny: current.deny_rules(), + }); } send_run(&predicted_socket, &input.task).await?; @@ -488,6 +497,7 @@ pub fn spawn_pod_tool( parent_socket: Option, spawner_model: ModelManifest, spawner_scope: SharedScope, + scope_changed: Arc, ) -> ToolDefinition { Arc::new(move || { let schema = schemars::schema_for!(SpawnPodInput); @@ -504,6 +514,7 @@ pub fn spawn_pod_tool( parent_socket.clone(), spawner_model.clone(), spawner_scope.clone(), + scope_changed.clone(), )); (meta, tool) }) diff --git a/crates/pod/tests/restore_test.rs b/crates/pod/tests/restore_test.rs index 3125fdb2..8cf74192 100644 --- a/crates/pod/tests/restore_test.rs +++ b/crates/pod/tests/restore_test.rs @@ -80,3 +80,31 @@ async fn restore_from_manifest_rejects_empty_session_log() { Ok(_) => panic!("expected empty session log to fail"), } } + +#[tokio::test] +async fn restore_from_manifest_rejects_session_without_scope_snapshot() { + let _lock = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); + + let store_tmp = tempfile::tempdir().unwrap(); + let store = FsStore::new(store_tmp.path()).await.unwrap(); + let manifest = pod::PodManifest::from_toml(MINIMAL_MANIFEST_TOML).unwrap(); + + let id = session_store::new_session_id(); + let state = session_store::SessionStartState { + system_prompt: None, + config: &Default::default(), + history: &[], + }; + session_store::create_session_with_id(&store, id, state) + .await + .unwrap(); + + let result = + Pod::restore_from_manifest(id, manifest, store, pod::PromptLoader::builtins_only()).await; + + match result { + Err(PodError::SessionScopeMissing { session_id }) => assert_eq!(session_id, id), + Err(other) => panic!("expected SessionScopeMissing, got {other:?}"), + Ok(_) => panic!("expected missing scope snapshot to fail"), + } +} diff --git a/crates/pod/tests/spawn_pod_test.rs b/crates/pod/tests/spawn_pod_test.rs index 212aa71e..3d37ebaa 100644 --- a/crates/pod/tests/spawn_pod_test.rs +++ b/crates/pod/tests/spawn_pod_test.rs @@ -187,6 +187,7 @@ async fn spawn_pod_delegates_scope_and_sends_run() { None, dummy_model(), spawner_scope.clone(), + std::sync::Arc::new(|_| {}), ); let (_meta, tool) = def(); @@ -275,6 +276,7 @@ async fn spawn_pod_rejects_scope_outside_spawner() { None, dummy_model(), spawner_scope.clone(), + std::sync::Arc::new(|_| {}), ); let (_meta, tool) = def(); @@ -346,6 +348,7 @@ async fn spawn_pod_rolls_back_reservation_when_socket_never_appears() { None, dummy_model(), spawner_scope.clone(), + std::sync::Arc::new(|_| {}), ); let (_meta, tool) = def(); diff --git a/crates/protocol/src/lib.rs b/crates/protocol/src/lib.rs index 160e55d4..c70a9ed3 100644 --- a/crates/protocol/src/lib.rs +++ b/crates/protocol/src/lib.rs @@ -439,7 +439,7 @@ pub enum ErrorCode { // --------------------------------------------------------------------------- /// A single allow or deny rule inside a scope configuration. -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] pub struct ScopeRule { /// Target path. Must be absolute by the time a `Scope` is built from /// this rule — relative paths are resolved per-layer against the diff --git a/crates/session-store/Cargo.toml b/crates/session-store/Cargo.toml index 3867f11d..da16aafb 100644 --- a/crates/session-store/Cargo.toml +++ b/crates/session-store/Cargo.toml @@ -16,6 +16,7 @@ thiserror = { workspace = true } sha2 = { workspace = true } hex = "0.4.3" protocol = { workspace = true } +tracing.workspace = true [dev-dependencies] tokio = { workspace = true, features = ["macros", "rt-multi-thread"] } diff --git a/crates/session-store/src/lib.rs b/crates/session-store/src/lib.rs index a8f16b07..25261277 100644 --- a/crates/session-store/src/lib.rs +++ b/crates/session-store/src/lib.rs @@ -41,11 +41,12 @@ pub use logged_item::{LoggedContentPart, LoggedItem, LoggedRole, from_logged, to pub use session::{ SessionStartState, create_compacted_session, create_session, create_session_with_id, ensure_head_or_fork, fork, fork_at, restore, save_config_changed, save_delta, save_extension, - save_run_completed, save_run_errored, save_turn_end, save_usage, save_user_input, + save_pod_scope, save_run_completed, save_run_errored, save_turn_end, save_usage, + save_user_input, }; pub use session_log::{ - EntryHash, HashedEntry, LogEntry, RestoredState, SessionOrigin, build_chain, collect_state, - compute_hash, + EntryHash, HashedEntry, LogEntry, POD_SCOPE_EXTENSION_DOMAIN, PodScopeSnapshot, RestoredState, + SessionOrigin, build_chain, collect_state, compute_hash, }; pub use store::{Store, StoreError}; diff --git a/crates/session-store/src/session.rs b/crates/session-store/src/session.rs index 26acc225..18f422ae 100644 --- a/crates/session-store/src/session.rs +++ b/crates/session-store/src/session.rs @@ -6,7 +6,7 @@ use crate::SessionId; use crate::logged_item::{LoggedItem, to_logged}; -use crate::session_log::{self, EntryHash, HashedEntry, LogEntry, SessionOrigin}; +use crate::session_log::{self, EntryHash, HashedEntry, LogEntry, PodScopeSnapshot, SessionOrigin}; use crate::store::{Store, StoreError}; use llm_worker::WorkerResult; use llm_worker::llm_client::RequestConfig; @@ -360,6 +360,24 @@ pub async fn save_extension( .await } +/// Log the Pod's latest runtime scope snapshot. +pub async fn save_pod_scope( + store: &impl Store, + session_id: SessionId, + head_hash: &mut Option, + snapshot: &PodScopeSnapshot, +) -> Result<(), StoreError> { + let payload = serde_json::to_value(snapshot)?; + save_extension( + store, + session_id, + head_hash, + session_log::POD_SCOPE_EXTENSION_DOMAIN, + payload, + ) + .await +} + /// Log a `ConfigChanged` entry. pub async fn save_config_changed( store: &impl Store, diff --git a/crates/session-store/src/session_log.rs b/crates/session-store/src/session_log.rs index 64f7257f..f5f2bd53 100644 --- a/crates/session-store/src/session_log.rs +++ b/crates/session-store/src/session_log.rs @@ -10,7 +10,7 @@ use llm_worker::llm_client::types::{Item, RequestConfig}; use llm_worker::{UsageRecord, WorkerResult}; -use protocol::Segment; +use protocol::{ScopeRule, Segment}; use serde::{Deserialize, Serialize}; use sha2::{Digest, Sha256}; @@ -197,6 +197,16 @@ pub struct SessionOrigin { pub at_hash: EntryHash, } +/// Domain used by Pod to persist its latest effective runtime scope. +pub const POD_SCOPE_EXTENSION_DOMAIN: &str = "pod.scope"; + +/// Payload stored in `LogEntry::Extension { domain: "pod.scope", .. }`. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct PodScopeSnapshot { + pub allow: Vec, + pub deny: Vec, +} + /// State collected from log entries. #[derive(Debug, Clone)] pub struct RestoredState { @@ -214,6 +224,9 @@ pub struct RestoredState { /// `LogEntry::Extension` を replay 順に積んだもの。`(domain, payload)`。 /// session-store は domain を不透明扱いし、各ドメインが自前で fold する。 pub extensions: Vec<(String, serde_json::Value)>, + /// Latest runtime scope snapshot persisted by the Pod. `None` means + /// the session predates scope persistence or the payload was corrupt. + pub pod_scope: Option, /// User submissions in original typed form, in submit order. /// One entry per `LogEntry::UserInput`; the K-th entry corresponds to /// the K-th `Item::user_message` derived during replay (modulo @@ -234,6 +247,7 @@ pub fn collect_state(entries: &[HashedEntry]) -> RestoredState { head_hash: None, usage_history: Vec::new(), extensions: Vec::new(), + pod_scope: None, user_segments: Vec::new(), }; @@ -296,6 +310,17 @@ pub fn collect_state(entries: &[HashedEntry]) -> RestoredState { LogEntry::Extension { domain, payload, .. } => { + if domain == POD_SCOPE_EXTENSION_DOMAIN { + match serde_json::from_value::(payload.clone()) { + Ok(snapshot) => state.pod_scope = Some(snapshot), + Err(err) => { + tracing::warn!( + error = %err, + "discarding malformed pod.scope snapshot from session log" + ); + } + } + } state.extensions.push((domain.clone(), payload.clone())); } } diff --git a/crates/tui/src/spawn.rs b/crates/tui/src/spawn.rs index b8829e48..586235b3 100644 --- a/crates/tui/src/spawn.rs +++ b/crates/tui/src/spawn.rs @@ -18,7 +18,9 @@ use std::process::Stdio; use std::time::Duration; use crossterm::event::{self, Event as TermEvent, KeyCode, KeyEventKind, KeyModifiers}; -use manifest::{PodManifestConfig, find_project_manifest_from, load_layer, user_manifest_path}; +use manifest::{ + PodManifestConfig, ScopeConfig, find_project_manifest_from, load_layer, user_manifest_path, +}; use ratatui::Terminal; use ratatui::backend::CrosstermBackend; use ratatui::layout::{Constraint, Layout}; @@ -50,6 +52,8 @@ pub enum SpawnOutcome { #[derive(Debug)] pub enum SpawnError { Io(io::Error), + Store(session_store::StoreError), + MissingResumeScope { session_id: SessionId }, PodLaunchFailed(io::Error), PodExitedEarly { stderr_tail: String }, Timeout, @@ -59,6 +63,11 @@ impl std::fmt::Display for SpawnError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::Io(e) => write!(f, "io error: {e}"), + Self::Store(e) => write!(f, "failed to read session log: {e}"), + Self::MissingResumeScope { session_id } => write!( + f, + "session {session_id} has no persisted scope snapshot; refusing resume without explicit scope" + ), Self::PodLaunchFailed(e) => write!(f, "failed to launch pod: {e}"), Self::PodExitedEarly { stderr_tail } => { if stderr_tail.is_empty() { @@ -84,6 +93,12 @@ impl From for SpawnError { } } +impl From for SpawnError { + fn from(e: session_store::StoreError) -> Self { + Self::Store(e) + } +} + type InlineTerminal = Terminal>; /// Source session for a resume run. `None` = fresh spawn (current @@ -140,6 +155,7 @@ pub async fn run(resume_from: Option) -> Result) -> Result String { pod.insert("name".into(), toml::Value::String(form.name.clone())); root.insert("pod".into(), toml::Value::Table(pod)); - if !form.cascade_has_scope { + if let Some(scope_config) = form.resume_scope.as_ref() { + root.insert( + "scope".into(), + toml::Value::try_from(scope_config).expect("scope serialisation cannot fail"), + ); + } else if !form.cascade_has_scope { let mut rule = toml::value::Table::new(); rule.insert( "target".into(), @@ -374,6 +398,24 @@ fn build_overlay_toml(form: &Form) -> String { toml::to_string(&toml::Value::Table(root)).expect("overlay serialisation cannot fail") } +async fn load_resume_scope(session_id: SessionId) -> Result { + let store_dir = manifest::paths::sessions_dir().ok_or_else(|| { + io::Error::new( + io::ErrorKind::NotFound, + "could not resolve sessions directory (set INSOMNIA_HOME, INSOMNIA_DATA_DIR, or HOME)", + ) + })?; + let store = session_store::FsStore::new(&store_dir).await?; + let state = session_store::restore(&store, session_id).await?; + let snapshot = state + .pod_scope + .ok_or(SpawnError::MissingResumeScope { session_id })?; + Ok(ScopeConfig { + allow: snapshot.allow, + deny: snapshot.deny, + }) +} + /// Resolves the binary used to launch a child Pod. Must point at a /// `pod`-compatible executable — the parent reads the child's stderr /// directly looking for `INSOMNIA-READY`, so any wrapper that emits @@ -450,6 +492,10 @@ struct Form { /// child pod is launched with `--session ` so it restores /// from `id` and appends to the same session log. resume_from: Option, + /// Scope snapshot recovered from the source session log. Set only for + /// resume runs, and serialized into the overlay instead of cwd-default + /// scope so resume does not silently broaden access. + resume_scope: Option, } impl Form { @@ -625,6 +671,7 @@ mod tests { message: None, editing: true, resume_from: None, + resume_scope: None, } } @@ -649,6 +696,30 @@ mod tests { assert!(parsed.get("scope").is_none()); } + #[test] + fn overlay_uses_resume_scope_snapshot() { + let mut f = form("agent-r", false); + f.resume_from = Some(session_store::new_session_id()); + f.resume_scope = Some(ScopeConfig { + allow: vec![manifest::ScopeRule { + target: PathBuf::from("/work/example"), + permission: manifest::Permission::Write, + recursive: true, + }], + deny: vec![manifest::ScopeRule { + target: PathBuf::from("/work/example/child"), + permission: manifest::Permission::Write, + recursive: true, + }], + }); + let toml_str = build_overlay_toml(&f); + let parsed: toml::Value = toml::from_str(&toml_str).unwrap(); + assert_eq!(parsed["pod"]["name"].as_str(), Some("agent-r")); + assert_eq!(parsed["scope"]["allow"].as_array().unwrap().len(), 1); + let deny = parsed["scope"]["deny"].as_array().unwrap(); + assert_eq!(deny[0]["target"].as_str(), Some("/work/example/child")); + } + #[test] fn cascade_merge_detects_scope_from_any_layer() { let user = PodManifestConfig::from_toml( diff --git a/tickets/resume-scope-claim.md b/tickets/resume-scope-claim.md deleted file mode 100644 index 48f0d896..00000000 --- a/tickets/resume-scope-claim.md +++ /dev/null @@ -1,40 +0,0 @@ -# Resume 時の Scope Claim の改善 - -## 背景 - -`tickets/dynamic-scope.md` で in-process Scope の縮小(SpawnPod による委譲時の Write revoke)と pod-registry 上の delegation 記録が揃った。これにより「セッション中に scope が縮む」状態を Pod / registry の双方が一貫して表現できる。 - -一方で `tui -r` 経由の resume は、`crates/tui/src/spawn.rs` の `build_overlay_toml` を通じて fresh spawn と同じロジックで overlay を合成する。manifest cascade に scope 宣言が無い場合、cwd 直下に `write` 再帰の rule を毎回付ける挙動。 - -このため次のような衝突が起きる: - -- セッション S が稼働中に SpawnPod で子 C を作り、cwd 配下のサブパスを委譲した -- 親が exit、子 C は registry 上にエントリが残存(あるいはまだ稼働中) -- ユーザーが S を resume しようとすると、新しい Pod が cwd 全体に `write` を claim → 委譲された部分と overlap して registry が拒否 - -resume の意図は「過去のセッションの続きを取る」であって「過去の effective scope より広い範囲を新たに掴み直す」ではない。現状は後者になっており、過去に手放した scope を resume が勝手に取り戻そうとする形になっている。 - -## ゴール - -セッション resume 時に claim する scope が、当該セッションが最後に持っていた effective scope に揃う。委譲済み・他 Pod が保持中の部分は claim 対象から外れ、resume された Pod は当時と同じ範囲だけで動作する。 - -## 要件 - -- resume 時の overlay 合成は cwd 盲信ではなく、当該セッションが過去に持っていた scope を反映する。情報源は session log / registry / その他のいずれでも良いが、何らかの永続情報から復元できること -- 過去の scope 情報が取得できないセッション(旧形式 / 破損)は、明示的なエラーで止めるか、ユーザーに確認させてから fresh claim にフォールバックする(黙って広げない) -- claim 試行が registry の既存 allocation と衝突した場合、エラーメッセージで衝突相手の Pod 名 と target rule の双方が伝わる(現状は Pod 名のみ) -- 委譲済みエントリ(`delegated_from` を持つ allocation)が同じセッションの委譲チェーンに属する場合、resume はその範囲を claim せずに進行する - -## 完了条件 - -- 「親 Pod がセッション中に SpawnPod を実行 → 子に委譲 → 親 exit → 親セッションを resume」のフローが、既存子 allocation を残したまま衝突なしで成功する -- 既存の無関係な Pod と衝突するケースは、衝突 rule と相手 Pod 名を含む明確なエラーで失敗する -- 単体テスト or 統合テストで上記 2 ケースが検証される -- 既存の fresh spawn (resume なし) の挙動には変化なし - -## 範囲外 - -- 過去スコープの永続化スキーマを新規導入するかの判断は実装時に決める(session log の既存フィールドで足りるなら追加しない) -- 自動的に既存 Pod を kill / reclaim して claim を通す挙動 -- protocol 経由の外部からの GrantScope / RevokeScope(`tickets/dynamic-scope.md` の範囲外宣言を継承) -- registry 側のエラー型の全面再設計(rule 情報を含めるための最小限の拡張のみで足りる想定)