Merge branch 'resume-scope-claim' into develop

# Conflicts:
#	TODO.md
This commit is contained in:
Keisuke Hirata 2026-05-03 18:59:01 +09:00
commit 30bb096513
20 changed files with 485 additions and 93 deletions

1
Cargo.lock generated
View File

@ -2951,6 +2951,7 @@ dependencies = [
"tempfile",
"thiserror 2.0.18",
"tokio",
"tracing",
"uuid",
]

View File

@ -2,7 +2,6 @@
- 内部 Worker / 内部 Pod の Workflow 化 → [tickets/internal-worker-workflow.md](tickets/internal-worker-workflow.md)
- Agent Skills を Workflow として ingest → [tickets/agent-skills.md](tickets/agent-skills.md)
- パーミッション: パターンベースのツール実行制御 → [tickets/permission-extension-point.md](tickets/permission-extension-point.md)
- Resume 時の Scope claim の改善 → [tickets/resume-scope-claim.md](tickets/resume-scope-claim.md)
- Pod CLI: マニフェスト関連フラグの整理 → [tickets/pod-cli-manifest-flags.md](tickets/pod-cli-manifest-flags.md)
- llm-worker のエラー耐性
- HTTP transient リトライ → [tickets/llm-worker-transient-retry.md](tickets/llm-worker-transient-retry.md)

View File

@ -32,7 +32,7 @@ pub(crate) fn rules_overlap(a: &ScopeRule, b: &ScopeRule) -> bool {
}
/// Does `cover` fully contain `inner`'s claimed paths?
fn covers_fully(cover: &ScopeRule, inner: &ScopeRule) -> bool {
pub(crate) fn covers_fully(cover: &ScopeRule, inner: &ScopeRule) -> bool {
if cover.permission < inner.permission {
return false;
}
@ -44,8 +44,9 @@ fn covers_fully(cover: &ScopeRule, inner: &ScopeRule) -> bool {
}
/// Check whether `rule` is contained in `parent`'s effective write
/// scope: its allow set covers `rule`, and no child of `parent` has
/// already taken a piece that would overlap `rule`.
/// scope: its allow set covers `rule`, no deny rule caps it, and no
/// child of `parent` has already taken a piece that would overlap
/// `rule`.
pub fn is_within_effective_write(lock: &LockFile, parent: &str, rule: &ScopeRule) -> bool {
let Some(alloc) = lock.find(parent) else {
return false;
@ -61,6 +62,14 @@ pub fn is_within_effective_write(lock: &LockFile, parent: &str, rule: &ScopeRule
if !covered {
return false;
}
let denied = alloc
.scope_deny
.iter()
.filter(|r| r.permission == Permission::Write)
.any(|r| rules_overlap(r, rule));
if denied {
return false;
}
let child_conflict = lock
.allocations
.iter()
@ -71,7 +80,14 @@ pub fn is_within_effective_write(lock: &LockFile, parent: &str, rule: &ScopeRule
!child_conflict
}
/// Find the Pod that actually owns a write scope overlapping `rule`.
/// The Pod and rule that actually own a conflicting write scope.
#[derive(Debug, Clone)]
pub struct ConflictOwner {
pub pod_name: String,
pub rule: ScopeRule,
}
/// Find the Pod/rule that actually owns a write scope overlapping `rule`.
///
/// Walks the delegation tree: if an allocation overlaps `rule`, we
/// descend into its children and return the deepest overlapping node
@ -82,38 +98,47 @@ pub fn find_conflict_owner(
lock: &LockFile,
rule: &ScopeRule,
exempt: Option<&str>,
) -> Option<String> {
) -> Option<ConflictOwner> {
find_conflict_owners(lock, rule, exempt).into_iter().next()
}
/// Find every top-level delegation tree owner that conflicts with `rule`.
pub fn find_conflict_owners(
lock: &LockFile,
rule: &ScopeRule,
exempt: Option<&str>,
) -> Vec<ConflictOwner> {
if rule.permission != Permission::Write {
return None;
return Vec::new();
}
for alloc in lock
.allocations
lock.allocations
.iter()
.filter(|a| a.delegated_from.is_none())
{
if let Some(owner) = find_conflict_in_subtree(lock, alloc, rule) {
if Some(owner.as_str()) == exempt {
continue;
}
return Some(owner);
}
}
None
.filter_map(|alloc| find_conflict_in_subtree(lock, alloc, rule))
.filter(|owner| Some(owner.pod_name.as_str()) != exempt)
.collect()
}
fn find_conflict_in_subtree(
lock: &LockFile,
alloc: &Allocation,
rule: &ScopeRule,
) -> Option<String> {
let overlaps_here = alloc
) -> Option<ConflictOwner> {
let overlapping_rule = alloc
.scope_allow
.iter()
.filter(|r| r.permission == Permission::Write)
.any(|r| rules_overlap(r, rule));
if !overlaps_here {
.find(|r| rules_overlap(r, rule))?;
let fully_denied_here = alloc
.scope_deny
.iter()
.filter(|r| r.permission == Permission::Write)
.any(|r| covers_fully(r, rule));
if fully_denied_here {
return None;
}
for child in lock
.allocations
.iter()
@ -123,14 +148,17 @@ fn find_conflict_in_subtree(
return Some(owner);
}
}
Some(alloc.pod_name.clone())
Some(ConflictOwner {
pod_name: alloc.pod_name.clone(),
rule: overlapping_rule.clone(),
})
}
#[cfg(test)]
mod tests {
use super::*;
use crate::test_util::*;
use crate::{ScopeLockError, delegate_scope, register_pod};
use crate::{ScopeLockError, delegate_scope, register_pod, register_pod_with_deny};
use tempfile::TempDir;
#[test]
@ -200,4 +228,69 @@ mod tests {
other => panic!("expected WriteConflict, got {other:?}"),
}
}
#[test]
fn denied_write_region_is_not_claimed_by_restored_parent() {
let dir = TempDir::new().unwrap();
let path = dir.path().join("pods.json");
let mut g = open_empty(&path);
register_pod_with_deny(
&mut g,
"parent".into(),
std::process::id(),
sock("parent"),
vec![write_rule("/src", true)],
vec![write_rule("/src/core", true)],
sid(),
)
.unwrap();
register_pod(
&mut g,
"child".into(),
std::process::id(),
sock("child"),
vec![write_rule("/src/core", true)],
sid(),
)
.unwrap();
}
#[test]
fn partial_deny_does_not_hide_parent_conflict() {
let dir = TempDir::new().unwrap();
let path = dir.path().join("pods.json");
let mut g = open_empty(&path);
register_pod_with_deny(
&mut g,
"parent".into(),
std::process::id(),
sock("parent"),
vec![write_rule("/src", true)],
vec![write_rule("/src/core", true)],
sid(),
)
.unwrap();
let err = register_pod(
&mut g,
"other".into(),
std::process::id(),
sock("other"),
vec![write_rule("/src", true)],
sid(),
)
.unwrap_err();
match err {
ScopeLockError::WriteConflict {
competitor,
competitor_rule,
..
} => {
assert_eq!(competitor, "parent");
assert_eq!(competitor_rule.target, std::path::PathBuf::from("/src"));
}
other => panic!("expected WriteConflict, got {other:?}"),
}
}
}

View File

@ -13,8 +13,12 @@ pub enum ScopeLockError {
Io(#[from] io::Error),
#[error("pod name `{0}` is already registered")]
DuplicatePodName(String),
#[error("requested scope `{}` conflicts with pod `{competitor}`", .rule.target.display())]
WriteConflict { competitor: String, rule: ScopeRule },
#[error("requested scope `{}` conflicts with pod `{competitor}` rule `{}`", .rule.target.display(), .competitor_rule.target.display())]
WriteConflict {
competitor: String,
rule: ScopeRule,
competitor_rule: ScopeRule,
},
#[error(
"requested scope `{}` is not within spawner `{spawner}`'s effective scope",
.rule.target.display()

View File

@ -22,11 +22,16 @@ mod table;
#[cfg(test)]
mod test_util;
pub use conflict::{find_conflict_owner, is_within_effective_write};
pub use conflict::{
ConflictOwner, find_conflict_owner, find_conflict_owners, is_within_effective_write,
};
pub use error::ScopeLockError;
pub use lifecycle::{
ScopeAllocationGuard, SessionLockInfo, adopt_allocation, install_top_level, lookup_session,
update_session,
ScopeAllocationGuard, SessionLockInfo, adopt_allocation, install_top_level,
install_top_level_with_deny, lookup_session, update_session,
};
pub use mutate::{
delegate_scope, reclaim_stale, reclaim_stale_with, register_pod, register_pod_with_deny,
release_pod,
};
pub use mutate::{delegate_scope, reclaim_stale, reclaim_stale_with, register_pod, release_pod};
pub use table::{Allocation, LockFile, LockFileGuard, default_registry_path};

View File

@ -8,7 +8,7 @@ use manifest::ScopeRule;
use session_store::SessionId;
use crate::error::ScopeLockError;
use crate::mutate::{register_pod, release_pod};
use crate::mutate::release_pod;
use crate::table::{LockFileGuard, default_registry_path};
/// Owned allocation: on drop, opens the lock file and releases this
@ -46,15 +46,30 @@ pub fn install_top_level(
socket: PathBuf,
scope_allow: Vec<ScopeRule>,
session_id: SessionId,
) -> Result<ScopeAllocationGuard, ScopeLockError> {
install_top_level_with_deny(pod_name, pid, socket, scope_allow, Vec::new(), session_id)
}
/// Open the default lock file, register a top-level Pod with explicit
/// deny rules, and return a guard that will release the allocation on
/// drop.
pub fn install_top_level_with_deny(
pod_name: String,
pid: u32,
socket: PathBuf,
scope_allow: Vec<ScopeRule>,
scope_deny: Vec<ScopeRule>,
session_id: SessionId,
) -> Result<ScopeAllocationGuard, ScopeLockError> {
let lock_path = default_registry_path()?;
let mut guard = LockFileGuard::open(&lock_path)?;
register_pod(
crate::mutate::register_pod_with_deny(
&mut guard,
pod_name.clone(),
pid,
socket,
scope_allow,
scope_deny,
session_id,
)?;
Ok(ScopeAllocationGuard {
@ -176,6 +191,7 @@ mod tests {
pid: placeholder_pid,
socket: sock(pod_name),
scope_allow: vec![write_rule("/tmp/child", true)],
scope_deny: Vec::new(),
delegated_from: None,
session_id: None,
});

View File

@ -7,7 +7,7 @@ use std::path::PathBuf;
use manifest::{Permission, ScopeRule};
use session_store::SessionId;
use crate::conflict::{find_conflict_owner, is_within_effective_write};
use crate::conflict::{find_conflict_owner, find_conflict_owners, is_within_effective_write};
use crate::error::ScopeLockError;
use crate::table::{Allocation, LockFileGuard};
@ -25,6 +25,38 @@ pub fn register_pod(
socket: PathBuf,
scope_allow: Vec<ScopeRule>,
session_id: SessionId,
) -> Result<(), ScopeLockError> {
register_pod_with_deny(
guard,
pod_name,
pid,
socket,
scope_allow,
Vec::new(),
session_id,
)
}
/// Register a top-level Pod with explicit deny rules that reduce the
/// claimed effective write scope.
///
/// Conflict semantics: if every Pod overlapping a requested allow rule
/// is fully covered by one of `scope_deny`, the conflict is suppressed
/// and the registration proceeds. The check is structural (deny ⊇
/// competitor.rule), not relational — it does not verify that the
/// competitor actually descends from this Pod's prior delegations.
/// In practice this is safe because the canonical caller is `restore`,
/// which derives `scope_deny` from the session's own snapshot, so any
/// covered competitor is guaranteed to be a descendant of the original
/// allocation. Direct callers must uphold the same invariant.
pub fn register_pod_with_deny(
guard: &mut LockFileGuard,
pod_name: String,
pid: u32,
socket: PathBuf,
scope_allow: Vec<ScopeRule>,
scope_deny: Vec<ScopeRule>,
session_id: SessionId,
) -> Result<(), ScopeLockError> {
reclaim_stale(guard);
if guard.data().find(&pod_name).is_some() {
@ -41,10 +73,22 @@ pub fn register_pod(
.iter()
.filter(|r| r.permission == Permission::Write)
{
if let Some(competitor) = find_conflict_owner(guard.data(), rule, None) {
let conflicts = find_conflict_owners(guard.data(), rule, None);
let all_denied = !conflicts.is_empty()
&& conflicts.iter().all(|owner| {
scope_deny
.iter()
.filter(|r| r.permission == Permission::Write)
.any(|deny| crate::conflict::covers_fully(deny, &owner.rule))
});
if all_denied {
continue;
}
if let Some(competitor) = conflicts.into_iter().next() {
return Err(ScopeLockError::WriteConflict {
competitor,
competitor: competitor.pod_name,
rule: rule.clone(),
competitor_rule: competitor.rule,
});
}
}
@ -53,6 +97,7 @@ pub fn register_pod(
pid,
socket,
scope_allow,
scope_deny,
delegated_from: None,
session_id: Some(session_id),
});
@ -88,8 +133,9 @@ pub fn delegate_scope(
if rule.permission == Permission::Write {
if let Some(competitor) = find_conflict_owner(guard.data(), rule, Some(spawner)) {
return Err(ScopeLockError::WriteConflict {
competitor,
competitor: competitor.pod_name,
rule: rule.clone(),
competitor_rule: competitor.rule,
});
}
}
@ -99,6 +145,7 @@ pub fn delegate_scope(
pid,
socket,
scope_allow,
scope_deny: Vec::new(),
delegated_from: Some(spawner.into()),
// Pre-reservation. The child fills in its own session_id when
// it calls `adopt_allocation` after the worker is built.

View File

@ -35,6 +35,11 @@ pub struct Allocation {
pub socket: PathBuf,
/// Allow rules granted to this Pod (write + read).
pub scope_allow: Vec<ScopeRule>,
/// Deny rules that cap this Pod's effective scope. Normally empty for
/// fresh allocations; restored Pods use this to avoid reclaiming
/// previously delegated write regions.
#[serde(default)]
pub scope_deny: Vec<ScopeRule>,
/// Name of the Pod that delegated scope to this one, or `None` for
/// a top-level Pod started directly by a human.
pub delegated_from: Option<String>,

View File

@ -134,6 +134,8 @@ impl PodController {
// `PodFsView` to the shared state once the latter exists.
let fs_for_view: tools::ScopedFs;
let scope_change_sink = pod.scope_change_sink();
// Register event bridge callbacks on the worker
{
let worker = pod.worker_mut();
@ -257,7 +259,8 @@ impl PodController {
// worker) reads from it, and any future scope mutation
// (SpawnPod-style revoke, future GrantScope) propagates
// through it.
let fs = tools::ScopedFs::with_shared_scope(scope_handle.clone(), pwd_for_tools.clone());
let fs =
tools::ScopedFs::with_shared_scope(scope_handle.clone(), pwd_for_tools.clone());
let tracker = tools::Tracker::new();
// The same ScopedFs also powers the IPC `ListCompletions`
// query — keep a clone for the FS view we attach below,
@ -293,6 +296,7 @@ impl PodController {
self_parent_socket.clone(),
spawner_model.clone(),
scope_handle.clone(),
scope_change_sink.clone(),
));
worker.register_tool(send_to_pod_tool(spawned_registry.clone()));
worker.register_tool(read_pod_output_tool(spawned_registry.clone()));

View File

@ -7,12 +7,12 @@ use llm_worker::llm_client::RequestConfig;
use llm_worker::llm_client::client::LlmClient;
use llm_worker::state::Mutable;
use llm_worker::{ToolOutputLimits, UsageRecord, Worker, WorkerError, WorkerResult};
use session_store::{EntryHash, SessionId, SessionStartState, Store, StoreError};
use session_store::{EntryHash, PodScopeSnapshot, SessionId, SessionStartState, Store, StoreError};
use tracing::{info, warn};
use manifest::{
PodManifest, PodManifestConfig, ResolveError, Scope, ScopeError, ScopeRule, SharedScope,
WorkerManifest,
PodManifest, PodManifestConfig, ResolveError, Scope, ScopeConfig, ScopeError, ScopeRule,
SharedScope, WorkerManifest,
};
use crate::compact::state::CompactState;
@ -148,6 +148,10 @@ pub struct Pod<C: LlmClient, St: Store> {
/// Phase 2 (consolidation) workers set this to false so the
/// agentic worker pulls knowledge through the search tools instead.
inject_resident_knowledge: bool,
/// Latest runtime scope snapshot queued by dynamic scope changes.
/// Drained into the session log before the next turn result is
/// persisted, so resume never silently reclaims delegated writes.
pending_scope_snapshot: Arc<Mutex<Option<PodScopeSnapshot>>>,
/// Phase 1 (memory.extract) reentry guard. `true` while an extract
/// worker is running; subsequent triggers are skipped per spec
/// (`docs/plan/memory.md` §Phase 1 並走防止). `Arc<AtomicBool>` so
@ -222,6 +226,7 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
workflow_registry: memory::WorkflowRegistry::empty(),
memory_layout: None,
inject_resident_knowledge: true,
pending_scope_snapshot: Arc::new(Mutex::new(None)),
extract_in_flight: Arc::new(AtomicBool::new(false)),
consolidation_in_flight: Arc::new(AtomicBool::new(false)),
extract_pointer: Mutex::new(None),
@ -313,6 +318,55 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
.update(|cur| cur.with_added_deny_rules(revoke.clone()))
}
/// Snapshot the current runtime scope in the session log. The entry
/// is intentionally appended as soon as a session head exists: if the
/// process later exits while children keep their allocations, resume
/// can restore the narrowed scope instead of reclaiming delegated
/// writes.
pub async fn persist_scope_snapshot(&mut self) -> Result<(), StoreError> {
if self.head_hash.is_none() {
return Ok(());
}
let snapshot = {
let scope = self.scope.snapshot();
PodScopeSnapshot {
allow: scope.allow_rules(),
deny: scope.deny_rules(),
}
};
session_store::save_pod_scope(&self.store, self.session_id, &mut self.head_hash, &snapshot)
.await
}
/// Cloneable callback handed to dynamic-scope tools. It cannot append
/// directly to the async store from a sync tool callback, so it records
/// the latest snapshot and the controller flushes it after the tool
/// turn completes.
pub fn scope_change_sink(&self) -> Arc<dyn Fn(PodScopeSnapshot) + Send + Sync> {
let pending = self.pending_scope_snapshot.clone();
Arc::new(move |snapshot| {
*pending.lock().expect("pending_scope_snapshot poisoned") = Some(snapshot);
})
}
async fn flush_pending_scope_snapshot(&mut self) -> Result<(), StoreError> {
let snapshot = self
.pending_scope_snapshot
.lock()
.expect("pending_scope_snapshot poisoned")
.take();
if let Some(snapshot) = snapshot {
session_store::save_pod_scope(
&self.store,
self.session_id,
&mut self.head_hash,
&snapshot,
)
.await?;
}
Ok(())
}
/// Direct access to the underlying Worker.
pub fn worker(&self) -> &Worker<C, Mutable> {
self.worker.as_ref().expect("worker taken during run")
@ -951,6 +1005,7 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
let hash =
session_store::create_session_with_id(&self.store, self.session_id, state).await?;
self.head_hash = Some(hash);
self.persist_scope_snapshot().await?;
return Ok(());
}
let prev_session_id = self.session_id;
@ -1107,6 +1162,8 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
session_store::save_delta(&self.store, self.session_id, &mut self.head_hash, new_items)
.await?;
self.flush_pending_scope_snapshot().await?;
let turn_count = self.worker.as_ref().unwrap().turn_count();
session_store::save_turn_end(
&self.store,
@ -1445,6 +1502,7 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
.lock()
.expect("usage_history poisoned")
.clear();
self.persist_scope_snapshot().await?;
// Reset Phase 1 pointer alongside usage_history: the compacted
// session has a fresh log with no `LogEntry::Extension` entries
// yet, so a cold restore here would set extract_pointer to None
@ -1989,6 +2047,7 @@ impl<St: Store> Pod<Box<dyn LlmClient>, St> {
workflow_registry: common.workflow_registry,
memory_layout: common.memory_layout,
inject_resident_knowledge: true,
pending_scope_snapshot: Arc::new(Mutex::new(None)),
extract_in_flight: Arc::new(AtomicBool::new(false)),
consolidation_in_flight: Arc::new(AtomicBool::new(false)),
extract_pointer: Mutex::new(None),
@ -2051,6 +2110,7 @@ impl<St: Store> Pod<Box<dyn LlmClient>, St> {
workflow_registry: common.workflow_registry,
memory_layout: common.memory_layout,
inject_resident_knowledge: true,
pending_scope_snapshot: Arc::new(Mutex::new(None)),
extract_in_flight: Arc::new(AtomicBool::new(false)),
consolidation_in_flight: Arc::new(AtomicBool::new(false)),
extract_pointer: Mutex::new(None),
@ -2088,8 +2148,20 @@ impl<St: Store> Pod<Box<dyn LlmClient>, St> {
if state.head_hash.is_none() {
return Err(PodError::SessionEmpty { session_id });
}
let scope_snapshot = state
.pod_scope
.clone()
.ok_or(PodError::SessionScopeMissing { session_id })?;
let common = prepare_pod_common(&manifest, &loader, /* parse_template */ false)?;
let common = prepare_pod_common_with_scope(
&manifest,
&loader,
/* parse_template */ false,
ScopeConfig {
allow: scope_snapshot.allow,
deny: scope_snapshot.deny,
},
)?;
// Atomic: register_pod inside install_top_level rejects when
// another live allocation already holds `session_id`. Wrapping
@ -2100,11 +2172,12 @@ impl<St: Store> Pod<Box<dyn LlmClient>, St> {
.map_err(ScopeLockError::from)?
.join(&manifest.pod.name)
.join("sock");
let scope_allocation = pod_registry::install_top_level(
let scope_allocation = pod_registry::install_top_level_with_deny(
manifest.pod.name.clone(),
std::process::id(),
socket_path,
common.scope.allow_rules(),
common.scope.deny_rules(),
session_id,
)?;
@ -2165,6 +2238,7 @@ impl<St: Store> Pod<Box<dyn LlmClient>, St> {
workflow_registry: common.workflow_registry,
memory_layout: common.memory_layout,
inject_resident_knowledge: true,
pending_scope_snapshot: Arc::new(Mutex::new(None)),
extract_in_flight: Arc::new(AtomicBool::new(false)),
consolidation_in_flight: Arc::new(AtomicBool::new(false)),
extract_pointer: Mutex::new(extract_pointer),
@ -2379,6 +2453,11 @@ pub enum PodError {
#[error("session {session_id} has no entries to restore")]
SessionEmpty { session_id: SessionId },
#[error(
"session {session_id} has no persisted scope snapshot; refusing resume without explicit scope"
)]
SessionScopeMissing { session_id: SessionId },
}
/// Bundle of resources that every high-level Pod constructor needs:
@ -2412,6 +2491,27 @@ fn prepare_pod_common(
) -> Result<PodCommon, PodError> {
let pwd = current_pwd()?;
let scope = build_scope_with_memory(manifest, &pwd)?;
prepare_pod_common_from_scope(manifest, loader, parse_template, pwd, scope)
}
fn prepare_pod_common_with_scope(
manifest: &PodManifest,
loader: &PromptLoader,
parse_template: bool,
scope_config: ScopeConfig,
) -> Result<PodCommon, PodError> {
let pwd = current_pwd()?;
let scope = Scope::from_config(&scope_config).map_err(PodError::Scope)?;
prepare_pod_common_from_scope(manifest, loader, parse_template, pwd, scope)
}
fn prepare_pod_common_from_scope(
manifest: &PodManifest,
loader: &PromptLoader,
parse_template: bool,
pwd: PathBuf,
scope: Scope,
) -> Result<PodCommon, PodError> {
if !scope.is_readable(&pwd) {
return Err(PodError::PwdOutsideScope { pwd });
}

View File

@ -20,6 +20,7 @@ use manifest::{
use protocol::Method;
use protocol::stream::JsonLineWriter;
use serde::Deserialize;
use session_store::PodScopeSnapshot;
use tokio::net::UnixStream;
use tokio::process::Command;
use tokio::time::sleep;
@ -127,6 +128,9 @@ pub struct SpawnPodTool {
/// `effective_write` semantics: Write is the only permission
/// tracked across Pods, so revocation only touches Write.
spawner_scope: SharedScope,
/// Called after the spawner scope has been updated so the new
/// effective scope can be persisted to the session log.
scope_changed: Arc<dyn Fn(PodScopeSnapshot) + Send + Sync>,
}
impl SpawnPodTool {
@ -139,6 +143,7 @@ impl SpawnPodTool {
parent_socket: Option<PathBuf>,
spawner_model: ModelManifest,
spawner_scope: SharedScope,
scope_changed: Arc<dyn Fn(PodScopeSnapshot) + Send + Sync>,
) -> Self {
Self {
spawner_name,
@ -149,6 +154,7 @@ impl SpawnPodTool {
parent_socket,
spawner_model,
spawner_scope,
scope_changed,
}
}
}
@ -243,9 +249,12 @@ impl Tool for SpawnPodTool {
if !revoke_write.is_empty() {
self.spawner_scope
.update(|cur| cur.with_added_deny_rules(revoke_write.clone()))
.map_err(|e| {
ToolError::ExecutionFailed(format!("revoke spawner scope: {e}"))
})?;
.map_err(|e| ToolError::ExecutionFailed(format!("revoke spawner scope: {e}")))?;
let current = self.spawner_scope.snapshot();
(self.scope_changed)(PodScopeSnapshot {
allow: current.allow_rules(),
deny: current.deny_rules(),
});
}
send_run(&predicted_socket, &input.task).await?;
@ -488,6 +497,7 @@ pub fn spawn_pod_tool(
parent_socket: Option<PathBuf>,
spawner_model: ModelManifest,
spawner_scope: SharedScope,
scope_changed: Arc<dyn Fn(PodScopeSnapshot) + Send + Sync>,
) -> ToolDefinition {
Arc::new(move || {
let schema = schemars::schema_for!(SpawnPodInput);
@ -504,6 +514,7 @@ pub fn spawn_pod_tool(
parent_socket.clone(),
spawner_model.clone(),
spawner_scope.clone(),
scope_changed.clone(),
));
(meta, tool)
})

View File

@ -80,3 +80,31 @@ async fn restore_from_manifest_rejects_empty_session_log() {
Ok(_) => panic!("expected empty session log to fail"),
}
}
#[tokio::test]
async fn restore_from_manifest_rejects_session_without_scope_snapshot() {
let _lock = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
let store_tmp = tempfile::tempdir().unwrap();
let store = FsStore::new(store_tmp.path()).await.unwrap();
let manifest = pod::PodManifest::from_toml(MINIMAL_MANIFEST_TOML).unwrap();
let id = session_store::new_session_id();
let state = session_store::SessionStartState {
system_prompt: None,
config: &Default::default(),
history: &[],
};
session_store::create_session_with_id(&store, id, state)
.await
.unwrap();
let result =
Pod::restore_from_manifest(id, manifest, store, pod::PromptLoader::builtins_only()).await;
match result {
Err(PodError::SessionScopeMissing { session_id }) => assert_eq!(session_id, id),
Err(other) => panic!("expected SessionScopeMissing, got {other:?}"),
Ok(_) => panic!("expected missing scope snapshot to fail"),
}
}

View File

@ -187,6 +187,7 @@ async fn spawn_pod_delegates_scope_and_sends_run() {
None,
dummy_model(),
spawner_scope.clone(),
std::sync::Arc::new(|_| {}),
);
let (_meta, tool) = def();
@ -275,6 +276,7 @@ async fn spawn_pod_rejects_scope_outside_spawner() {
None,
dummy_model(),
spawner_scope.clone(),
std::sync::Arc::new(|_| {}),
);
let (_meta, tool) = def();
@ -346,6 +348,7 @@ async fn spawn_pod_rolls_back_reservation_when_socket_never_appears() {
None,
dummy_model(),
spawner_scope.clone(),
std::sync::Arc::new(|_| {}),
);
let (_meta, tool) = def();

View File

@ -439,7 +439,7 @@ pub enum ErrorCode {
// ---------------------------------------------------------------------------
/// A single allow or deny rule inside a scope configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ScopeRule {
/// Target path. Must be absolute by the time a `Scope` is built from
/// this rule — relative paths are resolved per-layer against the

View File

@ -16,6 +16,7 @@ thiserror = { workspace = true }
sha2 = { workspace = true }
hex = "0.4.3"
protocol = { workspace = true }
tracing.workspace = true
[dev-dependencies]
tokio = { workspace = true, features = ["macros", "rt-multi-thread"] }

View File

@ -41,11 +41,12 @@ pub use logged_item::{LoggedContentPart, LoggedItem, LoggedRole, from_logged, to
pub use session::{
SessionStartState, create_compacted_session, create_session, create_session_with_id,
ensure_head_or_fork, fork, fork_at, restore, save_config_changed, save_delta, save_extension,
save_run_completed, save_run_errored, save_turn_end, save_usage, save_user_input,
save_pod_scope, save_run_completed, save_run_errored, save_turn_end, save_usage,
save_user_input,
};
pub use session_log::{
EntryHash, HashedEntry, LogEntry, RestoredState, SessionOrigin, build_chain, collect_state,
compute_hash,
EntryHash, HashedEntry, LogEntry, POD_SCOPE_EXTENSION_DOMAIN, PodScopeSnapshot, RestoredState,
SessionOrigin, build_chain, collect_state, compute_hash,
};
pub use store::{Store, StoreError};

View File

@ -6,7 +6,7 @@
use crate::SessionId;
use crate::logged_item::{LoggedItem, to_logged};
use crate::session_log::{self, EntryHash, HashedEntry, LogEntry, SessionOrigin};
use crate::session_log::{self, EntryHash, HashedEntry, LogEntry, PodScopeSnapshot, SessionOrigin};
use crate::store::{Store, StoreError};
use llm_worker::WorkerResult;
use llm_worker::llm_client::RequestConfig;
@ -360,6 +360,24 @@ pub async fn save_extension(
.await
}
/// Log the Pod's latest runtime scope snapshot.
pub async fn save_pod_scope(
store: &impl Store,
session_id: SessionId,
head_hash: &mut Option<EntryHash>,
snapshot: &PodScopeSnapshot,
) -> Result<(), StoreError> {
let payload = serde_json::to_value(snapshot)?;
save_extension(
store,
session_id,
head_hash,
session_log::POD_SCOPE_EXTENSION_DOMAIN,
payload,
)
.await
}
/// Log a `ConfigChanged` entry.
pub async fn save_config_changed(
store: &impl Store,

View File

@ -10,7 +10,7 @@
use llm_worker::llm_client::types::{Item, RequestConfig};
use llm_worker::{UsageRecord, WorkerResult};
use protocol::Segment;
use protocol::{ScopeRule, Segment};
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
@ -197,6 +197,16 @@ pub struct SessionOrigin {
pub at_hash: EntryHash,
}
/// Domain used by Pod to persist its latest effective runtime scope.
pub const POD_SCOPE_EXTENSION_DOMAIN: &str = "pod.scope";
/// Payload stored in `LogEntry::Extension { domain: "pod.scope", .. }`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct PodScopeSnapshot {
pub allow: Vec<ScopeRule>,
pub deny: Vec<ScopeRule>,
}
/// State collected from log entries.
#[derive(Debug, Clone)]
pub struct RestoredState {
@ -214,6 +224,9 @@ pub struct RestoredState {
/// `LogEntry::Extension` を replay 順に積んだもの。`(domain, payload)`。
/// session-store は domain を不透明扱いし、各ドメインが自前で fold する。
pub extensions: Vec<(String, serde_json::Value)>,
/// Latest runtime scope snapshot persisted by the Pod. `None` means
/// the session predates scope persistence or the payload was corrupt.
pub pod_scope: Option<PodScopeSnapshot>,
/// User submissions in original typed form, in submit order.
/// One entry per `LogEntry::UserInput`; the K-th entry corresponds to
/// the K-th `Item::user_message` derived during replay (modulo
@ -234,6 +247,7 @@ pub fn collect_state(entries: &[HashedEntry]) -> RestoredState {
head_hash: None,
usage_history: Vec::new(),
extensions: Vec::new(),
pod_scope: None,
user_segments: Vec::new(),
};
@ -296,6 +310,17 @@ pub fn collect_state(entries: &[HashedEntry]) -> RestoredState {
LogEntry::Extension {
domain, payload, ..
} => {
if domain == POD_SCOPE_EXTENSION_DOMAIN {
match serde_json::from_value::<PodScopeSnapshot>(payload.clone()) {
Ok(snapshot) => state.pod_scope = Some(snapshot),
Err(err) => {
tracing::warn!(
error = %err,
"discarding malformed pod.scope snapshot from session log"
);
}
}
}
state.extensions.push((domain.clone(), payload.clone()));
}
}

View File

@ -18,7 +18,9 @@ use std::process::Stdio;
use std::time::Duration;
use crossterm::event::{self, Event as TermEvent, KeyCode, KeyEventKind, KeyModifiers};
use manifest::{PodManifestConfig, find_project_manifest_from, load_layer, user_manifest_path};
use manifest::{
PodManifestConfig, ScopeConfig, find_project_manifest_from, load_layer, user_manifest_path,
};
use ratatui::Terminal;
use ratatui::backend::CrosstermBackend;
use ratatui::layout::{Constraint, Layout};
@ -50,6 +52,8 @@ pub enum SpawnOutcome {
#[derive(Debug)]
pub enum SpawnError {
Io(io::Error),
Store(session_store::StoreError),
MissingResumeScope { session_id: SessionId },
PodLaunchFailed(io::Error),
PodExitedEarly { stderr_tail: String },
Timeout,
@ -59,6 +63,11 @@ impl std::fmt::Display for SpawnError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Io(e) => write!(f, "io error: {e}"),
Self::Store(e) => write!(f, "failed to read session log: {e}"),
Self::MissingResumeScope { session_id } => write!(
f,
"session {session_id} has no persisted scope snapshot; refusing resume without explicit scope"
),
Self::PodLaunchFailed(e) => write!(f, "failed to launch pod: {e}"),
Self::PodExitedEarly { stderr_tail } => {
if stderr_tail.is_empty() {
@ -84,6 +93,12 @@ impl From<io::Error> for SpawnError {
}
}
impl From<session_store::StoreError> for SpawnError {
fn from(e: session_store::StoreError) -> Self {
Self::Store(e)
}
}
type InlineTerminal = Terminal<CrosstermBackend<io::Stdout>>;
/// Source session for a resume run. `None` = fresh spawn (current
@ -140,6 +155,7 @@ pub async fn run(resume_from: Option<SessionId>) -> Result<SpawnOutcome, SpawnEr
message: None,
editing: true,
resume_from,
resume_scope: None,
};
let mut terminal = make_inline_terminal()?;
@ -173,6 +189,9 @@ pub async fn run(resume_from: Option<SessionId>) -> Result<SpawnOutcome, SpawnEr
}
}
if let Some(id) = form.resume_from {
form.resume_scope = Some(load_resume_scope(id).await?);
}
let overlay_toml = build_overlay_toml(&form);
// Phase 2: launch pod and wait for ready line. Drop the cursor
@ -356,7 +375,12 @@ fn build_overlay_toml(form: &Form) -> String {
pod.insert("name".into(), toml::Value::String(form.name.clone()));
root.insert("pod".into(), toml::Value::Table(pod));
if !form.cascade_has_scope {
if let Some(scope_config) = form.resume_scope.as_ref() {
root.insert(
"scope".into(),
toml::Value::try_from(scope_config).expect("scope serialisation cannot fail"),
);
} else if !form.cascade_has_scope {
let mut rule = toml::value::Table::new();
rule.insert(
"target".into(),
@ -374,6 +398,24 @@ fn build_overlay_toml(form: &Form) -> String {
toml::to_string(&toml::Value::Table(root)).expect("overlay serialisation cannot fail")
}
async fn load_resume_scope(session_id: SessionId) -> Result<ScopeConfig, SpawnError> {
let store_dir = manifest::paths::sessions_dir().ok_or_else(|| {
io::Error::new(
io::ErrorKind::NotFound,
"could not resolve sessions directory (set INSOMNIA_HOME, INSOMNIA_DATA_DIR, or HOME)",
)
})?;
let store = session_store::FsStore::new(&store_dir).await?;
let state = session_store::restore(&store, session_id).await?;
let snapshot = state
.pod_scope
.ok_or(SpawnError::MissingResumeScope { session_id })?;
Ok(ScopeConfig {
allow: snapshot.allow,
deny: snapshot.deny,
})
}
/// Resolves the binary used to launch a child Pod. Must point at a
/// `pod`-compatible executable — the parent reads the child's stderr
/// directly looking for `INSOMNIA-READY`, so any wrapper that emits
@ -450,6 +492,10 @@ struct Form {
/// child pod is launched with `--session <id>` so it restores
/// from `id` and appends to the same session log.
resume_from: Option<SessionId>,
/// Scope snapshot recovered from the source session log. Set only for
/// resume runs, and serialized into the overlay instead of cwd-default
/// scope so resume does not silently broaden access.
resume_scope: Option<ScopeConfig>,
}
impl Form {
@ -625,6 +671,7 @@ mod tests {
message: None,
editing: true,
resume_from: None,
resume_scope: None,
}
}
@ -649,6 +696,30 @@ mod tests {
assert!(parsed.get("scope").is_none());
}
#[test]
fn overlay_uses_resume_scope_snapshot() {
let mut f = form("agent-r", false);
f.resume_from = Some(session_store::new_session_id());
f.resume_scope = Some(ScopeConfig {
allow: vec![manifest::ScopeRule {
target: PathBuf::from("/work/example"),
permission: manifest::Permission::Write,
recursive: true,
}],
deny: vec![manifest::ScopeRule {
target: PathBuf::from("/work/example/child"),
permission: manifest::Permission::Write,
recursive: true,
}],
});
let toml_str = build_overlay_toml(&f);
let parsed: toml::Value = toml::from_str(&toml_str).unwrap();
assert_eq!(parsed["pod"]["name"].as_str(), Some("agent-r"));
assert_eq!(parsed["scope"]["allow"].as_array().unwrap().len(), 1);
let deny = parsed["scope"]["deny"].as_array().unwrap();
assert_eq!(deny[0]["target"].as_str(), Some("/work/example/child"));
}
#[test]
fn cascade_merge_detects_scope_from_any_layer() {
let user = PodManifestConfig::from_toml(

View File

@ -1,40 +0,0 @@
# Resume 時の Scope Claim の改善
## 背景
`tickets/dynamic-scope.md` で in-process Scope の縮小SpawnPod による委譲時の Write revokeと pod-registry 上の delegation 記録が揃った。これにより「セッション中に scope が縮む」状態を Pod / registry の双方が一貫して表現できる。
一方で `tui -r` 経由の resume は、`crates/tui/src/spawn.rs` の `build_overlay_toml` を通じて fresh spawn と同じロジックで overlay を合成する。manifest cascade に scope 宣言が無い場合、cwd 直下に `write` 再帰の rule を毎回付ける挙動。
このため次のような衝突が起きる:
- セッション S が稼働中に SpawnPod で子 C を作り、cwd 配下のサブパスを委譲した
- 親が exit、子 C は registry 上にエントリが残存(あるいはまだ稼働中)
- ユーザーが S を resume しようとすると、新しい Pod が cwd 全体に `write` を claim → 委譲された部分と overlap して registry が拒否
resume の意図は「過去のセッションの続きを取る」であって「過去の effective scope より広い範囲を新たに掴み直す」ではない。現状は後者になっており、過去に手放した scope を resume が勝手に取り戻そうとする形になっている。
## ゴール
セッション resume 時に claim する scope が、当該セッションが最後に持っていた effective scope に揃う。委譲済み・他 Pod が保持中の部分は claim 対象から外れ、resume された Pod は当時と同じ範囲だけで動作する。
## 要件
- resume 時の overlay 合成は cwd 盲信ではなく、当該セッションが過去に持っていた scope を反映する。情報源は session log / registry / その他のいずれでも良いが、何らかの永続情報から復元できること
- 過去の scope 情報が取得できないセッション(旧形式 / 破損)は、明示的なエラーで止めるか、ユーザーに確認させてから fresh claim にフォールバックする(黙って広げない)
- claim 試行が registry の既存 allocation と衝突した場合、エラーメッセージで衝突相手の Pod 名 と target rule の双方が伝わる(現状は Pod 名のみ)
- 委譲済みエントリ(`delegated_from` を持つ allocationが同じセッションの委譲チェーンに属する場合、resume はその範囲を claim せずに進行する
## 完了条件
- 「親 Pod がセッション中に SpawnPod を実行 → 子に委譲 → 親 exit → 親セッションを resume」のフローが、既存子 allocation を残したまま衝突なしで成功する
- 既存の無関係な Pod と衝突するケースは、衝突 rule と相手 Pod 名を含む明確なエラーで失敗する
- 単体テスト or 統合テストで上記 2 ケースが検証される
- 既存の fresh spawn (resume なし) の挙動には変化なし
## 範囲外
- 過去スコープの永続化スキーマを新規導入するかの判断は実装時に決めるsession log の既存フィールドで足りるなら追加しない)
- 自動的に既存 Pod を kill / reclaim して claim を通す挙動
- protocol 経由の外部からの GrantScope / RevokeScope`tickets/dynamic-scope.md` の範囲外宣言を継承)
- registry 側のエラー型の全面再設計rule 情報を含めるための最小限の拡張のみで足りる想定)