yoi/crates/pod/src/spawn_pod.rs
2026-04-19 06:32:44 +09:00

380 lines
13 KiB
Rust

//! `SpawnPod` tool — launch a new Pod process as a child of this one.
//!
//! Wires scope-lock delegation, overlay-TOML construction, subprocess
//! launch, and socket handoff into a single `Tool` implementation. When
//! the LLM calls `SpawnPod`, a fresh `pod` binary is exec'd in its own
//! process group, the scope lock is updated atomically, and the child's
//! first turn is kicked off by handing its socket a `Method::Run`.
use std::path::{Path, PathBuf};
use std::process::Stdio;
use std::sync::Arc;
use std::time::Duration;
use async_trait::async_trait;
use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta, ToolOutput};
use manifest::{
Permission, PodManifestConfig, PodMetaConfig, ScopeConfig, ScopeRule, WorkerManifestConfig,
};
use protocol::Method;
use protocol::stream::JsonLineWriter;
use serde::Deserialize;
use tokio::net::UnixStream;
use tokio::process::Command;
use tokio::time::sleep;
use crate::runtime_dir::SpawnedPodRecord;
use crate::scope_lock::{self, LockFileGuard, ScopeLockError};
use crate::spawned_pod_registry::SpawnedPodRegistry;
/// Human-readable tool description attached to the `SpawnPod` tool
/// metadata by `spawn_pod_tool`.
const DESCRIPTION: &str = "Spawn a new Pod process to work on a delegated task. \
The spawner's write scope is reduced by the scope passed here; the spawned \
Pod receives its own socket and starts running `task` immediately. The \
spawned Pod outlives the spawner's current turn and can be contacted again \
through its socket path.";
/// Instruction reference used when the tool input omits `instruction`.
const DEFAULT_INSTRUCTION: &str = "$insomnia/default";
/// How long we will wait for the spawned Pod's socket to become
/// connectable before treating the spawn as failed.
const SOCKET_WAIT_TIMEOUT: Duration = Duration::from_secs(10);
// Arguments of one `SpawnPod` call, deserialised from the JSON string
// passed to `execute`. The same type is fed to `schemars::schema_for!`
// when the tool's input schema is built.
// NOTE(review): schemars presumably surfaces the `///` field docs below as
// schema descriptions, i.e. they are model-facing text — confirm before
// rewording them.
#[derive(Debug, Deserialize, schemars::JsonSchema)]
struct SpawnPodInput {
/// Identifier for the spawned Pod. Must be unique machine-wide.
name: String,
// Optional: when absent, `execute` falls back to `DEFAULT_INSTRUCTION`.
/// Instruction-file reference (e.g. `$insomnia/default`, `$user/my-agent`).
#[serde(default)]
instruction: Option<String>,
/// First message sent to the spawned Pod via `Method::Run`.
task: String,
// `parse_scope` rejects an empty list and any non-absolute target.
/// Allow rules delegated to the spawned Pod. Must be a subset of the
/// spawner's effective write scope.
scope: Vec<ScopeRuleInput>,
}
// One scope rule as it appears in the tool input. `parse_scope` validates
// it and converts it into a manifest `ScopeRule`.
// NOTE(review): the `///` field docs are presumably exported into the JSON
// schema by schemars — confirm before rewording them.
#[derive(Debug, Deserialize, schemars::JsonSchema)]
struct ScopeRuleInput {
/// Absolute target path. Relative paths are rejected.
target: PathBuf,
/// `"read"` or `"write"`.
permission: PermissionInput,
/// When `false`, the rule matches the target itself and its direct
/// children only. Defaults to `true`.
#[serde(default = "default_true")]
recursive: bool,
}
// Wire form of a rule's permission. `rename_all = "lowercase"` means the
// accepted JSON values are `"read"` and `"write"`. Converted into the
// manifest's `Permission` through the `From` impl in this file.
#[derive(Debug, Deserialize, schemars::JsonSchema, Clone, Copy)]
#[serde(rename_all = "lowercase")]
enum PermissionInput {
Read,
Write,
}
/// Serde default for `ScopeRuleInput::recursive`: a rule is recursive
/// unless the input explicitly says otherwise.
fn default_true() -> bool {
    true
}
/// Maps the wire-level permission onto the manifest's `Permission`,
/// variant for variant.
impl From<PermissionInput> for Permission {
    fn from(wire: PermissionInput) -> Self {
        match wire {
            PermissionInput::Read => Self::Read,
            PermissionInput::Write => Self::Write,
        }
    }
}
/// State behind the `SpawnPod` tool: everything `execute` needs to
/// reserve scope for a child Pod, launch it, and record the handoff.
pub struct SpawnPodTool {
    /// Name of the Pod doing the spawning. Passed to `delegate_scope` as
    /// the delegating Pod, and refused as a name for the child.
    spawner_name: String,
    /// Path to the spawner's own Unix socket. Forwarded to the child as
    /// the `--callback` argument and stored in the child's registry
    /// record so `Method::Notify` has somewhere to land.
    callback_socket: PathBuf,
    /// Root of the `$XDG_RUNTIME_DIR/insomnia/` tree. The child's socket
    /// is predicted to appear at `<runtime_base>/<pod name>/sock` before
    /// the child has bound it.
    runtime_base: PathBuf,
    /// Working directory written into the child's overlay as `pod.pwd`;
    /// the tool input has no field to change it.
    spawner_pwd: PathBuf,
    /// Shared registry of spawned children, also used by the pod-comm
    /// tools (`SendToPod` / `ReadPodOutput` / `StopPod` / `ListPods`).
    /// `execute` adds a `SpawnedPodRecord` here for children it launched;
    /// the list is persisted to `spawned_pods.json` on each add.
    registry: Arc<SpawnedPodRegistry>,
}
impl SpawnPodTool {
    /// Bundle the spawner's identity, socket, runtime directory, working
    /// directory and child registry into a tool instance. Performs no I/O.
    pub fn new(
        spawner_name: String,
        callback_socket: PathBuf,
        runtime_base: PathBuf,
        spawner_pwd: PathBuf,
        registry: Arc<SpawnedPodRegistry>,
    ) -> Self {
        // Field-init shorthand: each argument lands in the field of the
        // same name.
        Self {
            spawner_name,
            callback_socket,
            runtime_base,
            spawner_pwd,
            registry,
        }
    }
}
#[async_trait]
impl Tool for SpawnPodTool {
    /// Handle one `SpawnPod` call.
    ///
    /// Steps, in order: parse and validate the input, serialise the
    /// overlay, reserve the delegated scope in the scope lock, launch the
    /// child and wait for its socket, send it `Method::Run` with `task`,
    /// and add it to the spawned-pod registry.
    ///
    /// # Errors
    ///
    /// * `ToolError::InvalidArgument` — malformed JSON, a pod name that is
    ///   not a single path component or equals the spawner's name, an
    ///   invalid scope, or a scope-lock rejection caused by the request.
    /// * `ToolError::ExecutionFailed` — overlay serialisation, scope-lock
    ///   I/O, child start-up, `Method::Run` delivery or the registry write
    ///   failed.
    ///
    /// The scope reservation is rolled back only when the child fails to
    /// start. Once its socket is listening the child owns the allocation.
    async fn execute(&self, input_json: &str) -> Result<ToolOutput, ToolError> {
        let input: SpawnPodInput = serde_json::from_str(input_json)
            .map_err(|e| ToolError::InvalidArgument(format!("invalid SpawnPod input: {e}")))?;

        // The name becomes a directory under `runtime_base`. `Path::join`
        // follows separators and `..`, and an absolute argument replaces
        // the base outright, so anything other than one plain component
        // would put the predicted socket outside the runtime tree.
        let name_is_single_component = !input.name.is_empty()
            && input.name != "."
            && input.name != ".."
            && !input.name.contains(std::path::is_separator);
        if !name_is_single_component {
            return Err(ToolError::InvalidArgument(format!(
                "spawned pod name `{}` must be a single path component \
                 (non-empty, no path separators, not `.` or `..`)",
                input.name
            )));
        }

        // `delegate_scope` catches this too (as `DuplicatePodName`), but
        // the dedicated message is kinder to the LLM — which gets the
        // error back verbatim — than the generic duplicate-name error.
        if input.name == self.spawner_name {
            return Err(ToolError::InvalidArgument(format!(
                "spawned pod name `{}` collides with spawner's own name",
                input.name
            )));
        }

        let scope_allow = parse_scope(&input.scope)?;
        let instruction = input.instruction.as_deref().unwrap_or(DEFAULT_INSTRUCTION);
        let predicted_socket = self.runtime_base.join(&input.name).join("sock");

        // Serialise before touching the scope lock: this step is pure, so
        // doing it first means a serialisation failure never leaves a
        // reservation behind that would need a best-effort rollback.
        let overlay_toml =
            build_overlay_toml(&input.name, &self.spawner_pwd, instruction, &scope_allow)
                .map_err(|e| ToolError::ExecutionFailed(format!("overlay serialisation: {e}")))?;

        let lock_path = scope_lock::default_lock_path()
            .map_err(|e| ToolError::ExecutionFailed(format!("scope lock path: {e}")))?;

        // Reserve the allocation up front. Spawner's pid is a live
        // placeholder; the child will rewrite it via `adopt_allocation`.
        // The guard is scoped so the lock file is released before the
        // child is launched.
        {
            let mut guard = LockFileGuard::open(&lock_path)
                .map_err(|e| ToolError::ExecutionFailed(format!("scope lock open: {e}")))?;
            scope_lock::delegate_scope(
                &mut guard,
                &self.spawner_name,
                input.name.clone(),
                std::process::id(),
                predicted_socket.clone(),
                scope_allow.clone(),
            )
            .map_err(scope_lock_err_to_tool)?;
        }

        // Exec + socket bind happen before the child is observably alive,
        // so a failure here is the one case where the reservation is
        // rolled back.
        if let Err(e) = self.exec_child(&overlay_toml, &predicted_socket).await {
            self.release_reservation(&lock_path, &input.name);
            return Err(e);
        }

        // Child is live. Post-start errors propagate but do not roll
        // back the scope allocation — the child already owns it.
        let run_outcome = send_run(&predicted_socket, &input.task).await;

        // Record the child even when `Method::Run` delivery failed: it is
        // running either way, and without a registry entry the spawner
        // would have no record of it.
        let record = SpawnedPodRecord {
            pod_name: input.name.clone(),
            socket_path: predicted_socket.clone(),
            scope_delegated: scope_allow,
            callback_address: self.callback_socket.clone(),
        };
        let record_outcome = self
            .registry
            .add(record)
            .await
            .map_err(|e| ToolError::ExecutionFailed(format!("write spawned_pods.json: {e}")));

        // Report the earliest failure first.
        run_outcome?;
        record_outcome?;

        Ok(ToolOutput {
            summary: format!(
                "spawned pod `{}` listening on {}",
                input.name,
                predicted_socket.display()
            ),
            content: None,
        })
    }
}
impl SpawnPodTool {
    /// Launch the child `pod` binary and wait until its socket accepts
    /// connections.
    ///
    /// The binary is taken from `INSOMNIA_POD_COMMAND`, falling back to
    /// `pod`. The child runs in its own process group with all three
    /// standard streams attached to null, and receives `--adopt`,
    /// `--callback <spawner socket>` and `--overlay <toml>`.
    ///
    /// # Errors
    ///
    /// `ToolError::ExecutionFailed` when the process cannot be spawned or
    /// its socket does not become connectable within
    /// `SOCKET_WAIT_TIMEOUT`. In the second case the child is killed
    /// before returning, so a failed start leaves no stray process behind
    /// while the caller rolls back the scope reservation.
    async fn exec_child(
        &self,
        overlay_toml: &str,
        predicted_socket: &Path,
    ) -> Result<(), ToolError> {
        let pod_command = std::env::var("INSOMNIA_POD_COMMAND").unwrap_or_else(|_| "pod".into());
        let mut cmd = Command::new(&pod_command);
        cmd.arg("--adopt")
            .arg("--callback")
            .arg(&self.callback_socket)
            .arg("--overlay")
            .arg(overlay_toml)
            .stdin(Stdio::null())
            .stdout(Stdio::null())
            .stderr(Stdio::null())
            .process_group(0);
        let mut child = cmd.spawn().map_err(|e| {
            ToolError::ExecutionFailed(format!("failed to spawn `{pod_command}`: {e}"))
        })?;

        match wait_for_socket(predicted_socket, SOCKET_WAIT_TIMEOUT).await {
            // Default `kill_on_drop = false` keeps the process alive after
            // the `Child` is dropped at the end of this function. We
            // intentionally do not `.wait()` — when the spawner later
            // exits, init adopts any remaining orphans. Lifecycle tracking
            // lives in `spawned_pods.json`.
            Ok(()) => Ok(()),
            Err(wait_err) => {
                // The spawn is being reported as failed, so make sure the
                // process is really gone. Best-effort: the kill fails
                // harmlessly if the child has already exited.
                let _ = child.kill().await;
                Err(wait_err)
            }
        }
    }

    /// Best-effort rollback of the scope reservation made for `pod_name`.
    ///
    /// Failures to open the lock file or to release the entry are
    /// deliberately ignored: this only runs on an error path whose
    /// original error is the one worth reporting.
    fn release_reservation(&self, lock_path: &Path, pod_name: &str) {
        if let Ok(mut guard) = LockFileGuard::open(lock_path) {
            let _ = scope_lock::release_pod(&mut guard, pod_name);
        }
    }
}
/// Validate the scope rules from the tool input and convert them into
/// manifest `ScopeRule`s, preserving their order.
///
/// # Errors
///
/// `ToolError::InvalidArgument` when `rules` is empty, or at the first
/// rule whose `target` is not an absolute path.
fn parse_scope(rules: &[ScopeRuleInput]) -> Result<Vec<ScopeRule>, ToolError> {
    if rules.is_empty() {
        return Err(ToolError::InvalidArgument("scope must not be empty".into()));
    }

    let mut parsed = Vec::with_capacity(rules.len());
    for rule in rules {
        if !rule.target.is_absolute() {
            return Err(ToolError::InvalidArgument(format!(
                "scope.target must be absolute: {}",
                rule.target.display()
            )));
        }
        parsed.push(ScopeRule {
            target: rule.target.clone(),
            permission: rule.permission.into(),
            recursive: rule.recursive,
        });
    }
    Ok(parsed)
}
/// Build the overlay manifest for a child Pod and render it as TOML, in
/// the form handed to the child `pod` binary via `--overlay`.
///
/// The overlay sets `pod.name`, `pod.pwd`, `worker.instruction` and
/// `scope.allow` (with an empty `scope.deny`); everything else keeps its
/// `Default`. Going through `PodManifestConfig`'s `Serialize` impl keeps
/// that type the single source of truth for the manifest format.
///
/// # Errors
///
/// Returns the `toml::ser::Error` produced by serialisation.
fn build_overlay_toml(
    name: &str,
    pwd: &Path,
    instruction: &str,
    scope_allow: &[ScopeRule],
) -> Result<String, toml::ser::Error> {
    let pod = PodMetaConfig {
        name: Some(name.to_owned()),
        pwd: Some(pwd.to_owned()),
    };
    let worker = WorkerManifestConfig {
        instruction: Some(instruction.to_owned()),
        ..Default::default()
    };
    let scope = ScopeConfig {
        allow: scope_allow.to_vec(),
        deny: Vec::new(),
    };

    toml::to_string(&PodManifestConfig {
        pod,
        worker,
        scope,
        ..Default::default()
    })
}
/// Poll `path` until a Unix-socket connection to it succeeds.
///
/// A connection is attempted every 50 ms; the probe connection is closed
/// again immediately. At least one attempt is made even when `timeout`
/// is zero.
///
/// # Errors
///
/// `ToolError::ExecutionFailed` once `timeout` has elapsed without a
/// successful connection. The message carries the last connect error so
/// a missing socket file can be told apart from a refused connection.
async fn wait_for_socket(path: &Path, timeout: Duration) -> Result<(), ToolError> {
    let deadline = tokio::time::Instant::now() + timeout;
    loop {
        // No separate `path.exists()` pre-check: connecting already fails
        // when the file is absent, and skipping the check keeps a
        // blocking filesystem call out of this async function.
        let connect_err = match UnixStream::connect(path).await {
            Ok(_probe) => return Ok(()),
            Err(e) => e,
        };
        if tokio::time::Instant::now() >= deadline {
            return Err(ToolError::ExecutionFailed(format!(
                "spawned pod socket did not appear within {timeout:?}: {} \
                 (last connect error: {connect_err})",
                path.display()
            )));
        }
        sleep(Duration::from_millis(50)).await;
    }
}
/// Connect to the Pod listening on `socket` and deliver a single
/// `Method::Run` whose input is `task`.
///
/// # Errors
///
/// `ToolError::ExecutionFailed` when the connection cannot be opened or
/// the method line cannot be written.
async fn send_run(socket: &Path, task: &str) -> Result<(), ToolError> {
    let connection = match UnixStream::connect(socket).await {
        Ok(stream) => stream,
        Err(e) => {
            return Err(ToolError::ExecutionFailed(format!(
                "connect {}: {e}",
                socket.display()
            )));
        }
    };

    // Only the write half is used; the read half stays alive until this
    // function returns.
    let (_read_half, write_half) = connection.into_split();
    let mut line_writer = JsonLineWriter::new(write_half);

    let run = Method::Run {
        input: task.to_string(),
    };
    if let Err(e) = line_writer.write(&run).await {
        return Err(ToolError::ExecutionFailed(format!("send Method::Run: {e}")));
    }

    // Drop the writer to close the socket's write half. The flush
    // inside `JsonLineWriter::write` has already pushed the bytes
    // across, so the child will see a complete method line followed by
    // EOF.
    drop(line_writer);
    Ok(())
}
/// Translate a scope-lock failure into the tool error the LLM sees,
/// keeping the original message text.
///
/// Rejections caused by the request itself (not a subset, write
/// conflict, duplicate or unknown pod name) become `InvalidArgument`;
/// I/O failures become `ExecutionFailed`.
fn scope_lock_err_to_tool(e: ScopeLockError) -> ToolError {
    let message = e.to_string();
    match e {
        ScopeLockError::Io(_) => ToolError::ExecutionFailed(message),
        ScopeLockError::NotSubset { .. }
        | ScopeLockError::WriteConflict { .. }
        | ScopeLockError::DuplicatePodName(_)
        | ScopeLockError::UnknownPod(_) => ToolError::InvalidArgument(message),
    }
}
/// Factory for the `SpawnPod` tool.
///
/// Returns a `ToolDefinition` closure. Each call of that closure builds
/// the tool metadata (name `SpawnPod`, `DESCRIPTION`, and the JSON schema
/// derived from `SpawnPodInput`) together with a fresh `SpawnPodTool`
/// holding clones of the arguments given here. If the schema cannot be
/// converted to a JSON value, an empty object is used instead.
pub fn spawn_pod_tool(
    spawner_name: String,
    callback_socket: PathBuf,
    runtime_base: PathBuf,
    spawner_pwd: PathBuf,
    registry: Arc<SpawnedPodRegistry>,
) -> ToolDefinition {
    Arc::new(move || {
        let input_schema = serde_json::to_value(schemars::schema_for!(SpawnPodInput))
            .unwrap_or_else(|_| serde_json::json!({}));
        let meta = ToolMeta::new("SpawnPod")
            .description(DESCRIPTION)
            .input_schema(input_schema);

        let spawn_tool = SpawnPodTool::new(
            spawner_name.clone(),
            callback_socket.clone(),
            runtime_base.clone(),
            spawner_pwd.clone(),
            Arc::clone(&registry),
        );
        let tool: Arc<dyn Tool> = Arc::new(spawn_tool);

        (meta, tool)
    })
}