From f70975789e23540b978914b6b97cdd2cacc1fb09 Mon Sep 17 00:00:00 2001 From: Hare Date: Mon, 11 May 2026 22:46:18 +0900 Subject: [PATCH] refactor: extract workflow crate --- Cargo.lock | 16 ++ Cargo.toml | 2 + crates/memory/src/error.rs | 5 - crates/memory/src/lib.rs | 9 - crates/memory/src/linter/existing.rs | 15 +- crates/memory/src/linter/mod.rs | 159 +------------- crates/memory/src/linter/references.rs | 4 - crates/memory/src/schema/mod.rs | 2 - crates/memory/src/schema/workflow.rs | 50 ----- crates/memory/src/scope.rs | 15 +- crates/memory/src/tool/write.rs | 2 +- crates/memory/src/workspace.rs | 18 +- crates/pod/Cargo.toml | 1 + crates/pod/src/pod.rs | 47 +++-- crates/pod/src/prompt/system.rs | 3 +- crates/pod/src/workflow/mod.rs | 13 +- crates/workflow/Cargo.toml | 18 ++ crates/workflow/src/error.rs | 39 ++++ crates/workflow/src/lib.rs | 22 ++ crates/workflow/src/linter.rs | 223 ++++++++++++++++++++ crates/workflow/src/schema.rs | 90 ++++++++ crates/workflow/src/scope.rs | 36 ++++ crates/{memory => workflow}/src/skill.rs | 17 +- crates/workflow/src/slug.rs | 146 +++++++++++++ crates/{memory => workflow}/src/workflow.rs | 45 ++-- 25 files changed, 686 insertions(+), 311 deletions(-) delete mode 100644 crates/memory/src/schema/workflow.rs create mode 100644 crates/workflow/Cargo.toml create mode 100644 crates/workflow/src/error.rs create mode 100644 crates/workflow/src/lib.rs create mode 100644 crates/workflow/src/linter.rs create mode 100644 crates/workflow/src/schema.rs create mode 100644 crates/workflow/src/scope.rs rename crates/{memory => workflow}/src/skill.rs (97%) create mode 100644 crates/workflow/src/slug.rs rename crates/{memory => workflow}/src/workflow.rs (95%) diff --git a/Cargo.lock b/Cargo.lock index 3f2fab3a..1c4e4e94 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2164,6 +2164,7 @@ dependencies = [ "tools", "tracing", "uuid", + "workflow", ] [[package]] @@ -4398,6 +4399,21 @@ dependencies = [ "wasmparser", ] +[[package]] +name = "workflow" 
+version = "0.1.0" +dependencies = [ + "chrono", + "manifest", + "memory", + "serde", + "serde_json", + "serde_yaml", + "tempfile", + "thiserror 2.0.18", + "tracing", +] + [[package]] name = "writeable" version = "0.6.3" diff --git a/Cargo.toml b/Cargo.toml index fe47bfdc..05151fc9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,7 @@ members = [ "crates/tools", "crates/tui", "crates/memory", + "crates/workflow", ] [workspace.package] @@ -28,6 +29,7 @@ llm-worker = { path = "crates/llm-worker", version = "0.2" } llm-worker-macros = { path = "crates/llm-worker-macros", version = "0.2" } manifest = { path = "crates/manifest" } memory = { path = "crates/memory" } +workflow = { path = "crates/workflow" } pod-registry = { path = "crates/pod-registry" } protocol = { path = "crates/protocol" } provider = { path = "crates/provider" } diff --git a/crates/memory/src/error.rs b/crates/memory/src/error.rs index 8eddd086..0e9cebdd 100644 --- a/crates/memory/src/error.rs +++ b/crates/memory/src/error.rs @@ -69,11 +69,6 @@ pub enum LintError { #[error("body exceeds the size limit for this record kind: {actual} chars > {limit}")] BodyTooLong { actual: usize, limit: usize }, - #[error( - "write to a Workflow path is forbidden via the memory tool — Workflows are human-edited" - )] - WorkflowWriteForbidden, - #[error("slug `{0}` already exists; use the edit tool instead of creating a new record")] SlugAlreadyExists(String), diff --git a/crates/memory/src/lib.rs b/crates/memory/src/lib.rs index 30d4125d..596cf49a 100644 --- a/crates/memory/src/lib.rs +++ b/crates/memory/src/lib.rs @@ -13,11 +13,9 @@ pub mod linter; pub mod resident; pub mod schema; pub mod scope; -pub mod skill; pub mod slug; pub mod tool; pub mod usage; -pub mod workflow; pub mod workspace; pub use error::{LintError, LintWarning, MemoryError}; @@ -25,17 +23,10 @@ pub use extract::ExtractPointerPayload; pub use linter::{LintReport, Linter}; pub use resident::{ResidentKnowledgeEntry, collect_resident_knowledge}; 
pub use scope::deny_write_rules; -pub use skill::{ - SKILL_FILENAME, SkillParseError, SkillRecord, load_skills_from_dir, parse_skill_md, -}; pub use slug::Slug; pub use usage::{ UsageEvent, UsageEventKind, UsageRecordSnapshot, UsageReport, UsageReportRecord, UsageSource, append_resident_exposure_event, append_usage_event, append_use_event, build_usage_report, snapshot_record_from_bytes, snapshot_record_from_layout, }; -pub use workflow::{ - ResidentWorkflowEntry, ShadowedSkill, WORKFLOW_DESCRIPTION_HARD_CAP, WorkflowLoadError, - WorkflowRecord, WorkflowRegistry, WorkflowSource, load_workflows, -}; pub use workspace::WorkspaceLayout; diff --git a/crates/memory/src/linter/existing.rs b/crates/memory/src/linter/existing.rs index 90a0d12b..53427e5e 100644 --- a/crates/memory/src/linter/existing.rs +++ b/crates/memory/src/linter/existing.rs @@ -1,5 +1,4 @@ -//! Walks `/memory/{decisions,requests}/`, -//! `/workflow/`, and `/knowledge/` to collect +//! Walks `/memory/{decisions,requests}/` and `/knowledge/` to collect //! the slug set the linter needs for reference-integrity and //! same-slug-duplication checks. //! 
@@ -11,8 +10,7 @@ use std::io; use std::path::Path; use crate::schema::{ - DecisionFrontmatter, KnowledgeFrontmatter, RequestFrontmatter, WorkflowFrontmatter, - split_frontmatter, + DecisionFrontmatter, KnowledgeFrontmatter, RequestFrontmatter, split_frontmatter, }; use crate::slug::Slug; use crate::workspace::{RecordKind, WorkspaceLayout}; @@ -28,7 +26,6 @@ pub struct ExistingRecords { decisions: HashMap, requests: HashSet, knowledge: HashSet, - workflow: HashSet, } #[derive(Debug, Clone)] @@ -42,7 +39,7 @@ impl ExistingRecords { RecordKind::Decision => self.decisions.contains_key(slug), RecordKind::Request => self.requests.contains(slug), RecordKind::Knowledge => self.knowledge.contains(slug), - RecordKind::Workflow => self.workflow.contains(slug), + RecordKind::Workflow => false, RecordKind::Summary => false, } } @@ -56,7 +53,7 @@ impl ExistingRecords { RecordKind::Decision => self.decisions.keys().collect(), RecordKind::Request => self.requests.iter().collect(), RecordKind::Knowledge => self.knowledge.iter().collect(), - RecordKind::Workflow => self.workflow.iter().collect(), + RecordKind::Workflow => Vec::new(), RecordKind::Summary => Vec::new(), } } @@ -82,10 +79,6 @@ pub fn scan_existing(layout: &WorkspaceLayout) -> io::Result { let _ = parse_silent::(path); out.knowledge.insert(slug); })?; - scan_dir(&layout.workflow_dir(), |path, slug| { - let _ = parse_silent::(path); - out.workflow.insert(slug); - })?; Ok(out) } diff --git a/crates/memory/src/linter/mod.rs b/crates/memory/src/linter/mod.rs index fc050011..3039df49 100644 --- a/crates/memory/src/linter/mod.rs +++ b/crates/memory/src/linter/mod.rs @@ -23,9 +23,8 @@ use serde::de::DeserializeOwned; use crate::error::{LintError, LintWarning}; use crate::schema::{ DecisionFrontmatter, KnowledgeFrontmatter, RequestFrontmatter, SummaryFrontmatter, - WorkflowFrontmatter, split_frontmatter, + split_frontmatter, }; -use crate::workflow::WORKFLOW_DESCRIPTION_HARD_CAP; use crate::workspace::{ClassifiedPath, 
RecordKind, WorkspaceLayout}; pub use existing::{ExistingRecords, scan_existing}; @@ -99,12 +98,6 @@ impl Linter { } }; - // 2. Workflow paths are sub-Worker-forbidden at the tool layer. - if classified.kind == RecordKind::Workflow { - report.push_error(LintError::WorkflowWriteForbidden); - return report; - } - // 3. Frontmatter parse + kind-specific structural checks + // size limits. Reference-integrity needs the existing // record set, fetched once below. @@ -146,7 +139,9 @@ impl Linter { RecordKind::Summary => { self.check_kind::(content, &classified, &mut report); } - RecordKind::Workflow => unreachable!("guarded above"), + RecordKind::Workflow => { + unreachable!("workflow paths are not classified by memory linter") + } } report @@ -240,59 +235,6 @@ impl Linter { } } -impl Linter { - /// Workflow record validator exposed for human-edit paths - /// (CLI / pre-commit). Not used by the memory tool, which rejects - /// workflow writes outright. - /// - /// Verifies frontmatter shape, body size, and that every slug in - /// `requires` points at an existing Knowledge record under the - /// workspace's `knowledge/` directory. - pub fn lint_workflow(&self, content: &str) -> LintReport { - let mut report = LintReport::default(); - let parsed = match parse_frontmatter::(content) { - Ok(p) => p, - Err(e) => { - report.push_error(e); - return report; - } - }; - size::check_body::(parsed.body, &mut report); - - // Mirror the loader's cap so human-edit paths fail fast instead - // of surfacing the same error only at Pod startup. 
- if parsed.frontmatter.model_invokation { - let actual = parsed.frontmatter.description.chars().count(); - if actual > WORKFLOW_DESCRIPTION_HARD_CAP { - report.push_error(LintError::DescriptionTooLong { - actual, - limit: WORKFLOW_DESCRIPTION_HARD_CAP, - }); - } - } - - let existing = match existing::scan_existing(&self.layout) { - Ok(e) => e, - Err(e) => { - report.push_error(LintError::MalformedFrontmatter(format!( - "failed to scan existing records: {e}" - ))); - return report; - } - }; - for slug in &parsed.frontmatter.requires { - if !existing.contains(crate::workspace::RecordKind::Knowledge, slug) { - report.push_error(LintError::UnknownReference { - field: "requires", - kind: "knowledge", - slug: slug.to_string(), - }); - } - } - report - } -} - struct Parsed<'a, F> { frontmatter: F, body: &'a str, @@ -332,22 +274,6 @@ mod tests { (dir, linter) } - #[test] - fn workflow_write_rejected() { - let (dir, linter) = workspace(); - let path = dir.path().join(".insomnia/workflow/wf.md"); - let content = - "---\ndescription: x\nmodel_invokation: false\nuser_invocable: true\n---\nbody" - .to_string(); - let report = linter.lint(&path, &content, WriteMode::Create); - assert!( - report - .errors - .iter() - .any(|e| matches!(e, LintError::WorkflowWriteForbidden)) - ); - } - #[test] fn outside_memory_tree_rejected() { let (dir, linter) = workspace(); @@ -499,83 +425,6 @@ mod tests { ); } - #[test] - fn workflow_lint_accepts_valid_record() { - let (dir, linter) = workspace(); - // Place a Knowledge record that the workflow will reference. 
- let kn = dir.path().join(".insomnia/knowledge/foo.md"); - write( - &kn, - &format!( - "---\ncreated_at: {n}\nupdated_at: {n}\nkind: rule\ndescription: x\nmodel_invokation: false\nuser_invocable: true\nlast_sources: []\n---\n", - n = iso_now() - ), - ); - let wf = "---\ndescription: do thing\nmodel_invokation: false\nuser_invocable: true\nrequires: [foo]\n---\nstep 1\n".to_string(); - let report = linter.lint_workflow(&wf); - assert!(!report.has_errors(), "got errors: {:?}", report.errors); - } - - #[test] - fn workflow_lint_flags_unknown_requires() { - let (_dir, linter) = workspace(); - let wf = "---\ndescription: x\nmodel_invokation: false\nuser_invocable: true\nrequires: [missing-knowledge]\n---\n".to_string(); - let report = linter.lint_workflow(&wf); - assert!(report.errors.iter().any(|e| matches!( - e, - LintError::UnknownReference { - field: "requires", - kind: "knowledge", - .. - } - ))); - } - - #[test] - fn workflow_lint_flags_long_description_when_model_invokation() { - let (_dir, linter) = workspace(); - let desc = "x".repeat(crate::workflow::WORKFLOW_DESCRIPTION_HARD_CAP + 1); - let wf = format!( - "---\ndescription: {desc}\nmodel_invokation: true\nuser_invocable: true\n---\n" - ); - let report = linter.lint_workflow(&wf); - assert!( - report - .errors - .iter() - .any(|e| matches!(e, LintError::DescriptionTooLong { .. })), - ); - } - - #[test] - fn workflow_lint_allows_long_description_when_not_model_invokation() { - let (_dir, linter) = workspace(); - let desc = "x".repeat(crate::workflow::WORKFLOW_DESCRIPTION_HARD_CAP + 1); - let wf = format!( - "---\ndescription: {desc}\nmodel_invokation: false\nuser_invocable: true\n---\n" - ); - let report = linter.lint_workflow(&wf); - assert!( - !report - .errors - .iter() - .any(|e| matches!(e, LintError::DescriptionTooLong { .. 
})), - ); - } - - #[test] - fn workflow_lint_collects_multiple_unknown_requires() { - let (_dir, linter) = workspace(); - let wf = "---\ndescription: x\nmodel_invokation: false\nuser_invocable: true\nrequires: [a, b, c]\n---\n".to_string(); - let report = linter.lint_workflow(&wf); - let unknown_count = report - .errors - .iter() - .filter(|e| matches!(e, LintError::UnknownReference { .. })) - .count(); - assert_eq!(unknown_count, 3); - } - #[test] fn similar_slugs_warns_on_cluster() { let (dir, linter) = workspace(); diff --git a/crates/memory/src/linter/references.rs b/crates/memory/src/linter/references.rs index 00fe02b4..fc9dccae 100644 --- a/crates/memory/src/linter/references.rs +++ b/crates/memory/src/linter/references.rs @@ -1,8 +1,4 @@ //! Reference-integrity checks: `replaced_by` existence + cycle detection. -//! -//! `requires` (Workflow) is checked symmetrically when/if the Workflow -//! linter is invoked from a human-edit path; the memory tool itself -//! never writes Workflow records. use std::collections::HashSet; diff --git a/crates/memory/src/schema/mod.rs b/crates/memory/src/schema/mod.rs index f1db752b..5f958229 100644 --- a/crates/memory/src/schema/mod.rs +++ b/crates/memory/src/schema/mod.rs @@ -10,11 +10,9 @@ mod decision; mod knowledge; mod request; mod summary; -mod workflow; pub use common::{Frontmatter, SourceRef, split_frontmatter}; pub use decision::{DecisionFrontmatter, DecisionStatus}; pub use knowledge::{KNOWLEDGE_DESCRIPTION_HARD_CAP, KnowledgeFrontmatter}; pub use request::RequestFrontmatter; pub use summary::SummaryFrontmatter; -pub use workflow::WorkflowFrontmatter; diff --git a/crates/memory/src/schema/workflow.rs b/crates/memory/src/schema/workflow.rs deleted file mode 100644 index 8dc5620b..00000000 --- a/crates/memory/src/schema/workflow.rs +++ /dev/null @@ -1,50 +0,0 @@ -//! Workflow frontmatter schema. -//! -//! NOTE: Workflows are written by humans, not by the memory tool. The -//! 
linter only validates frontmatter when invoked directly (e.g. by a -//! future CLI / pre-commit hook). The memory write/edit tool rejects -//! `.insomnia/workflow/` paths outright via -//! [`LintError::WorkflowWriteForbidden`]. - -use chrono::{DateTime, Utc}; -use serde::{Deserialize, Serialize}; - -use crate::schema::common::Frontmatter; -use crate::slug::Slug; - -#[derive(Debug, Clone, Deserialize, Serialize)] -pub struct WorkflowFrontmatter { - /// Workflows do not require timestamps in the MVP. Human-authored files - /// may carry them; when absent the linter uses Unix epoch as a neutral - /// placeholder for the shared `Frontmatter` trait. - #[serde(default)] - pub updated_at: Option>, - #[serde(default)] - pub created_at: Option>, - pub description: String, - #[serde(default)] - pub model_invokation: bool, - #[serde(default = "default_user_invocable")] - pub user_invocable: bool, - #[serde(default)] - pub requires: Vec, -} - -fn default_user_invocable() -> bool { - true -} - -fn epoch() -> DateTime { - DateTime::::from_timestamp(0, 0).expect("Unix epoch timestamp is valid") -} - -impl Frontmatter for WorkflowFrontmatter { - const BODY_LIMIT: usize = 8000; - - fn created_at(&self) -> DateTime { - self.created_at.or(self.updated_at).unwrap_or_else(epoch) - } - fn updated_at(&self) -> DateTime { - self.updated_at.unwrap_or_else(epoch) - } -} diff --git a/crates/memory/src/scope.rs b/crates/memory/src/scope.rs index 30e34dc9..faf1596b 100644 --- a/crates/memory/src/scope.rs +++ b/crates/memory/src/scope.rs @@ -13,17 +13,13 @@ use manifest::{Permission, ScopeRule}; use crate::workspace::WorkspaceLayout; -/// Build deny rules that strip Write permission from `/memory/`, -/// `/knowledge/`, and `/workflow/`. Recursive — -/// every descendant is capped at Read for the generic tools. -/// -/// Workflow files are human-edited on the host side; the generic CRUD -/// tools must not touch them. 
+/// Build deny rules that strip Write permission from `/memory/` +/// and `/knowledge/`. Recursive — every descendant is capped at +/// Read for the generic tools. pub fn deny_write_rules(layout: &WorkspaceLayout) -> Vec { vec![ deny_write(layout.memory_dir().as_path()), deny_write(layout.knowledge_dir().as_path()), - deny_write(layout.workflow_dir().as_path()), ] } @@ -41,14 +37,13 @@ mod tests { use std::path::PathBuf; #[test] - fn deny_targets_memory_knowledge_and_workflow() { + fn deny_targets_memory_and_knowledge() { let layout = WorkspaceLayout::new(PathBuf::from("/ws")); let rules = deny_write_rules(&layout); - assert_eq!(rules.len(), 3); + assert_eq!(rules.len(), 2); assert_eq!(rules[0].target, PathBuf::from("/ws/.insomnia/memory")); assert_eq!(rules[0].permission, Permission::Write); assert!(rules[0].recursive); assert_eq!(rules[1].target, PathBuf::from("/ws/.insomnia/knowledge")); - assert_eq!(rules[2].target, PathBuf::from("/ws/.insomnia/workflow")); } } diff --git a/crates/memory/src/tool/write.rs b/crates/memory/src/tool/write.rs index 6ca72ab8..1ad3e79f 100644 --- a/crates/memory/src/tool/write.rs +++ b/crates/memory/src/tool/write.rs @@ -2,7 +2,7 @@ //! //! Creates or overwrites a memory or knowledge record by `(kind, slug)`. //! Pre-write Linter validates frontmatter, slug uniqueness (Create only), -//! reference integrity, size limits, and the workflow-write ban. On any +//! reference integrity, size limits. On any //! Linter error the tool returns `ToolError::InvalidArgument` with all //! violations aggregated and the file is **not** written. diff --git a/crates/memory/src/workspace.rs b/crates/memory/src/workspace.rs index b8ececf0..68834e32 100644 --- a/crates/memory/src/workspace.rs +++ b/crates/memory/src/workspace.rs @@ -153,8 +153,8 @@ impl WorkspaceLayout { } /// Classify a path under the memory tree. 
Returns `None` if the - /// path is not under `.insomnia/memory/`, `.insomnia/knowledge/`, - /// or `.insomnia/workflow/` of this workspace, or if it lives in + /// path is not under `.insomnia/memory/` or `.insomnia/knowledge/` + /// of this workspace, or if it lives in /// `_staging/` / `_usage/` (opaque subsystem-owned trees). /// /// On a conventional path that's *almost* a record but malformed @@ -164,14 +164,10 @@ impl WorkspaceLayout { pub fn classify(&self, path: &Path) -> Result, LintError> { let memory = self.memory_dir(); let knowledge = self.knowledge_dir(); - let workflow = self.workflow_dir(); if let Ok(rel) = path.strip_prefix(&knowledge) { return Ok(Some(classify_kinded_md(rel, RecordKind::Knowledge, path)?)); } - if let Ok(rel) = path.strip_prefix(&workflow) { - return Ok(Some(classify_kinded_md(rel, RecordKind::Workflow, path)?)); - } let rel = match path.strip_prefix(&memory) { Ok(r) => r, Err(_) => return Ok(None), @@ -277,16 +273,6 @@ mod tests { assert_eq!(cp.kind, RecordKind::Knowledge); } - #[test] - fn classifies_workflow() { - let cp = layout() - .classify(&PathBuf::from("/ws/.insomnia/workflow/wf.md")) - .unwrap() - .unwrap(); - assert_eq!(cp.kind, RecordKind::Workflow); - assert_eq!(cp.slug.unwrap().as_str(), "wf"); - } - #[test] fn workflow_under_memory_is_invalid_path() { let err = layout() diff --git a/crates/pod/Cargo.toml b/crates/pod/Cargo.toml index 8b1610ab..a655c25c 100644 --- a/crates/pod/Cargo.toml +++ b/crates/pod/Cargo.toml @@ -27,6 +27,7 @@ fs4 = { workspace = true, features = ["sync"] } libc = { workspace = true } schemars = { workspace = true } memory = { workspace = true } +workflow-crate = { package = "workflow", path = "../workflow" } uuid = { workspace = true, features = ["v7"] } session-metrics = { workspace = true } diff --git a/crates/pod/src/pod.rs b/crates/pod/src/pod.rs index c1c2ff75..51bea2f0 100644 --- a/crates/pod/src/pod.rs +++ b/crates/pod/src/pod.rs @@ -150,7 +150,7 @@ pub struct Pod { prompts: Arc, /// 
Registry loaded from `/.insomnia/workflow/*.md` when /// memory is enabled. Missing memory config keeps this empty. - workflow_registry: memory::WorkflowRegistry, + workflow_registry: workflow_crate::WorkflowRegistry, /// Memory workspace layout used by the workflow resolver to load required /// Knowledge records by exact slug. memory_layout: Option, @@ -323,7 +323,7 @@ impl Pod { scope_allocation: None, callback_socket: None, prompts, - workflow_registry: memory::WorkflowRegistry::empty(), + workflow_registry: workflow_crate::WorkflowRegistry::empty(), memory_layout: None, inject_resident_knowledge: true, pending_scope_snapshot: Arc::new(Mutex::new(None)), @@ -865,13 +865,13 @@ impl Pod { } else { None }; - let resident_workflows: Vec = + let resident_workflows: Vec = if self.inject_resident_knowledge && self.memory_layout.is_some() { self.workflow_registry.resident_entries() } else { Vec::new() }; - let resident_workflow_slice: Option<&[memory::ResidentWorkflowEntry]> = + let resident_workflow_slice: Option<&[workflow_crate::ResidentWorkflowEntry]> = if self.inject_resident_knowledge && self.memory_layout.is_some() { Some(&resident_workflows) } else { @@ -1106,7 +1106,7 @@ impl Pod { fn resident_exposure_snapshots( &self, knowledge: &[memory::ResidentKnowledgeEntry], - workflows: &[memory::ResidentWorkflowEntry], + workflows: &[workflow_crate::ResidentWorkflowEntry], ) -> Vec { let Some(layout) = self.memory_layout.as_ref() else { return Vec::new(); @@ -1220,8 +1220,8 @@ impl Pod { let Segment::WorkflowInvoke { slug } = seg else { continue; }; - let parsed = - memory::Slug::parse(slug.clone()).map_err(WorkflowResolveError::InvalidSlug)?; + let parsed = workflow_crate::Slug::parse(slug.clone()) + .map_err(WorkflowResolveError::InvalidSlug)?; let record = self .workflow_registry .get(&parsed) @@ -2886,7 +2886,7 @@ pub enum PodError { ConsolidationLock(#[source] memory::consolidate::LockError), #[error("workflow load failed: {0}")] - WorkflowLoad(#[source] 
memory::WorkflowLoadError), + WorkflowLoad(#[source] workflow_crate::WorkflowLoadError), #[error("workflow invocation failed: {0}")] WorkflowResolve(#[from] WorkflowResolveError), @@ -2909,14 +2909,14 @@ struct PodCommon { scope: Scope, client: Box, prompts: Arc, - workflow_registry: memory::WorkflowRegistry, + workflow_registry: workflow_crate::WorkflowRegistry, memory_layout: Option, system_prompt_template: Option, /// SKILL.md shadow events surfaced during workflow-registry build. /// The Pod constructor drains these into the notify buffer right /// after the Pod is materialised so the first LLM request observes /// any skill ↔ workflow collisions. - skill_shadows: Vec, + skill_shadows: Vec, } /// Resolve pwd / scope / LLM client / prompt catalog from a validated @@ -2968,8 +2968,8 @@ fn prepare_pod_common_from_scope( .as_ref() .map(|mem| memory::WorkspaceLayout::resolve(mem, &pwd)); let mut workflow_registry = match memory_layout.as_ref() { - Some(layout) => memory::load_workflows(layout).map_err(PodError::WorkflowLoad)?, - None => memory::WorkflowRegistry::empty(), + Some(layout) => workflow_crate::load_workflows(layout).map_err(PodError::WorkflowLoad)?, + None => workflow_crate::WorkflowRegistry::empty(), }; let skill_shadows = ingest_skills(&mut workflow_registry, manifest); @@ -2998,21 +2998,21 @@ fn prepare_pod_common_from_scope( /// /// Skills come exclusively from the manifest's `[skills] directories` /// list (resolved against the manifest base directory). Internal -/// Workflows already loaded via [`memory::load_workflows`] take priority +/// Workflows already loaded via [`workflow_crate::load_workflows`] take priority /// over skills sharing the same slug; collisions are surfaced as -/// [`memory::ShadowedSkill`] events that the caller pushes onto the +/// [`workflow_crate::ShadowedSkill`] events that the caller pushes onto the /// Pod's notification buffer. 
fn ingest_skills( - registry: &mut memory::WorkflowRegistry, + registry: &mut workflow_crate::WorkflowRegistry, manifest: &PodManifest, -) -> Vec { +) -> Vec { let mut shadows = Vec::new(); let Some(skills_cfg) = manifest.skills.as_ref() else { return shadows; }; for dir in &skills_cfg.directories { - for skill in memory::load_skills_from_dir(dir) { - let source = memory::WorkflowSource::Skill { dir: dir.clone() }; + for skill in workflow_crate::load_skills_from_dir(dir) { + let source = workflow_crate::WorkflowSource::Skill { dir: dir.clone() }; let record = skill.into_workflow_record(source); if let Some(shadow) = registry.merge_skill(record) { shadows.push(shadow); @@ -3024,7 +3024,7 @@ fn ingest_skills( /// Drain skill-ingest shadow events into the Pod's notify buffer so the /// first LLM request renders them as system-message attachments. -fn drain_skill_shadows(pod: &Pod, shadows: Vec) +fn drain_skill_shadows(pod: &Pod, shadows: Vec) where C: LlmClient, S: Store, @@ -3048,6 +3048,9 @@ fn build_scope_with_memory(manifest: &PodManifest, pwd: &Path) -> Result {limit}")] + BodyTooLong { actual: usize, limit: usize }, + + #[error("`{field}` references unknown {kind} slug `{slug}`")] + UnknownReference { + field: &'static str, + kind: &'static str, + slug: String, + }, + + #[error("path is not a valid Workflow location: {}", .0.display())] + InvalidPath(PathBuf), +} diff --git a/crates/workflow/src/lib.rs b/crates/workflow/src/lib.rs new file mode 100644 index 00000000..065f5cac --- /dev/null +++ b/crates/workflow/src/lib.rs @@ -0,0 +1,22 @@ +//! Workflow records, loading, Agent Skill ingestion, and human-edit linting. 
+ +mod error; +mod linter; +mod schema; +mod scope; +mod skill; +mod slug; +mod workflow; + +pub use error::WorkflowLintError; +pub use linter::{WorkflowLintReport, WorkflowLinter}; +pub use schema::{WorkflowFrontmatter, split_frontmatter}; +pub use scope::deny_write_rules; +pub use skill::{ + SKILL_FILENAME, SkillParseError, SkillRecord, load_skills_from_dir, parse_skill_md, +}; +pub use slug::{Slug, is_valid_slug}; +pub use workflow::{ + ResidentWorkflowEntry, ShadowedSkill, WORKFLOW_DESCRIPTION_HARD_CAP, WorkflowLoadError, + WorkflowRecord, WorkflowRegistry, WorkflowSource, load_workflows, +}; diff --git a/crates/workflow/src/linter.rs b/crates/workflow/src/linter.rs new file mode 100644 index 00000000..e5cf7b1f --- /dev/null +++ b/crates/workflow/src/linter.rs @@ -0,0 +1,223 @@ +//! Human-edit linter for Workflow files. + +use std::collections::HashSet; + +use memory::WorkspaceLayout; + +use crate::{Slug, WorkflowLintError}; +use serde::de::DeserializeOwned; + +use crate::schema::{WORKFLOW_BODY_LIMIT, WorkflowFrontmatter, split_frontmatter}; +use crate::workflow::WORKFLOW_DESCRIPTION_HARD_CAP; + +#[derive(Debug, Default, Clone)] +pub struct WorkflowLintReport { + pub errors: Vec, +} + +impl WorkflowLintReport { + pub fn has_errors(&self) -> bool { + !self.errors.is_empty() + } + + pub fn push_error(&mut self, err: WorkflowLintError) { + self.errors.push(err); + } +} + +#[derive(Debug, Clone)] +pub struct WorkflowLinter { + layout: WorkspaceLayout, +} + +impl WorkflowLinter { + pub fn new(layout: WorkspaceLayout) -> Self { + Self { layout } + } + + pub fn layout(&self) -> &WorkspaceLayout { + &self.layout + } + + /// Validate a human-authored Workflow document. + /// + /// Verifies frontmatter shape, body size, resident description size, and + /// that every `requires` slug points at an existing Knowledge record. 
+ pub fn lint(&self, content: &str) -> WorkflowLintReport { + let mut report = WorkflowLintReport::default(); + let parsed = match parse_frontmatter::(content) { + Ok(parsed) => parsed, + Err(err) => { + report.push_error(err); + return report; + } + }; + + let body_chars = parsed.body.chars().count(); + if body_chars > WORKFLOW_BODY_LIMIT { + report.push_error(WorkflowLintError::BodyTooLong { + actual: body_chars, + limit: WORKFLOW_BODY_LIMIT, + }); + } + + if parsed.frontmatter.model_invokation { + let actual = parsed.frontmatter.description.chars().count(); + if actual > WORKFLOW_DESCRIPTION_HARD_CAP { + report.push_error(WorkflowLintError::DescriptionTooLong { + actual, + limit: WORKFLOW_DESCRIPTION_HARD_CAP, + }); + } + } + + let knowledge = match scan_knowledge_slugs(&self.layout) { + Ok(knowledge) => knowledge, + Err(err) => { + report.push_error(WorkflowLintError::MalformedFrontmatter(format!( + "failed to scan existing Knowledge records: {err}" + ))); + return report; + } + }; + + for slug in &parsed.frontmatter.requires { + if !knowledge.contains(slug) { + report.push_error(WorkflowLintError::UnknownReference { + field: "requires", + kind: "knowledge", + slug: slug.to_string(), + }); + } + } + + report + } +} + +struct Parsed<'a, F> { + frontmatter: F, + body: &'a str, +} + +fn parse_frontmatter( + content: &str, +) -> Result, WorkflowLintError> { + let (yaml, body) = split_frontmatter(content)?; + let frontmatter = serde_yaml::from_str::(yaml).map_err(|err| { + let msg = err.to_string(); + if let Some(field) = parse_missing_field(&msg) { + WorkflowLintError::MissingField(field) + } else { + WorkflowLintError::MalformedFrontmatter(msg) + } + })?; + Ok(Parsed { frontmatter, body }) +} + +fn parse_missing_field(msg: &str) -> Option<&'static str> { + let needle = "missing field `"; + let start = msg.find(needle)? + needle.len(); + let end = msg[start..].find('`')? 
+ start; + match &msg[start..end] { + "description" => Some("description"), + "model_invokation" => Some("model_invokation"), + "user_invocable" => Some("user_invocable"), + "requires" => Some("requires"), + _ => None, + } +} + +fn scan_knowledge_slugs(layout: &WorkspaceLayout) -> std::io::Result> { + let mut out = HashSet::new(); + let entries = match std::fs::read_dir(layout.knowledge_dir()) { + Ok(entries) => entries, + Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(out), + Err(err) => return Err(err), + }; + for entry in entries { + let entry = entry?; + let path = entry.path(); + if !path.is_file() || path.extension().and_then(|s| s.to_str()) != Some("md") { + continue; + } + let Some(stem) = path.file_stem().and_then(|s| s.to_str()) else { + continue; + }; + if let Ok(slug) = Slug::parse(stem) { + out.insert(slug); + } + } + Ok(out) +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + fn write(path: &std::path::Path, content: &str) { + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent).unwrap(); + } + std::fs::write(path, content).unwrap(); + } + + fn workspace() -> (TempDir, WorkflowLinter) { + let dir = TempDir::new().unwrap(); + let layout = WorkspaceLayout::new(dir.path().to_path_buf()); + (dir, WorkflowLinter::new(layout)) + } + + #[test] + fn workflow_lint_accepts_valid_file() { + let (dir, linter) = workspace(); + write( + &dir.path().join(".insomnia/knowledge/policy.md"), + "---\ndescription: p\n---\nbody", + ); + let wf = "---\ndescription: run\nrequires: [policy]\n---\nbody"; + let report = linter.lint(wf); + assert!(!report.has_errors(), "{:?}", report.errors); + } + + #[test] + fn workflow_lint_rejects_missing_required_knowledge() { + let (_dir, linter) = workspace(); + let wf = "---\ndescription: run\nrequires: [ghost]\n---\nbody"; + let report = linter.lint(wf); + assert!(report.errors.iter().any(|err| matches!( + err, + WorkflowLintError::UnknownReference { field: "requires", 
kind: "knowledge", slug } + if slug == "ghost" + ))); + } + + #[test] + fn workflow_lint_enforces_resident_description_cap() { + let (_dir, linter) = workspace(); + let desc = "x".repeat(WORKFLOW_DESCRIPTION_HARD_CAP + 1); + let wf = format!("---\ndescription: {desc}\nmodel_invokation: true\n---\nbody"); + let report = linter.lint(&wf); + assert!( + report + .errors + .iter() + .any(|err| matches!(err, WorkflowLintError::DescriptionTooLong { .. })) + ); + } + + #[test] + fn workflow_lint_enforces_body_limit() { + let (_dir, linter) = workspace(); + let body = "x".repeat(WORKFLOW_BODY_LIMIT + 1); + let wf = format!("---\ndescription: run\n---\n{body}"); + let report = linter.lint(&wf); + assert!( + report + .errors + .iter() + .any(|err| matches!(err, WorkflowLintError::BodyTooLong { .. })) + ); + } +} diff --git a/crates/workflow/src/schema.rs b/crates/workflow/src/schema.rs new file mode 100644 index 00000000..9538a4bf --- /dev/null +++ b/crates/workflow/src/schema.rs @@ -0,0 +1,90 @@ +//! Workflow frontmatter schema and frontmatter splitting helpers. + +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; + +use crate::{Slug, WorkflowLintError}; + +pub const WORKFLOW_BODY_LIMIT: usize = 8000; + +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct WorkflowFrontmatter { + /// Workflows do not require timestamps in the MVP. Human-authored files + /// may carry them. + #[serde(default)] + pub updated_at: Option>, + #[serde(default)] + pub created_at: Option>, + pub description: String, + #[serde(default)] + pub model_invokation: bool, + #[serde(default = "default_user_invocable")] + pub user_invocable: bool, + #[serde(default)] + pub requires: Vec, +} + +fn default_user_invocable() -> bool { + true +} + +const FRONTMATTER_DELIM: &str = "---"; + +/// Split a markdown document into `(yaml_frontmatter, body)`. 
+pub fn split_frontmatter(content: &str) -> Result<(&str, &str), WorkflowLintError> {
+    let after_open = content
+        .strip_prefix(FRONTMATTER_DELIM)
+        .and_then(|s| s.strip_prefix('\n').or(Some(s)))
+        .ok_or(WorkflowLintError::MissingFrontmatter)?;
+
+    let mut yaml_end = None;
+    let mut byte_offset = 0usize;
+    for line in after_open.split_inclusive('\n') {
+        let trimmed = line.trim_end_matches('\n').trim_end_matches('\r');
+        if trimmed == FRONTMATTER_DELIM {
+            yaml_end = Some((byte_offset, byte_offset + line.len()));
+            break;
+        }
+        byte_offset += line.len();
+    }
+
+    let (yaml_end_excl, body_start) = yaml_end.ok_or_else(|| {
+        WorkflowLintError::MalformedFrontmatter("missing closing `---` line".to_string())
+    })?;
+
+    let yaml = &after_open[..yaml_end_excl];
+    let body = &after_open[body_start..];
+    Ok((yaml, body))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn splits_simple() {
+        let doc = "---\nfoo: 1\n---\nbody here\n";
+        let (y, b) = split_frontmatter(doc).unwrap();
+        assert_eq!(y, "foo: 1\n");
+        assert_eq!(b, "body here\n");
+    }
+
+    #[test]
+    fn no_leading_delim_errors() {
+        let err = split_frontmatter("hello").unwrap_err();
+        assert!(matches!(err, WorkflowLintError::MissingFrontmatter));
+    }
+
+    #[test]
+    fn no_closing_delim_errors() {
+        let err = split_frontmatter("---\nfoo: 1\nno close\n").unwrap_err();
+        assert!(matches!(err, WorkflowLintError::MalformedFrontmatter(_)));
+    }
+
+    #[test]
+    fn handles_empty_body() {
+        let doc = "---\nfoo: 1\n---\n";
+        let (_, b) = split_frontmatter(doc).unwrap();
+        assert_eq!(b, "");
+    }
+}
diff --git a/crates/workflow/src/scope.rs b/crates/workflow/src/scope.rs
new file mode 100644
index 00000000..a13a34f5
--- /dev/null
+++ b/crates/workflow/src/scope.rs
@@ -0,0 +1,36 @@
+//! Scope deny helpers for human-authored Workflow files.
+
+use std::path::Path;
+
+use manifest::{Permission, ScopeRule};
+use memory::WorkspaceLayout;
+
+/// Build deny rules that strip Write permission from
+/// `/.insomnia/workflow/` for generic CRUD tools.
+pub fn deny_write_rules(layout: &WorkspaceLayout) -> Vec<ScopeRule> {
+    vec![deny_write(layout.workflow_dir().as_path())]
+}
+
+fn deny_write(target: &Path) -> ScopeRule {
+    ScopeRule {
+        target: target.to_path_buf(),
+        permission: Permission::Write,
+        recursive: true,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::path::PathBuf;
+
+    #[test]
+    fn deny_targets_workflow() {
+        let layout = WorkspaceLayout::new(PathBuf::from("/ws"));
+        let rules = deny_write_rules(&layout);
+        assert_eq!(rules.len(), 1);
+        assert_eq!(rules[0].target, PathBuf::from("/ws/.insomnia/workflow"));
+        assert_eq!(rules[0].permission, Permission::Write);
+        assert!(rules[0].recursive);
+    }
+}
diff --git a/crates/memory/src/skill.rs b/crates/workflow/src/skill.rs
similarity index 97%
rename from crates/memory/src/skill.rs
rename to crates/workflow/src/skill.rs
index 01203bf0..9450701a 100644
--- a/crates/memory/src/skill.rs
+++ b/crates/workflow/src/skill.rs
@@ -19,10 +19,9 @@ use serde::Deserialize;
 use thiserror::Error;
 use tracing::warn;
 
-use crate::error::LintError;
 use crate::schema::split_frontmatter;
-use crate::slug::Slug;
 use crate::workflow::{WORKFLOW_DESCRIPTION_HARD_CAP, WorkflowRecord, WorkflowSource};
+use crate::{Slug, WorkflowLintError};
 
 /// Filename within a skill directory carrying the frontmatter + body.
 pub const SKILL_FILENAME: &str = "SKILL.md";
@@ -34,6 +33,7 @@ pub const SKILL_FILENAME: &str = "SKILL.md";
 /// `metadata` are documentary, while `allowed-tools` is recognised and
 /// emits a warning until [`permission-extension-point.md`] lands.
 #[derive(Debug, Clone, Deserialize)]
+#[allow(dead_code)]
 pub struct SkillFrontmatter {
     pub name: String,
     pub description: String,
@@ -49,7 +49,7 @@ pub struct SkillFrontmatter {
 
 /// Validated skill record. Constructed by [`parse_skill_md`] and converted
 /// to a `WorkflowRecord` by the caller via the `Skill → Workflow`
-/// projection in [`crate::workflow`].
+/// projection in [`crate::WorkflowRecord`].
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct SkillRecord {
     pub slug: Slug,
@@ -94,7 +94,7 @@ pub enum SkillParseError {
     Frontmatter {
         path: PathBuf,
         #[source]
-        source: LintError,
+        source: WorkflowLintError,
     },
     #[error(
         "SKILL.md `name` `{name}` does not match its directory name `{dir_name}` (at {})",
@@ -109,7 +109,7 @@ pub enum SkillParseError {
     InvalidName {
         skill_md_path: PathBuf,
         #[source]
-        source: LintError,
+        source: WorkflowLintError,
     },
     #[error("SKILL.md `description` must be non-empty (at {})", .skill_md_path.display())]
     DescriptionEmpty { skill_md_path: PathBuf },
@@ -150,7 +150,7 @@ pub fn parse_skill_md(skill_md_path: &Path) -> Result) -> Result {
+        let s = s.into();
+        if is_valid_slug(&s) {
+            Ok(Self(s))
+        } else {
+            Err(WorkflowLintError::InvalidSlug(s))
+        }
+    }
+
+    pub fn as_str(&self) -> &str {
+        &self.0
+    }
+
+    pub fn into_string(self) -> String {
+        self.0
+    }
+}
+
+impl fmt::Display for Slug {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str(&self.0)
+    }
+}
+
+impl AsRef<str> for Slug {
+    fn as_ref(&self) -> &str {
+        &self.0
+    }
+}
+
+impl FromStr for Slug {
+    type Err = WorkflowLintError;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        Self::parse(s)
+    }
+}
+
+impl<'de> Deserialize<'de> for Slug {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        let raw = String::deserialize(deserializer)?;
+        Self::parse(raw).map_err(serde::de::Error::custom)
+    }
+}
+
+/// Pure-fn predicate matching the agent-skills slug regex without
+/// pulling in the `regex` crate.
+pub fn is_valid_slug(s: &str) -> bool {
+    let bytes = s.as_bytes();
+    let len = bytes.len();
+    if len < MIN_LEN || len > MAX_LEN {
+        return false;
+    }
+    if !is_alnum_lower(bytes[0]) || !is_alnum_lower(bytes[len - 1]) {
+        return false;
+    }
+    let mut prev_dash = false;
+    for &b in bytes {
+        if b == b'-' {
+            if prev_dash {
+                return false;
+            }
+            prev_dash = true;
+        } else if is_alnum_lower(b) {
+            prev_dash = false;
+        } else {
+            return false;
+        }
+    }
+    true
+}
+
+fn is_alnum_lower(b: u8) -> bool {
+    b.is_ascii_digit() || b.is_ascii_lowercase()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn accepts_basic_slugs() {
+        for s in ["a", "ab", "abc-def", "x9", "a-b-c", "123", "a-1"] {
+            assert!(is_valid_slug(s), "expected `{s}` valid");
+            assert!(Slug::parse(s).is_ok());
+        }
+    }
+
+    #[test]
+    fn rejects_bad_slugs() {
+        for s in [
+            "", "-", "-foo", "foo-", "Foo", "foo_bar", "foo bar", "foo--bar", "foo.bar", "ä",
+        ] {
+            assert!(!is_valid_slug(s), "expected `{s}` invalid");
+            assert!(Slug::parse(s).is_err());
+        }
+    }
+
+    #[test]
+    fn enforces_length_bounds() {
+        let too_long = "a".repeat(MAX_LEN + 1);
+        assert!(!is_valid_slug(&too_long));
+        let max = "a".repeat(MAX_LEN);
+        assert!(is_valid_slug(&max));
+    }
+
+    #[test]
+    fn deserializes_via_serde() {
+        let json = "\"valid-slug\"";
+        let slug: Slug = serde_json::from_str(json).unwrap();
+        assert_eq!(slug.as_str(), "valid-slug");
+
+        let bad = "\"BAD\"";
+        let err: Result<Slug, _> = serde_json::from_str(bad);
+        assert!(err.is_err());
+    }
+}
diff --git a/crates/memory/src/workflow.rs b/crates/workflow/src/workflow.rs
similarity index 95%
rename from crates/memory/src/workflow.rs
rename to crates/workflow/src/workflow.rs
index 9e1b5948..cbe74260 100644
--- a/crates/memory/src/workflow.rs
+++ b/crates/workflow/src/workflow.rs
@@ -12,10 +12,10 @@ use std::path::{Path, PathBuf};
 use thiserror::Error;
 use tracing::warn;
 
-use crate::error::LintError;
 use crate::schema::{WorkflowFrontmatter, split_frontmatter};
-use crate::slug::Slug;
-use crate::workspace::WorkspaceLayout;
+use memory::WorkspaceLayout;
+
+use crate::{Slug, WorkflowLintError};
 
 /// Hard cap on Workflow descriptions that are advertised resident.
 /// Mirrors agent-skills and resident Knowledge descriptions.
@@ -167,9 +167,15 @@ pub enum WorkflowLoadError {
     #[error("failed to read workflow file {}: {source}", .path.display())]
     ReadFile { path: PathBuf, source: io::Error },
     #[error("invalid workflow file name {}: {source}", .path.display())]
-    InvalidSlug { path: PathBuf, source: LintError },
+    InvalidSlug {
+        path: PathBuf,
+        source: WorkflowLintError,
+    },
     #[error("invalid workflow frontmatter in {}: {source}", .path.display())]
-    Frontmatter { path: PathBuf, source: LintError },
+    Frontmatter {
+        path: PathBuf,
+        source: WorkflowLintError,
+    },
     #[error(
         "Workflow {} with model_invokation: true cannot have description longer than {limit} chars (got {actual})",
         .path.display()
@@ -281,12 +287,12 @@ fn warn_unknown_workflow_fields(path: &Path, yaml: &str) {
     }
 }
 
-fn map_serde_workflow_error(err: serde_yaml::Error) -> LintError {
+fn map_serde_workflow_error(err: serde_yaml::Error) -> WorkflowLintError {
     let msg = err.to_string();
     if let Some(field) = parse_missing_field(&msg) {
-        return LintError::MissingField(field);
+        return WorkflowLintError::MissingField(field);
     }
-    LintError::MalformedFrontmatter(msg)
+    WorkflowLintError::MalformedFrontmatter(msg)
 }
 
 fn parse_missing_field(msg: &str) -> Option<&'static str> {
@@ -416,9 +422,18 @@ mod tests {
     #[test]
     fn merge_skill_shadows_existing_workflow() {
         let (dir, layout) = setup();
-        write_workflow(dir.path(), "shared", "description: Internal", "internal body");
+        write_workflow(
+            dir.path(),
+            "shared",
+            "description: Internal",
+            "internal body",
+        );
         let mut reg = load_workflows(&layout).unwrap();
-        let skill_path = dir.path().join("user-skills").join("shared").join("SKILL.md");
+        let skill_path = dir
+            .path()
+            .join("user-skills")
+            .join("shared")
+            .join("SKILL.md");
         std::fs::create_dir_all(skill_path.parent().unwrap()).unwrap();
         std::fs::write(&skill_path, "ignored").unwrap();
         let incoming = WorkflowRecord {
@@ -435,8 +450,14 @@ mod tests {
         };
         let shadow = reg.merge_skill(incoming).expect("expected shadow");
         assert_eq!(shadow.slug.as_str(), "shared");
-        assert!(matches!(shadow.kept_source, WorkflowSource::WorkspaceWorkflow));
-        assert!(matches!(shadow.shadowed_source, WorkflowSource::Skill { .. }));
+        assert!(matches!(
+            shadow.kept_source,
+            WorkflowSource::WorkspaceWorkflow
+        ));
+        assert!(matches!(
+            shadow.shadowed_source,
+            WorkflowSource::Skill { .. }
+        ));
         // The kept record is still the workspace workflow.
         let kept = reg.get(&Slug::parse("shared").unwrap()).unwrap();
         assert!(matches!(kept.source, WorkflowSource::WorkspaceWorkflow));