yoi/crates/pod/src/prompt/catalog.rs

646 lines
22 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! Central catalog of Pod-level prompt strings.
//!
//! Prompts that Pod injects into a Worker (compaction system prompt,
//! notification wrapper, interrupt notes, system-prompt trailing
//! sections, AGENTS.md truncation notice, ...) are enumerated by
//! [`PodPrompt`] and rendered through a single [`PromptCatalog`]. Direct
//! `const &str` / `format!` authoring of these strings elsewhere in
//! `crates/pod` is deliberately avoided — new injection points add a
//! variant here, which forces a matching entry in
//! `resources/prompts/internal.toml` (checked at build time) and keeps
//! the "Pod tone" editable in one place.
//!
//! # Layering
//!
//! Values are merged key-wise from low priority to high:
//!
//! 1. **builtin** — `resources/prompts/internal.toml`, baked into the
//! binary. Must cover every [`PodPrompt`] variant (build-time check).
//! 2. **user** — `<user_manifest_dir>/prompts.toml`, auto-discovered by
//! [`PodFactory`]. Optional.
//! 3. **workspace** — `<project>/.insomnia/prompts.toml`, auto-discovered.
//! Optional.
//! 4. **manifest pack** — `manifest.pod.prompt_pack`, an explicit path
//! per-Pod. Optional.
//!
//! Unknown keys in layers 2-4 are logged via `tracing::warn!` and
//! ignored (forward compatibility). Layer 1 is enforced at build time.
//!
//! # Template language
//!
//! All values are minijinja templates. `{% include "$prefix/..." %}`
//! resolves through the same [`PromptLoader`] used by the system-prompt
//! template, so long prompt bodies can be factored into `.md` files
//! under `resources/prompts/...`, the user prompts library, or the
//! workspace prompts library.
use std::collections::HashMap;
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use minijinja::value::Value;
use minijinja::{Environment, ErrorKind, UndefinedBehavior};
use serde::Deserialize;
use thiserror::Error;
use tracing::warn;
use crate::prompt::loader::PromptLoader;
// Generated by build.rs from `resources/prompts/internal.toml`.
// The generated file is expected to provide `INTERNAL_KEYS`, the key list
// that the build-time coverage check further down compares against
// `PodPrompt::KEYS`.
include!(concat!(env!("OUT_DIR"), "/internal_keys.rs"));
/// Source of the builtin pack. Baked in at compile time.
///
/// Parsed at catalog-load time by `parse_builtin_pack`; it forms the lowest
/// priority layer that the optional runtime packs are merged over.
const INTERNAL_TOML: &str = include_str!("../../../../resources/prompts/internal.toml");
/// Pod-level prompt injection point.
///
/// Adding a new variant also requires adding a matching key to
/// `resources/prompts/internal.toml`; the build fails otherwise.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PodPrompt {
    /// System prompt of the compaction (summary) Worker.
    CompactSystem,
    /// System prompt of the memory Phase 1 (extract) Worker.
    MemoryExtractSystem,
    /// System prompt of the memory Phase 2 (consolidation + tidy) Worker.
    MemoryConsolidationSystem,
    /// Wrapper around an incoming `Method::Notify` message injected into
    /// the next LLM request context as a transient system message.
    NotifyWrapper,
    /// Synthetic `Item::ToolResult` summary used to close out orphaned
    /// tool calls when a paused turn is interrupted by the user.
    InterruptToolResultSummary,
    /// System note prepended to the new turn after an interrupt.
    InterruptSystemNote,
    /// Trailing `## Working boundaries` section appended to every
    /// materialised system prompt.
    WorkingBoundariesSection,
    /// Trailing `## Project instructions (AGENTS.md)` section, appended
    /// after the scope summary when an AGENTS.md is present.
    AgentsMdSection,
    /// Trailing `## Resident knowledge` section, appended after the
    /// AGENTS.md section when memory is enabled and at least one
    /// `knowledge/*` record advertises `model_invokation: true`.
    ResidentKnowledgeSection,
    /// Trailing `## Resident workflows` section, appended after resident
    /// knowledge when memory is enabled and at least one workflow advertises
    /// `model_invokation: true`.
    ResidentWorkflowsSection,
}

impl PodPrompt {
    /// Pack key under which this prompt's template is stored, both in the
    /// builtin TOML and in the compiled catalog.
    ///
    /// This is a `const fn` so that [`PodPrompt::KEYS`] can be derived from
    /// [`PodPrompt::ALL`] at compile time instead of being maintained by
    /// hand. The `match` is exhaustive on purpose: adding a variant without
    /// a key is a compile error.
    pub const fn key(self) -> &'static str {
        match self {
            Self::CompactSystem => "compact_system",
            Self::MemoryExtractSystem => "memory_extract_system",
            Self::MemoryConsolidationSystem => "memory_consolidation_system",
            Self::NotifyWrapper => "notify_wrapper",
            Self::InterruptToolResultSummary => "interrupt_tool_result_summary",
            Self::InterruptSystemNote => "interrupt_system_note",
            Self::WorkingBoundariesSection => "working_boundaries_section",
            Self::AgentsMdSection => "agents_md_section",
            Self::ResidentKnowledgeSection => "resident_knowledge_section",
            Self::ResidentWorkflowsSection => "resident_workflows_section",
        }
    }

    /// All variants in declaration order.
    ///
    /// This list is still maintained by hand; remember to extend it when
    /// adding a variant. [`PodPrompt::KEYS`] follows automatically.
    pub const ALL: &'static [PodPrompt] = &[
        PodPrompt::CompactSystem,
        PodPrompt::MemoryExtractSystem,
        PodPrompt::MemoryConsolidationSystem,
        PodPrompt::NotifyWrapper,
        PodPrompt::InterruptToolResultSummary,
        PodPrompt::InterruptSystemNote,
        PodPrompt::WorkingBoundariesSection,
        PodPrompt::AgentsMdSection,
        PodPrompt::ResidentKnowledgeSection,
        PodPrompt::ResidentWorkflowsSection,
    ];

    /// Keys of [`PodPrompt::ALL`], in the same order, for const-eval
    /// coverage checks against `INTERNAL_KEYS` (generated by `build.rs`).
    ///
    /// Computed from `ALL` and `key()` at compile time, so the two lists
    /// cannot drift apart: `KEYS[i] == ALL[i].key()` holds by construction.
    pub const KEYS: &'static [&'static str] = &{
        let mut keys = [""; PodPrompt::ALL.len()];
        let mut i = 0;
        while i < keys.len() {
            keys[i] = PodPrompt::ALL[i].key();
            i += 1;
        }
        keys
    };
}
// --- build-time bidirectional coverage check --------------------------------
const _: () = {
    // Direction 1: each key declared on the enum side has to exist in the
    // builtin TOML (whose key list is generated into `INTERNAL_KEYS`).
    let mut declared = 0;
    while declared < PodPrompt::KEYS.len() {
        assert!(
            const_slice_contains(INTERNAL_KEYS, PodPrompt::KEYS[declared]),
            "resources/prompts/internal.toml is missing a key declared by \
             PodPrompt — regenerate the TOML or remove the variant"
        );
        declared += 1;
    }

    // Direction 2: each key found in the builtin TOML has to be declared on
    // the enum side.
    let mut builtin = 0;
    while builtin < INTERNAL_KEYS.len() {
        assert!(
            const_slice_contains(PodPrompt::KEYS, INTERNAL_KEYS[builtin]),
            "resources/prompts/internal.toml has a key not declared by \
             PodPrompt — add the variant or drop the key"
        );
        builtin += 1;
    }
};
/// Byte-wise string equality usable in const context (`==` on `str` is not
/// available there).
const fn const_str_eq(a: &str, b: &str) -> bool {
    let lhs = a.as_bytes();
    let rhs = b.as_bytes();

    // Start from the length comparison and keep walking only while every
    // byte seen so far has matched.
    let mut same = lhs.len() == rhs.len();
    let mut pos = 0;
    while same && pos < lhs.len() {
        same = lhs[pos] == rhs[pos];
        pos += 1;
    }
    same
}
/// Const-context membership test: does `haystack` hold a string equal to
/// `needle`?
const fn const_slice_contains(haystack: &[&str], needle: &str) -> bool {
    // Peel one element off the front per iteration until a match is found
    // or the slice is exhausted.
    let mut remaining = haystack;
    while let [candidate, rest @ ..] = remaining {
        if const_str_eq(*candidate, needle) {
            return true;
        }
        remaining = rest;
    }
    false
}
// --- errors ----------------------------------------------------------------
/// Failures that can occur while loading, compiling or rendering the
/// prompt catalog.
#[derive(Debug, Error)]
pub enum CatalogError {
/// A runtime prompt pack file could not be read from disk
/// (produced by `parse_pack_file`).
#[error("failed to read prompt pack {}: {source}", .path.display())]
Io {
// Path of the pack file that failed to load.
path: PathBuf,
#[source]
source: std::io::Error,
},
/// A runtime prompt pack file was read but is not valid TOML for the
/// pack shape (produced by `parse_pack_file`).
#[error("failed to parse prompt pack {}: {source}", .path.display())]
ParseToml {
// Path of the pack file that failed to parse.
path: PathBuf,
#[source]
source: toml::de::Error,
},
/// The builtin pack baked into the binary failed to parse
/// (produced by `parse_builtin_pack`).
#[error("failed to parse builtin prompt pack: {0}")]
ParseBuiltin(#[source] toml::de::Error),
/// A merged template value was rejected by minijinja when it was
/// registered in the environment (produced by `build_catalog`).
#[error("failed to compile prompt template '{key}': {source}")]
TemplateCompile {
// Pack key whose template source failed to compile.
key: String,
#[source]
source: minijinja::Error,
},
/// Rendering a registered template failed
/// (produced by `PromptCatalog::render`).
#[error("failed to render prompt '{key}': {source}")]
Render {
// Pack key of the template that failed to render.
key: String,
#[source]
source: minijinja::Error,
},
/// The template lookup for a prompt key failed
/// (produced by `PromptCatalog::render`; the underlying lookup error is
/// discarded there).
#[error("prompt key '{key}' is not registered in the catalog")]
UnknownKey { key: String },
}
// --- pack file shape -------------------------------------------------------
/// Deserialized shape of a prompt pack TOML file (builtin or runtime).
///
/// Only the `[prompt]` table is read: a map from pack key to template
/// source text.
#[derive(Debug, Deserialize)]
struct PackFile {
// `serde(default)`: a file without a `[prompt]` table deserializes to an
// empty map instead of failing.
#[serde(default)]
prompt: HashMap<String, String>,
}
// --- catalog ---------------------------------------------------------------
/// Merged, compiled pod-prompt catalog.
///
/// Owns a `minijinja::Environment` with one template registered per
/// [`PodPrompt`] key (after the 4-layer merge). Includes inside templates
/// are resolved via a provided [`PromptLoader`], so values can pull from
/// `$insomnia` / `$user` / `$workspace`.
pub struct PromptCatalog {
// Environment holding the compiled templates; populated by
// `build_catalog` and queried by `render`.
env: Environment<'static>,
}
// Hand-written `Debug`: prints only the type name with a non-exhaustive
// marker and does not dump the template environment.
impl std::fmt::Debug for PromptCatalog {
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut repr = formatter.debug_struct("PromptCatalog");
        repr.finish_non_exhaustive()
    }
}
impl PromptCatalog {
    /// Catalog built from the builtin pack alone. Every `{% include %}`
    /// reference has to resolve through `$insomnia`, because the
    /// user/workspace prefixes are unavailable.
    pub fn builtins_only() -> Result<Arc<Self>, CatalogError> {
        let loader = PromptLoader::builtins_only();
        Self::load(&loader, None)
    }

    /// Load the catalog, applying the 4-layer overlay from lowest to
    /// highest priority.
    ///
    /// - Layer 1 (builtin): `INTERNAL_TOML` baked into the binary.
    /// - Layer 2 (user): `loader.user_pack_file()`, if it names an
    ///   existing file.
    /// - Layer 3 (workspace): `loader.workspace_pack_file()`, if it names
    ///   an existing file.
    /// - Layer 4 (manifest): `manifest_pack` as an absolute filesystem
    ///   path (pre-resolved by the manifest cascade). Unlike layers 2 and
    ///   3 it is not checked for existence first, so a missing file is an
    ///   error.
    ///
    /// # Errors
    ///
    /// Returns a [`CatalogError`] when a pack cannot be read or parsed, or
    /// when a merged template fails to compile.
    pub fn load(
        loader: &PromptLoader,
        manifest_pack: Option<&Path>,
    ) -> Result<Arc<Self>, CatalogError> {
        let mut layers = parse_builtin_pack()?;

        match loader.user_pack_file() {
            Some(user_pack) if user_pack.is_file() => {
                let overrides = parse_pack_file(user_pack)?;
                merge_into(&mut layers, overrides, "user");
            }
            _ => {}
        }

        match loader.workspace_pack_file() {
            Some(workspace_pack) if workspace_pack.is_file() => {
                let overrides = parse_pack_file(workspace_pack)?;
                merge_into(&mut layers, overrides, "workspace");
            }
            _ => {}
        }

        if let Some(explicit_pack) = manifest_pack {
            let overrides = parse_pack_file(explicit_pack)?;
            merge_into(&mut layers, overrides, "manifest");
        }

        let catalog = build_catalog(layers, loader.clone())?;
        Ok(Arc::new(catalog))
    }

    /// Render the template registered for `prompt`, with `ctx` supplying
    /// the template variables. Pass [`Value::UNDEFINED`] (or call one of
    /// the typed helpers on this type) when the template takes no inputs.
    ///
    /// # Errors
    ///
    /// [`CatalogError::UnknownKey`] when the template lookup fails,
    /// [`CatalogError::Render`] when rendering fails.
    pub fn render(&self, prompt: PodPrompt, ctx: Value) -> Result<String, CatalogError> {
        let key = prompt.key();
        match self.env.get_template(key) {
            Ok(template) => template.render(ctx).map_err(|source| CatalogError::Render {
                key: key.to_string(),
                source,
            }),
            Err(_) => Err(CatalogError::UnknownKey {
                key: key.to_string(),
            }),
        }
    }

    /// Shared path for prompts whose templates take no inputs.
    fn render_without_inputs(&self, prompt: PodPrompt) -> Result<String, CatalogError> {
        self.render(prompt, Value::UNDEFINED)
    }

    /// Render `PodPrompt::CompactSystem` (no inputs).
    pub fn compact_system(&self) -> Result<String, CatalogError> {
        self.render_without_inputs(PodPrompt::CompactSystem)
    }

    /// Render `PodPrompt::MemoryExtractSystem` (no inputs).
    pub fn memory_extract_system(&self) -> Result<String, CatalogError> {
        self.render_without_inputs(PodPrompt::MemoryExtractSystem)
    }

    /// Render `PodPrompt::MemoryConsolidationSystem` (no inputs).
    pub fn memory_consolidation_system(&self) -> Result<String, CatalogError> {
        self.render_without_inputs(PodPrompt::MemoryConsolidationSystem)
    }

    /// Render `PodPrompt::NotifyWrapper` with `{{ message }}`.
    pub fn notify_wrapper(&self, message: &str) -> Result<String, CatalogError> {
        let ctx = single("message", message);
        self.render(PodPrompt::NotifyWrapper, ctx)
    }

    /// Render `PodPrompt::InterruptToolResultSummary` (no inputs).
    pub fn interrupt_tool_result_summary(&self) -> Result<String, CatalogError> {
        self.render_without_inputs(PodPrompt::InterruptToolResultSummary)
    }

    /// Render `PodPrompt::InterruptSystemNote` (no inputs).
    pub fn interrupt_system_note(&self) -> Result<String, CatalogError> {
        self.render_without_inputs(PodPrompt::InterruptSystemNote)
    }

    /// Render `PodPrompt::WorkingBoundariesSection` with `{{ scope_summary }}`.
    pub fn working_boundaries_section(&self, scope_summary: &str) -> Result<String, CatalogError> {
        let ctx = single("scope_summary", scope_summary);
        self.render(PodPrompt::WorkingBoundariesSection, ctx)
    }

    /// Render `PodPrompt::AgentsMdSection` with `{{ agents_md }}`.
    pub fn agents_md_section(&self, agents_md: &str) -> Result<String, CatalogError> {
        let ctx = single("agents_md", agents_md);
        self.render(PodPrompt::AgentsMdSection, ctx)
    }

    /// Render `PodPrompt::ResidentKnowledgeSection` with `{{ entries }}`
    /// (a pre-formatted list block authored by the caller).
    pub fn resident_knowledge_section(&self, entries: &str) -> Result<String, CatalogError> {
        let ctx = single("entries", entries);
        self.render(PodPrompt::ResidentKnowledgeSection, ctx)
    }

    /// Render `PodPrompt::ResidentWorkflowsSection` with `{{ entries }}`
    /// (a pre-formatted list block authored by the caller).
    pub fn resident_workflows_section(&self, entries: &str) -> Result<String, CatalogError> {
        let ctx = single("entries", entries);
        self.render(PodPrompt::ResidentWorkflowsSection, ctx)
    }
}
/// Build a template context holding exactly one variable: `key` bound to
/// the string `value`.
fn single(key: &'static str, value: &str) -> Value {
    use std::collections::BTreeMap;

    let context: BTreeMap<&'static str, Value> = BTreeMap::from([(key, Value::from(value))]);
    Value::from(context)
}
/// Parse the builtin pack baked into the binary and return its `[prompt]`
/// table. A parse failure is reported as `CatalogError::ParseBuiltin`.
fn parse_builtin_pack() -> Result<HashMap<String, String>, CatalogError> {
    toml::from_str::<PackFile>(INTERNAL_TOML)
        .map(|pack| pack.prompt)
        .map_err(CatalogError::ParseBuiltin)
}
/// Read the runtime pack at `path` and return its `[prompt]` table.
///
/// A read failure becomes `CatalogError::Io` and a TOML failure becomes
/// `CatalogError::ParseToml`; both carry the offending path.
fn parse_pack_file(path: &Path) -> Result<HashMap<String, String>, CatalogError> {
    let raw = match fs::read_to_string(path) {
        Ok(text) => text,
        Err(source) => {
            return Err(CatalogError::Io {
                path: path.to_path_buf(),
                source,
            });
        }
    };

    match toml::from_str::<PackFile>(&raw) {
        Ok(pack) => Ok(pack.prompt),
        Err(source) => Err(CatalogError::ParseToml {
            path: path.to_path_buf(),
            source,
        }),
    }
}
/// Overlay the entries of `upper` onto `base`, key by key.
///
/// Keys that are not declared in `PodPrompt::KEYS` are not merged; each one
/// is reported through `tracing::warn!` together with `origin`, the name of
/// the layer it came from.
fn merge_into(
    base: &mut HashMap<String, String>,
    upper: HashMap<String, String>,
    origin: &'static str,
) {
    for (name, template) in upper {
        let is_declared = PodPrompt::KEYS.contains(&name.as_str());
        if is_declared {
            base.insert(name, template);
        } else {
            warn!(
                origin = origin,
                key = %name,
                "unknown prompt pack key; ignoring"
            );
        }
    }
}
fn build_catalog(
templates: HashMap<String, String>,
loader: PromptLoader,
) -> Result<PromptCatalog, CatalogError> {
let mut env = Environment::new();
env.set_undefined_behavior(UndefinedBehavior::Strict);
// Reuse the system-prompt-template resolver so `{% include
// "$prefix/..." %}` inside a catalog value pulls from the same asset
// namespaces.
let loader_for_join = loader.clone();
env.set_path_join_callback(move |name, parent| {
let parent_ref = loader_for_join.parse_ref(parent, None).ok();
match loader_for_join.parse_ref(name, parent_ref.as_ref()) {
Ok(r) => r.to_qualified_string().into(),
Err(_) => name.to_string().into(),
}
});
let loader_for_src = loader.clone();
env.set_loader(move |name| {
let reference = loader_for_src
.parse_ref(name, None)
.map_err(|e| minijinja::Error::new(ErrorKind::TemplateNotFound, e.to_string()))?;
match loader_for_src.load(&reference) {
Ok(src) => Ok(Some(src)),
Err(e) => Err(minijinja::Error::new(
ErrorKind::TemplateNotFound,
e.to_string(),
)),
}
});
for (k, v) in templates {
env.add_template_owned(k.clone(), v)
.map_err(|source| CatalogError::TemplateCompile {
key: k.clone(),
source,
})?;
}
Ok(PromptCatalog { env })
}
// Unit tests for the catalog: builtin coverage, rendering of the typed
// helpers, layer precedence (user < workspace < manifest), tolerance of
// unknown keys, and `{% include %}` resolution from a runtime pack.
#[cfg(test)]
mod tests {
use super::*;
use tempfile::TempDir;
// Build a `PromptLoader` with optional user/workspace prompt directories
// and optional user/workspace pack files.
fn loader_with_packs(
user_dir: Option<PathBuf>,
workspace_dir: Option<PathBuf>,
user_pack: Option<PathBuf>,
workspace_pack: Option<PathBuf>,
) -> PromptLoader {
PromptLoader::new(user_dir, workspace_dir).with_pack_files(user_pack, workspace_pack)
}
// Every `PodPrompt` variant must have a template registered when only the
// builtin pack is loaded.
#[test]
fn builtin_covers_every_variant() {
let cat = PromptCatalog::builtins_only().unwrap();
for p in PodPrompt::ALL {
assert!(
cat.env.get_template(p.key()).is_ok(),
"builtin missing key: {}",
p.key()
);
}
}
// The builtin compaction prompt is expected to mention the
// `write_summary` and `mark_read_required` names.
#[test]
fn builtin_render_compact_system_includes_worker_instructions() {
let cat = PromptCatalog::builtins_only().unwrap();
let rendered = cat.compact_system().unwrap();
assert!(rendered.contains("write_summary"));
assert!(rendered.contains("mark_read_required"));
}
// `notify_wrapper` must interpolate the message into the builtin wrapper
// text.
#[test]
fn notify_wrapper_interpolates_message() {
let cat = PromptCatalog::builtins_only().unwrap();
let out = cat.notify_wrapper("file changed").unwrap();
assert!(out.contains("[Notification]"));
assert!(out.contains("file changed"));
assert!(out.contains("not a blocking request"));
}
// The working-boundaries section must contain both its heading and the
// supplied scope summary.
#[test]
fn working_boundaries_section_wraps_summary() {
let cat = PromptCatalog::builtins_only().unwrap();
let out = cat.working_boundaries_section("Readable: /a").unwrap();
assert!(out.contains("## Working boundaries"));
assert!(out.contains("Readable: /a"));
}
// The AGENTS.md section must contain both its heading and the supplied
// document text.
#[test]
fn agents_md_section_contains_marker() {
let cat = PromptCatalog::builtins_only().unwrap();
let out = cat.agents_md_section("PROJECT DOCS").unwrap();
assert!(out.contains("## Project instructions (AGENTS.md)"));
assert!(out.contains("PROJECT DOCS"));
}
// A key set in the user pack replaces the builtin value; keys it does not
// mention keep their builtin value.
#[test]
fn user_pack_overrides_builtin() {
let tmp = TempDir::new().unwrap();
let pack = tmp.path().join("prompts.toml");
fs::write(
&pack,
r#"
[prompt]
interrupt_system_note = "[OVERRIDDEN]"
"#,
)
.unwrap();
let loader = loader_with_packs(None, None, Some(pack), None);
let cat = PromptCatalog::load(&loader, None).unwrap();
assert_eq!(cat.interrupt_system_note().unwrap(), "[OVERRIDDEN]");
// Other keys still come from the builtin.
assert!(cat.notify_wrapper("x").unwrap().contains("[Notification]"));
}
// When both the user and the workspace pack set the same key, the
// workspace value wins.
#[test]
fn workspace_pack_wins_over_user_pack() {
let tmp = TempDir::new().unwrap();
let user = tmp.path().join("user.toml");
let ws = tmp.path().join("ws.toml");
fs::write(
&user,
r#"
[prompt]
interrupt_system_note = "[USER]"
"#,
)
.unwrap();
fs::write(
&ws,
r#"
[prompt]
interrupt_system_note = "[WS]"
"#,
)
.unwrap();
let loader = loader_with_packs(None, None, Some(user), Some(ws));
let cat = PromptCatalog::load(&loader, None).unwrap();
assert_eq!(cat.interrupt_system_note().unwrap(), "[WS]");
}
// When both the workspace and the manifest pack set the same key, the
// manifest value wins.
#[test]
fn manifest_pack_wins_over_workspace_pack() {
let tmp = TempDir::new().unwrap();
let ws = tmp.path().join("ws.toml");
let mf = tmp.path().join("mf.toml");
fs::write(
&ws,
r#"
[prompt]
interrupt_system_note = "[WS]"
"#,
)
.unwrap();
fs::write(
&mf,
r#"
[prompt]
interrupt_system_note = "[MF]"
"#,
)
.unwrap();
let loader = loader_with_packs(None, None, None, Some(ws));
let cat = PromptCatalog::load(&loader, Some(mf.as_path())).unwrap();
assert_eq!(cat.interrupt_system_note().unwrap(), "[MF]");
}
// A runtime pack containing a key that `PodPrompt` does not declare must
// still load; the known key next to it is applied as usual.
#[test]
fn unknown_key_in_runtime_pack_is_ignored_with_warning() {
let tmp = TempDir::new().unwrap();
let pack = tmp.path().join("p.toml");
fs::write(
&pack,
r#"
[prompt]
interrupt_system_note = "[OK]"
future_injection_point = "tolerated"
"#,
)
.unwrap();
let loader = loader_with_packs(None, None, Some(pack), None);
// Loads without error; the unknown key is dropped at runtime. A
// warning is emitted via tracing, but this test does not capture or
// assert on it.
let cat = PromptCatalog::load(&loader, None).unwrap();
assert_eq!(cat.interrupt_system_note().unwrap(), "[OK]");
}
// The manifest pack is read directly from the path handed to `load`, even
// when the loader itself knows no user or workspace pack.
#[test]
fn manifest_pack_reads_from_absolute_path() {
let tmp = TempDir::new().unwrap();
let pack = tmp.path().join("mine.toml");
fs::write(
&pack,
r#"
[prompt]
interrupt_system_note = "[FROM-MANIFEST-PACK]"
"#,
)
.unwrap();
let loader = PromptLoader::builtins_only();
let cat = PromptCatalog::load(&loader, Some(pack.as_path())).unwrap();
assert_eq!(cat.interrupt_system_note().unwrap(), "[FROM-MANIFEST-PACK]");
}
#[test]
fn value_can_pull_long_text_via_include() {
// A runtime (user-layer) pack that overrides `compact_system` with an
// `{% include %}` into the `$insomnia` namespace — exercises the
// include resolver for a value that comes from an overlay pack rather
// than from the builtin TOML.
let tmp = TempDir::new().unwrap();
let pack = tmp.path().join("p.toml");
fs::write(
&pack,
r#"
[prompt]
compact_system = "PREFIX\n{% include \"$insomnia/internal/compact_system\" %}"
"#,
)
.unwrap();
let loader = loader_with_packs(None, None, Some(pack), None);
let cat = PromptCatalog::load(&loader, None).unwrap();
let rendered = cat.compact_system().unwrap();
assert!(rendered.starts_with("PREFIX\n"));
assert!(rendered.contains("write_summary"));
}
}