Compare commits
11 Commits
365b8c34fd
...
eda4c4ce47
| Author | SHA1 | Date | |
|---|---|---|---|
| eda4c4ce47 | |||
| 7265e83e44 | |||
| 22df14a66c | |||
| 0e4ab1d496 | |||
| 4450e1da9d | |||
| 5d104a1cc6 | |||
| b2efd2906f | |||
| 98522911a4 | |||
| 01c41ae86c | |||
| 9fe2799732 | |||
| 2c3eddd218 |
|
|
@ -32,10 +32,16 @@ pub enum PromptAction {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Action before an LLM request.
|
/// Action before an LLM request.
|
||||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
#[derive(Debug, Clone, PartialEq)]
|
||||||
pub enum PreRequestAction {
|
pub enum PreRequestAction {
|
||||||
/// Proceed normally.
|
/// Proceed normally.
|
||||||
Continue,
|
Continue,
|
||||||
|
/// Proceed after appending these items to durable worker history.
|
||||||
|
///
|
||||||
|
/// This is for upper-layer budget/status nudges that the model may react
|
||||||
|
/// to: the items are committed before the request so later turns can see
|
||||||
|
/// why the worker changed course.
|
||||||
|
ContinueWith(Vec<Item>),
|
||||||
/// Cancel with a reason (treated as an error).
|
/// Cancel with a reason (treated as an error).
|
||||||
Cancel(String),
|
Cancel(String),
|
||||||
/// Yield control to the caller for external processing.
|
/// Yield control to the caller for external processing.
|
||||||
|
|
@ -149,11 +155,12 @@ pub trait Interceptor: Send + Sync {
|
||||||
|
|
||||||
/// Called before each LLM request. The context starts as a clone
|
/// Called before each LLM request. The context starts as a clone
|
||||||
/// of `worker.history` (after `pending_history_appends` and the
|
/// of `worker.history` (after `pending_history_appends` and the
|
||||||
/// Worker's own prune projection have been applied) and can be
|
/// Worker's own prune projection have been applied).
|
||||||
/// further modified for that single request only — mutations here
|
///
|
||||||
/// are **not** persisted back to history. Use
|
/// Direct mutations to `context` remain request-local and are not persisted.
|
||||||
/// [`Self::pending_history_appends`] for inputs that need to land
|
/// If an interceptor derives a human/model-visible nudge from the current
|
||||||
/// in history.
|
/// request context, return [`PreRequestAction::ContinueWith`] so the Worker
|
||||||
|
/// commits it to history before the request is sent.
|
||||||
async fn pre_llm_request(&self, _context: &mut Vec<Item>) -> PreRequestAction {
|
async fn pre_llm_request(&self, _context: &mut Vec<Item>) -> PreRequestAction {
|
||||||
PreRequestAction::Continue
|
PreRequestAction::Continue
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -29,7 +29,7 @@ impl Default for RetryPolicy {
|
||||||
base: Duration::from_millis(500),
|
base: Duration::from_millis(500),
|
||||||
cap: Duration::from_secs(10),
|
cap: Duration::from_secs(10),
|
||||||
max_attempts: 4,
|
max_attempts: 4,
|
||||||
total_timeout: Duration::from_secs(30),
|
total_timeout: Duration::from_secs(40),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -75,7 +75,7 @@ mod tests {
|
||||||
assert_eq!(p.base, Duration::from_millis(500));
|
assert_eq!(p.base, Duration::from_millis(500));
|
||||||
assert_eq!(p.cap, Duration::from_secs(10));
|
assert_eq!(p.cap, Duration::from_secs(10));
|
||||||
assert_eq!(p.max_attempts, 4);
|
assert_eq!(p.max_attempts, 4);
|
||||||
assert_eq!(p.total_timeout, Duration::from_secs(30));
|
assert_eq!(p.total_timeout, Duration::from_secs(40));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@
|
||||||
|
|
||||||
use std::pin::Pin;
|
use std::pin::Pin;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::time::Duration;
|
use std::time::{Duration, Instant};
|
||||||
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use eventsource_stream::Eventsource;
|
use eventsource_stream::Eventsource;
|
||||||
|
|
@ -14,6 +14,7 @@ use futures::{Stream, StreamExt, TryStreamExt};
|
||||||
use reqwest::header::{
|
use reqwest::header::{
|
||||||
ACCEPT, CONTENT_ENCODING, CONTENT_TYPE, HeaderMap, HeaderName, HeaderValue, RETRY_AFTER,
|
ACCEPT, CONTENT_ENCODING, CONTENT_TYPE, HeaderMap, HeaderName, HeaderValue, RETRY_AFTER,
|
||||||
};
|
};
|
||||||
|
use serde_json::{Value, json};
|
||||||
|
|
||||||
use super::auth::{AuthProvider, AuthRequirement};
|
use super::auth::{AuthProvider, AuthRequirement};
|
||||||
use super::capability::ModelCapability;
|
use super::capability::ModelCapability;
|
||||||
|
|
@ -23,7 +24,7 @@ use super::event::Event;
|
||||||
use super::scheme::Scheme;
|
use super::scheme::Scheme;
|
||||||
use super::types::{Request, RequestConfig};
|
use super::types::{Request, RequestConfig};
|
||||||
|
|
||||||
pub const DEFAULT_STREAM_OPEN_TIMEOUT: Duration = Duration::from_secs(30);
|
pub const DEFAULT_STREAM_OPEN_TIMEOUT: Duration = Duration::from_secs(20);
|
||||||
pub const DEFAULT_FIRST_STREAM_EVENT_TIMEOUT: Duration = Duration::from_secs(30);
|
pub const DEFAULT_FIRST_STREAM_EVENT_TIMEOUT: Duration = Duration::from_secs(30);
|
||||||
|
|
||||||
/// `AuthRef` を解決したランタイム表現。`crates/provider` が構築する。
|
/// `AuthRef` を解決したランタイム表現。`crates/provider` が構築する。
|
||||||
|
|
@ -192,16 +193,71 @@ impl<S: Scheme> HttpTransport<S> {
|
||||||
}
|
}
|
||||||
|
|
||||||
let raw = serde_json::to_vec(body)?;
|
let raw = serde_json::to_vec(body)?;
|
||||||
|
let raw_json_bytes = raw.len();
|
||||||
let compressed = zstd::stream::encode_all(std::io::Cursor::new(raw), 3)
|
let compressed = zstd::stream::encode_all(std::io::Cursor::new(raw), 3)
|
||||||
.map_err(|e| ClientError::Config(format!("failed to zstd-compress request: {e}")))?;
|
.map_err(|e| ClientError::Config(format!("failed to zstd-compress request: {e}")))?;
|
||||||
headers.insert(CONTENT_ENCODING, HeaderValue::from_static("zstd"));
|
headers.insert(CONTENT_ENCODING, HeaderValue::from_static("zstd"));
|
||||||
Ok(RequestBody::CompressedJson(compressed))
|
Ok(RequestBody::CompressedJson {
|
||||||
|
bytes: compressed,
|
||||||
|
raw_json_bytes,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
enum RequestBody {
|
enum RequestBody {
|
||||||
Json(serde_json::Value),
|
Json(serde_json::Value),
|
||||||
CompressedJson(Vec<u8>),
|
CompressedJson {
|
||||||
|
bytes: Vec<u8>,
|
||||||
|
raw_json_bytes: usize,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RequestBody {
|
||||||
|
fn encoding(&self) -> &'static str {
|
||||||
|
match self {
|
||||||
|
Self::Json(_) => "json",
|
||||||
|
Self::CompressedJson { .. } => "zstd",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn raw_json_bytes(&self) -> Option<usize> {
|
||||||
|
match self {
|
||||||
|
Self::Json(body) => serde_json::to_vec(body).ok().map(|bytes| bytes.len()),
|
||||||
|
Self::CompressedJson { raw_json_bytes, .. } => Some(*raw_json_bytes),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn wire_bytes(&self) -> Option<usize> {
|
||||||
|
match self {
|
||||||
|
Self::Json(body) => serde_json::to_vec(body).ok().map(|bytes| bytes.len()),
|
||||||
|
Self::CompressedJson { bytes, .. } => Some(bytes.len()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn auth_kind(auth: &ResolvedAuth) -> &'static str {
|
||||||
|
match auth {
|
||||||
|
ResolvedAuth::None => "none",
|
||||||
|
ResolvedAuth::ApiKey(_) => "api_key",
|
||||||
|
ResolvedAuth::Custom(_) => "custom",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn emit_transport_trace(request: &Request, label: &str, data: Value) {
|
||||||
|
if let Some(trace) = &request.transport_trace {
|
||||||
|
trace.emit(label, data);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn json_value_kind(value: &Value) -> &'static str {
|
||||||
|
match value {
|
||||||
|
Value::Null => "null",
|
||||||
|
Value::Bool(_) => "bool",
|
||||||
|
Value::Number(_) => "number",
|
||||||
|
Value::String(_) => "string",
|
||||||
|
Value::Array(_) => "array",
|
||||||
|
Value::Object(_) => "object",
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn response_with_timeout(
|
async fn response_with_timeout(
|
||||||
|
|
@ -273,27 +329,175 @@ impl<S: Scheme + Clone + 'static> LlmClient for HttpTransport<S> {
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn stream(&self, request: Request) -> Result<ResponseStream, ClientError> {
|
async fn stream(&self, request: Request) -> Result<ResponseStream, ClientError> {
|
||||||
|
let total_started = Instant::now();
|
||||||
|
let path = self.scheme.path(&self.model_id);
|
||||||
|
emit_transport_trace(
|
||||||
|
&request,
|
||||||
|
"transport_start",
|
||||||
|
json!({
|
||||||
|
"model": &self.model_id,
|
||||||
|
"path": path,
|
||||||
|
"auth_kind": auth_kind(&self.auth),
|
||||||
|
"required_auth": format!("{:?}", self.scheme.required_auth()),
|
||||||
|
"codex_backend": self.is_codex_backend(),
|
||||||
|
"cache_key_present": request.cache_key.is_some(),
|
||||||
|
"stream_open_timeout_ms": DEFAULT_STREAM_OPEN_TIMEOUT.as_millis() as u64,
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
let url = self.build_url();
|
let url = self.build_url();
|
||||||
let mut headers = self.build_headers().await?;
|
let headers_started = Instant::now();
|
||||||
self.apply_stream_headers(&mut headers, &request)?;
|
emit_transport_trace(
|
||||||
|
&request,
|
||||||
|
"transport_headers_start",
|
||||||
|
json!({
|
||||||
|
"auth_kind": auth_kind(&self.auth),
|
||||||
|
"required_auth": format!("{:?}", self.scheme.required_auth()),
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
let mut headers = match self.build_headers().await {
|
||||||
|
Ok(headers) => {
|
||||||
|
emit_transport_trace(
|
||||||
|
&request,
|
||||||
|
"transport_headers_done",
|
||||||
|
json!({
|
||||||
|
"elapsed_ms": headers_started.elapsed().as_millis() as u64,
|
||||||
|
"headers_len": headers.len(),
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
headers
|
||||||
|
}
|
||||||
|
Err(error) => {
|
||||||
|
emit_transport_trace(
|
||||||
|
&request,
|
||||||
|
"transport_headers_error",
|
||||||
|
json!({
|
||||||
|
"elapsed_ms": headers_started.elapsed().as_millis() as u64,
|
||||||
|
"error": error.to_string(),
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
return Err(error);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let stream_headers_started = Instant::now();
|
||||||
|
if let Err(error) = self.apply_stream_headers(&mut headers, &request) {
|
||||||
|
emit_transport_trace(
|
||||||
|
&request,
|
||||||
|
"transport_stream_headers_error",
|
||||||
|
json!({
|
||||||
|
"elapsed_ms": stream_headers_started.elapsed().as_millis() as u64,
|
||||||
|
"error": error.to_string(),
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
return Err(error);
|
||||||
|
}
|
||||||
|
emit_transport_trace(
|
||||||
|
&request,
|
||||||
|
"transport_stream_headers_done",
|
||||||
|
json!({
|
||||||
|
"elapsed_ms": stream_headers_started.elapsed().as_millis() as u64,
|
||||||
|
"headers_len": headers.len(),
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
|
let body_started = Instant::now();
|
||||||
|
emit_transport_trace(&request, "transport_body_build_start", json!({}));
|
||||||
let body = self
|
let body = self
|
||||||
.scheme
|
.scheme
|
||||||
.build_request_body(&self.model_id, &request, &self.capability);
|
.build_request_body(&self.model_id, &request, &self.capability);
|
||||||
let request_body = self.encode_request_body(&body, &mut headers)?;
|
emit_transport_trace(
|
||||||
|
&request,
|
||||||
|
"transport_body_build_done",
|
||||||
|
json!({
|
||||||
|
"elapsed_ms": body_started.elapsed().as_millis() as u64,
|
||||||
|
"body_kind": json_value_kind(&body),
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
|
let encode_started = Instant::now();
|
||||||
|
let request_body = match self.encode_request_body(&body, &mut headers) {
|
||||||
|
Ok(body) => body,
|
||||||
|
Err(error) => {
|
||||||
|
emit_transport_trace(
|
||||||
|
&request,
|
||||||
|
"transport_body_encode_error",
|
||||||
|
json!({
|
||||||
|
"elapsed_ms": encode_started.elapsed().as_millis() as u64,
|
||||||
|
"error": error.to_string(),
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
return Err(error);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
emit_transport_trace(
|
||||||
|
&request,
|
||||||
|
"transport_body_encode_done",
|
||||||
|
json!({
|
||||||
|
"elapsed_ms": encode_started.elapsed().as_millis() as u64,
|
||||||
|
"encoding": request_body.encoding(),
|
||||||
|
"raw_json_bytes": request_body.raw_json_bytes(),
|
||||||
|
"wire_bytes": request_body.wire_bytes(),
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
let builder = self.http_client.post(&url).headers(headers);
|
let builder = self.http_client.post(&url).headers(headers);
|
||||||
let builder = match request_body {
|
let builder = match request_body {
|
||||||
RequestBody::Json(body) => builder.json(&body),
|
RequestBody::Json(body) => builder.json(&body),
|
||||||
RequestBody::CompressedJson(body) => builder.body(body),
|
RequestBody::CompressedJson { bytes, .. } => builder.body(bytes),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let send_started = Instant::now();
|
||||||
|
emit_transport_trace(&request, "transport_http_send_start", json!({}));
|
||||||
let response =
|
let response =
|
||||||
response_with_timeout(builder.send(), DEFAULT_STREAM_OPEN_TIMEOUT, "stream_open")
|
match response_with_timeout(builder.send(), DEFAULT_STREAM_OPEN_TIMEOUT, "stream_open")
|
||||||
.await?;
|
.await
|
||||||
|
{
|
||||||
|
Ok(response) => {
|
||||||
|
emit_transport_trace(
|
||||||
|
&request,
|
||||||
|
"transport_http_headers_received",
|
||||||
|
json!({
|
||||||
|
"elapsed_ms": send_started.elapsed().as_millis() as u64,
|
||||||
|
"status": response.status().as_u16(),
|
||||||
|
"success": response.status().is_success(),
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
response
|
||||||
|
}
|
||||||
|
Err(error) => {
|
||||||
|
emit_transport_trace(
|
||||||
|
&request,
|
||||||
|
"transport_http_send_error",
|
||||||
|
json!({
|
||||||
|
"elapsed_ms": send_started.elapsed().as_millis() as u64,
|
||||||
|
"error": error.to_string(),
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
return Err(error);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
if !response.status().is_success() {
|
if !response.status().is_success() {
|
||||||
|
emit_transport_trace(
|
||||||
|
&request,
|
||||||
|
"transport_http_status_error",
|
||||||
|
json!({
|
||||||
|
"status": response.status().as_u16(),
|
||||||
|
"retry_after_present": response.headers().get(RETRY_AFTER).is_some(),
|
||||||
|
}),
|
||||||
|
);
|
||||||
return Err(classify_error_response(response).await);
|
return Err(classify_error_response(response).await);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
emit_transport_trace(
|
||||||
|
&request,
|
||||||
|
"transport_stream_ready",
|
||||||
|
json!({
|
||||||
|
"elapsed_ms": total_started.elapsed().as_millis() as u64,
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
let scheme = self.scheme.clone();
|
let scheme = self.scheme.clone();
|
||||||
let byte_stream = response.bytes_stream().map_err(std::io::Error::other);
|
let byte_stream = response.bytes_stream().map_err(std::io::Error::other);
|
||||||
let event_stream = byte_stream.eventsource();
|
let event_stream = byte_stream.eventsource();
|
||||||
|
|
@ -449,9 +653,14 @@ mod tests {
|
||||||
assert_eq!(headers.get("x-client-request-id").unwrap(), "segment-123");
|
assert_eq!(headers.get("x-client-request-id").unwrap(), "segment-123");
|
||||||
assert_eq!(headers.get(CONTENT_ENCODING).unwrap(), "zstd");
|
assert_eq!(headers.get(CONTENT_ENCODING).unwrap(), "zstd");
|
||||||
|
|
||||||
let RequestBody::CompressedJson(compressed) = encoded else {
|
let RequestBody::CompressedJson {
|
||||||
|
bytes: compressed,
|
||||||
|
raw_json_bytes,
|
||||||
|
} = encoded
|
||||||
|
else {
|
||||||
panic!("Codex backend request body must be zstd-compressed");
|
panic!("Codex backend request body must be zstd-compressed");
|
||||||
};
|
};
|
||||||
|
assert!(raw_json_bytes > 0);
|
||||||
let decoded = zstd::stream::decode_all(std::io::Cursor::new(compressed)).unwrap();
|
let decoded = zstd::stream::decode_all(std::io::Cursor::new(compressed)).unwrap();
|
||||||
let decoded: serde_json::Value = serde_json::from_slice(&decoded).unwrap();
|
let decoded: serde_json::Value = serde_json::from_slice(&decoded).unwrap();
|
||||||
assert_eq!(decoded["prompt_cache_key"], "segment-123");
|
assert_eq!(decoded["prompt_cache_key"], "segment-123");
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,8 @@
|
||||||
//! - ToolResult items (tool results)
|
//! - ToolResult items (tool results)
|
||||||
//! - Reasoning items (extended thinking)
|
//! - Reasoning items (extended thinking)
|
||||||
|
|
||||||
|
use std::{fmt, sync::Arc};
|
||||||
|
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
fn is_false(value: &bool) -> bool {
|
fn is_false(value: &bool) -> bool {
|
||||||
|
|
@ -23,6 +25,35 @@ pub type ItemId = String;
|
||||||
/// Call ID type for linking function calls to their outputs
|
/// Call ID type for linking function calls to their outputs
|
||||||
pub type CallId = String;
|
pub type CallId = String;
|
||||||
|
|
||||||
|
/// Callback sink for request-local transport lifecycle diagnostics.
|
||||||
|
///
|
||||||
|
/// This is carried on [`Request`] so generic [`crate::llm_client::LlmClient`]
|
||||||
|
/// implementations can emit fine-grained transport milestones without widening
|
||||||
|
/// the trait method signature. The callback must never receive request body
|
||||||
|
/// contents or secret header values.
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct RequestTrace {
|
||||||
|
callback: Arc<dyn Fn(&str, serde_json::Value) + Send + Sync>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RequestTrace {
|
||||||
|
pub fn new(callback: impl Fn(&str, serde_json::Value) + Send + Sync + 'static) -> Self {
|
||||||
|
Self {
|
||||||
|
callback: Arc::new(callback),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn emit(&self, label: &str, data: serde_json::Value) {
|
||||||
|
(self.callback)(label, data);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Debug for RequestTrace {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
f.debug_struct("RequestTrace").finish_non_exhaustive()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Conversation item - the primary unit of conversation history
|
/// Conversation item - the primary unit of conversation history
|
||||||
///
|
///
|
||||||
/// Items represent discrete elements in a conversation. Tool calls and reasoning
|
/// Items represent discrete elements in a conversation. Tool calls and reasoning
|
||||||
|
|
@ -497,6 +528,9 @@ pub struct Request {
|
||||||
/// 別の概念。`cache_anchor` を読まない provider と同じく、
|
/// 別の概念。`cache_anchor` を読まない provider と同じく、
|
||||||
/// `prompt_cache_key` を持たない provider は無視する。
|
/// `prompt_cache_key` を持たない provider は無視する。
|
||||||
pub cache_key: Option<String>,
|
pub cache_key: Option<String>,
|
||||||
|
/// Request-local diagnostics sink for transport lifecycle tracing.
|
||||||
|
#[doc(hidden)]
|
||||||
|
pub transport_trace: Option<RequestTrace>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Request {
|
impl Request {
|
||||||
|
|
@ -547,6 +581,15 @@ impl Request {
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Attach a request-local transport trace callback.
|
||||||
|
pub fn transport_trace(
|
||||||
|
mut self,
|
||||||
|
callback: impl Fn(&str, serde_json::Value) + Send + Sync + 'static,
|
||||||
|
) -> Self {
|
||||||
|
self.transport_trace = Some(RequestTrace::new(callback));
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
/// Set max tokens
|
/// Set max tokens
|
||||||
pub fn max_tokens(mut self, max_tokens: u32) -> Self {
|
pub fn max_tokens(mut self, max_tokens: u32) -> Self {
|
||||||
self.config.max_tokens = Some(max_tokens);
|
self.config.max_tokens = Some(max_tokens);
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::{marker::PhantomData, time::Instant};
|
use std::{marker::PhantomData, sync::Arc, time::Instant};
|
||||||
|
|
||||||
use futures::StreamExt;
|
use futures::StreamExt;
|
||||||
use serde_json::{Value, json};
|
use serde_json::{Value, json};
|
||||||
|
|
@ -207,7 +207,7 @@ pub struct Worker<C: LlmClient, S: WorkerState = Mutable> {
|
||||||
stream_event_cbs: Vec<Box<dyn Fn(usize, usize, &Event) + Send + Sync>>,
|
stream_event_cbs: Vec<Box<dyn Fn(usize, usize, &Event) + Send + Sync>>,
|
||||||
/// Pre-stream lifecycle callbacks for debugging stalls before provider
|
/// Pre-stream lifecycle callbacks for debugging stalls before provider
|
||||||
/// stream events become visible.
|
/// stream events become visible.
|
||||||
lifecycle_trace_cbs: Vec<Box<dyn Fn(usize, usize, &str, &Value) + Send + Sync>>,
|
lifecycle_trace_cbs: Vec<Arc<dyn Fn(usize, usize, &str, &Value) + Send + Sync>>,
|
||||||
/// Non-fatal warning callbacks. Invoked when the Worker wants to
|
/// Non-fatal warning callbacks. Invoked when the Worker wants to
|
||||||
/// surface an advisory message to the upper layer (e.g. Pod) so it
|
/// surface an advisory message to the upper layer (e.g. Pod) so it
|
||||||
/// can be forwarded to the user — distinct from `tracing::warn!`,
|
/// can be forwarded to the user — distinct from `tracing::warn!`,
|
||||||
|
|
@ -435,7 +435,7 @@ impl<C: LlmClient, S: WorkerState> Worker<C, S> {
|
||||||
&mut self,
|
&mut self,
|
||||||
callback: impl Fn(usize, usize, &str, &Value) + Send + Sync + 'static,
|
callback: impl Fn(usize, usize, &str, &Value) + Send + Sync + 'static,
|
||||||
) {
|
) {
|
||||||
self.lifecycle_trace_cbs.push(Box::new(callback));
|
self.lifecycle_trace_cbs.push(Arc::new(callback));
|
||||||
}
|
}
|
||||||
|
|
||||||
fn emit_lifecycle_trace(&self, turn: usize, llm_call: usize, label: &str, data: Value) {
|
fn emit_lifecycle_trace(&self, turn: usize, llm_call: usize, label: &str, data: Value) {
|
||||||
|
|
@ -444,6 +444,19 @@ impl<C: LlmClient, S: WorkerState> Worker<C, S> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn attach_transport_trace(&self, request: Request, turn: usize, llm_call: usize) -> Request {
|
||||||
|
if self.lifecycle_trace_cbs.is_empty() {
|
||||||
|
return request;
|
||||||
|
}
|
||||||
|
|
||||||
|
let callbacks = self.lifecycle_trace_cbs.clone();
|
||||||
|
request.transport_trace(move |label, data| {
|
||||||
|
for cb in &callbacks {
|
||||||
|
cb(turn, llm_call, label, &data);
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
/// Register a non-fatal warning callback.
|
/// Register a non-fatal warning callback.
|
||||||
///
|
///
|
||||||
/// The callback is invoked with a short human-readable message
|
/// The callback is invoked with a short human-readable message
|
||||||
|
|
@ -1169,6 +1182,10 @@ impl<C: LlmClient, S: WorkerState> Worker<C, S> {
|
||||||
self.last_run_interrupted = true;
|
self.last_run_interrupted = true;
|
||||||
return Ok(WorkerResult::Yielded);
|
return Ok(WorkerResult::Yielded);
|
||||||
}
|
}
|
||||||
|
PreRequestAction::ContinueWith(items) => {
|
||||||
|
self.append_history_items(items.clone());
|
||||||
|
request_context.extend(items);
|
||||||
|
}
|
||||||
PreRequestAction::Continue => {}
|
PreRequestAction::Continue => {}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1194,6 +1211,7 @@ impl<C: LlmClient, S: WorkerState> Worker<C, S> {
|
||||||
"build_request_done",
|
"build_request_done",
|
||||||
self.request_trace_payload(&request),
|
self.request_trace_payload(&request),
|
||||||
);
|
);
|
||||||
|
let request = self.attach_transport_trace(request, current_turn, current_llm_call);
|
||||||
let stream_outcome = self
|
let stream_outcome = self
|
||||||
.stream_response(request, current_turn, current_llm_call)
|
.stream_response(request, current_turn, current_llm_call)
|
||||||
.await?;
|
.await?;
|
||||||
|
|
|
||||||
|
|
@ -125,18 +125,34 @@ pub struct CompactionConfigPartial {
|
||||||
pub prune_protected_tokens: Option<u64>,
|
pub prune_protected_tokens: Option<u64>,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub prune_min_savings: Option<u64>,
|
pub prune_min_savings: Option<u64>,
|
||||||
|
#[serde(default, alias = "compact_threshold")]
|
||||||
|
pub threshold: Option<u64>,
|
||||||
|
#[serde(default, alias = "compact_request_threshold")]
|
||||||
|
pub request_threshold: Option<u64>,
|
||||||
|
#[serde(default, alias = "compact_retained_tokens")]
|
||||||
|
pub retained_tokens: Option<u64>,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub compact_threshold: Option<u64>,
|
pub overview_target_tokens: Option<u64>,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub compact_request_threshold: Option<u64>,
|
pub overview_warning_tokens: Option<u64>,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub compact_retained_tokens: Option<u64>,
|
pub overview_deadline_tokens: Option<u64>,
|
||||||
|
#[serde(default, alias = "compact_worker_max_input_tokens")]
|
||||||
|
pub worker_context_max_tokens: Option<u64>,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub compact_auto_read_budget: Option<u64>,
|
pub finish_warning_remaining_tokens: Option<u64>,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub compact_worker_max_input_tokens: Option<u64>,
|
pub final_reserve_tokens: Option<u64>,
|
||||||
|
#[serde(default, alias = "compact_worker_max_turns")]
|
||||||
|
pub worker_max_turns: Option<u32>,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub compact_worker_max_turns: Option<u32>,
|
pub summary_target_tokens: Option<u64>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub summary_max_tokens: Option<u64>,
|
||||||
|
#[serde(default, alias = "compact_auto_read_budget")]
|
||||||
|
pub auto_read_budget_tokens: Option<u64>,
|
||||||
|
#[serde(default)]
|
||||||
|
pub result_context_max_tokens: Option<u64>,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub model: Option<ModelManifest>,
|
pub model: Option<ModelManifest>,
|
||||||
}
|
}
|
||||||
|
|
@ -386,22 +402,32 @@ impl CompactionConfigPartial {
|
||||||
Self {
|
Self {
|
||||||
prune_protected_tokens: upper.prune_protected_tokens.or(self.prune_protected_tokens),
|
prune_protected_tokens: upper.prune_protected_tokens.or(self.prune_protected_tokens),
|
||||||
prune_min_savings: upper.prune_min_savings.or(self.prune_min_savings),
|
prune_min_savings: upper.prune_min_savings.or(self.prune_min_savings),
|
||||||
compact_threshold: upper.compact_threshold.or(self.compact_threshold),
|
threshold: upper.threshold.or(self.threshold),
|
||||||
compact_request_threshold: upper
|
request_threshold: upper.request_threshold.or(self.request_threshold),
|
||||||
.compact_request_threshold
|
retained_tokens: upper.retained_tokens.or(self.retained_tokens),
|
||||||
.or(self.compact_request_threshold),
|
overview_target_tokens: upper.overview_target_tokens.or(self.overview_target_tokens),
|
||||||
compact_retained_tokens: upper
|
overview_warning_tokens: upper
|
||||||
.compact_retained_tokens
|
.overview_warning_tokens
|
||||||
.or(self.compact_retained_tokens),
|
.or(self.overview_warning_tokens),
|
||||||
compact_auto_read_budget: upper
|
overview_deadline_tokens: upper
|
||||||
.compact_auto_read_budget
|
.overview_deadline_tokens
|
||||||
.or(self.compact_auto_read_budget),
|
.or(self.overview_deadline_tokens),
|
||||||
compact_worker_max_input_tokens: upper
|
worker_context_max_tokens: upper
|
||||||
.compact_worker_max_input_tokens
|
.worker_context_max_tokens
|
||||||
.or(self.compact_worker_max_input_tokens),
|
.or(self.worker_context_max_tokens),
|
||||||
compact_worker_max_turns: upper
|
finish_warning_remaining_tokens: upper
|
||||||
.compact_worker_max_turns
|
.finish_warning_remaining_tokens
|
||||||
.or(self.compact_worker_max_turns),
|
.or(self.finish_warning_remaining_tokens),
|
||||||
|
final_reserve_tokens: upper.final_reserve_tokens.or(self.final_reserve_tokens),
|
||||||
|
worker_max_turns: upper.worker_max_turns.or(self.worker_max_turns),
|
||||||
|
summary_target_tokens: upper.summary_target_tokens.or(self.summary_target_tokens),
|
||||||
|
summary_max_tokens: upper.summary_max_tokens.or(self.summary_max_tokens),
|
||||||
|
auto_read_budget_tokens: upper
|
||||||
|
.auto_read_budget_tokens
|
||||||
|
.or(self.auto_read_budget_tokens),
|
||||||
|
result_context_max_tokens: upper
|
||||||
|
.result_context_max_tokens
|
||||||
|
.or(self.result_context_max_tokens),
|
||||||
model: merge_option(self.model, upper.model, ModelManifest::merge),
|
model: merge_option(self.model, upper.model, ModelManifest::merge),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -544,20 +570,42 @@ impl TryFrom<PodManifestConfig> for PodManifest {
|
||||||
.prune_protected_tokens
|
.prune_protected_tokens
|
||||||
.unwrap_or(defaults::PRUNE_PROTECTED_TOKENS),
|
.unwrap_or(defaults::PRUNE_PROTECTED_TOKENS),
|
||||||
prune_min_savings: c.prune_min_savings.unwrap_or(defaults::PRUNE_MIN_SAVINGS),
|
prune_min_savings: c.prune_min_savings.unwrap_or(defaults::PRUNE_MIN_SAVINGS),
|
||||||
compact_threshold: c.compact_threshold,
|
threshold: c.threshold,
|
||||||
compact_request_threshold: c.compact_request_threshold,
|
request_threshold: c.request_threshold,
|
||||||
compact_retained_tokens: c
|
retained_tokens: c
|
||||||
.compact_retained_tokens
|
.retained_tokens
|
||||||
.unwrap_or(defaults::COMPACT_RETAINED_TOKENS),
|
.unwrap_or(defaults::COMPACT_RETAINED_TOKENS),
|
||||||
compact_auto_read_budget: c
|
overview_target_tokens: c
|
||||||
.compact_auto_read_budget
|
.overview_target_tokens
|
||||||
.unwrap_or(defaults::COMPACT_AUTO_READ_BUDGET),
|
.unwrap_or(defaults::COMPACT_OVERVIEW_TARGET_TOKENS),
|
||||||
compact_worker_max_input_tokens: c
|
overview_warning_tokens: c
|
||||||
.compact_worker_max_input_tokens
|
.overview_warning_tokens
|
||||||
|
.unwrap_or(defaults::COMPACT_OVERVIEW_WARNING_TOKENS),
|
||||||
|
overview_deadline_tokens: c
|
||||||
|
.overview_deadline_tokens
|
||||||
|
.unwrap_or(defaults::COMPACT_OVERVIEW_DEADLINE_TOKENS),
|
||||||
|
worker_context_max_tokens: c
|
||||||
|
.worker_context_max_tokens
|
||||||
.unwrap_or(defaults::COMPACT_WORKER_MAX_INPUT_TOKENS),
|
.unwrap_or(defaults::COMPACT_WORKER_MAX_INPUT_TOKENS),
|
||||||
compact_worker_max_turns: c
|
finish_warning_remaining_tokens: c
|
||||||
.compact_worker_max_turns
|
.finish_warning_remaining_tokens
|
||||||
.or(defaults::COMPACT_WORKER_MAX_TURNS),
|
.unwrap_or(defaults::COMPACT_FINISH_WARNING_REMAINING_TOKENS),
|
||||||
|
final_reserve_tokens: c
|
||||||
|
.final_reserve_tokens
|
||||||
|
.unwrap_or(defaults::COMPACT_FINAL_RESERVE_TOKENS),
|
||||||
|
worker_max_turns: c.worker_max_turns.or(defaults::COMPACT_WORKER_MAX_TURNS),
|
||||||
|
summary_target_tokens: c
|
||||||
|
.summary_target_tokens
|
||||||
|
.unwrap_or(defaults::COMPACT_SUMMARY_TARGET_TOKENS),
|
||||||
|
summary_max_tokens: c
|
||||||
|
.summary_max_tokens
|
||||||
|
.unwrap_or(defaults::COMPACT_SUMMARY_MAX_TOKENS),
|
||||||
|
auto_read_budget_tokens: c
|
||||||
|
.auto_read_budget_tokens
|
||||||
|
.unwrap_or(defaults::COMPACT_AUTO_READ_BUDGET),
|
||||||
|
result_context_max_tokens: c
|
||||||
|
.result_context_max_tokens
|
||||||
|
.unwrap_or(defaults::COMPACT_RESULT_CONTEXT_MAX_TOKENS),
|
||||||
model: c.model,
|
model: c.model,
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
@ -984,7 +1032,7 @@ mod tests {
|
||||||
fn merge_option_struct_field_wise() {
|
fn merge_option_struct_field_wise() {
|
||||||
let lower = PodManifestConfig {
|
let lower = PodManifestConfig {
|
||||||
compaction: Some(CompactionConfigPartial {
|
compaction: Some(CompactionConfigPartial {
|
||||||
compact_threshold: Some(50_000),
|
threshold: Some(50_000),
|
||||||
prune_protected_tokens: Some(5_000),
|
prune_protected_tokens: Some(5_000),
|
||||||
..Default::default()
|
..Default::default()
|
||||||
}),
|
}),
|
||||||
|
|
@ -992,14 +1040,14 @@ mod tests {
|
||||||
};
|
};
|
||||||
let upper = PodManifestConfig {
|
let upper = PodManifestConfig {
|
||||||
compaction: Some(CompactionConfigPartial {
|
compaction: Some(CompactionConfigPartial {
|
||||||
compact_threshold: Some(80_000),
|
threshold: Some(80_000),
|
||||||
..Default::default()
|
..Default::default()
|
||||||
}),
|
}),
|
||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
let merged = lower.merge(upper);
|
let merged = lower.merge(upper);
|
||||||
let c = merged.compaction.unwrap();
|
let c = merged.compaction.unwrap();
|
||||||
assert_eq!(c.compact_threshold, Some(80_000));
|
assert_eq!(c.threshold, Some(80_000));
|
||||||
// field from lower retained when upper has None
|
// field from lower retained when upper has None
|
||||||
assert_eq!(c.prune_protected_tokens, Some(5_000));
|
assert_eq!(c.prune_protected_tokens, Some(5_000));
|
||||||
}
|
}
|
||||||
|
|
@ -1122,27 +1170,27 @@ stop_sequences = ["\n\n", "</stop>"]
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn from_toml_accepts_compact_worker_max_turns() {
|
fn from_toml_accepts_worker_max_turns() {
|
||||||
let cfg = PodManifestConfig::from_toml(
|
let cfg = PodManifestConfig::from_toml(
|
||||||
r#"
|
r#"
|
||||||
[compaction]
|
[compaction]
|
||||||
compact_worker_max_turns = 7
|
worker_max_turns = 7
|
||||||
"#,
|
"#,
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
assert_eq!(cfg.compaction.unwrap().compact_worker_max_turns, Some(7));
|
assert_eq!(cfg.compaction.unwrap().worker_max_turns, Some(7));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn try_from_compaction_defaults_compact_worker_max_turns() {
|
fn try_from_compaction_defaults_worker_max_turns() {
|
||||||
let mut cfg = minimal_valid();
|
let mut cfg = minimal_valid();
|
||||||
cfg.compaction = Some(CompactionConfigPartial::default());
|
cfg.compaction = Some(CompactionConfigPartial::default());
|
||||||
|
|
||||||
let manifest = PodManifest::try_from(cfg).unwrap();
|
let manifest = PodManifest::try_from(cfg).unwrap();
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
manifest.compaction.unwrap().compact_worker_max_turns,
|
manifest.compaction.unwrap().worker_max_turns,
|
||||||
defaults::COMPACT_WORKER_MAX_TURNS
|
defaults::COMPACT_WORKER_MAX_TURNS
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -25,9 +25,23 @@ pub const PRUNE_MIN_SAVINGS: u64 = 4096;
|
||||||
/// Token budget retained (unchanged) at the tail of the history across
|
/// Token budget retained (unchanged) at the tail of the history across
|
||||||
/// a compact. Items whose cumulative token count fits within this budget
|
/// a compact. Items whose cumulative token count fits within this budget
|
||||||
/// starting from the end are kept verbatim; the rest are summarised.
|
/// starting from the end are kept verbatim; the rest are summarised.
|
||||||
/// See [`crate::CompactionConfig::compact_retained_tokens`].
|
/// See [`crate::CompactionConfig::retained_tokens`].
|
||||||
pub const COMPACT_RETAINED_TOKENS: u64 = 8000;
|
pub const COMPACT_RETAINED_TOKENS: u64 = 8000;
|
||||||
|
|
||||||
|
/// Target size for the deterministic compact overview/index fed to the
|
||||||
|
/// compact worker. Exceeding this target is tolerated.
|
||||||
|
/// See [`crate::CompactionConfig::overview_target_tokens`].
|
||||||
|
pub const COMPACT_OVERVIEW_TARGET_TOKENS: u64 = 8_000;
|
||||||
|
|
||||||
|
/// Warning threshold for compact overview/index size. Compaction continues.
|
||||||
|
/// See [`crate::CompactionConfig::overview_warning_tokens`].
|
||||||
|
pub const COMPACT_OVERVIEW_WARNING_TOKENS: u64 = 16_000;
|
||||||
|
|
||||||
|
/// Hard deterministic-overview deadline. When exceeded, overview generation
|
||||||
|
/// falls back to a coarser index before the compact worker is started.
|
||||||
|
/// See [`crate::CompactionConfig::overview_deadline_tokens`].
|
||||||
|
pub const COMPACT_OVERVIEW_DEADLINE_TOKENS: u64 = 40_000;
|
||||||
|
|
||||||
/// Default instruction asset reference used when `worker.instruction`
|
/// Default instruction asset reference used when `worker.instruction`
|
||||||
/// is omitted. See the `PromptLoader` prefix addressing scheme for the
|
/// is omitted. See the `PromptLoader` prefix addressing scheme for the
|
||||||
/// `$insomnia/` / `$user/` / `$workspace/` namespaces.
|
/// `$insomnia/` / `$user/` / `$workspace/` namespaces.
|
||||||
|
|
@ -42,19 +56,39 @@ pub const WORKER_LANGUAGE: &str =
|
||||||
/// session after compaction. Limits how much raw file text the
|
/// session after compaction. Limits how much raw file text the
|
||||||
/// compact worker can pull into the compacted context via
|
/// compact worker can pull into the compacted context via
|
||||||
/// `mark_read_required`. See
|
/// `mark_read_required`. See
|
||||||
/// [`crate::CompactionConfig::compact_auto_read_budget`].
|
/// [`crate::CompactionConfig::auto_read_budget_tokens`].
|
||||||
pub const COMPACT_AUTO_READ_BUDGET: u64 = 8000;
|
pub const COMPACT_AUTO_READ_BUDGET: u64 = 8000;
|
||||||
|
|
||||||
/// Current prompt-occupancy cap for the compact worker's own LLM
|
/// Current prompt-occupancy cap for the compact worker's own LLM
|
||||||
/// calls. Exceeding this aborts the compact run (circuit-breaker
|
/// calls. Exceeding this aborts the compact run (circuit-breaker
|
||||||
/// path). See
|
/// path). See [`crate::CompactionConfig::worker_context_max_tokens`].
|
||||||
/// [`crate::CompactionConfig::compact_worker_max_input_tokens`].
|
|
||||||
pub const COMPACT_WORKER_MAX_INPUT_TOKENS: u64 = 50_000;
|
pub const COMPACT_WORKER_MAX_INPUT_TOKENS: u64 = 50_000;
|
||||||
|
|
||||||
|
/// Remaining compact-worker context threshold that triggers an instruction
|
||||||
|
/// to stop exploring and call `write_summary`.
|
||||||
|
/// See [`crate::CompactionConfig::finish_warning_remaining_tokens`].
|
||||||
|
pub const COMPACT_FINISH_WARNING_REMAINING_TOKENS: u64 = 8_000;
|
||||||
|
|
||||||
|
/// Context reserve preserved for final summary/tool closing turns.
|
||||||
|
/// See [`crate::CompactionConfig::final_reserve_tokens`].
|
||||||
|
pub const COMPACT_FINAL_RESERVE_TOKENS: u64 = 4_000;
|
||||||
|
|
||||||
/// Optional maximum compact-worker tool-loop depth. `None` means unlimited.
|
/// Optional maximum compact-worker tool-loop depth. `None` means unlimited.
|
||||||
/// See [`crate::CompactionConfig::compact_worker_max_turns`].
|
/// See [`crate::CompactionConfig::worker_max_turns`].
|
||||||
pub const COMPACT_WORKER_MAX_TURNS: Option<u32> = Some(20);
|
pub const COMPACT_WORKER_MAX_TURNS: Option<u32> = Some(20);
|
||||||
|
|
||||||
|
/// Target size for the `write_summary` text. Used in prompt/nudge text.
|
||||||
|
/// See [`crate::CompactionConfig::summary_target_tokens`].
|
||||||
|
pub const COMPACT_SUMMARY_TARGET_TOKENS: u64 = 2_000;
|
||||||
|
|
||||||
|
/// Hard validation cap for the final `write_summary` text.
|
||||||
|
/// See [`crate::CompactionConfig::summary_max_tokens`].
|
||||||
|
pub const COMPACT_SUMMARY_MAX_TOKENS: u64 = 4_000;
|
||||||
|
|
||||||
|
/// Dry-run cap for the compacted session's initial request context.
|
||||||
|
/// See [`crate::CompactionConfig::result_context_max_tokens`].
|
||||||
|
pub const COMPACT_RESULT_CONTEXT_MAX_TOKENS: u64 = 60_000;
|
||||||
|
|
||||||
/// Number of recently-touched files fed to the compact worker as
|
/// Number of recently-touched files fed to the compact worker as
|
||||||
/// default references.
|
/// default references.
|
||||||
pub const COMPACT_DEFAULT_REFERENCE_COUNT: usize = 5;
|
pub const COMPACT_DEFAULT_REFERENCE_COUNT: usize = 5;
|
||||||
|
|
|
||||||
|
|
@ -363,8 +363,8 @@ pub struct CompactionConfig {
|
||||||
/// Checked by the Controller after each run. When current occupancy
|
/// Checked by the Controller after each run. When current occupancy
|
||||||
/// exceeds this value, compact runs before the next turn. `None`
|
/// exceeds this value, compact runs before the next turn. `None`
|
||||||
/// disables the between-turns check.
|
/// disables the between-turns check.
|
||||||
#[serde(default)]
|
#[serde(default, alias = "compact_threshold")]
|
||||||
pub compact_threshold: Option<u64>,
|
pub threshold: Option<u64>,
|
||||||
|
|
||||||
/// Safety-net (between-requests) compaction threshold.
|
/// Safety-net (between-requests) compaction threshold.
|
||||||
///
|
///
|
||||||
|
|
@ -373,32 +373,76 @@ pub struct CompactionConfig {
|
||||||
/// Controller can compact before the next LLM request. `None`
|
/// Controller can compact before the next LLM request. `None`
|
||||||
/// disables the between-requests check.
|
/// disables the between-requests check.
|
||||||
///
|
///
|
||||||
/// Expected relation: `compact_threshold < compact_request_threshold`
|
/// Expected relation: `threshold < request_threshold` (proactive triggers
|
||||||
/// (proactive triggers before safety net). A reversed configuration
|
/// before safety net). A reversed configuration is accepted but logged as
|
||||||
/// is accepted but logged as a warning.
|
/// a warning.
|
||||||
#[serde(default)]
|
#[serde(default, alias = "compact_request_threshold")]
|
||||||
pub compact_request_threshold: Option<u64>,
|
pub request_threshold: Option<u64>,
|
||||||
|
|
||||||
/// Token budget retained verbatim at the tail of the history after
|
/// Token budget retained verbatim at the tail of the history after
|
||||||
/// compaction. Measured against the occupancy estimate from
|
/// compaction. Measured against the occupancy estimate from
|
||||||
/// `UsageRecord` history; turn boundaries are ignored.
|
/// `UsageRecord` history; turn boundaries are ignored.
|
||||||
#[serde(default = "default_compact_retained_tokens")]
|
#[serde(default = "default_retained_tokens", alias = "compact_retained_tokens")]
|
||||||
pub compact_retained_tokens: u64,
|
pub retained_tokens: u64,
|
||||||
|
|
||||||
/// Aggregate token budget for auto-read file contents injected into
|
/// Target size for the deterministic overview/index fed to the compact
|
||||||
/// the compacted session by the compact worker.
|
/// worker. Overshooting this target is not an error.
|
||||||
#[serde(default = "default_compact_auto_read_budget")]
|
#[serde(default = "default_overview_target_tokens")]
|
||||||
pub compact_auto_read_budget: u64,
|
pub overview_target_tokens: u64,
|
||||||
|
|
||||||
|
/// Warning threshold for deterministic overview/index size.
|
||||||
|
#[serde(default = "default_overview_warning_tokens")]
|
||||||
|
pub overview_warning_tokens: u64,
|
||||||
|
|
||||||
|
/// Deadline threshold for deterministic overview/index generation.
|
||||||
|
/// Oversized overviews fall back to a coarser deterministic index.
|
||||||
|
#[serde(default = "default_overview_deadline_tokens")]
|
||||||
|
pub overview_deadline_tokens: u64,
|
||||||
|
|
||||||
/// Current prompt-occupancy cap for the compact worker's own LLM
|
/// Current prompt-occupancy cap for the compact worker's own LLM
|
||||||
/// requests. Exceeding this aborts the compact run.
|
/// requests. Exceeding this aborts the compact run.
|
||||||
#[serde(default = "default_compact_worker_max_input_tokens")]
|
#[serde(
|
||||||
pub compact_worker_max_input_tokens: u64,
|
default = "default_worker_context_max_tokens",
|
||||||
|
alias = "compact_worker_max_input_tokens"
|
||||||
|
)]
|
||||||
|
pub worker_context_max_tokens: u64,
|
||||||
|
|
||||||
|
/// Remaining compact-worker context threshold that triggers a warning and
|
||||||
|
/// an instruction to stop exploring and call `write_summary`.
|
||||||
|
#[serde(default = "default_finish_warning_remaining_tokens")]
|
||||||
|
pub finish_warning_remaining_tokens: u64,
|
||||||
|
|
||||||
|
/// Context reserve preserved for final summary/tool closing turns.
|
||||||
|
#[serde(default = "default_final_reserve_tokens")]
|
||||||
|
pub final_reserve_tokens: u64,
|
||||||
|
|
||||||
/// Optional maximum compact-worker tool-loop depth. `None` leaves the
|
/// Optional maximum compact-worker tool-loop depth. `None` leaves the
|
||||||
/// worker unlimited; the default bounds runaway short-context loops.
|
/// worker unlimited; the default bounds runaway short-context loops.
|
||||||
#[serde(default = "default_compact_worker_max_turns")]
|
#[serde(
|
||||||
pub compact_worker_max_turns: Option<u32>,
|
default = "default_worker_max_turns",
|
||||||
|
alias = "compact_worker_max_turns"
|
||||||
|
)]
|
||||||
|
pub worker_max_turns: Option<u32>,
|
||||||
|
|
||||||
|
/// Target size for the `write_summary` text. Used in prompt/nudge text.
|
||||||
|
#[serde(default = "default_summary_target_tokens")]
|
||||||
|
pub summary_target_tokens: u64,
|
||||||
|
|
||||||
|
/// Hard validation cap for the final `write_summary` text.
|
||||||
|
#[serde(default = "default_summary_max_tokens")]
|
||||||
|
pub summary_max_tokens: u64,
|
||||||
|
|
||||||
|
/// Aggregate token budget for auto-read file contents injected into
|
||||||
|
/// the compacted session by the compact worker.
|
||||||
|
#[serde(
|
||||||
|
default = "default_auto_read_budget_tokens",
|
||||||
|
alias = "compact_auto_read_budget"
|
||||||
|
)]
|
||||||
|
pub auto_read_budget_tokens: u64,
|
||||||
|
|
||||||
|
/// Dry-run cap for the compacted session's initial request context.
|
||||||
|
#[serde(default = "default_result_context_max_tokens")]
|
||||||
|
pub result_context_max_tokens: u64,
|
||||||
|
|
||||||
/// Optional model for the compactor (summary) LLM.
|
/// Optional model for the compactor (summary) LLM.
|
||||||
/// If omitted, the main model is cloned via `clone_boxed()`.
|
/// If omitted, the main model is cloned via `clone_boxed()`.
|
||||||
|
|
@ -412,30 +456,62 @@ fn default_prune_protected_tokens() -> u64 {
|
||||||
fn default_prune_min_savings() -> u64 {
|
fn default_prune_min_savings() -> u64 {
|
||||||
defaults::PRUNE_MIN_SAVINGS
|
defaults::PRUNE_MIN_SAVINGS
|
||||||
}
|
}
|
||||||
fn default_compact_retained_tokens() -> u64 {
|
fn default_retained_tokens() -> u64 {
|
||||||
defaults::COMPACT_RETAINED_TOKENS
|
defaults::COMPACT_RETAINED_TOKENS
|
||||||
}
|
}
|
||||||
fn default_compact_auto_read_budget() -> u64 {
|
fn default_overview_target_tokens() -> u64 {
|
||||||
defaults::COMPACT_AUTO_READ_BUDGET
|
defaults::COMPACT_OVERVIEW_TARGET_TOKENS
|
||||||
}
|
}
|
||||||
fn default_compact_worker_max_input_tokens() -> u64 {
|
fn default_overview_warning_tokens() -> u64 {
|
||||||
|
defaults::COMPACT_OVERVIEW_WARNING_TOKENS
|
||||||
|
}
|
||||||
|
fn default_overview_deadline_tokens() -> u64 {
|
||||||
|
defaults::COMPACT_OVERVIEW_DEADLINE_TOKENS
|
||||||
|
}
|
||||||
|
fn default_worker_context_max_tokens() -> u64 {
|
||||||
defaults::COMPACT_WORKER_MAX_INPUT_TOKENS
|
defaults::COMPACT_WORKER_MAX_INPUT_TOKENS
|
||||||
}
|
}
|
||||||
fn default_compact_worker_max_turns() -> Option<u32> {
|
fn default_finish_warning_remaining_tokens() -> u64 {
|
||||||
|
defaults::COMPACT_FINISH_WARNING_REMAINING_TOKENS
|
||||||
|
}
|
||||||
|
fn default_final_reserve_tokens() -> u64 {
|
||||||
|
defaults::COMPACT_FINAL_RESERVE_TOKENS
|
||||||
|
}
|
||||||
|
fn default_worker_max_turns() -> Option<u32> {
|
||||||
defaults::COMPACT_WORKER_MAX_TURNS
|
defaults::COMPACT_WORKER_MAX_TURNS
|
||||||
}
|
}
|
||||||
|
fn default_summary_target_tokens() -> u64 {
|
||||||
|
defaults::COMPACT_SUMMARY_TARGET_TOKENS
|
||||||
|
}
|
||||||
|
fn default_summary_max_tokens() -> u64 {
|
||||||
|
defaults::COMPACT_SUMMARY_MAX_TOKENS
|
||||||
|
}
|
||||||
|
fn default_auto_read_budget_tokens() -> u64 {
|
||||||
|
defaults::COMPACT_AUTO_READ_BUDGET
|
||||||
|
}
|
||||||
|
fn default_result_context_max_tokens() -> u64 {
|
||||||
|
defaults::COMPACT_RESULT_CONTEXT_MAX_TOKENS
|
||||||
|
}
|
||||||
|
|
||||||
impl Default for CompactionConfig {
|
impl Default for CompactionConfig {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
Self {
|
Self {
|
||||||
prune_protected_tokens: default_prune_protected_tokens(),
|
prune_protected_tokens: default_prune_protected_tokens(),
|
||||||
prune_min_savings: default_prune_min_savings(),
|
prune_min_savings: default_prune_min_savings(),
|
||||||
compact_threshold: None,
|
threshold: None,
|
||||||
compact_request_threshold: None,
|
request_threshold: None,
|
||||||
compact_retained_tokens: default_compact_retained_tokens(),
|
retained_tokens: default_retained_tokens(),
|
||||||
compact_auto_read_budget: default_compact_auto_read_budget(),
|
overview_target_tokens: default_overview_target_tokens(),
|
||||||
compact_worker_max_input_tokens: default_compact_worker_max_input_tokens(),
|
overview_warning_tokens: default_overview_warning_tokens(),
|
||||||
compact_worker_max_turns: default_compact_worker_max_turns(),
|
overview_deadline_tokens: default_overview_deadline_tokens(),
|
||||||
|
worker_context_max_tokens: default_worker_context_max_tokens(),
|
||||||
|
finish_warning_remaining_tokens: default_finish_warning_remaining_tokens(),
|
||||||
|
final_reserve_tokens: default_final_reserve_tokens(),
|
||||||
|
worker_max_turns: default_worker_max_turns(),
|
||||||
|
summary_target_tokens: default_summary_target_tokens(),
|
||||||
|
summary_max_tokens: default_summary_max_tokens(),
|
||||||
|
auto_read_budget_tokens: default_auto_read_budget_tokens(),
|
||||||
|
result_context_max_tokens: default_result_context_max_tokens(),
|
||||||
model: None,
|
model: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -592,15 +668,15 @@ model_id = "claude-sonnet-4-20250514"
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn parse_compaction_config() {
|
fn parse_compaction_config() {
|
||||||
let toml = format!("{MINIMAL_REQUIRED}\n[compaction]\ncompact_threshold = 80000\n");
|
let toml = format!("{MINIMAL_REQUIRED}\n[compaction]\nthreshold = 80000\n");
|
||||||
let manifest = PodManifest::from_toml(&toml).unwrap();
|
let manifest = PodManifest::from_toml(&toml).unwrap();
|
||||||
let c = manifest.compaction.unwrap();
|
let c = manifest.compaction.unwrap();
|
||||||
assert_eq!(c.prune_protected_tokens, 8000);
|
assert_eq!(c.prune_protected_tokens, 8000);
|
||||||
assert_eq!(c.prune_min_savings, 4096);
|
assert_eq!(c.prune_min_savings, 4096);
|
||||||
assert_eq!(c.compact_threshold, Some(80000));
|
assert_eq!(c.threshold, Some(80000));
|
||||||
assert_eq!(c.compact_request_threshold, None);
|
assert_eq!(c.request_threshold, None);
|
||||||
assert_eq!(c.compact_retained_tokens, 8000);
|
assert_eq!(c.retained_tokens, 8000);
|
||||||
assert_eq!(c.compact_worker_max_turns, Some(20));
|
assert_eq!(c.worker_max_turns, Some(20));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
@ -618,11 +694,11 @@ model_id = "claude-sonnet-4-20250514"
|
||||||
let toml = format!(
|
let toml = format!(
|
||||||
"{MINIMAL_REQUIRED}\n\
|
"{MINIMAL_REQUIRED}\n\
|
||||||
[compaction]\n\
|
[compaction]\n\
|
||||||
compact_worker_max_turns = 7\n"
|
worker_max_turns = 7\n"
|
||||||
);
|
);
|
||||||
let manifest = PodManifest::from_toml(&toml).unwrap();
|
let manifest = PodManifest::from_toml(&toml).unwrap();
|
||||||
let c = manifest.compaction.unwrap();
|
let c = manifest.compaction.unwrap();
|
||||||
assert_eq!(c.compact_worker_max_turns, Some(7));
|
assert_eq!(c.worker_max_turns, Some(7));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
@ -630,13 +706,13 @@ model_id = "claude-sonnet-4-20250514"
|
||||||
let toml = format!(
|
let toml = format!(
|
||||||
"{MINIMAL_REQUIRED}\n\
|
"{MINIMAL_REQUIRED}\n\
|
||||||
[compaction]\n\
|
[compaction]\n\
|
||||||
compact_threshold = 80000\n\
|
threshold = 80000\n\
|
||||||
compact_request_threshold = 90000\n"
|
request_threshold = 90000\n"
|
||||||
);
|
);
|
||||||
let manifest = PodManifest::from_toml(&toml).unwrap();
|
let manifest = PodManifest::from_toml(&toml).unwrap();
|
||||||
let c = manifest.compaction.unwrap();
|
let c = manifest.compaction.unwrap();
|
||||||
assert_eq!(c.compact_threshold, Some(80000));
|
assert_eq!(c.threshold, Some(80000));
|
||||||
assert_eq!(c.compact_request_threshold, Some(90000));
|
assert_eq!(c.request_threshold, Some(90000));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
@ -644,12 +720,12 @@ model_id = "claude-sonnet-4-20250514"
|
||||||
let toml = format!(
|
let toml = format!(
|
||||||
"{MINIMAL_REQUIRED}\n\
|
"{MINIMAL_REQUIRED}\n\
|
||||||
[compaction]\n\
|
[compaction]\n\
|
||||||
compact_request_threshold = 90000\n"
|
request_threshold = 90000\n"
|
||||||
);
|
);
|
||||||
let manifest = PodManifest::from_toml(&toml).unwrap();
|
let manifest = PodManifest::from_toml(&toml).unwrap();
|
||||||
let c = manifest.compaction.unwrap();
|
let c = manifest.compaction.unwrap();
|
||||||
assert_eq!(c.compact_threshold, None);
|
assert_eq!(c.threshold, None);
|
||||||
assert_eq!(c.compact_request_threshold, Some(90000));
|
assert_eq!(c.request_threshold, Some(90000));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
@ -657,7 +733,7 @@ model_id = "claude-sonnet-4-20250514"
|
||||||
let toml = format!(
|
let toml = format!(
|
||||||
"{MINIMAL_REQUIRED}\n\
|
"{MINIMAL_REQUIRED}\n\
|
||||||
[compaction]\n\
|
[compaction]\n\
|
||||||
compact_threshold = 80000\n\n\
|
threshold = 80000\n\n\
|
||||||
[compaction.model]\n\
|
[compaction.model]\n\
|
||||||
scheme = \"gemini\"\n\
|
scheme = \"gemini\"\n\
|
||||||
model_id = \"gemini-2.0-flash\"\n"
|
model_id = \"gemini-2.0-flash\"\n"
|
||||||
|
|
|
||||||
|
|
@ -279,11 +279,11 @@ mod tests {
|
||||||
fn runtime_dir_prefers_xdg_runtime_dir() {
|
fn runtime_dir_prefers_xdg_runtime_dir() {
|
||||||
let _g = EnvGuard::new(&[
|
let _g = EnvGuard::new(&[
|
||||||
("HOME", Some("/h")),
|
("HOME", Some("/h")),
|
||||||
("XDG_RUNTIME_DIR", Some("/run/user/1000")),
|
("XDG_RUNTIME_DIR", Some("/xdg-runtime")),
|
||||||
]);
|
]);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
runtime_dir().unwrap(),
|
runtime_dir().unwrap(),
|
||||||
PathBuf::from("<runtime-dir>")
|
PathBuf::from("/xdg-runtime/insomnia")
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -18,12 +18,13 @@
|
||||||
//! compacted session's opening system messages.
|
//! compacted session's opening system messages.
|
||||||
|
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
|
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
|
||||||
use std::sync::{Arc, Mutex};
|
use std::sync::{Arc, Mutex};
|
||||||
|
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use llm_worker::Item;
|
use llm_worker::Item;
|
||||||
use llm_worker::interceptor::{Interceptor, PreRequestAction};
|
use llm_worker::interceptor::{Interceptor, PreRequestAction, PreToolAction, ToolCallInfo};
|
||||||
use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta, ToolOutput};
|
use llm_worker::tool::{Tool, ToolDefinition, ToolError, ToolMeta, ToolOutput, ToolResult};
|
||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
use tools::ScopedFs;
|
use tools::ScopedFs;
|
||||||
|
|
||||||
|
|
@ -83,6 +84,39 @@ struct SummaryParams {
|
||||||
pub text: String,
|
pub text: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Input to `search_session_log`.
|
||||||
|
#[derive(Debug, Deserialize, schemars::JsonSchema)]
|
||||||
|
struct SearchSessionParams {
|
||||||
|
/// Case-insensitive substring to search in compact-target history.
|
||||||
|
pub query: String,
|
||||||
|
/// 0-based item offset to start searching from.
|
||||||
|
#[serde(default)]
|
||||||
|
pub offset: Option<usize>,
|
||||||
|
/// Maximum number of hits to return.
|
||||||
|
#[serde(default)]
|
||||||
|
pub limit: Option<usize>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Input to `read_session_items`.
|
||||||
|
#[derive(Debug, Deserialize, schemars::JsonSchema)]
|
||||||
|
struct ReadSessionParams {
|
||||||
|
/// 0-based compact-target history item offset.
|
||||||
|
pub offset: usize,
|
||||||
|
/// Maximum number of items to return.
|
||||||
|
pub limit: usize,
|
||||||
|
/// `compact` omits tool arguments/full results; `full` includes message text and tool result content.
|
||||||
|
#[serde(default = "default_session_read_mode")]
|
||||||
|
pub mode: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn default_session_read_mode() -> String {
|
||||||
|
"compact".to_string()
|
||||||
|
}
|
||||||
|
|
||||||
|
const SESSION_TOOL_MAX_OUTPUT_TOKENS: u64 = 12_000;
|
||||||
|
const SESSION_SEARCH_MAX_RESULTS: usize = 50;
|
||||||
|
const SESSION_READ_MAX_ITEMS: usize = 80;
|
||||||
|
|
||||||
const MARK_DESCRIPTION: &str = "Inject a file's contents into the compacted context so the \
|
const MARK_DESCRIPTION: &str = "Inject a file's contents into the compacted context so the \
|
||||||
next session starts with it already read. Use this for files the next task needs in full. \
|
next session starts with it already read. Use this for files the next task needs in full. \
|
||||||
Optionally specify `offset` (0-based line) and `limit` (line count) to inject only a slice. \
|
Optionally specify `offset` (0-based line) and `limit` (line count) to inject only a slice. \
|
||||||
|
|
@ -97,6 +131,236 @@ const SUMMARY_DESCRIPTION: &str = "Provide the final structured summary text. Su
|
||||||
replace the previous content; only the last call is used. Must be called before the compact run \
|
replace the previous content; only the last call is used. Must be called before the compact run \
|
||||||
ends or compaction fails.";
|
ends or compaction fails.";
|
||||||
|
|
||||||
|
const SEARCH_SESSION_DESCRIPTION: &str = "Search the compact-target session history by \
|
||||||
|
case-insensitive substring. Returns item indexes and compact snippets. Use this when the initial \
|
||||||
|
overview is not enough to identify which part of the session matters. Results are bounded; narrow \
|
||||||
|
the query if important details are omitted.";
|
||||||
|
|
||||||
|
const READ_SESSION_DESCRIPTION: &str = "Read a bounded range of compact-target session history \
|
||||||
|
items by 0-based index. mode='compact' omits tool arguments, full tool results, and reasoning \
|
||||||
|
bodies; mode='full' includes message text and tool result content but still remains bounded. Use \
|
||||||
|
this to verify details before writing the summary.";
|
||||||
|
|
||||||
|
struct SessionLogToolState {
|
||||||
|
items: Arc<Vec<Item>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
struct SearchSessionLogTool {
|
||||||
|
state: Arc<SessionLogToolState>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl Tool for SearchSessionLogTool {
|
||||||
|
async fn execute(&self, input_json: &str) -> Result<ToolOutput, ToolError> {
|
||||||
|
let params: SearchSessionParams = serde_json::from_str(input_json).map_err(|e| {
|
||||||
|
ToolError::InvalidArgument(format!("invalid search_session_log input: {e}"))
|
||||||
|
})?;
|
||||||
|
let query = params.query.trim().to_lowercase();
|
||||||
|
if query.is_empty() {
|
||||||
|
return Err(ToolError::InvalidArgument(
|
||||||
|
"search_session_log query must not be empty".to_string(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
let offset = params.offset.unwrap_or(0).min(self.state.items.len());
|
||||||
|
let limit = params
|
||||||
|
.limit
|
||||||
|
.unwrap_or(20)
|
||||||
|
.clamp(1, SESSION_SEARCH_MAX_RESULTS);
|
||||||
|
let mut hits = Vec::new();
|
||||||
|
for (idx, item) in self.state.items.iter().enumerate().skip(offset) {
|
||||||
|
let haystack = session_item_search_text(item).to_lowercase();
|
||||||
|
if haystack.contains(&query) {
|
||||||
|
hits.push(format_session_item(
|
||||||
|
idx,
|
||||||
|
item,
|
||||||
|
SessionReadMode::Compact,
|
||||||
|
600,
|
||||||
|
));
|
||||||
|
if hits.len() >= limit {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let mut content = hits.join("\n\n");
|
||||||
|
let truncated = truncate_to_token_budget(&mut content, SESSION_TOOL_MAX_OUTPUT_TOKENS);
|
||||||
|
let summary = if hits.is_empty() {
|
||||||
|
format!("No session log hits for {query:?} from item offset {offset}.")
|
||||||
|
} else if truncated {
|
||||||
|
format!(
|
||||||
|
"Found {} session log hit(s) for {query:?}; output truncated. Narrow the query.",
|
||||||
|
hits.len()
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
format!("Found {} session log hit(s) for {query:?}.", hits.len())
|
||||||
|
};
|
||||||
|
Ok(ToolOutput {
|
||||||
|
summary,
|
||||||
|
content: (!content.is_empty()).then_some(content),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct ReadSessionItemsTool {
|
||||||
|
state: Arc<SessionLogToolState>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl Tool for ReadSessionItemsTool {
|
||||||
|
async fn execute(&self, input_json: &str) -> Result<ToolOutput, ToolError> {
|
||||||
|
let params: ReadSessionParams = serde_json::from_str(input_json).map_err(|e| {
|
||||||
|
ToolError::InvalidArgument(format!("invalid read_session_items input: {e}"))
|
||||||
|
})?;
|
||||||
|
let mode = SessionReadMode::parse(¶ms.mode)?;
|
||||||
|
let offset = params.offset.min(self.state.items.len());
|
||||||
|
let limit = params.limit.clamp(1, SESSION_READ_MAX_ITEMS);
|
||||||
|
let end = offset.saturating_add(limit).min(self.state.items.len());
|
||||||
|
let mut blocks = Vec::new();
|
||||||
|
for idx in offset..end {
|
||||||
|
blocks.push(format_session_item(
|
||||||
|
idx,
|
||||||
|
&self.state.items[idx],
|
||||||
|
mode,
|
||||||
|
4_000,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
let mut content = blocks.join("\n\n");
|
||||||
|
let truncated = truncate_to_token_budget(&mut content, SESSION_TOOL_MAX_OUTPUT_TOKENS);
|
||||||
|
let summary = if truncated {
|
||||||
|
format!(
|
||||||
|
"Read session items {offset}..{end} in {mode:?} mode; output truncated. Narrow the range."
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
format!("Read session items {offset}..{end} in {mode:?} mode.")
|
||||||
|
};
|
||||||
|
Ok(ToolOutput {
|
||||||
|
summary,
|
||||||
|
content: (!content.is_empty()).then_some(content),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
|
enum SessionReadMode {
|
||||||
|
Compact,
|
||||||
|
Full,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SessionReadMode {
|
||||||
|
fn parse(value: &str) -> Result<Self, ToolError> {
|
||||||
|
match value {
|
||||||
|
"compact" => Ok(Self::Compact),
|
||||||
|
"full" => Ok(Self::Full),
|
||||||
|
other => Err(ToolError::InvalidArgument(format!(
|
||||||
|
"invalid read_session_items mode {other:?}; expected 'compact' or 'full'"
|
||||||
|
))),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn session_item_search_text(item: &Item) -> String {
|
||||||
|
match item {
|
||||||
|
Item::Message { role, content, .. } => format!(
|
||||||
|
"{:?} {}",
|
||||||
|
role,
|
||||||
|
content
|
||||||
|
.iter()
|
||||||
|
.map(|p| p.as_text())
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.join("")
|
||||||
|
),
|
||||||
|
Item::ToolCall {
|
||||||
|
name, arguments, ..
|
||||||
|
} => format!("tool_call {name} {arguments}"),
|
||||||
|
Item::ToolResult {
|
||||||
|
summary, content, ..
|
||||||
|
} => format!(
|
||||||
|
"tool_result {summary} {}",
|
||||||
|
content.as_deref().unwrap_or_default()
|
||||||
|
),
|
||||||
|
Item::Reasoning { text, summary, .. } => format!("reasoning {text} {}", summary.join(" ")),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn format_session_item(idx: usize, item: &Item, mode: SessionReadMode, max_chars: usize) -> String {
|
||||||
|
match item {
|
||||||
|
Item::Message { role, content, .. } => {
|
||||||
|
let text = content
|
||||||
|
.iter()
|
||||||
|
.map(|p| p.as_text())
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.join("");
|
||||||
|
format!(
|
||||||
|
"[{idx} Message {:?}] {}",
|
||||||
|
role,
|
||||||
|
truncate_chars(&text, max_chars)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
Item::ToolCall {
|
||||||
|
name, arguments, ..
|
||||||
|
} => match mode {
|
||||||
|
SessionReadMode::Compact => format!("[{idx} ToolCall] {name} (arguments omitted)"),
|
||||||
|
SessionReadMode::Full => format!(
|
||||||
|
"[{idx} ToolCall] {name}\narguments: {}",
|
||||||
|
truncate_chars(arguments, max_chars)
|
||||||
|
),
|
||||||
|
},
|
||||||
|
Item::ToolResult {
|
||||||
|
summary,
|
||||||
|
content,
|
||||||
|
is_error,
|
||||||
|
..
|
||||||
|
} => match mode {
|
||||||
|
SessionReadMode::Compact => format!(
|
||||||
|
"[{idx} ToolResult{}] {} (content omitted)",
|
||||||
|
if *is_error { " error" } else { "" },
|
||||||
|
truncate_chars(summary, 800)
|
||||||
|
),
|
||||||
|
SessionReadMode::Full => format!(
|
||||||
|
"[{idx} ToolResult{}] {}\ncontent: {}",
|
||||||
|
if *is_error { " error" } else { "" },
|
||||||
|
truncate_chars(summary, 800),
|
||||||
|
truncate_chars(content.as_deref().unwrap_or(""), max_chars)
|
||||||
|
),
|
||||||
|
},
|
||||||
|
Item::Reasoning { summary, .. } => match mode {
|
||||||
|
SessionReadMode::Compact => format!(
|
||||||
|
"[{idx} Reasoning] {} (body omitted)",
|
||||||
|
truncate_chars(&summary.join(" "), 800)
|
||||||
|
),
|
||||||
|
SessionReadMode::Full => format!(
|
||||||
|
"[{idx} Reasoning] {} (body omitted)",
|
||||||
|
truncate_chars(&summary.join(" "), 800)
|
||||||
|
),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn truncate_chars(text: &str, max_chars: usize) -> String {
|
||||||
|
if text.chars().count() <= max_chars {
|
||||||
|
return text.to_string();
|
||||||
|
}
|
||||||
|
let mut out = text.chars().take(max_chars).collect::<String>();
|
||||||
|
out.push_str("… [truncated]");
|
||||||
|
out
|
||||||
|
}
|
||||||
|
|
||||||
|
fn truncate_to_token_budget(text: &mut String, max_tokens: u64) -> bool {
|
||||||
|
let max_bytes = max_tokens.saturating_mul(4) as usize;
|
||||||
|
if text.len() <= max_bytes {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
let mut cut = 0;
|
||||||
|
for (idx, _) in text.char_indices() {
|
||||||
|
if idx > max_bytes {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
cut = idx;
|
||||||
|
}
|
||||||
|
text.truncate(cut);
|
||||||
|
text.push_str("\n… [session tool output truncated]");
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
struct MarkReadRequiredTool {
|
struct MarkReadRequiredTool {
|
||||||
fs: ScopedFs,
|
fs: ScopedFs,
|
||||||
ctx: Arc<Mutex<CompactWorkerContext>>,
|
ctx: Arc<Mutex<CompactWorkerContext>>,
|
||||||
|
|
@ -246,14 +510,93 @@ pub(crate) fn write_summary_tool(ctx: Arc<Mutex<CompactWorkerContext>>) -> ToolD
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Interceptor that aborts the compact worker when its current prompt
|
pub(crate) fn search_session_log_tool(items: Arc<Vec<Item>>) -> ToolDefinition {
|
||||||
/// occupancy estimate crosses `max_input_tokens`. The estimate uses the same
|
let state = Arc::new(SessionLogToolState { items });
|
||||||
/// `UsageRecord` + `llm_worker::token_counter::total_tokens` path as the main
|
Arc::new(move || {
|
||||||
/// Pod compaction thresholds, so prompt-cache hits are not counted cumulatively
|
let schema = schemars::schema_for!(SearchSessionParams);
|
||||||
/// across turns.
|
let schema_value = serde_json::to_value(schema).unwrap_or(serde_json::json!({}));
|
||||||
|
let meta = ToolMeta::new("search_session_log")
|
||||||
|
.description(SEARCH_SESSION_DESCRIPTION)
|
||||||
|
.input_schema(schema_value);
|
||||||
|
let tool: Arc<dyn Tool> = Arc::new(SearchSessionLogTool {
|
||||||
|
state: state.clone(),
|
||||||
|
});
|
||||||
|
(meta, tool)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn read_session_items_tool(items: Arc<Vec<Item>>) -> ToolDefinition {
|
||||||
|
let state = Arc::new(SessionLogToolState { items });
|
||||||
|
Arc::new(move || {
|
||||||
|
let schema = schemars::schema_for!(ReadSessionParams);
|
||||||
|
let schema_value = serde_json::to_value(schema).unwrap_or(serde_json::json!({}));
|
||||||
|
let meta = ToolMeta::new("read_session_items")
|
||||||
|
.description(READ_SESSION_DESCRIPTION)
|
||||||
|
.input_schema(schema_value);
|
||||||
|
let tool: Arc<dyn Tool> = Arc::new(ReadSessionItemsTool {
|
||||||
|
state: state.clone(),
|
||||||
|
});
|
||||||
|
(meta, tool)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Interceptor that monitors compact-worker context occupancy.
|
||||||
|
///
|
||||||
|
/// `max_input_tokens` remains the hard circuit breaker. Before that point,
|
||||||
|
/// the interceptor can persist a system warning into worker history telling
|
||||||
|
/// the model to stop broad exploration and call `write_summary`, and can block
|
||||||
|
/// additional exploratory tool calls once the final reserve is reached.
|
||||||
pub(crate) struct CompactWorkerInterceptor {
|
pub(crate) struct CompactWorkerInterceptor {
|
||||||
pub usage_tracker: Arc<UsageTracker>,
|
pub usage_tracker: Arc<UsageTracker>,
|
||||||
pub max_input_tokens: u64,
|
pub max_input_tokens: u64,
|
||||||
|
pub finish_warning_remaining_tokens: u64,
|
||||||
|
pub final_reserve_tokens: u64,
|
||||||
|
pub on_warning: Option<Arc<dyn Fn(String) + Send + Sync>>,
|
||||||
|
warning_sent: AtomicBool,
|
||||||
|
last_remaining_tokens: AtomicU64,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl CompactWorkerInterceptor {
|
||||||
|
pub(crate) fn new(
|
||||||
|
usage_tracker: Arc<UsageTracker>,
|
||||||
|
max_input_tokens: u64,
|
||||||
|
finish_warning_remaining_tokens: u64,
|
||||||
|
final_reserve_tokens: u64,
|
||||||
|
on_warning: Option<Arc<dyn Fn(String) + Send + Sync>>,
|
||||||
|
) -> Self {
|
||||||
|
Self {
|
||||||
|
usage_tracker,
|
||||||
|
max_input_tokens,
|
||||||
|
finish_warning_remaining_tokens,
|
||||||
|
final_reserve_tokens,
|
||||||
|
on_warning,
|
||||||
|
warning_sent: AtomicBool::new(false),
|
||||||
|
last_remaining_tokens: AtomicU64::new(max_input_tokens),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn maybe_emit_warning(&self, remaining: u64) -> Option<Item> {
|
||||||
|
let warning_threshold = self.finish_warning_remaining_tokens;
|
||||||
|
let reserve_threshold = self.final_reserve_tokens;
|
||||||
|
let should_warn = (warning_threshold > 0 && remaining <= warning_threshold)
|
||||||
|
|| (reserve_threshold > 0 && remaining <= reserve_threshold);
|
||||||
|
if !should_warn || self.warning_sent.swap(true, Ordering::AcqRel) {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let message = format!(
|
||||||
|
"compact worker context budget is low ({remaining}/{} tokens remaining). \
|
||||||
|
Stop broad exploration now, read only if absolutely necessary, then call \
|
||||||
|
`write_summary` with the final structured summary.",
|
||||||
|
self.max_input_tokens
|
||||||
|
);
|
||||||
|
if let Some(cb) = self.on_warning.as_ref() {
|
||||||
|
cb(message.clone());
|
||||||
|
}
|
||||||
|
Some(Item::system_message(format!(
|
||||||
|
"[Compact worker budget warning]\n\n{message}"
|
||||||
|
)))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
|
|
@ -268,9 +611,31 @@ impl Interceptor for CompactWorkerInterceptor {
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let remaining = self.max_input_tokens.saturating_sub(estimate.tokens);
|
||||||
|
self.last_remaining_tokens
|
||||||
|
.store(remaining, Ordering::Release);
|
||||||
|
if let Some(item) = self.maybe_emit_warning(remaining) {
|
||||||
|
self.usage_tracker.note_request(context.len() + 1);
|
||||||
|
return PreRequestAction::ContinueWith(vec![item]);
|
||||||
|
}
|
||||||
|
|
||||||
self.usage_tracker.note_request(context.len());
|
self.usage_tracker.note_request(context.len());
|
||||||
PreRequestAction::Continue
|
PreRequestAction::Continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn pre_tool_call(&self, info: &mut ToolCallInfo) -> PreToolAction {
|
||||||
|
if self.final_reserve_tokens == 0 || info.call.name == "write_summary" {
|
||||||
|
return PreToolAction::Continue;
|
||||||
|
}
|
||||||
|
let remaining = self.last_remaining_tokens.load(Ordering::Acquire);
|
||||||
|
if remaining > self.final_reserve_tokens {
|
||||||
|
return PreToolAction::Continue;
|
||||||
|
}
|
||||||
|
PreToolAction::SyntheticResult(ToolResult::error(
|
||||||
|
info.call.id.clone(),
|
||||||
|
"compact worker final reserve reached; do not perform more exploratory tool reads. Call `write_summary` now.",
|
||||||
|
))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Crude bytes→tokens estimate; good enough for budget accounting.
|
/// Crude bytes→tokens estimate; good enough for budget accounting.
|
||||||
|
|
@ -301,10 +666,7 @@ mod tests {
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn compact_worker_interceptor_uses_occupancy_not_cumulative_usage() {
|
async fn compact_worker_interceptor_uses_occupancy_not_cumulative_usage() {
|
||||||
let tracker = Arc::new(UsageTracker::new());
|
let tracker = Arc::new(UsageTracker::new());
|
||||||
let interceptor = CompactWorkerInterceptor {
|
let interceptor = CompactWorkerInterceptor::new(tracker.clone(), 150, 0, 0, None);
|
||||||
usage_tracker: tracker.clone(),
|
|
||||||
max_input_tokens: 150,
|
|
||||||
};
|
|
||||||
let mut context = vec![Item::user_message("hello")];
|
let mut context = vec![Item::user_message("hello")];
|
||||||
|
|
||||||
assert!(matches!(
|
assert!(matches!(
|
||||||
|
|
@ -327,13 +689,40 @@ mod tests {
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn compact_worker_interceptor_warns_before_hard_cap() {
|
||||||
|
let tracker = Arc::new(UsageTracker::new());
|
||||||
|
let warnings = Arc::new(Mutex::new(Vec::new()));
|
||||||
|
let captured = warnings.clone();
|
||||||
|
let interceptor = CompactWorkerInterceptor::new(
|
||||||
|
tracker.clone(),
|
||||||
|
150,
|
||||||
|
60,
|
||||||
|
20,
|
||||||
|
Some(Arc::new(move |message| {
|
||||||
|
captured.lock().unwrap().push(message);
|
||||||
|
})),
|
||||||
|
);
|
||||||
|
let mut context = vec![Item::user_message("hello")];
|
||||||
|
|
||||||
|
assert!(matches!(
|
||||||
|
interceptor.pre_llm_request(&mut context).await,
|
||||||
|
PreRequestAction::Continue
|
||||||
|
));
|
||||||
|
tracker.record_usage(&make_usage(100));
|
||||||
|
|
||||||
|
assert!(matches!(
|
||||||
|
interceptor.pre_llm_request(&mut context).await,
|
||||||
|
PreRequestAction::ContinueWith(items)
|
||||||
|
if items.len() == 1 && items[0].as_text().unwrap_or_default().contains("write_summary")
|
||||||
|
));
|
||||||
|
assert_eq!(warnings.lock().unwrap().len(), 1);
|
||||||
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn compact_worker_interceptor_cancels_when_occupancy_exceeds_cap() {
|
async fn compact_worker_interceptor_cancels_when_occupancy_exceeds_cap() {
|
||||||
let tracker = Arc::new(UsageTracker::new());
|
let tracker = Arc::new(UsageTracker::new());
|
||||||
let interceptor = CompactWorkerInterceptor {
|
let interceptor = CompactWorkerInterceptor::new(tracker.clone(), 99, 0, 0, None);
|
||||||
usage_tracker: tracker.clone(),
|
|
||||||
max_input_tokens: 99,
|
|
||||||
};
|
|
||||||
let mut context = vec![Item::user_message("hello")];
|
let mut context = vec![Item::user_message("hello")];
|
||||||
|
|
||||||
assert!(matches!(
|
assert!(matches!(
|
||||||
|
|
@ -420,6 +809,45 @@ mod tests {
|
||||||
assert_eq!(guard.references[0], PathBuf::from(p));
|
assert_eq!(guard.references[0], PathBuf::from(p));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn search_session_log_returns_bounded_hits_without_full_tool_content() {
|
||||||
|
let items = Arc::new(vec![
|
||||||
|
Item::user_message("investigate compact failure"),
|
||||||
|
Item::tool_result_with_content(
|
||||||
|
"call-1",
|
||||||
|
"read trace with compact failure",
|
||||||
|
"very large raw trace body with secret detail",
|
||||||
|
),
|
||||||
|
]);
|
||||||
|
let tool: Arc<dyn Tool> = Arc::new(SearchSessionLogTool {
|
||||||
|
state: Arc::new(SessionLogToolState { items }),
|
||||||
|
});
|
||||||
|
let input = serde_json::json!({ "query": "compact", "limit": 10 }).to_string();
|
||||||
|
let out = tool.execute(&input).await.unwrap();
|
||||||
|
let content = out.content.unwrap();
|
||||||
|
|
||||||
|
assert!(content.contains("investigate compact failure"));
|
||||||
|
assert!(content.contains("read trace with compact failure"));
|
||||||
|
assert!(!content.contains("secret detail"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn read_session_items_full_mode_can_read_tool_result_content() {
|
||||||
|
let items = Arc::new(vec![Item::tool_result_with_content(
|
||||||
|
"call-1",
|
||||||
|
"read trace",
|
||||||
|
"raw trace detail",
|
||||||
|
)]);
|
||||||
|
let tool: Arc<dyn Tool> = Arc::new(ReadSessionItemsTool {
|
||||||
|
state: Arc::new(SessionLogToolState { items }),
|
||||||
|
});
|
||||||
|
let input = serde_json::json!({ "offset": 0, "limit": 1, "mode": "full" }).to_string();
|
||||||
|
let out = tool.execute(&input).await.unwrap();
|
||||||
|
let content = out.content.unwrap();
|
||||||
|
|
||||||
|
assert!(content.contains("raw trace detail"));
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn slice_lines_handles_offset_and_limit() {
|
fn slice_lines_handles_offset_and_limit() {
|
||||||
let text = "a\nb\nc\nd";
|
let text = "a\nb\nc\nd";
|
||||||
|
|
|
||||||
|
|
@ -241,7 +241,7 @@ pub struct Pod<C: LlmClient, St: Store> {
|
||||||
scope: SharedScope,
|
scope: SharedScope,
|
||||||
hook_builder: HookRegistryBuilder,
|
hook_builder: HookRegistryBuilder,
|
||||||
interceptor_installed: bool,
|
interceptor_installed: bool,
|
||||||
/// Shared compaction state (present when compact_threshold is configured).
|
/// Shared compaction state (present when threshold is configured).
|
||||||
compact_state: Option<Arc<CompactState>>,
|
compact_state: Option<Arc<CompactState>>,
|
||||||
/// Per-LLM-request Usage tracker. Always present after construction.
|
/// Per-LLM-request Usage tracker. Always present after construction.
|
||||||
/// Captures `(history_len, UsageEvent)` pairs during a run; drained
|
/// Captures `(history_len, UsageEvent)` pairs during a run; drained
|
||||||
|
|
@ -1121,8 +1121,8 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
|
||||||
|
|
||||||
/// Install the hook-based interceptor on the Worker if not already done.
|
/// Install the hook-based interceptor on the Worker if not already done.
|
||||||
///
|
///
|
||||||
/// When either compaction threshold (`compact_threshold` or
|
/// When either compaction threshold (`threshold` or
|
||||||
/// `compact_request_threshold`) is configured in the manifest, allocates
|
/// `request_threshold`) is configured in the manifest, allocates
|
||||||
/// a shared [`CompactState`] and wires the interceptor to read current
|
/// a shared [`CompactState`] and wires the interceptor to read current
|
||||||
/// occupancy through the `UsageRecord` timeline.
|
/// occupancy through the `UsageRecord` timeline.
|
||||||
fn ensure_interceptor_installed(&mut self) {
|
fn ensure_interceptor_installed(&mut self) {
|
||||||
|
|
@ -1141,13 +1141,7 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
|
||||||
.manifest
|
.manifest
|
||||||
.compaction
|
.compaction
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.map(|c| {
|
.map(|c| (c.threshold, c.request_threshold, c.retained_tokens))
|
||||||
(
|
|
||||||
c.compact_threshold,
|
|
||||||
c.compact_request_threshold,
|
|
||||||
c.compact_retained_tokens,
|
|
||||||
)
|
|
||||||
})
|
|
||||||
.unwrap_or((None, None, manifest::defaults::COMPACT_RETAINED_TOKENS));
|
.unwrap_or((None, None, manifest::defaults::COMPACT_RETAINED_TOKENS));
|
||||||
|
|
||||||
let tracker_for_usage = self.usage_tracker.clone();
|
let tracker_for_usage = self.usage_tracker.clone();
|
||||||
|
|
@ -1161,7 +1155,7 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
|
||||||
warn!(
|
warn!(
|
||||||
post_run_threshold = post,
|
post_run_threshold = post,
|
||||||
request_threshold = req,
|
request_threshold = req,
|
||||||
"compact_threshold > compact_request_threshold; \
|
"threshold > request_threshold; \
|
||||||
proactive check will never fire before the safety net"
|
proactive check will never fire before the safety net"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
@ -2124,12 +2118,7 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
|
||||||
let retained = state
|
let retained = state
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.map(|s| s.retained_tokens())
|
.map(|s| s.retained_tokens())
|
||||||
.or_else(|| {
|
.or_else(|| self.manifest.compaction.as_ref().map(|c| c.retained_tokens))
|
||||||
self.manifest
|
|
||||||
.compaction
|
|
||||||
.as_ref()
|
|
||||||
.map(|c| c.compact_retained_tokens)
|
|
||||||
})
|
|
||||||
.unwrap_or(manifest::defaults::COMPACT_RETAINED_TOKENS);
|
.unwrap_or(manifest::defaults::COMPACT_RETAINED_TOKENS);
|
||||||
let current_tokens = self.total_tokens().tokens;
|
let current_tokens = self.total_tokens().tokens;
|
||||||
let cut = self.split_for_retained(retained);
|
let cut = self.split_for_retained(retained);
|
||||||
|
|
@ -2307,7 +2296,8 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
|
||||||
pub async fn compact(&mut self, retained_tokens: u64) -> Result<SegmentId, PodError> {
|
pub async fn compact(&mut self, retained_tokens: u64) -> Result<SegmentId, PodError> {
|
||||||
use crate::compact::worker::{
|
use crate::compact::worker::{
|
||||||
CompactWorkerContext, CompactWorkerInterceptor, add_reference_tool,
|
CompactWorkerContext, CompactWorkerInterceptor, add_reference_tool,
|
||||||
mark_read_required_tool, write_summary_tool,
|
mark_read_required_tool, read_session_items_tool, search_session_log_tool,
|
||||||
|
write_summary_tool,
|
||||||
};
|
};
|
||||||
use crate::fs_view::PodFsView;
|
use crate::fs_view::PodFsView;
|
||||||
|
|
||||||
|
|
@ -2324,21 +2314,49 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
|
||||||
|
|
||||||
// Compaction-related knobs. Fall through to manifest defaults when
|
// Compaction-related knobs. Fall through to manifest defaults when
|
||||||
// `[compaction]` is omitted entirely.
|
// `[compaction]` is omitted entirely.
|
||||||
let (auto_read_budget, compact_worker_max_input_tokens, compact_worker_max_turns) = self
|
let (
|
||||||
|
auto_read_budget,
|
||||||
|
worker_context_max_tokens,
|
||||||
|
finish_warning_remaining_tokens,
|
||||||
|
final_reserve_tokens,
|
||||||
|
worker_max_turns,
|
||||||
|
overview_target_tokens,
|
||||||
|
overview_warning_tokens,
|
||||||
|
overview_deadline_tokens,
|
||||||
|
summary_target_tokens,
|
||||||
|
summary_max_tokens,
|
||||||
|
result_context_max_tokens,
|
||||||
|
) = self
|
||||||
.manifest
|
.manifest
|
||||||
.compaction
|
.compaction
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.map(|c| {
|
.map(|c| {
|
||||||
(
|
(
|
||||||
c.compact_auto_read_budget,
|
c.auto_read_budget_tokens,
|
||||||
c.compact_worker_max_input_tokens,
|
c.worker_context_max_tokens,
|
||||||
c.compact_worker_max_turns,
|
c.finish_warning_remaining_tokens,
|
||||||
|
c.final_reserve_tokens,
|
||||||
|
c.worker_max_turns,
|
||||||
|
c.overview_target_tokens,
|
||||||
|
c.overview_warning_tokens,
|
||||||
|
c.overview_deadline_tokens,
|
||||||
|
c.summary_target_tokens,
|
||||||
|
c.summary_max_tokens,
|
||||||
|
c.result_context_max_tokens,
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
.unwrap_or((
|
.unwrap_or((
|
||||||
manifest::defaults::COMPACT_AUTO_READ_BUDGET,
|
manifest::defaults::COMPACT_AUTO_READ_BUDGET,
|
||||||
manifest::defaults::COMPACT_WORKER_MAX_INPUT_TOKENS,
|
manifest::defaults::COMPACT_WORKER_MAX_INPUT_TOKENS,
|
||||||
|
manifest::defaults::COMPACT_FINISH_WARNING_REMAINING_TOKENS,
|
||||||
|
manifest::defaults::COMPACT_FINAL_RESERVE_TOKENS,
|
||||||
manifest::defaults::COMPACT_WORKER_MAX_TURNS,
|
manifest::defaults::COMPACT_WORKER_MAX_TURNS,
|
||||||
|
manifest::defaults::COMPACT_OVERVIEW_TARGET_TOKENS,
|
||||||
|
manifest::defaults::COMPACT_OVERVIEW_WARNING_TOKENS,
|
||||||
|
manifest::defaults::COMPACT_OVERVIEW_DEADLINE_TOKENS,
|
||||||
|
manifest::defaults::COMPACT_SUMMARY_TARGET_TOKENS,
|
||||||
|
manifest::defaults::COMPACT_SUMMARY_MAX_TOKENS,
|
||||||
|
manifest::defaults::COMPACT_RESULT_CONTEXT_MAX_TOKENS,
|
||||||
));
|
));
|
||||||
|
|
||||||
// Default references: the N most-recently-touched files in the
|
// Default references: the N most-recently-touched files in the
|
||||||
|
|
@ -2358,7 +2376,33 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
|
||||||
&items_to_summarise,
|
&items_to_summarise,
|
||||||
&default_refs,
|
&default_refs,
|
||||||
Some(task_snapshot_text.as_str()),
|
Some(task_snapshot_text.as_str()),
|
||||||
|
SummaryInputOptions {
|
||||||
|
overview_target_tokens,
|
||||||
|
overview_warning_tokens,
|
||||||
|
overview_deadline_tokens,
|
||||||
|
summary_target_tokens,
|
||||||
|
},
|
||||||
);
|
);
|
||||||
|
if summary_input.warning_exceeded {
|
||||||
|
self.alert(
|
||||||
|
AlertLevel::Warn,
|
||||||
|
AlertSource::Compactor,
|
||||||
|
format!(
|
||||||
|
"compact overview is larger than expected (≈{} tokens; warning threshold {})",
|
||||||
|
summary_input.overview_tokens, overview_warning_tokens
|
||||||
|
),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if summary_input.deadline_fallback_used {
|
||||||
|
self.alert(
|
||||||
|
AlertLevel::Warn,
|
||||||
|
AlertSource::Compactor,
|
||||||
|
format!(
|
||||||
|
"compact overview exceeded deadline ({} tokens); using coarse fallback",
|
||||||
|
overview_deadline_tokens
|
||||||
|
),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
// Worker-side state collected by the compact worker's tool calls.
|
// Worker-side state collected by the compact worker's tool calls.
|
||||||
let ctx = Arc::new(std::sync::Mutex::new(CompactWorkerContext::with_budget(
|
let ctx = Arc::new(std::sync::Mutex::new(CompactWorkerContext::with_budget(
|
||||||
|
|
@ -2390,21 +2434,32 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
|
||||||
tracker.record_usage(event);
|
tracker.record_usage(event);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
summary_worker.set_interceptor(CompactWorkerInterceptor {
|
let compactor_warning_cb = self.alerter.clone().map(|alerter| {
|
||||||
usage_tracker: summary_usage_tracker,
|
Arc::new(move |message: String| {
|
||||||
max_input_tokens: compact_worker_max_input_tokens,
|
alerter.alert(AlertLevel::Warn, AlertSource::Compactor, message);
|
||||||
|
}) as Arc<dyn Fn(String) + Send + Sync>
|
||||||
});
|
});
|
||||||
summary_worker.set_max_turns(compact_worker_max_turns);
|
summary_worker.set_interceptor(CompactWorkerInterceptor::new(
|
||||||
|
summary_usage_tracker,
|
||||||
|
worker_context_max_tokens,
|
||||||
|
finish_warning_remaining_tokens,
|
||||||
|
final_reserve_tokens,
|
||||||
|
compactor_warning_cb,
|
||||||
|
));
|
||||||
|
summary_worker.set_max_turns(worker_max_turns);
|
||||||
|
|
||||||
// Tools: read_file (shared scope, fresh tracker) + the three
|
// Tools: read_file (shared scope, fresh tracker), bounded session
|
||||||
// compact-specific tools that populate `ctx`.
|
// history exploration, and compact-specific tools that populate `ctx`.
|
||||||
|
let compact_target_items = Arc::new(items_to_summarise.clone());
|
||||||
summary_worker.register_tool(tools::read_tool(scoped_fs.clone(), summary_tracker));
|
summary_worker.register_tool(tools::read_tool(scoped_fs.clone(), summary_tracker));
|
||||||
|
summary_worker.register_tool(search_session_log_tool(compact_target_items.clone()));
|
||||||
|
summary_worker.register_tool(read_session_items_tool(compact_target_items));
|
||||||
summary_worker.register_tool(mark_read_required_tool(scoped_fs.clone(), ctx.clone()));
|
summary_worker.register_tool(mark_read_required_tool(scoped_fs.clone(), ctx.clone()));
|
||||||
summary_worker.register_tool(add_reference_tool(ctx.clone()));
|
summary_worker.register_tool(add_reference_tool(ctx.clone()));
|
||||||
summary_worker.register_tool(write_summary_tool(ctx.clone()));
|
summary_worker.register_tool(write_summary_tool(ctx.clone()));
|
||||||
|
|
||||||
let out = summary_worker
|
let out = summary_worker
|
||||||
.run(summary_input)
|
.run(summary_input.text)
|
||||||
.await
|
.await
|
||||||
.map_err(PodError::Worker)?;
|
.map_err(PodError::Worker)?;
|
||||||
let mut locked_worker = out.worker;
|
let mut locked_worker = out.worker;
|
||||||
|
|
@ -2439,11 +2494,32 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
|
||||||
let _ = locked_worker.run(prompt).await.map_err(PodError::Worker)?;
|
let _ = locked_worker.run(prompt).await.map_err(PodError::Worker)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
let final_ctx = ctx.lock().expect("compact ctx poisoned").clone();
|
let mut final_ctx = ctx.lock().expect("compact ctx poisoned").clone();
|
||||||
let summary_text = final_ctx
|
let mut summary_text = final_ctx
|
||||||
.summary
|
.summary
|
||||||
.clone()
|
.clone()
|
||||||
.ok_or(PodError::CompactSummaryMissing)?;
|
.ok_or(PodError::CompactSummaryMissing)?;
|
||||||
|
let mut summary_tokens = estimate_text_tokens(summary_text.len());
|
||||||
|
if summary_max_tokens > 0 && summary_tokens > summary_max_tokens {
|
||||||
|
let prompt = format!(
|
||||||
|
"Your `write_summary` output is too large (≈{summary_tokens} tokens; max \
|
||||||
|
{summary_max_tokens}). Rewrite it now with `write_summary`, preserving the \
|
||||||
|
same five sections but making it concise. Target ≈{summary_target_tokens} tokens."
|
||||||
|
);
|
||||||
|
let _ = locked_worker.run(prompt).await.map_err(PodError::Worker)?;
|
||||||
|
final_ctx = ctx.lock().expect("compact ctx poisoned").clone();
|
||||||
|
summary_text = final_ctx
|
||||||
|
.summary
|
||||||
|
.clone()
|
||||||
|
.ok_or(PodError::CompactSummaryMissing)?;
|
||||||
|
summary_tokens = estimate_text_tokens(summary_text.len());
|
||||||
|
if summary_tokens > summary_max_tokens {
|
||||||
|
return Err(PodError::CompactSummaryTooLarge {
|
||||||
|
tokens: summary_tokens,
|
||||||
|
max: summary_max_tokens,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Re-read each auto-read target via the Pod FS view. Errors are
|
// Re-read each auto-read target via the Pod FS view. Errors are
|
||||||
// logged and skipped inside `render_auto_read` rather than
|
// logged and skipped inside `render_auto_read` rather than
|
||||||
|
|
@ -2515,6 +2591,13 @@ impl<C: LlmClient, St: Store> Pod<C, St> {
|
||||||
tools::task::snapshot_overview(&self.task_store.list()),
|
tools::task::snapshot_overview(&self.task_store.list()),
|
||||||
task_snapshot_text.clone(),
|
task_snapshot_text.clone(),
|
||||||
));
|
));
|
||||||
|
let result_estimate = llm_worker::token_counter::total_tokens(&new_history, &[]);
|
||||||
|
if result_context_max_tokens > 0 && result_estimate.tokens > result_context_max_tokens {
|
||||||
|
return Err(PodError::CompactResultContextTooLarge {
|
||||||
|
tokens: result_estimate.tokens,
|
||||||
|
max: result_context_max_tokens,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
// Build the SegmentStart entry for the new compacted segment.
|
// Build the SegmentStart entry for the new compacted segment.
|
||||||
// Inherits the source Segment's session_id so the compacted
|
// Inherits the source Segment's session_id so the compacted
|
||||||
|
|
@ -4008,19 +4091,56 @@ impl From<WorkerResult> for PodRunResult {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy)]
|
||||||
|
struct SummaryInputOptions {
|
||||||
|
overview_target_tokens: u64,
|
||||||
|
overview_warning_tokens: u64,
|
||||||
|
overview_deadline_tokens: u64,
|
||||||
|
summary_target_tokens: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
struct SummaryInputBuild {
|
||||||
|
text: String,
|
||||||
|
overview_tokens: u64,
|
||||||
|
warning_exceeded: bool,
|
||||||
|
deadline_fallback_used: bool,
|
||||||
|
}
|
||||||
|
|
||||||
/// Build the compact worker's input: default-reference instructions,
|
/// Build the compact worker's input: default-reference instructions,
|
||||||
/// the list of recently-touched files, and the pruned conversation
|
/// the list of recently-touched files, task snapshot, and a bounded overview
|
||||||
/// produced by [`build_summary_prompt`].
|
/// rather than a prefix-wide transcript.
|
||||||
fn build_summary_input(
|
fn build_summary_input(
|
||||||
items: &[Item],
|
items: &[Item],
|
||||||
default_refs: &[PathBuf],
|
default_refs: &[PathBuf],
|
||||||
task_snapshot: Option<&str>,
|
task_snapshot: Option<&str>,
|
||||||
) -> String {
|
options: SummaryInputOptions,
|
||||||
let mut out = String::new();
|
) -> SummaryInputBuild {
|
||||||
out.push_str(
|
let overview = build_summary_overview(
|
||||||
"Summarise the conversation below into a structured summary and nominate \
|
items,
|
||||||
files the next session needs.\n\n",
|
options.overview_target_tokens,
|
||||||
|
options.overview_deadline_tokens,
|
||||||
);
|
);
|
||||||
|
let overview_tokens = estimate_text_tokens(overview.len());
|
||||||
|
let warning_exceeded =
|
||||||
|
options.overview_warning_tokens > 0 && overview_tokens > options.overview_warning_tokens;
|
||||||
|
let deadline_fallback_used =
|
||||||
|
options.overview_deadline_tokens > 0 && overview_tokens > options.overview_deadline_tokens;
|
||||||
|
let overview = if deadline_fallback_used {
|
||||||
|
build_coarse_summary_overview(items, options.overview_deadline_tokens)
|
||||||
|
} else {
|
||||||
|
overview
|
||||||
|
};
|
||||||
|
let overview_tokens = estimate_text_tokens(overview.len());
|
||||||
|
|
||||||
|
let mut out = String::new();
|
||||||
|
out.push_str(&format!(
|
||||||
|
"Summarise this session into a structured summary of about {} tokens and \
|
||||||
|
nominate files the next session needs. The conversation below is a \
|
||||||
|
bounded overview/index, not the full transcript. Use tools to inspect \
|
||||||
|
current files when deciding auto-read/reference output.\n\n",
|
||||||
|
options.summary_target_tokens
|
||||||
|
));
|
||||||
if !default_refs.is_empty() {
|
if !default_refs.is_empty() {
|
||||||
out.push_str(
|
out.push_str(
|
||||||
"These files were touched recently in this session. Use `read_file` \
|
"These files were touched recently in this session. Use `read_file` \
|
||||||
|
|
@ -4045,47 +4165,166 @@ fn build_summary_input(
|
||||||
out.push_str(task_snapshot);
|
out.push_str(task_snapshot);
|
||||||
out.push_str("\n\n");
|
out.push_str("\n\n");
|
||||||
}
|
}
|
||||||
out.push_str("## Conversation\n");
|
out.push_str("## Conversation overview/index\n");
|
||||||
out.push_str(&build_summary_prompt(items));
|
out.push_str(&overview);
|
||||||
out.push_str("\n\nWhen you are done, call `write_summary` with the final 5-section text.");
|
out.push_str("\n\nWhen you are done, call `write_summary` with the final 5-section text.");
|
||||||
|
|
||||||
|
SummaryInputBuild {
|
||||||
|
text: out,
|
||||||
|
overview_tokens,
|
||||||
|
warning_exceeded,
|
||||||
|
deadline_fallback_used,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_summary_overview(items: &[Item], target_tokens: u64, deadline_tokens: u64) -> String {
|
||||||
|
let target_bytes = token_budget_bytes(target_tokens).max(1024);
|
||||||
|
let deadline_bytes = token_budget_bytes(deadline_tokens).max(target_bytes);
|
||||||
|
let mut out = String::new();
|
||||||
|
write_overview_header(items, &mut out);
|
||||||
|
out.push_str("\n## Recent user/assistant/system messages\n");
|
||||||
|
|
||||||
|
let mut selected = Vec::new();
|
||||||
|
let mut omitted_messages = 0usize;
|
||||||
|
for (idx, item) in items.iter().enumerate().rev() {
|
||||||
|
let Some(entry) = message_overview_entry(idx, item, 2_000) else {
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
let projected = out
|
||||||
|
.len()
|
||||||
|
.saturating_add(selected.iter().map(String::len).sum::<usize>())
|
||||||
|
.saturating_add(entry.len())
|
||||||
|
.saturating_add(2);
|
||||||
|
if projected > target_bytes && !selected.is_empty() {
|
||||||
|
omitted_messages += 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
selected.push(entry);
|
||||||
|
if projected >= target_bytes {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
selected.reverse();
|
||||||
|
for entry in selected {
|
||||||
|
out.push_str(&entry);
|
||||||
|
out.push_str("\n\n");
|
||||||
|
}
|
||||||
|
if omitted_messages > 0 {
|
||||||
|
out.push_str(&format!(
|
||||||
|
"[Overview omitted {omitted_messages} older message(s) to stay near target.]\n\n"
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
append_tool_index(items, &mut out, target_bytes, deadline_bytes);
|
||||||
out
|
out
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Format conversation items into a text prompt for the summary Worker.
|
fn build_coarse_summary_overview(items: &[Item], deadline_tokens: u64) -> String {
|
||||||
///
|
let deadline_bytes = token_budget_bytes(deadline_tokens).max(1024);
|
||||||
/// The summary should capture decisions and user intent, not recreate code.
|
let mut out = String::new();
|
||||||
/// File contents and tool IO belong in auto-read / references, not in the
|
write_overview_header(items, &mut out);
|
||||||
/// summary input. So this strips:
|
out.push_str("\n## Coarse recent message index\n");
|
||||||
/// - `ToolCall.arguments` (keep only the tool name)
|
for (idx, item) in items.iter().enumerate().rev() {
|
||||||
/// - `ToolResult.content` (keep only the summary line)
|
let Some(entry) = message_overview_entry(idx, item, 240) else {
|
||||||
/// - `Reasoning` entirely (intermediate thought, superseded by decisions)
|
continue;
|
||||||
fn build_summary_prompt(items: &[Item]) -> String {
|
};
|
||||||
let mut lines = Vec::new();
|
if out.len().saturating_add(entry.len()).saturating_add(2) > deadline_bytes {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
out.push_str(&entry);
|
||||||
|
out.push_str("\n\n");
|
||||||
|
}
|
||||||
|
out
|
||||||
|
}
|
||||||
|
|
||||||
|
fn write_overview_header(items: &[Item], out: &mut String) {
|
||||||
|
let mut messages = 0usize;
|
||||||
|
let mut tool_calls = 0usize;
|
||||||
|
let mut tool_results = 0usize;
|
||||||
|
let mut reasoning = 0usize;
|
||||||
for item in items {
|
for item in items {
|
||||||
match item {
|
match item {
|
||||||
Item::Message { role, content, .. } => {
|
Item::Message { .. } => messages += 1,
|
||||||
let role_label = match role {
|
Item::ToolCall { .. } => tool_calls += 1,
|
||||||
llm_worker::Role::User => "User",
|
Item::ToolResult { .. } => tool_results += 1,
|
||||||
llm_worker::Role::Assistant => "Assistant",
|
Item::Reasoning { .. } => reasoning += 1,
|
||||||
llm_worker::Role::System => "System",
|
|
||||||
};
|
|
||||||
let text: String = content
|
|
||||||
.iter()
|
|
||||||
.map(|p| p.as_text())
|
|
||||||
.collect::<Vec<_>>()
|
|
||||||
.join("");
|
|
||||||
lines.push(format!("[{role_label}] {text}"));
|
|
||||||
}
|
|
||||||
Item::ToolCall { name, .. } => {
|
|
||||||
lines.push(format!("[ToolCall] {name}"));
|
|
||||||
}
|
|
||||||
Item::ToolResult { summary, .. } => {
|
|
||||||
lines.push(format!("[ToolResult] {summary}"));
|
|
||||||
}
|
|
||||||
Item::Reasoning { .. } => {}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
lines.join("\n\n")
|
out.push_str(&format!(
|
||||||
|
"Items summarized: {} total; {messages} message(s), {tool_calls} tool call(s), \
|
||||||
|
{tool_results} tool result(s), {reasoning} reasoning item(s). Tool call \
|
||||||
|
arguments, tool result full content, and reasoning bodies are omitted from \
|
||||||
|
this initial input.\n",
|
||||||
|
items.len()
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
fn append_tool_index(items: &[Item], out: &mut String, target_bytes: usize, deadline_bytes: usize) {
|
||||||
|
let mut entries = Vec::new();
|
||||||
|
for (idx, item) in items.iter().enumerate().rev() {
|
||||||
|
match item {
|
||||||
|
Item::ToolCall { name, .. } => entries.push(format!("[{idx} ToolCall] {name}")),
|
||||||
|
Item::ToolResult { summary, .. } => entries.push(format!(
|
||||||
|
"[{idx} ToolResult] {}",
|
||||||
|
truncate_chars(summary, 240)
|
||||||
|
)),
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
if entries.len() >= 24 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if entries.is_empty() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
entries.reverse();
|
||||||
|
out.push_str("## Recent tool index (content omitted)\n");
|
||||||
|
for entry in entries {
|
||||||
|
let projected = out.len().saturating_add(entry.len()).saturating_add(1);
|
||||||
|
if projected > deadline_bytes || (projected > target_bytes && out.contains("ToolResult")) {
|
||||||
|
out.push_str("[Additional tool index entries omitted.]\n");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
out.push_str(&entry);
|
||||||
|
out.push('\n');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn message_overview_entry(idx: usize, item: &Item, max_chars: usize) -> Option<String> {
|
||||||
|
let Item::Message { role, content, .. } = item else {
|
||||||
|
return None;
|
||||||
|
};
|
||||||
|
let role_label = match role {
|
||||||
|
llm_worker::Role::User => "User",
|
||||||
|
llm_worker::Role::Assistant => "Assistant",
|
||||||
|
llm_worker::Role::System => "System",
|
||||||
|
};
|
||||||
|
let text: String = content
|
||||||
|
.iter()
|
||||||
|
.map(|p| p.as_text())
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.join("");
|
||||||
|
Some(format!(
|
||||||
|
"[{idx} {role_label}] {}",
|
||||||
|
truncate_chars(&text, max_chars)
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn truncate_chars(text: &str, max_chars: usize) -> String {
|
||||||
|
if text.chars().count() <= max_chars {
|
||||||
|
return text.to_string();
|
||||||
|
}
|
||||||
|
let mut out = text.chars().take(max_chars).collect::<String>();
|
||||||
|
out.push_str("… [truncated]");
|
||||||
|
out
|
||||||
|
}
|
||||||
|
|
||||||
|
fn estimate_text_tokens(bytes: usize) -> u64 {
|
||||||
|
(bytes as u64).div_ceil(4)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn token_budget_bytes(tokens: u64) -> usize {
|
||||||
|
tokens.saturating_mul(4).min(usize::MAX as u64) as usize
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Pod errors.
|
/// Pod errors.
|
||||||
|
|
@ -4125,6 +4364,12 @@ pub enum PodError {
|
||||||
#[error("compact worker did not produce a summary (write_summary was never called)")]
|
#[error("compact worker did not produce a summary (write_summary was never called)")]
|
||||||
CompactSummaryMissing,
|
CompactSummaryMissing,
|
||||||
|
|
||||||
|
#[error("compact summary too large: {tokens} tokens exceeds max {max}")]
|
||||||
|
CompactSummaryTooLarge { tokens: u64, max: u64 },
|
||||||
|
|
||||||
|
#[error("compacted result context too large: {tokens} tokens exceeds max {max}")]
|
||||||
|
CompactResultContextTooLarge { tokens: u64, max: u64 },
|
||||||
|
|
||||||
#[error("invalid system prompt template: {source}")]
|
#[error("invalid system prompt template: {source}")]
|
||||||
InvalidSystemPromptTemplate {
|
InvalidSystemPromptTemplate {
|
||||||
#[source]
|
#[source]
|
||||||
|
|
@ -4409,6 +4654,21 @@ mod memory_worker_event_tests {
|
||||||
mod build_summary_prompt_tests {
|
mod build_summary_prompt_tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
|
fn test_summary_input(items: &[Item]) -> String {
|
||||||
|
build_summary_input(
|
||||||
|
items,
|
||||||
|
&[],
|
||||||
|
None,
|
||||||
|
SummaryInputOptions {
|
||||||
|
overview_target_tokens: 512,
|
||||||
|
overview_warning_tokens: 1024,
|
||||||
|
overview_deadline_tokens: 2048,
|
||||||
|
summary_target_tokens: 256,
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.text
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn strips_tool_call_arguments() {
|
fn strips_tool_call_arguments() {
|
||||||
let items = vec![Item::tool_call_json(
|
let items = vec![Item::tool_call_json(
|
||||||
|
|
@ -4416,8 +4676,8 @@ mod build_summary_prompt_tests {
|
||||||
"read_file",
|
"read_file",
|
||||||
serde_json::json!({ "path": "src/main.rs" }),
|
serde_json::json!({ "path": "src/main.rs" }),
|
||||||
)];
|
)];
|
||||||
let prompt = build_summary_prompt(&items);
|
let prompt = test_summary_input(&items);
|
||||||
assert_eq!(prompt, "[ToolCall] read_file");
|
assert!(prompt.contains("[0 ToolCall] read_file"));
|
||||||
assert!(!prompt.contains("src/main.rs"));
|
assert!(!prompt.contains("src/main.rs"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -4428,8 +4688,8 @@ mod build_summary_prompt_tests {
|
||||||
"read 3 lines",
|
"read 3 lines",
|
||||||
"fn main() { println!(\"hello\"); }",
|
"fn main() { println!(\"hello\"); }",
|
||||||
)];
|
)];
|
||||||
let prompt = build_summary_prompt(&items);
|
let prompt = test_summary_input(&items);
|
||||||
assert_eq!(prompt, "[ToolResult] read 3 lines");
|
assert!(prompt.contains("[0 ToolResult] read 3 lines"));
|
||||||
assert!(!prompt.contains("println"));
|
assert!(!prompt.contains("println"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -4440,13 +4700,50 @@ mod build_summary_prompt_tests {
|
||||||
Item::reasoning("internal deliberation"),
|
Item::reasoning("internal deliberation"),
|
||||||
Item::assistant_message("hello"),
|
Item::assistant_message("hello"),
|
||||||
];
|
];
|
||||||
let prompt = build_summary_prompt(&items);
|
let prompt = test_summary_input(&items);
|
||||||
assert!(prompt.contains("[User] hi"));
|
assert!(prompt.contains("[0 User] hi"));
|
||||||
assert!(prompt.contains("[Assistant] hello"));
|
assert!(prompt.contains("[2 Assistant] hello"));
|
||||||
assert!(!prompt.contains("Reasoning"));
|
assert!(!prompt.contains("Reasoning"));
|
||||||
assert!(!prompt.contains("deliberation"));
|
assert!(!prompt.contains("deliberation"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn overview_warning_does_not_drop_input() {
|
||||||
|
let items = vec![Item::user_message("x".repeat(4_000))];
|
||||||
|
let built = build_summary_input(
|
||||||
|
&items,
|
||||||
|
&[],
|
||||||
|
None,
|
||||||
|
SummaryInputOptions {
|
||||||
|
overview_target_tokens: 10,
|
||||||
|
overview_warning_tokens: 100,
|
||||||
|
overview_deadline_tokens: 2_000,
|
||||||
|
summary_target_tokens: 256,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
assert!(built.warning_exceeded);
|
||||||
|
assert!(!built.deadline_fallback_used);
|
||||||
|
assert!(built.text.contains("[0 User]"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn overview_deadline_falls_back_to_coarse_index() {
|
||||||
|
let items = vec![Item::user_message("x".repeat(4_000))];
|
||||||
|
let built = build_summary_input(
|
||||||
|
&items,
|
||||||
|
&[],
|
||||||
|
None,
|
||||||
|
SummaryInputOptions {
|
||||||
|
overview_target_tokens: 10,
|
||||||
|
overview_warning_tokens: 10,
|
||||||
|
overview_deadline_tokens: 100,
|
||||||
|
summary_target_tokens: 256,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
assert!(built.deadline_fallback_used);
|
||||||
|
assert!(built.text.contains("## Coarse recent message index"));
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn worker_manifest_generation_settings_become_request_config() {
|
fn worker_manifest_generation_settings_become_request_config() {
|
||||||
let manifest = WorkerManifest {
|
let manifest = WorkerManifest {
|
||||||
|
|
@ -4478,8 +4775,9 @@ mod build_summary_prompt_tests {
|
||||||
Item::user_message("fix the bug"),
|
Item::user_message("fix the bug"),
|
||||||
Item::assistant_message("done"),
|
Item::assistant_message("done"),
|
||||||
];
|
];
|
||||||
let prompt = build_summary_prompt(&items);
|
let prompt = test_summary_input(&items);
|
||||||
assert_eq!(prompt, "[User] fix the bug\n\n[Assistant] done");
|
assert!(prompt.contains("[0 User] fix the bug"));
|
||||||
|
assert!(prompt.contains("[1 Assistant] done"));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
|
|
|
||||||
|
|
@ -72,11 +72,11 @@ pub struct ToolOutput {
|
||||||
|
|
||||||
**ターンの合間が proactive (小さい閾値)**:
|
**ターンの合間が proactive (小さい閾値)**:
|
||||||
turn が完了した地点はタスクの自然な区切り。ここで先を見越して早めに compact する。
|
turn が完了した地点はタスクの自然な区切り。ここで先を見越して早めに compact する。
|
||||||
マニフェストの `compact_threshold` が対応。
|
マニフェストの `threshold` が対応。
|
||||||
|
|
||||||
**リクエストの合間は safety net (大きい閾値)**:
|
**リクエストの合間は safety net (大きい閾値)**:
|
||||||
turn 内部でリクエストの合間にチェックするのは「暴走的に膨張した場合のみ止める」用途。
|
turn 内部でリクエストの合間にチェックするのは「暴走的に膨張した場合のみ止める」用途。
|
||||||
マニフェストの `compact_request_threshold` が対応。通常は発動しない。
|
マニフェストの `request_threshold` が対応。通常は発動しない。
|
||||||
|
|
||||||
**両閾値は manifest で個別指定する**。過去の設計では 9/8 倍で自動導出していたが、
|
**両閾値は manifest で個別指定する**。過去の設計では 9/8 倍で自動導出していたが、
|
||||||
比率に根拠がなかったため廃止。両方が `Option<u64>` で、片方だけの設定も可能
|
比率に根拠がなかったため廃止。両方が `Option<u64>` で、片方だけの設定も可能
|
||||||
|
|
@ -137,15 +137,29 @@ compact は fork と同じ構造。旧セッションを保全し、新 SessionI
|
||||||
|
|
||||||
```toml
|
```toml
|
||||||
[compaction]
|
[compaction]
|
||||||
compact_threshold = 80000 # ターンの合間 (proactive)
|
threshold = 80000 # ターンの合間 (proactive)
|
||||||
compact_request_threshold = 90000 # リクエストの合間 (safety net)
|
request_threshold = 90000 # リクエストの合間 (safety net)
|
||||||
prune_protected_tokens = 8000 # prune から保護する末尾 token budget
|
prune_protected_tokens = 8000 # prune から保護する末尾 token budget
|
||||||
compact_retained_tokens = 8000 # compact 後に生のまま残す末尾 token budget
|
retained_tokens = 8000 # compact 後に生のまま残す末尾 token budget
|
||||||
compact_auto_read_budget = 8000 # compact worker の mark_read_required 合計上限
|
|
||||||
compact_worker_max_input_tokens = 50000 # compact worker 自身の現在占有トークン上限
|
overview_target_tokens = 8000 # compact worker 初期 overview の通常目標
|
||||||
compact_worker_max_turns = 20 # compact worker 自身の tool loop 上限
|
overview_warning_tokens = 16000 # 超えたら警告・trace、compact は続行
|
||||||
|
overview_deadline_tokens = 40000 # 超えたら粗い overview へ fallback
|
||||||
|
|
||||||
|
worker_context_max_tokens = 50000 # compact worker session 全体の hard limit
|
||||||
|
finish_warning_remaining_tokens = 8000 # 残りが少ないため write_summary へ進める勧告
|
||||||
|
final_reserve_tokens = 4000 # 最終 summary/closing turn 用 reserve
|
||||||
|
worker_max_turns = 20 # compact worker 自身の tool loop 上限
|
||||||
|
|
||||||
|
summary_target_tokens = 1500 # write_summary の目標サイズ
|
||||||
|
summary_max_tokens = 3000 # write_summary の hard validation
|
||||||
|
auto_read_budget_tokens = 8000 # compact 後に注入する file content 合計上限
|
||||||
|
result_context_max_tokens = 24000 # 新 session 初期 context の dry-run validation
|
||||||
```
|
```
|
||||||
|
|
||||||
|
`compact_*` prefix の旧 key は互換 alias として読み取るが、`[compaction]` 内の新規 key は prefix なしを正とする。
|
||||||
|
初期 overview の target/warning は効率のための目安で、通常は hard error にしない。deadline 超過時も、可能なら deterministic に粗い overview へ fallback して compact の完走を優先する。
|
||||||
|
|
||||||
### Auto-Read とリファレンス
|
### Auto-Read とリファレンス
|
||||||
|
|
||||||
2段階のファイル参照:
|
2段階のファイル参照:
|
||||||
|
|
@ -176,8 +190,9 @@ auto-read も通常の history 内 system message なので、将来の Prune/Co
|
||||||
|
|
||||||
## compact worker
|
## compact worker
|
||||||
|
|
||||||
要約生成とファイル選定を行う使い捨て Worker。ツールなし・1リクエストの現行実装から、
|
要約生成とファイル選定を行う使い捨て Worker。Pod は compact 対象 prefix を全文投入せず、User / Assistant / System を優先した bounded overview と tool index を初期 input として渡す。Tool call arguments、tool result full content、reasoning body は初期 input には載せない。
|
||||||
ツール付きマルチターンに改善する。
|
|
||||||
|
初期 overview は `overview_target_tokens` を目標にする。`overview_warning_tokens` を超えた場合は警告・trace を記録して続行し、`overview_deadline_tokens` を超えた場合は粗い deterministic overview へ fallback する。Compact の目的は完走なので、初期 input が少し大きいだけでは hard error にしない。
|
||||||
|
|
||||||
### ツール
|
### ツール
|
||||||
|
|
||||||
|
|
@ -192,13 +207,20 @@ write_summary(text) — 構造化要約を出力/上書き
|
||||||
|
|
||||||
1. Pod が `tools::Tracker::recent_files(5)` で最近触られたファイルを抽出(デフォルトリファレンス)
|
1. Pod が `tools::Tracker::recent_files(5)` で最近触られたファイルを抽出(デフォルトリファレンス)
|
||||||
2. compact worker にプロンプトとして渡す:
|
2. compact worker にプロンプトとして渡す:
|
||||||
- pruned history(summary only、arguments/reasoning 除去)
|
- bounded overview / index(User / Assistant / System 優先)
|
||||||
- デフォルトリファレンスの一覧
|
- デフォルトリファレンスの一覧
|
||||||
|
- TaskStore snapshot
|
||||||
3. compact worker が自律的に:
|
3. compact worker が自律的に:
|
||||||
|
- search_session_log / read_session_items で bounded overview から漏れた compact 対象履歴を必要範囲だけ探索
|
||||||
- read_file で各ファイルを読み、必要性を判断
|
- read_file で各ファイルを読み、必要性を判断
|
||||||
- mark_read_required / add_reference で指定
|
- mark_read_required / add_reference で指定
|
||||||
- write_summary で構造化要約を出力(呼び直し可)
|
- write_summary で構造化要約を出力(呼び直し可)
|
||||||
4. ターン終了時に write_summary 未呼び出し or read_required 空(かつファイル操作履歴がある場合)→ 追加プロンプトで促す
|
4. CompactWorkerInterceptor が worker session 全体の context occupancy を監視する:
|
||||||
|
- `finish_warning_remaining_tokens` 到達時に「探索を切り上げて write_summary へ進め」と Worker history に永続化される warning を挿入し、人間向け warning も出す
|
||||||
|
- `final_reserve_tokens` を割った後は `write_summary` 以外の探索 tool call に synthetic error を返し、最終 summary の余白を守る
|
||||||
|
- `worker_context_max_tokens` 超過は最後の hard stop
|
||||||
|
5. ターン終了時に write_summary 未呼び出し or read_required 空(かつファイル操作履歴がある場合)→ 追加プロンプトで促す
|
||||||
|
6. `summary_max_tokens` と `result_context_max_tokens` で compact 結果を検証してから新 session を作る
|
||||||
|
|
||||||
### 構造化要約の要件
|
### 構造化要約の要件
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,13 +1,17 @@
|
||||||
You are a context compaction assistant. Your job is to hand the next session a structured summary plus pointers to the files it actually needs — not a narrative transcript of the conversation.
|
You are a context compaction assistant. Your job is to hand the next session a structured summary plus pointers to the files it actually needs — not a narrative transcript of the conversation.
|
||||||
|
|
||||||
|
The conversation input is a bounded overview/index, not the full transcript. Treat tool result bodies and reasoning as intentionally omitted unless a tool exposes more detail. If you receive a compact worker budget warning, stop broad exploration immediately, read only if absolutely necessary, and call `write_summary`.
|
||||||
|
|
||||||
## Workflow
|
## Workflow
|
||||||
|
|
||||||
1. Use `read_file` to inspect referenced files before deciding what the next session needs. Prefer skimming over blind inclusion.
|
1. Read the provided overview/index and current TaskStore snapshot.
|
||||||
2. For files whose current contents are load-bearing for the active work, call `mark_read_required` to inject them into the next session. These count against the auto-read token budget — spend it deliberately.
|
2. If the overview does not contain enough detail, use `search_session_log` to find relevant compact-target history items, then `read_session_items` to inspect only the needed range.
|
||||||
3. For files the next session should know about but can fetch on demand, call `add_reference` to record the path without embedding contents.
|
3. Use `read_file` to inspect referenced files before deciding what the next session needs. Prefer skimming over blind inclusion.
|
||||||
4. Finish with `write_summary` carrying the final text. You may call it multiple times; only the last call is kept.
|
4. For files whose current contents are load-bearing for the active work, call `mark_read_required` to inject them into the next session. These count against the auto-read token budget — spend it deliberately.
|
||||||
|
5. For files the next session should know about but can fetch on demand, call `add_reference` to record the path without embedding contents.
|
||||||
|
6. Finish with `write_summary` carrying the final text. You may call it multiple times; only the last call is kept.
|
||||||
|
|
||||||
Stop nominating and close out with `write_summary` as soon as the auto-read budget is exhausted, or whenever further nominations would not change the next session's next step.
|
Stop nominating and close out with `write_summary` as soon as the auto-read budget is exhausted, when a compact worker budget warning arrives, or whenever further exploration would not change the next session's next step.
|
||||||
|
|
||||||
## Summary format
|
## Summary format
|
||||||
|
|
||||||
|
|
@ -36,4 +40,4 @@ Produce the summary in this exact format:
|
||||||
## Constraints
|
## Constraints
|
||||||
|
|
||||||
- Keep code snippets and raw tool output OUT of the summary — that is what auto-read and references are for.
|
- Keep code snippets and raw tool output OUT of the summary — that is what auto-read and references are for.
|
||||||
- Target 1000–2000 tokens for the summary text itself.
|
- Follow the summary target stated in the run input; if asked to shrink, call `write_summary` again with a shorter version.
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,143 @@
|
||||||
|
---
|
||||||
|
id: 20260528-001748-compact-session-log-exploration
|
||||||
|
slug: compact-session-log-exploration
|
||||||
|
title: Compact: session log 探索型の要約入力に変更する
|
||||||
|
status: closed
|
||||||
|
kind: task
|
||||||
|
priority: P2
|
||||||
|
labels: [compact, session-log]
|
||||||
|
created_at: 2026-05-28T00:17:48Z
|
||||||
|
updated_at: 2026-05-28T03:41:42Z
|
||||||
|
assignee: null
|
||||||
|
legacy_ticket: null
|
||||||
|
---
|
||||||
|
|
||||||
|
# Compact: session log 探索型の要約入力に変更する
|
||||||
|
|
||||||
|
## 背景
|
||||||
|
|
||||||
|
`insomnia-troubleshoot` Pod の手動 compact で、Compact Worker が入力トークン上限に到達して停止した。現行実装は `Pod::compact` で retained tail より前の `items_to_summarise` を `build_summary_input()` に渡し、`build_summary_prompt()` が user / assistant / system message と tool result summary を `## Conversation` に連結して Compact Worker の初回 input に載せている。
|
||||||
|
|
||||||
|
raw tool output や reasoning は落としているが、長い session では pruned transcript だけでも `compact_worker_max_input_tokens` を超える。Compact の目的は「全履歴を読ませる」ことではなく、次セッションに必要な構造化要約と file auto-read/reference を作ることなので、初期 input は軽量 overview に留め、必要箇所は Compact Worker が session log / workspace file を探索して確認できる形にする。
|
||||||
|
|
||||||
|
また、Compact Worker の健全性は「初期 input が小さいこと」だけでは保証できない。探索 tool の結果、assistant 出力、`write_summary` 呼び出しまでを含む Compact Worker 全体の context と、compact 後に作られる新 session 初期 context を別々に制御する必要がある。
|
||||||
|
|
||||||
|
## 方針
|
||||||
|
|
||||||
|
Compact Worker の初期 context は、全文 transcript ではなく決定的に生成した session overview / index を渡す。LLM には探索空間を狭めた上で、必要な session log 範囲や workspace file を tool で読む権限を与える。
|
||||||
|
|
||||||
|
基本方針:
|
||||||
|
|
||||||
|
- 初期 input は User / Assistant / System の継続に必要な情報を中心に、target size 内の overview として生成する。
|
||||||
|
- 初期 overview が target を超えた程度で compact を失敗させない。warning / trace に記録して続行する。
|
||||||
|
- 初期 overview deadline は通常運用の調整値ではなく、想定外の入力生成バグを検出する最悪ケースの安全網とする。deadline 超過時は、可能ならより粗い overview へ fallback し、それでも最低限の入力を作れない場合だけ失敗する。
|
||||||
|
- ToolCall / ToolResult は初期 input では本文を展開しない。
|
||||||
|
- tool 名、summary、対象 path、成否、大きな出力の有無、session log 上の位置などの index に留める。
|
||||||
|
- Compact Worker は session log の必要箇所を探索・再読できる。
|
||||||
|
- Compact Worker の探索量は、session-log/file-read 個別の総量 budget ではなく、Compact Worker session 全体の context budget で制御する。
|
||||||
|
- Compact Worker context が上限に近づいたら、`mark_read_required` とは独立に「探索を切り上げて `write_summary` へ進め」という勧告を Worker に渡し、人間にも警告を出す。
|
||||||
|
- 最終 summary と closing turn のための reserve を確保し、reserve を食い潰すほど大きい tool result は残 budget に合わせて抑制・切り詰め・再読指示にする。
|
||||||
|
- AutoRead 判断のため、workspace file は現行通り `read_file` で確認し、必要なものだけ `mark_read_required` / `add_reference` する。
|
||||||
|
- AutoRead budget は Compact Worker の探索 budget ではなく、compact 後の新 session 初期 context に注入される file content の合計上限として扱う。
|
||||||
|
- Compact Worker の出力は現行と同じく structured summary + auto-read + references を生成する。
|
||||||
|
|
||||||
|
## Compact Worker / compaction parameters
|
||||||
|
|
||||||
|
`[compaction]` 配下では `compact_` prefix を新規 parameter 名につけない。既存の `compact_*` key は、この ticket の実装時に同じ意味の prefix なし key へ整理する。
|
||||||
|
|
||||||
|
必要な parameter:
|
||||||
|
|
||||||
|
- `retained_tokens`
|
||||||
|
- compact 後に verbatim で残す history tail の token budget。
|
||||||
|
- `overview_target_tokens`
|
||||||
|
- 初期 overview / index 生成器が目指す通常サイズ。超過しても即失敗しない。
|
||||||
|
- `overview_warning_tokens`
|
||||||
|
- 初期 overview が想定より大きいことを記録・警告する閾値。compact は続行する。
|
||||||
|
- `overview_deadline_tokens`
|
||||||
|
- 初期 overview の最悪ケース deadline。超過時はより粗い overview へ deterministic fallback し、それでも無理な場合だけ compact を失敗させる。
|
||||||
|
- `worker_context_max_tokens`
|
||||||
|
- Compact Worker session 全体の context hard limit。system prompt、overview、assistant output、tool calls/results、session-log/file read results、`write_summary` 周辺の蓄積を含む。
|
||||||
|
- `finish_warning_remaining_tokens`
|
||||||
|
- 残り context がこの値以下になったら、Compact Worker に探索切り上げと `write_summary` を促す勧告を入れる。
|
||||||
|
- `final_reserve_tokens`
|
||||||
|
- 最終 summary と closing turn のために残す reserve。これを割り込みそうな tool result は full content を返さず、range 縮小や summary への移行を促す。
|
||||||
|
- `worker_max_turns`
|
||||||
|
- Compact Worker の tool-loop 最大 turn 数。budget 制御とは別の runaway guard。
|
||||||
|
- `summary_target_tokens`
|
||||||
|
- `write_summary` text の目標サイズ。prompt / nudge に使う。
|
||||||
|
- `summary_max_tokens`
|
||||||
|
- `write_summary` text の hard validation。超過した summary は縮約を促すか compact 成功扱いにしない。
|
||||||
|
- `auto_read_budget_tokens`
|
||||||
|
- `mark_read_required` によって compact 後の新 session に注入される file content の合計 token budget。
|
||||||
|
- `result_context_max_tokens`
|
||||||
|
- compact 成功前に dry-run する新 session 初期 context の上限。summary、auto-read contents、references、task snapshot、retained tail を含む。
|
||||||
|
- `model`
|
||||||
|
- compactor model。未指定なら main Worker の client を clone する。
|
||||||
|
|
||||||
|
Compact 発火条件の `threshold` / `request_threshold` は Compact Worker の健全性 parameter ではないが、既存の `compact_threshold` / `compact_request_threshold` を整理する場合は `[compaction]` 内の prefix なし key として扱う。
|
||||||
|
|
||||||
|
## 要件
|
||||||
|
|
||||||
|
- `build_summary_input()` / compact 入力生成を、prefix 全体の pruned transcript 一括投入から、bounded overview + index 生成に変更する。
|
||||||
|
- overview は `overview_target_tokens` を目指して生成する。
|
||||||
|
- `overview_warning_tokens` 超過時は警告・trace を記録しつつ続行する。
|
||||||
|
- `overview_deadline_tokens` 超過時はより粗い deterministic overview に fallback する。通常ケースの user-facing hard error にしない。
|
||||||
|
- User / Assistant / System message を優先し、古い detail は落としてよい。
|
||||||
|
- Tool output content は初期 input に載せない。
|
||||||
|
- Compact Worker 用の session log 探索 tool を追加する。
|
||||||
|
- 例: `search_session_log(query, filters, range)`。
|
||||||
|
- 例: `read_session_items(range | item_ids, mode = compact/full)`。
|
||||||
|
- 必要なら large tool result を個別に読む tool を追加する。
|
||||||
|
- 探索 tool は session-store の現在 segment / compact 対象 range を正本として読む。
|
||||||
|
- Compact 対象外の future/retained tail と混ざらないよう range 境界を明示する。
|
||||||
|
- tool result full content を返す場合は Compact Worker の残り context / `final_reserve_tokens` を守る。
|
||||||
|
- session-log/file-read 個別の総量 budget を user-facing parameter として増やさず、主制御は `worker_context_max_tokens` に寄せる。
|
||||||
|
- Compact Worker の context occupancy を request 前に見積もり、`worker_context_max_tokens` を最後の hard stop として扱う。
|
||||||
|
- Compact Worker の残り context が `finish_warning_remaining_tokens` 以下になったら、追加探索を切り上げて `write_summary` に進むよう Worker に勧告し、人間向け warning も出す。
|
||||||
|
- `final_reserve_tokens` を割り込む可能性がある tool result は、full content を返さず bounded/truncated result とし、range 縮小または `write_summary` への移行を促す。
|
||||||
|
- `write_summary` 後に `summary_max_tokens` を validation する。超過時は縮約を促し、改善できない場合は compact 成功扱いにしない。
|
||||||
|
- compact 成功前に、`summary + auto-read + references + retained tail + task snapshot` の新 session 初期 context を dry-run 見積もりし、`result_context_max_tokens` を超えないことを確認する。
|
||||||
|
- `mark_read_required` / `add_reference` の意味論は維持する。
|
||||||
|
- AutoRead は session log 上の過去 tool output ではなく、現在の workspace file を `read_file` で確認してから選ぶ。
|
||||||
|
- `auto_read_budget_tokens` は新 session 初期 context への file content 注入上限であり、Compact Worker の探索 budget ではない。
|
||||||
|
- `resources/prompts/internal/compact_system.md` の summary target は `summary_target_tokens` から反映する。
|
||||||
|
- 手動 compact / auto compact の双方で同じ経路を使う。
|
||||||
|
- 巨大 session でも Compact Worker が初回 input 上限で即停止しない。
|
||||||
|
|
||||||
|
## 完了条件
|
||||||
|
|
||||||
|
- 長い session で compact 初期 overview が transcript 全体を載せず、`overview_target_tokens` を目指して生成される unit test がある。
|
||||||
|
- `overview_warning_tokens` 超過時に compact が続行し、警告・trace が記録される test がある。
|
||||||
|
- `overview_deadline_tokens` 超過時に粗い deterministic overview へ fallback する test がある。
|
||||||
|
- Tool result content が初期 compact input に混入しないことを test で確認している。
|
||||||
|
- Compact Worker が session log overview から必要 range を tool で読み、`write_summary` まで到達できる test がある。
|
||||||
|
- `finish_warning_remaining_tokens` 到達時に Compact Worker へ探索切り上げ勧告が入り、人間向け warning も出る test がある。
|
||||||
|
- `final_reserve_tokens` を守るため、過大な tool result が bounded/truncated される test がある。
|
||||||
|
- `summary_max_tokens` 超過 summary が compact 成功扱いにならない、または縮約 nudge を受ける test がある。
|
||||||
|
- compact 後の新 session 初期 context が `result_context_max_tokens` で dry-run validation される test がある。
|
||||||
|
- `mark_read_required` / `add_reference` 既存 test が通り、auto-read budget の挙動が維持されている。
|
||||||
|
- `[compaction]` の新 parameter 名が docs / manifest schema / defaults に反映されている。
|
||||||
|
- `docs/compaction.md` と `resources/prompts/internal/compact_system.md` が新しい探索型 flow と budget/warning semantics に更新されている。
|
||||||
|
- `cargo fmt --check` と関連 crate の compact/session-store/pod/manifest tests が通る。
|
||||||
|
|
||||||
|
## 範囲外
|
||||||
|
|
||||||
|
- Compact summary 自体を deterministic summarizer に置き換えること。
|
||||||
|
- Memory extract / consolidation の入力方式変更。
|
||||||
|
- 過去の壊れた session log の migration。
|
||||||
|
- Compact 後の retained tail token policy の再設計。
|
||||||
|
- session-log/file-read ごとの user-facing 総量 budget を増やすこと。
|
||||||
|
|
||||||
|
## 実装メモ
|
||||||
|
|
||||||
|
現行コード上の主な起点:
|
||||||
|
|
||||||
|
- `crates/pod/src/pod.rs::compact`
|
||||||
|
- `crates/pod/src/pod.rs::build_summary_input`
|
||||||
|
- `crates/pod/src/pod.rs::build_summary_prompt`
|
||||||
|
- `crates/pod/src/compact/worker.rs`
|
||||||
|
- `crates/manifest/src/lib.rs::CompactionConfig`
|
||||||
|
- `crates/manifest/src/config.rs::CompactionConfigPartial`
|
||||||
|
- `crates/manifest/src/defaults.rs`
|
||||||
|
- `resources/prompts/internal/compact_system.md`
|
||||||
|
- `docs/compaction.md`
|
||||||
|
|
@ -0,0 +1,143 @@
|
||||||
|
---
|
||||||
|
id: 20260528-001748-compact-session-log-exploration
|
||||||
|
slug: compact-session-log-exploration
|
||||||
|
title: Compact: session log 探索型の要約入力に変更する
|
||||||
|
status: closed
|
||||||
|
kind: task
|
||||||
|
priority: P2
|
||||||
|
labels: [compact, session-log]
|
||||||
|
created_at: 2026-05-28T00:17:48Z
|
||||||
|
updated_at: 2026-05-28T03:41:42Z
|
||||||
|
assignee: null
|
||||||
|
legacy_ticket: null
|
||||||
|
---
|
||||||
|
|
||||||
|
# Compact: session log 探索型の要約入力に変更する
|
||||||
|
|
||||||
|
## 背景
|
||||||
|
|
||||||
|
`insomnia-troubleshoot` Pod の手動 compact で、Compact Worker が入力トークン上限に到達して停止した。現行実装は `Pod::compact` で retained tail より前の `items_to_summarise` を `build_summary_input()` に渡し、`build_summary_prompt()` が user / assistant / system message と tool result summary を `## Conversation` に連結して Compact Worker の初回 input に載せている。
|
||||||
|
|
||||||
|
raw tool output や reasoning は落としているが、長い session では pruned transcript だけでも `compact_worker_max_input_tokens` を超える。Compact の目的は「全履歴を読ませる」ことではなく、次セッションに必要な構造化要約と file auto-read/reference を作ることなので、初期 input は軽量 overview に留め、必要箇所は Compact Worker が session log / workspace file を探索して確認できる形にする。
|
||||||
|
|
||||||
|
また、Compact Worker の健全性は「初期 input が小さいこと」だけでは保証できない。探索 tool の結果、assistant 出力、`write_summary` 呼び出しまでを含む Compact Worker 全体の context と、compact 後に作られる新 session 初期 context を別々に制御する必要がある。
|
||||||
|
|
||||||
|
## 方針
|
||||||
|
|
||||||
|
Compact Worker の初期 context は、全文 transcript ではなく決定的に生成した session overview / index を渡す。LLM には探索空間を狭めた上で、必要な session log 範囲や workspace file を tool で読む権限を与える。
|
||||||
|
|
||||||
|
基本方針:
|
||||||
|
|
||||||
|
- 初期 input は User / Assistant / System の継続に必要な情報を中心に、target size 内の overview として生成する。
|
||||||
|
- 初期 overview が target を超えた程度で compact を失敗させない。warning / trace に記録して続行する。
|
||||||
|
- 初期 overview deadline は通常運用の調整値ではなく、想定外の入力生成バグを検出する最悪ケースの安全網とする。deadline 超過時は、可能ならより粗い overview へ fallback し、それでも最低限の入力を作れない場合だけ失敗する。
|
||||||
|
- ToolCall / ToolResult は初期 input では本文を展開しない。
|
||||||
|
- tool 名、summary、対象 path、成否、大きな出力の有無、session log 上の位置などの index に留める。
|
||||||
|
- Compact Worker は session log の必要箇所を探索・再読できる。
|
||||||
|
- Compact Worker の探索量は、session-log/file-read 個別の総量 budget ではなく、Compact Worker session 全体の context budget で制御する。
|
||||||
|
- Compact Worker context が上限に近づいたら、`mark_read_required` とは独立に「探索を切り上げて `write_summary` へ進め」という勧告を Worker に渡し、人間にも警告を出す。
|
||||||
|
- 最終 summary と closing turn のための reserve を確保し、reserve を食い潰すほど大きい tool result は残 budget に合わせて抑制・切り詰め・再読指示にする。
|
||||||
|
- AutoRead 判断のため、workspace file は現行通り `read_file` で確認し、必要なものだけ `mark_read_required` / `add_reference` する。
|
||||||
|
- AutoRead budget は Compact Worker の探索 budget ではなく、compact 後の新 session 初期 context に注入される file content の合計上限として扱う。
|
||||||
|
- Compact Worker の出力は現行と同じく structured summary + auto-read + references を生成する。
|
||||||
|
|
||||||
|
## Compact Worker / compaction parameters
|
||||||
|
|
||||||
|
`[compaction]` 配下では `compact_` prefix を新規 parameter 名につけない。既存の `compact_*` key は、この ticket の実装時に同じ意味の prefix なし key へ整理する。
|
||||||
|
|
||||||
|
必要な parameter:
|
||||||
|
|
||||||
|
- `retained_tokens`
|
||||||
|
- compact 後に verbatim で残す history tail の token budget。
|
||||||
|
- `overview_target_tokens`
|
||||||
|
- 初期 overview / index 生成器が目指す通常サイズ。超過しても即失敗しない。
|
||||||
|
- `overview_warning_tokens`
|
||||||
|
- 初期 overview が想定より大きいことを記録・警告する閾値。compact は続行する。
|
||||||
|
- `overview_deadline_tokens`
|
||||||
|
- 初期 overview の最悪ケース deadline。超過時はより粗い overview へ deterministic fallback し、それでも無理な場合だけ compact を失敗させる。
|
||||||
|
- `worker_context_max_tokens`
|
||||||
|
- Compact Worker session 全体の context hard limit。system prompt、overview、assistant output、tool calls/results、session-log/file read results、`write_summary` 周辺の蓄積を含む。
|
||||||
|
- `finish_warning_remaining_tokens`
|
||||||
|
- 残り context がこの値以下になったら、Compact Worker に探索切り上げと `write_summary` を促す勧告を入れる。
|
||||||
|
- `final_reserve_tokens`
|
||||||
|
- 最終 summary と closing turn のために残す reserve。これを割り込みそうな tool result は full content を返さず、range 縮小や summary への移行を促す。
|
||||||
|
- `worker_max_turns`
|
||||||
|
- Compact Worker の tool-loop 最大 turn 数。budget 制御とは別の runaway guard。
|
||||||
|
- `summary_target_tokens`
|
||||||
|
- `write_summary` text の目標サイズ。prompt / nudge に使う。
|
||||||
|
- `summary_max_tokens`
|
||||||
|
- `write_summary` text の hard validation。超過した summary は縮約を促すか compact 成功扱いにしない。
|
||||||
|
- `auto_read_budget_tokens`
|
||||||
|
- `mark_read_required` によって compact 後の新 session に注入される file content の合計 token budget。
|
||||||
|
- `result_context_max_tokens`
|
||||||
|
- compact 成功前に dry-run する新 session 初期 context の上限。summary、auto-read contents、references、task snapshot、retained tail を含む。
|
||||||
|
- `model`
|
||||||
|
- compactor model。未指定なら main Worker の client を clone する。
|
||||||
|
|
||||||
|
Compact 発火条件の `threshold` / `request_threshold` は Compact Worker の健全性 parameter ではないが、既存の `compact_threshold` / `compact_request_threshold` を整理する場合は `[compaction]` 内の prefix なし key として扱う。
|
||||||
|
|
||||||
|
## 要件
|
||||||
|
|
||||||
|
- `build_summary_input()` / compact 入力生成を、prefix 全体の pruned transcript 一括投入から、bounded overview + index 生成に変更する。
|
||||||
|
- overview は `overview_target_tokens` を目指して生成する。
|
||||||
|
- `overview_warning_tokens` 超過時は警告・trace を記録しつつ続行する。
|
||||||
|
- `overview_deadline_tokens` 超過時はより粗い deterministic overview に fallback する。通常ケースの user-facing hard error にしない。
|
||||||
|
- User / Assistant / System message を優先し、古い detail は落としてよい。
|
||||||
|
- Tool output content は初期 input に載せない。
|
||||||
|
- Compact Worker 用の session log 探索 tool を追加する。
|
||||||
|
- 例: `search_session_log(query, filters, range)`。
|
||||||
|
- 例: `read_session_items(range | item_ids, mode = compact/full)`。
|
||||||
|
- 必要なら large tool result を個別に読む tool を追加する。
|
||||||
|
- 探索 tool は session-store の現在 segment / compact 対象 range を正本として読む。
|
||||||
|
- Compact 対象外の future/retained tail と混ざらないよう range 境界を明示する。
|
||||||
|
- tool result full content を返す場合は Compact Worker の残り context / `final_reserve_tokens` を守る。
|
||||||
|
- session-log/file-read 個別の総量 budget を user-facing parameter として増やさず、主制御は `worker_context_max_tokens` に寄せる。
|
||||||
|
- Compact Worker の context occupancy を request 前に見積もり、`worker_context_max_tokens` を最後の hard stop として扱う。
|
||||||
|
- Compact Worker の残り context が `finish_warning_remaining_tokens` 以下になったら、追加探索を切り上げて `write_summary` に進むよう Worker に勧告し、人間向け warning も出す。
|
||||||
|
- `final_reserve_tokens` を割り込む可能性がある tool result は、full content を返さず bounded/truncated result とし、range 縮小または `write_summary` への移行を促す。
|
||||||
|
- `write_summary` 後に `summary_max_tokens` を validation する。超過時は縮約を促し、改善できない場合は compact 成功扱いにしない。
|
||||||
|
- compact 成功前に、`summary + auto-read + references + retained tail + task snapshot` の新 session 初期 context を dry-run 見積もりし、`result_context_max_tokens` を超えないことを確認する。
|
||||||
|
- `mark_read_required` / `add_reference` の意味論は維持する。
|
||||||
|
- AutoRead は session log 上の過去 tool output ではなく、現在の workspace file を `read_file` で確認してから選ぶ。
|
||||||
|
- `auto_read_budget_tokens` は新 session 初期 context への file content 注入上限であり、Compact Worker の探索 budget ではない。
|
||||||
|
- `resources/prompts/internal/compact_system.md` の summary target は `summary_target_tokens` から反映する。
|
||||||
|
- 手動 compact / auto compact の双方で同じ経路を使う。
|
||||||
|
- 巨大 session でも Compact Worker が初回 input 上限で即停止しない。
|
||||||
|
|
||||||
|
## 完了条件
|
||||||
|
|
||||||
|
- 長い session で compact 初期 overview が transcript 全体を載せず、`overview_target_tokens` を目指して生成される unit test がある。
|
||||||
|
- `overview_warning_tokens` 超過時に compact が続行し、警告・trace が記録される test がある。
|
||||||
|
- `overview_deadline_tokens` 超過時に粗い deterministic overview へ fallback する test がある。
|
||||||
|
- Tool result content が初期 compact input に混入しないことを test で確認している。
|
||||||
|
- Compact Worker が session log overview から必要 range を tool で読み、`write_summary` まで到達できる test がある。
|
||||||
|
- `finish_warning_remaining_tokens` 到達時に Compact Worker へ探索切り上げ勧告が入り、人間向け warning も出る test がある。
|
||||||
|
- `final_reserve_tokens` を守るため、過大な tool result が bounded/truncated される test がある。
|
||||||
|
- `summary_max_tokens` 超過 summary が compact 成功扱いにならない、または縮約 nudge を受ける test がある。
|
||||||
|
- compact 後の新 session 初期 context が `result_context_max_tokens` で dry-run validation される test がある。
|
||||||
|
- `mark_read_required` / `add_reference` 既存 test が通り、auto-read budget の挙動が維持されている。
|
||||||
|
- `[compaction]` の新 parameter 名が docs / manifest schema / defaults に反映されている。
|
||||||
|
- `docs/compaction.md` と `resources/prompts/internal/compact_system.md` が新しい探索型 flow と budget/warning semantics に更新されている。
|
||||||
|
- `cargo fmt --check` と関連 crate の compact/session-store/pod/manifest tests が通る。
|
||||||
|
|
||||||
|
## 範囲外
|
||||||
|
|
||||||
|
- Compact summary 自体を deterministic summarizer に置き換えること。
|
||||||
|
- Memory extract / consolidation の入力方式変更。
|
||||||
|
- 過去の壊れた session log の migration。
|
||||||
|
- Compact 後の retained tail token policy の再設計。
|
||||||
|
- session-log/file-read ごとの user-facing 総量 budget を増やすこと。
|
||||||
|
|
||||||
|
## 実装メモ
|
||||||
|
|
||||||
|
現行コード上の主な起点:
|
||||||
|
|
||||||
|
- `crates/pod/src/pod.rs::compact`
|
||||||
|
- `crates/pod/src/pod.rs::build_summary_input`
|
||||||
|
- `crates/pod/src/pod.rs::build_summary_prompt`
|
||||||
|
- `crates/pod/src/compact/worker.rs`
|
||||||
|
- `crates/manifest/src/lib.rs::CompactionConfig`
|
||||||
|
- `crates/manifest/src/config.rs::CompactionConfigPartial`
|
||||||
|
- `crates/manifest/src/defaults.rs`
|
||||||
|
- `resources/prompts/internal/compact_system.md`
|
||||||
|
- `docs/compaction.md`
|
||||||
|
|
@ -0,0 +1,189 @@
|
||||||
|
<!-- event: create author: tickets.sh at: 2026-05-28T00:17:48Z -->
|
||||||
|
|
||||||
|
## Created
|
||||||
|
|
||||||
|
Created by tickets.sh create.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
<!-- event: review author: insomnia at: 2026-05-28T03:41:41Z status: approve -->
|
||||||
|
|
||||||
|
## Review: approve
|
||||||
|
|
||||||
|
実装を review し、approve する。
|
||||||
|
|
||||||
|
確認内容:
|
||||||
|
- Compact Worker 初期入力が bounded overview / index に変更され、ToolCall arguments / ToolResult full content / Reasoning body が初期 input に混入しない。
|
||||||
|
- `[compaction]` の prefix なし parameter と旧 `compact_*` alias が manifest/config/defaults に反映されている。
|
||||||
|
- `finish_warning_remaining_tokens` で Worker history に永続化される budget warning が入り、人間向け warning も出る。
|
||||||
|
- `final_reserve_tokens` 到達後は `write_summary` 以外の探索 tool が synthetic error になり、summary 用 reserve を守る。
|
||||||
|
- `search_session_log` / `read_session_items` が Compact Worker に登録され、bounded overview から漏れた履歴を探索できる。
|
||||||
|
- `summary_max_tokens` と `result_context_max_tokens` の validation が入っている。
|
||||||
|
- docs / compact system prompt が新 flow に更新されている。
|
||||||
|
|
||||||
|
検証:
|
||||||
|
- cargo fmt --check
|
||||||
|
- cargo check -p llm-worker -p pod -p manifest
|
||||||
|
- cargo test -p manifest compaction
|
||||||
|
- cargo test -p pod compact_worker_interceptor --no-default-features
|
||||||
|
- cargo test -p pod build_summary_prompt_tests --no-default-features
|
||||||
|
- cargo test -p pod session_log --no-default-features
|
||||||
|
- cargo test -p pod read_session_items --no-default-features
|
||||||
|
|
||||||
|
注意:
|
||||||
|
- `cargo test -p pod --no-default-features` 全体は master 上の trace commit だけでも controller empty-turn rollback 系 3 tests が失敗するため、この ticket の blocking とはしない。
|
||||||
|
- `cargo test -p manifest` 全体は環境依存の `runtime_dir_prefers_xdg_runtime_dir` が失敗するため、この ticket の blocking とはしない。
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
<!-- event: close author: hare at: 2026-05-28T03:41:42Z status: closed -->
|
||||||
|
|
||||||
|
## Closed
|
||||||
|
|
||||||
|
---
|
||||||
|
id: 20260528-001748-compact-session-log-exploration
|
||||||
|
slug: compact-session-log-exploration
|
||||||
|
title: Compact: session log 探索型の要約入力に変更する
|
||||||
|
status: closed
|
||||||
|
kind: task
|
||||||
|
priority: P2
|
||||||
|
labels: [compact, session-log]
|
||||||
|
created_at: 2026-05-28T00:17:48Z
|
||||||
|
updated_at: 2026-05-28T03:41:42Z
|
||||||
|
assignee: null
|
||||||
|
legacy_ticket: null
|
||||||
|
---
|
||||||
|
|
||||||
|
# Compact: session log 探索型の要約入力に変更する
|
||||||
|
|
||||||
|
## 背景
|
||||||
|
|
||||||
|
`insomnia-troubleshoot` Pod の手動 compact で、Compact Worker が入力トークン上限に到達して停止した。現行実装は `Pod::compact` で retained tail より前の `items_to_summarise` を `build_summary_input()` に渡し、`build_summary_prompt()` が user / assistant / system message と tool result summary を `## Conversation` に連結して Compact Worker の初回 input に載せている。
|
||||||
|
|
||||||
|
raw tool output や reasoning は落としているが、長い session では pruned transcript だけでも `compact_worker_max_input_tokens` を超える。Compact の目的は「全履歴を読ませる」ことではなく、次セッションに必要な構造化要約と file auto-read/reference を作ることなので、初期 input は軽量 overview に留め、必要箇所は Compact Worker が session log / workspace file を探索して確認できる形にする。
|
||||||
|
|
||||||
|
また、Compact Worker の健全性は「初期 input が小さいこと」だけでは保証できない。探索 tool の結果、assistant 出力、`write_summary` 呼び出しまでを含む Compact Worker 全体の context と、compact 後に作られる新 session 初期 context を別々に制御する必要がある。
|
||||||
|
|
||||||
|
## 方針
|
||||||
|
|
||||||
|
Compact Worker の初期 context は、全文 transcript ではなく決定的に生成した session overview / index を渡す。LLM には探索空間を狭めた上で、必要な session log 範囲や workspace file を tool で読む権限を与える。
|
||||||
|
|
||||||
|
基本方針:
|
||||||
|
|
||||||
|
- 初期 input は User / Assistant / System の継続に必要な情報を中心に、target size 内の overview として生成する。
|
||||||
|
- 初期 overview が target を超えた程度で compact を失敗させない。warning / trace に記録して続行する。
|
||||||
|
- 初期 overview deadline は通常運用の調整値ではなく、想定外の入力生成バグを検出する最悪ケースの安全網とする。deadline 超過時は、可能ならより粗い overview へ fallback し、それでも最低限の入力を作れない場合だけ失敗する。
|
||||||
|
- ToolCall / ToolResult は初期 input では本文を展開しない。
|
||||||
|
- tool 名、summary、対象 path、成否、大きな出力の有無、session log 上の位置などの index に留める。
|
||||||
|
- Compact Worker は session log の必要箇所を探索・再読できる。
|
||||||
|
- Compact Worker の探索量は、session-log/file-read 個別の総量 budget ではなく、Compact Worker session 全体の context budget で制御する。
|
||||||
|
- Compact Worker context が上限に近づいたら、`mark_read_required` とは独立に「探索を切り上げて `write_summary` へ進め」という勧告を Worker に渡し、人間にも警告を出す。
|
||||||
|
- 最終 summary と closing turn のための reserve を確保し、reserve を食い潰すほど大きい tool result は残 budget に合わせて抑制・切り詰め・再読指示にする。
|
||||||
|
- AutoRead 判断のため、workspace file は現行通り `read_file` で確認し、必要なものだけ `mark_read_required` / `add_reference` する。
|
||||||
|
- AutoRead budget は Compact Worker の探索 budget ではなく、compact 後の新 session 初期 context に注入される file content の合計上限として扱う。
|
||||||
|
- Compact Worker の出力は現行と同じく structured summary + auto-read + references を生成する。
|
||||||
|
|
||||||
|
## Compact Worker / compaction parameters
|
||||||
|
|
||||||
|
`[compaction]` 配下では `compact_` prefix を新規 parameter 名につけない。既存の `compact_*` key は、この ticket の実装時に同じ意味の prefix なし key へ整理する。
|
||||||
|
|
||||||
|
必要な parameter:
|
||||||
|
|
||||||
|
- `retained_tokens`
|
||||||
|
- compact 後に verbatim で残す history tail の token budget。
|
||||||
|
- `overview_target_tokens`
|
||||||
|
- 初期 overview / index 生成器が目指す通常サイズ。超過しても即失敗しない。
|
||||||
|
- `overview_warning_tokens`
|
||||||
|
- 初期 overview が想定より大きいことを記録・警告する閾値。compact は続行する。
|
||||||
|
- `overview_deadline_tokens`
|
||||||
|
- 初期 overview の最悪ケース deadline。超過時はより粗い overview へ deterministic fallback し、それでも無理な場合だけ compact を失敗させる。
|
||||||
|
- `worker_context_max_tokens`
|
||||||
|
- Compact Worker session 全体の context hard limit。system prompt、overview、assistant output、tool calls/results、session-log/file read results、`write_summary` 周辺の蓄積を含む。
|
||||||
|
- `finish_warning_remaining_tokens`
|
||||||
|
- 残り context がこの値以下になったら、Compact Worker に探索切り上げと `write_summary` を促す勧告を入れる。
|
||||||
|
- `final_reserve_tokens`
|
||||||
|
- 最終 summary と closing turn のために残す reserve。これを割り込みそうな tool result は full content を返さず、range 縮小や summary への移行を促す。
|
||||||
|
- `worker_max_turns`
|
||||||
|
- Compact Worker の tool-loop 最大 turn 数。budget 制御とは別の runaway guard。
|
||||||
|
- `summary_target_tokens`
|
||||||
|
- `write_summary` text の目標サイズ。prompt / nudge に使う。
|
||||||
|
- `summary_max_tokens`
|
||||||
|
- `write_summary` text の hard validation。超過した summary は縮約を促すか compact 成功扱いにしない。
|
||||||
|
- `auto_read_budget_tokens`
|
||||||
|
- `mark_read_required` によって compact 後の新 session に注入される file content の合計 token budget。
|
||||||
|
- `result_context_max_tokens`
|
||||||
|
- compact 成功前に dry-run する新 session 初期 context の上限。summary、auto-read contents、references、task snapshot、retained tail を含む。
|
||||||
|
- `model`
|
||||||
|
- compactor model。未指定なら main Worker の client を clone する。
|
||||||
|
|
||||||
|
Compact 発火条件の `threshold` / `request_threshold` は Compact Worker の健全性 parameter ではないが、既存の `compact_threshold` / `compact_request_threshold` を整理する場合は `[compaction]` 内の prefix なし key として扱う。
|
||||||
|
|
||||||
|
## 要件
|
||||||
|
|
||||||
|
- `build_summary_input()` / compact 入力生成を、prefix 全体の pruned transcript 一括投入から、bounded overview + index 生成に変更する。
|
||||||
|
- overview は `overview_target_tokens` を目指して生成する。
|
||||||
|
- `overview_warning_tokens` 超過時は警告・trace を記録しつつ続行する。
|
||||||
|
- `overview_deadline_tokens` 超過時はより粗い deterministic overview に fallback する。通常ケースの user-facing hard error にしない。
|
||||||
|
- User / Assistant / System message を優先し、古い detail は落としてよい。
|
||||||
|
- Tool output content は初期 input に載せない。
|
||||||
|
- Compact Worker 用の session log 探索 tool を追加する。
|
||||||
|
- 例: `search_session_log(query, filters, range)`。
|
||||||
|
- 例: `read_session_items(range | item_ids, mode = compact/full)`。
|
||||||
|
- 必要なら large tool result を個別に読む tool を追加する。
|
||||||
|
- 探索 tool は session-store の現在 segment / compact 対象 range を正本として読む。
|
||||||
|
- Compact 対象外の future/retained tail と混ざらないよう range 境界を明示する。
|
||||||
|
- tool result full content を返す場合は Compact Worker の残り context / `final_reserve_tokens` を守る。
|
||||||
|
- session-log/file-read 個別の総量 budget を user-facing parameter として増やさず、主制御は `worker_context_max_tokens` に寄せる。
|
||||||
|
- Compact Worker の context occupancy を request 前に見積もり、`worker_context_max_tokens` を最後の hard stop として扱う。
|
||||||
|
- Compact Worker の残り context が `finish_warning_remaining_tokens` 以下になったら、追加探索を切り上げて `write_summary` に進むよう Worker に勧告し、人間向け warning も出す。
|
||||||
|
- `final_reserve_tokens` を割り込む可能性がある tool result は、full content を返さず bounded/truncated result とし、range 縮小または `write_summary` への移行を促す。
|
||||||
|
- `write_summary` 後に `summary_max_tokens` を validation する。超過時は縮約を促し、改善できない場合は compact 成功扱いにしない。
|
||||||
|
- compact 成功前に、`summary + auto-read + references + retained tail + task snapshot` の新 session 初期 context を dry-run 見積もりし、`result_context_max_tokens` を超えないことを確認する。
|
||||||
|
- `mark_read_required` / `add_reference` の意味論は維持する。
|
||||||
|
- AutoRead は session log 上の過去 tool output ではなく、現在の workspace file を `read_file` で確認してから選ぶ。
|
||||||
|
- `auto_read_budget_tokens` は新 session 初期 context への file content 注入上限であり、Compact Worker の探索 budget ではない。
|
||||||
|
- `resources/prompts/internal/compact_system.md` の summary target は `summary_target_tokens` から反映する。
|
||||||
|
- 手動 compact / auto compact の双方で同じ経路を使う。
|
||||||
|
- 巨大 session でも Compact Worker が初回 input 上限で即停止しない。
|
||||||
|
|
||||||
|
## 完了条件
|
||||||
|
|
||||||
|
- 長い session で compact 初期 overview が transcript 全体を載せず、`overview_target_tokens` を目指して生成される unit test がある。
|
||||||
|
- `overview_warning_tokens` 超過時に compact が続行し、警告・trace が記録される test がある。
|
||||||
|
- `overview_deadline_tokens` 超過時に粗い deterministic overview へ fallback する test がある。
|
||||||
|
- Tool result content が初期 compact input に混入しないことを test で確認している。
|
||||||
|
- Compact Worker が session log overview から必要 range を tool で読み、`write_summary` まで到達できる test がある。
|
||||||
|
- `finish_warning_remaining_tokens` 到達時に Compact Worker へ探索切り上げ勧告が入り、人間向け warning も出る test がある。
|
||||||
|
- `final_reserve_tokens` を守るため、過大な tool result が bounded/truncated される test がある。
|
||||||
|
- `summary_max_tokens` 超過 summary が compact 成功扱いにならない、または縮約 nudge を受ける test がある。
|
||||||
|
- compact 後の新 session 初期 context が `result_context_max_tokens` で dry-run validation される test がある。
|
||||||
|
- `mark_read_required` / `add_reference` 既存 test が通り、auto-read budget の挙動が維持されている。
|
||||||
|
- `[compaction]` の新 parameter 名が docs / manifest schema / defaults に反映されている。
|
||||||
|
- `docs/compaction.md` と `resources/prompts/internal/compact_system.md` が新しい探索型 flow と budget/warning semantics に更新されている。
|
||||||
|
- `cargo fmt --check` と関連 crate の compact/session-store/pod/manifest tests が通る。
|
||||||
|
|
||||||
|
## 範囲外
|
||||||
|
|
||||||
|
- Compact summary 自体を deterministic summarizer に置き換えること。
|
||||||
|
- Memory extract / consolidation の入力方式変更。
|
||||||
|
- 過去の壊れた session log の migration。
|
||||||
|
- Compact 後の retained tail token policy の再設計。
|
||||||
|
- session-log/file-read ごとの user-facing 総量 budget を増やすこと。
|
||||||
|
|
||||||
|
## 実装メモ
|
||||||
|
|
||||||
|
現行コード上の主な起点:
|
||||||
|
|
||||||
|
- `crates/pod/src/pod.rs::compact`
|
||||||
|
- `crates/pod/src/pod.rs::build_summary_input`
|
||||||
|
- `crates/pod/src/pod.rs::build_summary_prompt`
|
||||||
|
- `crates/pod/src/compact/worker.rs`
|
||||||
|
- `crates/manifest/src/lib.rs::CompactionConfig`
|
||||||
|
- `crates/manifest/src/config.rs::CompactionConfigPartial`
|
||||||
|
- `crates/manifest/src/defaults.rs`
|
||||||
|
- `resources/prompts/internal/compact_system.md`
|
||||||
|
- `docs/compaction.md`
|
||||||
|
|
||||||
|
|
||||||
|
---
|
||||||
Loading…
Reference in New Issue
Block a user