From 82dcc57475633cf931fa21c75f7f75c1b505b6d6 Mon Sep 17 00:00:00 2001 From: Hare Date: Fri, 29 May 2026 18:21:17 +0900 Subject: [PATCH] fix: bound web search network reads --- crates/manifest/src/config.rs | 19 ++++++++++++ crates/manifest/src/lib.rs | 10 +++++-- crates/tools/src/web.rs | 56 +++++++++++++++++++++++++++++++++-- docs/pod-factory.md | 4 ++- 4 files changed, 83 insertions(+), 6 deletions(-) diff --git a/crates/manifest/src/config.rs b/crates/manifest/src/config.rs index c73ade40..cae3adfa 100644 --- a/crates/manifest/src/config.rs +++ b/crates/manifest/src/config.rs @@ -332,6 +332,7 @@ impl crate::WebSearchConfig { enabled: upper.enabled.or(self.enabled), provider: upper.provider.or(self.provider), api_key_env: upper.api_key_env.or(self.api_key_env), + timeout_secs: upper.timeout_secs.or(self.timeout_secs), base_url: upper.base_url.or(self.base_url), country: upper.country.or(self.country), search_lang: upper.search_lang.or(self.search_lang), @@ -1085,6 +1086,14 @@ mod tests { prune_protected_tokens: Some(5_000), ..Default::default() }), + web: Some(WebConfig { + search: Some(crate::WebSearchConfig { + api_key_env: Some("LOWER_BRAVE_KEY".into()), + timeout_secs: Some(12), + ..Default::default() + }), + ..Default::default() + }), ..Default::default() }; let upper = PodManifestConfig { @@ -1092,6 +1101,13 @@ mod tests { threshold: Some(80_000), ..Default::default() }), + web: Some(WebConfig { + search: Some(crate::WebSearchConfig { + timeout_secs: Some(3), + ..Default::default() + }), + ..Default::default() + }), ..Default::default() }; let merged = lower.merge(upper); @@ -1099,6 +1115,9 @@ mod tests { assert_eq!(c.threshold, Some(80_000)); // field from lower retained when upper has None assert_eq!(c.prune_protected_tokens, Some(5_000)); + let search = merged.web.unwrap().search.unwrap(); + assert_eq!(search.timeout_secs, Some(3)); + assert_eq!(search.api_key_env.as_deref(), Some("LOWER_BRAVE_KEY")); } #[test] diff --git a/crates/manifest/src/lib.rs 
b/crates/manifest/src/lib.rs index b7a06d99..90620739 100644 --- a/crates/manifest/src/lib.rs +++ b/crates/manifest/src/lib.rs @@ -120,6 +120,10 @@ pub struct WebSearchConfig { /// not belong in manifest files. #[serde(default)] pub api_key_env: Option, + /// Request timeout in seconds. Tool implementation applies a safe default + /// when this is omitted. + #[serde(default)] + pub timeout_secs: Option, /// Optional provider endpoint override for tests/proxies. Defaults to the /// Brave web search endpoint for the Brave provider. #[serde(default)] @@ -640,13 +644,15 @@ permission = "write" #[test] fn parse_web_config() { let toml = format!( - "{}\n[web]\nenabled = true\n\n[web.search]\nprovider = \"brave\"\napi_key_env = \"BRAVE_SEARCH_API_KEY\"\n\n[web.fetch]\ntimeout_secs = 7\nredirect_limit = 3\nmax_response_bytes = 12345\nmax_output_bytes = 2048\n", + "{}\n[web]\nenabled = true\n\n[web.search]\nprovider = \"brave\"\napi_key_env = \"BRAVE_SEARCH_API_KEY\"\ntimeout_secs = 12\n\n[web.fetch]\ntimeout_secs = 7\nredirect_limit = 3\nmax_response_bytes = 12345\nmax_output_bytes = 2048\n", MINIMAL_REQUIRED ); let manifest = PodManifest::from_toml(&toml).unwrap(); let web = manifest.web.unwrap(); assert_eq!(web.enabled, Some(true)); - assert_eq!(web.search.unwrap().provider, Some(WebSearchProvider::Brave)); + let search = web.search.unwrap(); + assert_eq!(search.provider, Some(WebSearchProvider::Brave)); + assert_eq!(search.timeout_secs, Some(12)); let fetch = web.fetch.unwrap(); assert_eq!(fetch.timeout_secs, Some(7)); assert_eq!(fetch.redirect_limit, Some(3)); diff --git a/crates/tools/src/web.rs b/crates/tools/src/web.rs index a9dd2df9..a2b7b2e1 100644 --- a/crates/tools/src/web.rs +++ b/crates/tools/src/web.rs @@ -16,6 +16,8 @@ const BRAVE_SEARCH_ENDPOINT: &str = "https://api.search.brave.com/res/v1/web/sea const BRAVE_QUERY_MAX_CHARS: usize = 400; const BRAVE_QUERY_MAX_WORDS: usize = 50; const WEB_SEARCH_DEFAULT_LIMIT: usize = 10; +const 
WEB_SEARCH_DEFAULT_TIMEOUT_SECS: u64 = 15; +const WEB_SEARCH_MAX_RESPONSE_BYTES: usize = 1024 * 1024; const WEB_FETCH_DEFAULT_TIMEOUT_SECS: u64 = 20; const WEB_FETCH_DEFAULT_REDIRECT_LIMIT: usize = 5; const WEB_FETCH_DEFAULT_MAX_RESPONSE_BYTES: usize = 2 * 1024 * 1024; @@ -240,17 +242,27 @@ async fn brave_search( } } + let timeout = Duration::from_secs( + cfg.timeout_secs + .unwrap_or(WEB_SEARCH_DEFAULT_TIMEOUT_SECS) + .max(1), + ); let response = client .get(url) + .timeout(timeout) .header("Accept", "application/json") .header("X-Subscription-Token", api_key) .send() .await .map_err(|err| ToolError::ExecutionFailed(format!("Brave Search request failed: {err}")))?; let status = response.status(); - let body = response.bytes().await.map_err(|err| { - ToolError::ExecutionFailed(format!("Brave Search response read failed: {err}")) - })?; + reject_oversized_content_length(response.headers(), WEB_SEARCH_MAX_RESPONSE_BYTES)?; + let (body, truncated) = read_limited(response, WEB_SEARCH_MAX_RESPONSE_BYTES).await?; + if truncated { + return Err(ToolError::ExecutionFailed(format!( + "Brave Search response exceeded max_response_bytes {WEB_SEARCH_MAX_RESPONSE_BYTES}" + ))); + } if !status.is_success() { return Err(ToolError::ExecutionFailed(format!( "Brave Search returned HTTP {status}: {}", @@ -281,6 +293,8 @@ async fn brave_search( "query_max_words": BRAVE_QUERY_MAX_WORDS, "limit": limit, "offset": offset, + "timeout_secs": timeout.as_secs(), + "max_response_bytes": WEB_SEARCH_MAX_RESPONSE_BYTES, }, "query": query, "results": results, @@ -1003,6 +1017,7 @@ mod tests { enabled: Some(true), provider: Some(WebSearchProvider::Brave), api_key_env: Some(env_name.clone()), + timeout_secs: Some(2), base_url: Some(format!("http://{addr}/search")), ..Default::default() }), @@ -1026,7 +1041,42 @@ mod tests { .contains("x-subscription-token: test-key\r\n") ); assert_eq!(value["provider"]["name"], "brave"); + assert_eq!(value["provider"]["timeout_secs"], 2); 
assert_eq!(value["results"][0]["title"], "Example"); assert_eq!(value["results"][0]["extra_snippets"][0], "Extra"); } + + #[tokio::test] + async fn rejects_oversized_brave_response() { + let response = format!( + "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: {}\r\n\r\n{{}}", + WEB_SEARCH_MAX_RESPONSE_BYTES + 1 + ); + let response: &'static str = Box::leak(response.into_boxed_str()); + let addr = serve_once(response).await; + let env_name = format!("INSOMNIA_TEST_BRAVE_OVERSIZED_KEY_{}", std::process::id()); + unsafe { std::env::set_var(&env_name, "test-key") }; + let tools = WebTools::new(Some(WebConfig { + enabled: Some(true), + allow_private_addresses: Some(true), + search: Some(WebSearchConfig { + enabled: Some(true), + provider: Some(WebSearchProvider::Brave), + api_key_env: Some(env_name.clone()), + base_url: Some(format!("http://{addr}/search")), + ..Default::default() + }), + fetch: None, + })); + let err = tools + .run_search(WebSearchInput { + query: "insomnia".into(), + limit: Some(1), + offset: Some(0), + }) + .await + .unwrap_err(); + unsafe { std::env::remove_var(&env_name) }; + assert!(err.to_string().contains("Content-Length")); + } } diff --git a/docs/pod-factory.md b/docs/pod-factory.md index 01f2f5bc..7fda3282 100644 --- a/docs/pod-factory.md +++ b/docs/pod-factory.md @@ -184,6 +184,7 @@ enabled = true [web.search] provider = "brave" api_key_env = "BRAVE_SEARCH_API_KEY" +timeout_secs = 15 [web.fetch] timeout_secs = 20 @@ -244,6 +245,7 @@ enabled = true [web.search] provider = "brave" api_key_env = "BRAVE_SEARCH_API_KEY" # API key は env 参照に置き、manifest に raw secret を書かない +timeout_secs = 15 [web.fetch] timeout_secs = 20 @@ -252,7 +254,7 @@ max_response_bytes = 2097152 max_output_bytes = 65536 ``` -`WebSearch` の最初の provider は Brave Search API(`https://api.search.brave.com/res/v1/web/search`)で、入力は `query` と任意の `limit` / `offset`。Brave の制約に合わせて `query` は 400 文字 / 50 words まで、`limit` は 1-20、`offset` は 0-9 に制限される。 +`WebSearch` の最初の 
provider は Brave Search API(`https://api.search.brave.com/res/v1/web/search`)で、入力は `query` と任意の `limit` / `offset`。Brave の制約に合わせて `query` は 400 文字 / 50 words まで、`limit` は 1-20、`offset` は 0-9 に制限される。`timeout_secs` を省略した場合は既定値の 15 秒が使われ(指定値は最低 1 秒に切り上げられる)、provider response は固定上限の 1 MiB(1048576 bytes)以内で読み込まれ、上限を超えた場合はエラーになる。 `WebFetch` は http/https URL のみを fetch し、timeout・redirect・response/output byte limit を適用する。localhost / private / link-local などの host/IP は fetch 前と各 redirect で拒否される。テストや明示的に信頼した環境では `[web] allow_private_addresses = true` または `[web.fetch] allow_private_addresses = true` を指定できる。