yoi/crates/memory/src/linter/warnings.rs
2026-04-27 13:59:04 +09:00

109 lines
3.8 KiB
Rust

//! Soft warnings: low-importance large records, sources accumulation,
//! and similar-slug clusters.
//!
//! The body-size and sources checks are per-write checks that only need
//! the proposed content. The similar-slug check additionally needs the
//! existing record set, which the caller supplies as `ExistingRecords`.
use crate::error::LintWarning;
use crate::linter::LintReport;
use crate::linter::existing::ExistingRecords;
use crate::slug::Slug;
use crate::workspace::{ClassifiedPath, RecordKind};
/// Body length, counted in `char`s, at or above which a record with at
/// most one source triggers `LintWarning::LowImportanceLargeRecord`.
const LARGE_BODY_THRESHOLD: usize = 1500;
/// Source count that must be strictly exceeded before
/// `LintWarning::SourcesOverflow` is emitted.
const SOURCES_OVERFLOW_THRESHOLD: usize = 10;
/// Maximum Levenshtein distance (inclusive) between the proposed slug
/// and an existing slug for the two to count as similar.
const SIMILAR_SLUG_DISTANCE: usize = 2;
/// Cluster size (including the new slug) at which the similar-slug
/// warning fires. 3 follows `docs/plan/memory.md` §Linter (`類似 slug
/// 乱立`, i.e. "proliferation of similar slugs") — two existing close
/// neighbours plus the new write.
const SIMILAR_SLUG_CLUSTER_MIN: usize = 3;
/// Warning pass for kinds that don't carry a `sources` array (Summary).
///
/// This is a deliberate no-op: no warning is ever pushed to the report
/// and all three arguments are left unused.
pub fn check_warnings_kindless(_cp: &ClassifiedPath, body: &str, _report: &mut LintReport) {
// `body` is never inspected; this binding only marks it as
// intentionally unused.
let _ = body;
// Summary intentionally has no warning band — the per-record
// size:importance heuristic doesn't apply to a single rolling file.
}
/// Push the soft warnings that depend on body size and `sources` count.
///
/// Intended for kinds with `sources` (Decisions / Requests / Knowledge).
/// Two independent checks run, and any resulting warnings are pushed in
/// this order:
///
/// * `LowImportanceLargeRecord` — at most one source while the body is
///   `LARGE_BODY_THRESHOLD` or more `char`s long (chars, not bytes).
/// * `SourcesOverflow` — strictly more than
///   `SOURCES_OVERFLOW_THRESHOLD` sources.
pub fn check_warnings_with_sources(body: &str, source_count: usize, report: &mut LintReport) {
    let chars = body.chars().count();

    let large_body = (source_count <= 1 && chars >= LARGE_BODY_THRESHOLD)
        .then(|| LintWarning::LowImportanceLargeRecord { chars });
    let too_many_sources = (source_count > SOURCES_OVERFLOW_THRESHOLD).then(|| {
        LintWarning::SourcesOverflow {
            count: source_count,
        }
    });

    // Chaining keeps the original emission order: size first, then sources.
    for warning in large_body.into_iter().chain(too_many_sources) {
        report.push_warning(warning);
    }
}
/// Warn when the proposed slug would sit in a crowd of look-alike slugs.
///
/// Every existing slug of the same `kind` that differs from `new_slug`
/// and lies within `SIMILAR_SLUG_DISTANCE` Levenshtein steps of it is
/// collected as a neighbour. If the neighbours plus the new slug number
/// `SIMILAR_SLUG_CLUSTER_MIN` or more, a single `SimilarSlugs` warning
/// is pushed. Its list contains the neighbours and the new slug, sorted
/// so the warning text is deterministic.
pub fn check_similar_slugs(
    new_slug: &Slug,
    kind: RecordKind,
    existing: &ExistingRecords,
    report: &mut LintReport,
) {
    let proposed = new_slug.as_str();
    let mut cluster: Vec<String> = Vec::new();

    for candidate in existing.slugs(kind) {
        // An exact match is not counted as its own neighbour.
        if candidate == new_slug {
            continue;
        }
        if levenshtein(proposed, candidate.as_str()) <= SIMILAR_SLUG_DISTANCE {
            cluster.push(candidate.to_string());
        }
    }

    // The cluster size includes the proposed slug itself, hence the +1.
    let cluster_size = cluster.len() + 1;
    if cluster_size >= SIMILAR_SLUG_CLUSTER_MIN {
        cluster.push(new_slug.to_string());
        cluster.sort();
        report.push_warning(LintWarning::SimilarSlugs(cluster));
    }
}
/// Levenshtein distance between `a` and `b`, measured in `char`s.
///
/// Uses a single rolling row of the dynamic-programming table: before
/// processing the `i`-th char of `a`, `row[j]` holds the distance between
/// the first `i` chars of `a` and the first `j` chars of `b`. Insertions,
/// deletions and substitutions each cost 1.
fn levenshtein(a: &str, b: &str) -> usize {
    let target: Vec<char> = b.chars().collect();

    // Row for the empty prefix of `a`: turning "" into b[..j] takes j inserts.
    // If `a` is empty the loop below never runs and this row is the answer.
    let mut row: Vec<usize> = (0..=target.len()).collect();

    for (i, source_char) in a.chars().enumerate() {
        // `diagonal` carries the previous row's value at column j while
        // `row[j]` is being overwritten with the current row's value.
        let mut diagonal = row[0];
        row[0] = i + 1;

        for (j, target_char) in target.iter().enumerate() {
            let above = row[j + 1];
            let substitution = diagonal + usize::from(source_char != *target_char);
            let insertion = row[j] + 1;
            let deletion = above + 1;
            row[j + 1] = insertion.min(deletion).min(substitution);
            diagonal = above;
        }
    }

    row[target.len()]
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Table-driven sanity checks for `levenshtein`: empty inputs,
    /// identical strings, a single substitution, the classic
    /// kitten/sitting pair, and a swap of adjacent chars (which costs 2
    /// because transposition is not a single edit here).
    #[test]
    fn levenshtein_basics() {
        let cases: [(&str, &str, usize); 7] = [
            ("", "", 0),
            ("a", "", 1),
            ("", "ab", 2),
            ("kitten", "sitting", 3),
            ("foo", "foo", 0),
            ("abc", "abd", 1),
            ("abcd", "acbd", 2),
        ];

        for &(left, right, expected) in cases.iter() {
            assert_eq!(levenshtein(left, right), expected);
        }
    }
}