From 99dbb1c6c09d185bcd7e99ad74755f0df45064af Mon Sep 17 00:00:00 2001 From: Hare Date: Wed, 29 Apr 2026 21:23:29 +0900 Subject: [PATCH] =?UTF-8?q?tui=E3=81=AE=E5=8D=98=E8=AA=9E=E5=A2=83?= =?UTF-8?q?=E7=95=8C=E3=82=AB=E3=83=BC=E3=82=BD=E3=83=AB=E7=A7=BB=E5=8B=95?= =?UTF-8?q?=E5=AE=9F=E8=A3=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/tui/src/app.rs | 6 + crates/tui/src/input.rs | 251 ++++++++++++++++++++++++++++++++++++++++ crates/tui/src/main.rs | 8 ++ 3 files changed, 265 insertions(+) diff --git a/crates/tui/src/app.rs b/crates/tui/src/app.rs index d376dd54..53abe3ac 100644 --- a/crates/tui/src/app.rs +++ b/crates/tui/src/app.rs @@ -404,6 +404,12 @@ impl App { pub fn move_cursor_right(&mut self) { self.input.move_right(); } + pub fn move_cursor_word_left(&mut self) { + self.input.move_word_left(); + } + pub fn move_cursor_word_right(&mut self) { + self.input.move_word_right(); + } pub fn move_cursor_home(&mut self) { self.input.move_home(); } diff --git a/crates/tui/src/input.rs b/crates/tui/src/input.rs index 86cfa413..d71622a2 100644 --- a/crates/tui/src/input.rs +++ b/crates/tui/src/input.rs @@ -38,6 +38,48 @@ pub enum Atom { Paste(PasteRef), } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum AtomClass { + Word(WordKind), + Sep, + Paste, +} + +/// Sub-classification of word atoms. A run of equal `WordKind` is one word; +/// a kind switch is a word boundary. Lets `Ctrl+Left/Right` step over +/// runs of hiragana/katakana/han/ASCII independently when they sit adjacent. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum WordKind { + Ascii, + Hiragana, + Katakana, + Han, + Other, +} + +fn atom_class(atom: &Atom) -> AtomClass { + match atom { + Atom::Paste(_) => AtomClass::Paste, + Atom::Char(c) => char_class(*c), + } +} + +fn char_class(c: char) -> AtomClass { + if c.is_ascii_alphanumeric() || c == '_' { + return AtomClass::Word(WordKind::Ascii); + } + let cp = c as u32; + match cp { + 0x3040..=0x309F => AtomClass::Word(WordKind::Hiragana), + 0x30A0..=0x30FF | 0x31F0..=0x31FF => AtomClass::Word(WordKind::Katakana), + 0x3400..=0x4DBF | 0x4E00..=0x9FFF | 0xF900..=0xFAFF | 0x20000..=0x2FFFF => { + AtomClass::Word(WordKind::Han) + } + _ if c.is_alphanumeric() => AtomClass::Word(WordKind::Other), + _ => AtomClass::Sep, + } +} + pub struct InputBuffer { atoms: Vec, /// Insertion point in `0..=atoms.len()`. @@ -114,6 +156,38 @@ impl InputBuffer { self.cursor = (self.cursor + 1).min(self.atoms.len()); } + /// Move backward by one word. Skips a run of separators, then a run of + /// atoms sharing the same [`AtomClass`] — so `Word(Hiragana)` next to + /// `Word(Han)` are separate blocks, and a `Paste` atom is its own block. + pub fn move_word_left(&mut self) { + while self.cursor > 0 && atom_class(&self.atoms[self.cursor - 1]) == AtomClass::Sep { + self.cursor -= 1; + } + if self.cursor == 0 { + return; + } + let kind = atom_class(&self.atoms[self.cursor - 1]); + while self.cursor > 0 && atom_class(&self.atoms[self.cursor - 1]) == kind { + self.cursor -= 1; + } + } + + /// Move forward by one word. Mirror of [`move_word_left`]. + pub fn move_word_right(&mut self) { + while self.cursor < self.atoms.len() + && atom_class(&self.atoms[self.cursor]) == AtomClass::Sep + { + self.cursor += 1; + } + if self.cursor == self.atoms.len() { + return; + } + let kind = atom_class(&self.atoms[self.cursor]); + while self.cursor < self.atoms.len() && atom_class(&self.atoms[self.cursor]) == kind { + self.cursor += 1; + } + } + pub fn move_home(&mut self) { while self.cursor > 0 { if matches!(self.atoms[self.cursor - 1], Atom::Char('\n')) { @@ -489,3 +563,180 @@ mod submit_segments_tests { assert!(matches!(segs[0], Segment::Paste { .. })); } } + +#[cfg(test)] +mod word_motion_tests { + use super::*; + + fn buf_from(text: &str) -> InputBuffer { + let mut buf = InputBuffer::new(); + for c in text.chars() { + buf.insert_char(c); + } + buf + } + + fn cursor(buf: &InputBuffer) -> usize { + buf.cursor + } + + #[test] + fn empty_buffer_is_noop() { + let mut buf = InputBuffer::new(); + buf.move_word_left(); + assert_eq!(cursor(&buf), 0); + buf.move_word_right(); + assert_eq!(cursor(&buf), 0); + } + + #[test] + fn forward_from_start_lands_after_first_word() { + let mut buf = buf_from("foo bar baz"); + buf.cursor = 0; + buf.move_word_right(); + assert_eq!(cursor(&buf), 3); // after "foo" + buf.move_word_right(); + assert_eq!(cursor(&buf), 7); // after "foo bar" + buf.move_word_right(); + assert_eq!(cursor(&buf), 11); // after "foo bar baz" + buf.move_word_right(); + assert_eq!(cursor(&buf), 11); // end stays put + } + + #[test] + fn backward_from_end_lands_at_last_word_start() { + let mut buf = buf_from("foo bar baz"); + buf.move_word_left(); + assert_eq!(cursor(&buf), 8); // start of "baz" + buf.move_word_left(); + assert_eq!(cursor(&buf), 4); // start of "bar" + buf.move_word_left(); + assert_eq!(cursor(&buf), 0); // start of "foo" + buf.move_word_left(); + assert_eq!(cursor(&buf), 0); + } + + #[test] + fn skips_runs_of_separators() { + let mut buf = buf_from("a , b"); + buf.cursor = 1; // just after "a" + buf.move_word_right(); + assert_eq!(cursor(&buf), 7); // after "b" + buf.move_word_left(); + assert_eq!(cursor(&buf), 6); // start of "b" + buf.move_word_left(); + assert_eq!(cursor(&buf), 0); // start of "a" + } + + #[test] + fn newline_is_a_separator() { + let mut buf = buf_from("foo\nbar"); + buf.cursor = 0; + buf.move_word_right(); + assert_eq!(cursor(&buf), 3); + buf.move_word_right(); + assert_eq!(cursor(&buf), 7); + buf.move_word_left(); + assert_eq!(cursor(&buf), 4); + buf.move_word_left(); + assert_eq!(cursor(&buf), 0); + } + + #[test] + fn paste_counts_as_one_word() { + let mut buf = InputBuffer::new(); + for c in "foo ".chars() { + buf.insert_char(c); + } + buf.insert_paste("anything".into()); + for c in " bar".chars() { + buf.insert_char(c); + } + // atoms: f o o ' ' [P] ' ' b a r → 9 atoms, paste at index 4 + let end = 9; + buf.cursor = end; + buf.move_word_left(); + assert_eq!(cursor(&buf), 6); // start of "bar" + buf.move_word_left(); + assert_eq!(cursor(&buf), 4); // before paste + buf.move_word_left(); + assert_eq!(cursor(&buf), 0); // start of "foo" + + buf.cursor = 0; + buf.move_word_right(); + assert_eq!(cursor(&buf), 3); // after "foo" + buf.move_word_right(); + assert_eq!(cursor(&buf), 5); // after paste + buf.move_word_right(); + assert_eq!(cursor(&buf), 9); // after "bar" + } + + #[test] + fn underscore_is_a_word_char() { + let mut buf = buf_from("foo_bar baz"); + buf.cursor = 0; + buf.move_word_right(); + assert_eq!(cursor(&buf), 7); // "foo_bar" is one word + } + + #[test] + fn hiragana_run_is_one_word() { + // "こんにちは" — 5 hiragana atoms, no separators. + let mut buf = buf_from("こんにちは"); + buf.cursor = 0; + buf.move_word_right(); + assert_eq!(cursor(&buf), 5); + buf.move_word_left(); + assert_eq!(cursor(&buf), 0); + } + + #[test] + fn script_switch_is_a_word_boundary() { + // 漢字 | ひらがな | ASCII + let mut buf = buf_from("日本語のtest"); + buf.cursor = 0; + buf.move_word_right(); + assert_eq!(cursor(&buf), 3); // after "日本語" + buf.move_word_right(); + assert_eq!(cursor(&buf), 4); // after "の" + buf.move_word_right(); + assert_eq!(cursor(&buf), 8); // after "test" + + buf.move_word_left(); + assert_eq!(cursor(&buf), 4); // start of "test" + buf.move_word_left(); + assert_eq!(cursor(&buf), 3); // start of "の" + buf.move_word_left(); + assert_eq!(cursor(&buf), 0); // start of "日本語" + } + + #[test] + fn katakana_separates_from_ascii() { + let mut buf = buf_from("カタカナsecret"); + buf.cursor = 0; + buf.move_word_right(); + assert_eq!(cursor(&buf), 4); // after "カタカナ" + buf.move_word_right(); + assert_eq!(cursor(&buf), 10); // after "secret" + buf.move_word_left(); + assert_eq!(cursor(&buf), 4); + buf.move_word_left(); + assert_eq!(cursor(&buf), 0); + } + + #[test] + fn japanese_punctuation_is_a_separator() { + // 「、」 (U+3001) and 「。」 (U+3002) are not word chars. + let mut buf = buf_from("読んだ、走った。"); + buf.cursor = 0; + buf.move_word_right(); + assert_eq!(cursor(&buf), 1); // after "読" (han run of 1) + buf.move_word_right(); + assert_eq!(cursor(&buf), 3); // after "んだ" (hiragana run) + // "、" is sep — skipped, then han "走" + buf.move_word_right(); + assert_eq!(cursor(&buf), 5); // after "走" + buf.move_word_right(); + assert_eq!(cursor(&buf), 7); // after "った" + } +} diff --git a/crates/tui/src/main.rs b/crates/tui/src/main.rs index 49134766..5a01572a 100644 --- a/crates/tui/src/main.rs +++ b/crates/tui/src/main.rs @@ -415,10 +415,18 @@ fn handle_key(app: &mut App, key: KeyEvent) -> Option { app.delete_char_after(); None } + KeyCode::Left if ctrl => { + app.move_cursor_word_left(); + None + } KeyCode::Left => { app.move_cursor_left(); None } + KeyCode::Right if ctrl => { + app.move_cursor_word_right(); + None + } KeyCode::Right => { app.move_cursor_right(); None