//! Tokenizer for parenthesised definitions such as `(foo [prefix:key=value])`.
pub struct Tokenizer {
|
|
state: State,
|
|
pos: usize,
|
|
reconsume: bool,
|
|
latest: Option<Token>,
|
|
input: Vec<char>,
|
|
buffer: String,
|
|
}
|
|
|
|
impl Tokenizer {
|
|
pub fn new(input: String) -> Self {
|
|
Self {
|
|
state: State::Data,
|
|
pos: 0,
|
|
reconsume: false,
|
|
latest: None,
|
|
input: input.chars().collect(),
|
|
buffer: String::new(),
|
|
}
|
|
}
|
|
|
|
fn is_eof(&self) -> bool {
|
|
self.pos >= self.input.len()
|
|
}
|
|
|
|
fn consume_input(&mut self) -> char {
|
|
let c = self.input[self.pos];
|
|
self.pos += 1;
|
|
c
|
|
}
|
|
}
|
|
|
|
impl Iterator for Tokenizer {
|
|
type Item = Token;
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
if self.is_eof() {
|
|
return None;
|
|
}
|
|
loop {
|
|
if self.reconsume {
|
|
self.pos -= 1;
|
|
self.reconsume = false;
|
|
}
|
|
let c = self.consume_input();
|
|
match self.state {
|
|
State::Data => match c {
|
|
'(' => {
|
|
self.state = State::NodeOpen;
|
|
}
|
|
' ' | '\n' | '\t' => {}
|
|
_ if self.is_eof() => {
|
|
return Some(Token::EOF);
|
|
}
|
|
_ => {
|
|
return Some(Token::Character(c));
|
|
}
|
|
},
|
|
State::NodeOpen => match c {
|
|
' ' | '\n' => {}
|
|
x if x.is_ascii_alphanumeric() => {
|
|
self.state = State::Define;
|
|
self.reconsume = true;
|
|
continue;
|
|
}
|
|
_ if self.is_eof() => {
|
|
return Some(Token::EOF);
|
|
}
|
|
_ => panic!("Unexpected character: {}", c),
|
|
},
|
|
State::Define => match c {
|
|
x if x.is_ascii_alphanumeric() => {
|
|
self.buffer.push(c);
|
|
}
|
|
' ' | '\n' => {
|
|
self.state = State::AfterDefine;
|
|
self.latest = Some(Token::Define {
|
|
name: self.buffer.clone(),
|
|
behavior: Vec::new(),
|
|
});
|
|
self.buffer.clear();
|
|
}
|
|
')' => {
|
|
self.state = State::Data;
|
|
}
|
|
_ if self.is_eof() => {
|
|
return Some(Token::EOF);
|
|
}
|
|
_ => panic!("Unexpected character: {}", c),
|
|
},
|
|
State::AfterDefine => match c {
|
|
' ' | '\n' => {}
|
|
'[' => {
|
|
self.state = State::Behavior;
|
|
}
|
|
_ if self.is_eof() => {
|
|
return Some(Token::EOF);
|
|
}
|
|
_ => {
|
|
self.state = State::Data;
|
|
self.reconsume = true;
|
|
return self.latest.take();
|
|
}
|
|
},
|
|
State::Behavior => match c {
|
|
x if x.is_ascii_alphanumeric() => {
|
|
self.reconsume = true;
|
|
if let Some(t) = self.latest.as_mut() {
|
|
match t {
|
|
Token::Define {
|
|
name: _,
|
|
ref mut behavior,
|
|
} => {
|
|
behavior.push(BehaviorItem::new());
|
|
}
|
|
_ => {}
|
|
}
|
|
}
|
|
self.state = State::BehaviorKey;
|
|
}
|
|
' ' | '\n' => {}
|
|
']' => {
|
|
self.state = State::Data;
|
|
return self.latest.take();
|
|
}
|
|
_ if self.is_eof() => {
|
|
return Some(Token::EOF);
|
|
}
|
|
_ => panic!("Unexpected character: {}", c),
|
|
},
|
|
State::BehaviorKey => match c {
|
|
x if x.is_ascii_alphanumeric() => {
|
|
self.buffer.push(c);
|
|
}
|
|
':' => {
|
|
if let Some(t) = self.latest.as_mut() {
|
|
match t {
|
|
Token::Define {
|
|
name: _,
|
|
ref mut behavior,
|
|
} => {
|
|
behavior.last_mut().unwrap().set_prefix(self.buffer.clone());
|
|
self.buffer.clear();
|
|
self.state = State::BehaviorKey;
|
|
}
|
|
_ => {}
|
|
}
|
|
}
|
|
}
|
|
'=' => {
|
|
if let Some(t) = self.latest.as_mut() {
|
|
match t {
|
|
Token::Define {
|
|
name: _,
|
|
ref mut behavior,
|
|
} => {
|
|
behavior.last_mut().unwrap().set_key(self.buffer.clone());
|
|
self.buffer.clear();
|
|
self.state = State::BehaviorValue;
|
|
}
|
|
_ => {}
|
|
}
|
|
}
|
|
}
|
|
_ if self.is_eof() => {
|
|
return Some(Token::EOF);
|
|
}
|
|
_ => panic!("Unexpected character: {}", c),
|
|
},
|
|
State::BehaviorValue => match c {
|
|
x if x.is_ascii_alphanumeric() => {
|
|
self.buffer.push(c);
|
|
}
|
|
']' => {
|
|
if let Some(t) = self.latest.as_mut() {
|
|
match t {
|
|
Token::Define {
|
|
name: _,
|
|
ref mut behavior,
|
|
} => {
|
|
behavior.last_mut().unwrap().set_value(self.buffer.clone());
|
|
self.buffer.clear();
|
|
self.state = State::Behavior;
|
|
}
|
|
_ => {}
|
|
}
|
|
}
|
|
return self.latest.take();
|
|
}
|
|
' ' | '\n' => {}
|
|
_ if self.is_eof() => {
|
|
return Some(Token::EOF);
|
|
}
|
|
_ => panic!("Unexpected character: {}", c),
|
|
},
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// States of the tokenizer's state machine.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum State {
    /// Outside any node: `(` opens a node, space/newline/tab are skipped and
    /// other characters are emitted one at a time.
    Data,
    /// Just after `(`, waiting for the first alphanumeric character of the name.
    NodeOpen,
    /// Reading the alphanumeric name of a definition.
    Define,
    /// The name is complete; `[` starts the behavior section, any other
    /// character ends the definition.
    AfterDefine,
    /// Inside the behavior section, between items.
    Behavior,
    /// Reading the prefix (terminated by `:`) or key (terminated by `=`) of an item.
    BehaviorKey,
    /// Reading the value of an item, terminated by `]`.
    BehaviorValue,
}
|
|
|
|
use crate::om::behavior::BehaviorItem;
|
|
|
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
|
pub enum Token {
|
|
Define {
|
|
name: String,
|
|
behavior: Vec<BehaviorItem>,
|
|
},
|
|
Character(char),
|
|
EOF,
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty input produces no tokens at all.
    #[test]
    fn test_empty_input() {
        let mut tokenizer = Tokenizer::new(String::new());
        assert_eq!(None, tokenizer.next());
    }

    /// A definition with one prefixed behavior item is followed by `EOF`.
    #[test]
    fn test_tokenizer() {
        let mut tokenizer = Tokenizer::new(String::from("(foo [prefix:key=value])"));

        let item = BehaviorItem {
            prefix: Some(String::from("prefix")),
            key: String::from("key"),
            value: String::from("value"),
        };
        let define = Token::Define {
            name: String::from("foo"),
            behavior: vec![item],
        };

        assert_eq!(Some(define), tokenizer.next());
        assert_eq!(Some(Token::EOF), tokenizer.next());
    }
}
|