pub struct Tokenizer { state: State, pos: usize, reconsume: bool, latest: Option, input: Vec, buffer: String, } impl Tokenizer { pub fn new(input: String) -> Self { Self { state: State::Data, pos: 0, reconsume: false, latest: None, input: input.chars().collect(), buffer: String::new(), } } fn is_eof(&self) -> bool { self.pos >= self.input.len() } fn consume_input(&mut self) -> char { let c = self.input[self.pos]; self.pos += 1; c } } impl Iterator for Tokenizer { type Item = Token; fn next(&mut self) -> Option { if self.is_eof() { return None; } loop { if self.reconsume { self.pos -= 1; self.reconsume = false; } let c = self.consume_input(); match self.state { State::Data => match c { '(' => { self.state = State::NodeOpen; } ' ' | '\n' | '\t' => {} _ if self.is_eof() => { return Some(Token::EOF); } _ => { return Some(Token::Character(c)); } }, State::NodeOpen => match c { ' ' | '\n' => {} x if x.is_ascii_alphanumeric() => { self.state = State::Define; self.reconsume = true; continue; } _ if self.is_eof() => { return Some(Token::EOF); } _ => panic!("Unexpected character: {}", c), }, State::Define => match c { x if x.is_ascii_alphanumeric() => { self.buffer.push(c); } ' ' | '\n' => { self.state = State::AfterDefine; self.latest = Some(Token::Define { name: self.buffer.clone(), behavior: Vec::new(), }); self.buffer.clear(); } ')' => { self.state = State::Data; } _ if self.is_eof() => { return Some(Token::EOF); } _ => panic!("Unexpected character: {}", c), }, State::AfterDefine => match c { ' ' | '\n' => {} '[' => { self.state = State::Behavior; } _ if self.is_eof() => { return Some(Token::EOF); } _ => { self.state = State::Data; self.reconsume = true; return self.latest.take(); } }, State::Behavior => match c { x if x.is_ascii_alphanumeric() => { self.reconsume = true; if let Some(t) = self.latest.as_mut() { match t { Token::Define { name: _, ref mut behavior, } => { behavior.push(BehaviorItem::new()); } _ => {} } } self.state = State::BehaviorKey; } ' ' | '\n' => {} ']' => { self.state = State::Data; return self.latest.take(); } _ if self.is_eof() => { return Some(Token::EOF); } _ => panic!("Unexpected character: {}", c), }, State::BehaviorKey => match c { x if x.is_ascii_alphanumeric() => { self.buffer.push(c); } ':' => { if let Some(t) = self.latest.as_mut() { match t { Token::Define { name: _, ref mut behavior, } => { behavior.last_mut().unwrap().set_prefix(self.buffer.clone()); self.buffer.clear(); self.state = State::BehaviorKey; } _ => {} } } } '=' => { if let Some(t) = self.latest.as_mut() { match t { Token::Define { name: _, ref mut behavior, } => { behavior.last_mut().unwrap().set_key(self.buffer.clone()); self.buffer.clear(); self.state = State::BehaviorValue; } _ => {} } } } _ if self.is_eof() => { return Some(Token::EOF); } _ => panic!("Unexpected character: {}", c), }, State::BehaviorValue => match c { x if x.is_ascii_alphanumeric() => { self.buffer.push(c); } ']' => { if let Some(t) = self.latest.as_mut() { match t { Token::Define { name: _, ref mut behavior, } => { behavior.last_mut().unwrap().set_value(self.buffer.clone()); self.buffer.clear(); self.state = State::Behavior; } _ => {} } } return self.latest.take(); } ' ' | '\n' => {} _ if self.is_eof() => { return Some(Token::EOF); } _ => panic!("Unexpected character: {}", c), }, } } } } #[derive(Clone, Copy)] enum State { Data, NodeOpen, Define, AfterDefine, Behavior, BehaviorKey, BehaviorValue, } use crate::om::behavior::BehaviorItem; #[derive(Debug, Clone, PartialEq, Eq)] pub enum Token { Define { name: String, behavior: Vec, }, Character(char), EOF, } #[cfg(test)] mod tests { use super::*; #[test] fn test_empty_input() { let input = "".to_string(); let mut tokenizer = Tokenizer::new(input); assert_eq!(None, tokenizer.next()); } #[test] fn test_tokenizer() { let input = "(foo [prefix:key=value])".to_string(); let mut tokenizer = Tokenizer::new(input); let expected = [ Token::Define { name: "foo".to_string(), behavior: vec![BehaviorItem { prefix: Some("prefix".to_string()), key: "key".to_string(), value: "value".to_string(), }], }, Token::EOF, ]; for e in expected { assert_eq!(Some(e), tokenizer.next()); } } }