// Single-file rendering of the `Lang` crate (version 0.1.0, edition 2021, no
// dependencies) as created by its initial commit ("init"). The original file
// layout maps onto the inline modules below:
//
//   src/main.rs          -> `main` at the bottom of this file
//   src/om/mod.rs        -> `om`
//   src/om/behavior.rs   -> `om::behavior`
//   src/om/tokenizer.rs  -> `om::tokenizer`
//   src/renderer/mod.rs  -> `renderer` (empty)

pub mod om {
    pub mod behavior {
        /// One `prefix:key=value` entry of a node's behavior list.
        ///
        /// The prefix is optional; key and value default to empty strings.
        #[derive(Debug, Clone, Default, PartialEq, Eq)]
        pub struct BehaviorItem {
            pub prefix: Option<String>,
            pub key: String,
            pub value: String,
        }

        impl BehaviorItem {
            /// Creates an item with no prefix and an empty key and value.
            pub fn new() -> Self {
                Self::default()
            }

            /// Sets the prefix (the text before `:`).
            pub fn set_prefix(&mut self, prefix: String) {
                self.prefix = Some(prefix);
            }

            /// Sets the value (the text after `=`).
            pub fn set_value(&mut self, value: String) {
                self.value = value;
            }

            /// Sets the key (the text before `=`).
            pub fn set_key(&mut self, key: String) {
                self.key = key;
            }

            /// Returns `"prefix:key"` when a prefix is set, otherwise just the key.
            pub fn name(&self) -> String {
                match &self.prefix {
                    Some(prefix) => format!("{}:{}", prefix, self.key),
                    None => self.key.clone(),
                }
            }
        }
    }

    pub mod tokenizer {
        use super::behavior::BehaviorItem;

        /// A token produced by [`Tokenizer`].
        #[derive(Debug, Clone, PartialEq, Eq)]
        pub enum Token {
            /// A node `(name [prefix:key=value])` with its behavior items.
            Define {
                name: String,
                behavior: Vec<BehaviorItem>,
            },
            /// A character found outside of any node.
            Character(char),
            /// End of input; emitted once, after the last token.
            EOF,
        }

        /// Scanner states. `NodeClose` is entered after a behavior list's `]`
        /// and waits for the node's closing `)`.
        #[derive(Debug, Clone, Copy, PartialEq, Eq)]
        enum State {
            Data,
            NodeOpen,
            Define,
            AfterDefine,
            Behavior,
            BehaviorKey,
            BehaviorValue,
            NodeClose,
        }

        /// Whitespace that is skipped between tokens and inside nodes.
        fn is_blank(c: char) -> bool {
            matches!(c, ' ' | '\n' | '\t')
        }

        /// Reports a character the current state cannot accept.
        fn unexpected(c: char) -> ! {
            panic!("Unexpected character: {}", c)
        }

        /// Character-level state machine that turns source text into [`Token`]s.
        ///
        /// Iterating yields the tokens of the input followed by a single
        /// [`Token::EOF`]. An empty input yields no tokens at all.
        pub struct Tokenizer {
            state: State,
            /// Index of the next character of `input` to read.
            pos: usize,
            /// Set once `Token::EOF` has been handed out (or the input was empty).
            finished: bool,
            input: Vec<char>,
            /// Text of the name, prefix, key or value currently being read.
            buffer: String,
            /// Name of the node being built.
            name: String,
            /// Behavior items collected for the node being built.
            items: Vec<BehaviorItem>,
        }

        impl Tokenizer {
            /// Creates a tokenizer over `input`.
            pub fn new(input: String) -> Self {
                let input: Vec<char> = input.chars().collect();
                Self {
                    state: State::Data,
                    pos: 0,
                    finished: input.is_empty(),
                    input,
                    buffer: String::new(),
                    name: String::new(),
                    items: Vec::new(),
                }
            }

            /// Reads the next character, or `None` once the input is exhausted.
            fn advance(&mut self) -> Option<char> {
                let c = self.input.get(self.pos).copied()?;
                self.pos += 1;
                Some(c)
            }

            /// Steps back one character and switches to `state`, so the
            /// character just read is processed again under the new state.
            fn reconsume_in(&mut self, state: State) {
                self.pos -= 1;
                self.state = state;
            }

            /// Moves the accumulated text out of the buffer, leaving it empty.
            fn take_buffer(&mut self) -> String {
                std::mem::take(&mut self.buffer)
            }

            /// Builds the `Define` token for the node under construction and
            /// resets the per-node storage.
            fn take_define(&mut self) -> Token {
                Token::Define {
                    name: std::mem::take(&mut self.name),
                    behavior: std::mem::take(&mut self.items),
                }
            }

            /// Produces the token for "no more input".
            ///
            /// A node whose name was completely read is still emitted; a node cut
            /// off any earlier or later is discarded and `EOF` is returned.
            fn end_of_input(&mut self) -> Token {
                if self.state == State::AfterDefine {
                    self.state = State::Data;
                    return self.take_define();
                }
                self.finished = true;
                Token::EOF
            }
        }

        impl Iterator for Tokenizer {
            type Item = Token;

            /// Returns the next token, or `None` after `Token::EOF` was returned.
            ///
            /// # Panics
            ///
            /// Panics with `Unexpected character: <c>` when a character inside a
            /// node is not allowed at that position.
            fn next(&mut self) -> Option<Token> {
                if self.finished {
                    return None;
                }
                loop {
                    let c = match self.advance() {
                        Some(c) => c,
                        None => return Some(self.end_of_input()),
                    };
                    match self.state {
                        State::Data => match c {
                            '(' => self.state = State::NodeOpen,
                            _ if is_blank(c) => {}
                            _ => return Some(Token::Character(c)),
                        },
                        State::NodeOpen => match c {
                            _ if is_blank(c) => {}
                            _ if c.is_ascii_alphanumeric() => self.reconsume_in(State::Define),
                            _ => unexpected(c),
                        },
                        State::Define => match c {
                            _ if c.is_ascii_alphanumeric() => self.buffer.push(c),
                            _ if is_blank(c) => {
                                self.name = self.take_buffer();
                                self.state = State::AfterDefine;
                            }
                            ')' => {
                                // `(name)`: a node without a behavior list.
                                self.name = self.take_buffer();
                                self.state = State::Data;
                                return Some(self.take_define());
                            }
                            _ => unexpected(c),
                        },
                        State::AfterDefine => match c {
                            _ if is_blank(c) => {}
                            '[' => self.state = State::Behavior,
                            ')' => {
                                self.state = State::Data;
                                return Some(self.take_define());
                            }
                            _ => {
                                // Anything else ends the node and is data again.
                                self.reconsume_in(State::Data);
                                return Some(self.take_define());
                            }
                        },
                        State::Behavior => match c {
                            _ if c.is_ascii_alphanumeric() => {
                                self.items.push(BehaviorItem::new());
                                self.reconsume_in(State::BehaviorKey);
                            }
                            _ if is_blank(c) => {}
                            ']' => {
                                self.state = State::NodeClose;
                                return Some(self.take_define());
                            }
                            _ => unexpected(c),
                        },
                        State::BehaviorKey => match c {
                            _ if c.is_ascii_alphanumeric() => self.buffer.push(c),
                            ':' => {
                                let prefix = self.take_buffer();
                                if let Some(item) = self.items.last_mut() {
                                    item.set_prefix(prefix);
                                }
                            }
                            '=' => {
                                let key = self.take_buffer();
                                if let Some(item) = self.items.last_mut() {
                                    item.set_key(key);
                                }
                                self.state = State::BehaviorValue;
                            }
                            _ => unexpected(c),
                        },
                        State::BehaviorValue => match c {
                            _ if c.is_ascii_alphanumeric() => self.buffer.push(c),
                            ']' => {
                                let value = self.take_buffer();
                                if let Some(item) = self.items.last_mut() {
                                    item.set_value(value);
                                }
                                self.state = State::NodeClose;
                                return Some(self.take_define());
                            }
                            _ if is_blank(c) => {}
                            _ => unexpected(c),
                        },
                        State::NodeClose => match c {
                            _ if is_blank(c) => {}
                            ')' => self.state = State::Data,
                            // A missing `)` is tolerated: carry on as plain data.
                            _ => self.reconsume_in(State::Data),
                        },
                    }
                }
            }
        }

        #[cfg(test)]
        mod tests {
            use super::*;

            fn lex(source: &str) -> Vec<Token> {
                Tokenizer::new(source.to_string()).collect()
            }

            #[test]
            fn test_empty_input() {
                let input = "".to_string();
                let mut tokenizer = Tokenizer::new(input);
                assert_eq!(None, tokenizer.next());
            }

            #[test]
            fn test_tokenizer() {
                let input = "(foo [prefix:key=value])".to_string();
                let mut tokenizer = Tokenizer::new(input);
                let expected = vec![
                    Token::Define {
                        name: "foo".to_string(),
                        behavior: vec![BehaviorItem {
                            prefix: Some("prefix".to_string()),
                            key: "key".to_string(),
                            value: "value".to_string(),
                        }],
                    },
                    Token::EOF,
                ];
                for e in expected {
                    assert_eq!(Some(e), tokenizer.next());
                }
                assert_eq!(None, tokenizer.next());
            }

            #[test]
            fn test_last_character_is_not_swallowed() {
                assert_eq!(
                    lex("ab"),
                    vec![Token::Character('a'), Token::Character('b'), Token::EOF]
                );
            }

            #[test]
            fn test_truncated_input_does_not_panic() {
                assert_eq!(lex("(foo [k="), vec![Token::EOF]);
                assert_eq!(lex(" \n"), vec![Token::EOF]);
            }
        }
    }
}

pub mod renderer {}

/// Reads all of stdin, tokenizes it and prints one token per line.
fn main() {
    use std::io::Read;

    let mut input = String::new();
    std::io::stdin()
        .read_to_string(&mut input)
        .expect("Failed to read from stdin");
    for token in om::tokenizer::Tokenizer::new(input) {
        println!("{:?}", token);
    }
}