This commit is contained in:
Keisuke Hirata 2024-12-09 00:21:13 +09:00
commit b108b3ee55
8 changed files with 319 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/target

7
Cargo.lock generated Normal file
View File

@ -0,0 +1,7 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "Lang"
version = "0.1.0"

6
Cargo.toml Normal file
View File

@ -0,0 +1,6 @@
[package]
name = "Lang"
version = "0.1.0"
edition = "2021"
[dependencies]

12
src/main.rs Normal file
View File

@ -0,0 +1,12 @@
use std::io::{self, Read};
pub mod om;
pub mod renderer;
/// Reads the whole program text from stdin and prints every token the
/// tokenizer produces, one per line (debug representation).
fn main() {
    let mut source = String::new();
    io::stdin()
        .read_to_string(&mut source)
        .expect("Failed to read from stdin");
    // Tokenizer implements Iterator, so a plain for-loop drains it.
    for token in om::tokenizer::Tokenizer::new(source) {
        println!("{:?}", token);
    }
}

36
src/om/behavior.rs Normal file
View File

@ -0,0 +1,36 @@
/// One `key=value` (optionally `prefix:key=value`) entry from a node's
/// `[...]` behavior list, built up incrementally by the tokenizer.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct BehaviorItem {
    /// Namespace written before `:` in the key, if any (e.g. `prefix` in `prefix:key`).
    pub prefix: Option<String>,
    /// The key text (empty until `set_key` is called).
    pub key: String,
    /// The value text (empty until `set_value` is called).
    pub value: String,
}

impl BehaviorItem {
    /// Creates an empty item; the tokenizer fills the fields in via the setters.
    pub fn new() -> Self {
        // Deriving `Default` gives the same all-empty state as the original
        // hand-written initializer and satisfies clippy's `new_without_default`.
        Self::default()
    }

    /// Records the namespace part parsed before `:`.
    pub fn set_prefix(&mut self, prefix: String) {
        self.prefix = Some(prefix);
    }

    /// Records the value parsed after `=`.
    pub fn set_value(&mut self, value: String) {
        self.value = value;
    }

    /// Records the key parsed before `=`.
    pub fn set_key(&mut self, key: String) {
        self.key = key;
    }

    /// Returns the qualified name: `prefix:key` when a prefix is set,
    /// otherwise just `key`.
    pub fn name(&self) -> String {
        match &self.prefix {
            Some(prefix) => format!("{}:{}", prefix, self.key),
            None => self.key.clone(),
        }
    }
}

3
src/om/mod.rs Normal file
View File

@ -0,0 +1,3 @@
pub mod tokenizer;
pub mod behavior;

254
src/om/tokenizer.rs Normal file
View File

@ -0,0 +1,254 @@
/// Streaming tokenizer over the source text; tokens are produced lazily
/// through the `Iterator` implementation below.
pub struct Tokenizer {
    state: State,          // current state of the finite-state machine
    pos: usize,            // index of the next character to consume in `input`
    reconsume: bool,       // when true, re-read the previously consumed character
    latest: Option<Token>, // pending Define token being assembled before emission
    input: Vec<char>,      // whole input, pre-split into chars for indexed access
    buffer: String,        // accumulates the name/key/value currently being read
}
impl Tokenizer {
    /// Builds a tokenizer positioned at the first character of `input`.
    pub fn new(input: String) -> Self {
        let chars: Vec<char> = input.chars().collect();
        Self {
            input: chars,
            pos: 0,
            state: State::Data,
            reconsume: false,
            latest: None,
            buffer: String::new(),
        }
    }

    /// True once every character has been consumed.
    fn is_eof(&self) -> bool {
        self.input.len() <= self.pos
    }

    /// Returns the character at the cursor and advances it.
    /// Panics when called at end of input, so callers must check `is_eof` first.
    fn consume_input(&mut self) -> char {
        let current = self.input[self.pos];
        self.pos += 1;
        current
    }
}
impl Iterator for Tokenizer {
    type Item = Token;

    /// Drives the state machine until a complete token is available.
    /// Returns `None` once the whole input has been consumed.
    fn next(&mut self) -> Option<Self::Item> {
        if self.is_eof() {
            return None;
        }
        loop {
            if self.reconsume {
                // Step back so the character that triggered the last
                // transition is examined again in the new state.
                self.pos -= 1;
                self.reconsume = false;
            }
            // Bug fix: transitions that fall through without returning (e.g.
            // `)` in State::Define, or `(` as the final character) could loop
            // back here with the cursor already at end-of-input; the original
            // then indexed past `input` in `consume_input` and panicked
            // (inputs like "(foo)" or "(" crashed). Report EOF instead.
            if self.is_eof() {
                return Some(Token::EOF);
            }
            let c = self.consume_input();
            match self.state {
                State::Data => match c {
                    '(' => {
                        self.state = State::NodeOpen;
                    }
                    ' ' | '\n' | '\t' => {}
                    // NOTE: this guard precedes the catch-all, so the very
                    // last character of the input is reported as EOF rather
                    // than as a Character token (pre-existing behavior, kept).
                    _ if self.is_eof() => {
                        return Some(Token::EOF);
                    }
                    _ => {
                        return Some(Token::Character(c));
                    }
                },
                State::NodeOpen => match c {
                    ' ' | '\n' => {}
                    x if x.is_ascii_alphanumeric() => {
                        // First character of the node name: re-read it in Define.
                        self.state = State::Define;
                        self.reconsume = true;
                        continue;
                    }
                    _ if self.is_eof() => {
                        return Some(Token::EOF);
                    }
                    _ => panic!("Unexpected character: {}", c),
                },
                State::Define => match c {
                    x if x.is_ascii_alphanumeric() => {
                        self.buffer.push(c);
                    }
                    ' ' | '\n' => {
                        // Name complete; hold the token in `latest` until we
                        // know whether a `[...]` behavior list follows.
                        self.state = State::AfterDefine;
                        self.latest = Some(Token::Define {
                            name: self.buffer.clone(),
                            behavior: Vec::new(),
                        });
                        self.buffer.clear();
                    }
                    ')' => {
                        self.state = State::Data;
                    }
                    _ if self.is_eof() => {
                        return Some(Token::EOF);
                    }
                    _ => panic!("Unexpected character: {}", c),
                },
                State::AfterDefine => match c {
                    ' ' | '\n' => {}
                    '[' => {
                        self.state = State::Behavior;
                    }
                    _ if self.is_eof() => {
                        return Some(Token::EOF);
                    }
                    _ => {
                        // No behavior list: emit the pending Define and
                        // re-read `c` from the Data state.
                        self.state = State::Data;
                        self.reconsume = true;
                        return self.latest.take();
                    }
                },
                State::Behavior => match c {
                    x if x.is_ascii_alphanumeric() => {
                        // Start of a new item: allocate it now, then re-read
                        // this character as the first key character.
                        self.reconsume = true;
                        if let Some(Token::Define { behavior, .. }) = self.latest.as_mut() {
                            behavior.push(BehaviorItem::new());
                        }
                        self.state = State::BehaviorKey;
                    }
                    ' ' | '\n' => {}
                    ']' => {
                        self.state = State::Data;
                        return self.latest.take();
                    }
                    _ if self.is_eof() => {
                        return Some(Token::EOF);
                    }
                    _ => panic!("Unexpected character: {}", c),
                },
                State::BehaviorKey => match c {
                    x if x.is_ascii_alphanumeric() => {
                        self.buffer.push(c);
                    }
                    ':' => {
                        // The text accumulated so far was actually a prefix,
                        // not the key; store it and keep reading the real key.
                        if let Some(Token::Define { behavior, .. }) = self.latest.as_mut() {
                            behavior.last_mut().unwrap().set_prefix(self.buffer.clone());
                            self.buffer.clear();
                            self.state = State::BehaviorKey;
                        }
                    }
                    '=' => {
                        if let Some(Token::Define { behavior, .. }) = self.latest.as_mut() {
                            behavior.last_mut().unwrap().set_key(self.buffer.clone());
                            self.buffer.clear();
                            self.state = State::BehaviorValue;
                        }
                    }
                    _ if self.is_eof() => {
                        return Some(Token::EOF);
                    }
                    _ => panic!("Unexpected character: {}", c),
                },
                State::BehaviorValue => match c {
                    x if x.is_ascii_alphanumeric() => {
                        self.buffer.push(c);
                    }
                    ']' => {
                        // End of the behavior list: finalize the last item and
                        // emit the completed Define token.
                        if let Some(Token::Define { behavior, .. }) = self.latest.as_mut() {
                            behavior.last_mut().unwrap().set_value(self.buffer.clone());
                            self.buffer.clear();
                            self.state = State::Behavior;
                        }
                        return self.latest.take();
                    }
                    ' ' | '\n' => {}
                    _ if self.is_eof() => {
                        return Some(Token::EOF);
                    }
                    _ => panic!("Unexpected character: {}", c),
                },
            }
        }
    }
}
/// States of the tokenizer's finite-state machine.
#[derive(Clone, Copy)]
enum State {
    /// Outside any node; non-whitespace characters become `Token::Character`.
    Data,
    /// Just consumed `(`; waiting for the node name to start.
    NodeOpen,
    /// Accumulating the node name into the tokenizer's buffer.
    Define,
    /// Name finished; deciding whether a `[...]` behavior list follows.
    AfterDefine,
    /// Inside `[...]`, between behavior items.
    Behavior,
    /// Accumulating a behavior key (treated as a prefix if `:` follows).
    BehaviorKey,
    /// Accumulating a behavior value, terminated by `]`.
    BehaviorValue,
}
use crate::om::behavior::BehaviorItem;
/// Tokens produced by `Tokenizer`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
    /// A `(name [prefix:key=value ...])` node definition; `behavior` is empty
    /// when no `[...]` list was present.
    Define {
        name: String,
        behavior: Vec<BehaviorItem>,
    },
    /// A single character encountered outside any node.
    Character(char),
    /// End of input.
    EOF,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty string must yield no tokens at all.
    #[test]
    fn test_empty_input() {
        assert_eq!(None, Tokenizer::new(String::new()).next());
    }

    /// A full node with one prefixed behavior item tokenizes into a single
    /// Define followed by EOF.
    #[test]
    fn test_tokenizer() {
        let tokens: Vec<Token> =
            Tokenizer::new("(foo [prefix:key=value])".to_string()).collect();
        assert_eq!(
            vec![
                Token::Define {
                    name: "foo".to_string(),
                    behavior: vec![BehaviorItem {
                        prefix: Some("prefix".to_string()),
                        key: "key".to_string(),
                        value: "value".to_string(),
                    }],
                },
                Token::EOF,
            ],
            tokens
        );
    }
}

0
src/renderer/mod.rs Normal file
View File