From ddcee75c8dc5f050446b3ff3083d7d2fa49a5082 Mon Sep 17 00:00:00 2001 From: Hare Date: Tue, 16 Jun 2026 02:02:17 +0900 Subject: [PATCH] Implement core lexer and parser --- crates/decodal-core/src/ast.rs | 133 +++++++ crates/decodal-core/src/diagnostic.rs | 41 +++ crates/decodal-core/src/lexer.rs | 342 +++++++++++++++++ crates/decodal-core/src/lib.rs | 12 + crates/decodal-core/src/parser.rs | 510 ++++++++++++++++++++++++++ crates/decodal-core/src/span.rs | 25 ++ doc/manual/souce/language/syntax.md | 21 +- 7 files changed, 1083 insertions(+), 1 deletion(-) create mode 100644 crates/decodal-core/src/ast.rs create mode 100644 crates/decodal-core/src/diagnostic.rs create mode 100644 crates/decodal-core/src/lexer.rs create mode 100644 crates/decodal-core/src/parser.rs create mode 100644 crates/decodal-core/src/span.rs diff --git a/crates/decodal-core/src/ast.rs b/crates/decodal-core/src/ast.rs new file mode 100644 index 0000000..cadbf5a --- /dev/null +++ b/crates/decodal-core/src/ast.rs @@ -0,0 +1,133 @@ +use alloc::{string::String, vec::Vec}; + +use crate::span::Span; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct ExprId(pub u32); + +#[derive(Debug, Default, Clone)] +pub struct Ast { + exprs: Vec, +} + +impl Ast { + pub fn new() -> Self { + Self { exprs: Vec::new() } + } + + pub fn push(&mut self, expr: Expr, span: Span) -> ExprId { + let id = ExprId(self.exprs.len() as u32); + self.exprs.push(SpannedExpr { expr, span }); + id + } + + pub fn get(&self, id: ExprId) -> &SpannedExpr { + &self.exprs[id.0 as usize] + } + + pub fn span(&self, id: ExprId) -> Span { + self.get(id).span + } + + pub fn len(&self) -> usize { + self.exprs.len() + } + + pub fn is_empty(&self) -> bool { + self.exprs.is_empty() + } +} + +#[derive(Debug, Clone)] +pub struct SpannedExpr { + pub expr: Expr, + pub span: Span, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum Expr { + Literal(Literal), + Ident(String), + Object(Vec), + Array(Vec), + Let { + bindings: Vec, + body: ExprId, + }, + Import(String), + Path { + base: ExprId, + field: String, + }, + Call { + callee: ExprId, + args: Vec, + }, + Function { + params: Vec, + body: ExprId, + }, + Match { + scrutinee: ExprId, + arms: Vec, + }, + Binary { + op: BinaryOp, + lhs: ExprId, + rhs: ExprId, + }, + Default { + base: ExprId, + fallback: ExprId, + }, + CompareConstraint { + op: CompareOp, + value: ExprId, + }, + RegexConstraint(String), + Wildcard, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct Field { + pub path: Vec, + pub value: ExprId, + pub span: Span, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct Param { + pub name: String, + pub constraint: Option, + pub span: Span, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct MatchArm { + pub pattern: ExprId, + pub body: ExprId, + pub span: Span, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum Literal { + String(String), + Int(i64), + Float(f64), + Bool(bool), +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum BinaryOp { + And, + Patch, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum CompareOp { + Gt, + Gte, + Lt, + Lte, + Eq, +} diff --git a/crates/decodal-core/src/diagnostic.rs b/crates/decodal-core/src/diagnostic.rs new file mode 100644 index 0000000..033bb81 --- /dev/null +++ b/crates/decodal-core/src/diagnostic.rs @@ -0,0 +1,41 @@ +use alloc::string::String; + +use crate::span::Span; + +pub type Result = core::result::Result; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Diagnostic { + pub kind: DiagnosticKind, + pub span: Span, + pub message: String, +} + +impl Diagnostic { + pub fn new(kind: DiagnosticKind, span: Span, message: impl Into) -> Self { + Self { + kind, + span, + message: message.into(), + } + } + + pub fn syntax(span: Span, message: impl Into) -> Self { + Self::new(DiagnosticKind::Syntax, span, message) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum DiagnosticKind { + Syntax, + UnresolvedIdentifier, + TypeMismatch, + ConstraintViolation, + Conflict, + DefaultConflict, + Cycle, + Import, + MatchFailure, + Materialize, + UnsupportedFeature, +} diff --git a/crates/decodal-core/src/lexer.rs b/crates/decodal-core/src/lexer.rs new file mode 100644 index 0000000..a898042 --- /dev/null +++ b/crates/decodal-core/src/lexer.rs @@ -0,0 +1,342 @@ +use alloc::{string::String, vec::Vec}; + +use crate::{Diagnostic, Span, diagnostic::Result}; + +#[derive(Debug, Clone, PartialEq)] +pub struct Token { + pub kind: TokenKind, + pub span: Span, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum TokenKind { + Ident(String), + Int(i64), + Float(f64), + String(String), + Regex(String), + True, + False, + Let, + In, + Match, + Import, + Default, + Underscore, + LBrace, + RBrace, + LBracket, + RBracket, + LParen, + RParen, + Semicolon, + Comma, + Dot, + Colon, + Equal, + Arrow, + Amp, + SlashSlash, + Gt, + Gte, + Lt, + Lte, + Eof, +} + +pub struct Lexer<'a> { + source: &'a str, + bytes: &'a [u8], + pos: usize, +} + +impl<'a> Lexer<'a> { + pub fn new(source: &'a str) -> Self { + Self { + source, + bytes: source.as_bytes(), + pos: 0, + } + } + + pub fn tokenize(mut self) -> Result> { + let mut tokens = Vec::new(); + loop { + let token = self.next_token()?; + let is_eof = token.kind == TokenKind::Eof; + tokens.push(token); + if is_eof { + return Ok(tokens); + } + } + } + + fn next_token(&mut self) -> Result { + self.skip_ws_and_comments(); + let start = self.pos; + let Some(ch) = self.peek() else { + return Ok(Token { + kind: TokenKind::Eof, + span: Span::empty(self.pos), + }); + }; + + let kind = match ch { + b'{' => { + self.pos += 1; + TokenKind::LBrace + } + b'}' => { + self.pos += 1; + TokenKind::RBrace + } + b'[' => { + self.pos += 1; + TokenKind::LBracket + } + b']' => { + self.pos += 1; + TokenKind::RBracket + } + b'(' => { + self.pos += 1; + TokenKind::LParen + } + b')' => { + self.pos += 1; + TokenKind::RParen + } + b';' => { + self.pos += 1; + TokenKind::Semicolon + } + b',' => { + self.pos += 1; + TokenKind::Comma + } + b'.' => { + self.pos += 1; + TokenKind::Dot + } + b':' => { + self.pos += 1; + TokenKind::Colon + } + b'_' => { + self.pos += 1; + TokenKind::Underscore + } + b'&' => { + self.pos += 1; + TokenKind::Amp + } + b'=' => { + self.pos += 1; + if self.consume(b'>') { + TokenKind::Arrow + } else { + TokenKind::Equal + } + } + b'>' => { + self.pos += 1; + if self.consume(b'=') { + TokenKind::Gte + } else { + TokenKind::Gt + } + } + b'<' => { + self.pos += 1; + if self.consume(b'=') { + TokenKind::Lte + } else { + TokenKind::Lt + } + } + b'/' => { + self.pos += 1; + if self.consume(b'/') { + TokenKind::SlashSlash + } else { + self.lex_regex(start)? + } + } + b'"' => self.lex_string()?, + b'0'..=b'9' => self.lex_number()?, + c if is_ident_start(c) => self.lex_ident_or_keyword(), + _ => { + return Err(Diagnostic::syntax( + Span::new(start, start + 1), + "unexpected character", + )); + } + }; + + Ok(Token { + kind, + span: Span::new(start, self.pos), + }) + } + + fn skip_ws_and_comments(&mut self) { + loop { + while matches!(self.peek(), Some(b' ' | b'\t' | b'\r' | b'\n')) { + self.pos += 1; + } + if self.peek() == Some(b'#') { + while let Some(c) = self.peek() { + self.pos += 1; + if c == b'\n' { + break; + } + } + continue; + } + break; + } + } + + fn lex_string(&mut self) -> Result { + let start = self.pos; + self.pos += 1; + let mut value = String::new(); + while let Some(c) = self.peek() { + self.pos += 1; + match c { + b'"' => return Ok(TokenKind::String(value)), + b'\\' => { + let Some(escaped) = self.peek() else { + return Err(Diagnostic::syntax( + Span::new(start, self.pos), + "unterminated escape", + )); + }; + self.pos += 1; + let ch = match escaped { + b'"' => '"', + b'\\' => '\\', + b'n' => '\n', + b'r' => '\r', + b't' => '\t', + other => other as char, + }; + value.push(ch); + } + other => value.push(other as char), + } + } + Err(Diagnostic::syntax( + Span::new(start, self.pos), + "unterminated string", + )) + } + + fn lex_regex(&mut self, start: usize) -> Result { + let mut pattern = String::new(); + let mut escaped = false; + while let Some(c) = self.peek() { + self.pos += 1; + if escaped { + pattern.push(c as char); + escaped = false; + continue; + } + match c { + b'\\' => { + pattern.push('\\'); + escaped = true; + } + b'/' => return Ok(TokenKind::Regex(pattern)), + other => pattern.push(other as char), + } + } + Err(Diagnostic::syntax( + Span::new(start, self.pos), + "unterminated regex", + )) + } + + fn lex_number(&mut self) -> Result { + let start = self.pos; + while matches!(self.peek(), Some(b'0'..=b'9')) { + self.pos += 1; + } + let mut is_float = false; + if self.peek() == Some(b'.') && matches!(self.peek_n(1), Some(b'0'..=b'9')) { + is_float = true; + self.pos += 1; + while matches!(self.peek(), Some(b'0'..=b'9')) { + self.pos += 1; + } + } + let text = &self.source[start..self.pos]; + if is_float { + text.parse::().map(TokenKind::Float).map_err(|_| { + Diagnostic::syntax(Span::new(start, self.pos), "invalid float literal") + }) + } else { + text.parse::() + .map(TokenKind::Int) + .map_err(|_| Diagnostic::syntax(Span::new(start, self.pos), "invalid int literal")) + } + } + + fn lex_ident_or_keyword(&mut self) -> TokenKind { + let start = self.pos; + self.pos += 1; + while matches!(self.peek(), Some(c) if is_ident_continue(c)) { + self.pos += 1; + } + let text = &self.source[start..self.pos]; + match text { + "true" => TokenKind::True, + "false" => TokenKind::False, + "let" => TokenKind::Let, + "in" => TokenKind::In, + "match" => TokenKind::Match, + "import" => TokenKind::Import, + "default" => TokenKind::Default, + _ => TokenKind::Ident(String::from(text)), + } + } + + fn peek(&self) -> Option { + self.bytes.get(self.pos).copied() + } + + fn peek_n(&self, n: usize) -> Option { + self.bytes.get(self.pos + n).copied() + } + + fn consume(&mut self, expected: u8) -> bool { + if self.peek() == Some(expected) { + self.pos += 1; + true + } else { + false + } + } +} + +fn is_ident_start(c: u8) -> bool { + c.is_ascii_alphabetic() +} + +fn is_ident_continue(c: u8) -> bool { + c.is_ascii_alphanumeric() || c == b'_' +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn tokenizes_basic_source() { + let tokens = Lexer::new("port = Int & >= 1;").tokenize().unwrap(); + assert!(matches!(tokens[0].kind, TokenKind::Ident(_))); + assert_eq!(tokens[1].kind, TokenKind::Equal); + assert_eq!(tokens[3].kind, TokenKind::Amp); + assert_eq!(tokens[4].kind, TokenKind::Gte); + } +} diff --git a/crates/decodal-core/src/lib.rs b/crates/decodal-core/src/lib.rs index bf479d9..ef37d57 100644 --- a/crates/decodal-core/src/lib.rs +++ b/crates/decodal-core/src/lib.rs @@ -2,6 +2,18 @@ extern crate alloc; +pub mod ast; +pub mod diagnostic; +pub mod lexer; +pub mod parser; +pub mod span; + +pub use ast::{Ast, BinaryOp, CompareOp, Expr, ExprId, Field, Literal, Param}; +pub use diagnostic::{Diagnostic, DiagnosticKind, Result}; +pub use lexer::{Lexer, Token, TokenKind}; +pub use parser::{ParseOutput, Parser, parse_source}; +pub use span::Span; + pub fn version() -> &'static str { env!("CARGO_PKG_VERSION") } diff --git a/crates/decodal-core/src/parser.rs b/crates/decodal-core/src/parser.rs new file mode 100644 index 0000000..b2cbf0d --- /dev/null +++ b/crates/decodal-core/src/parser.rs @@ -0,0 +1,510 @@ +use alloc::{string::String, vec::Vec}; + +use crate::{ + Span, + ast::{Ast, BinaryOp, CompareOp, Expr, ExprId, Field, Literal, MatchArm, Param}, + diagnostic::{Diagnostic, Result}, + lexer::{Lexer, Token, TokenKind}, +}; + +#[derive(Debug, Clone)] +pub struct ParseOutput { + pub ast: Ast, + pub root: ExprId, +} + +pub fn parse_source(source: &str) -> Result { + let tokens = Lexer::new(source).tokenize()?; + Parser::new(tokens).parse() +} + +pub struct Parser { + tokens: Vec, + pos: usize, + ast: Ast, +} + +impl Parser { + pub fn new(tokens: Vec) -> Self { + Self { + tokens, + pos: 0, + ast: Ast::new(), + } + } + + pub fn parse(mut self) -> Result { + let root = if self.starts_field() { + let fields = self.parse_fields_until_eof()?; + let span = fields + .first() + .map(|f| { + fields + .iter() + .fold(f.span, |acc, field| acc.join(field.span)) + }) + .unwrap_or_else(|| Span::empty(0)); + self.ast.push(Expr::Object(fields), span) + } else { + let expr = self.parse_expr(0)?; + self.expect_eof()?; + expr + }; + Ok(ParseOutput { + ast: self.ast, + root, + }) + } + + fn parse_expr(&mut self, min_bp: u8) -> Result { + let mut lhs = self.parse_prefix()?; + + loop { + if self.at_eof() || self.is_expr_stop() { + break; + } + + // postfix: path reference + if self.consume_kind(&TokenKind::Dot).is_some() { + let (field, field_span) = self.expect_ident()?; + let span = self.ast.span(lhs).join(field_span); + lhs = self.ast.push(Expr::Path { base: lhs, field }, span); + continue; + } + + // postfix: call + if self.consume_kind(&TokenKind::LParen).is_some() { + let mut args = Vec::new(); + if self.consume_kind(&TokenKind::RParen).is_none() { + loop { + args.push(self.parse_expr(0)?); + if self.consume_kind(&TokenKind::Comma).is_some() { + continue; + } + self.expect_kind(&TokenKind::RParen, "expected ')' after call arguments")?; + break; + } + } + let span = self.ast.span(lhs).join(self.previous_span()); + lhs = self.ast.push(Expr::Call { callee: lhs, args }, span); + continue; + } + + let Some((kind, l_bp, r_bp)) = self.peek_infix() else { + break; + }; + if l_bp < min_bp { + break; + } + let op_span = self.advance().span; + let rhs = self.parse_expr(r_bp)?; + let span = self.ast.span(lhs).join(self.ast.span(rhs)).join(op_span); + lhs = match kind { + InfixKind::And => self.ast.push( + Expr::Binary { + op: BinaryOp::And, + lhs, + rhs, + }, + span, + ), + InfixKind::Patch => self.ast.push( + Expr::Binary { + op: BinaryOp::Patch, + lhs, + rhs, + }, + span, + ), + InfixKind::Default => self.ast.push( + Expr::Default { + base: lhs, + fallback: rhs, + }, + span, + ), + }; + } + + Ok(lhs) + } + + fn parse_prefix(&mut self) -> Result { + let token = self.advance().clone(); + match token.kind { + TokenKind::String(value) => Ok(self + .ast + .push(Expr::Literal(Literal::String(value)), token.span)), + TokenKind::Int(value) => Ok(self + .ast + .push(Expr::Literal(Literal::Int(value)), token.span)), + TokenKind::Float(value) => Ok(self + .ast + .push(Expr::Literal(Literal::Float(value)), token.span)), + TokenKind::True => Ok(self + .ast + .push(Expr::Literal(Literal::Bool(true)), token.span)), + TokenKind::False => Ok(self + .ast + .push(Expr::Literal(Literal::Bool(false)), token.span)), + TokenKind::Ident(name) => Ok(self.ast.push(Expr::Ident(name), token.span)), + TokenKind::Regex(pattern) => { + Ok(self.ast.push(Expr::RegexConstraint(pattern), token.span)) + } + TokenKind::Underscore => Ok(self.ast.push(Expr::Wildcard, token.span)), + TokenKind::LBrace => self.parse_object_after_lbrace(token.span), + TokenKind::LBracket => self.parse_array_after_lbracket(token.span), + TokenKind::LParen => self.parse_group_or_function(token.span), + TokenKind::Let => self.parse_let(token.span), + TokenKind::Match => self.parse_match(token.span), + TokenKind::Import => self.parse_import(token.span), + TokenKind::Gt | TokenKind::Gte | TokenKind::Lt | TokenKind::Lte => { + let op = match token.kind { + TokenKind::Gt => CompareOp::Gt, + TokenKind::Gte => CompareOp::Gte, + TokenKind::Lt => CompareOp::Lt, + TokenKind::Lte => CompareOp::Lte, + _ => unreachable!(), + }; + let value = self.parse_expr(8)?; + let span = token.span.join(self.ast.span(value)); + Ok(self.ast.push(Expr::CompareConstraint { op, value }, span)) + } + _ => Err(Diagnostic::syntax(token.span, "expected expression")), + } + } + + fn parse_object_after_lbrace(&mut self, start_span: Span) -> Result { + let mut fields = Vec::new(); + if self.consume_kind(&TokenKind::RBrace).is_some() { + return Ok(self + .ast + .push(Expr::Object(fields), start_span.join(self.previous_span()))); + } + loop { + fields.push(self.parse_field()?); + if self.consume_kind(&TokenKind::Semicolon).is_some() { + if self.consume_kind(&TokenKind::RBrace).is_some() { + break; + } + continue; + } + self.expect_kind(&TokenKind::RBrace, "expected ';' or '}' after object field")?; + break; + } + let span = start_span.join(self.previous_span()); + Ok(self.ast.push(Expr::Object(fields), span)) + } + + fn parse_array_after_lbracket(&mut self, start_span: Span) -> Result { + let mut items = Vec::new(); + if self.consume_kind(&TokenKind::RBracket).is_some() { + return Ok(self + .ast + .push(Expr::Array(items), start_span.join(self.previous_span()))); + } + loop { + items.push(self.parse_expr(0)?); + if self.consume_kind(&TokenKind::Comma).is_some() { + if self.consume_kind(&TokenKind::RBracket).is_some() { + break; + } + continue; + } + self.expect_kind(&TokenKind::RBracket, "expected ',' or ']' after array item")?; + break; + } + let span = start_span.join(self.previous_span()); + Ok(self.ast.push(Expr::Array(items), span)) + } + + fn parse_group_or_function(&mut self, start_span: Span) -> Result { + if self.looks_like_params() { + let params = self.parse_params_after_lparen()?; + self.expect_kind(&TokenKind::Arrow, "expected '=>' after function parameters")?; + let body = self.parse_expr(0)?; + let span = start_span.join(self.ast.span(body)); + return Ok(self.ast.push(Expr::Function { params, body }, span)); + } + + let expr = self.parse_expr(0)?; + self.expect_kind(&TokenKind::RParen, "expected ')' after expression")?; + Ok(expr) + } + + fn parse_params_after_lparen(&mut self) -> Result> { + let mut params = Vec::new(); + if self.consume_kind(&TokenKind::RParen).is_some() { + return Ok(params); + } + loop { + let (name, name_span) = self.expect_ident()?; + let constraint = if self.consume_kind(&TokenKind::Colon).is_some() { + Some(self.parse_expr(0)?) + } else { + None + }; + let span = constraint + .map(|id| name_span.join(self.ast.span(id))) + .unwrap_or(name_span); + params.push(Param { + name, + constraint, + span, + }); + if self.consume_kind(&TokenKind::Comma).is_some() { + continue; + } + self.expect_kind(&TokenKind::RParen, "expected ',' or ')' after parameter")?; + break; + } + Ok(params) + } + + fn parse_let(&mut self, start_span: Span) -> Result { + let mut bindings = Vec::new(); + while !self.check_kind(&TokenKind::In) && !self.at_eof() { + bindings.push(self.parse_field()?); + self.expect_kind(&TokenKind::Semicolon, "expected ';' after let binding")?; + } + self.expect_kind(&TokenKind::In, "expected 'in' after let bindings")?; + let body = self.parse_expr(0)?; + let span = start_span.join(self.ast.span(body)); + Ok(self.ast.push(Expr::Let { bindings, body }, span)) + } + + fn parse_match(&mut self, start_span: Span) -> Result { + let scrutinee = self.parse_expr(0)?; + self.expect_kind(&TokenKind::LBrace, "expected '{' after match scrutinee")?; + let mut arms = Vec::new(); + if self.consume_kind(&TokenKind::RBrace).is_none() { + loop { + let pattern = self.parse_expr(0)?; + self.expect_kind(&TokenKind::Colon, "expected ':' after match pattern")?; + let body = self.parse_expr(0)?; + let span = self.ast.span(pattern).join(self.ast.span(body)); + arms.push(MatchArm { + pattern, + body, + span, + }); + if self.consume_kind(&TokenKind::Semicolon).is_some() { + if self.consume_kind(&TokenKind::RBrace).is_some() { + break; + } + continue; + } + self.expect_kind(&TokenKind::RBrace, "expected ';' or '}' after match arm")?; + break; + } + } + let span = start_span.join(self.previous_span()); + Ok(self.ast.push(Expr::Match { scrutinee, arms }, span)) + } + + fn parse_import(&mut self, start_span: Span) -> Result { + let token = self.advance().clone(); + let path = match token.kind { + TokenKind::String(path) | TokenKind::Ident(path) => path, + _ => return Err(Diagnostic::syntax(token.span, "expected import path")), + }; + Ok(self + .ast + .push(Expr::Import(path), start_span.join(token.span))) + } + + fn parse_fields_until_eof(&mut self) -> Result> { + let mut fields = Vec::new(); + while !self.at_eof() { + fields.push(self.parse_field()?); + self.consume_kind(&TokenKind::Semicolon); + } + Ok(fields) + } + + fn parse_field(&mut self) -> Result { + let (first, first_span) = self.expect_ident()?; + let mut path = Vec::new(); + path.push(first); + let mut span = first_span; + while self.consume_kind(&TokenKind::Dot).is_some() { + let (name, name_span) = self.expect_ident()?; + span = span.join(name_span); + path.push(name); + } + self.expect_kind(&TokenKind::Equal, "expected '=' after field name")?; + let value = self.parse_expr(0)?; + span = span.join(self.ast.span(value)); + Ok(Field { path, value, span }) + } + + fn starts_field(&self) -> bool { + matches!(self.peek_kind(), TokenKind::Ident(_)) + && matches!(self.peek_kind_n(1), TokenKind::Equal | TokenKind::Dot) + } + + fn looks_like_params(&self) -> bool { + match self.peek_kind() { + TokenKind::RParen => matches!(self.peek_kind_n(1), TokenKind::Arrow), + TokenKind::Ident(_) => { + let mut i = self.pos; + loop { + if !matches!(self.kind_at(i), TokenKind::Ident(_)) { + return false; + } + i += 1; + if matches!(self.kind_at(i), TokenKind::Colon) { + // Skip a simple constraint expression approximately until comma/rparen. + i += 1; + let mut depth = 0usize; + while !matches!(self.kind_at(i), TokenKind::Eof) { + match self.kind_at(i) { + TokenKind::LParen | TokenKind::LBrace | TokenKind::LBracket => { + depth += 1 + } + TokenKind::RParen if depth == 0 => break, + TokenKind::RParen | TokenKind::RBrace | TokenKind::RBracket => { + depth = depth.saturating_sub(1) + } + TokenKind::Comma if depth == 0 => break, + _ => {} + } + i += 1; + } + } + if matches!(self.kind_at(i), TokenKind::Comma) { + i += 1; + continue; + } + if matches!(self.kind_at(i), TokenKind::RParen) { + return matches!(self.kind_at(i + 1), TokenKind::Arrow); + } + return false; + } + } + _ => false, + } + } + + fn peek_infix(&self) -> Option<(InfixKind, u8, u8)> { + match self.peek_kind() { + TokenKind::Default => Some((InfixKind::Default, 1, 2)), + TokenKind::SlashSlash => Some((InfixKind::Patch, 3, 4)), + TokenKind::Amp => Some((InfixKind::And, 5, 6)), + _ => None, + } + } + + fn is_expr_stop(&self) -> bool { + matches!( + self.peek_kind(), + TokenKind::Semicolon + | TokenKind::Comma + | TokenKind::RParen + | TokenKind::RBracket + | TokenKind::RBrace + | TokenKind::Colon + | TokenKind::In + | TokenKind::Arrow + ) + } + + fn expect_ident(&mut self) -> Result<(String, Span)> { + let token = self.advance().clone(); + match token.kind { + TokenKind::Ident(name) => Ok((name, token.span)), + _ => Err(Diagnostic::syntax(token.span, "expected identifier")), + } + } + + fn expect_kind(&mut self, expected: &TokenKind, message: &'static str) -> Result { + if let Some(span) = self.consume_kind(expected) { + Ok(span) + } else { + Err(Diagnostic::syntax(self.peek().span, message)) + } + } + + fn expect_eof(&mut self) -> Result<()> { + if self.at_eof() { + Ok(()) + } else { + Err(Diagnostic::syntax(self.peek().span, "expected end of file")) + } + } + + fn consume_kind(&mut self, expected: &TokenKind) -> Option { + if core::mem::discriminant(self.peek_kind()) == core::mem::discriminant(expected) { + Some(self.advance().span) + } else { + None + } + } + + fn check_kind(&self, expected: &TokenKind) -> bool { + core::mem::discriminant(self.peek_kind()) == core::mem::discriminant(expected) + } + + fn at_eof(&self) -> bool { + matches!(self.peek_kind(), TokenKind::Eof) + } + + fn advance(&mut self) -> &Token { + let index = self.pos; + if !self.at_eof() { + self.pos += 1; + } + &self.tokens[index] + } + + fn previous_span(&self) -> Span { + self.tokens[self.pos.saturating_sub(1)].span + } + + fn peek(&self) -> &Token { + &self.tokens[self.pos] + } + + fn peek_kind(&self) -> &TokenKind { + &self.peek().kind + } + + fn peek_kind_n(&self, n: usize) -> &TokenKind { + self.kind_at(self.pos + n) + } + + fn kind_at(&self, index: usize) -> &TokenKind { + self.tokens + .get(index) + .map(|token| &token.kind) + .unwrap_or(&TokenKind::Eof) + } +} + +#[derive(Debug, Clone, Copy)] +enum InfixKind { + And, + Patch, + Default, +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::ast::Expr; + + #[test] + fn parses_top_level_fields_as_object() { + let parsed = parse_source("port = Int & >= 1 default 8080;").unwrap(); + assert!(matches!(parsed.ast.get(parsed.root).expr, Expr::Object(_))); + } + + #[test] + fn parses_object_dot_field() { + let parsed = parse_source("{ feature.enable = false; }").unwrap(); + let Expr::Object(fields) = &parsed.ast.get(parsed.root).expr else { + panic!() + }; + assert_eq!(fields[0].path, ["feature", "enable"]); + } +} diff --git a/crates/decodal-core/src/span.rs b/crates/decodal-core/src/span.rs new file mode 100644 index 0000000..b3567ec --- /dev/null +++ b/crates/decodal-core/src/span.rs @@ -0,0 +1,25 @@ +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub struct Span { + pub start: u32, + pub end: u32, +} + +impl Span { + pub const fn new(start: usize, end: usize) -> Self { + Self { + start: start as u32, + end: end as u32, + } + } + + pub const fn empty(offset: usize) -> Self { + Self::new(offset, offset) + } + + pub fn join(self, other: Self) -> Self { + Self { + start: self.start.min(other.start), + end: self.end.max(other.end), + } + } +} diff --git a/doc/manual/souce/language/syntax.md b/doc/manual/souce/language/syntax.md index 198a25a..f9176fc 100644 --- a/doc/manual/souce/language/syntax.md +++ b/doc/manual/souce/language/syntax.md @@ -15,6 +15,25 @@ schema.dcdl service.dcdl ``` +## Module source + +ファイル全体は単一の式として書ける。 +また、top-level に field 定義列を書いた場合は、暗黙の object として扱う。 + +```dcdl +host = String; +port = Int default 8080; +``` + +上の source は以下と同じ意味である。 + +```dcdl +{ + host = String; + port = Int default 8080; +} +``` + ## コメント コメントは `#` から行末までとする。 @@ -38,7 +57,7 @@ host = "127.0.0.1"; # trailing comment ## 識別子 -識別子の厳密な字句規則は未確定である。 +識別子は ASCII 英字で始まり、ASCII 英数字または `_` を続けられる。 慣習としては `lower_snake`、`lowerCamel`、`UpperCamel` を使える想定とする。 ```dcdl