Implement core lexer and parser

This commit is contained in:
Keisuke Hirata 2026-06-16 02:02:17 +09:00
parent 12a6e9a84c
commit ddcee75c8d
No known key found for this signature in database
7 changed files with 1083 additions and 1 deletions

View File

@ -0,0 +1,133 @@
use alloc::{string::String, vec::Vec};
use crate::span::Span;
/// Handle to an expression stored in an [`Ast`].
///
/// The wrapped value is the position of the expression in the arena's
/// backing vector: [`Ast::push`] produces it and [`Ast::get`] indexes with it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct ExprId(pub u32);
/// Flat arena holding every parsed expression, addressed by [`ExprId`].
#[derive(Debug, Default, Clone)]
pub struct Ast {
// Expressions in insertion order; an `ExprId` is an index into this vector.
exprs: Vec<SpannedExpr>,
}
impl Ast {
pub fn new() -> Self {
Self { exprs: Vec::new() }
}
pub fn push(&mut self, expr: Expr, span: Span) -> ExprId {
let id = ExprId(self.exprs.len() as u32);
self.exprs.push(SpannedExpr { expr, span });
id
}
pub fn get(&self, id: ExprId) -> &SpannedExpr {
&self.exprs[id.0 as usize]
}
pub fn span(&self, id: ExprId) -> Span {
self.get(id).span
}
pub fn len(&self) -> usize {
self.exprs.len()
}
pub fn is_empty(&self) -> bool {
self.exprs.is_empty()
}
}
/// An expression paired with the source span it was parsed from.
#[derive(Debug, Clone)]
pub struct SpannedExpr {
pub expr: Expr,
pub span: Span,
}
/// Expression node. Child expressions are referenced by [`ExprId`] rather
/// than owned, so the tree lives flat inside an [`Ast`].
#[derive(Debug, Clone, PartialEq)]
pub enum Expr {
// String, integer, float or boolean literal.
Literal(Literal),
// Bare identifier reference.
Ident(String),
// `{ field; ... }`, or the implicit object formed by top-level fields.
Object(Vec<Field>),
// `[item, ...]`.
Array(Vec<ExprId>),
// `let binding; ... in body`.
Let {
bindings: Vec<Field>,
body: ExprId,
},
// `import` followed by a string or identifier path.
Import(String),
// Postfix field access: `base.field`.
Path {
base: ExprId,
field: String,
},
// Postfix call: `callee(args...)`.
Call {
callee: ExprId,
args: Vec<ExprId>,
},
// `(params) => body`.
Function {
params: Vec<Param>,
body: ExprId,
},
// `match scrutinee { pattern: body; ... }`.
Match {
scrutinee: ExprId,
arms: Vec<MatchArm>,
},
// Infix `&` or `//`; see `BinaryOp`.
Binary {
op: BinaryOp,
lhs: ExprId,
rhs: ExprId,
},
// Infix `base default fallback`.
Default {
base: ExprId,
fallback: ExprId,
},
// Prefix comparison such as `>= value`.
CompareConstraint {
op: CompareOp,
value: ExprId,
},
// `/pattern/`; holds the text between the slashes.
RegexConstraint(String),
// `_`.
Wildcard,
}
/// A `name = value` entry of an object or `let` block.
#[derive(Debug, Clone, PartialEq)]
pub struct Field {
// Dotted name split into segments: `feature.enable` becomes ["feature", "enable"].
pub path: Vec<String>,
pub value: ExprId,
// Runs from the first name segment through the value expression.
pub span: Span,
}
/// One parameter of a function expression.
#[derive(Debug, Clone, PartialEq)]
pub struct Param {
pub name: String,
// Expression written after `:` in `name: constraint`, when present.
pub constraint: Option<ExprId>,
// Covers the name and, if present, the constraint.
pub span: Span,
}
/// One `pattern: body` arm of a `match` expression.
#[derive(Debug, Clone, PartialEq)]
pub struct MatchArm {
pub pattern: ExprId,
pub body: ExprId,
// Join of the pattern and body spans.
pub span: Span,
}
/// Literal value as produced by the lexer.
///
/// Only `PartialEq` is derived because `Float` holds an `f64`.
#[derive(Debug, Clone, PartialEq)]
pub enum Literal {
String(String),
Int(i64),
Float(f64),
Bool(bool),
}
/// Operator of an [`Expr::Binary`] node.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BinaryOp {
// Written `&` in source.
And,
// Written `//` in source.
Patch,
}
/// Comparison used by [`Expr::CompareConstraint`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompareOp {
// `>`
Gt,
// `>=`
Gte,
// `<`
Lt,
// `<=`
Lte,
// NOTE(review): no lexer token or parser branch in this commit produces `Eq`.
Eq,
}

View File

@ -0,0 +1,41 @@
use alloc::string::String;
use crate::span::Span;
/// Result alias whose error type is always [`Diagnostic`].
pub type Result<T> = core::result::Result<T, Diagnostic>;
/// An error report tied to a location in the source text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Diagnostic {
// Category of the problem.
pub kind: DiagnosticKind,
// Source range the problem refers to.
pub span: Span,
// Human-readable description.
pub message: String,
}
impl Diagnostic {
    /// Builds a diagnostic of the given `kind` located at `span`.
    pub fn new(kind: DiagnosticKind, span: Span, message: impl Into<String>) -> Self {
        let message = message.into();
        Self { kind, span, message }
    }

    /// Builds a [`DiagnosticKind::Syntax`] diagnostic located at `span`.
    pub fn syntax(span: Span, message: impl Into<String>) -> Self {
        Self {
            kind: DiagnosticKind::Syntax,
            span,
            message: message.into(),
        }
    }
}
/// Category of a [`Diagnostic`].
///
/// The lexer and parser in this commit only emit `Syntax`.
/// NOTE(review): the remaining variants are presumably reserved for later
/// resolution/evaluation stages — confirm once those stages exist.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiagnosticKind {
Syntax,
UnresolvedIdentifier,
TypeMismatch,
ConstraintViolation,
Conflict,
DefaultConflict,
Cycle,
Import,
MatchFailure,
Materialize,
UnsupportedFeature,
}

View File

@ -0,0 +1,342 @@
use alloc::{string::String, vec::Vec};
use crate::{Diagnostic, Span, diagnostic::Result};
/// A lexed token together with the byte range it occupies in the source.
#[derive(Debug, Clone, PartialEq)]
pub struct Token {
pub kind: TokenKind,
pub span: Span,
}
/// Kinds of token the lexer produces.
#[derive(Debug, Clone, PartialEq)]
pub enum TokenKind {
// Tokens that carry a payload.
Ident(String),
Int(i64),
Float(f64),
// String contents with escape sequences already resolved.
String(String),
// Text between the slashes of `/.../`, backslashes kept as written.
Regex(String),
// Keywords.
True,
False,
Let,
In,
Match,
Import,
Default,
// A lone `_`.
Underscore,
// Delimiters and punctuation.
LBrace,
RBrace,
LBracket,
RBracket,
LParen,
RParen,
Semicolon,
Comma,
Dot,
Colon,
// `=`
Equal,
// `=>`
Arrow,
// `&`
Amp,
// `//`
SlashSlash,
// Comparison operators `>`, `>=`, `<`, `<=`.
Gt,
Gte,
Lt,
Lte,
// End of input; always the final token returned by `Lexer::tokenize`.
Eof,
}
/// Byte-oriented lexer over a borrowed source string.
pub struct Lexer<'a> {
// Original text, used when slicing out number and identifier lexemes.
source: &'a str,
// Byte view of `source`, used for single-byte lookahead.
bytes: &'a [u8],
// Byte offset of the next unread byte.
pos: usize,
}
impl<'a> Lexer<'a> {
/// Creates a lexer positioned at the first byte of `source`.
pub fn new(source: &'a str) -> Self {
    let bytes = source.as_bytes();
    Self {
        source,
        bytes,
        pos: 0,
    }
}
/// Lexes the whole input and returns its tokens, ending with `Eof`.
///
/// # Errors
/// Stops at the first lexical error and returns its diagnostic.
pub fn tokenize(mut self) -> Result<Vec<Token>> {
    let mut out = Vec::new();
    loop {
        let tok = self.next_token()?;
        let finished = matches!(tok.kind, TokenKind::Eof);
        out.push(tok);
        if finished {
            break;
        }
    }
    Ok(out)
}
/// Skips whitespace and comments, then lexes the next token.
///
/// Returns an `Eof` token with an empty span once the input is exhausted.
///
/// # Errors
/// Returns a syntax diagnostic when the next character cannot start a token
/// (the span covers that whole UTF-8 character), and propagates errors from
/// the string, regex and number sub-lexers.
fn next_token(&mut self) -> Result<Token> {
    self.skip_ws_and_comments();
    let start = self.pos;
    let Some(ch) = self.peek() else {
        return Ok(Token {
            kind: TokenKind::Eof,
            span: Span::empty(self.pos),
        });
    };
    let kind = match ch {
        b'{' => {
            self.pos += 1;
            TokenKind::LBrace
        }
        b'}' => {
            self.pos += 1;
            TokenKind::RBrace
        }
        b'[' => {
            self.pos += 1;
            TokenKind::LBracket
        }
        b']' => {
            self.pos += 1;
            TokenKind::RBracket
        }
        b'(' => {
            self.pos += 1;
            TokenKind::LParen
        }
        b')' => {
            self.pos += 1;
            TokenKind::RParen
        }
        b';' => {
            self.pos += 1;
            TokenKind::Semicolon
        }
        b',' => {
            self.pos += 1;
            TokenKind::Comma
        }
        b'.' => {
            self.pos += 1;
            TokenKind::Dot
        }
        b':' => {
            self.pos += 1;
            TokenKind::Colon
        }
        b'_' => {
            self.pos += 1;
            TokenKind::Underscore
        }
        b'&' => {
            self.pos += 1;
            TokenKind::Amp
        }
        // `=>` or `=`
        b'=' => {
            self.pos += 1;
            if self.consume(b'>') {
                TokenKind::Arrow
            } else {
                TokenKind::Equal
            }
        }
        // `>=` or `>`
        b'>' => {
            self.pos += 1;
            if self.consume(b'=') {
                TokenKind::Gte
            } else {
                TokenKind::Gt
            }
        }
        // `<=` or `<`
        b'<' => {
            self.pos += 1;
            if self.consume(b'=') {
                TokenKind::Lte
            } else {
                TokenKind::Lt
            }
        }
        // `//` is the patch operator; a single `/` opens a regex literal.
        b'/' => {
            self.pos += 1;
            if self.consume(b'/') {
                TokenKind::SlashSlash
            } else {
                self.lex_regex(start)?
            }
        }
        b'"' => self.lex_string()?,
        b'0'..=b'9' => self.lex_number()?,
        c if is_ident_start(c) => self.lex_ident_or_keyword(),
        _ => {
            // Cover the whole character, not just its first byte, so the
            // reported span never ends inside a multi-byte UTF-8 sequence.
            let width = self
                .source
                .get(start..)
                .and_then(|rest| rest.chars().next())
                .map_or(1, char::len_utf8);
            return Err(Diagnostic::syntax(
                Span::new(start, start + width),
                "unexpected character",
            ));
        }
    };
    Ok(Token {
        kind,
        span: Span::new(start, self.pos),
    })
}
/// Advances past spaces, tabs, CR/LF and `#` line comments.
fn skip_ws_and_comments(&mut self) {
    loop {
        while let Some(b' ' | b'\t' | b'\r' | b'\n') = self.peek() {
            self.pos += 1;
        }
        if self.peek() != Some(b'#') {
            return;
        }
        // Consume the comment up to and including its newline (or to the
        // end of input), then look for more whitespace.
        while let Some(byte) = self.peek() {
            self.pos += 1;
            if byte == b'\n' {
                break;
            }
        }
    }
}
/// Lexes a double-quoted string; `self.pos` must be at the opening quote.
///
/// Recognised escapes are `\"`, `\\`, `\n`, `\r` and `\t`; any other escaped
/// character stands for itself. Unescaped text is copied from the source as
/// `str` slices so non-ASCII (multi-byte UTF-8) content is preserved instead
/// of being pushed byte-by-byte as separate characters.
///
/// # Errors
/// Returns a syntax diagnostic for a backslash at end of input
/// ("unterminated escape") or a missing closing quote ("unterminated string").
fn lex_string(&mut self) -> Result<TokenKind> {
    let start = self.pos;
    self.pos += 1;
    let mut value = String::new();
    // Start of the run of literal text not yet copied into `value`.
    let mut run_start = self.pos;
    while let Some(c) = self.peek() {
        match c {
            b'"' => {
                // `"` is ASCII, so `self.pos` is always a char boundary here.
                value.push_str(&self.source[run_start..self.pos]);
                self.pos += 1;
                return Ok(TokenKind::String(value));
            }
            b'\\' => {
                value.push_str(&self.source[run_start..self.pos]);
                self.pos += 1;
                // Decode the full escaped character, not just its first byte.
                let Some(escaped) = self.source[self.pos..].chars().next() else {
                    return Err(Diagnostic::syntax(
                        Span::new(start, self.pos),
                        "unterminated escape",
                    ));
                };
                self.pos += escaped.len_utf8();
                let ch = match escaped {
                    '"' => '"',
                    '\\' => '\\',
                    'n' => '\n',
                    'r' => '\r',
                    't' => '\t',
                    other => other,
                };
                value.push(ch);
                run_start = self.pos;
            }
            _ => self.pos += 1,
        }
    }
    Err(Diagnostic::syntax(
        Span::new(start, self.pos),
        "unterminated string",
    ))
}
/// Lexes the body of a regex literal; `self.pos` must be just past the
/// opening `/`, and `start` is the offset of that slash (used for error spans).
///
/// The pattern is the source text up to the next unescaped `/`, with
/// backslashes kept as written. It is taken as one `str` slice so non-ASCII
/// (multi-byte UTF-8) content is preserved instead of being pushed
/// byte-by-byte as separate characters.
///
/// # Errors
/// Returns a syntax diagnostic ("unterminated regex") if no closing `/` is found.
fn lex_regex(&mut self, start: usize) -> Result<TokenKind> {
    let body_start = self.pos;
    let mut escaped = false;
    while let Some(c) = self.peek() {
        if !escaped && c == b'/' {
            // `/` is ASCII, so `self.pos` is a char boundary and the slice is valid.
            let pattern = String::from(&self.source[body_start..self.pos]);
            self.pos += 1;
            return Ok(TokenKind::Regex(pattern));
        }
        // A backslash escapes exactly the next byte; an escaped backslash
        // does not start another escape.
        escaped = !escaped && c == b'\\';
        self.pos += 1;
    }
    Err(Diagnostic::syntax(
        Span::new(start, self.pos),
        "unterminated regex",
    ))
}
/// Lexes an integer or float literal starting at the current digit.
///
/// A `.` is treated as a decimal point only when a digit follows it, so
/// `1.foo` lexes as the integer `1` followed by a `Dot`.
///
/// # Errors
/// Returns a syntax diagnostic when the digits do not parse as `i64`/`f64`.
fn lex_number(&mut self) -> Result<TokenKind> {
    let start = self.pos;
    while let Some(b'0'..=b'9') = self.peek() {
        self.pos += 1;
    }
    let has_fraction = matches!(
        (self.peek(), self.peek_n(1)),
        (Some(b'.'), Some(b'0'..=b'9'))
    );
    if has_fraction {
        self.pos += 1;
        while let Some(b'0'..=b'9') = self.peek() {
            self.pos += 1;
        }
    }
    let span = Span::new(start, self.pos);
    let text = &self.source[start..self.pos];
    if has_fraction {
        match text.parse::<f64>() {
            Ok(value) => Ok(TokenKind::Float(value)),
            Err(_) => Err(Diagnostic::syntax(span, "invalid float literal")),
        }
    } else {
        match text.parse::<i64>() {
            Ok(value) => Ok(TokenKind::Int(value)),
            Err(_) => Err(Diagnostic::syntax(span, "invalid int literal")),
        }
    }
}
/// Lexes an identifier and maps reserved words to their keyword tokens.
///
/// The caller has already checked that the current byte starts an identifier.
fn lex_ident_or_keyword(&mut self) -> TokenKind {
    let start = self.pos;
    self.pos += 1;
    while let Some(byte) = self.peek() {
        if !is_ident_continue(byte) {
            break;
        }
        self.pos += 1;
    }
    let word = &self.source[start..self.pos];
    if word == "true" {
        TokenKind::True
    } else if word == "false" {
        TokenKind::False
    } else if word == "let" {
        TokenKind::Let
    } else if word == "in" {
        TokenKind::In
    } else if word == "match" {
        TokenKind::Match
    } else if word == "import" {
        TokenKind::Import
    } else if word == "default" {
        TokenKind::Default
    } else {
        TokenKind::Ident(String::from(word))
    }
}
fn peek(&self) -> Option<u8> {
self.bytes.get(self.pos).copied()
}
/// Returns the byte `n` positions past the cursor, or `None` if out of range.
fn peek_n(&self, n: usize) -> Option<u8> {
    let index = self.pos + n;
    self.bytes.get(index).copied()
}
/// Consumes the next byte if it equals `expected`; reports whether it did.
fn consume(&mut self, expected: u8) -> bool {
    let matched = self.peek() == Some(expected);
    if matched {
        self.pos += 1;
    }
    matched
}
}
/// Returns `true` if `c` may begin an identifier (an ASCII letter).
fn is_ident_start(c: u8) -> bool {
    matches!(c, b'a'..=b'z' | b'A'..=b'Z')
}
/// Returns `true` if `c` may continue an identifier (ASCII letter, digit or `_`).
fn is_ident_continue(c: u8) -> bool {
    matches!(c, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_')
}
// Unit tests for the lexer.
#[cfg(test)]
mod tests {
use super::*;
// Checks identifier, `=`, `&` and `>=` tokens; index 2 (`Int`) is not asserted.
#[test]
fn tokenizes_basic_source() {
let tokens = Lexer::new("port = Int & >= 1;").tokenize().unwrap();
assert!(matches!(tokens[0].kind, TokenKind::Ident(_)));
assert_eq!(tokens[1].kind, TokenKind::Equal);
assert_eq!(tokens[3].kind, TokenKind::Amp);
assert_eq!(tokens[4].kind, TokenKind::Gte);
}
}

View File

@ -2,6 +2,18 @@
extern crate alloc;
pub mod ast;
pub mod diagnostic;
pub mod lexer;
pub mod parser;
pub mod span;
pub use ast::{Ast, BinaryOp, CompareOp, Expr, ExprId, Field, Literal, Param};
pub use diagnostic::{Diagnostic, DiagnosticKind, Result};
pub use lexer::{Lexer, Token, TokenKind};
pub use parser::{ParseOutput, Parser, parse_source};
pub use span::Span;
/// Returns the value of `CARGO_PKG_VERSION` captured at compile time.
pub fn version() -> &'static str {
env!("CARGO_PKG_VERSION")
}

View File

@ -0,0 +1,510 @@
use alloc::{string::String, vec::Vec};
use crate::{
Span,
ast::{Ast, BinaryOp, CompareOp, Expr, ExprId, Field, Literal, MatchArm, Param},
diagnostic::{Diagnostic, Result},
lexer::{Lexer, Token, TokenKind},
};
/// Result of a successful parse.
#[derive(Debug, Clone)]
pub struct ParseOutput {
// Arena containing every expression created during the parse.
pub ast: Ast,
// Id of the top-level expression inside `ast`.
pub root: ExprId,
}
/// Lexes and parses `source` in one step.
///
/// # Errors
/// Returns the first lexer or parser diagnostic encountered.
pub fn parse_source(source: &str) -> Result<ParseOutput> {
    Lexer::new(source)
        .tokenize()
        .and_then(|tokens| Parser::new(tokens).parse())
}
/// Parser over a token vector that builds expressions into an [`Ast`].
pub struct Parser {
// Token stream; the cursor helpers rely on it ending with `TokenKind::Eof`.
tokens: Vec<Token>,
// Index of the next unconsumed token.
pos: usize,
// Arena receiving every expression as it is parsed.
ast: Ast,
}
impl Parser {
pub fn new(tokens: Vec<Token>) -> Self {
Self {
tokens,
pos: 0,
ast: Ast::new(),
}
}
pub fn parse(mut self) -> Result<ParseOutput> {
let root = if self.starts_field() {
let fields = self.parse_fields_until_eof()?;
let span = fields
.first()
.map(|f| {
fields
.iter()
.fold(f.span, |acc, field| acc.join(field.span))
})
.unwrap_or_else(|| Span::empty(0));
self.ast.push(Expr::Object(fields), span)
} else {
let expr = self.parse_expr(0)?;
self.expect_eof()?;
expr
};
Ok(ParseOutput {
ast: self.ast,
root,
})
}
/// Parses an expression with binding-power (Pratt-style) precedence.
///
/// `min_bp` is the smallest left binding power an infix operator must have
/// to be consumed at this level; callers pass `0` for a full expression.
/// Postfix `.field` and call `(...)` are applied to the current left-hand
/// side before infix operators are considered and do not consult `min_bp`.
///
/// # Errors
/// Propagates the first syntax diagnostic from any sub-parse.
fn parse_expr(&mut self, min_bp: u8) -> Result<ExprId> {
let mut lhs = self.parse_prefix()?;
loop {
// Tokens that close an enclosing construct end the expression here.
if self.at_eof() || self.is_expr_stop() {
break;
}
// postfix: path reference
if self.consume_kind(&TokenKind::Dot).is_some() {
let (field, field_span) = self.expect_ident()?;
let span = self.ast.span(lhs).join(field_span);
lhs = self.ast.push(Expr::Path { base: lhs, field }, span);
continue;
}
// postfix: call
if self.consume_kind(&TokenKind::LParen).is_some() {
let mut args = Vec::new();
if self.consume_kind(&TokenKind::RParen).is_none() {
loop {
args.push(self.parse_expr(0)?);
if self.consume_kind(&TokenKind::Comma).is_some() {
continue;
}
self.expect_kind(&TokenKind::RParen, "expected ')' after call arguments")?;
break;
}
}
// `previous_span` is the closing `)` that was just consumed.
let span = self.ast.span(lhs).join(self.previous_span());
lhs = self.ast.push(Expr::Call { callee: lhs, args }, span);
continue;
}
let Some((kind, l_bp, r_bp)) = self.peek_infix() else {
break;
};
// Operator binds too loosely for this level; leave it for the caller.
if l_bp < min_bp {
break;
}
let op_span = self.advance().span;
let rhs = self.parse_expr(r_bp)?;
let span = self.ast.span(lhs).join(self.ast.span(rhs)).join(op_span);
lhs = match kind {
InfixKind::And => self.ast.push(
Expr::Binary {
op: BinaryOp::And,
lhs,
rhs,
},
span,
),
InfixKind::Patch => self.ast.push(
Expr::Binary {
op: BinaryOp::Patch,
lhs,
rhs,
},
span,
),
InfixKind::Default => self.ast.push(
Expr::Default {
base: lhs,
fallback: rhs,
},
span,
),
};
}
Ok(lhs)
}
/// Parses a primary expression: a literal, identifier, regex, wildcard, a
/// bracketed or keyword-introduced construct, or a prefix comparison
/// constraint such as `>= 1`.
///
/// # Errors
/// Returns "expected expression" when the next token cannot start an
/// expression, or propagates errors from the delegated sub-parsers.
fn parse_prefix(&mut self) -> Result<ExprId> {
// Cloned so the payload can be moved out while `self` stays borrowable.
let token = self.advance().clone();
match token.kind {
TokenKind::String(value) => Ok(self
.ast
.push(Expr::Literal(Literal::String(value)), token.span)),
TokenKind::Int(value) => Ok(self
.ast
.push(Expr::Literal(Literal::Int(value)), token.span)),
TokenKind::Float(value) => Ok(self
.ast
.push(Expr::Literal(Literal::Float(value)), token.span)),
TokenKind::True => Ok(self
.ast
.push(Expr::Literal(Literal::Bool(true)), token.span)),
TokenKind::False => Ok(self
.ast
.push(Expr::Literal(Literal::Bool(false)), token.span)),
TokenKind::Ident(name) => Ok(self.ast.push(Expr::Ident(name), token.span)),
TokenKind::Regex(pattern) => {
Ok(self.ast.push(Expr::RegexConstraint(pattern), token.span))
}
TokenKind::Underscore => Ok(self.ast.push(Expr::Wildcard, token.span)),
TokenKind::LBrace => self.parse_object_after_lbrace(token.span),
TokenKind::LBracket => self.parse_array_after_lbracket(token.span),
TokenKind::LParen => self.parse_group_or_function(token.span),
TokenKind::Let => self.parse_let(token.span),
TokenKind::Match => self.parse_match(token.span),
TokenKind::Import => self.parse_import(token.span),
TokenKind::Gt | TokenKind::Gte | TokenKind::Lt | TokenKind::Lte => {
let op = match token.kind {
TokenKind::Gt => CompareOp::Gt,
TokenKind::Gte => CompareOp::Gte,
TokenKind::Lt => CompareOp::Lt,
TokenKind::Lte => CompareOp::Lte,
_ => unreachable!(),
};
// 8 exceeds every infix left binding power in `peek_infix`, so the
// operand stops before any `&`, `//` or `default`.
let value = self.parse_expr(8)?;
let span = token.span.join(self.ast.span(value));
Ok(self.ast.push(Expr::CompareConstraint { op, value }, span))
}
_ => Err(Diagnostic::syntax(token.span, "expected expression")),
}
}
/// Parses the remainder of an object literal after its `{` (at `start_span`).
///
/// Fields are separated by `;`, and a trailing `;` before `}` is allowed.
///
/// # Errors
/// Returns a syntax diagnostic for a malformed field or a missing `;`/`}`.
fn parse_object_after_lbrace(&mut self, start_span: Span) -> Result<ExprId> {
    let mut fields = Vec::new();
    if self.consume_kind(&TokenKind::RBrace).is_none() {
        loop {
            let field = self.parse_field()?;
            fields.push(field);
            let had_separator = self.consume_kind(&TokenKind::Semicolon).is_some();
            if !had_separator {
                self.expect_kind(&TokenKind::RBrace, "expected ';' or '}' after object field")?;
                break;
            }
            if self.consume_kind(&TokenKind::RBrace).is_some() {
                break;
            }
        }
    }
    // The last consumed token is the closing `}`.
    let closing = self.previous_span();
    Ok(self.ast.push(Expr::Object(fields), start_span.join(closing)))
}
/// Parses the remainder of an array literal after its `[` (at `start_span`).
///
/// Items are separated by `,`, and a trailing `,` before `]` is allowed.
///
/// # Errors
/// Returns a syntax diagnostic for a malformed item or a missing `,`/`]`.
fn parse_array_after_lbracket(&mut self, start_span: Span) -> Result<ExprId> {
    let mut items = Vec::new();
    if self.consume_kind(&TokenKind::RBracket).is_none() {
        loop {
            let item = self.parse_expr(0)?;
            items.push(item);
            let had_separator = self.consume_kind(&TokenKind::Comma).is_some();
            if !had_separator {
                self.expect_kind(&TokenKind::RBracket, "expected ',' or ']' after array item")?;
                break;
            }
            if self.consume_kind(&TokenKind::RBracket).is_some() {
                break;
            }
        }
    }
    // The last consumed token is the closing `]`.
    let closing = self.previous_span();
    Ok(self.ast.push(Expr::Array(items), start_span.join(closing)))
}
/// Parses what follows a `(`: either a function `(params) => body` or a
/// parenthesised expression, chosen by lookahead.
///
/// A parenthesised expression returns the inner expression id unchanged; no
/// wrapper node is created for the parentheses.
///
/// # Errors
/// Returns a syntax diagnostic for a missing `=>` or `)`, or propagates
/// errors from the inner parse.
fn parse_group_or_function(&mut self, start_span: Span) -> Result<ExprId> {
    if !self.looks_like_params() {
        let inner = self.parse_expr(0)?;
        self.expect_kind(&TokenKind::RParen, "expected ')' after expression")?;
        return Ok(inner);
    }
    let params = self.parse_params_after_lparen()?;
    self.expect_kind(&TokenKind::Arrow, "expected '=>' after function parameters")?;
    let body = self.parse_expr(0)?;
    let body_span = self.ast.span(body);
    Ok(self
        .ast
        .push(Expr::Function { params, body }, start_span.join(body_span)))
}
/// Parses a comma-separated parameter list up to and including the closing `)`.
///
/// Each parameter is `name` or `name: constraint`.
///
/// # Errors
/// Returns a syntax diagnostic for a missing identifier or a missing `,`/`)`.
fn parse_params_after_lparen(&mut self) -> Result<Vec<Param>> {
    let mut params = Vec::new();
    if self.consume_kind(&TokenKind::RParen).is_some() {
        return Ok(params);
    }
    loop {
        let (name, name_span) = self.expect_ident()?;
        let constraint = match self.consume_kind(&TokenKind::Colon) {
            Some(_) => Some(self.parse_expr(0)?),
            None => None,
        };
        // Extend the span over the constraint when there is one.
        let span = match constraint {
            Some(id) => name_span.join(self.ast.span(id)),
            None => name_span,
        };
        params.push(Param {
            name,
            constraint,
            span,
        });
        if self.consume_kind(&TokenKind::Comma).is_none() {
            self.expect_kind(&TokenKind::RParen, "expected ',' or ')' after parameter")?;
            return Ok(params);
        }
    }
}
/// Parses `binding; ... in body` after the `let` keyword (at `start_span`).
///
/// # Errors
/// Returns a syntax diagnostic when a binding lacks its `;` or the `in`
/// keyword is missing.
fn parse_let(&mut self, start_span: Span) -> Result<ExprId> {
    let mut bindings = Vec::new();
    loop {
        if self.check_kind(&TokenKind::In) || self.at_eof() {
            break;
        }
        let binding = self.parse_field()?;
        bindings.push(binding);
        self.expect_kind(&TokenKind::Semicolon, "expected ';' after let binding")?;
    }
    self.expect_kind(&TokenKind::In, "expected 'in' after let bindings")?;
    let body = self.parse_expr(0)?;
    let body_span = self.ast.span(body);
    Ok(self
        .ast
        .push(Expr::Let { bindings, body }, start_span.join(body_span)))
}
/// Parses `scrutinee { pattern: body; ... }` after the `match` keyword.
///
/// Arms are separated by `;`, and a trailing `;` before `}` is allowed.
///
/// # Errors
/// Returns a syntax diagnostic for a missing `{`, `:`, `;` or `}`.
fn parse_match(&mut self, start_span: Span) -> Result<ExprId> {
    let scrutinee = self.parse_expr(0)?;
    self.expect_kind(&TokenKind::LBrace, "expected '{' after match scrutinee")?;
    let mut arms = Vec::new();
    if self.consume_kind(&TokenKind::RBrace).is_none() {
        loop {
            let pattern = self.parse_expr(0)?;
            self.expect_kind(&TokenKind::Colon, "expected ':' after match pattern")?;
            let body = self.parse_expr(0)?;
            let arm_span = self.ast.span(pattern).join(self.ast.span(body));
            arms.push(MatchArm {
                pattern,
                body,
                span: arm_span,
            });
            let had_separator = self.consume_kind(&TokenKind::Semicolon).is_some();
            if !had_separator {
                self.expect_kind(&TokenKind::RBrace, "expected ';' or '}' after match arm")?;
                break;
            }
            if self.consume_kind(&TokenKind::RBrace).is_some() {
                break;
            }
        }
    }
    // The last consumed token is the closing `}`.
    let closing = self.previous_span();
    Ok(self
        .ast
        .push(Expr::Match { scrutinee, arms }, start_span.join(closing)))
}
fn parse_import(&mut self, start_span: Span) -> Result<ExprId> {
let token = self.advance().clone();
let path = match token.kind {
TokenKind::String(path) | TokenKind::Ident(path) => path,
_ => return Err(Diagnostic::syntax(token.span, "expected import path")),
};
Ok(self
.ast
.push(Expr::Import(path), start_span.join(token.span)))
}
/// Parses top-level fields until end of input.
///
/// A `;` after a field is consumed when present but is not required.
///
/// # Errors
/// Propagates the first error from parsing a field.
fn parse_fields_until_eof(&mut self) -> Result<Vec<Field>> {
    let mut fields = Vec::new();
    loop {
        if self.at_eof() {
            return Ok(fields);
        }
        let field = self.parse_field()?;
        fields.push(field);
        let _ = self.consume_kind(&TokenKind::Semicolon);
    }
}
/// Parses one field: `name(.name)* = expr`.
///
/// The field's span runs from the first name segment through the value.
///
/// # Errors
/// Returns a syntax diagnostic for a missing identifier or `=`, or
/// propagates errors from the value expression.
fn parse_field(&mut self) -> Result<Field> {
    let (head, head_span) = self.expect_ident()?;
    let mut path = Vec::from([head]);
    let mut span = head_span;
    loop {
        if self.consume_kind(&TokenKind::Dot).is_none() {
            break;
        }
        let (segment, segment_span) = self.expect_ident()?;
        span = span.join(segment_span);
        path.push(segment);
    }
    self.expect_kind(&TokenKind::Equal, "expected '=' after field name")?;
    let value = self.parse_expr(0)?;
    let span = span.join(self.ast.span(value));
    Ok(Field { path, value, span })
}
/// Reports whether the upcoming tokens begin a field definition, i.e.
/// `ident(.ident)* =`.
///
/// The lookahead walks the whole dotted name up to the `=`. Checking only
/// for `ident .` would misclassify a top-level path expression such as
/// `config.port` as a field and reject it with "expected '='", even though a
/// source may consist of a single expression.
fn starts_field(&self) -> bool {
    if !matches!(self.peek_kind(), TokenKind::Ident(_)) {
        return false;
    }
    let mut index = self.pos + 1;
    // Skip each `.segment` pair of the dotted name.
    while matches!(self.kind_at(index), TokenKind::Dot)
        && matches!(self.kind_at(index + 1), TokenKind::Ident(_))
    {
        index += 2;
    }
    matches!(self.kind_at(index), TokenKind::Equal)
}
/// Lookahead (consumes nothing) deciding whether the tokens after a `(`
/// form a parameter list followed by `=>`.
///
/// Accepts `) =>` and `ident [: constraint] (, ident [: constraint])* ) =>`.
/// Constraint expressions are not parsed; they are skipped by bracket depth
/// up to the next top-level `,` or `)`.
fn looks_like_params(&self) -> bool {
match self.peek_kind() {
// Empty parameter list: `() => ...`.
TokenKind::RParen => matches!(self.peek_kind_n(1), TokenKind::Arrow),
TokenKind::Ident(_) => {
let mut i = self.pos;
loop {
// Every parameter must begin with an identifier.
if !matches!(self.kind_at(i), TokenKind::Ident(_)) {
return false;
}
i += 1;
if matches!(self.kind_at(i), TokenKind::Colon) {
// Skip a simple constraint expression approximately until comma/rparen.
i += 1;
let mut depth = 0usize;
while !matches!(self.kind_at(i), TokenKind::Eof) {
match self.kind_at(i) {
TokenKind::LParen | TokenKind::LBrace | TokenKind::LBracket => {
depth += 1
}
TokenKind::RParen if depth == 0 => break,
TokenKind::RParen | TokenKind::RBrace | TokenKind::RBracket => {
depth = depth.saturating_sub(1)
}
TokenKind::Comma if depth == 0 => break,
_ => {}
}
i += 1;
}
}
if matches!(self.kind_at(i), TokenKind::Comma) {
i += 1;
continue;
}
// The list counts as parameters only when `=>` follows the `)`.
if matches!(self.kind_at(i), TokenKind::RParen) {
return matches!(self.kind_at(i + 1), TokenKind::Arrow);
}
return false;
}
}
_ => false,
}
}
/// Returns the infix operator at the cursor with its (left, right) binding
/// powers, or `None` if the next token is not an infix operator.
///
/// `default` binds loosest, then `//`, then `&`; each right power exceeds
/// its left power, so operators of equal precedence group to the left.
fn peek_infix(&self) -> Option<(InfixKind, u8, u8)> {
    let entry = match self.peek_kind() {
        TokenKind::Default => (InfixKind::Default, 1, 2),
        TokenKind::SlashSlash => (InfixKind::Patch, 3, 4),
        TokenKind::Amp => (InfixKind::And, 5, 6),
        _ => return None,
    };
    Some(entry)
}
/// Reports whether the next token terminates the expression being parsed
/// (a separator, a closing bracket, `:`, `in` or `=>`).
fn is_expr_stop(&self) -> bool {
    match self.peek_kind() {
        TokenKind::Semicolon
        | TokenKind::Comma
        | TokenKind::RParen
        | TokenKind::RBracket
        | TokenKind::RBrace
        | TokenKind::Colon
        | TokenKind::In
        | TokenKind::Arrow => true,
        _ => false,
    }
}
fn expect_ident(&mut self) -> Result<(String, Span)> {
let token = self.advance().clone();
match token.kind {
TokenKind::Ident(name) => Ok((name, token.span)),
_ => Err(Diagnostic::syntax(token.span, "expected identifier")),
}
}
/// Consumes the next token if it has the same variant as `expected` and
/// returns its span.
///
/// # Errors
/// Returns a syntax diagnostic carrying `message`, located at the token that
/// was found instead; that token is not consumed.
fn expect_kind(&mut self, expected: &TokenKind, message: &'static str) -> Result<Span> {
    match self.consume_kind(expected) {
        Some(span) => Ok(span),
        None => Err(Diagnostic::syntax(self.peek().span, message)),
    }
}
/// Succeeds only when every token up to `Eof` has been consumed.
///
/// # Errors
/// Returns "expected end of file" at the first leftover token.
fn expect_eof(&mut self) -> Result<()> {
    if !self.at_eof() {
        return Err(Diagnostic::syntax(self.peek().span, "expected end of file"));
    }
    Ok(())
}
/// Consumes the next token if its variant matches `expected` (payloads are
/// ignored) and returns its span; otherwise leaves the cursor alone.
fn consume_kind(&mut self, expected: &TokenKind) -> Option<Span> {
    let wanted = core::mem::discriminant(expected);
    if core::mem::discriminant(self.peek_kind()) != wanted {
        return None;
    }
    Some(self.advance().span)
}
/// Reports whether the next token has the same variant as `expected`,
/// ignoring any payload and consuming nothing.
fn check_kind(&self, expected: &TokenKind) -> bool {
    let current = core::mem::discriminant(self.peek_kind());
    let wanted = core::mem::discriminant(expected);
    current == wanted
}
/// Reports whether the cursor is on the `Eof` token.
fn at_eof(&self) -> bool {
    *self.peek_kind() == TokenKind::Eof
}
fn advance(&mut self) -> &Token {
let index = self.pos;
if !self.at_eof() {
self.pos += 1;
}
&self.tokens[index]
}
/// Span of the token just before the cursor (the first token when the
/// cursor is still at index 0).
fn previous_span(&self) -> Span {
    let previous = self.pos.saturating_sub(1);
    self.tokens[previous].span
}
fn peek(&self) -> &Token {
&self.tokens[self.pos]
}
fn peek_kind(&self) -> &TokenKind {
&self.peek().kind
}
/// Returns the kind of the token `n` positions past the cursor, or `Eof`
/// when that position is beyond the end of the token vector.
fn peek_kind_n(&self, n: usize) -> &TokenKind {
    let target = self.pos + n;
    self.kind_at(target)
}
/// Returns the kind of the token at absolute `index`, treating any index
/// past the end of the token vector as `Eof`.
fn kind_at(&self, index: usize) -> &TokenKind {
    match self.tokens.get(index) {
        Some(token) => &token.kind,
        None => &TokenKind::Eof,
    }
}
}
/// Infix operators recognised by `Parser::peek_infix`.
#[derive(Debug, Clone, Copy)]
enum InfixKind {
// `&`, parsed into `Expr::Binary` with `BinaryOp::And`.
And,
// `//`, parsed into `Expr::Binary` with `BinaryOp::Patch`.
Patch,
// `default`, parsed into `Expr::Default`.
Default,
}
// Unit tests for the parser.
#[cfg(test)]
mod tests {
use super::*;
use crate::ast::Expr;
// A source made of top-level fields becomes an implicit object.
#[test]
fn parses_top_level_fields_as_object() {
let parsed = parse_source("port = Int & >= 1 default 8080;").unwrap();
assert!(matches!(parsed.ast.get(parsed.root).expr, Expr::Object(_)));
}
// A dotted field name is split into its path segments.
#[test]
fn parses_object_dot_field() {
let parsed = parse_source("{ feature.enable = false; }").unwrap();
let Expr::Object(fields) = &parsed.ast.get(parsed.root).expr else {
panic!()
};
assert_eq!(fields[0].path, ["feature", "enable"]);
}
}

View File

@ -0,0 +1,25 @@
/// Byte range `start..end` into the source text, stored as `u32` offsets.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct Span {
    pub start: u32,
    pub end: u32,
}

impl Span {
    /// Builds a span from `usize` byte offsets.
    ///
    /// Offsets above `u32::MAX` saturate to `u32::MAX`. A plain `as` cast
    /// would wrap and yield a span pointing at unrelated text near the
    /// start of the source.
    pub const fn new(start: usize, end: usize) -> Self {
        Self {
            start: Self::clamp_offset(start),
            end: Self::clamp_offset(end),
        }
    }

    /// Builds a zero-length span located at `offset`.
    pub const fn empty(offset: usize) -> Self {
        Self::new(offset, offset)
    }

    /// Returns the smallest span that covers both `self` and `other`.
    pub fn join(self, other: Self) -> Self {
        Self {
            start: self.start.min(other.start),
            end: self.end.max(other.end),
        }
    }

    // Narrows a byte offset to `u32`, saturating instead of wrapping.
    const fn clamp_offset(offset: usize) -> u32 {
        if offset > u32::MAX as usize {
            u32::MAX
        } else {
            offset as u32
        }
    }
}

View File

@ -15,6 +15,25 @@ schema.dcdl
service.dcdl
```
## Module source
ファイル全体は単一の式として書ける。
また、top-level に field 定義列を書いた場合は、暗黙の object として扱う。
```dcdl
host = String;
port = Int default 8080;
```
上の source は以下と同じ意味である。
```dcdl
{
host = String;
port = Int default 8080;
}
```
## コメント
コメントは `#` から行末までとする。
@ -38,7 +57,7 @@ host = "127.0.0.1"; # trailing comment
## 識別子
識別子の厳密な字句規則は未確定である。
識別子は ASCII 英字で始まり、ASCII 英数字または `_` を続けられる。
慣習としては `lower_snake`、`lowerCamel`、`UpperCamel` を使える想定とする。
```dcdl