slovo/compiler/src/sexpr.rs
2026-05-22 08:38:43 +02:00

226 lines
5.9 KiB
Rust

use crate::{
diag::Diagnostic,
token::{Span, Token, TokenKind},
};
/// A leaf value of the S-expression tree.
///
/// Each variant mirrors one non-parenthesis token kind produced by the lexer.
/// `PartialEq` is derived so atoms can be compared directly (for example in
/// `assert_eq!`); `Eq` is intentionally not derived because `Float` carries an
/// `f64`.
#[derive(Debug, Clone, PartialEq)]
pub enum Atom {
    /// An identifier, stored as its text.
    Ident(String),
    /// An integer literal carried as `i64`, kept distinct from [`Atom::I64`].
    Int(i64),
    /// A literal the lexer classified as `i64`.
    I64(i64),
    /// A literal the lexer classified as `u32`.
    U32(u32),
    /// A literal the lexer classified as `u64`.
    U64(u64),
    /// A floating-point literal.
    Float(f64),
    /// A string literal's contents.
    String(String),
    /// The `->` arrow token.
    Arrow,
}
/// Shape of a parsed S-expression node: either a single atom or a list of
/// child nodes.
#[derive(Debug, Clone)]
pub enum SExprKind {
/// A leaf node wrapping one [`Atom`].
Atom(Atom),
/// A parenthesized sequence of nested expressions, in the order they were
/// parsed.
List(Vec<SExpr>),
}
/// A parsed S-expression node together with the source span it was built from.
#[derive(Debug, Clone)]
pub struct SExpr {
/// Whether this node is an atom or a list, plus its payload.
pub kind: SExprKind,
/// Source span of the node. The parser copies the token's span for atoms;
/// for lists it runs from the start of the opening paren to the end of the
/// closing paren.
pub span: Span,
}
/// Parses a token stream into its top-level S-expressions.
///
/// `file` is forwarded to every diagnostic that gets created. Parsing stops at
/// the first top-level expression that fails, so forms after that point are
/// not examined.
///
/// # Errors
///
/// Returns every diagnostic recorded while parsing if at least one was
/// produced; the partially collected forms are discarded in that case.
pub fn parse(file: &str, tokens: &[Token]) -> Result<Vec<SExpr>, Vec<Diagnostic>> {
    let mut parser = Parser {
        file,
        tokens,
        pos: 0,
        errors: Vec::new(),
    };

    // `parse_expr` yields `None` both when the tokens are exhausted and when
    // an expression fails, which are exactly the two stop conditions here.
    let forms: Vec<SExpr> = std::iter::from_fn(|| parser.parse_expr()).collect();

    let Parser { errors, .. } = parser;
    if !errors.is_empty() {
        return Err(errors);
    }
    Ok(forms)
}
/// Renders the given forms as an indented, line-per-node tree dump.
///
/// Every top-level form starts at indentation level zero; forms are emitted
/// in slice order into a single string.
pub fn print_tree(forms: &[SExpr]) -> String {
    forms.iter().fold(String::new(), |mut rendered, form| {
        write_tree_expr(form, 0, &mut rendered);
        rendered
    })
}
/// Appends the tree dump of `expr` to `output`.
///
/// The node's line is prefixed with `indent` spaces. A list prints `list` and
/// then each child one level deeper; an atom is delegated to
/// `write_tree_atom`.
fn write_tree_expr(expr: &SExpr, indent: usize, output: &mut String) {
    for _ in 0..indent {
        output.push(' ');
    }

    let children = match &expr.kind {
        SExprKind::Atom(leaf) => return write_tree_atom(leaf, output),
        SExprKind::List(children) => children,
    };

    output.push_str("list\n");
    for child in children {
        write_tree_expr(child, indent + 1, output);
    }
}
/// Appends a single `<label> <value>` line describing `atom` to `output`.
///
/// String contents are escaped character by character with the standard
/// default escaping and wrapped in double quotes. The line always ends with a
/// newline.
fn write_tree_atom(atom: &Atom, output: &mut String) {
    let rendered = match atom {
        Atom::Ident(name) => format!("ident {}", name),
        Atom::Int(number) => format!("int {}", number),
        Atom::I64(number) => format!("i64 {}", number),
        Atom::U32(number) => format!("u32 {}", number),
        Atom::U64(number) => format!("u64 {}", number),
        Atom::Float(number) => format!("float {}", number),
        Atom::String(text) => format!("string \"{}\"", text.escape_default()),
        Atom::Arrow => String::from("arrow ->"),
    };
    output.push_str(&rendered);
    output.push('\n');
}
/// Cursor-based parser state over a borrowed token slice.
struct Parser<'a> {
/// Passed as the first argument to every `Diagnostic::new` call.
/// NOTE(review): presumably the source file name — confirm against callers.
file: &'a str,
/// The token stream being parsed.
tokens: &'a [Token],
/// Index into `tokens` of the next token to consume.
pos: usize,
/// Diagnostics recorded so far, in the order they were produced.
errors: Vec<Diagnostic>,
}
impl<'a> Parser<'a> {
fn parse_expr(&mut self) -> Option<SExpr> {
let token = self.tokens.get(self.pos)?.clone();
match token.kind {
TokenKind::LParen => self.parse_list(),
TokenKind::RParen => {
self.errors.push(
Diagnostic::new(self.file, "UnexpectedRParen", "unexpected `)`")
.with_span(token.span),
);
self.pos += 1;
None
}
TokenKind::Ident(name) => {
self.pos += 1;
Some(SExpr {
kind: SExprKind::Atom(Atom::Ident(name)),
span: token.span,
})
}
TokenKind::Int(value) => {
self.pos += 1;
Some(SExpr {
kind: SExprKind::Atom(Atom::Int(value)),
span: token.span,
})
}
TokenKind::I64(value) => {
self.pos += 1;
Some(SExpr {
kind: SExprKind::Atom(Atom::I64(value)),
span: token.span,
})
}
TokenKind::U32(value) => {
self.pos += 1;
Some(SExpr {
kind: SExprKind::Atom(Atom::U32(value)),
span: token.span,
})
}
TokenKind::U64(value) => {
self.pos += 1;
Some(SExpr {
kind: SExprKind::Atom(Atom::U64(value)),
span: token.span,
})
}
TokenKind::Float(value) => {
self.pos += 1;
Some(SExpr {
kind: SExprKind::Atom(Atom::Float(value)),
span: token.span,
})
}
TokenKind::String(value) => {
self.pos += 1;
Some(SExpr {
kind: SExprKind::Atom(Atom::String(value)),
span: token.span,
})
}
TokenKind::Arrow => {
self.pos += 1;
Some(SExpr {
kind: SExprKind::Atom(Atom::Arrow),
span: token.span,
})
}
}
}
fn parse_list(&mut self) -> Option<SExpr> {
let start = self.tokens[self.pos].span.start;
self.pos += 1;
let mut items = Vec::new();
while self.pos < self.tokens.len() {
if matches!(self.tokens[self.pos].kind, TokenKind::RParen) {
let end = self.tokens[self.pos].span.end;
self.pos += 1;
return Some(SExpr {
kind: SExprKind::List(items),
span: Span::new(start, end),
});
}
if let Some(expr) = self.parse_expr() {
items.push(expr);
} else {
break;
}
}
self.errors.push(
Diagnostic::new(self.file, "UnclosedList", "unclosed list")
.with_span(Span::new(start, start + 1))
.hint("add a closing `)`"),
);
None
}
}