From 47fd9a36a41df45da25df61c1e07511875a4dc42 Mon Sep 17 00:00:00 2001
From: Toni
Date: Thu, 29 May 2025 20:32:54 +0200
Subject: [PATCH] parse blocks

---
 src/codegen.rs   |  15 ++++
 src/parser.rs    | 120 +++++++++++++++++++++++++++++++++++++++++------
 src/tokenizer.rs |  76 ++++++++++++++++++++++++++++--
 test.mot         |  14 +++---
 4 files changed, 199 insertions(+), 26 deletions(-)

diff --git a/src/codegen.rs b/src/codegen.rs
index 46c3d4c..f6984ef 100644
--- a/src/codegen.rs
+++ b/src/codegen.rs
@@ -83,6 +83,21 @@ section .note.GNU-stack
                 env.locals.insert(name.lexeme, offset);
                 writeln!(&mut self.output, " mov QWORD [rbp-{}], rax", offset)?;
             }
+            Stmt::Block(statements) => {
+                let mut env = Env::new();
+                for stmt in statements {
+                    self.compile_stmt(&mut env, stmt)?;
+                }
+            }
+            Stmt::If {
+                condition: _,
+                then_branch: _,
+                else_branch: _,
+            } => todo!(),
+            Stmt::While {
+                condition: _,
+                body: _,
+            } => todo!(),
         }
         Ok(())
     }
diff --git a/src/parser.rs b/src/parser.rs
index 869436a..42e96d2 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -1,10 +1,25 @@
+use std::error::Error;
+
 use crate::tokenizer::{MotError, Token, TokenType, error};
 
 #[derive(Debug, Clone)]
 pub enum Stmt {
     Expression(Expr),
     Print(Expr),
-    Var { name: Token, initializer: Expr },
+    Var {
+        name: Token,
+        initializer: Expr,
+    },
+    Block(Vec<Stmt>),
+    If {
+        condition: Expr,
+        then_branch: Box<Stmt>,
+        else_branch: Option<Box<Stmt>>,
+    },
+    While {
+        condition: Expr,
+        body: Box<Stmt>,
+    },
 }
 
 #[derive(Debug, Clone)]
@@ -37,7 +52,7 @@ impl Parser {
         Parser { tokens, current: 0 }
     }
 
-    pub fn parse(mut self) -> Result<Vec<Stmt>, MotError> {
+    pub fn parse(mut self) -> Result<Vec<Stmt>, Box<dyn Error>> {
         let mut statements = vec![];
         while !self.eof() {
             statements.push(self.declaration()?);
@@ -45,7 +60,7 @@ impl Parser {
         Ok(statements)
     }
 
-    fn declaration(&mut self) -> Result<Stmt, MotError> {
+    fn declaration(&mut self) -> Result<Stmt, Box<dyn Error>> {
         // TODO: synchronization after parse error
         if self.match_token(&[TokenType::KeywordLet]) {
             self.let_declaration()
@@ -54,27 +69,70 @@ impl Parser {
         }
     }
 
-    fn let_declaration(&mut self) -> Result<Stmt, MotError> {
+    fn let_declaration(&mut self) -> Result<Stmt, Box<dyn Error>> {
         let name = self.consume(TokenType::Identifier, "expected variable name")?;
         self.consume(TokenType::Equal, "expected '=' after variable name")?;
         let initializer = self.expression()?;
         Ok(Stmt::Var { name, initializer })
     }
 
-    fn statement(&mut self) -> Result<Stmt, MotError> {
+    fn block(&mut self) -> Result<Stmt, Box<dyn Error>> {
+        self.consume(TokenType::Indent, "expected an indent")?;
+
+        let mut statements = vec![];
+        while !self.eof() && !self.match_token(&[TokenType::Dedent]) {
+            statements.push(self.declaration()?);
+        }
+
+        Ok(Stmt::Block(statements))
+    }
+
+    fn statement(&mut self) -> Result<Stmt, Box<dyn Error>> {
         if self.match_token(&[TokenType::KeywordPrint]) {
             Ok(Stmt::Print(self.expression()?))
+        } else if self.match_token(&[TokenType::KeywordIf]) {
+            self.if_statement()
+        } else if self.match_token(&[TokenType::KeywordWhile]) {
+            self.while_statement()
         } else {
             Ok(Stmt::Expression(self.expression()?))
         }
     }
 
-    fn expression(&mut self) -> Result<Expr, MotError> {
+    fn if_statement(&mut self) -> Result<Stmt, Box<dyn Error>> {
+        let condition = self.expression()?;
+        let then_branch = self.block()?;
+        let else_branch = if self.match_token(&[TokenType::KeywordElse]) {
+            if self.match_token(&[TokenType::KeywordIf]) {
+                Some(Box::new(self.if_statement()?))
+            } else {
+                Some(Box::new(self.block()?))
+            }
+        } else {
+            None
+        };
+        Ok(Stmt::If {
+            condition,
+            then_branch: Box::new(then_branch),
+            else_branch,
+        })
+    }
+
+    fn while_statement(&mut self) -> Result<Stmt, Box<dyn Error>> {
+        let condition = self.expression()?;
+        let body = self.block()?;
+        Ok(Stmt::While {
+            condition,
+            body: Box::new(body),
+        })
+    }
+
+    fn expression(&mut self) -> Result<Expr, Box<dyn Error>> {
         self.assignment()
     }
 
-    fn assignment(&mut self) -> Result<Expr, MotError> {
-        let expr = self.equality()?;
+    fn assignment(&mut self) -> Result<Expr, Box<dyn Error>> {
+        let expr = self.logical_or()?;
 
         if self.match_token(&[TokenType::Equal]) {
             let equals = self.previous().clone();
@@ -92,7 +150,39 @@ impl Parser {
         Ok(expr)
     }
 
-    fn equality(&mut self) -> Result<Expr, MotError> {
+    fn logical_or(&mut self) -> Result<Expr, Box<dyn Error>> {
+        let mut expr = self.logical_and()?;
+
+        while self.match_token(&[TokenType::Or]) {
+            let op = self.previous().clone();
+            let right = self.logical_and()?;
+            expr = Expr::Binary {
+                left: Box::new(expr),
+                op,
+                right: Box::new(right),
+            }
+        }
+
+        Ok(expr)
+    }
+
+    fn logical_and(&mut self) -> Result<Expr, Box<dyn Error>> {
+        let mut expr = self.equality()?;
+
+        while self.match_token(&[TokenType::And]) {
+            let op = self.previous().clone();
+            let right = self.equality()?;
+            expr = Expr::Binary {
+                left: Box::new(expr),
+                op,
+                right: Box::new(right),
+            }
+        }
+
+        Ok(expr)
+    }
+
+    fn equality(&mut self) -> Result<Expr, Box<dyn Error>> {
         let mut expr = self.comparison()?;
 
         while self.match_token(&[TokenType::DoubleEqual, TokenType::NotEqual]) {
@@ -108,7 +198,7 @@ impl Parser {
         Ok(expr)
     }
 
-    fn comparison(&mut self) -> Result<Expr, MotError> {
+    fn comparison(&mut self) -> Result<Expr, Box<dyn Error>> {
         let mut expr = self.term()?;
 
         while self.match_token(&[
@@ -129,7 +219,7 @@ impl Parser {
         Ok(expr)
     }
 
-    fn term(&mut self) -> Result<Expr, MotError> {
+    fn term(&mut self) -> Result<Expr, Box<dyn Error>> {
         let mut expr = self.factor()?;
 
         while self.match_token(&[TokenType::Plus, TokenType::Minus, TokenType::Xor]) {
@@ -145,7 +235,7 @@ impl Parser {
         Ok(expr)
     }
 
-    fn factor(&mut self) -> Result<Expr, MotError> {
+    fn factor(&mut self) -> Result<Expr, Box<dyn Error>> {
         let mut expr = self.unary()?;
 
         while self.match_token(&[TokenType::Star, TokenType::Slash, TokenType::Mod]) {
@@ -161,7 +251,7 @@ impl Parser {
         Ok(expr)
     }
 
-    fn unary(&mut self) -> Result<Expr, MotError> {
+    fn unary(&mut self) -> Result<Expr, Box<dyn Error>> {
         if self.match_token(&[TokenType::Bang, TokenType::Minus]) {
             let op = self.previous().clone();
             let right = self.unary()?;
@@ -174,7 +264,7 @@ impl Parser {
         self.primary()
     }
 
-    fn primary(&mut self) -> Result<Expr, MotError> {
+    fn primary(&mut self) -> Result<Expr, Box<dyn Error>> {
         if self.match_token(&[TokenType::Number, TokenType::String]) {
             Ok(Expr::Literal(self.previous().clone()))
         } else if self.match_token(&[TokenType::LeftParen]) {
@@ -188,7 +278,7 @@ impl Parser {
         }
     }
 
-    fn consume(&mut self, token_type: TokenType, message: &str) -> Result<Token, MotError> {
+    fn consume(&mut self, token_type: TokenType, message: &str) -> Result<Token, Box<dyn Error>> {
         if self.check(&token_type) {
             Ok(self.advance().clone())
         } else {
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index b0e64b5..b2d51a4 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -1,4 +1,4 @@
-use std::fmt;
+use std::{cmp::Ordering, error::Error, fmt};
 
 #[derive(Debug, Clone, PartialEq)]
 pub enum TokenType {
@@ -32,7 +32,12 @@ pub enum TokenType {
 
     KeywordPrint,
     KeywordLet,
+    KeywordIf,
+    KeywordElse,
+    KeywordWhile,
 
+    Indent,
+    Dedent,
     Eof,
 }
 
@@ -52,10 +57,10 @@ impl std::error::Error for MotError {}
 
 macro_rules! error {
     ($loc:expr, $msg:expr) => {
-        Err(MotError {
+        Err(Box::new(MotError {
             loc: $loc.clone(),
             message: $msg.into(),
-        })
+        }))
     };
 }
 
@@ -84,6 +89,8 @@ pub struct Token {
 pub struct Tokenizer {
     source: Vec<char>,
     tokens: Vec<Token>,
+    indent_stack: Vec<usize>,
+    current_indent: usize,
     start: usize,
     current: usize,
     loc: Loc,
@@ -94,6 +101,8 @@ impl Tokenizer {
         Tokenizer {
            source: source.chars().collect(),
            tokens: vec![],
+           indent_stack: vec![0],
+           current_indent: 0,
            start: 0,
            current: 0,
            loc: Loc {
@@ -104,7 +113,7 @@ impl Tokenizer {
         }
     }
 
-    pub fn tokenize(mut self) -> Result<Vec<Token>, MotError> {
+    pub fn tokenize(mut self) -> Result<Vec<Token>, Box<dyn Error>> {
         while !self.eof() {
             self.start = self.current;
             self.scan_token()?;
@@ -118,7 +127,7 @@ impl Tokenizer {
         Ok(self.tokens)
     }
 
-    fn scan_token(&mut self) -> Result<(), MotError> {
+    fn scan_token(&mut self) -> Result<(), Box<dyn Error>> {
         match self.advance() {
             '(' => self.add_token(TokenType::LeftParen),
             ')' => self.add_token(TokenType::RightParen),
@@ -202,6 +211,7 @@ impl Tokenizer {
             '\n' => {
                 self.loc.line += 1;
                 self.loc.column = 1;
+                self.handle_indentation()?;
             }
             '0'..='9' => self.scan_number(),
             'A'..='Z' | 'a'..='z' | '_' => self.scan_identifier(),
@@ -210,6 +220,59 @@ impl Tokenizer {
         Ok(())
     }
 
+    fn handle_indentation(&mut self) -> Result<(), Box<dyn Error>> {
+        if self.peek() == '\n' {
+            return Ok(());
+        }
+        let new_indent = self.count_indentation();
+
+        match new_indent.cmp(&self.current_indent) {
+            Ordering::Greater => {
+                self.indent_stack.push(new_indent);
+                self.tokens.push(Token {
+                    token_type: TokenType::Indent,
+                    lexeme: String::new(),
+                    loc: self.loc.clone(),
+                });
+            }
+            Ordering::Less => {
+                while !self.indent_stack.is_empty()
+                    && *self.indent_stack.last().unwrap() > new_indent
+                {
+                    self.indent_stack.pop();
+                    self.tokens.push(Token {
+                        token_type: TokenType::Dedent,
+                        lexeme: String::new(),
+                        loc: self.loc.clone(),
+                    });
+                }
+                if self.indent_stack.is_empty() || *self.indent_stack.last().unwrap() != new_indent
+                {
+                    return error!(self.loc, "invalid indentation");
+                }
+            }
+            Ordering::Equal => {}
+        }
+
+        self.current_indent = new_indent;
+        Ok(())
+    }
+
+    fn count_indentation(&mut self) -> usize {
+        let mut count = 0;
+
+        while self.peek() == ' ' || self.peek() == '\t' {
+            if self.peek() == ' ' {
+                count += 1;
+            }
+            if self.peek() == '\t' {
+                count += 4;
+            }
+            self.advance();
+        }
+        count
+    }
+
     fn scan_number(&mut self) {
         while self.peek().is_ascii_digit() {
             self.advance();
@@ -234,6 +297,9 @@ impl Tokenizer {
         self.add_token(match lexeme.as_str() {
             "print" => TokenType::KeywordPrint,
             "let" => TokenType::KeywordLet,
+            "if" => TokenType::KeywordIf,
+            "else" => TokenType::KeywordElse,
+            "while" => TokenType::KeywordWhile,
             _ => TokenType::Identifier,
         })
     }
diff --git a/test.mot b/test.mot
index 4013d61..f2f9708 100644
--- a/test.mot
+++ b/test.mot
@@ -1,6 +1,8 @@
-let a = 5
-let b = 7
-print(b - a)
-a = 4
-print(b - a)
-print(b)
\ No newline at end of file
+let a = 0
+let b = 1
+let temp = 0
+
+while a < 10000
+    print a
+    temp = a
+    b = temp + b
\ No newline at end of file