parse blocks

This commit is contained in:
2025-05-29 20:32:54 +02:00
parent 01cc38f31d
commit 47fd9a36a4
4 changed files with 199 additions and 26 deletions

View File

@@ -1,4 +1,4 @@
use std::fmt;
use std::{cmp::Ordering, error::Error, fmt};
#[derive(Debug, Clone, PartialEq)]
pub enum TokenType {
@@ -32,7 +32,12 @@ pub enum TokenType {
KeywordPrint,
KeywordLet,
KeywordIf,
KeywordElse,
KeywordWhile,
Indent,
Dedent,
Eof,
}
@@ -52,10 +57,10 @@ impl std::error::Error for MotError {}
macro_rules! error {
($loc:expr, $msg:expr) => {
Err(MotError {
Err(Box::new(MotError {
loc: $loc.clone(),
message: $msg.into(),
})
}))
};
}
@@ -84,6 +89,8 @@ pub struct Token {
pub struct Tokenizer {
source: Vec<char>,
tokens: Vec<Token>,
indent_stack: Vec<usize>,
current_indent: usize,
start: usize,
current: usize,
loc: Loc,
@@ -94,6 +101,8 @@ impl Tokenizer {
Tokenizer {
source: source.chars().collect(),
tokens: vec![],
indent_stack: vec![0],
current_indent: 0,
start: 0,
current: 0,
loc: Loc {
@@ -104,7 +113,7 @@ impl Tokenizer {
}
}
pub fn tokenize(mut self) -> Result<Vec<Token>, MotError> {
pub fn tokenize(mut self) -> Result<Vec<Token>, Box<dyn Error>> {
while !self.eof() {
self.start = self.current;
self.scan_token()?;
@@ -118,7 +127,7 @@ impl Tokenizer {
Ok(self.tokens)
}
fn scan_token(&mut self) -> Result<(), MotError> {
fn scan_token(&mut self) -> Result<(), Box<dyn Error>> {
match self.advance() {
'(' => self.add_token(TokenType::LeftParen),
')' => self.add_token(TokenType::RightParen),
@@ -202,6 +211,7 @@ impl Tokenizer {
'\n' => {
self.loc.line += 1;
self.loc.column = 1;
self.handle_indentation()?;
}
'0'..='9' => self.scan_number(),
'A'..='Z' | 'a'..='z' | '_' => self.scan_identifier(),
@@ -210,6 +220,59 @@ impl Tokenizer {
Ok(())
}
/// Updates the indentation state at the start of a line and emits the matching
/// `Indent` / `Dedent` tokens.
///
/// Invoked by `scan_token` right after a `'\n'` has been consumed. The line's leading
/// spaces and tabs are consumed through `count_indentation`, and the resulting width
/// is compared with `current_indent`:
///
/// * wider   -> the width is pushed on `indent_stack` and one `Indent` token is emitted;
/// * narrower -> one `Dedent` token is emitted per stack level wider than the new width;
/// * equal   -> nothing is emitted.
///
/// Lines that contain only spaces/tabs before the next `'\n'` are ignored entirely, so
/// blank or whitespace-only lines never open or close a block.
///
/// # Errors
///
/// Returns an "invalid indentation" error when a dedent lands on a width that does not
/// match any level remaining on `indent_stack`.
fn handle_indentation(&mut self) -> Result<(), Box<dyn Error>> {
    let new_indent = self.count_indentation();

    // The blank-line test has to happen *after* the leading whitespace is consumed:
    // otherwise a line such as "  \n" would be measured as a real line and could emit
    // spurious Indent/Dedent tokens or fail with "invalid indentation".
    if self.peek() == '\n' {
        return Ok(());
    }

    match new_indent.cmp(&self.current_indent) {
        Ordering::Greater => {
            self.indent_stack.push(new_indent);
            self.tokens.push(Token {
                token_type: TokenType::Indent,
                lexeme: String::new(),
                loc: self.loc.clone(),
            });
        }
        Ordering::Less => {
            // Close every block that is indented deeper than the new line.
            while matches!(self.indent_stack.last(), Some(&top) if top > new_indent) {
                self.indent_stack.pop();
                self.tokens.push(Token {
                    token_type: TokenType::Dedent,
                    lexeme: String::new(),
                    loc: self.loc.clone(),
                });
            }
            // The new width must line up exactly with an enclosing block.
            if self.indent_stack.last() != Some(&new_indent) {
                return error!(self.loc, "invalid indentation");
            }
        }
        Ordering::Equal => {}
    }

    self.current_indent = new_indent;
    Ok(())
}
/// Consumes the run of spaces and tabs at the current position and returns its width.
///
/// A space contributes 1 to the width and a tab contributes 4. Scanning stops at the
/// first character that is neither, which is left unconsumed.
fn count_indentation(&mut self) -> usize {
    // Width contributed by a single tab character.
    const TAB_WIDTH: usize = 4;

    let mut width = 0;
    loop {
        match self.peek() {
            ' ' => width += 1,
            '\t' => width += TAB_WIDTH,
            _ => break,
        }
        self.advance();
    }
    width
}
fn scan_number(&mut self) {
while self.peek().is_ascii_digit() {
self.advance();
@@ -234,6 +297,9 @@ impl Tokenizer {
self.add_token(match lexeme.as_str() {
"print" => TokenType::KeywordPrint,
"let" => TokenType::KeywordLet,
"if" => TokenType::KeywordIf,
"else" => TokenType::KeywordElse,
"while" => TokenType::KeywordWhile,
_ => TokenType::Identifier,
})
}