use std::{collections::HashMap, fmt::Write}; use crate::{ parser::{Expr, Stmt}, tokenizer::{TokenType, ZernError, error}, }; pub struct Var { pub var_type: String, pub stack_offset: usize, } pub struct Env { scopes: Vec>, next_offset: usize, loop_begin_label: String, loop_end_label: String, } impl Env { pub fn new() -> Env { Env { scopes: vec![HashMap::new()], next_offset: 8, loop_begin_label: String::new(), loop_end_label: String::new(), } } pub fn push_scope(&mut self) { self.scopes.push(HashMap::new()); } pub fn pop_scope(&mut self) { self.scopes.pop(); } pub fn define_var(&mut self, name: String, var_type: String) -> usize { let offset = self.next_offset; self.next_offset += 8; self.scopes.last_mut().unwrap().insert(name, Var { var_type, stack_offset: offset, }); offset } pub fn get_var(&self, name: &str) -> Option<&Var> { for scope in self.scopes.iter().rev() { if let Some(var) = scope.get(name) { return Some(var); } } None } } macro_rules! emit { ($($arg:tt)*) => { let _ = writeln!($($arg)*); }; } static REGISTERS: [&str; 6] = ["rdi", "rsi", "rdx", "rcx", "r8", "r9"]; pub struct CodegenX86_64 { output: String, data_section: String, label_counter: usize, data_counter: usize, } impl CodegenX86_64 { pub fn new() -> CodegenX86_64 { CodegenX86_64 { output: String::new(), data_section: String::new(), label_counter: 0, data_counter: 1, } } fn label(&mut self) -> String { self.label_counter += 1; format!(".L{}", self.label_counter) } pub fn get_output(&self) -> String { format!( "section .data {}{}", self.data_section, self.output ) } pub fn emit_prologue(&mut self) -> Result<(), ZernError> { emit!( &mut self.output, "section .note.GNU-stack db 0 section .text extern malloc c.malloc equ malloc extern calloc c.calloc equ calloc extern realloc c.realloc equ realloc extern free c.free equ free extern puts c.puts equ puts extern printf c.printf equ printf extern sprintf c.sprintf equ sprintf extern strtol c.strtol equ strtol extern strlen c.strlen equ strlen extern strcmp c.strcmp equ strcmp extern strcat c.strcat equ strcat extern strcpy c.strcpy equ strcpy extern strdup c.strdup equ strdup extern strncpy c.strncpy equ strncpy extern syscall c.syscall equ syscall extern fopen c.fopen equ fopen extern fseek c.fseek equ fseek extern ftell c.ftell equ ftell extern fread c.fread equ fread extern fwrite c.fwrite equ fwrite extern fclose c.fclose equ fclose extern rewind c.rewind equ rewind extern system c.system equ system extern opendir c.opendir equ opendir extern readdir c.readdir equ readdir extern closedir c.closedir equ closedir extern exit c.exit equ exit extern gettimeofday c.gettimeofday equ gettimeofday extern connect c.connect equ connect extern socket c.socket equ socket extern send c.send equ send extern read c.read equ read extern close c.close equ close extern bind c.bind equ bind extern listen c.listen equ listen extern accept c.accept equ accept extern getchar c.getchar equ getchar section .text._builtin_deref8 _builtin_deref8: xor rax, rax mov al, byte [rdi] ret section .text._builtin_deref64 _builtin_deref64: mov rax, qword [rdi] ret section .text._builtin_set8 _builtin_set8: mov [rdi], sil ret section .text._builtin_set64 _builtin_set64: mov [rdi], rsi ret section .text._builtin_lshift _builtin_lshift: mov rcx, rsi mov rax, rdi shl rax, cl ret section .text._builtin_rshift _builtin_rshift: mov rcx, rsi mov rax, rdi sar rax, cl ret " ); Ok(()) } pub fn compile_stmt(&mut self, env: &mut Env, stmt: Stmt) -> Result<(), ZernError> { match stmt { Stmt::Expression(expr) => self.compile_expr(env, expr)?, Stmt::Let { name, var_type, initializer, } => { if env.get_var(&name.lexeme).is_some() { return error!( name.loc, format!("variable already defined: {}", &name.lexeme) ); } self.compile_expr(env, initializer)?; let offset = env.define_var(name.lexeme.clone(), var_type.lexeme); emit!(&mut self.output, " mov QWORD [rbp-{}], rax", offset); } Stmt::Block(statements) => { env.push_scope(); for stmt in statements { self.compile_stmt(env, stmt)?; } env.pop_scope(); } Stmt::If { condition, then_branch, else_branch, } => { let else_label = self.label(); let end_label = self.label(); self.compile_expr(env, condition)?; emit!(&mut self.output, " test rax, rax"); emit!(&mut self.output, " je {}", else_label); self.compile_stmt(env, *then_branch.clone())?; emit!(&mut self.output, " jmp {}", end_label); emit!(&mut self.output, "{}:", else_label); self.compile_stmt(env, *else_branch.clone())?; emit!(&mut self.output, "{}:", end_label); } Stmt::While { condition, body } => { let old_loop_begin_label = env.loop_begin_label.clone(); let old_loop_end_label = env.loop_end_label.clone(); env.loop_begin_label = self.label(); env.loop_end_label = self.label(); emit!(&mut self.output, "{}:", env.loop_begin_label); self.compile_expr(env, condition)?; emit!(&mut self.output, " test rax, rax"); emit!(&mut self.output, " je {}", env.loop_end_label); self.compile_stmt(env, *body.clone())?; emit!(&mut self.output, " jmp {}", env.loop_begin_label); emit!(&mut self.output, "{}:", env.loop_end_label); env.loop_begin_label = old_loop_begin_label; env.loop_end_label = old_loop_end_label; } Stmt::Function { name, params, return_type, body, } => { if name.lexeme == "main" { emit!(&mut self.output, "global {}", name.lexeme); if return_type.lexeme != "I64" { return error!(&name.loc, "main must return I64"); } } emit!(&mut self.output, "section .text.{}", name.lexeme); emit!(&mut self.output, "{}:", name.lexeme); emit!(&mut self.output, " push rbp"); emit!(&mut self.output, " mov rbp, rsp"); emit!(&mut self.output, " sub rsp, 256"); // TODO for (i, param) in params.iter().enumerate() { let offset = env .define_var(param.var_name.lexeme.clone(), param.var_type.lexeme.clone()); let reg = match REGISTERS.get(i) { Some(x) => x, None => return error!(&name.loc, "only up to 6 params allowed"), }; emit!(&mut self.output, " mov QWORD [rbp-{}], {}", offset, reg); } self.compile_stmt(env, *body)?; if name.lexeme == "main" { emit!(&mut self.output, " mov rax, 0"); } emit!(&mut self.output, " mov rsp, rbp"); emit!(&mut self.output, " pop rbp"); emit!(&mut self.output, " ret"); } Stmt::Return(expr) => { self.compile_expr(env, expr)?; emit!(&mut self.output, " mov rsp, rbp"); emit!(&mut self.output, " pop rbp"); emit!(&mut self.output, " ret"); } Stmt::For { var, start, end, body, } => { let old_loop_begin_label = env.loop_begin_label.clone(); let old_loop_end_label = env.loop_end_label.clone(); env.loop_begin_label = self.label(); env.loop_end_label = self.label(); env.push_scope(); let offset = env.define_var(var.lexeme, "I64".into()); self.compile_expr(env, start)?; emit!(&mut self.output, " mov QWORD [rbp-{}], rax", offset); emit!(&mut self.output, "{}:", env.loop_begin_label); emit!(&mut self.output, " mov rax, QWORD [rbp-{}]", offset); emit!(&mut self.output, " push rax"); self.compile_expr(env, end)?; emit!(&mut self.output, " pop rcx"); emit!(&mut self.output, " cmp rcx, rax"); emit!(&mut self.output, " jge {}", env.loop_end_label); self.compile_stmt(env, *body)?; emit!(&mut self.output, " mov rax, QWORD [rbp-{}]", offset); emit!(&mut self.output, " add rax, 1"); emit!(&mut self.output, " mov QWORD [rbp-{}], rax", offset); emit!(&mut self.output, " jmp {}", env.loop_begin_label); emit!(&mut self.output, "{}:", env.loop_end_label); env.pop_scope(); env.loop_begin_label = old_loop_begin_label; env.loop_end_label = old_loop_end_label; } Stmt::Break => { emit!(&mut self.output, " jmp {}", env.loop_end_label); } Stmt::Continue => { // TODO: skips incrementing when used in a for loop emit!(&mut self.output, " jmp {}", env.loop_begin_label); } } Ok(()) } pub fn compile_expr(&mut self, env: &mut Env, expr: Expr) -> Result<(), ZernError> { match expr { Expr::Binary { left, op, right } => { self.compile_expr(env, *left)?; emit!(&mut self.output, " push rax"); self.compile_expr(env, *right)?; emit!(&mut self.output, " mov rbx, rax"); emit!(&mut self.output, " pop rax"); match op.token_type { TokenType::Plus => { emit!(&mut self.output, " add rax, rbx"); } TokenType::Minus => { emit!(&mut self.output, " sub rax, rbx"); } TokenType::Star => { emit!(&mut self.output, " imul rax, rbx"); } TokenType::Slash => { emit!(&mut self.output, " cqo"); emit!(&mut self.output, " idiv rbx"); } TokenType::Mod => { emit!(&mut self.output, " cqo"); emit!(&mut self.output, " idiv rbx"); emit!(&mut self.output, " mov rax, rdx"); } TokenType::Xor => { emit!(&mut self.output, " xor rax, rbx"); } TokenType::And => { emit!(&mut self.output, " and rax, rbx"); } TokenType::Or => { emit!(&mut self.output, " or rax, rbx"); } TokenType::DoubleEqual => { emit!(&mut self.output, " cmp rax, rbx"); emit!(&mut self.output, " sete al"); emit!(&mut self.output, " movzx rax, al"); } TokenType::NotEqual => { emit!(&mut self.output, " cmp rax, rbx"); emit!(&mut self.output, " setne al"); emit!(&mut self.output, " movzx rax, al"); } TokenType::Greater => { emit!(&mut self.output, " cmp rax, rbx"); emit!(&mut self.output, " setg al"); emit!(&mut self.output, " movzx rax, al"); } TokenType::GreaterEqual => { emit!(&mut self.output, " cmp rax, rbx"); emit!(&mut self.output, " setge al"); emit!(&mut self.output, " movzx rax, al"); } TokenType::Less => { emit!(&mut self.output, " cmp rax, rbx"); emit!(&mut self.output, " setl al"); emit!(&mut self.output, " movzx rax, al"); } TokenType::LessEqual => { emit!(&mut self.output, " cmp rax, rbx"); emit!(&mut self.output, " setle al"); emit!(&mut self.output, " movzx rax, al"); } _ => unreachable!(), } } Expr::Grouping(expr) => self.compile_expr(env, *expr)?, Expr::Literal(token) => match token.token_type { TokenType::Number => { emit!(&mut self.output, " mov rax, {}", token.lexeme); } TokenType::Char => { emit!( &mut self.output, " mov rax, {}", token.lexeme.chars().nth(1).unwrap() as u8 ); } TokenType::String => { // TODO: actual string parsing in the tokenizer let value = &token.lexeme[1..token.lexeme.len() - 1] .replace("\\n", "\n") .replace("\\r", "\r") .replace("\\t", "\t") .replace("\\033", "\x1b") .replace("\\0", "\0"); let label = format!("str_{:03}", self.data_counter); if value.is_empty() { emit!(&mut self.data_section, " {} db 0", label); } else { let charcodes = value .chars() .map(|x| (x as u8).to_string()) .collect::>() .join(","); emit!(&mut self.data_section, " {} db {},0", label, charcodes,); } emit!(&mut self.output, " mov rax, {}", label); self.data_counter += 1; } TokenType::True => { emit!(&mut self.output, " mov rax, 1"); } TokenType::False => { emit!(&mut self.output, " mov rax, 0"); } _ => unreachable!(), }, Expr::Unary { op, right } => { self.compile_expr(env, *right)?; match op.token_type { TokenType::Minus => { emit!(&mut self.output, " neg rax"); } TokenType::Bang => { emit!(&mut self.output, " test rax, rax"); emit!(&mut self.output, " sete al"); emit!(&mut self.output, " movzx rax, al"); } _ => unreachable!(), } } Expr::Variable(name) => { let var = match env.get_var(&name.lexeme) { Some(x) => x, None => { return error!(name.loc, format!("undefined variable: {}", &name.lexeme)); } }; emit!( &mut self.output, " mov rax, QWORD [rbp-{}]", var.stack_offset, ); } Expr::Assign { name, value } => { self.compile_expr(env, *value)?; let var = match env.get_var(&name.lexeme) { Some(x) => x, None => { return error!(name.loc, format!("undefined variable: {}", &name.lexeme)); } }; emit!( &mut self.output, " mov QWORD [rbp-{}], rax", var.stack_offset, ); } Expr::Call { callee, paren, args, } => { let callee = match *callee { Expr::Variable(name) => name.lexeme, _ => return error!(&paren.loc, "tried to call a non-constant expression"), }; for arg in &args { self.compile_expr(env, arg.clone())?; emit!(&mut self.output, " push rax"); } for i in (0..args.len()).rev() { let reg = match REGISTERS.get(i) { Some(x) => x, None => return error!(&paren.loc, "only up to 6 args allowed"), }; emit!(&mut self.output, " pop {}", reg); } emit!(&mut self.output, " call {}", callee); } Expr::ArrayLiteral(exprs) => { emit!(&mut self.output, " call array.new"); emit!(&mut self.output, " push rax"); for expr in exprs { self.compile_expr(env, expr)?; emit!(&mut self.output, " mov rsi, rax"); emit!(&mut self.output, " pop rdi"); emit!(&mut self.output, " push rdi"); emit!(&mut self.output, " call array.push"); } emit!(&mut self.output, " pop rax"); } Expr::Index { expr, index } => { self.compile_expr(env, *expr)?; emit!(&mut self.output, " push rax"); self.compile_expr(env, *index)?; emit!(&mut self.output, " pop rbx"); emit!(&mut self.output, " mov rbx, [rbx]"); emit!(&mut self.output, " mov rax, [rbx + rax*8]"); } } Ok(()) } }