Files
zern/src/codegen_x86_64.rs
2025-07-02 13:56:30 +02:00

631 lines
20 KiB
Rust

use std::{collections::HashMap, fmt::Write};
use crate::{
parser::{Expr, Stmt},
tokenizer::{TokenType, ZernError, error},
};
pub struct Var {
pub var_type: String,
pub stack_offset: usize,
}
pub struct Env {
scopes: Vec<HashMap<String, Var>>,
next_offset: usize,
loop_begin_label: String,
loop_end_label: String,
}
impl Env {
pub fn new() -> Env {
Env {
scopes: vec![HashMap::new()],
next_offset: 8,
loop_begin_label: String::new(),
loop_end_label: String::new(),
}
}
pub fn push_scope(&mut self) {
self.scopes.push(HashMap::new());
}
pub fn pop_scope(&mut self) {
self.scopes.pop();
}
pub fn define_var(&mut self, name: String, var_type: String) -> usize {
let offset = self.next_offset;
self.next_offset += 8;
self.scopes.last_mut().unwrap().insert(name, Var {
var_type,
stack_offset: offset,
});
offset
}
pub fn get_var(&self, name: &str) -> Option<&Var> {
for scope in self.scopes.iter().rev() {
if let Some(var) = scope.get(name) {
return Some(var);
}
}
None
}
}
macro_rules! emit {
($($arg:tt)*) => {
let _ = writeln!($($arg)*);
};
}
static REGISTERS: [&str; 6] = ["rdi", "rsi", "rdx", "rcx", "r8", "r9"];
pub struct CodegenX86_64 {
output: String,
data_section: String,
label_counter: usize,
data_counter: usize,
}
impl CodegenX86_64 {
pub fn new() -> CodegenX86_64 {
CodegenX86_64 {
output: String::new(),
data_section: String::new(),
label_counter: 0,
data_counter: 1,
}
}
fn label(&mut self) -> String {
self.label_counter += 1;
format!(".L{}", self.label_counter)
}
pub fn get_output(&self) -> String {
format!(
"section .data
{}{}",
self.data_section, self.output
)
}
pub fn emit_prologue(&mut self) -> Result<(), ZernError> {
emit!(
&mut self.output,
"section .note.GNU-stack
db 0
section .text
extern stdin
extern malloc
extern calloc
extern realloc
extern free
extern printf
extern sprintf
extern strtol
extern strlen
extern strcmp
extern strcat
extern strcpy
extern strdup
extern strncpy
extern fgets
extern fopen
extern fseek
extern ftell
extern fread
extern fwrite
extern fclose
extern rewind
extern system
extern opendir
extern readdir
extern closedir
extern exit
extern gettimeofday
extern connect
extern inet_addr
extern socket
extern send
extern read
extern close
extern bind
extern listen
extern accept
extern getchar
section .text._builtin_deref8
_builtin_deref8:
xor rax, rax
mov al, byte [rdi]
ret
section .text._builtin_deref64
_builtin_deref64:
mov rax, qword [rdi]
ret
section .text._builtin_stdin
_builtin_stdin:
mov rax, [rel stdin]
ret
section .text._builtin_lshift
_builtin_lshift:
mov rcx, rsi
mov rax, rdi
shl rax, cl
ret
section .text._builtin_rshift
_builtin_rshift:
mov rcx, rsi
mov rax, rdi
sar rax, cl
ret
section .text._builtin_string_set
_builtin_string_set:
mov [rdi + rsi], dl
ret
section .text._builtin_listdir
_builtin_listdir:
push r14
push rbx
push rax
mov r14, rdi
call Array.new
mov rbx, rax
mov rdi, r14
call opendir
mov r14, rax
._builtin_listdir.1:
mov rdi, r14
call readdir
test rax, rax
je ._builtin_listdir.3
cmp byte [rax+19], 46
jne ._builtin_listdir.2
movzx ecx, byte [rax+20]
test ecx, ecx
je ._builtin_listdir.1
cmp ecx, 46
jne ._builtin_listdir.2
cmp byte [rax+21], 0
je ._builtin_listdir.1
._builtin_listdir.2:
add rax, 19
mov rdi, rax
call strdup
mov rsi, rax
mov rdi, rbx
call Array.push
jmp ._builtin_listdir.1
._builtin_listdir.3:
mov rdi, r14
call closedir
mov rax, rbx
add rsp, 8
pop rbx
pop r14
ret
section .text._builtin_array_set
_builtin_array_set:
mov rax, [rdi]
mov [rax + rsi*8], rdx
ret
section .text._builtin_array_push
_builtin_array_push:
push r14
push rbx
push rax
mov r14, rsi
mov rbx, rdi
mov rax, [rdi]
mov rcx, [rdi + 16]
cmp rcx, [rdi + 8]
jne ._builtin_array_push.1
lea rdx, [rcx + rcx]
mov rsi, 4
test rcx, rcx
cmovnz rsi, rdx
mov [rbx + 8], rsi
shl rsi, 3
mov rdi, rax
call realloc
mov [rbx], rax
mov rcx, [rbx + 16]
._builtin_array_push.1:
mov [rax + rcx*8], r14
inc qword [rbx + 16]
add rsp, 8
pop rbx
pop r14
ret
section .text._builtin_array_size
_builtin_array_size:
mov rax, [rdi + 16]
ret
section .text._builtin_array_free
_builtin_array_free:
push rbx
mov rbx, rdi
mov rdi, [rdi]
call free
mov rdi, rbx
pop rbx
jmp free
"
);
Ok(())
}
pub fn compile_stmt(&mut self, env: &mut Env, stmt: Stmt) -> Result<(), ZernError> {
match stmt {
Stmt::Expression(expr) => self.compile_expr(env, expr)?,
Stmt::Let {
name,
var_type,
initializer,
} => {
if env.get_var(&name.lexeme).is_some() {
return error!(
name.loc,
format!("variable already defined: {}", &name.lexeme)
);
}
self.compile_expr(env, initializer)?;
let offset = env.define_var(name.lexeme.clone(), var_type.lexeme);
emit!(&mut self.output, " mov QWORD [rbp-{}], rax", offset);
}
Stmt::Block(statements) => {
env.push_scope();
for stmt in statements {
self.compile_stmt(env, stmt)?;
}
env.pop_scope();
}
Stmt::If {
condition,
then_branch,
else_branch,
} => {
let else_label = self.label();
let end_label = self.label();
self.compile_expr(env, condition)?;
emit!(&mut self.output, " test rax, rax");
emit!(&mut self.output, " je {}", else_label);
self.compile_stmt(env, *then_branch.clone())?;
emit!(&mut self.output, " jmp {}", end_label);
emit!(&mut self.output, "{}:", else_label);
self.compile_stmt(env, *else_branch.clone())?;
emit!(&mut self.output, "{}:", end_label);
}
Stmt::While { condition, body } => {
let old_loop_begin_label = env.loop_begin_label.clone();
let old_loop_end_label = env.loop_end_label.clone();
env.loop_begin_label = self.label();
env.loop_end_label = self.label();
emit!(&mut self.output, "{}:", env.loop_begin_label);
self.compile_expr(env, condition)?;
emit!(&mut self.output, " test rax, rax");
emit!(&mut self.output, " je {}", env.loop_end_label);
self.compile_stmt(env, *body.clone())?;
emit!(&mut self.output, " jmp {}", env.loop_begin_label);
emit!(&mut self.output, "{}:", env.loop_end_label);
env.loop_begin_label = old_loop_begin_label;
env.loop_end_label = old_loop_end_label;
}
Stmt::Function {
name,
params,
return_type,
body,
} => {
if name.lexeme == "main" {
emit!(&mut self.output, "global {}", name.lexeme);
if return_type.lexeme != "I64" {
return error!(&name.loc, "main must return I64");
}
}
emit!(&mut self.output, "section .text.{}", name.lexeme);
emit!(&mut self.output, "{}:", name.lexeme);
emit!(&mut self.output, " push rbp");
emit!(&mut self.output, " mov rbp, rsp");
emit!(&mut self.output, " sub rsp, 256"); // TODO
for (i, param) in params.iter().enumerate() {
let offset = env
.define_var(param.var_name.lexeme.clone(), param.var_type.lexeme.clone());
let reg = match REGISTERS.get(i) {
Some(x) => x,
None => return error!(&name.loc, "only up to 6 params allowed"),
};
emit!(&mut self.output, " mov QWORD [rbp-{}], {}", offset, reg);
}
self.compile_stmt(env, *body)?;
if name.lexeme == "main" {
emit!(&mut self.output, " mov rax, 0");
}
emit!(&mut self.output, " mov rsp, rbp");
emit!(&mut self.output, " pop rbp");
emit!(&mut self.output, " ret");
}
Stmt::Return(expr) => {
self.compile_expr(env, expr)?;
emit!(&mut self.output, " mov rsp, rbp");
emit!(&mut self.output, " pop rbp");
emit!(&mut self.output, " ret");
}
Stmt::For {
var,
start,
end,
body,
} => {
let old_loop_begin_label = env.loop_begin_label.clone();
let old_loop_end_label = env.loop_end_label.clone();
env.loop_begin_label = self.label();
env.loop_end_label = self.label();
env.push_scope();
let offset = env.define_var(var.lexeme, "I64".into());
self.compile_expr(env, start)?;
emit!(&mut self.output, " mov QWORD [rbp-{}], rax", offset);
emit!(&mut self.output, "{}:", env.loop_begin_label);
emit!(&mut self.output, " mov rax, QWORD [rbp-{}]", offset);
emit!(&mut self.output, " push rax");
self.compile_expr(env, end)?;
emit!(&mut self.output, " pop rcx");
emit!(&mut self.output, " cmp rcx, rax");
emit!(&mut self.output, " jge {}", env.loop_end_label);
self.compile_stmt(env, *body)?;
emit!(&mut self.output, " mov rax, QWORD [rbp-{}]", offset);
emit!(&mut self.output, " add rax, 1");
emit!(&mut self.output, " mov QWORD [rbp-{}], rax", offset);
emit!(&mut self.output, " jmp {}", env.loop_begin_label);
emit!(&mut self.output, "{}:", env.loop_end_label);
env.pop_scope();
env.loop_begin_label = old_loop_begin_label;
env.loop_end_label = old_loop_end_label;
}
Stmt::Break => {
emit!(&mut self.output, " jmp {}", env.loop_end_label);
}
Stmt::Continue => {
// TODO: skips incrementing when used in a for loop
emit!(&mut self.output, " jmp {}", env.loop_begin_label);
}
}
Ok(())
}
pub fn compile_expr(&mut self, env: &mut Env, expr: Expr) -> Result<(), ZernError> {
match expr {
Expr::Binary { left, op, right } => {
self.compile_expr(env, *left)?;
emit!(&mut self.output, " push rax");
self.compile_expr(env, *right)?;
emit!(&mut self.output, " mov rbx, rax");
emit!(&mut self.output, " pop rax");
match op.token_type {
TokenType::Plus => {
emit!(&mut self.output, " add rax, rbx");
}
TokenType::Minus => {
emit!(&mut self.output, " sub rax, rbx");
}
TokenType::Star => {
emit!(&mut self.output, " imul rax, rbx");
}
TokenType::Slash => {
emit!(&mut self.output, " cqo");
emit!(&mut self.output, " idiv rbx");
}
TokenType::Mod => {
emit!(&mut self.output, " cqo");
emit!(&mut self.output, " idiv rbx");
emit!(&mut self.output, " mov rax, rdx");
}
TokenType::Xor => {
emit!(&mut self.output, " xor rax, rbx");
}
TokenType::And => {
emit!(&mut self.output, " and rax, rbx");
}
TokenType::Or => {
emit!(&mut self.output, " or rax, rbx");
}
TokenType::DoubleEqual => {
emit!(&mut self.output, " cmp rax, rbx");
emit!(&mut self.output, " sete al");
emit!(&mut self.output, " movzx rax, al");
}
TokenType::NotEqual => {
emit!(&mut self.output, " cmp rax, rbx");
emit!(&mut self.output, " setne al");
emit!(&mut self.output, " movzx rax, al");
}
TokenType::Greater => {
emit!(&mut self.output, " cmp rax, rbx");
emit!(&mut self.output, " setg al");
emit!(&mut self.output, " movzx rax, al");
}
TokenType::GreaterEqual => {
emit!(&mut self.output, " cmp rax, rbx");
emit!(&mut self.output, " setge al");
emit!(&mut self.output, " movzx rax, al");
}
TokenType::Less => {
emit!(&mut self.output, " cmp rax, rbx");
emit!(&mut self.output, " setl al");
emit!(&mut self.output, " movzx rax, al");
}
TokenType::LessEqual => {
emit!(&mut self.output, " cmp rax, rbx");
emit!(&mut self.output, " setle al");
emit!(&mut self.output, " movzx rax, al");
}
_ => unreachable!(),
}
}
Expr::Grouping(expr) => self.compile_expr(env, *expr)?,
Expr::Literal(token) => match token.token_type {
TokenType::Number => {
emit!(&mut self.output, " mov rax, {}", token.lexeme);
}
TokenType::Char => {
emit!(
&mut self.output,
" mov rax, {}",
token.lexeme.chars().nth(1).unwrap() as u8
);
}
TokenType::String => {
// TODO: actual string parsing in the tokenizer
let value = &token.lexeme[1..token.lexeme.len() - 1]
.replace("\\n", "\n")
.replace("\\r", "\r")
.replace("\\t", "\t")
.replace("\\033", "\x1b")
.replace("\\0", "\0");
if value.is_empty() {
emit!(&mut self.data_section, " S{} db 0", self.data_counter);
} else {
let charcodes = value
.chars()
.map(|x| (x as u8).to_string())
.collect::<Vec<String>>()
.join(",");
emit!(
&mut self.data_section,
" S{} db {},0",
self.data_counter,
charcodes,
);
}
emit!(&mut self.output, " mov rax, S{}", self.data_counter);
self.data_counter += 1;
}
TokenType::True => {
emit!(&mut self.output, " mov rax, 1");
}
TokenType::False => {
emit!(&mut self.output, " mov rax, 0");
}
_ => unreachable!(),
},
Expr::Unary { op, right } => {
self.compile_expr(env, *right)?;
match op.token_type {
TokenType::Minus => {
emit!(&mut self.output, " neg rax");
}
TokenType::Bang => {
emit!(&mut self.output, " test rax, rax");
emit!(&mut self.output, " sete al");
emit!(&mut self.output, " movzx rax, al");
}
_ => unreachable!(),
}
}
Expr::Variable(name) => {
let var = match env.get_var(&name.lexeme) {
Some(x) => x,
None => {
return error!(name.loc, format!("undefined variable: {}", &name.lexeme));
}
};
emit!(
&mut self.output,
" mov rax, QWORD [rbp-{}]",
var.stack_offset,
);
}
Expr::Assign { name, value } => {
self.compile_expr(env, *value)?;
let var = match env.get_var(&name.lexeme) {
Some(x) => x,
None => {
return error!(name.loc, format!("undefined variable: {}", &name.lexeme));
}
};
emit!(
&mut self.output,
" mov QWORD [rbp-{}], rax",
var.stack_offset,
);
}
Expr::Call {
callee,
paren,
args,
} => {
let callee = match *callee {
Expr::Variable(name) => name.lexeme,
_ => return error!(&paren.loc, "tried to call a non-constant expression"),
};
for arg in &args {
self.compile_expr(env, arg.clone())?;
emit!(&mut self.output, " push rax");
}
for i in (0..args.len()).rev() {
let reg = match REGISTERS.get(i) {
Some(x) => x,
None => return error!(&paren.loc, "only up to 6 args allowed"),
};
emit!(&mut self.output, " pop {}", reg);
}
emit!(&mut self.output, " call {}", callee);
}
Expr::ArrayLiteral(exprs) => {
emit!(&mut self.output, " call Array.new");
emit!(&mut self.output, " push rax");
for expr in exprs {
self.compile_expr(env, expr)?;
emit!(&mut self.output, " mov rsi, rax");
emit!(&mut self.output, " pop rdi");
emit!(&mut self.output, " push rdi");
emit!(&mut self.output, " call Array.push");
}
emit!(&mut self.output, " pop rax");
}
Expr::Index { expr, index } => {
self.compile_expr(env, *expr)?;
emit!(&mut self.output, " push rax");
self.compile_expr(env, *index)?;
emit!(&mut self.output, " pop rbx");
emit!(&mut self.output, " mov rbx, [rbx]");
emit!(&mut self.output, " mov rax, [rbx + rax*8]");
}
}
Ok(())
}
}