Files
interpreter-cpp/src/parser/parser.cpp
2025-07-15 01:43:39 +02:00

402 lines
11 KiB
C++

#include "parser.hpp"
#include "ast/errors/error.hpp"
#include "ast/expressions/boolean.hpp"
#include "ast/expressions/function.hpp"
#include "ast/expressions/identifier.hpp"
#include "ast/expressions/if_then_else.hpp"
#include "ast/expressions/infix.hpp"
#include "ast/expressions/integer.hpp"
#include "ast/expressions/prefix.hpp"
#include "ast/statements/block.hpp"
#include "token/token.hpp"
#include "token/type.hpp"
#include "utils/tracer.hpp"
#include <sstream>
// Debug aid: prints the parser's `current` and `next` tokens to std::cout.
// It expands to two separate statements (each already terminated by ';'), so
// wrap it in braces when it is used as the body of an if/else or a loop.
// NOTE(review): this file does not include <iostream> itself; presumably it
// arrives through one of the project headers - confirm before using the macro.
#define LOG_CUR_NEXT \
std::cout << "current: " << current << std::endl; \
std::cout << "next: " << next << std::endl;
namespace parser {
/// Constructs a parser that pulls its tokens from `lexer`.
///
/// `current` and `next` start out as ILLEGAL placeholders; two next_token()
/// calls then replace them with the first two tokens produced by the lexer.
/// Afterwards the prefix and infix handler tables consulted by
/// parse_expression() are filled in.
parser::parser(lexer::lexer& lexer)
    : lexer(lexer),
      current(token::type::ILLEGAL, ""),
      next(token::type::ILLEGAL, "") {
    // Prime the two-token window.
    next_token();
    next_token();

    // Handlers for tokens that may start an expression.
    register_prefix(token::type::IDENTIFIER, [this]() -> ast::expression* {
        return parse_identifier();
    });
    register_prefix(token::type::INT, [this]() -> ast::expression* {
        return parse_integer();
    });
    register_prefix(
        {token::type::BANG, token::type::MINUS},
        [this]() -> ast::expression* { return parse_prefix_expr(); }
    );
    register_prefix(
        {token::type::TRUE, token::type::FALSE},
        [this]() -> ast::expression* { return parse_boolean(); }
    );
    register_prefix(token::type::LPAREN, [this]() -> ast::expression* {
        return parse_grouped_expr();
    });
    register_prefix(token::type::IF, [this]() -> ast::expression* {
        return parse_if_then_else();
    });
    register_prefix(token::type::FUNCTION, [this]() -> ast::expression* {
        return parse_function();
    });

    // Handlers for binary operators; each receives the already-parsed
    // left-hand operand.
    register_infix(
        {token::type::PLUS,
         token::type::MINUS,
         token::type::ASTERISK,
         token::type::SLASH,
         token::type::EQ,
         token::type::NEQ,
         token::type::GT,
         token::type::LT},
        [this](ast::expression* left) -> ast::expression* {
            return parse_infix_expr(left);
        }
    );
}
void parser::next_token() {
current = next;
next = lexer.next_token();
}
/// Error recovery helper: consumes tokens until `current` is a SEMICOLON or
/// END_OF_FILE. Does nothing when `current` already is one of the two.
void parser::skip_until_semicolon() {
    while (!(current.type == token::type::SEMICOLON
             || current.type == token::type::END_OF_FILE)) {
        next_token();
    }
}
std::unique_ptr<ast::program> parser::parse_program() {
std::unique_ptr<ast::program> p = std::make_unique<ast::program>();
for (; current.type != token::type::END_OF_FILE; next_token()) {
ast::statement* stmt = parse_statement();
if (stmt != nullptr)
p->statements.push_back(stmt);
}
return p;
}
/// Dispatches on the current token: LET and RETURN get their dedicated
/// parsers, everything else is treated as an expression statement.
/// @return the parsed statement, or nullptr when the chosen parser fails.
ast::statement* parser::parse_statement() {
    if (current.type == token::type::LET) {
        return parse_let();
    }
    if (current.type == token::type::RETURN) {
        return parse_return();
    }
    return parse_expression_stmt();
}
/// Parses one expression starting at `current`.
///
/// The prefix handler registered for the current token produces the initial
/// operand. While the lookahead is not a SEMICOLON and binds tighter than
/// `prec` (according to precedence_for), the matching infix handler is invoked
/// with the expression built so far as its left operand.
///
/// @param prec binding power of the enclosing context; operators that do not
///             exceed it are left for the caller.
/// @return the parsed expression, or nullptr when no prefix handler exists for
///         the current token (an error is recorded) or when the prefix handler
///         itself failed.
ast::expression* parser::parse_expression(precedence prec) {
    TRACE_FUNCTION;
    const auto prefix_it = prefix_parse_fns.find(current.type);
    if (prefix_it == prefix_parse_fns.end()) {
        unkown_prefix_error(current);
        return nullptr;
    }

    // Call through the iterator instead of copying the stored handler.
    ast::expression* left = prefix_it->second();

    // A failed prefix parse has already reported its error. Stop here so that
    // no infix node is ever built around a null left operand.
    if (left == nullptr)
        return nullptr;

    while (next.type != token::type::SEMICOLON
           && prec < precedence_for(next.type)) {
        const auto infix_it = infix_parse_fns.find(next.type);
        if (infix_it == infix_parse_fns.end())
            return left;

        // Make the operator the current token before handing over.
        next_token();
        left = infix_it->second(left);
    }
    return left;
}
/// Parses a return statement; `current` is the RETURN token on entry.
///
/// The expression after the keyword becomes the statement's value (nullptr if
/// it could not be parsed). A directly following SEMICOLON is consumed.
ast::return_stmt* parser::parse_return() {
    auto* result = new ast::return_stmt(current);

    next_token();  // move from `return` onto the first token of the value
    result->value = parse_expression();

    const bool terminated = (next.type == token::type::SEMICOLON);
    if (terminated) {
        next_token();
    }
    return result;
}
/// Parses a let statement; `current` is the LET token on entry.
///
/// The keyword must be followed by an IDENTIFIER and then an ASSIGN token.
/// If either is missing, expect_next() records the error, the partially built
/// statement is deleted, input is skipped up to the next semicolon (or end of
/// file) and nullptr is returned. Otherwise the expression after ASSIGN becomes
/// the value and a directly following SEMICOLON is consumed.
ast::let_stmt* parser::parse_let() {
    auto* let = new ast::let_stmt(current);

    // Shared failure path for both expected tokens.
    const auto abandon = [this, let]() -> ast::let_stmt* {
        delete let;
        skip_until_semicolon();
        return nullptr;
    };

    if (!expect_next(token::type::IDENTIFIER)) {
        return abandon();
    }
    let->name = new ast::identifier{current, current.literal};

    if (!expect_next(token::type::ASSIGN)) {
        return abandon();
    }

    next_token();  // step over `=` onto the first token of the value
    let->value = parse_expression();

    if (next.type == token::type::SEMICOLON) {
        next_token();
    }
    return let;
}
/// Wraps the expression starting at `current` in an expression statement.
///
/// The statement is returned even when the expression could not be parsed (its
/// `expression` member is then nullptr). A directly following SEMICOLON is
/// consumed.
ast::expression_stmt* parser::parse_expression_stmt() {
    TRACE_FUNCTION;
    auto* wrapper = new ast::expression_stmt(current);
    wrapper->expression = parse_expression();

    const bool terminated = (next.type == token::type::SEMICOLON);
    if (terminated) {
        next_token();
    }
    return wrapper;
}
/// Checks that the lookahead token has type `t`.
///
/// On a match the parser advances by one token; otherwise an "expected next"
/// error is recorded and the position is left unchanged.
/// @return true if the lookahead matched, false otherwise.
bool parser::expect_next(token::type t) {
    const bool matches = (next.type == t);
    if (matches) {
        next_token();
    } else {
        next_error(t);
    }
    return matches;
}
/// Records an error stating that a token of type `t` was expected but the
/// lookahead token has a different type.
void parser::next_error(token::type t) {
    std::ostringstream message;
    message << "Expected next token to be " << t;
    message << " but instead got " << next.type;

    auto* problem = new ast::error::expected_next(t, message.str());
    errors.push_back(problem);
}
/// Records an error stating that no prefix parse function is registered for
/// the token `tok`.
void parser::unkown_prefix_error(token::token tok) {
    std::ostringstream message;
    message << "No prefix parse function for token " << tok;

    auto* problem = new ast::error::unkown_prefix(tok, message.str());
    errors.push_back(problem);
}
/// Deletes every error object that was recorded while parsing.
parser::~parser() {
    for (auto it = errors.begin(); it != errors.end(); ++it) {
        delete *it;
    }
}
/// Associates `fn` with `type` in the prefix handler table, replacing any
/// handler stored for that type before.
void parser::register_prefix(token::type type, prefix_parse_fn fn) {
    auto& slot = prefix_parse_fns[type];
    slot = fn;
}
/// Associates the same prefix handler `fn` with every token type in `types`.
void parser::register_prefix(
    std::vector<token::type> types, prefix_parse_fn fn
) {
    for (const token::type type : types) {
        prefix_parse_fns[type] = fn;
    }
}
/// Associates `fn` with `type` in the infix handler table, replacing any
/// handler stored for that type before.
void parser::register_infix(token::type type, infix_parse_fn fn) {
    auto& slot = infix_parse_fns[type];
    slot = fn;
}
/// Associates the same infix handler `fn` with every token type in `types`.
void parser::register_infix(
    std::vector<token::type> types, infix_parse_fn fn
) {
    for (const token::type type : types) {
        infix_parse_fns[type] = fn;
    }
}
/// Builds an identifier node from the current token, using the token's
/// literal as the identifier's name. Does not advance the parser.
ast::identifier* parser::parse_identifier() {
    auto* ident = new ast::identifier(current, current.literal);
    return ident;
}
/// Builds an integer literal node from the current token by converting the
/// token's literal text with std::stoi. Does not advance the parser.
///
/// NOTE(review): std::stoi throws std::invalid_argument / std::out_of_range
/// for text it cannot convert or that does not fit an int; nothing here
/// catches that - confirm the lexer only emits INT tokens that fit.
ast::integer_literal* parser::parse_integer() {
    TRACE_FUNCTION;
    const int value = std::stoi(current.literal);
    return new ast::integer_literal(current, value);
}
/// Builds a boolean literal node from the current token; the value is true
/// exactly when the token type is TRUE. Does not advance the parser.
ast::boolean_literal* parser::parse_boolean() {
    TRACE_FUNCTION;
    const bool value = (current.type == token::type::TRUE);
    return new ast::boolean_literal(current, value);
}
static void free_vec(std::vector<ast::identifier*> v) {
for (auto& e : v)
delete e;
}
/// Parses a comma-separated parameter list; `current` is the opening LPAREN
/// on entry and the closing RPAREN on successful exit.
///
/// @return the parameter identifiers in source order. An empty vector is
///         returned both for an empty list `()` and on a syntax error; in the
///         error case expect_next() has recorded the error and every
///         identifier allocated so far has been deleted.
std::vector<ast::identifier*> parser::parse_function_parameters() {
    if (next.type == token::type::RPAREN) {
        next_token();
        return {}; // no params
    }

    std::vector<ast::identifier*> ret;
    if (!expect_next(token::type::IDENTIFIER))
        return {};
    ret.push_back(parse_identifier());

    while (next.type == token::type::COMMA) {
        next_token(); // step onto the comma
        // Every comma must be followed by an identifier; this also rejects a
        // trailing comma such as `(a, )`.
        if (!expect_next(token::type::IDENTIFIER)) {
            free_vec(ret);
            return {};
        }
        ret.push_back(parse_identifier());
    }

    // `current` is always an identifier at this point, so only the closing
    // parenthesis is left to check.
    if (!expect_next(token::type::RPAREN)) {
        free_vec(ret);
        return {};
    }
    return ret;
}
/// Parses a function literal; `current` is the FUNCTION token on entry.
///
/// Expected shape: FUNCTION LPAREN parameters RPAREN LBRACE block RBRACE.
///
/// @return the function literal, or nullptr when the LPAREN or LBRACE is
///         missing or the body block could not be parsed. In each failure case
///         an error has been recorded and the partially built literal has been
///         deleted.
ast::function_literal* parser::parse_function() {
    TRACE_FUNCTION;
    ast::function_literal* ret = new ast::function_literal(current);
    if (!expect_next(token::type::LPAREN)) {
        delete ret;
        return nullptr;
    }

    ret->parameters = parse_function_parameters();

    if (!expect_next(token::type::LBRACE)) {
        delete ret;
        return nullptr;
    }

    // parse_block() yields nullptr when the closing brace is missing; do not
    // hand out a literal without a body in that case.
    ast::block_stmt* body = parse_block();
    if (body == nullptr) {
        delete ret;
        return nullptr;
    }
    ret->body = body;
    return ret;
}
/// Parses a prefix operator expression; `current` is the operator token on
/// entry. The operand is parsed with PREFIX precedence and stored as `right`
/// (nullptr if it could not be parsed).
ast::prefix_expr* parser::parse_prefix_expr() {
    TRACE_FUNCTION;
    auto* node = new ast::prefix_expr(current, current.literal);

    next_token();  // move from the operator onto its operand
    node->right = parse_expression(precedence::PREFIX);

    return node;
}
/// Parses a parenthesised expression; `current` is the LPAREN on entry.
///
/// @return the inner expression when it is followed by RPAREN; otherwise the
///         inner expression is deleted and nullptr is returned (expect_next()
///         has recorded the error).
ast::expression* parser::parse_grouped_expr() {
    TRACE_FUNCTION;
    next_token();  // step past `(`

    ast::expression* inner = parse_expression(precedence::LOWEST);
    if (expect_next(token::type::RPAREN)) {
        return inner;
    }

    delete inner;
    return nullptr;
}
/// Parses an if expression with an optional else branch; `current` is the IF
/// token on entry.
///
/// Expected shape: IF LPAREN condition RPAREN LBRACE block RBRACE, optionally
/// followed by ELSE LBRACE block RBRACE.
///
/// @return the if/else node, or nullptr when one of the required tokens is
///         missing or one of the blocks could not be parsed. In each failure
///         case an error has been recorded and the partially built node has
///         been deleted.
ast::if_then_else* parser::parse_if_then_else() {
    TRACE_FUNCTION;
    ast::if_then_else* ret = new ast::if_then_else(current);
    if (!expect_next(token::type::LPAREN)) {
        delete ret;
        return nullptr;
    }

    next_token(); // step past `(` onto the condition
    ret->condition = parse_expression();

    // `||` short-circuits, so LBRACE is only checked once RPAREN was consumed.
    if (!expect_next(token::type::RPAREN)
        || !expect_next(token::type::LBRACE)) {
        delete ret;
        return nullptr;
    }

    // parse_block() yields nullptr when the closing brace is missing; do not
    // hand out a node with a missing branch in that case.
    ast::block_stmt* consequence = parse_block();
    if (consequence == nullptr) {
        delete ret;
        return nullptr;
    }
    ret->consequence = consequence;

    if (next.type != token::type::ELSE)
        return ret;

    next_token(); // step onto `else`
    if (!expect_next(token::type::LBRACE)) {
        delete ret;
        return nullptr;
    }

    ast::block_stmt* alternative = parse_block();
    if (alternative == nullptr) {
        delete ret;
        return nullptr;
    }
    ret->alternative = alternative;
    return ret;
}
/// Parses the statements of a brace-delimited block; `current` is the opening
/// LBRACE on entry and the closing RBRACE on successful exit.
///
/// Statements for which parse_statement() yields nullptr are skipped.
/// @return the block, or nullptr when the input ends before an RBRACE is found
///         (an error is recorded and the partial block is deleted).
ast::block_stmt* parser::parse_block() {
    TRACE_FUNCTION;
    auto* block = new ast::block_stmt(current);

    next_token();  // step past `{`
    while (current.type != token::type::RBRACE
           && current.type != token::type::END_OF_FILE) {
        ast::statement* parsed = parse_statement();
        if (parsed != nullptr) {
            block->statements.push_back(parsed);
        }
        next_token();
    }

    if (current.type == token::type::RBRACE) {
        return block;
    }

    // Ran into the end of the input without seeing the closing brace.
    next_error(token::type::RBRACE);
    delete block;
    return nullptr;
}
/// Parses the right-hand side of a binary operator; `current` is the operator
/// token on entry and `left` the operand that was already parsed.
///
/// The right operand is parsed with the operator's own precedence, so a
/// following operator of equal precedence is left for the caller.
/// @param left the left-hand operand, stored in the new node.
/// @return the new infix node; its `right` is nullptr if the operand could not
///         be parsed.
ast::infix_expr* parser::parse_infix_expr(ast::expression* left) {
    TRACE_FUNCTION;
    const precedence own_precedence = precedence_for(current.type);
    auto* node = new ast::infix_expr(current, current.literal, left);

    next_token();  // move from the operator onto the right operand
    node->right = parse_expression(own_precedence);

    return node;
}
} // namespace parser