very basic parser of let statements

This commit is contained in:
Karma Riuk
2025-07-03 13:30:56 +02:00
parent c091f7f021
commit de465b6122
9 changed files with 311 additions and 0 deletions

9
src/ast/ast.cpp Normal file
View File

@@ -0,0 +1,9 @@
#include "ast.hpp"
namespace ast {
// Literal of the program's leading token: delegates to the first statement,
// or yields an empty string when the program holds no statements.
std::string program::token_literal() const {
    return statements.empty() ? std::string("")
                              : statements.front()->token_literal();
}
} // namespace ast

29
src/ast/ast.hpp Normal file
View File

@@ -0,0 +1,29 @@
#pragma once
#include <string>
#include <vector>
namespace ast {
// Common base of every AST node.
struct node {
    // Virtual so that deleting a derived node through a node* runs the
    // derived destructor.
    virtual ~node() = default;

    // Literal text of the token this node reports for itself.
    virtual std::string token_literal() const = 0;
};
struct statement : node {
virtual std::string token_literal() const override = 0;
};
struct expression : node {
virtual std::string token_literal() const override = 0;
};
struct program : public node {
std::vector<statement*> statements;
std::string token_literal() const override;
~program() {
for (const auto& ref : statements)
delete ref;
};
};
} // namespace ast

View File

@@ -0,0 +1,11 @@
#include "identifier.hpp"
namespace ast {
// Stores the originating token and the identifier's textual value; both
// arguments are taken by value and moved into the members.
identifier::identifier(token::token tok, std::string val)
    : token(std::move(tok)),
      value(std::move(val)) {}
std::string identifier::token_literal() const {
return token.literal;
}
} // namespace ast

View File

@@ -0,0 +1,16 @@
#pragma once
#include "ast/ast.hpp"
#include "token/token.hpp"
#include <string>
namespace ast {
// Expression node representing a name.
struct identifier : expression {
    // Takes the token the identifier was read from and its textual value.
    identifier(token::token token, std::string value);

    // Token returned (via its literal) by token_literal().
    // Note: this member shares its name with the `token` namespace.
    token::token token;

    // Textual value of the identifier, as supplied to the constructor.
    std::string value;

    // Returns token.literal.
    std::string token_literal() const override;
};
} // namespace ast

View File

@@ -0,0 +1,17 @@
#include "let.hpp"
namespace ast {
// Creates a let statement for the given token. The name and value children
// start out unset (nullptr) and are attached later by whoever builds the node.
let::let(token::token tok)
    : token(std::move(tok)),
      name(nullptr),
      value(nullptr) {}
std::string let::token_literal() const {
return token.literal;
}
// Releases both children. Either may still be nullptr, which `delete`
// accepts as a no-op.
let::~let() {
    delete name;
    delete value;
}
} // namespace ast

View File

@@ -0,0 +1,19 @@
#pragma once
#include "ast/ast.hpp"
#include "ast/expressions/identifier.hpp"
#include "token/token.hpp"
namespace ast {
struct let : statement {
let(token::token token);
token::token token;
identifier* name;
expression* value;
std::string token_literal() const override;
~let();
};
} // namespace ast

78
src/parser/parser.cpp Normal file
View File

@@ -0,0 +1,78 @@
#include "parser.hpp"
#include "token/type.hpp"
#include <sstream>
namespace parser {
// Binds the parser to `lexer` (held by reference, so the lexer must outlive
// the parser). `current` and `next` begin as ILLEGAL placeholders; advancing
// twice replaces both with tokens read from the lexer.
parser::parser(lexer::lexer& lexer)
    : lexer(lexer),
      current(token::type::ILLEGAL, ""),
      next(token::type::ILLEGAL, "") {
    for (int primed = 0; primed < 2; ++primed)
        next_token();
}
void parser::next_token() {
current = next;
next = lexer.next_token();
}
// Parses tokens until END_OF_FILE and returns a newly allocated program
// holding every statement that parsed successfully. Statements that fail to
// parse (nullptr) are skipped. The caller owns the result and must delete it.
ast::program* parser::parse_program() {
    auto* result = new ast::program();

    while (current.type != token::type::END_OF_FILE) {
        if (ast::statement* parsed = parse_statement())
            result->statements.push_back(parsed);
        next_token();
    }

    return result;
}
// Dispatches on the current token's type. Only let statements are handled;
// any other token yields nullptr.
ast::statement* parser::parse_statement() {
    if (current.type == token::type::LET)
        return parse_let();

    return nullptr;
}
// Checks that the lookahead token has type `t`.
//
// On a match the parser advances (so `current` becomes that token) and true
// is returned. On a mismatch the parser does not advance, a message is
// appended to `errors` via next_error(), and false is returned.
bool parser::expect_next(token::type t) {
    if (next.type != t) {
        // Record why parsing failed; without this `errors` never receives
        // anything and callers cannot tell what went wrong.
        next_error(t);
        return false;
    }

    next_token();
    return true;
}
// Parses `let <identifier> = ... ;` starting at the current LET token.
//
// Returns a newly allocated let statement (owned by the caller) whose `name`
// is set and whose `value` is still nullptr, or nullptr when the token after
// `let` is not an identifier or the one after that is not `=`.
ast::let* parser::parse_let() {
    // Remember the tokens while validating the statement header, and only
    // allocate once it is known to be well formed; this keeps the failure
    // paths free of manual cleanup.
    const token::token let_token = current;
    if (!expect_next(token::type::IDENTIFIER))
        return nullptr;

    const token::token name_token = current;
    if (!expect_next(token::type::ASSIGN))
        return nullptr;

    ast::let* stmt = new ast::let(let_token);
    stmt->name = new ast::identifier{name_token, name_token.literal};

    // TODO: we are currently skipping expressions until we encounter a
    // semicolon
    // Also stop when the lookahead is END_OF_FILE: input that ends without
    // a semicolon would otherwise keep this loop spinning forever.
    while (current.type != token::type::SEMICOLON
           && next.type != token::type::END_OF_FILE)
        next_token();

    return stmt;
}
// Appends to `errors` a message stating that type `t` was expected as the
// next token, together with the type actually found in the lookahead.
void parser::next_error(token::type t) {
    std::ostringstream message;
    message << "Expected next token to be " << t;
    message << " but instead got " << next.type;
    errors.push_back(message.str());
}
} // namespace parser

25
src/parser/parser.hpp Normal file
View File

@@ -0,0 +1,25 @@
#pragma once
#include "ast/ast.hpp"
#include "ast/statements/let.hpp"
#include "lexer/lexer.hpp"
#include "token/token.hpp"
namespace parser {
struct parser {
parser(lexer::lexer& lexer);
std::vector<std::string> errors;
ast::program* parse_program();
private:
lexer::lexer& lexer;
token::token current, next;
void next_token();
ast::statement* parse_statement();
ast::let* parse_let();
bool expect_next(token::type);
void next_error(token::type);
};
} // namespace parser

107
test/parser.cpp Normal file
View File

@@ -0,0 +1,107 @@
#include "parser/parser.hpp"
#include "ast/ast.hpp"
#include "ast/statements/let.hpp"
#include "lexer/lexer.hpp"
#include <doctest.h>
#include <iostream>
#include <sstream>
// Asserts that `stmt` is a let statement binding the identifier `name`.
//
// Every check is a REQUIRE, so the calling test case is aborted at the first
// mismatch instead of dereferencing a pointer that failed validation.
void test_let_statement(ast::statement* stmt, const std::string& name) {
    REQUIRE_MESSAGE(stmt != nullptr, "Statement is a null pointer");
    REQUIRE(stmt->token_literal() == "let");

    ast::let* let_stmt = dynamic_cast<ast::let*>(stmt);
    REQUIRE_MESSAGE(
        let_stmt != nullptr,
        "Couldn't cast statement to a let statement"
    );

    // `name` starts as nullptr in ast::let, so verify it before use.
    REQUIRE_MESSAGE(
        let_stmt->name != nullptr,
        "Let statement has no identifier"
    );
    REQUIRE(let_stmt->name->value == name);
    REQUIRE(let_stmt->name->token_literal() == name);
}
// Reports the given parser errors, if any: prints each one to stderr and
// then marks the running test case as failed. Does nothing for an empty list.
void checkParserErrors(const std::vector<std::string>& errors) {
    if (!errors.empty()) {
        std::cerr << "parser has " << errors.size() << " errors:\n";
        for (auto it = errors.begin(); it != errors.end(); ++it)
            std::cerr << "parser error: \"" << *it << "\"\n";

        // FAIL_CHECK records the failure but, unlike FAIL, lets the test
        // case keep running, so the caller can still reach its cleanup.
        FAIL_CHECK("Parser had errors. See stderr for details.");
    }
}
// Malformed let statements must be rejected without producing a statement.
// The program is deleted at the end so a leak checker can confirm that the
// failure paths release everything they allocated.
TEST_CASE("Malformed let statement (checking for memory leaks)") {
    // Parses `source` and requires a non-null program with no statements.
    const auto expect_no_statements = [](const std::string& source) {
        std::stringstream input(source);
        lexer::lexer l{input};
        parser::parser p{l};

        ast::program* program = p.parse_program();
        REQUIRE_MESSAGE(
            program != nullptr,
            "parse_program() returned a null pointer"
        );
        REQUIRE(program->statements.size() == 0);

        delete program;
    };

    SUBCASE("Second token not identifier") {
        expect_no_statements("\
let 5 = 5;\
");
    }

    SUBCASE("Third token not '='") {
        expect_no_statements("\
let five ! 5;\
");
    }
}
// Three well-formed let statements must parse into three let nodes whose
// identifiers match the source, with no parser errors reported.
TEST_CASE("Parse let statement") {
    std::stringstream input("\
let x = 5;\
let y = 10;\
let foobar = 103213;\
");
    lexer::lexer l{input};
    parser::parser p{l};

    ast::program* program = p.parse_program();
    REQUIRE_MESSAGE(
        program != nullptr,
        "parse_program() returned a null pointer"
    );

    // Surface anything the parser recorded before inspecting the tree.
    checkParserErrors(p.errors);

    REQUIRE(program->statements.size() == 3);

    // Expected identifier of each statement, in source order.
    const std::string expected_identifiers[] = {"x", "y", "foobar"};

    std::size_t index = 0;
    for (const auto& expected : expected_identifiers)
        test_let_statement(program->statements[index++], expected);

    delete program;
}