From de465b6122a26fc3ef65dde96be89f26971411c2 Mon Sep 17 00:00:00 2001
From: Karma Riuk
Date: Thu, 3 Jul 2025 13:30:56 +0200
Subject: [PATCH] very basic parser of let statements

---
 src/ast/ast.cpp                    |  10 +++
 src/ast/ast.hpp                    |  41 +++++++++
 src/ast/expressions/identifier.cpp |  11 +++
 src/ast/expressions/identifier.hpp |  17 ++++
 src/ast/statements/let.cpp         |  17 ++++
 src/ast/statements/let.hpp         |  24 ++++++
 src/parser/parser.cpp              |  97 ++++++++++++++++++++++
 src/parser/parser.hpp              |  34 ++++++++
 test/parser.cpp                    | 128 +++++++++++++++++++++++++++++
 9 files changed, 379 insertions(+)
 create mode 100644 src/ast/ast.cpp
 create mode 100644 src/ast/ast.hpp
 create mode 100644 src/ast/expressions/identifier.cpp
 create mode 100644 src/ast/expressions/identifier.hpp
 create mode 100644 src/ast/statements/let.cpp
 create mode 100644 src/ast/statements/let.hpp
 create mode 100644 src/parser/parser.cpp
 create mode 100644 src/parser/parser.hpp
 create mode 100644 test/parser.cpp

diff --git a/src/ast/ast.cpp b/src/ast/ast.cpp
new file mode 100644
index 0000000..6af4da3
--- /dev/null
+++ b/src/ast/ast.cpp
@@ -0,0 +1,10 @@
+#include "ast.hpp"
+
+namespace ast {
+    // Literal of the first statement's token, or "" for an empty program.
+    std::string program::token_literal() const {
+        if (statements.empty())
+            return "";
+        return statements.front()->token_literal();
+    }
+} // namespace ast
diff --git a/src/ast/ast.hpp b/src/ast/ast.hpp
new file mode 100644
index 0000000..172d01e
--- /dev/null
+++ b/src/ast/ast.hpp
@@ -0,0 +1,41 @@
+#pragma once
+
+#include <string>
+#include <vector>
+
+namespace ast {
+    // Common base of every AST node.
+    struct node {
+        // Literal text of the token the node was built from.
+        virtual std::string token_literal() const = 0;
+        virtual ~node() = default;
+    };
+
+    // Base of all statement nodes.
+    struct statement : node {
+        virtual std::string token_literal() const override = 0;
+    };
+
+    // Base of all expression nodes.
+    struct expression : node {
+        virtual std::string token_literal() const override = 0;
+    };
+
+    // Root node; owns the statements it holds and deletes them when
+    // destroyed.
+    struct program : public node {
+        std::vector<statement*> statements;
+
+        program() = default;
+        // Non-copyable: a copy would delete the same statements twice.
+        program(const program&) = delete;
+        program& operator=(const program&) = delete;
+
+        std::string token_literal() const override;
+
+        ~program() {
+            for (const auto& ref : statements)
+                delete ref;
+        }
+    };
+} // namespace ast
diff --git a/src/ast/expressions/identifier.cpp b/src/ast/expressions/identifier.cpp
new file mode 100644
index 0000000..b54d573
--- /dev/null
+++ b/src/ast/expressions/identifier.cpp
@@ -0,0 +1,11 @@
+#include "identifier.hpp"
+
+namespace ast {
+    identifier::identifier(token::token token, std::string value)
+        : token(std::move(token)),
+          value(std::move(value)) {}
+
+    std::string identifier::token_literal() const {
+        return token.literal;
+    }
+} // namespace ast
diff --git a/src/ast/expressions/identifier.hpp b/src/ast/expressions/identifier.hpp
new file mode 100644
index 0000000..22409cd
--- /dev/null
+++ b/src/ast/expressions/identifier.hpp
@@ -0,0 +1,17 @@
+#pragma once
+
+#include "ast/ast.hpp"
+#include "token/token.hpp"
+
+#include <string>
+
+namespace ast {
+    // Expression node for a name; keeps its token and the name's text.
+    struct identifier : expression {
+        identifier(token::token token, std::string value);
+        token::token token;
+        std::string value;
+
+        std::string token_literal() const override;
+    };
+} // namespace ast
diff --git a/src/ast/statements/let.cpp b/src/ast/statements/let.cpp
new file mode 100644
index 0000000..29ee789
--- /dev/null
+++ b/src/ast/statements/let.cpp
@@ -0,0 +1,17 @@
+#include "let.hpp"
+
+namespace ast {
+    let::let(token::token token)
+        : token(std::move(token)),
+          name(nullptr),
+          value(nullptr) {}
+
+    std::string let::token_literal() const {
+        return token.literal;
+    }
+
+    let::~let() {
+        delete name;
+        delete value;
+    }
+} // namespace ast
diff --git a/src/ast/statements/let.hpp b/src/ast/statements/let.hpp
new file mode 100644
index 0000000..3dc7d3b
--- /dev/null
+++ b/src/ast/statements/let.hpp
@@ -0,0 +1,24 @@
+#pragma once
+
+#include "ast/ast.hpp"
+#include "ast/expressions/identifier.hpp"
+#include "token/token.hpp"
+
+namespace ast {
+    // `let` statement node; owns `name` and `value` and deletes both
+    // when destroyed. Either may be null.
+    struct let : statement {
+        explicit let(token::token token);
+        // Non-copyable: a copy would delete `name`/`value` twice.
+        let(const let&) = delete;
+        let& operator=(const let&) = delete;
+
+        token::token token;
+        identifier* name;
+        expression* value;
+
+        std::string token_literal() const override;
+
+        ~let();
+    };
+} // namespace ast
diff --git a/src/parser/parser.cpp b/src/parser/parser.cpp
new file mode 100644
index 0000000..220644e
--- /dev/null
+++ b/src/parser/parser.cpp
@@ -0,0 +1,97 @@
+#include "parser.hpp"
+
+#include "token/type.hpp"
+
+#include <memory>
+#include <sstream>
+
+namespace parser {
+    // Reads two tokens so that both `current` and `next` are populated.
+    parser::parser(lexer::lexer& lexer)
+        : lexer(lexer),
+          current(token::type::ILLEGAL, ""),
+          next(token::type::ILLEGAL, "") {
+        next_token();
+        next_token();
+    }
+
+    // Shifts the lookahead into `current` and pulls a new token from the
+    // lexer.
+    void parser::next_token() {
+        current = next;
+        next = lexer.next_token();
+    }
+
+    // Parses statements until END_OF_FILE. The caller owns the returned
+    // program; statements that fail to parse are left out.
+    ast::program* parser::parse_program() {
+        auto p = std::make_unique<ast::program>();
+
+        while (current.type != token::type::END_OF_FILE) {
+            ast::statement* stmt = parse_statement();
+            if (stmt != nullptr)
+                p->statements.push_back(stmt);
+
+            // A statement may already have consumed input up to
+            // END_OF_FILE; do not step past it.
+            if (current.type != token::type::END_OF_FILE)
+                next_token();
+        }
+
+        return p.release();
+    }
+
+    // Dispatches on the current token; returns nullptr for anything that
+    // is not (yet) a supported statement.
+    ast::statement* parser::parse_statement() {
+        switch (current.type) {
+        case token::type::LET:
+            return parse_let();
+        default:
+            return nullptr;
+        }
+    }
+
+    // Advances only if the lookahead has type `t`; otherwise records an
+    // error in `errors` and leaves the position untouched.
+    bool parser::expect_next(token::type t) {
+        if (next.type != t) {
+            next_error(t);
+            return false;
+        }
+        next_token();
+        return true;
+    }
+
+    // Parses `let <identifier> = ...;` starting at the LET token. Returns
+    // nullptr (and frees the partial node) when the shape does not match.
+    ast::let* parser::parse_let() {
+        auto stmt = std::make_unique<ast::let>(current);
+
+        if (!expect_next(token::type::IDENTIFIER))
+            return nullptr;
+
+        stmt->name = new ast::identifier{current, current.literal};
+
+        if (!expect_next(token::type::ASSIGN))
+            return nullptr;
+
+        // TODO: we are currently skipping expressions until we encounter a
+        // semicolon. Also stop at END_OF_FILE, otherwise a missing
+        // semicolon could keep this loop running forever.
+        while (current.type != token::type::SEMICOLON
+               && current.type != token::type::END_OF_FILE)
+            next_token();
+
+        return stmt.release();
+    }
+
+    // Appends a message describing the mismatch between `t` and the
+    // actual lookahead type.
+    void parser::next_error(token::type t) {
+        std::stringstream ss;
+        ss << "Expected next token to be " << t << " but instead got "
+           << next.type;
+        errors.push_back(ss.str());
+    }
+} // namespace parser
diff --git a/src/parser/parser.hpp b/src/parser/parser.hpp
new file mode 100644
index 0000000..6069463
--- /dev/null
+++ b/src/parser/parser.hpp
@@ -0,0 +1,34 @@
+#pragma once
+
+#include "ast/ast.hpp"
+#include "ast/statements/let.hpp"
+#include "lexer/lexer.hpp"
+#include "token/token.hpp"
+
+#include <string>
+#include <vector>
+
+namespace parser {
+    // Builds an AST from the tokens of a lexer, keeping one token of
+    // lookahead. The lexer is held by reference and must outlive the
+    // parser.
+    struct parser {
+        explicit parser(lexer::lexer& lexer);
+
+        // One message per failed expectation, in the order encountered.
+        std::vector<std::string> errors;
+
+        // Returns a heap-allocated program that the caller must delete.
+        ast::program* parse_program();
+
+      private:
+        lexer::lexer& lexer;
+        token::token current, next;
+
+        void next_token();
+        ast::statement* parse_statement();
+        ast::let* parse_let();
+        bool expect_next(token::type);
+        void next_error(token::type);
+    };
+} // namespace parser
diff --git a/test/parser.cpp b/test/parser.cpp
new file mode 100644
index 0000000..08789b4
--- /dev/null
+++ b/test/parser.cpp
@@ -0,0 +1,128 @@
+#include "parser/parser.hpp"
+
+#include "ast/ast.hpp"
+#include "ast/statements/let.hpp"
+#include "lexer/lexer.hpp"
+
+#include <cstddef>
+#include <doctest.h>
+#include <iostream>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <vector>
+
+// Asserts that `stmt` is a let statement binding the identifier `name`.
+void test_let_statement(ast::statement* stmt, const std::string& name) {
+    REQUIRE(stmt->token_literal() == "let");
+    ast::let* let_stmt = dynamic_cast<ast::let*>(stmt);
+    REQUIRE_MESSAGE(
+        let_stmt != nullptr,
+        "Couldn't cast statement to a let statement"
+    );
+
+    REQUIRE(let_stmt->name->value == name);
+    REQUIRE(let_stmt->name->token_literal() == name);
+}
+
+// Prints every parser error to stderr and fails the check if there is any.
+void checkParserErrors(const std::vector<std::string>& errors) {
+    if (errors.empty())
+        return;
+
+    std::cerr << "parser has " << errors.size() << " errors:\n";
+    for (const auto& msg : errors)
+        std::cerr << "parser error: \"" << msg << "\"\n";
+
+    // FAIL_CHECK marks the test case as failed but lets it keep running.
+    FAIL_CHECK("Parser had errors. See stderr for details.");
+}
+
+TEST_CASE("Malformed let statement (checking for memory leaks)") {
+    SUBCASE("Second token not identifier") {
+        std::stringstream input("\
+let 5 = 5;\
+");
+
+        lexer::lexer l{input};
+        parser::parser p{l};
+
+        std::unique_ptr<ast::program> program{p.parse_program()};
+        REQUIRE_MESSAGE(
+            program.get() != nullptr,
+            "parse_program() returned a null pointer"
+        );
+        REQUIRE(program->statements.size() == 0);
+        REQUIRE(p.errors.size() == 1);
+    }
+
+    SUBCASE("Third token not '='") {
+        std::stringstream input("\
+let five ! 5;\
+");
+
+        lexer::lexer l{input};
+        parser::parser p{l};
+
+        std::unique_ptr<ast::program> program{p.parse_program()};
+        REQUIRE_MESSAGE(
+            program.get() != nullptr,
+            "parse_program() returned a null pointer"
+        );
+        REQUIRE(program->statements.size() == 0);
+        REQUIRE(p.errors.size() == 1);
+    }
+}
+
+TEST_CASE("Let statement without a trailing semicolon") {
+    std::stringstream input("let x = 5");
+
+    lexer::lexer l{input};
+    parser::parser p{l};
+
+    // Must terminate even though no SEMICOLON token ever arrives.
+    std::unique_ptr<ast::program> program{p.parse_program()};
+    REQUIRE_MESSAGE(
+        program.get() != nullptr,
+        "parse_program() returned a null pointer"
+    );
+    REQUIRE(program->statements.size() == 1);
+    test_let_statement(program->statements[0], "x");
+}
+
+TEST_CASE("Parse let statement") {
+    std::stringstream input("\
+let x = 5;\
+let y = 10;\
+let foobar = 103213;\
+");
+
+    lexer::lexer l{input};
+    parser::parser p{l};
+
+    std::unique_ptr<ast::program> program{p.parse_program()};
+    checkParserErrors(p.errors);
+
+    REQUIRE_MESSAGE(
+        program.get() != nullptr,
+        "parse_program() returned a null pointer"
+    );
+    REQUIRE(program->statements.size() == 3);
+
+    struct test {
+        std::string expected_identifier;
+    };
+
+    test tests[]{
+        "x",
+        "y",
+        "foobar",
+    };
+
+    std::size_t i = 0;
+    for (const auto& t : tests) {
+        ast::statement* stmt = program->statements[i++];
+
+        test_let_statement(stmt, t.expected_identifier);
+    }
+}