added some sort of error generation when parsing

errors occur
checking that the casting of the let statement
2025-07-07 15:02:06 +02:00 · 2025-07-07 15:01:46 +02:00 · 2025-07-03 13:30:56 +02:00 · 2025-07-03 11:59:26 +02:00 · 2025-07-03 10:10:22 +02:00 · 2025-07-02 23:12:28 +02:00
21 changed files with 535 additions and 23 deletions
--- a/.clangd
+++ b/.clangd
@@ -0,0 +1,2 @@
 SemanticTokens:
  DisabledKinds: [Operator]
--- a/8
+++ b/8
@@ -31,9 +31,13 @@ TEST_TARGET   := $(BIN_DIR)/monkey_tests
 # -------------------------------------------------------------------
 # Top‐level rules
 # -------------------------------------------------------------------
-.PHONY: all clean run tests
+# The test rule is named `test` and the repository also has a test/ directory,
+# so the target must be declared phony or make may consider it up to date.
+.PHONY: all clean run test valgrind
 all: $(TARGET) $(TEST_TARGET)
 valgrind: CXXFLAGS += -O0 -g
 valgrind: $(TEST_TARGET)
 	valgrind -s --leak-check=full --show-leak-kinds=all --track-origins=yes $(TEST_TARGET)
 clean:
 	@rm -rf $(BUILD_DIR)
@@ -43,7 +47,7 @@ clean:
 run: $(TARGET)
 	@$(TARGET)
-tests: $(TEST_TARGET)
+test: $(TEST_TARGET)
 	@$(TEST_TARGET) $(if $(TEST),--test-case=$(TEST))
 # -------------------------------------------------------------------
--- a/src/ast/ast.cpp
+++ b/src/ast/ast.cpp
@@ -0,0 +1,9 @@
 #include "ast.hpp"
 namespace ast {
    /// Literal of the program's first statement, or an empty string when the
    /// program holds no statements.
    std::string program::token_literal() const {
        return statements.empty() ? std::string{}
                                  : statements.front()->token_literal();
    }
 } // namespace ast
--- a/src/ast/ast.hpp
+++ b/src/ast/ast.hpp
@@ -0,0 +1,29 @@
 #pragma once
 #include <string>
 #include <vector>
 namespace ast {
    /// Root of the AST hierarchy: every node can report the literal of the
    /// token it is associated with.
    struct node {
        virtual std::string token_literal() const = 0;
        virtual ~node() = default;
    };
    /// Base type for statement nodes.
    struct statement : node {
        virtual std::string token_literal() const override = 0;
    };
    /// Base type for expression nodes.
    struct expression : node {
        virtual std::string token_literal() const override = 0;
    };
    /// A whole parsed program: an ordered list of statements.
    ///
    /// The program owns every pointer stored in `statements` and deletes them
    /// in its destructor. Copying is disabled because a copy would hold the
    /// same raw pointers and delete them a second time.
    struct program : public node {
        std::vector<statement*> statements;
        program() = default;
        program(const program&) = delete;
        program& operator=(const program&) = delete;
        std::string token_literal() const override;
        ~program() {
            for (statement* owned : statements)
                delete owned;
        }
    };
 } // namespace ast
--- a/src/ast/errors/error.hpp
+++ b/src/ast/errors/error.hpp
@@ -0,0 +1,25 @@
 #pragma once
 #include "token/type.hpp"
 namespace ast::error {
    /// Base class of every error in this namespace; carries a message through
    /// std::runtime_error.
    struct error : public std::runtime_error {
        explicit error(const std::string& message)
            : std::runtime_error(message) {}
    };
    /// Error category for failures reported by the parser.
    struct parser_error : error {
        explicit parser_error(const std::string& message): error(message) {}
    };
    /// Parser error that additionally records which token type was expected
    /// to come next.
    struct expected_next : parser_error {
        // Token type the parser was expecting.
        token::type expected_type;
        explicit expected_next(
            token::type expected_type, const std::string& message
        )
            : parser_error(message),
              expected_type(expected_type) {}
    };
 } // namespace ast::error
--- a/src/ast/expressions/identifier.cpp
+++ b/src/ast/expressions/identifier.cpp
@@ -0,0 +1,11 @@
 #include "identifier.hpp"
 namespace ast {
    // Both arguments are taken by value and moved into the members.
    identifier::identifier(token::token token, std::string value)
        : token(std::move(token)),
          value(std::move(value)) {}
    // Returns the literal of the token this identifier was built from.
    std::string identifier::token_literal() const {
        return token.literal;
    }
 } // namespace ast
--- a/src/ast/expressions/identifier.hpp
+++ b/src/ast/expressions/identifier.hpp
@@ -0,0 +1,16 @@
 #pragma once
 #include "ast/ast.hpp"
 #include "token/token.hpp"
 #include <string>
 namespace ast {
    /// Expression node for an identifier.
    struct identifier : expression {
        identifier(token::token token, std::string value);
        // Token the identifier was created from.
        token::token token;
        // Value stored alongside the token (the constructor's second argument).
        std::string value;
        std::string token_literal() const override;
    };
 } // namespace ast
--- a/src/ast/statements/let.cpp
+++ b/src/ast/statements/let.cpp
@@ -0,0 +1,17 @@
 #include "let.hpp"
 namespace ast {
    // Stores the given token; `name` and `value` start out null and are
    // expected to be filled in afterwards.
    let::let(token::token token)
        : token(std::move(token)),
          name(nullptr),
          value(nullptr) {}
    // Returns the literal of the token stored at construction.
    std::string let::token_literal() const {
        return token.literal;
    }
    // The statement owns both child nodes; deleting a null pointer is a no-op,
    // so partially built statements are destroyed safely.
    let::~let() {
        delete name;
        delete value;
    };
 } // namespace ast
--- a/src/ast/statements/let.hpp
+++ b/src/ast/statements/let.hpp
@@ -0,0 +1,19 @@
 #pragma once
 #include "ast/ast.hpp"
 #include "ast/expressions/identifier.hpp"
 #include "token/token.hpp"
 namespace ast {
    /// Statement node for a `let` binding.
    ///
    /// The statement owns `name` and `value` and deletes them in its
    /// destructor. Copying is disabled because a copy would hold the same raw
    /// pointers and delete them a second time.
    struct let : statement {
        let(token::token token);
        let(const let&) = delete;
        let& operator=(const let&) = delete;
        // Token the statement was created from.
        token::token token;
        // Identifier being bound; null until the parser fills it in.
        identifier* name;
        // Bound expression; null until the parser fills it in.
        expression* value;
        std::string token_literal() const override;
        ~let();
    };
 } // namespace ast
--- a/src/lexer/lexer.cpp
+++ b/src/lexer/lexer.cpp
@@ -8,15 +8,30 @@
 namespace lexer {
    token::token lexer::next_token() {
        char c;
        if (!(input >> c))
            return {token::type::END_OF_FILE, ""};
        switch (c) {
        case '=':
            if (input.peek() == '=')
                return {token::type::EQ, std::string{c, (char) input.get()}};
            return {token::type::ASSIGN, c};
        case '+':
            return {token::type::PLUS, c};
        case '-':
            return {token::type::MINUS, c};
        case '!':
            if (input.peek() == '=')
                return {token::type::NEQ, std::string{c, (char) input.get()}};
            return {token::type::BANG, c};
        case '*':
            return {token::type::ASTERISK, c};
        case '/':
            return {token::type::SLASH, c};
        case '<':
            return {token::type::LT, c};
        case '>':
            return {token::type::GT, c};
        case ',':
            return {token::type::COMMA, c};
        case ';':
@@ -31,14 +46,14 @@ namespace lexer {
            return {token::type::RBRACE, c};
        default:
            if (is_letter(c)) {
-                std::string identifier_or_keyword = read_string(c);
+                std::string identifier_or_keyword = read_string();
                return {
                    token::lookup_identifier(identifier_or_keyword),
                    identifier_or_keyword
                };
            }
            if (std::isdigit(c))
-                return {token::type::INT, read_int(c)};
+                return {token::type::INT, read_int()};
            return {token::type::ILLEGAL, c};
        }
@@ -48,17 +63,17 @@ namespace lexer {
        return c == '_' || std::isalpha(static_cast<unsigned char>(c));
    }
-    std::string lexer::read_string(char first_char) {
+    std::string lexer::read_string() {
        std::string result;
-        result.push_back(first_char);
+        result.push_back(c);
        for (char c = input.peek(); is_letter(c); c = input.peek())
            result.push_back(input.get());
        return result;
    }
-    std::string lexer::read_int(char first_digit) {
+    std::string lexer::read_int() {
        std::string result;
-        result.push_back(first_digit);
+        result.push_back(c);
        for (char c = input.peek(); std::isdigit(c); c = input.peek())
            result.push_back(input.get());
        return result;
--- a/src/lexer/lexer.hpp
+++ b/src/lexer/lexer.hpp
@@ -1,3 +1,4 @@
 #pragma once
 #include "token/token.hpp"
 #include <istream>
@@ -5,12 +6,13 @@
 namespace lexer {
    struct lexer {
        std::istream& input;
        char c = 0;
        token::token next_token();
      private:
        bool is_letter(char);
-        std::string read_string(char);
+        std::string read_string();
-        std::string read_int(char);
+        std::string read_int();
    };
 } // namespace lexer
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -1,9 +1,8 @@
-#include "token/type.hpp"
+#include "repl/repl.hpp"
 #include <iostream>
 int main() {
-    token::type eof = token::type::ILLEGAL;
+    repl::start(std::cin, std::cout);
    std::cout << eof << std::endl;
    return 0;
 }
--- a/src/parser/parser.cpp
+++ b/src/parser/parser.cpp
@@ -0,0 +1,85 @@
 #include "parser.hpp"
 #include "ast/errors/error.hpp"
 #include "token/type.hpp"
 #include <sstream>
 namespace parser {
    // Binds the parser to `lexer` and primes the two-token window.
    // `current` and `next` start as ILLEGAL placeholders; the first
    // next_token() call loads the first real token into `next`, the second
    // shifts it into `current` and loads the following token into `next`.
    parser::parser(lexer::lexer& lexer)
        : lexer(lexer),
          current(token::type::ILLEGAL, ""),
          next(token::type::ILLEGAL, "") {
        next_token();
        next_token();
    }
    // Advances the window by one token: `next` becomes `current` and a fresh
    // token is pulled from the lexer into `next`.
    void parser::next_token() {
        current = next;
        next = lexer.next_token();
    }
    /// Parses statements until END_OF_FILE and returns them in a newly
    /// allocated program that the caller must delete. Statements that fail to
    /// parse (null results) are left out.
    ast::program* parser::parse_program() {
        auto* result = new ast::program();
        while (current.type != token::type::END_OF_FILE) {
            if (ast::statement* parsed = parse_statement())
                result->statements.push_back(parsed);
            next_token();
        }
        return result;
    }
    /// Dispatches on the current token type. Only `let` statements are
    /// handled; any other token yields a null pointer.
    ast::statement* parser::parse_statement() {
        if (current.type == token::type::LET)
            return parse_let();
        return nullptr;
    }
    /// Advances past the upcoming token when it has type `t` and returns true.
    /// Otherwise records an error, leaves the tokens untouched and returns
    /// false.
    bool parser::expect_next(token::type t) {
        if (next.type != t) {
            next_error(t);
            return false;
        }
        next_token();
        return true;
    }
    /// Parses a `let` statement starting at the current `let` token.
    ///
    /// Expects an identifier followed by `=`. When either expectation fails
    /// the error is recorded by expect_next(), the partially built statement
    /// is deleted and a null pointer is returned. On success the caller owns
    /// the returned statement, and `current` is left on the terminating
    /// semicolon (or on END_OF_FILE when the input ends without one).
    ast::let* parser::parse_let() {
        ast::let* stmt = new ast::let(current);
        if (!expect_next(token::type::IDENTIFIER)) {
            delete stmt;
            return nullptr;
        }
        stmt->name = new ast::identifier{current, current.literal};
        if (!expect_next(token::type::ASSIGN)) {
            delete stmt;
            return nullptr;
        }
        // TODO: we are currently skipping expressions until we encounter a
        // semicolon
        // Also stop at END_OF_FILE: the lexer keeps returning END_OF_FILE once
        // the input is exhausted, so waiting only for a semicolon would loop
        // forever on input such as `let x = 5` with no trailing `;`.
        while (current.type != token::type::SEMICOLON
               && current.type != token::type::END_OF_FILE)
            next_token();
        return stmt;
    }
    /// Records an `expected_next` error saying that the upcoming token should
    /// have had type `t`, together with the type that was actually found.
    void parser::next_error(token::type t) {
        std::stringstream message;
        message << "Expected next token to be " << t;
        message << " but instead got " << next.type;
        errors.push_back(new ast::error::expected_next(t, message.str()));
    }
    /// Frees every error object accumulated in `errors`.
    parser::~parser() {
        for (ast::error::error* recorded : errors)
            delete recorded;
    }
 } // namespace parser
--- a/src/parser/parser.hpp
+++ b/src/parser/parser.hpp
@@ -0,0 +1,27 @@
 #pragma once
 #include "ast/ast.hpp"
 #include "ast/errors/error.hpp"
 #include "ast/statements/let.hpp"
 #include "lexer/lexer.hpp"
 #include "token/token.hpp"
 namespace parser {
    /// Parser over a lexer's token stream, keeping a window of the current
    /// token and the one after it.
    ///
    /// The parser owns the error objects stored in `errors` and deletes them
    /// in its destructor. Copying is disabled because a copy would hold the
    /// same raw pointers and delete them a second time.
    struct parser {
        parser(lexer::lexer& lexer);
        ~parser();
        parser(const parser&) = delete;
        parser& operator=(const parser&) = delete;
        // Errors recorded while parsing, in the order they were encountered.
        std::vector<ast::error::error*> errors;
        // Parses the whole input; the caller owns the returned program.
        ast::program* parse_program();
      private:
        lexer::lexer& lexer;
        token::token current, next;
        void next_token();
        ast::statement* parse_statement();
        ast::let* parse_let();
        bool expect_next(token::type);
        void next_error(token::type);
    };
 } // namespace parser
--- a/src/repl/repl.cpp
+++ b/src/repl/repl.cpp
@@ -0,0 +1,30 @@
 #include "repl.hpp"
 #include "lexer/lexer.hpp"
 #include <sstream>
 #include <string>
 static const std::string PROMPT = ">> ";
 namespace repl {
    void start(std::istream& in, std::ostream& out) {
        while (true) {
            out << PROMPT;
            std::string line;
            if (!std::getline(in, line))
                return;
            std::istringstream ss(line);
            lexer::lexer l{ss};
            for (token::token tok = l.next_token();
                 tok.type != token::type::END_OF_FILE;
                 tok = l.next_token())
                out << tok << " ";
            out << std::endl;
        }
    }
 } // namespace repl
--- a/src/repl/repl.hpp
+++ b/src/repl/repl.hpp
@@ -0,0 +1,8 @@
 #pragma once
 #include <istream>
 #include <ostream>
 namespace repl {
    // Runs the interactive loop, reading from the first stream and writing
    // to the second.
    void start(std::istream&, std::ostream&);
 }
--- a/src/token/token.hpp
+++ b/src/token/token.hpp
@@ -13,4 +13,8 @@ namespace token {
        token(::token::type t, char c): type(t), literal(1, c) {}
    };
    inline std::ostream& operator<<(std::ostream& os, token tok) {
        return os << tok.type << '(' << tok.literal << ')';
    }
 } // namespace token
--- a/src/token/type.cpp
+++ b/src/token/type.cpp
@@ -6,10 +6,11 @@
 namespace token {
    // Array mapping enum values to their string representations
-    constexpr std::array<std::string_view, static_cast<size_t>(type::LET) + 1>
+    constexpr std::
-        tokenTypeStrings = {
+        array<std::string_view, static_cast<size_t>(type::RETURN) + 1>
            tokenTypeStrings = {
 #define X(name, str) str,
-            TOKEN_LIST
+                TOKEN_LIST
 #undef X
    };
@@ -24,6 +25,11 @@ namespace token {
    static std::unordered_map<std::string, type> keywords{
        {"fn", type::FUNCTION},
        {"let", type::LET},
        {"if", type::IF},
        {"else", type::ELSE},
        {"true", type::TRUE},
        {"false", type::FALSE},
        {"return", type::RETURN},
    };
    type lookup_identifier(std::string ident) {
--- a/src/token/type.hpp
+++ b/src/token/type.hpp
@@ -12,14 +12,27 @@ namespace token {
    X(INT, "INT")                                                              \
    X(ASSIGN, "=")                                                             \
    X(PLUS, "+")                                                               \
    X(MINUS, "-")                                                              \
    X(BANG, "!")                                                               \
    X(ASTERISK, "*")                                                           \
    X(SLASH, "/")                                                              \
    X(LT, "<")                                                                 \
    X(GT, ">")                                                                 \
    X(COMMA, ",")                                                              \
    X(SEMICOLON, ";")                                                          \
    X(LPAREN, "(")                                                             \
    X(RPAREN, ")")                                                             \
    X(LBRACE, "{")                                                             \
    X(RBRACE, "}")                                                             \
    X(LET, "LET")                                                              \
    X(FUNCTION, "FUNCTION")                                                    \
-    X(LET, "LET")
+    X(IF, "IF")                                                                \
    X(ELSE, "ELSE")                                                            \
    X(TRUE, "TRUE")                                                            \
    X(FALSE, "FALSE")                                                          \
    X(EQ, "==")                                                                \
    X(NEQ, "!=")                                                               \
    X(RETURN, "RETURN")
    // Define the TokenType enum using the X-macro
    enum class type {
--- a/test/lexer.cpp
+++ b/test/lexer.cpp
@@ -31,12 +31,12 @@ TEST_CASE("Single character token") {
    for (const auto& t : tests) {
        token::token tok = l.next_token();
-        CHECK(tok.type == t.expectedType);
+        REQUIRE(tok.type == t.expectedType);
-        CHECK(tok.literal == t.expectedLiteral);
+        REQUIRE(tok.literal == t.expectedLiteral);
    }
 };
-TEST_CASE("Full tokens") {
+TEST_CASE("More tokens") {
    struct test {
        token::type expectedType;
        std::string expectedLiteral;
@@ -48,6 +48,17 @@ let add = fn(x, y) {\
    x + y;\
 };\
 let result = add(five, ten);\
 !-/*5;\
 5 < 10 > 5;\
 \
 if (5 < 10) {\
    return true;\
 } else {\
    return false;\
 }\
 \
 10 == 10;\
 10 != 9;\
 ");
    lexer::lexer l{ss};
@@ -93,12 +104,58 @@ let result = add(five, ten);\
        {token::type::IDENTIFIER, "ten"},
        {token::type::RPAREN, ")"},
        {token::type::SEMICOLON, ";"},
        {token::type::BANG, "!"},
        {token::type::MINUS, "-"},
        {token::type::SLASH, "/"},
        {token::type::ASTERISK, "*"},
        {token::type::INT, "5"},
        {token::type::SEMICOLON, ";"},
        {token::type::INT, "5"},
        {token::type::LT, "<"},
        {token::type::INT, "10"},
        {token::type::GT, ">"},
        {token::type::INT, "5"},
        {token::type::SEMICOLON, ";"},
        {token::type::IF, "if"},
        {token::type::LPAREN, "("},
        {token::type::INT, "5"},
        {token::type::LT, "<"},
        {token::type::INT, "10"},
        {token::type::RPAREN, ")"},
        {token::type::LBRACE, "{"},
        {token::type::RETURN, "return"},
        {token::type::TRUE, "true"},
        {token::type::SEMICOLON, ";"},
        {token::type::RBRACE, "}"},
        {token::type::ELSE, "else"},
        {token::type::LBRACE, "{"},
        {token::type::RETURN, "return"},
        {token::type::FALSE, "false"},
        {token::type::SEMICOLON, ";"},
        {token::type::RBRACE, "}"},
        {token::type::INT, "10"},
        {token::type::EQ, "=="},
        {token::type::INT, "10"},
        {token::type::SEMICOLON, ";"},
        {token::type::INT, "10"},
        {token::type::NEQ, "!="},
        {token::type::INT, "9"},
        {token::type::SEMICOLON, ";"},
        // clang-format on
    };
    for (const auto& t : tests) {
        token::token tok = l.next_token();
-        CHECK(tok.type == t.expectedType);
+        REQUIRE(tok.type == t.expectedType);
-        CHECK(tok.literal == t.expectedLiteral);
+        REQUIRE(tok.literal == t.expectedLiteral);
    }
 };
--- a/test/parser.cpp
+++ b/test/parser.cpp
@@ -0,0 +1,134 @@
 #include "parser/parser.hpp"
 #include "ast/ast.hpp"
 #include "ast/statements/let.hpp"
 #include "lexer/lexer.hpp"
 #include <doctest.h>
 #include <iostream>
 #include <sstream>
 // Asserts that `stmt` is a `let` statement binding the identifier `name`.
 // Every pointer is checked before it is dereferenced so a parser regression
 // shows up as a test failure instead of a crash.
 void test_let_statement(ast::statement* stmt, const std::string& name) {
    REQUIRE_MESSAGE(stmt != nullptr, "Statement is a null pointer");
    REQUIRE(stmt->token_literal() == "let");
    ast::let* let_stmt;
    REQUIRE_NOTHROW(let_stmt = dynamic_cast<ast::let*>(stmt));
    REQUIRE_MESSAGE(
        let_stmt != nullptr,
        "Couldn't cast statement to a let statement"
    );
    REQUIRE_MESSAGE(
        let_stmt->name != nullptr,
        "Let statement has no identifier"
    );
    REQUIRE(let_stmt->name->value == name);
    REQUIRE(let_stmt->name->token_literal() == name);
 }
 void test_failing_let_parsing(
    std::string input_s,
    std::vector<token::type> expected_types,
    int n_good_statements = 0
 ) {
    std::stringstream input(input_s);
    lexer::lexer l{input};
    parser::parser p{l};
    ast::program* program = p.parse_program();
    // Check for errors
    REQUIRE(p.errors.size() == expected_types.size());
    int i = 0;
    for (auto& e : p.errors) {
        ast::error::expected_next* en;
        REQUIRE_NOTHROW(en = dynamic_cast<ast::error::expected_next*>(e));
        REQUIRE_MESSAGE(
            en != nullptr,
            "Couldn't cast the error to an 'expected_next'"
        );
        REQUIRE(en->expected_type == expected_types[i++]);
    }
    // normal program check
    REQUIRE_MESSAGE(
        program != nullptr,
        "parse_program() returned a null pointer"
    );
    REQUIRE(program->statements.size() == n_good_statements);
    delete program;
 }
 // Prints every recorded parser error to stderr and marks the running test as
 // failed. Does nothing when `errors` is empty.
 void check_parser_errors(const std::vector<ast::error::error*>& errors) {
    if (errors.empty())
        return;
    std::cerr << "parser has " << errors.size() << " errors:\n";
    for (const auto& error : errors)
        std::cerr << '\t' << error->what() << "\n";
    // FAIL_CHECK records the failure but lets the test case keep running
    // (doctest's FAIL would abort it), so the caller still reaches its cleanup.
    FAIL_CHECK("Parser had errors. See stderr for details.");
 }
 // Each subcase feeds malformed `let` statements to the parser and checks the
 // expected-token errors it records; the suite is meant to be run under
 // valgrind so leaks on the failure paths are caught.
 TEST_CASE("Malformed let statement (checking for memory leaks)") {
    SUBCASE("Second token not identifier") {
        test_failing_let_parsing("let 5 = 5;", {token::type::IDENTIFIER});
    }
    SUBCASE("Third token not '='") {
        test_failing_let_parsing("let five ! 5;", {token::type::ASSIGN});
    }
    // Only one error is expected: parsing of a let statement stops at the
    // first failed expectation.
    SUBCASE("Missing both identifier and '='") {
        test_failing_let_parsing("let 5;", {token::type::IDENTIFIER});
    }
    // Five statements, of which only `let five = 5;` is well formed.
    SUBCASE("Multiple parsing errors") {
        test_failing_let_parsing(
            "let 5; let ! = 5; let five = 5; let five 5; let;",
            {token::type::IDENTIFIER,
             token::type::IDENTIFIER,
             token::type::ASSIGN,
             token::type::IDENTIFIER},
            1
        );
    }
 }
 // Parses three well-formed `let` statements and checks that each one binds
 // the expected identifier.
 TEST_CASE("Parse let statement") {
    std::stringstream input("\
 let x = 5;\
 let y = 10;\
 let foobar = 103213;\
 ");
    lexer::lexer l{input};
    parser::parser p{l};
    ast::program* program = p.parse_program();
    check_parser_errors(p.errors);
    REQUIRE_MESSAGE(
        program != nullptr,
        "parse_program() returned a null pointer"
    );
    REQUIRE(program->statements.size() == 3);
    struct test {
        std::string expected_identifier;
    };
    // One entry per statement, in source order.
    test tests[]{
        "x",
        "y",
        "foobar",
    };
    int i = 0;
    for (const auto& t : tests) {
        ast::statement* stmt = program->statements[i++];
        test_let_statement(stmt, t.expected_identifier);
    }
    delete program;
 }
Author	SHA1	Message	Date
Karma Riuk	bed99e0d63	added some sort of error generation when parsing errors occur	2025-07-07 15:02:06 +02:00
Karma Riuk	bbac513aa9	checking that the casting of the let statement doesn't throw	2025-07-07 15:01:46 +02:00
Karma Riuk	de465b6122	very basic parser of let statements	2025-07-03 13:30:56 +02:00
Karma Riuk	c091f7f021	added execution of valgrind on valgrind target	2025-07-03 11:59:26 +02:00
Karma Riuk	ca74b67bb0	renamed tests to test	2025-07-03 10:10:22 +02:00
Karma Riuk	4da5313db5	using require instead of check to fail fast	2025-07-02 23:12:28 +02:00
Karma Riuk	896b9001c7	added default value to lexer field to avoid compiler complaints	2025-07-02 23:00:09 +02:00
Karma Riuk	6181fc8d9f	added valgrind to targets to check for memory leaks	2025-07-02 22:59:49 +02:00
Karma Riuk	d328ae60df	added clangd config to disable semantic tokens on operators because seeing new and delete treated as operators was hurting my eyes	2025-07-02 22:42:21 +02:00
Karma Riuk	3547822d3e	forgot the pragma once for the hpps	2025-07-02 11:17:34 +02:00
Karma Riuk	e773cb649f	added the current character to the lexer struct for cleaner structure	2025-07-01 18:59:43 +02:00
Karma Riuk	69bee723a2	implemented very simple repl	2025-07-01 18:43:25 +02:00
Karma Riuk	aee7a741b1	added EQ and NEQ	2025-07-01 18:01:45 +02:00
Karma Riuk	7973f7522c	extended lexer to new keywords	2025-06-30 00:36:31 +02:00
Karma Riuk	5cc7147909	extended single char tokens	2025-06-30 00:27:30 +02:00
		`@@ -0,0 +1,2 @@`
							`SemanticTokens:`
							`DisabledKinds: [Operator]`