Compare commits

...

15 Commits

Author SHA1 Message Date
Karma Riuk
132dc65240 added some sort of error generation when parsing
errors occur
2025-07-07 15:04:23 +02:00
Karma Riuk
bbac513aa9 checking that the casting of the let statement
doesn't throw
2025-07-07 15:01:46 +02:00
Karma Riuk
de465b6122 very basic parser of let statements 2025-07-03 13:30:56 +02:00
Karma Riuk
c091f7f021 added execution of valgrind on valgrind target 2025-07-03 11:59:26 +02:00
Karma Riuk
ca74b67bb0 renamed tests to test 2025-07-03 10:10:22 +02:00
Karma Riuk
4da5313db5 using require instead of check to fail fast 2025-07-02 23:12:28 +02:00
Karma Riuk
896b9001c7 added default value to lexer field to avoid
compiler complaints
2025-07-02 23:00:09 +02:00
Karma Riuk
6181fc8d9f added valgrind to targets to check for memory
leaks
2025-07-02 22:59:49 +02:00
Karma Riuk
d328ae60df added clangd config to disable semantic tokens on
operators because seeing new and delete treated as
operators was hurting my eyes
2025-07-02 22:42:21 +02:00
Karma Riuk
3547822d3e forgot the pragma once for the hpps 2025-07-02 11:17:34 +02:00
Karma Riuk
e773cb649f added the current character to the lexer struct
for cleaner structure
2025-07-01 18:59:43 +02:00
Karma Riuk
69bee723a2 implemented very simple repl 2025-07-01 18:43:25 +02:00
Karma Riuk
aee7a741b1 added EQ and NEQ 2025-07-01 18:01:45 +02:00
Karma Riuk
7973f7522c extended lexer to new keywords 2025-06-30 00:36:31 +02:00
Karma Riuk
5cc7147909 extended single char tokens 2025-06-30 00:27:30 +02:00
21 changed files with 534 additions and 23 deletions

2
.clangd Normal file
View File

@@ -0,0 +1,2 @@
SemanticTokens:
DisabledKinds: [Operator]

View File

@@ -31,9 +31,13 @@ TEST_TARGET := $(BIN_DIR)/monkey_tests
# ------------------------------------------------------------------- # -------------------------------------------------------------------
# Toplevel rules # Toplevel rules
# ------------------------------------------------------------------- # -------------------------------------------------------------------
.PHONY: all clean run tests .PHONY: all clean run tests valgrind
all: $(TARGET) $(TEST_TARGET) all: $(TARGET) $(TEST_TARGET)
valgrind: CXXFLAGS += -O0 -g
valgrind: $(TEST_TARGET)
valgrind -s --leak-check=full --show-leak-kinds=all --track-origins=yes $(TEST_TARGET)
clean: clean:
@rm -rf $(BUILD_DIR) @rm -rf $(BUILD_DIR)
@@ -43,7 +47,7 @@ clean:
run: $(TARGET) run: $(TARGET)
@$(TARGET) @$(TARGET)
tests: $(TEST_TARGET) test: $(TEST_TARGET)
@$(TEST_TARGET) $(if $(TEST),--test-case=$(TEST)) @$(TEST_TARGET) $(if $(TEST),--test-case=$(TEST))
# ------------------------------------------------------------------- # -------------------------------------------------------------------

9
src/ast/ast.cpp Normal file
View File

@@ -0,0 +1,9 @@
#include "ast.hpp"

namespace ast {
    /// Returns the literal of the token that starts the program — i.e. the
    /// first statement's token literal — or "" for an empty program.
    std::string program::token_literal() const {
        if (!statements.empty())
            return statements[0]->token_literal();
        return "";
    }
} // namespace ast

29
src/ast/ast.hpp Normal file
View File

@@ -0,0 +1,29 @@
#pragma once
#include <string>
#include <vector>
namespace ast {
struct node {
virtual std::string token_literal() const = 0;
virtual ~node() = default;
};
struct statement : node {
virtual std::string token_literal() const override = 0;
};
struct expression : node {
virtual std::string token_literal() const override = 0;
};
struct program : public node {
std::vector<statement*> statements;
std::string token_literal() const override;
~program() {
for (const auto& ref : statements)
delete ref;
};
};
} // namespace ast

25
src/ast/errors/error.hpp Normal file
View File

@@ -0,0 +1,25 @@
#pragma once

#include "token/type.hpp"

// Include what we use: std::runtime_error and std::string were previously
// reached only through transitive includes.
#include <stdexcept>
#include <string>

namespace ast::error {
    // Root of the error hierarchy; carries a human-readable message.
    struct error : public std::runtime_error {
        explicit error(const std::string& message)
            : std::runtime_error(message) {}
    };

    // Any error produced while parsing.
    struct parser_error : error {
        explicit parser_error(const std::string& message): error(message) {}
    };

    // Raised when the parser's lookahead token had an unexpected type.
    struct expected_next : parser_error {
        token::type expected_type; // the type the parser required

        explicit expected_next(
            token::type expected_type, const std::string& message
        )
            : parser_error(message),
              expected_type(expected_type) {}
    };
} // namespace ast::error

View File

@@ -0,0 +1,11 @@
#include "identifier.hpp"

namespace ast {
    // An identifier node keeps the token it was lexed from together with
    // its textual value.
    identifier::identifier(token::token tok, std::string val)
        : token(std::move(tok)),
          value(std::move(val)) {}

    // The node's literal is simply its own token text.
    std::string identifier::token_literal() const {
        return token.literal;
    }
} // namespace ast

View File

@@ -0,0 +1,16 @@
#pragma once

#include "ast/ast.hpp"
#include "token/token.hpp"

#include <string>

namespace ast {
    // Expression node for a bare identifier (e.g. the `x` in `let x = 5;`).
    struct identifier : expression {
        identifier(token::token token, std::string value);

        token::token token; // the IDENTIFIER token this node was built from
        std::string value;  // the identifier's name, e.g. "x"

        // Returns token.literal (defined in identifier.cpp).
        std::string token_literal() const override;
    };
} // namespace ast

View File

@@ -0,0 +1,17 @@
#include "let.hpp"

namespace ast {
    // A freshly-parsed let statement owns nothing yet: the parser fills in
    // name/value afterwards, so both start out null.
    let::let(token::token tok)
        : token(std::move(tok)),
          name(nullptr),
          value(nullptr) {}

    // The node's literal is the `let` keyword token's text.
    std::string let::token_literal() const {
        return token.literal;
    }

    // The statement owns its name and value; `delete` on null is a no-op.
    let::~let() {
        delete name;
        delete value;
    }
} // namespace ast

View File

@@ -0,0 +1,19 @@
#pragma once

#include "ast/ast.hpp"
#include "ast/expressions/identifier.hpp"
#include "token/token.hpp"

namespace ast {
    // Statement node for `let <name> = <value>;`.
    struct let : statement {
        // `explicit`: a token should not silently convert into a statement.
        explicit let(token::token token);

        token::token token;          // the LET keyword token
        identifier* name = nullptr;  // owned; the bound identifier
        expression* value = nullptr; // owned; the assigned expression

        std::string token_literal() const override;

        // `let` owns name/value through raw pointers: copying would delete
        // both twice, so it is forbidden.
        let(const let&) = delete;
        let& operator=(const let&) = delete;

        ~let();
    };
} // namespace ast

View File

@@ -8,15 +8,30 @@
namespace lexer { namespace lexer {
token::token lexer::next_token() { token::token lexer::next_token() {
char c;
if (!(input >> c)) if (!(input >> c))
return {token::type::END_OF_FILE, ""}; return {token::type::END_OF_FILE, ""};
switch (c) { switch (c) {
case '=': case '=':
if (input.peek() == '=')
return {token::type::EQ, std::string{c, (char) input.get()}};
return {token::type::ASSIGN, c}; return {token::type::ASSIGN, c};
case '+': case '+':
return {token::type::PLUS, c}; return {token::type::PLUS, c};
case '-':
return {token::type::MINUS, c};
case '!':
if (input.peek() == '=')
return {token::type::NEQ, std::string{c, (char) input.get()}};
return {token::type::BANG, c};
case '*':
return {token::type::ASTERISK, c};
case '/':
return {token::type::SLASH, c};
case '<':
return {token::type::LT, c};
case '>':
return {token::type::GT, c};
case ',': case ',':
return {token::type::COMMA, c}; return {token::type::COMMA, c};
case ';': case ';':
@@ -31,14 +46,14 @@ namespace lexer {
return {token::type::RBRACE, c}; return {token::type::RBRACE, c};
default: default:
if (is_letter(c)) { if (is_letter(c)) {
std::string identifier_or_keyword = read_string(c); std::string identifier_or_keyword = read_string();
return { return {
token::lookup_identifier(identifier_or_keyword), token::lookup_identifier(identifier_or_keyword),
identifier_or_keyword identifier_or_keyword
}; };
} }
if (std::isdigit(c)) if (std::isdigit(c))
return {token::type::INT, read_int(c)}; return {token::type::INT, read_int()};
return {token::type::ILLEGAL, c}; return {token::type::ILLEGAL, c};
} }
@@ -48,17 +63,17 @@ namespace lexer {
return c == '_' || std::isalpha(static_cast<unsigned char>(c)); return c == '_' || std::isalpha(static_cast<unsigned char>(c));
} }
std::string lexer::read_string(char first_char) { std::string lexer::read_string() {
std::string result; std::string result;
result.push_back(first_char); result.push_back(c);
for (char c = input.peek(); is_letter(c); c = input.peek()) for (char c = input.peek(); is_letter(c); c = input.peek())
result.push_back(input.get()); result.push_back(input.get());
return result; return result;
} }
std::string lexer::read_int(char first_digit) { std::string lexer::read_int() {
std::string result; std::string result;
result.push_back(first_digit); result.push_back(c);
for (char c = input.peek(); std::isdigit(c); c = input.peek()) for (char c = input.peek(); std::isdigit(c); c = input.peek())
result.push_back(input.get()); result.push_back(input.get());
return result; return result;

View File

@@ -1,3 +1,4 @@
#pragma once
#include "token/token.hpp" #include "token/token.hpp"
#include <istream> #include <istream>
@@ -5,12 +6,13 @@
namespace lexer { namespace lexer {
struct lexer { struct lexer {
std::istream& input; std::istream& input;
char c = 0;
token::token next_token(); token::token next_token();
private: private:
bool is_letter(char); bool is_letter(char);
std::string read_string(char); std::string read_string();
std::string read_int(char); std::string read_int();
}; };
} // namespace lexer } // namespace lexer

View File

@@ -1,9 +1,8 @@
#include "token/type.hpp" #include "repl/repl.hpp"
#include <iostream> #include <iostream>
int main() { int main() {
token::type eof = token::type::ILLEGAL; repl::start(std::cin, std::cout);
std::cout << eof << std::endl;
return 0; return 0;
} }

84
src/parser/parser.cpp Normal file
View File

@@ -0,0 +1,84 @@
#include "parser.hpp"
#include "ast/errors/error.hpp"
#include "token/type.hpp"
#include <sstream>
namespace parser {
    parser::parser(lexer::lexer& lexer)
        : lexer(lexer),
          current(token::type::ILLEGAL, ""),
          next(token::type::ILLEGAL, "") {
        // Prime the two-token window so `current` and `next` are both valid.
        next_token();
        next_token();
    }

    // Advance the token window: `current` <- `next`, `next` <- lexer.
    void parser::next_token() {
        current = next;
        next = lexer.next_token();
    }

    // Parse statements until EOF. The caller owns the returned program.
    ast::program* parser::parse_program() {
        ast::program* p = new ast::program();

        for (; current.type != token::type::END_OF_FILE; next_token()) {
            ast::statement* stmt = parse_statement();
            if (stmt != nullptr)
                p->statements.push_back(stmt);
        }
        return p;
    }

    ast::statement* parser::parse_statement() {
        switch (current.type) {
        case token::type::LET:
            return parse_let();
        default:
            return nullptr;
        }
    }

    // If `next` has type `t`, consume it and return true; otherwise record
    // an expected_next error and leave the position unchanged.
    bool parser::expect_next(token::type t) {
        if (next.type == t) {
            next_token();
            return true;
        }
        next_error(t);
        return false;
    }

    ast::let* parser::parse_let() {
        ast::let* stmt = new ast::let(current);

        if (!expect_next(token::type::IDENTIFIER)) {
            delete stmt;
            return nullptr;
        }

        stmt->name = new ast::identifier{current, current.literal};

        if (!expect_next(token::type::ASSIGN)) {
            delete stmt;
            return nullptr;
        }

        // TODO: we are currently skipping expressions until we encounter a
        // semicolon. Also stop at EOF: the lexer reports END_OF_FILE forever
        // once exhausted, so a `let` missing its `;` would otherwise make
        // this loop spin indefinitely.
        while (current.type != token::type::SEMICOLON
               && current.type != token::type::END_OF_FILE)
            next_token();

        return stmt;
    }

    // Record an "expected X, got Y" error about the lookahead token.
    void parser::next_error(token::type t) {
        std::stringstream ss;
        ss << "Expected next token to be " << t << " but instead got "
           << next.type;
        errors.push_back(new ast::error::expected_next(t, ss.str()));
    }

    parser::~parser() {
        for (const auto& e : errors)
            delete e;
    }
} // namespace parser

27
src/parser/parser.hpp Normal file
View File

@@ -0,0 +1,27 @@
#pragma once

#include "ast/ast.hpp"
#include "ast/errors/error.hpp"
#include "ast/statements/let.hpp"
#include "lexer/lexer.hpp"
#include "token/token.hpp"

// Include what we use: `errors` is a std::vector, previously reached only
// through transitive includes.
#include <vector>

namespace parser {
    // Recursive-descent parser over a stream of lexer tokens.
    struct parser {
        parser(lexer::lexer& lexer);
        ~parser();

        // Errors collected while parsing; owned (and freed) by the parser.
        std::vector<ast::error::error*> errors;

        // Parses until EOF. The caller owns the returned program.
        ast::program* parse_program();

      private:
        lexer::lexer& lexer;
        token::token current, next; // one-token lookahead window

        void next_token();

        ast::statement* parse_statement();
        ast::let* parse_let();

        // Consume `next` if it matches; otherwise record an error.
        bool expect_next(token::type);
        void next_error(token::type);
    };
} // namespace parser

30
src/repl/repl.cpp Normal file
View File

@@ -0,0 +1,30 @@
#include "repl.hpp"

#include "lexer/lexer.hpp"

#include <sstream>
#include <string>

// Prompt printed before each line of input.
static const std::string PROMPT = ">> ";

namespace repl {
    // Read lines from `in` until EOF; lex each line and echo the resulting
    // tokens to `out`, one line of tokens per line of input.
    void start(std::istream& in, std::ostream& out) {
        std::string line;
        // Print the prompt, then block on the next line; stop at EOF.
        while (out << PROMPT, std::getline(in, line)) {
            std::istringstream ss(line);
            lexer::lexer l{ss};

            token::token tok = l.next_token();
            while (tok.type != token::type::END_OF_FILE) {
                out << tok << " ";
                tok = l.next_token();
            }
            out << std::endl;
        }
    }
} // namespace repl

8
src/repl/repl.hpp Normal file
View File

@@ -0,0 +1,8 @@
#pragma once

#include <istream>
#include <ostream>

namespace repl {
    // Runs a read-lex-print loop: reads lines from the input stream and
    // writes the lexed tokens to the output stream until EOF.
    void start(std::istream&, std::ostream&);
}

View File

@@ -13,4 +13,8 @@ namespace token {
token(::token::type t, char c): type(t), literal(1, c) {} token(::token::type t, char c): type(t), literal(1, c) {}
}; };
inline std::ostream& operator<<(std::ostream& os, token tok) {
return os << tok.type << '(' << tok.literal << ')';
}
} // namespace token } // namespace token

View File

@@ -6,10 +6,11 @@
namespace token { namespace token {
// Array mapping enum values to their string representations // Array mapping enum values to their string representations
constexpr std::array<std::string_view, static_cast<size_t>(type::LET) + 1> constexpr std::
tokenTypeStrings = { array<std::string_view, static_cast<size_t>(type::RETURN) + 1>
tokenTypeStrings = {
#define X(name, str) str, #define X(name, str) str,
TOKEN_LIST TOKEN_LIST
#undef X #undef X
}; };
@@ -24,6 +25,11 @@ namespace token {
static std::unordered_map<std::string, type> keywords{ static std::unordered_map<std::string, type> keywords{
{"fn", type::FUNCTION}, {"fn", type::FUNCTION},
{"let", type::LET}, {"let", type::LET},
{"if", type::IF},
{"else", type::ELSE},
{"true", type::TRUE},
{"false", type::FALSE},
{"return", type::RETURN},
}; };
type lookup_identifier(std::string ident) { type lookup_identifier(std::string ident) {

View File

@@ -12,14 +12,27 @@ namespace token {
X(INT, "INT") \ X(INT, "INT") \
X(ASSIGN, "=") \ X(ASSIGN, "=") \
X(PLUS, "+") \ X(PLUS, "+") \
X(MINUS, "-") \
X(BANG, "!") \
X(ASTERISK, "*") \
X(SLASH, "/") \
X(LT, "<") \
X(GT, ">") \
X(COMMA, ",") \ X(COMMA, ",") \
X(SEMICOLON, ";") \ X(SEMICOLON, ";") \
X(LPAREN, "(") \ X(LPAREN, "(") \
X(RPAREN, ")") \ X(RPAREN, ")") \
X(LBRACE, "{") \ X(LBRACE, "{") \
X(RBRACE, "}") \ X(RBRACE, "}") \
X(LET, "LET") \
X(FUNCTION, "FUNCTION") \ X(FUNCTION, "FUNCTION") \
X(LET, "LET") X(IF, "IF") \
X(ELSE, "ELSE") \
X(TRUE, "TRUE") \
X(FALSE, "FALSE") \
X(EQ, "==") \
X(NEQ, "!=") \
X(RETURN, "RETURN")
// Define the TokenType enum using the X-macro // Define the TokenType enum using the X-macro
enum class type { enum class type {

View File

@@ -31,12 +31,12 @@ TEST_CASE("Single character token") {
for (const auto& t : tests) { for (const auto& t : tests) {
token::token tok = l.next_token(); token::token tok = l.next_token();
CHECK(tok.type == t.expectedType); REQUIRE(tok.type == t.expectedType);
CHECK(tok.literal == t.expectedLiteral); REQUIRE(tok.literal == t.expectedLiteral);
} }
}; };
TEST_CASE("Full tokens") { TEST_CASE("More tokens") {
struct test { struct test {
token::type expectedType; token::type expectedType;
std::string expectedLiteral; std::string expectedLiteral;
@@ -48,6 +48,17 @@ let add = fn(x, y) {\
x + y;\ x + y;\
};\ };\
let result = add(five, ten);\ let result = add(five, ten);\
!-/*5;\
5 < 10 > 5;\
\
if (5 < 10) {\
return true;\
} else {\
return false;\
}\
\
10 == 10;\
10 != 9;\
"); ");
lexer::lexer l{ss}; lexer::lexer l{ss};
@@ -93,12 +104,58 @@ let result = add(five, ten);\
{token::type::IDENTIFIER, "ten"}, {token::type::IDENTIFIER, "ten"},
{token::type::RPAREN, ")"}, {token::type::RPAREN, ")"},
{token::type::SEMICOLON, ";"}, {token::type::SEMICOLON, ";"},
{token::type::BANG, "!"},
{token::type::MINUS, "-"},
{token::type::SLASH, "/"},
{token::type::ASTERISK, "*"},
{token::type::INT, "5"},
{token::type::SEMICOLON, ";"},
{token::type::INT, "5"},
{token::type::LT, "<"},
{token::type::INT, "10"},
{token::type::GT, ">"},
{token::type::INT, "5"},
{token::type::SEMICOLON, ";"},
{token::type::IF, "if"},
{token::type::LPAREN, "("},
{token::type::INT, "5"},
{token::type::LT, "<"},
{token::type::INT, "10"},
{token::type::RPAREN, ")"},
{token::type::LBRACE, "{"},
{token::type::RETURN, "return"},
{token::type::TRUE, "true"},
{token::type::SEMICOLON, ";"},
{token::type::RBRACE, "}"},
{token::type::ELSE, "else"},
{token::type::LBRACE, "{"},
{token::type::RETURN, "return"},
{token::type::FALSE, "false"},
{token::type::SEMICOLON, ";"},
{token::type::RBRACE, "}"},
{token::type::INT, "10"},
{token::type::EQ, "=="},
{token::type::INT, "10"},
{token::type::SEMICOLON, ";"},
{token::type::INT, "10"},
{token::type::NEQ, "!="},
{token::type::INT, "9"},
{token::type::SEMICOLON, ";"},
// clang-format on // clang-format on
}; };
for (const auto& t : tests) { for (const auto& t : tests) {
token::token tok = l.next_token(); token::token tok = l.next_token();
CHECK(tok.type == t.expectedType); REQUIRE(tok.type == t.expectedType);
CHECK(tok.literal == t.expectedLiteral); REQUIRE(tok.literal == t.expectedLiteral);
} }
}; };

134
test/parser.cpp Normal file
View File

@@ -0,0 +1,134 @@
#include "parser/parser.hpp"
#include "ast/ast.hpp"
#include "ast/statements/let.hpp"
#include "lexer/lexer.hpp"
#include <doctest.h>
#include <iostream>
#include <sstream>
// Assert that `stmt` is a let statement binding the identifier `name`.
void test_let_statement(ast::statement* stmt, const std::string name) {
    REQUIRE(stmt->token_literal() == "let");

    // dynamic_cast on a *pointer* never throws — it yields nullptr on
    // failure — so the REQUIRE_NOTHROW the original wrapped around it could
    // never fire. The null check below is the real assertion.
    ast::let* let_stmt = dynamic_cast<ast::let*>(stmt);
    REQUIRE_MESSAGE(
        let_stmt != nullptr,
        "Couldn't cast statement to a let statement"
    );

    REQUIRE(let_stmt->name->value == name);
    REQUIRE(let_stmt->name->token_literal() == name);
}
void test_failing_let_parsing(
std::string input_s,
std::vector<token::type> expected_types,
int n_good_statements = 0
) {
std::stringstream input(input_s);
lexer::lexer l{input};
parser::parser p{l};
ast::program* program = p.parse_program();
// Check for errors
REQUIRE(p.errors.size() == expected_types.size());
int i = 0;
for (auto& e : p.errors) {
ast::error::expected_next* en;
REQUIRE_NOTHROW(en = dynamic_cast<ast::error::expected_next*>(e));
REQUIRE_MESSAGE(
en != nullptr,
"Couldn't cast the error to an 'expected_next'"
);
REQUIRE(en->expected_type == expected_types[i++]);
}
// normal program check
REQUIRE_MESSAGE(
program != nullptr,
"parse_program() returned a null pointer"
);
REQUIRE(program->statements.size() == n_good_statements);
delete program;
}
// If the parser recorded any errors, dump them all to stderr and mark the
// current test as failed (FAIL_CHECK records the failure but lets the
// remaining tests keep running).
void check_parser_errors(const std::vector<ast::error::error*>& errors) {
    if (errors.empty())
        return;

    std::cerr << "parser has " << errors.size() << " errors:\n";
    for (const ast::error::error* err : errors)
        std::cerr << '\t' << err->what() << "\n";

    FAIL_CHECK("Parser had errors. See stderr for details.");
}
// Each subcase feeds a deliberately malformed `let` statement to the parser
// and checks the expected-token errors it reports; the last subcase also
// checks that the one well-formed statement is still recovered.
TEST_CASE("Malformed let statement (checking for memory leaks)") {
    SUBCASE("Second token not identifier") {
        test_failing_let_parsing("let 5 = 5;", {token::type::IDENTIFIER});
    }

    SUBCASE("Third token not '='") {
        test_failing_let_parsing("let five ! 5;", {token::type::ASSIGN});
    }

    SUBCASE("Missing both identifier and '='") {
        test_failing_let_parsing("let 5;", {token::type::IDENTIFIER});
    }

    SUBCASE("Multiple parsing errors") {
        // "let five = 5;" is the only valid statement, hence the trailing 1.
        test_failing_let_parsing(
            "let 5; let ! = 5; let five = 5; let five 5; let;",
            {token::type::IDENTIFIER,
             token::type::IDENTIFIER,
             token::type::ASSIGN,
             token::type::IDENTIFIER},
            1
        );
    }
}
// Happy path: three well-formed let statements must parse with no errors,
// and each statement must bind the expected identifier.
TEST_CASE("Parse let statement") {
    std::stringstream input("\
let x = 5;\
let y = 10;\
let foobar = 103213;\
");
    lexer::lexer l{input};
    parser::parser p{l};

    ast::program* program = p.parse_program();
    check_parser_errors(p.errors);

    REQUIRE_MESSAGE(
        program != nullptr,
        "parse_program() returned a null pointer"
    );
    REQUIRE(program->statements.size() == 3);

    struct test {
        std::string expected_identifier;
    };

    // One entry per statement, in source order.
    test tests[]{
        "x",
        "y",
        "foobar",
    };

    int i = 0;
    for (const auto& t : tests) {
        ast::statement* stmt = program->statements[i++];
        test_let_statement(stmt, t.expected_identifier);
    }

    delete program;
}