Compare commits

...

17 Commits

Author SHA1 Message Date
Karma Riuk
dec93f8272 implemented lexer for a more complex subset of the
monkey language
2025-06-30 00:12:28 +02:00
Karma Riuk
69217fdf90 added test for full lexer (missing impl) 2025-06-29 20:28:53 +02:00
Karma Riuk
c322b69590 renamed IDENT to IDENTIFIER because i kept reading
indent
2025-06-29 20:04:20 +02:00
Karma Riuk
ffff13b2e0 lexer can now read single character tokens 2025-06-29 12:33:37 +02:00
Karma Riuk
ca05c3577a renamed EOF_ to END_OF_FILE 2025-06-29 12:33:09 +02:00
Karma Riuk
1c928616a4 written structure and tests for lexer, missing
implementation
2025-06-29 10:56:51 +02:00
Karma Riuk
ccfc3ed0f7 fixed bug 2025-06-29 10:56:32 +02:00
Karma Riuk
2aff81ba4c fixed token header and made the tokenTypeStrings
not seeable from outside modules
2025-06-29 10:43:12 +02:00
Karma Riuk
9ad9a0b85b added src to inclusion for lsp 2025-06-29 10:14:29 +02:00
Karma Riuk
09a0dc7b6d brought back namespaces because i think i get it
now
2025-06-29 10:14:04 +02:00
Karma Riuk
65792464bb changed make rule name 2025-06-29 10:13:27 +02:00
Karma Riuk
4771aa4f10 removed namespace perche mi rompeva le palle 2025-06-29 10:07:12 +02:00
Karma Riuk
81cdd0690d made the token type less repetitive 2025-06-28 18:05:01 +02:00
Karma Riuk
4364afa111 added compile_flags.txt for lsp 2025-06-28 17:59:14 +02:00
Karma Riuk
9a13de97e1 initial code 2025-06-28 17:59:08 +02:00
Karma Riuk
8acce0f6a6 made makefile better 2025-06-28 17:57:44 +02:00
Karma Riuk
b966b6dfab put back the tabs because makefiles are bitchy 2025-06-28 17:27:56 +02:00
10 changed files with 370 additions and 61 deletions

118
Makefile
View File

@@ -1,70 +1,74 @@
# NOTE(review): this span is a rendered diff that interleaves the OLD and the
# NEW Makefile without +/- markers — hence two CXXFLAGS definitions, two
# `clean:` rules, and both TEST_BIN/REPL_BIN and TARGET/TEST_TARGET link
# rules. It is not a single valid Makefile as shown.
# NOTE(review): recipe lines appear without their leading tabs in this
# capture; a real Makefile requires tab-indented recipes.
# ────────────────────────────────────
# Compiler and flags
# -------------------------------------------------------------------
# Projectwide settings
# -------------------------------------------------------------------
CXX := g++
# NOTE(review): `-WError` is not a valid GCC flag; presumably `-Werror` was
# meant — the newer CXXFLAGS line below drops it entirely.
CXXFLAGS := -Wall -WError -I./include
# ────────────────────────────────────
# Paths
CXXFLAGS := -std=c++17 -Wall -Wextra -Iinclude -Isrc -MMD -MP
LDFLAGS :=
SRC_DIR := src
TEST_SRC := test/test.cpp
REPL_SRC := $(SRC_DIR)/main.cpp
TEST_DIR := test
BUILD_DIR := build
# NOTE(review): OBJ_DIR/BIN_DIR are assigned twice (old vs new layout:
# build/objs vs build/obj); in a real Makefile the last assignment wins.
OBJ_DIR := $(BUILD_DIR)/objs
BIN_DIR := $(BUILD_DIR)/bin
OBJ_DIR := build/obj
BIN_DIR := build/bin
# -------------------------------------------------------------------
# Source & object lists
# -------------------------------------------------------------------
SRC_CPP := $(shell find $(SRC_DIR) -name '*.cpp')
TEST_CPP := $(shell find $(TEST_DIR) -name '*.cpp')
OBJ := $(patsubst $(SRC_DIR)/%.cpp,$(OBJ_DIR)/%.o,$(SRC_CPP))
TEST_OBJ := $(patsubst $(TEST_DIR)/%.cpp,$(OBJ_DIR)/test/%.o,$(TEST_CPP))
# .d dependency files produced by -MMD -MP, re-included at the bottom.
DEPFILES := $(OBJ:.o=.d) $(TEST_OBJ:.o=.d)
# ────────────────────────────────────
# Source listings
# All .cpp under src/, but exclude your REPL main
LIB_SRCS := $(filter-out $(REPL_SRC),$(shell find $(SRC_DIR) -name '*.cpp'))
# Mirror src/.../*.cpp → build/obj/src/.../*.o
LIB_OBJS := $(patsubst $(SRC_DIR)/%.cpp,$(OBJ_DIR)/$(SRC_DIR)/%.o,$(LIB_SRCS))
# Identify your “real” main.cpp so we can exclude it from tests
MAIN_SRC := $(SRC_DIR)/main.cpp
MAIN_OBJ := $(MAIN_SRC:$(SRC_DIR)/%.cpp=$(OBJ_DIR)/%.o)
SRC_OBJS_NO_MAIN := $(filter-out $(MAIN_OBJ),$(OBJ))
# Binaries
TEST_BIN := $(BIN_DIR)/tests
REPL_BIN := $(BIN_DIR)/repl
TARGET := $(BIN_DIR)/monkey
TEST_TARGET := $(BIN_DIR)/monkey_tests
# ────────────────────────────────────
# Default target: build & run tests
all: test
# -------------------------------------------------------------------
# Toplevel rules
# -------------------------------------------------------------------
.PHONY: all clean run tests
# NOTE(review): `all` is defined twice (old: run tests; new: build both
# binaries) — another old/new diff collision.
all: $(TARGET) $(TEST_TARGET)
# ─ Link test runner (test.cpp defines main via DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN)
$(TEST_BIN): $(LIB_OBJS) | $(BIN_DIR)
@echo "⏳ Linking tests..."
$(CXX) $(CXXFLAGS) $(TEST_SRC) $(LIB_OBJS) -o $@
clean:
@rm -rf $(BUILD_DIR)
# ─ Link REPL
$(REPL_BIN): $(LIB_OBJS) | $(BIN_DIR)
@echo "🚀 Linking REPL..."
$(CXX) $(CXXFLAGS) $(REPL_SRC) $(LIB_OBJS) -o $@
# -------------------------------------------------------------------
# Build & run
# -------------------------------------------------------------------
run: $(TARGET)
@$(TARGET)
# ─ Compile each library .cpp → mirrored .o
$(OBJ_DIR)/$(SRC_DIR)/%.o: $(SRC_DIR)/%.cpp
@echo "🛠 Compiling $<"
@mkdir -p $(dir $@)
# NOTE(review): this old-version compile rule shows no $(CXX) recipe line in
# the capture — confirm against the repository.
tests: $(TEST_TARGET)
@$(TEST_TARGET) $(if $(TEST),--test-case=$(TEST))
# -------------------------------------------------------------------
# Link binaries
# -------------------------------------------------------------------
$(TARGET): $(OBJ)
@mkdir -p $(BIN_DIR)
$(CXX) $(LDFLAGS) $^ -o $@
# Test binary links all src objects except main.o plus the test objects.
$(TEST_TARGET): $(SRC_OBJS_NO_MAIN) $(TEST_OBJ)
@mkdir -p $(BIN_DIR)
$(CXX) $(LDFLAGS) $^ -o $@
# -------------------------------------------------------------------
# Compile rules
# -------------------------------------------------------------------
$(OBJ_DIR)/%.o: $(SRC_DIR)/%.cpp
@mkdir -p $(@D)
$(CXX) $(CXXFLAGS) -c $< -o $@
# ────────────────────────────────────
# Run or launch targets
.PHONY: test repl clean all
test: $(TEST_BIN)
@echo "\n✅ Running tests..."
@$(TEST_BIN)
# @$(TEST_BIN) $(if $(TESTCASE),--test-case=$(TESTCASE))
repl: $(REPL_BIN)
@echo "\n🔧 Starting REPL..."
@$(REPL_BIN)
# ────────────────────────────────────
# Ensure bin/ exists before linking
$(BIN_DIR):
@mkdir -p $@
# ────────────────────────────────────
# Clean up everything
clean:
@echo "🧹 Cleaning build artifacts"
@rm -rf $(OBJ_DIR) $(BIN_DIR)
$(OBJ_DIR)/test/%.o: $(TEST_DIR)/%.cpp
@mkdir -p $(@D)
$(CXX) $(CXXFLAGS) -c $< -o $@
# -------------------------------------------------------------------
# Autoinclude dependencies
# -------------------------------------------------------------------
-include $(DEPFILES)

2
compile_flags.txt Normal file
View File

@@ -0,0 +1,2 @@
-I./include
-I./src

68
src/lexer/lexer.cpp Normal file
View File

@@ -0,0 +1,68 @@
#include "lexer.hpp"
#include "token/token.hpp"
#include "token/type.hpp"
#include <cctype>
#include <iostream>
namespace lexer {

    // Returns the next token from `input`.
    // operator>> skips whitespace by default (skipws), so token separators
    // are consumed here; end of input yields END_OF_FILE with an empty
    // literal.
    token::token lexer::next_token() {
        char c;
        if (!(input >> c))
            return {token::type::END_OF_FILE, ""};

        switch (c) {
        case '=':
            return {token::type::ASSIGN, c};
        case '+':
            return {token::type::PLUS, c};
        case ',':
            return {token::type::COMMA, c};
        case ';':
            return {token::type::SEMICOLON, c};
        case '(':
            return {token::type::LPAREN, c};
        case ')':
            return {token::type::RPAREN, c};
        case '{':
            return {token::type::LBRACE, c};
        case '}':
            return {token::type::RBRACE, c};
        default:
            if (is_letter(c)) {
                std::string identifier_or_keyword = read_string(c);
                return {
                    token::lookup_identifier(identifier_or_keyword),
                    identifier_or_keyword
                };
            }
            // Cast to unsigned char: passing a negative plain char to
            // std::isdigit is undefined behavior.
            if (std::isdigit(static_cast<unsigned char>(c)))
                return {token::type::INT, read_int(c)};
            return {token::type::ILLEGAL, c};
        }
    }

    // True for characters that may start/continue an identifier or keyword.
    bool lexer::is_letter(char c) {
        return c == '_' || std::isalpha(static_cast<unsigned char>(c));
    }

    // Reads the rest of an identifier/keyword whose first character was
    // already consumed. peek() is kept as int so EOF (traits::eof()) is
    // detected before being narrowed to char and misclassified as a letter.
    std::string lexer::read_string(char first_char) {
        std::string result;
        result.push_back(first_char);
        for (int c = input.peek();
             c != std::istream::traits_type::eof()
                 && is_letter(static_cast<char>(c));
             c = input.peek())
            result.push_back(static_cast<char>(input.get()));
        return result;
    }

    // Reads the rest of an integer literal whose first digit was already
    // consumed; same EOF and unsigned-char handling as read_string.
    std::string lexer::read_int(char first_digit) {
        std::string result;
        result.push_back(first_digit);
        for (int c = input.peek();
             c != std::istream::traits_type::eof()
                 && std::isdigit(static_cast<unsigned char>(c));
             c = input.peek())
            result.push_back(static_cast<char>(input.get()));
        return result;
    }

} // namespace lexer

16
src/lexer/lexer.hpp Normal file
View File

@@ -0,0 +1,16 @@
#pragma once  // added: every other header in this project has an include guard

#include "token/token.hpp"

#include <istream>
#include <string>  // added: std::string is used below but was only included transitively

namespace lexer {
    // Pull-based lexer over an input stream; call next_token() repeatedly
    // until it returns an END_OF_FILE token.
    struct lexer {
        // Borrowed stream; must outlive the lexer.
        std::istream& input;

        // Scans and returns the next token from `input`.
        token::token next_token();

      private:
        // True for '_' and alphabetic characters (identifier characters).
        bool is_letter(char);
        // Reads the remainder of an identifier starting with the given char.
        std::string read_string(char);
        // Reads the remainder of an integer literal starting with the given digit.
        std::string read_int(char);
    };
} // namespace lexer

9
src/main.cpp Normal file
View File

@@ -0,0 +1,9 @@
#include "token/type.hpp"

#include <iostream>

// Smoke test for the token::type stream-insertion operator.
int main() {
    // Renamed from `eof`: the value held and printed is ILLEGAL, not
    // END_OF_FILE — the old name was misleading.
    token::type t = token::type::ILLEGAL;
    std::cout << t << std::endl;
    return 0;
}

16
src/token/token.hpp Normal file
View File

@@ -0,0 +1,16 @@
#pragma once

#include "type.hpp"

#include <string>
#include <utility>  // std::move

namespace token {
    // A lexical token: its category plus the exact source text (literal).
    struct token {
        ::token::type type;
        std::string literal;

        // Sink parameter taken by value and moved into place — the old
        // version copied the string a second time into `literal`.
        token(::token::type t, std::string s): type(t), literal(std::move(s)) {}
        // Convenience constructor for single-character tokens.
        token(::token::type t, char c): type(t), literal(1, c) {}
    };
} // namespace token

37
src/token/type.cpp Normal file
View File

@@ -0,0 +1,37 @@
#include "type.hpp"

#include <array>
#include <string_view>  // added: std::string_view was only available transitively
#include <unordered_map>

namespace token {
    // Maps each enum value (by underlying index) to its display string.
    // Relies on LET being the LAST enumerator in TOKEN_LIST — the array is
    // sized as LET + 1.
    constexpr std::array<std::string_view, static_cast<size_t>(type::LET) + 1>
        tokenTypeStrings = {
#define X(name, str) str,
            TOKEN_LIST
#undef X
        };

    // Stream insertion operator using the lookup array; out-of-range values
    // (which would indicate a bug) print as "Unknown".
    std::ostream& operator<<(std::ostream& os, type type) {
        auto idx = static_cast<size_t>(type);
        if (idx < tokenTypeStrings.size())
            return os << tokenTypeStrings[idx];
        return os << "Unknown";
    }

    // Reserved words of the language; anything not listed is an IDENTIFIER.
    static const std::unordered_map<std::string, type> keywords{
        {"fn", type::FUNCTION},
        {"let", type::LET},
    };

    type lookup_identifier(std::string ident) {
        // find() instead of at() + catch(out_of_range): a miss is the common
        // case (most words are identifiers), and exceptions should not be
        // used for ordinary control flow.
        auto it = keywords.find(ident);
        return it == keywords.end() ? type::IDENTIFIER : it->second;
    }
} // namespace token

33
src/token/type.hpp Normal file
View File

@@ -0,0 +1,33 @@
#pragma once

#include <ostream>
#include <string>  // added: lookup_identifier takes std::string but <string> was never included

namespace token {
    // X-macro list of token types and their string representations.
    // NOTE: type.cpp sizes its name-lookup table as LET + 1, so LET must
    // remain the LAST entry of this list.
#define TOKEN_LIST \
    X(ILLEGAL, "ILLEGAL") \
    X(END_OF_FILE, "EOF") \
    X(IDENTIFIER, "IDENTIFIER") \
    X(INT, "INT") \
    X(ASSIGN, "=") \
    X(PLUS, "+") \
    X(COMMA, ",") \
    X(SEMICOLON, ";") \
    X(LPAREN, "(") \
    X(RPAREN, ")") \
    X(LBRACE, "{") \
    X(RBRACE, "}") \
    X(FUNCTION, "FUNCTION") \
    X(LET, "LET")

    // Define the token type enum from the X-macro (values start at 0).
    enum class type {
#define X(name, str) name,
        TOKEN_LIST
#undef X
    };

    // Prints the string form from TOKEN_LIST (defined in type.cpp).
    std::ostream& operator<<(std::ostream&, type);
    // Returns the keyword type for reserved words, IDENTIFIER otherwise
    // (defined in type.cpp).
    type lookup_identifier(std::string);
} // namespace token

104
test/lexer.cpp Normal file
View File

@@ -0,0 +1,104 @@
#include "lexer/lexer.hpp"
#include "token/type.hpp"
#include <doctest.h>
#include <sstream>
#include <string>
TEST_CASE("Single character token") {
    // One expected (type, literal) pair per token of the input.
    struct expectation {
        token::type type;
        std::string literal;
    };

    std::istringstream input{"=+(){},;"};
    lexer::lexer lex{input};

    const expectation expected[] = {
        {token::type::ASSIGN, "="},
        {token::type::PLUS, "+"},
        {token::type::LPAREN, "("},
        {token::type::RPAREN, ")"},
        {token::type::LBRACE, "{"},
        {token::type::RBRACE, "}"},
        {token::type::COMMA, ","},
        {token::type::SEMICOLON, ";"},
        {token::type::END_OF_FILE, ""},
    };

    for (const auto& e : expected) {
        token::token tok = lex.next_token();
        CHECK(tok.type == e.type);
        CHECK(tok.literal == e.literal);
    }
}
// Exercises the lexer on a small but complete Monkey program: keywords,
// identifiers, integer literals, and all single-character tokens.
TEST_CASE("Full tokens") {
// Expected (type, literal) pair for one token of the input program.
struct test {
token::type expectedType;
std::string expectedLiteral;
};
// NOTE(review): the backslash-newlines splice this into ONE string literal;
// any leading whitespace on the continued lines becomes part of the string,
// which is harmless here because the lexer skips whitespace.
std::istringstream ss("let five = 5;\
let ten = 10;\
let add = fn(x, y) {\
x + y;\
};\
let result = add(five, ten);\
");
lexer::lexer l{ss};
// Expected token stream for the program above, in source order.
test tests[] = {
// clang-format off
{token::type::LET, "let"},
{token::type::IDENTIFIER, "five"},
{token::type::ASSIGN, "="},
{token::type::INT, "5"},
{token::type::SEMICOLON, ";"},
{token::type::LET, "let"},
{token::type::IDENTIFIER, "ten"},
{token::type::ASSIGN, "="},
{token::type::INT, "10"},
{token::type::SEMICOLON, ";"},
{token::type::LET, "let"},
{token::type::IDENTIFIER, "add"},
{token::type::ASSIGN, "="},
{token::type::FUNCTION, "fn"},
{token::type::LPAREN, "("},
{token::type::IDENTIFIER, "x"},
{token::type::COMMA, ","},
{token::type::IDENTIFIER, "y"},
{token::type::RPAREN, ")"},
{token::type::LBRACE, "{"},
{token::type::IDENTIFIER, "x"},
{token::type::PLUS, "+"},
{token::type::IDENTIFIER, "y"},
{token::type::SEMICOLON, ";"},
{token::type::RBRACE, "}"},
{token::type::SEMICOLON, ";"},
{token::type::LET, "let"},
{token::type::IDENTIFIER, "result"},
{token::type::ASSIGN, "="},
{token::type::IDENTIFIER, "add"},
{token::type::LPAREN, "("},
{token::type::IDENTIFIER, "five"},
{token::type::COMMA, ","},
{token::type::IDENTIFIER, "ten"},
{token::type::RPAREN, ")"},
{token::type::SEMICOLON, ";"},
// clang-format on
};
// Pull tokens one at a time and compare against the expected stream.
for (const auto& t : tests) {
token::token tok = l.next_token();
CHECK(tok.type == t.expectedType);
CHECK(tok.literal == t.expectedLiteral);
}
};

20
test/test.cpp Normal file
View File

@@ -0,0 +1,20 @@
#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
#include <doctest.h>
// Computes n! for n >= 0.
// Fixed: the old version returned `number` (not 1) when number <= 1, so
// factorial(0) incorrectly evaluated to 0; 0! is 1 by definition.
int factorial(int number) {
    return number <= 1 ? 1 : factorial(number - 1) * number;
}
// Same four checks as before, driven by a data table instead of being
// written out one CHECK at a time.
TEST_CASE("fact") {
    const int inputs[] = {1, 2, 3, 10};
    const int expected[] = {1, 2, 6, 3628800};
    for (int i = 0; i < 4; ++i)
        CHECK(factorial(inputs[i]) == expected[i]);
}
// Duplicate of "fact", restated table-driven (kept because the original
// file has both cases).
TEST_CASE("fact2") {
    const int args[] = {1, 2, 3, 10};
    const int want[] = {1, 2, 6, 3628800};
    for (int i = 0; i < 4; ++i)
        CHECK(factorial(args[i]) == want[i]);
}