Implemented lexer for a more complex subset of the Monkey language
This commit is contained in:
Karma Riuk
2025-06-30 00:12:28 +02:00
parent 69217fdf90
commit dec93f8272
4 changed files with 60 additions and 5 deletions

View File

@ -1,7 +1,9 @@
#include "lexer.hpp" #include "lexer.hpp"
#include "token/token.hpp" #include "token/token.hpp"
#include "token/type.hpp"
#include <cctype>
#include <iostream> #include <iostream>
namespace lexer { namespace lexer {
@ -27,8 +29,40 @@ namespace lexer {
return {token::type::LBRACE, c}; return {token::type::LBRACE, c};
case '}': case '}':
return {token::type::RBRACE, c}; return {token::type::RBRACE, c};
} default:
return {token::type::ILLEGAL, c}; if (is_letter(c)) {
std::string identifier_or_keyword = read_string(c);
return {
token::lookup_identifier(identifier_or_keyword),
identifier_or_keyword
}; };
}
if (std::isdigit(c))
return {token::type::INT, read_int(c)};
return {token::type::ILLEGAL, c};
}
}
// Reports whether `c` counts as a letter for the lexer: an underscore, or any
// character that std::isalpha classifies as alphabetic.
bool lexer::is_letter(char c) {
    if (c == '_')
        return true;
    // Go through unsigned char so a negative char value is never handed to
    // std::isalpha.
    const auto widened = static_cast<unsigned char>(c);
    return std::isalpha(widened) != 0;
}
// Reads an identifier-or-keyword lexeme.
//
// The result starts with `first_char` (taken as given; it is not read from the
// stream here) and is extended with characters from `input` for as long as
// is_letter() accepts the next one. The first rejected character is only
// peeked at, so it stays in the stream.
std::string lexer::read_string(char first_char) {
    using traits = std::istream::traits_type;

    std::string result(1, first_char);
    // Test for end-of-input on the int returned by peek() *before* narrowing
    // it: once converted to char, EOF cannot be told apart from the byte 0xFF,
    // which some locales classify as alphabetic.
    for (auto next = input.peek(); !traits::eq_int_type(next, traits::eof());
         next = input.peek()) {
        const char c = traits::to_char_type(next);
        if (!is_letter(c))
            break;
        input.get(); // consume the character that was just peeked
        result.push_back(c);
    }
    return result;
}
// Reads an integer lexeme.
//
// The result starts with `first_digit` (taken as given; it is not read from
// the stream here) and is extended with characters from `input` for as long as
// the next one is a decimal digit. The first non-digit is only peeked at, so
// it stays in the stream.
std::string lexer::read_int(char first_digit) {
    using traits = std::istream::traits_type;

    std::string result(1, first_digit);
    // Stop on end-of-input before narrowing peek()'s int to a char.
    for (auto next = input.peek(); !traits::eq_int_type(next, traits::eof());
         next = input.peek()) {
        const char c = traits::to_char_type(next);
        // std::isdigit must not receive a negative value, so convert through
        // unsigned char (the same precaution is_letter takes for isalpha).
        if (!std::isdigit(static_cast<unsigned char>(c)))
            break;
        input.get(); // consume the digit that was just peeked
        result.push_back(c);
    }
    return result;
}
} // namespace lexer } // namespace lexer

View File

@ -6,5 +6,11 @@ namespace lexer {
struct lexer { struct lexer {
std::istream& input; std::istream& input;
token::token next_token(); token::token next_token();
private:
// True when the character is '_' or alphabetic according to std::isalpha.
bool is_letter(char);
// Returns the given character followed by the run of letters (as judged by
// is_letter) read from `input`.
std::string read_string(char);
// Returns the given character followed by the run of decimal digits read
// from `input`.
std::string read_int(char);
}; };
} // namespace lexer } // namespace lexer

View File

@ -1,5 +1,8 @@
#include "type.hpp" #include "type.hpp"
#include <array>
#include <unordered_map>
namespace token { namespace token {
// Array mapping enum values to their string representations // Array mapping enum values to their string representations
@ -18,4 +21,17 @@ namespace token {
return os << "Unknown"; return os << "Unknown";
} }
// Reserved words of the language, mapped to their dedicated token types.
// Declared const because the code visible here only ever reads the table;
// that also keeps operator[] from silently inserting new entries.
static const std::unordered_map<std::string, type> keywords{
    {"fn", type::FUNCTION},
    {"let", type::LET},
};
// Classifies an identifier-shaped lexeme.
//
// Returns the mapped token type when `ident` is an entry of `keywords`, and
// type::IDENTIFIER for everything else.
type lookup_identifier(std::string ident) {
    // A miss is an expected outcome here, not an error, so use find() instead
    // of at(): no exception is thrown and caught for ordinary identifiers.
    const auto entry = keywords.find(ident);
    return entry != keywords.end() ? entry->second : type::IDENTIFIER;
}
} // namespace token } // namespace token

View File

@ -1,8 +1,6 @@
#pragma once #pragma once
#include <array>
#include <ostream> #include <ostream>
#include <string_view>
namespace token { namespace token {
@ -30,5 +28,6 @@ namespace token {
#undef X #undef X
}; };
std::ostream& operator<<(std::ostream& os, type type); std::ostream& operator<<(std::ostream&, type);
// Maps a lexeme to its keyword token type (e.g. "fn", "let"), or to
// type::IDENTIFIER when it is not a keyword.
type lookup_identifier(std::string);
} // namespace token } // namespace token