Implemented lexer for a more complex subset of the Monkey language
2025-06-30 00:12:28 +02:00
parent 69217fdf90
commit dec93f8272
4 changed files with 60 additions and 5 deletions
--- a/src/lexer/lexer.cpp
+++ b/src/lexer/lexer.cpp
@ -1,7 +1,9 @@
 #include "lexer.hpp"

 #include "token/token.hpp"
+#include "token/type.hpp"

+#include <cctype>
 #include <iostream>

 namespace lexer {
@ -27,8 +29,40 @@ namespace lexer {
            return {token::type::LBRACE, c};
        case '}':
            return {token::type::RBRACE, c};
+        default:
+            if (is_letter(c)) {
+                std::string identifier_or_keyword = read_string(c);
+                return {
+                    token::lookup_identifier(identifier_or_keyword),
+                    identifier_or_keyword
+                };
+            }
+            if (std::isdigit(c))
+                return {token::type::INT, read_int(c)};
+
+            return {token::type::ILLEGAL, c};
        }
-        return {token::type::ILLEGAL, c};
-    };
+    }
+
+    // Tells whether `c` counts as a letter for the lexer: either an
+    // underscore or a character that std::isalpha classifies as alphabetic.
+    bool lexer::is_letter(char c) {
+        if (c == '_')
+            return true;
+
+        // The <cctype> classifiers expect a value representable as
+        // unsigned char, so convert before classifying.
+        const auto as_uchar = static_cast<unsigned char>(c);
+        return std::isalpha(as_uchar) != 0;
+    }
+
+    // Reads the remainder of an identifier or keyword whose first character
+    // has already been consumed by the caller.
+    //
+    // first_char: the already-consumed leading character; it becomes the
+    //             first character of the result.
+    // Returns first_char followed by every consecutive character of `input`
+    // for which is_letter() holds. The first non-letter is only peeked at,
+    // so it stays available for the next read.
+    std::string lexer::read_string(char first_char) {
+        std::string result(1, first_char);
+
+        // Keep the peeked value in its native type and test for end of input
+        // explicitly: narrowing it to char first makes end-of-input look like
+        // the byte 0xFF, which some locales classify as alphabetic, and the
+        // loop would then never terminate.
+        // NOTE(review): assumes `input` is a std::istream-like stream whose
+        // peek() yields std::char_traits<char>::eof() at end of input.
+        for (auto next = input.peek();
+             next != std::char_traits<char>::eof()
+                 && is_letter(static_cast<char>(next));
+             next = input.peek())
+            result.push_back(static_cast<char>(input.get()));
+
+        return result;
+    }
+
+    // Reads the remainder of an integer literal whose first digit has
+    // already been consumed by the caller.
+    //
+    // first_digit: the already-consumed leading digit; it becomes the first
+    //              character of the result.
+    // Returns first_digit followed by every consecutive decimal digit of
+    // `input`. The first non-digit is only peeked at, so it stays available
+    // for the next read.
+    std::string lexer::read_int(char first_digit) {
+        std::string result(1, first_digit);
+
+        // std::isdigit has undefined behaviour for negative arguments other
+        // than EOF, so the peeked value is checked for end of input and then
+        // converted to unsigned char (as is_letter does) before classifying.
+        // NOTE(review): assumes `input` is a std::istream-like stream whose
+        // peek() yields std::char_traits<char>::eof() at end of input.
+        for (auto next = input.peek();
+             next != std::char_traits<char>::eof()
+                 && std::isdigit(static_cast<unsigned char>(next)) != 0;
+             next = input.peek())
+            result.push_back(static_cast<char>(input.get()));
+
+        return result;
+    }
+

 } // namespace lexer