From 36ec04c00fa540fcee0f2cff1f7b81dd8a98101a Mon Sep 17 00:00:00 2001
From: Ferenc Szontágh <szf@fsociety.hu>
Date: Thu, 17 Apr 2025 18:44:58 +0000
Subject: [PATCH] some refactor
---
src/Lexer/Lexer.cpp | 307 +++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 299 insertions(+), 8 deletions(-)
diff --git a/src/Lexer/Lexer.cpp b/src/Lexer/Lexer.cpp
index 6f6cc35..b5a4261 100644
--- a/src/Lexer/Lexer.cpp
+++ b/src/Lexer/Lexer.cpp
@@ -1,14 +1,305 @@
#include "Lexer/Lexer.hpp"
-namespace Lexer {
-const std::vector<std::string> Lexer::Lexer::OPERATOR_RELATIONAL = { "==", "!=", "<", ">", "<=", ">=" };
-const std::vector<std::string> Lexer::Lexer::OPERATOR_INCREMENT = { "++", "--" };
-const std::vector<std::string> Lexer::Lexer::OPERATOR_ASSIGNMENT = { "=", "+=", "-=", "*=", "/=", "%=" };
-const std::vector<std::string> Lexer::Lexer::OPERATOR_LOGICAL = { "&&", "||" };
+#include "Lexer/Operators.hpp"
+#include "Symbols/SymbolContainer.hpp"
-const std::vector<std::string> Lexer::Lexer::OPERATOR_ARITHMETIC = { "+", "-", "*", "/", "%" };
-const std::vector<std::string> Lexer::Lexer::PUNCTUATION = { "(", ")", "{", "}", "[", "]", ",", ";" };
+std::vector<Lexer::Tokens::Token> Lexer::Lexer::tokenizeNamespace(const std::string & ns) {
+ if (inputs_.find(ns) == inputs_.end()) {
+ return {};
+ }
+ Symbols::SymbolContainer::instance()->enter(ns);
+ std::vector<Tokens::Token> tokens;
+ Tokens::Token token;
+ do {
+ token = nextToken();
+ tokens.push_back(token);
+ } while (token.type != Tokens::Type::END_OF_FILE);
-}; // namespace Lexer
+ tokens_[ns] = tokens;
+ return tokens;
+}
+
+void Lexer::Lexer::addNamespaceInput(const std::string & ns, const std::string & input) {
+ inputs_[ns] = input;
+ positions_[ns] = 0;
+ line_numbers_[ns] = 1;
+ column_numbers_[ns] = 1;
+}
+
+std::vector<Lexer::Tokens::Token> Lexer::Lexer::getTokens(const std::string & ns) const {
+ auto it = tokens_.find(ns);
+ if (it != tokens_.end()) {
+ return it->second;
+ }
+ return {};
+}
+
// Replace the keyword lookup table used by matchIdentifierOrKeyword() to
// promote plain identifiers to their keyword token types.
void Lexer::Lexer::setKeyWords(const std::unordered_map<std::string, Tokens::Type> & new_keywords) {
    keywords = new_keywords;
}
+
+Lexer::Tokens::Token Lexer::Lexer::nextToken() {
+ skipWhitespaceAndComments();
+ size_t start = pos();
+
+ if (isAtEnd()) {
+ return createToken(Tokens::Type::END_OF_FILE, start, start);
+ }
+
+ char c = peek();
+ if (isalpha(c) || c == '_') {
+ return matchIdentifierOrKeyword(start);
+ }
+ if (isdigit(c) || (isdigit(c) && peek(1) == '.') || (c == '.' && isdigit(peek(1)))) {
+ return matchNumber(start);
+ }
+ if (c == '"' || c == '\'') {
+ return matchStringLiteral(start);
+ }
+ if (operators_.find(c) != std::string_view::npos) {
+ return matchOperatorOrPunctuation(start);
+ }
+
+ advance();
+ return createToken(Tokens::Type::UNKNOWN, start, pos());
+}
+
+const std::string & Lexer::Lexer::input() const {
+ const auto & ns = Symbols::SymbolContainer::instance()->currentScopeName();
+ auto it = inputs_.find(ns);
+ if (it != inputs_.end()) {
+ return it->second;
+ }
+ throw Exception("Input not found in namespace: " + ns);
+}
+
+size_t & Lexer::Lexer::pos() {
+ const auto & ns = Symbols::SymbolContainer::instance()->currentScopeName();
+ auto it = positions_.find(ns);
+ if (it != positions_.end()) {
+ return it->second;
+ }
+ throw Exception("Unknown position in namespace: " + ns);
+}
+
+int & Lexer::Lexer::line() {
+ const auto & ns = Symbols::SymbolContainer::instance()->currentScopeName();
+ auto it = line_numbers_.find(ns);
+ if (it != line_numbers_.end()) {
+ return it->second;
+ }
+ throw Exception("Unknown line number in namespace: " + ns);
+}
+
+int & Lexer::Lexer::col() {
+ const auto & ns = Symbols::SymbolContainer::instance()->currentScopeName();
+ auto it = column_numbers_.find(ns);
+ if (it != column_numbers_.end()) {
+ return it->second;
+ }
+ throw Exception("Unknown column number in namespace: " + ns);
+}
+
+Lexer::Tokens::Token Lexer::Lexer::createToken(Tokens::Type type, size_t start, size_t end, const std::string & value) {
+ Tokens::Token token;
+ token.type = type;
+ token.start_pos = start;
+ token.end_pos = end;
+ token.line_number = line();
+ token.column_number = col();
+ if (start <= end && end <= input().length()) {
+ token.lexeme = std::string_view(input()).substr(start, end - start);
+ token.value = value.empty() ? std::string(token.lexeme) : value;
+ }
+ return token;
+}
+
+char Lexer::Lexer::peek(size_t offset) const {
+ const auto & ns = Symbols::SymbolContainer::instance()->currentScopeName();
+ const auto & in = inputs_.at(ns);
+ size_t cp = positions_.at(ns);
+ if (cp + offset >= in.length()) {
+ return '\0';
+ }
+ return in[cp + offset];
+}
+
+char Lexer::Lexer::advance() {
+ char c = peek();
+ pos()++;
+ if (c == '\n') {
+ line()++;
+ col() = 1;
+ } else {
+ col()++;
+ }
+ return c;
+}
+
+bool Lexer::Lexer::isAtEnd() const {
+ const auto & ns = Symbols::SymbolContainer::instance()->currentScopeName();
+ return positions_.at(ns) >= inputs_.at(ns).length();
+}
+
+void Lexer::Lexer::skipWhitespaceAndComments() {
+ while (!isAtEnd()) {
+ char c = peek();
+ if (isspace(c)) {
+ advance();
+ } else if ((c == '/' && peek(1) == '/') || c == '#') {
+ while (!isAtEnd() && peek() != '\n') {
+ advance();
+ }
+ } else {
+ break;
+ }
+ }
+}
+
+Lexer::Tokens::Token Lexer::Lexer::matchIdentifierOrKeyword(size_t start_pos, Tokens::Type type) {
+ while (!isAtEnd() && (isalnum(peek()) || peek() == '_')) {
+ advance();
+ }
+ size_t end = pos();
+ std::string value = input().substr(start_pos, end - start_pos);
+ if (value.empty()) {
+ return createToken(Tokens::Type::UNKNOWN, start_pos, end);
+ }
+
+ if (type == Tokens::Type::IDENTIFIER) {
+ auto it = keywords.find(value);
+ if (it != keywords.end()) {
+ return createToken(it->second, start_pos, end);
+ }
+ }
+ return createToken(type, start_pos, end);
+}
+
+Lexer::Tokens::Token Lexer::Lexer::matchNumber(size_t start_pos) {
+ bool has_dot = false;
+
+ while (!isAtEnd()) {
+ if (isdigit(peek())) {
+ advance();
+ } else if (!has_dot && peek() == '.' && isdigit(peek(1))) {
+ has_dot = true;
+ advance(); // a pont
+ advance(); // az első számjegy a pont után
+ } else {
+ break;
+ }
+ }
+
+ size_t end = pos();
+ return createToken(Tokens::Type::NUMBER, start_pos, end);
+}
+
+Lexer::Tokens::Token Lexer::Lexer::matchStringLiteral(size_t start_pos) {
+ char opening_quote = peek();
+ advance(); // Skip opening quote
+ std::string value;
+ bool unterminated = false;
+
+ while (!isAtEnd()) {
+ char c = peek();
+ if (c == opening_quote) {
+ advance();
+ break;
+ }
+ if (c == '\\') {
+ advance();
+ char e = advance();
+ switch (e) {
+ case 'n':
+ value += '\n';
+ break;
+ case 't':
+ value += '\t';
+ break;
+ case '"':
+ value += opening_quote;
+ break;
+ case '\\':
+ value += '\\';
+ break;
+ default:
+ value += e;
+ break;
+ }
+ } else {
+ value += advance();
+ }
+ }
+
+ size_t end = pos();
+ if (unterminated) {
+ return createToken(Tokens::Type::UNKNOWN, start_pos, end, input().substr(start_pos, end - start_pos));
+ }
+ return createToken(Tokens::Type::STRING_LITERAL, start_pos, end, value);
+}
+
+Lexer::Tokens::Token Lexer::Lexer::matchOperatorOrPunctuation(size_t start_pos) {
+ char first_char = advance(); // Első karakter elfogyasztása
+
+ if (!isAtEnd()) {
+ char second_char = peek(0); // Következő karakter megnézése
+ std::string two_chars_str{ first_char, second_char };
+
+ const std::vector<std::pair<const std::vector<std::string> *, Tokens::Type>> two_char_op_types = {
+ { &OPERATOR_RELATIONAL, Tokens::Type::OPERATOR_RELATIONAL },
+ { &OPERATOR_INCREMENT, Tokens::Type::OPERATOR_INCREMENT },
+ { &OPERATOR_ASSIGNMENT, Tokens::Type::OPERATOR_ASSIGNMENT },
+ { &OPERATOR_LOGICAL, Tokens::Type::OPERATOR_LOGICAL }
+ };
+
+ for (const auto & [vec_ptr, type] : two_char_op_types) {
+ if (matchFromVector(*vec_ptr, two_chars_str)) {
+ advance(); // Második karakter elfogyasztása
+ size_t end_pos = pos();
+ return createToken(type, start_pos, end_pos);
+ }
+ }
+ }
+
+ std::string single_char_str(1, first_char);
+
+ if (single_char_str == "$") {
+ if (isalpha(peek(0)) || peek(0) == '_') {
+ return matchIdentifierOrKeyword(start_pos, Tokens::Type::VARIABLE_IDENTIFIER);
+ }
+ }
+
+ const std::vector<std::pair<const std::vector<std::string> *, Tokens::Type>> one_char_op_types = {
+ { &OPERATOR_ARITHMETIC, Tokens::Type::OPERATOR_ARITHMETIC },
+ { &OPERATOR_ASSIGNMENT, Tokens::Type::OPERATOR_ASSIGNMENT },
+ { &PUNCTUATION, Tokens::Type::PUNCTUATION }
+ };
+
+ for (const auto & [vec_ptr, type] : one_char_op_types) {
+ if (matchFromVector(*vec_ptr, single_char_str)) {
+ size_t end_pos = pos();
+ return createToken(type, start_pos, end_pos);
+ }
+ }
+
+ size_t end_pos = pos();
+ return createToken(Tokens::Type::UNKNOWN, start_pos, end_pos);
+}
+
+bool Lexer::Lexer::matchFromVector(const std::vector<std::string> & vec, const std::string & value) {
+ return std::find(vec.begin(), vec.end(), value) != vec.end();
+}
+
+Lexer::Lexer::Lexer() {
+ for (const auto & vecRef :
+ { std::cref(OPERATOR_ARITHMETIC), std::cref(OPERATOR_RELATIONAL), std::cref(OPERATOR_INCREMENT),
+ std::cref(OPERATOR_ASSIGNMENT), std::cref(OPERATOR_LOGICAL), std::cref(PUNCTUATION) }) {
+ for (const auto & str : vecRef.get()) {
+ operators_ += str;
+ }
+ }
+
+ operators_ += "$";
+}
--
Gitblit v1.9.3