From 36ec04c00fa540fcee0f2cff1f7b81dd8a98101a Mon Sep 17 00:00:00 2001
From: Ferenc Szontágh <szf@fsociety.hu>
Date: Thu, 17 Apr 2025 18:44:58 +0000
Subject: [PATCH] some refactor

---
 src/Parser/Parser.cpp |  329 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 314 insertions(+), 15 deletions(-)

diff --git a/src/Parser/Parser.cpp b/src/Parser/Parser.cpp
index 3c5af2b..37c2081 100644
--- a/src/Parser/Parser.cpp
+++ b/src/Parser/Parser.cpp
@@ -1,27 +1,31 @@
 #include "Parser/Parser.hpp"
+#include <stack>
+
+#include "Interpreter/OperationsFactory.hpp"
+#include "Lexer/Operators.hpp"
 
 // Más szükséges include-ok, ha kellenek
 namespace Parser {
 
 const std::unordered_map<std::string, Lexer::Tokens::Type> Parser::keywords = {
-    { "if",       Lexer::Tokens::Type::KEYWORD          },
-    { "else",     Lexer::Tokens::Type::KEYWORD          },
-    { "while",    Lexer::Tokens::Type::KEYWORD          },
-    { "for",      Lexer::Tokens::Type::KEYWORD          },
-    { "return",   Lexer::Tokens::Type::KEYWORD_RETURN   },
+    { "if",       Lexer::Tokens::Type::KEYWORD                      },  // if/else/while/for share the generic KEYWORD type
+    { "else",     Lexer::Tokens::Type::KEYWORD                      },
+    { "while",    Lexer::Tokens::Type::KEYWORD                      },
+    { "for",      Lexer::Tokens::Type::KEYWORD                      },
+    { "return",   Lexer::Tokens::Type::KEYWORD_RETURN               },
     { "function", Lexer::Tokens::Type::KEYWORD_FUNCTION_DECLARATION },
     // Régebbiek:
-    { "const",    Lexer::Tokens::Type::KEYWORD          },
-    { "true",     Lexer::Tokens::Type::KEYWORD          },
-    { "false",    Lexer::Tokens::Type::KEYWORD          },
+    { "const",    Lexer::Tokens::Type::KEYWORD                      },  // const/true/false also use the generic KEYWORD type
+    { "true",     Lexer::Tokens::Type::KEYWORD                      },
+    { "false",    Lexer::Tokens::Type::KEYWORD                      },
     // változó típusok
-    { "null",     Lexer::Tokens::Type::KEYWORD_NULL     },
-    { "int",      Lexer::Tokens::Type::KEYWORD_INT      },
-    { "double",   Lexer::Tokens::Type::KEYWORD_DOUBLE   },
-    { "float",    Lexer::Tokens::Type::KEYWORD_FLOAT    },
-    { "string",   Lexer::Tokens::Type::KEYWORD_STRING   },
-    { "boolean",  Lexer::Tokens::Type::KEYWORD_BOOLEAN  },
-    { "bool",     Lexer::Tokens::Type::KEYWORD_BOOLEAN  },
+    { "null",     Lexer::Tokens::Type::KEYWORD_NULL                 },
+    { "int",      Lexer::Tokens::Type::KEYWORD_INT                  },
+    { "double",   Lexer::Tokens::Type::KEYWORD_DOUBLE               },
+    { "float",    Lexer::Tokens::Type::KEYWORD_FLOAT                },
+    { "string",   Lexer::Tokens::Type::KEYWORD_STRING               },
+    { "boolean",  Lexer::Tokens::Type::KEYWORD_BOOLEAN              },
+    { "bool",     Lexer::Tokens::Type::KEYWORD_BOOLEAN              },  // same token type as "boolean"
     // ... egyéb kulcsszavak ...
 };
 
@@ -34,4 +38,299 @@
     { Lexer::Tokens::Type::KEYWORD_BOOLEAN, Symbols::Variables::Type::BOOLEAN   },
 };
 
+// Parses `<type> $name = <expression>;` and passes the definition on under the current scope name.
+void Parser::parseVariableDefinition() {
+    Symbols::Variables::Type   type  = parseType();
+    const Lexer::Tokens::Token ident = expect(Lexer::Tokens::Type::VARIABLE_IDENTIFIER);
+
+    // The variable is registered under its identifier without the leading '$'.
+    std::string name = ident.value;
+    if (!name.empty() && name.front() == '$') {
+        name.erase(0, 1);
+    }
+    const auto scope = Symbols::SymbolContainer::instance()->currentScopeName();
+
+    expect(Lexer::Tokens::Type::OPERATOR_ASSIGNMENT, "=");
+    auto init_expr = parseParsedExpression(type);
+    Interpreter::OperationsFactory::defineVariableWithExpression(
+        name, type, std::move(init_expr), scope, current_filename_, ident.line_number, ident.column_number);
+    expect(Lexer::Tokens::Type::PUNCTUATION, ";");
+}
+
+// Parses `function <name> = ( [<type> $param {, <type> $param}] ) [<return type>] {` and then hands the
+// body, starting right after the consumed '{', to parseFunctionBody().
+void Parser::parseFunctionDefinition() {
+    using TokenType = Lexer::Tokens::Type;
+
+    // True while the current token is the ')' that closes the parameter list.
+    const auto at_closing_paren = [this]() {
+        return currentToken().type == TokenType::PUNCTUATION && currentToken().value == ")";
+    };
+
+    expect(TokenType::KEYWORD_FUNCTION_DECLARATION);
+    Lexer::Tokens::Token name_token = expect(TokenType::IDENTIFIER);
+    std::string          func_name  = name_token.value;
+    expect(TokenType::OPERATOR_ASSIGNMENT, "=");
+    expect(TokenType::PUNCTUATION, "(");
+
+    Symbols::FunctionParameterInfo parameters;
+    if (!at_closing_paren()) {
+        for (;;) {
+            // Each parameter is written as `<type> $name`; parseType() consumes the type token.
+            Symbols::Variables::Type param_type  = parseType();
+            Lexer::Tokens::Token     param_token = expect(TokenType::VARIABLE_IDENTIFIER);
+            std::string              param_name  = param_token.value;
+            if (!param_name.empty() && param_name.front() == '$') {
+                param_name.erase(0, 1);  // stored without the leading '$'
+            }
+            parameters.push_back({ param_name, param_type });
+            // A ',' announces another parameter; otherwise only ')' may follow.
+            if (match(TokenType::PUNCTUATION, ",")) {
+                continue;
+            }
+            if (at_closing_paren()) {
+                break;
+            }
+            reportError("Expected ',' or ')' in parameter list");
+        }
+    }
+    expect(TokenType::PUNCTUATION, ")");
+
+    // Optional return type after the parameter list; it stays NULL_TYPE when no known type token matches.
+    Symbols::Variables::Type return_type = Symbols::Variables::Type::NULL_TYPE;
+    for (const auto & [token_type, value_type] : Parser::variable_types) {
+        if (match(token_type)) {
+            return_type = value_type;
+            break;
+        }
+    }
+    Lexer::Tokens::Token opening_brace = expect(TokenType::PUNCTUATION, "{");
+    parseFunctionBody(opening_brace, func_name, return_type, parameters);
+}
+
+// Converts the textual numeric literal `value` into a Symbols::Value of the requested numeric `type`,
+// negating it when `is_negative` is set. Conversion failures are forwarded to reportError().
+Symbols::Value Parser::parseNumericLiteral(const std::string & value, bool is_negative, Symbols::Variables::Type type) {
+    using VarType = Symbols::Variables::Type;
+
+    try {
+        if (type == VarType::INTEGER) {
+            // An integer literal must not contain a decimal point.
+            if (value.find('.') != std::string::npos) {
+                throw std::invalid_argument("Floating point value in integer context: " + value);
+            }
+            const int parsed = std::stoi(value);
+            return Symbols::Value(is_negative ? -parsed : parsed);
+        }
+        if (type == VarType::DOUBLE) {
+            const double parsed = std::stod(value);
+            return Symbols::Value(is_negative ? -parsed : parsed);
+        }
+        if (type == VarType::FLOAT) {
+            const float parsed = std::stof(value);
+            return Symbols::Value(is_negative ? -parsed : parsed);
+        }
+        throw std::invalid_argument("Unsupported numeric type");
+    } catch (const std::invalid_argument & error) {
+        reportError("Invalid numeric literal: " + value + " (" + error.what() + ")");
+    } catch (const std::out_of_range & error) {
+        reportError("Numeric literal out of range: " + value + " (" + error.what() + ")");
+    }
+
+    // Only reached if reportError() returns.
+    return Symbols::Value();
+}
+
+// Parses a function body whose opening '{' was just consumed, then defines the function. The matching '}'
+// is found by brace counting; the tokens in between go to a nested Parser after create() is called for
+// "<current scope>.<function_name>", and defineFunction() runs once enterPreviousScope() has been called.
+void Parser::parseFunctionBody(const Lexer::Tokens::Token & opening_brace, const std::string & function_name,
+                               Symbols::Variables::Type return_type, const Symbols::FunctionParameterInfo & params) {
+    const size_t         body_begin    = current_token_index_;  // scan starts at the current token
+    size_t               body_end      = body_begin;            // becomes the index of the matching '}'
+    size_t               brace_depth   = 0;
+    bool                 found_closing = false;
+    Lexer::Tokens::Token closing_brace;
+
+    for (; body_end < tokens_.size(); ++body_end) {
+        const Lexer::Tokens::Token & token    = tokens_[body_end];
+        const bool                   is_punct = token.type == Lexer::Tokens::Type::PUNCTUATION;
+        if (is_punct && token.value == "{") {
+            ++brace_depth;
+        } else if (is_punct && token.value == "}") {
+            if (brace_depth == 0) {
+                closing_brace = token;
+                found_closing = true;
+                break;
+            }
+            --brace_depth;
+        }
+    }
+    if (!found_closing) {  // ran out of tokens, at any nesting depth, before the body was closed
+        reportError("Unmatched braces in function body");
+    }
+
+    // Slice by index: both ends are already known, so no lookup by token equality is needed.
+    const std::vector<Lexer::Tokens::Token> body_tokens(tokens_.begin() + body_begin, tokens_.begin() + body_end);
+    // substr() takes (offset, length), so the length is the distance between the two brace offsets.
+    // NOTE(review): the span ends at closing_brace.end_pos, so it may include the '}' itself - confirm.
+    const size_t     src_begin    = opening_brace.end_pos;
+    const size_t     src_end      = closing_brace.end_pos;
+    std::string_view input_string = input_str_view_.substr(src_begin, src_end > src_begin ? src_end - src_begin : 0);
+
+    current_token_index_ = body_end;
+    expect(Lexer::Tokens::Type::PUNCTUATION, "}");
+    const std::string newns = Symbols::SymbolContainer::instance()->currentScopeName() + "." + function_name;
+    Symbols::SymbolContainer::instance()->create(newns);
+    std::shared_ptr<Parser> parser = std::make_shared<Parser>();
+    parser->parseScript(body_tokens, input_string, this->current_filename_);
+    Symbols::SymbolContainer::instance()->enterPreviousScope();
+    Interpreter::OperationsFactory::defineFunction(
+        function_name, params, return_type, Symbols::SymbolContainer::instance()->currentScopeName(),
+        this->current_filename_, closing_brace.line_number, closing_brace.column_number);
+}
+
+// Parses an infix expression with a shunting-yard style loop and returns the single resulting expression.
+// Operands are added through Lexer::pushOperand(); operators wait on a stack and are combined with
+// Lexer::applyOperator() in the order given by Lexer::getPrecedence() and Lexer::isLeftAssociative().
+// Parsing stops at the first token that matches none of the handled kinds; that token is not consumed.
+ParsedExpressionPtr Parser::parseParsedExpression(const Symbols::Variables::Type & expected_var_type) {
+    std::stack<std::string>          operator_stack;
+    std::vector<ParsedExpressionPtr> output_queue;
+    // True at the start, after '(' and after an operator: an operator seen then may be a unary one.
+    bool expect_unary = true;
+
+    while (true) {
+        auto token = currentToken();
+
+        if (token.type == Lexer::Tokens::Type::PUNCTUATION && token.lexeme == "(") {
+            operator_stack.push("(");
+            consumeToken();
+            expect_unary = true;
+        } else if (token.type == Lexer::Tokens::Type::PUNCTUATION && token.lexeme == ")") {
+            consumeToken();
+            while (!operator_stack.empty() && operator_stack.top() != "(") {
+                std::string op = operator_stack.top();
+                operator_stack.pop();
+                if (op == "u-" || op == "u+") {
+                    if (output_queue.empty()) {
+                        reportError("Missing operand for unary operator");
+                    }
+                    auto rhs = std::move(output_queue.back());
+                    output_queue.pop_back();
+                    output_queue.push_back(Lexer::applyOperator(op, std::move(rhs)));
+                } else {
+                    if (output_queue.size() < 2) {
+                        reportError("Malformed expression");
+                    }
+                    auto rhs = std::move(output_queue.back());
+                    output_queue.pop_back();
+                    auto lhs = std::move(output_queue.back());
+                    output_queue.pop_back();
+                    output_queue.push_back(Lexer::applyOperator(op, std::move(rhs), std::move(lhs)));
+                }
+            }
+            if (operator_stack.empty() || operator_stack.top() != "(") {
+                reportError("Mismatched parentheses");
+            }
+            operator_stack.pop();  // remove "("
+            expect_unary = false;
+        } else if (token.type == Lexer::Tokens::Type::OPERATOR_ARITHMETIC) {
+            std::string op = std::string(token.lexeme);
+            // NOTE(review): only "u-"/"u+" are reduced as unary here; any other "u..." operator would be reduced as binary.
+            if (expect_unary && Lexer::isUnaryOperator(op)) {
+                op = "u" + op;  // e.g. u-, u+ or u!
+            }
+            // Before pushing op, reduce stacked operators of higher precedence (or equal, when op is left-associative).
+            // NOTE(review): relies on getPrecedence("(") ranking below every operator so "(" is never popped here - confirm.
+            while (!operator_stack.empty()) {
+                const std::string top = operator_stack.top();  // copy: pop() below destroys the stack's element
+                if ((Lexer::isLeftAssociative(op) && Lexer::getPrecedence(op) <= Lexer::getPrecedence(top)) ||
+                    (!Lexer::isLeftAssociative(op) && Lexer::getPrecedence(op) < Lexer::getPrecedence(top))) {
+                    operator_stack.pop();
+                    if (top == "u-" || top == "u+") {
+                        if (output_queue.empty()) {
+                            reportError("Missing operand for unary operator");
+                        }
+                        auto rhs = std::move(output_queue.back());
+                        output_queue.pop_back();
+                        output_queue.push_back(Lexer::applyOperator(top, std::move(rhs)));
+                    } else {
+                        if (output_queue.size() < 2) {
+                            reportError("Malformed expression");
+                        }
+                        auto rhs = std::move(output_queue.back());
+                        output_queue.pop_back();
+                        auto lhs = std::move(output_queue.back());
+                        output_queue.pop_back();
+                        output_queue.push_back(Lexer::applyOperator(top, std::move(rhs), std::move(lhs)));
+                    }
+                } else {
+                    break;
+                }
+            }
+
+            operator_stack.push(op);
+            consumeToken();
+            expect_unary = true;
+        } else if (token.type == Lexer::Tokens::Type::NUMBER || token.type == Lexer::Tokens::Type::STRING_LITERAL ||
+                   token.type == Lexer::Tokens::Type::KEYWORD ||
+                   token.type == Lexer::Tokens::Type::VARIABLE_IDENTIFIER) {
+            if (Lexer::pushOperand(token, expected_var_type, output_queue) == false) {
+                reportError("Expected literal or variable");
+            }
+            consumeToken();
+            expect_unary = false;
+        } else {
+            break;
+        }
+    }
+
+    // Drain the operator stack.
+    while (!operator_stack.empty()) {
+        std::string op = operator_stack.top();
+        operator_stack.pop();
+        if (op == "(" || op == ")") {
+            reportError("Mismatched parentheses");
+        }
+
+        if (op == "u-" || op == "u+") {
+            if (output_queue.empty()) {
+                reportError("Missing operand for unary operator");
+            }
+            auto rhs = std::move(output_queue.back());
+            output_queue.pop_back();
+            output_queue.push_back(Lexer::applyOperator(op, std::move(rhs)));
+        } else {
+            if (output_queue.size() < 2) {
+                reportError("Malformed expression");
+            }
+            auto rhs = std::move(output_queue.back());
+            output_queue.pop_back();
+            auto lhs = std::move(output_queue.back());
+            output_queue.pop_back();
+            output_queue.push_back(Lexer::applyOperator(op, std::move(rhs), std::move(lhs)));
+        }
+    }
+
+    if (output_queue.size() != 1) {
+        reportError("Expression could not be parsed cleanly");
+    }
+    return std::move(output_queue.back());
+}
+
+// Loads the token stream, source text and file name into this parser, then parses statement by
+// statement until the tokens run out or an END_OF_FILE token becomes the current token.
+void Parser::parseScript(const std::vector<Lexer::Tokens::Token> & tokens, std::string_view input_string,
+                         const std::string & filename) {
+    tokens_              = tokens;
+    input_str_view_      = input_string;
+    current_token_index_ = 0;
+    current_filename_    = filename;
+
+    // The loop ends only when this condition is false, so no post-loop "unexpected tokens" check is needed.
+    while (!isAtEnd() && currentToken().type != Lexer::Tokens::Type::END_OF_FILE) {
+        parseStatement();
+    }
+}
 }  // namespace Parser

--
Gitblit v1.9.3