A simple scripting language in C++
Ferenc Szontágh
2025-04-17 36ec04c00fa540fcee0f2cff1f7b81dd8a98101a
src/Parser/Parser.hpp
@@ -1,60 +1,45 @@
#ifndef PARSER_HPP
#define PARSER_HPP
#include <algorithm>
#include <memory>
#include <stack>
#include <stdexcept>
#include <string>
#include <vector>
#include "Interpreter/ExpressionBuilder.hpp"
#include "Interpreter/OperationsFactory.hpp"
#include "BaseException.hpp"
#include "Lexer/Token.hpp"
#include "Lexer/TokenType.hpp"
#include "Parser/ParsedExpression.hpp"
#include "Symbols/ParameterContainer.hpp"
#include "Symbols/SymbolContainer.hpp"
#include "Symbols/Value.hpp"
namespace Parser {
class SyntaxError : public std::runtime_error {
  public:
    SyntaxError(const std::string & message, const int line, const int col) :
        std::runtime_error(message + " at line " + std::to_string(line) + ", column " + std::to_string(col)) {}
    SyntaxError(const std::string & message, const Lexer::Tokens::Token & token) :
        SyntaxError(
            message + " (found token: '" + token.value + "' type: " + Lexer::Tokens::TypeToString(token.type) + ")",
            token.line_number, token.column_number) {}
};
class Parser {
  public:
    Parser() {}
    // NOTE(review): in this span the inline parseScript body and the nested Exception class
    // appear interleaved (statements of one sit between lines of the other) — presumably two
    // revisions of the header got merged line by line; confirm against the real file.
    //
    // parseScript: stores the token list, the source text view and the file name on the parser
    // and resets the cursor to the first token before parsing starts.
    void parseScript(const std::vector<Lexer::Tokens::Token> & tokens, std::string_view input_string,
                     const std::string & filename) {
        tokens_              = tokens;
        input_str_view_      = input_string;
        current_token_index_ = 0;
        current_filename_    = filename;
    // Parser-specific exception type built on BaseException; inherits the base constructors.
    class Exception : public BaseException {
      public:
        using BaseException::BaseException;
        // Parse statements until the cursor is at the end or an END_OF_FILE token is current.
        try {
            while (!isAtEnd() && currentToken().type != Lexer::Tokens::Type::END_OF_FILE) {
                parseStatement();
            }
            // Same condition as the loop above; reports leftover tokens if it still holds.
            if (!isAtEnd() && currentToken().type != Lexer::Tokens::Type::END_OF_FILE) {
                reportError("Unexpected tokens after program end");
            }
        } catch (const SyntaxError & e) {
            // Syntax errors are printed to stderr and NOT rethrown.
            std::cerr << "Syntax Error: " << e.what() << '\n';
        } catch (const std::exception & e) {
            // Any other standard exception is printed to stderr and then rethrown to the caller.
            std::cerr << "Error during parsing: " << e.what() << '\n';
            throw;
        // Builds the error from a message plus the offending token: the raw message gets the
        // token dump appended, the context holds the token's line and column.
        Exception(const std::string & msg, const Lexer::Tokens::Token & token) {
            rawMessage_ = msg + ": " + token.dump();
            context_ =
                " at line: " + std::to_string(token.line_number) + ", column: " + std::to_string(token.column_number);
            formattedMessage_ = formatMessage();
        }
    }
        // Builds the error from a message and an explicit line/column position.
        Exception(const std::string & msg, int line, int col) {
            rawMessage_       = msg;
            context_          = " at line: " + std::to_string(line) + ", column: " + std::to_string(col);
            formattedMessage_ = formatMessage();
        }
        // Renders the final text as "[Syntax ERROR] >>" + context + " << : " + raw message.
        std::string formatMessage() const override { return "[Syntax ERROR] >>" + context_ + " << : " + rawMessage_; }
    };
    // Declaration-only form of parseScript (no inline body).
    void parseScript(const std::vector<Lexer::Tokens::Token> & tokens, std::string_view input_string,
                     const std::string & filename);
    static const std::unordered_map<std::string, Lexer::Tokens::Type>              keywords;
    static const std::unordered_map<Lexer::Tokens::Type, Symbols::Variables::Type> variable_types;
@@ -161,19 +146,14 @@
               (current_token_index_ == tokens_.size() - 1 && tokens_.back().type == Lexer::Tokens::Type::END_OF_FILE);
    }
    // --- Error handling ---
    // Report an error and throw an exception
    // Raises a syntax error carrying `message`; never returns normally.
    // The position comes from the token at current_token_index_ when that index is in range,
    // otherwise from the last token in tokens_ (line 0, column 0 when tokens_ is empty).
    // NOTE(review): each path below shows both a SyntaxError throw and an Exception throw, and
    // the braces look unbalanced — presumably two revisions of this function are interleaved;
    // confirm against the real file.
    [[noreturn]] void reportError(const std::string & message) {
        // Use the current token's position if we have not reached the end yet
        if (current_token_index_ < tokens_.size()) {
            throw SyntaxError(message, tokens_[current_token_index_]);
        }  // If we are already at the end, use the last known position
            throw Exception(message, tokens_[current_token_index_]);
        }
        // Fallback position: the last token, or 0/0 when there are no tokens at all.
        int line = tokens_.empty() ? 0 : tokens_.back().line_number;
        int col  = tokens_.empty() ? 0 : tokens_.back().column_number;
        throw SyntaxError(message, line, col);
        throw Exception(message, line, col);
    }
    // --- Parsing methods (modular parts) ---
    // parseStatement (unchanged)
    void parseStatement() {
@@ -194,81 +174,8 @@
        reportError("Unexpected token at beginning of statement");
    }
    void parseVariableDefinition() {
        Symbols::Variables::Type var_type = parseType();
        Lexer::Tokens::Token id_token = expect(Lexer::Tokens::Type::VARIABLE_IDENTIFIER);
        std::string          var_name = id_token.value;
        if (!var_name.empty() && var_name[0] == '$') {
            var_name = var_name.substr(1);
        }
        const auto ns = Symbols::SymbolContainer::instance()->currentScopeName();
        expect(Lexer::Tokens::Type::OPERATOR_ASSIGNMENT, "=");
        /*
        Symbols::Value initial_value = parseValue(var_type);
        Interpreter::OperationsFactory::defineSimpleVariable(var_name, initial_value, ns, this->current_filename_,
                                                             id_token.line_number, id_token.column_number);
*/
        auto expr = parseParsedExpression(var_type);
        Interpreter::OperationsFactory::defineVariableWithExpression(
            var_name, var_type, std::move(expr), ns, current_filename_, id_token.line_number, id_token.column_number);
        expect(Lexer::Tokens::Type::PUNCTUATION, ";");
    }
    void parseFunctionDefinition() {
        expect(Lexer::Tokens::Type::KEYWORD_FUNCTION_DECLARATION);
        Lexer::Tokens::Token     id_token         = expect(Lexer::Tokens::Type::IDENTIFIER);
        std::string              func_name        = id_token.value;
        Symbols::Variables::Type func_return_type = Symbols::Variables::Type::NULL_TYPE;
        expect(Lexer::Tokens::Type::OPERATOR_ASSIGNMENT, "=");
        expect(Lexer::Tokens::Type::PUNCTUATION, "(");
        Symbols::FunctionParameterInfo param_infos;
        if (currentToken().type != Lexer::Tokens::Type::PUNCTUATION || currentToken().value != ")") {
            while (true) {
                // Paraméter típusa
                Symbols::Variables::Type param_type = parseType();  // Ez elfogyasztja a type tokent
                // Paraméter név ($variable)
                Lexer::Tokens::Token param_id_token = expect(Lexer::Tokens::Type::VARIABLE_IDENTIFIER);
                std::string          param_name     = param_id_token.value;
                if (!param_name.empty() && param_name[0] == '$') {  // '$' eltávolítása
                    param_name = param_name.substr(1);
                }
                param_infos.push_back({ param_name, param_type });
                // Vessző vagy zárójel következik?
                if (match(Lexer::Tokens::Type::PUNCTUATION, ",")) {
                    continue;
                }
                if (currentToken().type == Lexer::Tokens::Type::PUNCTUATION && currentToken().value == ")") {
                    break;  // Lista vége
                }
                reportError("Expected ',' or ')' in parameter list");
            }
        }
        // Most a ')' következik
        expect(Lexer::Tokens::Type::PUNCTUATION, ")");
        // check if we have a option return type: function name() type { ... }
        for (const auto & _type : Parser::variable_types) {
            if (match(_type.first)) {
                func_return_type = _type.second;
                break;
            }
        }
        Lexer::Tokens::Token opening_brace = expect(Lexer::Tokens::Type::PUNCTUATION, "{");
        // only parse the body if we checked out if not exists the function and created the symbol
        parseFunctionBody(opening_brace, func_name, func_return_type, param_infos);
    }
    void parseVariableDefinition();
    void parseFunctionDefinition();
    // --- Parsing helper functions ---
@@ -333,255 +240,12 @@
        return Symbols::Value();  // compiler happy
    }
    Symbols::Value parseNumericLiteral(const std::string & value, bool is_negative, Symbols::Variables::Type type) {
        try {
            switch (type) {
                case Symbols::Variables::Type::INTEGER:
                    {
                        if (value.find('.') != std::string::npos) {
                            throw std::invalid_argument("Floating point value in integer context: " + value);
                        }
                        int v = std::stoi(value);
                        return Symbols::Value(is_negative ? -v : v);
                    }
                case Symbols::Variables::Type::DOUBLE:
                    {
                        double v = std::stod(value);
                        return Symbols::Value(is_negative ? -v : v);
                    }
                case Symbols::Variables::Type::FLOAT:
                    {
                        float v = std::stof(value);
                        return Symbols::Value(is_negative ? -v : v);
                    }
                default:
                    throw std::invalid_argument("Unsupported numeric type");
            }
        } catch (const std::invalid_argument & e) {
            reportError("Invalid numeric literal: " + value + " (" + e.what() + ")");
        } catch (const std::out_of_range & e) {
            reportError("Numeric literal out of range: " + value + " (" + e.what() + ")");
        }
        return Symbols::Value();  // unreachable
    }
    Symbols::Value parseNumericLiteral(const std::string & value, bool is_negative, Symbols::Variables::Type type);
    // Handles a function body whose opening '{' has already been consumed by the caller:
    //   1. scans forward from the current token to the '}' that closes the body,
    //   2. copies the tokens between the braces and parses them with a separate Parser inside a
    //      new scope named "<current scope>.<function_name>",
    //   3. returns to the previous scope and registers the function through
    //      Interpreter::OperationsFactory::defineFunction.
    void parseFunctionBody(const Lexer::Tokens::Token & opening_brace, const std::string & function_name,
                           Symbols::Variables::Type return_type, const Symbols::FunctionParameterInfo & params) {
        size_t               braceDepth = 0;  // number of nested '{' currently open inside the body
        int                  peek       = 0;  // look-ahead offset passed to peekToken()
        // NOTE(review): signed int compared against tokens_.size() in the loop below.
        int                  tokenIndex = current_token_index_;
        Lexer::Tokens::Token currentToken_;   // last token examined by the scan
        Lexer::Tokens::Token closing_brace;   // stays default-constructed if no closing '}' is found
        // NOTE(review): the next line reads like the tail of a bare declaration of this function
        // — presumably a diff/merge artifact; confirm against the real file.
                           Symbols::Variables::Type return_type, const Symbols::FunctionParameterInfo & params);
        // Walk forward; tokenIndex and peek advance together.
        while (tokenIndex < tokens_.size()) {
            currentToken_ = peekToken(peek);
            if (currentToken_.type == Lexer::Tokens::Type::PUNCTUATION) {
                if (currentToken_.value == "{") {
                    ++braceDepth;
                } else if (currentToken_.value == "}") {
                    // A '}' at depth 0 is the one that closes the function body.
                    if (braceDepth == 0) {
                        closing_brace = currentToken_;
                        break;
                    }
                    --braceDepth;
                }
            }
            tokenIndex++;
            peek++;
        }
        if (braceDepth != 0) {
            reportError("Unmatched braces in function body");
        }
        std::vector<Lexer::Tokens::Token> filtered_tokens;
        // NOTE(review): std::find returns the FIRST token comparing equal to the brace token;
        // presumably Token equality includes the source position — verify.
        auto                              startIt = std::find(tokens_.begin(), tokens_.end(), opening_brace);
        auto                              endIt   = std::find(tokens_.begin(), tokens_.end(), closing_brace);
        // Body tokens are everything strictly between the two braces.
        if (startIt != tokens_.end() && endIt != tokens_.end() && startIt < endIt) {
            filtered_tokens = std::vector<Lexer::Tokens::Token>(startIt + 1, endIt);
        }
        // NOTE(review): std::string_view::substr takes (pos, count), but the second argument here
        // is an end position — confirm which slice of the source text is intended.
        std::string_view input_string = input_str_view_.substr(opening_brace.end_pos, closing_brace.end_pos);
        // Move the cursor to where the scan stopped and require the closing '}' there.
        current_token_index_ = tokenIndex;
        expect(Lexer::Tokens::Type::PUNCTUATION, "}");
        const std::string newns = Symbols::SymbolContainer::instance()->currentScopeName() + "." + function_name;
        Symbols::SymbolContainer::instance()->create(newns);
        // The body is parsed by a separate Parser instance using the same file name.
        std::shared_ptr<Parser> parser = std::make_shared<Parser>();
        parser->parseScript(filtered_tokens, input_string, this->current_filename_);
        Symbols::SymbolContainer::instance()->enterPreviousScope();
        // create function
        // The reported position is that of the last token examined by the scan above.
        Interpreter::OperationsFactory::defineFunction(
            function_name, params, return_type, Symbols::SymbolContainer::instance()->currentScopeName(),
            this->current_filename_, currentToken_.line_number, currentToken_.column_number);
    }
    ParsedExpressionPtr parseParsedExpression(const Symbols::Variables::Type & expected_var_type) {
        std::stack<std::string>          operator_stack;
        std::vector<ParsedExpressionPtr> output_queue;
        auto getPrecedence = [](const std::string & op) -> int {
            if (op == "+" || op == "-") {
                return 1;
            }
            if (op == "*" || op == "/") {
                return 2;
            }
            if (op == "u-" || op == "u+") {
                return 3;
            }
            return 0;
        };
        auto isLeftAssociative = [](const std::string & op) -> bool {
            return !(op == "u-" || op == "u+");
        };
        auto applyOperator = [](const std::string & op, ParsedExpressionPtr rhs, ParsedExpressionPtr lhs = nullptr) {
            if (op == "u-" || op == "u+") {
                std::string real_op = (op == "u-") ? "-" : "+";
                return ParsedExpression::makeUnary(real_op, std::move(rhs));
            } else {
                return ParsedExpression::makeBinary(op, std::move(lhs), std::move(rhs));
            }
        };
        auto pushOperand = [&](const Lexer::Tokens::Token & token) {
            if (token.type == Lexer::Tokens::Type::NUMBER || token.type == Lexer::Tokens::Type::STRING_LITERAL ||
                token.type == Lexer::Tokens::Type::KEYWORD) {
                output_queue.push_back(
                    ParsedExpression::makeLiteral(Symbols::Value::fromString(token.value, expected_var_type)));
            } else if (token.type == Lexer::Tokens::Type::VARIABLE_IDENTIFIER) {
                std::string name = token.value;
                if (!name.empty() && name[0] == '$') {
                    name = name.substr(1);
                }
                output_queue.push_back(ParsedExpression::makeVariable(name));
            } else {
                reportError("Expected literal or variable");
            }
        };
        bool expect_unary = true;
        while (true) {
            auto token = currentToken();
            if (token.type == Lexer::Tokens::Type::PUNCTUATION && token.lexeme == "(") {
                operator_stack.push("(");
                consumeToken();
                expect_unary = true;
            } else if (token.type == Lexer::Tokens::Type::PUNCTUATION && token.lexeme == ")") {
                consumeToken();
                while (!operator_stack.empty() && operator_stack.top() != "(") {
                    std::string op = operator_stack.top();
                    operator_stack.pop();
                    if (op == "u-" || op == "u+") {
                        if (output_queue.empty()) {
                            reportError("Missing operand for unary operator");
                        }
                        auto rhs = std::move(output_queue.back());
                        output_queue.pop_back();
                        output_queue.push_back(applyOperator(op, std::move(rhs)));
                    } else {
                        if (output_queue.size() < 2) {
                            reportError("Malformed expression");
                        }
                        auto rhs = std::move(output_queue.back());
                        output_queue.pop_back();
                        auto lhs = std::move(output_queue.back());
                        output_queue.pop_back();
                        output_queue.push_back(applyOperator(op, std::move(rhs), std::move(lhs)));
                    }
                }
                if (operator_stack.empty() || operator_stack.top() != "(") {
                    reportError("Mismatched parentheses");
                }
                operator_stack.pop();  // remove "("
                expect_unary = false;
            } else if (token.type == Lexer::Tokens::Type::OPERATOR_ARITHMETIC) {
                std::string op = std::string(token.lexeme);
                if (expect_unary && (op == "-" || op == "+")) {
                    op = "u" + op;  // pl. u-
                }
                while (!operator_stack.empty()) {
                    const std::string & top = operator_stack.top();
                    if ((isLeftAssociative(op) && getPrecedence(op) <= getPrecedence(top)) ||
                        (!isLeftAssociative(op) && getPrecedence(op) < getPrecedence(top))) {
                        operator_stack.pop();
                        if (top == "u-" || top == "u+") {
                            if (output_queue.empty()) {
                                reportError("Missing operand for unary operator");
                            }
                            auto rhs = std::move(output_queue.back());
                            output_queue.pop_back();
                            output_queue.push_back(applyOperator(top, std::move(rhs)));
                        } else {
                            if (output_queue.size() < 2) {
                                reportError("Malformed expression");
                            }
                            auto rhs = std::move(output_queue.back());
                            output_queue.pop_back();
                            auto lhs = std::move(output_queue.back());
                            output_queue.pop_back();
                            output_queue.push_back(applyOperator(top, std::move(rhs), std::move(lhs)));
                        }
                    } else {
                        break;
                    }
                }
                operator_stack.push(op);
                consumeToken();
                expect_unary = true;
            } else if (token.type == Lexer::Tokens::Type::NUMBER || token.type == Lexer::Tokens::Type::STRING_LITERAL ||
                       token.type == Lexer::Tokens::Type::KEYWORD ||
                       token.type == Lexer::Tokens::Type::VARIABLE_IDENTIFIER) {
                pushOperand(token);
                consumeToken();
                expect_unary = false;
            } else {
                break;
            }
        }
        // Kiürítjük az operator stack-et
        while (!operator_stack.empty()) {
            std::string op = operator_stack.top();
            operator_stack.pop();
            if (op == "(" || op == ")") {
                reportError("Mismatched parentheses");
            }
            if (op == "u-" || op == "u+") {
                if (output_queue.empty()) {
                    reportError("Missing operand for unary operator");
                }
                auto rhs = std::move(output_queue.back());
                output_queue.pop_back();
                output_queue.push_back(applyOperator(op, std::move(rhs)));
            } else {
                if (output_queue.size() < 2) {
                    reportError("Malformed expression");
                }
                auto rhs = std::move(output_queue.back());
                output_queue.pop_back();
                auto lhs = std::move(output_queue.back());
                output_queue.pop_back();
                output_queue.push_back(applyOperator(op, std::move(rhs), std::move(lhs)));
            }
        }
        if (output_queue.size() != 1) {
            reportError("Expression could not be parsed cleanly");
        }
        return std::move(output_queue.back());
    }
    ParsedExpressionPtr parseParsedExpression(const Symbols::Variables::Type & expected_var_type);
};  // class Parser