A simple scripting language in C++
Ferenc Szontágh
2025-04-17 36ec04c00fa540fcee0f2cff1f7b81dd8a98101a
src/Parser/Parser.cpp
@@ -1,27 +1,31 @@
#include "Parser/Parser.hpp"
#include <stack>
#include "Interpreter/OperationsFactory.hpp"
#include "Lexer/Operators.hpp"
// Más szükséges include-ok, ha kellenek
namespace Parser {
const std::unordered_map<std::string, Lexer::Tokens::Type> Parser::keywords = {
    { "if",       Lexer::Tokens::Type::KEYWORD          },
    { "else",     Lexer::Tokens::Type::KEYWORD          },
    { "while",    Lexer::Tokens::Type::KEYWORD          },
    { "for",      Lexer::Tokens::Type::KEYWORD          },
    { "return",   Lexer::Tokens::Type::KEYWORD_RETURN   },
    { "if",       Lexer::Tokens::Type::KEYWORD                      },
    { "else",     Lexer::Tokens::Type::KEYWORD                      },
    { "while",    Lexer::Tokens::Type::KEYWORD                      },
    { "for",      Lexer::Tokens::Type::KEYWORD                      },
    { "return",   Lexer::Tokens::Type::KEYWORD_RETURN               },
    { "function", Lexer::Tokens::Type::KEYWORD_FUNCTION_DECLARATION },
    // Régebbiek:
    { "const",    Lexer::Tokens::Type::KEYWORD          },
    { "true",     Lexer::Tokens::Type::KEYWORD          },
    { "false",    Lexer::Tokens::Type::KEYWORD          },
    { "const",    Lexer::Tokens::Type::KEYWORD                      },
    { "true",     Lexer::Tokens::Type::KEYWORD                      },
    { "false",    Lexer::Tokens::Type::KEYWORD                      },
    // változó típusok
    { "null",     Lexer::Tokens::Type::KEYWORD_NULL     },
    { "int",      Lexer::Tokens::Type::KEYWORD_INT      },
    { "double",   Lexer::Tokens::Type::KEYWORD_DOUBLE   },
    { "float",    Lexer::Tokens::Type::KEYWORD_FLOAT    },
    { "string",   Lexer::Tokens::Type::KEYWORD_STRING   },
    { "boolean",  Lexer::Tokens::Type::KEYWORD_BOOLEAN  },
    { "bool",     Lexer::Tokens::Type::KEYWORD_BOOLEAN  },
    { "null",     Lexer::Tokens::Type::KEYWORD_NULL                 },
    { "int",      Lexer::Tokens::Type::KEYWORD_INT                  },
    { "double",   Lexer::Tokens::Type::KEYWORD_DOUBLE               },
    { "float",    Lexer::Tokens::Type::KEYWORD_FLOAT                },
    { "string",   Lexer::Tokens::Type::KEYWORD_STRING               },
    { "boolean",  Lexer::Tokens::Type::KEYWORD_BOOLEAN              },
    { "bool",     Lexer::Tokens::Type::KEYWORD_BOOLEAN              },
    // ... egyéb kulcsszavak ...
};
@@ -34,4 +38,299 @@
    { Lexer::Tokens::Type::KEYWORD_BOOLEAN, Symbols::Variables::Type::BOOLEAN   },
};
void Parser::parseVariableDefinition() {
    Symbols::Variables::Type var_type = parseType();
    Lexer::Tokens::Token id_token = expect(Lexer::Tokens::Type::VARIABLE_IDENTIFIER);
    std::string          var_name = id_token.value;
    if (!var_name.empty() && var_name[0] == '$') {
        var_name = var_name.substr(1);
    }
    const auto ns = Symbols::SymbolContainer::instance()->currentScopeName();
    expect(Lexer::Tokens::Type::OPERATOR_ASSIGNMENT, "=");
    auto expr = parseParsedExpression(var_type);
    Interpreter::OperationsFactory::defineVariableWithExpression(
        var_name, var_type, std::move(expr), ns, current_filename_, id_token.line_number, id_token.column_number);
    expect(Lexer::Tokens::Type::PUNCTUATION, ";");
}
void Parser::parseFunctionDefinition() {
    expect(Lexer::Tokens::Type::KEYWORD_FUNCTION_DECLARATION);
    Lexer::Tokens::Token     id_token         = expect(Lexer::Tokens::Type::IDENTIFIER);
    std::string              func_name        = id_token.value;
    Symbols::Variables::Type func_return_type = Symbols::Variables::Type::NULL_TYPE;
    expect(Lexer::Tokens::Type::OPERATOR_ASSIGNMENT, "=");
    expect(Lexer::Tokens::Type::PUNCTUATION, "(");
    Symbols::FunctionParameterInfo param_infos;
    if (currentToken().type != Lexer::Tokens::Type::PUNCTUATION || currentToken().value != ")") {
        while (true) {
            // Paraméter típusa
            Symbols::Variables::Type param_type = parseType();  // Ez elfogyasztja a type tokent
            // Paraméter név ($variable)
            Lexer::Tokens::Token param_id_token = expect(Lexer::Tokens::Type::VARIABLE_IDENTIFIER);
            std::string          param_name     = param_id_token.value;
            if (!param_name.empty() && param_name[0] == '$') {  // '$' eltávolítása
                param_name = param_name.substr(1);
            }
            param_infos.push_back({ param_name, param_type });
            // Vessző vagy zárójel következik?
            if (match(Lexer::Tokens::Type::PUNCTUATION, ",")) {
                continue;
            }
            if (currentToken().type == Lexer::Tokens::Type::PUNCTUATION && currentToken().value == ")") {
                break;  // Lista vége
            }
            reportError("Expected ',' or ')' in parameter list");
        }
    }
    // Most a ')' következik
    expect(Lexer::Tokens::Type::PUNCTUATION, ")");
    // check if we have a option return type: function name() type { ... }
    for (const auto & _type : Parser::variable_types) {
        if (match(_type.first)) {
            func_return_type = _type.second;
            break;
        }
    }
    Lexer::Tokens::Token opening_brace = expect(Lexer::Tokens::Type::PUNCTUATION, "{");
    // only parse the body if we checked out if not exists the function and created the symbol
    parseFunctionBody(opening_brace, func_name, func_return_type, param_infos);
}
Symbols::Value Parser::parseNumericLiteral(const std::string & value, bool is_negative, Symbols::Variables::Type type) {
    try {
        switch (type) {
            case Symbols::Variables::Type::INTEGER:
                {
                    if (value.find('.') != std::string::npos) {
                        throw std::invalid_argument("Floating point value in integer context: " + value);
                    }
                    int v = std::stoi(value);
                    return Symbols::Value(is_negative ? -v : v);
                }
            case Symbols::Variables::Type::DOUBLE:
                {
                    double v = std::stod(value);
                    return Symbols::Value(is_negative ? -v : v);
                }
            case Symbols::Variables::Type::FLOAT:
                {
                    float v = std::stof(value);
                    return Symbols::Value(is_negative ? -v : v);
                }
            default:
                throw std::invalid_argument("Unsupported numeric type");
        }
    } catch (const std::invalid_argument & e) {
        reportError("Invalid numeric literal: " + value + " (" + e.what() + ")");
    } catch (const std::out_of_range & e) {
        reportError("Numeric literal out of range: " + value + " (" + e.what() + ")");
    }
    return Symbols::Value();  // unreachable
}
void Parser::parseFunctionBody(const Lexer::Tokens::Token & opening_brace, const std::string & function_name,
                               Symbols::Variables::Type return_type, const Symbols::FunctionParameterInfo & params) {
    size_t               braceDepth = 0;
    int                  peek       = 0;
    int                  tokenIndex = current_token_index_;
    Lexer::Tokens::Token currentToken_;
    Lexer::Tokens::Token closing_brace;
    while (tokenIndex < tokens_.size()) {
        currentToken_ = peekToken(peek);
        if (currentToken_.type == Lexer::Tokens::Type::PUNCTUATION) {
            if (currentToken_.value == "{") {
                ++braceDepth;
            } else if (currentToken_.value == "}") {
                if (braceDepth == 0) {
                    closing_brace = currentToken_;
                    break;
                }
                --braceDepth;
            }
        }
        tokenIndex++;
        peek++;
    }
    if (braceDepth != 0) {
        reportError("Unmatched braces in function body");
    }
    std::vector<Lexer::Tokens::Token> filtered_tokens;
    auto                              startIt = std::find(tokens_.begin(), tokens_.end(), opening_brace);
    auto                              endIt   = std::find(tokens_.begin(), tokens_.end(), closing_brace);
    if (startIt != tokens_.end() && endIt != tokens_.end() && startIt < endIt) {
        filtered_tokens = std::vector<Lexer::Tokens::Token>(startIt + 1, endIt);
    }
    std::string_view input_string = input_str_view_.substr(opening_brace.end_pos, closing_brace.end_pos);
    current_token_index_ = tokenIndex;
    expect(Lexer::Tokens::Type::PUNCTUATION, "}");
    const std::string newns = Symbols::SymbolContainer::instance()->currentScopeName() + "." + function_name;
    Symbols::SymbolContainer::instance()->create(newns);
    std::shared_ptr<Parser> parser = std::make_shared<Parser>();
    parser->parseScript(filtered_tokens, input_string, this->current_filename_);
    Symbols::SymbolContainer::instance()->enterPreviousScope();
    // create function
    Interpreter::OperationsFactory::defineFunction(
        function_name, params, return_type, Symbols::SymbolContainer::instance()->currentScopeName(),
        this->current_filename_, currentToken_.line_number, currentToken_.column_number);
}
ParsedExpressionPtr Parser::parseParsedExpression(const Symbols::Variables::Type & expected_var_type) {
    std::stack<std::string>          operator_stack;
    std::vector<ParsedExpressionPtr> output_queue;
    bool expect_unary = true;
    while (true) {
        auto token = currentToken();
        if (token.type == Lexer::Tokens::Type::PUNCTUATION && token.lexeme == "(") {
            operator_stack.push("(");
            consumeToken();
            expect_unary = true;
        } else if (token.type == Lexer::Tokens::Type::PUNCTUATION && token.lexeme == ")") {
            consumeToken();
            while (!operator_stack.empty() && operator_stack.top() != "(") {
                std::string op = operator_stack.top();
                operator_stack.pop();
                if (op == "u-" || op == "u+") {
                    if (output_queue.empty()) {
                        reportError("Missing operand for unary operator");
                    }
                    auto rhs = std::move(output_queue.back());
                    output_queue.pop_back();
                    output_queue.push_back(Lexer::applyOperator(op, std::move(rhs)));
                } else {
                    if (output_queue.size() < 2) {
                        reportError("Malformed expression");
                    }
                    auto rhs = std::move(output_queue.back());
                    output_queue.pop_back();
                    auto lhs = std::move(output_queue.back());
                    output_queue.pop_back();
                    output_queue.push_back(Lexer::applyOperator(op, std::move(rhs), std::move(lhs)));
                }
            }
            if (operator_stack.empty() || operator_stack.top() != "(") {
                reportError("Mismatched parentheses");
            }
            operator_stack.pop();  // remove "("
            expect_unary = false;
        } else if (token.type == Lexer::Tokens::Type::OPERATOR_ARITHMETIC) {
            std::string op = std::string(token.lexeme);
            if (expect_unary && Lexer::isUnaryOperator(op)) {
                op = "u" + op;  // pl. u-, u+ vagy u!
            }
            while (!operator_stack.empty()) {
                const std::string & top = operator_stack.top();
                if ((Lexer::isLeftAssociative(op) && Lexer::getPrecedence(op) <= Lexer::getPrecedence(top)) ||
                    (!Lexer::isLeftAssociative(op) && Lexer::getPrecedence(op) < Lexer::getPrecedence(top))) {
                    operator_stack.pop();
                    if (top == "u-" || top == "u+") {
                        if (output_queue.empty()) {
                            reportError("Missing operand for unary operator");
                        }
                        auto rhs = std::move(output_queue.back());
                        output_queue.pop_back();
                        output_queue.push_back(Lexer::applyOperator(top, std::move(rhs)));
                    } else {
                        if (output_queue.size() < 2) {
                            reportError("Malformed expression");
                        }
                        auto rhs = std::move(output_queue.back());
                        output_queue.pop_back();
                        auto lhs = std::move(output_queue.back());
                        output_queue.pop_back();
                        output_queue.push_back(Lexer::applyOperator(top, std::move(rhs), std::move(lhs)));
                    }
                } else {
                    break;
                }
            }
            operator_stack.push(op);
            consumeToken();
            expect_unary = true;
        } else if (token.type == Lexer::Tokens::Type::NUMBER || token.type == Lexer::Tokens::Type::STRING_LITERAL ||
                   token.type == Lexer::Tokens::Type::KEYWORD ||
                   token.type == Lexer::Tokens::Type::VARIABLE_IDENTIFIER) {
            if (Lexer::pushOperand(token, expected_var_type, output_queue) == false) {
                reportError("Expected literal or variable");
            }
            consumeToken();
            expect_unary = false;
        } else {
            break;
        }
    }
    // Kiürítjük az operator stack-et
    while (!operator_stack.empty()) {
        std::string op = operator_stack.top();
        operator_stack.pop();
        if (op == "(" || op == ")") {
            reportError("Mismatched parentheses");
        }
        if (op == "u-" || op == "u+") {
            if (output_queue.empty()) {
                reportError("Missing operand for unary operator");
            }
            auto rhs = std::move(output_queue.back());
            output_queue.pop_back();
            output_queue.push_back(Lexer::applyOperator(op, std::move(rhs)));
        } else {
            if (output_queue.size() < 2) {
                reportError("Malformed expression");
            }
            auto rhs = std::move(output_queue.back());
            output_queue.pop_back();
            auto lhs = std::move(output_queue.back());
            output_queue.pop_back();
            output_queue.push_back(Lexer::applyOperator(op, std::move(rhs), std::move(lhs)));
        }
    }
    if (output_queue.size() != 1) {
        reportError("Expression could not be parsed cleanly");
    }
    return std::move(output_queue.back());
}
void Parser::parseScript(const std::vector<Lexer::Tokens::Token> & tokens, std::string_view input_string,
                         const std::string & filename) {
    tokens_              = tokens;
    input_str_view_      = input_string;
    current_token_index_ = 0;
    current_filename_    = filename;
    while (!isAtEnd() && currentToken().type != Lexer::Tokens::Type::END_OF_FILE) {
        parseStatement();
    }
    if (!isAtEnd() && currentToken().type != Lexer::Tokens::Type::END_OF_FILE) {
        reportError("Unexpected tokens after program end");
    }
}
}  // namespace Parser