#ifndef PARSER_HPP #define PARSER_HPP #include #include #include #include #include #include #include "Interpreter/ExpressionBuilder.hpp" #include "Interpreter/OperationsFactory.hpp" #include "Lexer/Token.hpp" #include "Lexer/TokenType.hpp" #include "Parser/ParsedExpression.hpp" #include "Symbols/ParameterContainer.hpp" #include "Symbols/SymbolContainer.hpp" #include "Symbols/Value.hpp" namespace Parser { class SyntaxError : public std::runtime_error { public: SyntaxError(const std::string & message, const int line, const int col) : std::runtime_error(message + " at line " + std::to_string(line) + ", column " + std::to_string(col)) {} SyntaxError(const std::string & message, const Lexer::Tokens::Token & token) : SyntaxError( message + " (found token: '" + token.value + "' type: " + Lexer::Tokens::TypeToString(token.type) + ")", token.line_number, token.column_number) {} }; class Parser { public: Parser() {} void parseScript(const std::vector & tokens, std::string_view input_string, const std::string & filename) { tokens_ = tokens; input_str_view_ = input_string; current_token_index_ = 0; current_filename_ = filename; try { while (!isAtEnd() && currentToken().type != Lexer::Tokens::Type::END_OF_FILE) { parseStatement(); } if (!isAtEnd() && currentToken().type != Lexer::Tokens::Type::END_OF_FILE) { reportError("Unexpected tokens after program end"); } } catch (const SyntaxError & e) { std::cerr << "Syntax Error: " << e.what() << '\n'; } catch (const std::exception & e) { std::cerr << "Error during parsing: " << e.what() << '\n'; throw; } } static const std::unordered_map keywords; static const std::unordered_map variable_types; private: std::vector tokens_; std::string_view input_str_view_; size_t current_token_index_; std::string current_filename_; // Token Stream Kezelő és Hibakezelő segédfüggvények (változatlanok) const Lexer::Tokens::Token & currentToken() const { if (isAtEnd()) { // Technikailag itt már nem kellene lennünk, ha a parseProgram ciklus jól van megírva // De biztonsági ellenőrzésként jó lehet if (!tokens_.empty() && tokens_.back().type == Lexer::Tokens::Type::END_OF_FILE) { return tokens_.back(); // Visszaadjuk az EOF tokent } throw std::runtime_error("Unexpected end of token stream reached."); } return tokens_[current_token_index_]; } // Előre néz a token stream-ben const Lexer::Tokens::Token & peekToken(size_t offset = 1) const { if (current_token_index_ + offset >= tokens_.size()) { // EOF vagy azon túl vagyunk, adjuk vissza az utolsó tokent (ami EOF kell legyen) if (!tokens_.empty()) { return tokens_.back(); } throw std::runtime_error("Cannot peek beyond end of token stream."); } return tokens_[current_token_index_ + offset]; } // Elfogyasztja (lépteti az indexet) az aktuális tokent és visszaadja azt Lexer::Tokens::Token consumeToken() { if (isAtEnd()) { throw std::runtime_error("Cannot consume token at end of stream."); } return tokens_[current_token_index_++]; } // Ellenőrzi, hogy az aktuális token típusa megegyezik-e a várttal. // Ha igen, elfogyasztja és true-t ad vissza. Ha nem, false-t ad vissza. bool match(Lexer::Tokens::Type expected_type) { if (isAtEnd()) { return false; } if (currentToken().type == expected_type) { consumeToken(); return true; } return false; } // Ellenőrzi, hogy az aktuális token típusa és értéke megegyezik-e a várttal. // Csak OPERATOR és PUNCTUATION esetén érdemes használni az érték ellenőrzést. bool match(Lexer::Tokens::Type expected_type, const std::string & expected_value) { if (isAtEnd()) { return false; } const auto & token = currentToken(); if (token.type == expected_type && token.value == expected_value) { consumeToken(); return true; } return false; } Lexer::Tokens::Token expect(Lexer::Tokens::Type expected_type) { if (isAtEnd()) { reportError("Unexpected end of file, expected token type: " + Lexer::Tokens::TypeToString(expected_type)); } const auto & token = currentToken(); if (token.type == expected_type) { return consumeToken(); } reportError("Expected token type " + Lexer::Tokens::TypeToString(expected_type)); // A reportError dob, ez a return sosem fut le, de a fordító kedvéért kellhet: return token; // Vagy dobjon a reportError } // Mint az expect, de az értékét is ellenőrzi. Lexer::Tokens::Token expect(Lexer::Tokens::Type expected_type, const std::string & expected_value) { if (isAtEnd()) { reportError("Unexpected end of file, expected token: " + Lexer::Tokens::TypeToString(expected_type) + " with value '" + expected_value + "'"); } const auto & token = currentToken(); if (token.type == expected_type && token.value == expected_value) { return consumeToken(); } reportError("Expected token " + Lexer::Tokens::TypeToString(expected_type) + " with value '" + expected_value + "'"); return token; // reportError dob } // Ellenőrzi, hogy a releváns tokenek végére értünk-e (az EOF előtti utolsó tokenen vagyunk-e) bool isAtEnd() const { // Akkor vagyunk a végén, ha az index a tokenek méretével egyenlő, // vagy ha már csak az EOF token van hátra (ha az a lista utolsó eleme). return current_token_index_ >= tokens_.size() || (current_token_index_ == tokens_.size() - 1 && tokens_.back().type == Lexer::Tokens::Type::END_OF_FILE); } // --- Hibakezelés --- // Hiba jelentése és kivétel dobása [[noreturn]] void reportError(const std::string & message) { // Használjuk az aktuális token pozícióját, ha még nem értünk a végére if (current_token_index_ < tokens_.size()) { throw SyntaxError(message, tokens_[current_token_index_]); } // Ha már a végén vagyunk, az utolsó ismert pozíciót használjuk int line = tokens_.empty() ? 0 : tokens_.back().line_number; int col = tokens_.empty() ? 0 : tokens_.back().column_number; throw SyntaxError(message, line, col); } // --- Elemzési Módszerek (Moduláris részek) --- // parseStatement (változatlan) void parseStatement() { const auto & token_type = currentToken().type; if (token_type == Lexer::Tokens::Type::KEYWORD_FUNCTION_DECLARATION) { parseFunctionDefinition(); return; } for (const auto & _type : Parser::Parser::variable_types) { if (token_type == _type.first) { parseVariableDefinition(); return; } } reportError("Unexpected token at beginning of statement"); } void parseVariableDefinition() { Symbols::Variables::Type var_type = parseType(); Lexer::Tokens::Token id_token = expect(Lexer::Tokens::Type::VARIABLE_IDENTIFIER); std::string var_name = id_token.value; if (!var_name.empty() && var_name[0] == '$') { var_name = var_name.substr(1); } const auto ns = Symbols::SymbolContainer::instance()->currentScopeName(); expect(Lexer::Tokens::Type::OPERATOR_ASSIGNMENT, "="); /* Symbols::Value initial_value = parseValue(var_type); Interpreter::OperationsFactory::defineSimpleVariable(var_name, initial_value, ns, this->current_filename_, id_token.line_number, id_token.column_number); */ auto expr = parseParsedExpression(var_type); Interpreter::OperationsFactory::defineVariableWithExpression( var_name, var_type, std::move(expr), ns, current_filename_, id_token.line_number, id_token.column_number); expect(Lexer::Tokens::Type::PUNCTUATION, ";"); } void parseFunctionDefinition() { expect(Lexer::Tokens::Type::KEYWORD_FUNCTION_DECLARATION); Lexer::Tokens::Token id_token = expect(Lexer::Tokens::Type::IDENTIFIER); std::string func_name = id_token.value; Symbols::Variables::Type func_return_type = Symbols::Variables::Type::NULL_TYPE; expect(Lexer::Tokens::Type::OPERATOR_ASSIGNMENT, "="); expect(Lexer::Tokens::Type::PUNCTUATION, "("); Symbols::FunctionParameterInfo param_infos; if (currentToken().type != Lexer::Tokens::Type::PUNCTUATION || currentToken().value != ")") { while (true) { // Paraméter típusa Symbols::Variables::Type param_type = parseType(); // Ez elfogyasztja a type tokent // Paraméter név ($variable) Lexer::Tokens::Token param_id_token = expect(Lexer::Tokens::Type::VARIABLE_IDENTIFIER); std::string param_name = param_id_token.value; if (!param_name.empty() && param_name[0] == '$') { // '$' eltávolítása param_name = param_name.substr(1); } param_infos.push_back({ param_name, param_type }); // Vessző vagy zárójel következik? if (match(Lexer::Tokens::Type::PUNCTUATION, ",")) { continue; } if (currentToken().type == Lexer::Tokens::Type::PUNCTUATION && currentToken().value == ")") { break; // Lista vége } reportError("Expected ',' or ')' in parameter list"); } } // Most a ')' következik expect(Lexer::Tokens::Type::PUNCTUATION, ")"); // check if we have a option return type: function name() type { ... } for (const auto & _type : Parser::variable_types) { if (match(_type.first)) { func_return_type = _type.second; break; } } Lexer::Tokens::Token opening_brace = expect(Lexer::Tokens::Type::PUNCTUATION, "{"); // only parse the body if we checked out if not exists the function and created the symbol parseFunctionBody(opening_brace, func_name, func_return_type, param_infos); } // --- Elemzési Segédfüggvények --- // type : KEYWORD_STRING | KEYWORD_INT | KEYWORD_DOUBLE // Visszaadja a megfelelő Symbols::Variables::Type enum értéket és elfogyasztja a tokent. Symbols::Variables::Type parseType() { const auto & token = currentToken(); for (const auto & _type : Parser::variable_types) { if (token.type == _type.first) { consumeToken(); return _type.second; } } reportError("Expected type keyword (string, int, double, float)"); } Symbols::Value parseValue(Symbols::Variables::Type expected_var_type) { Lexer::Tokens::Token token = currentToken(); bool is_negative = false; // Előjel kezelése if (token.type == Lexer::Tokens::Type::OPERATOR_ARITHMETIC && (token.lexeme == "-" || token.lexeme == "+") && peekToken().type == Lexer::Tokens::Type::NUMBER) { is_negative = (token.lexeme == "-"); token = peekToken(); consumeToken(); // előjelet elfogyasztottuk } // STRING típus if (expected_var_type == Symbols::Variables::Type::STRING) { if (token.type == Lexer::Tokens::Type::STRING_LITERAL) { consumeToken(); return Symbols::Value(token.value); } reportError("Expected string literal value"); } // BOOLEAN típus if (expected_var_type == Symbols::Variables::Type::BOOLEAN) { if (token.type == Lexer::Tokens::Type::KEYWORD && (token.value == "true" || token.value == "false")) { consumeToken(); return Symbols::Value(token.value == "true"); } reportError("Expected boolean literal value (true or false)"); } // NUMERIC típusok if (expected_var_type == Symbols::Variables::Type::INTEGER || expected_var_type == Symbols::Variables::Type::DOUBLE || expected_var_type == Symbols::Variables::Type::FLOAT) { if (token.type == Lexer::Tokens::Type::NUMBER) { Symbols::Value val = parseNumericLiteral(token.value, is_negative, expected_var_type); consumeToken(); return val; } reportError("Expected numeric literal value"); } reportError("Unsupported variable type encountered during value parsing"); return Symbols::Value(); // compiler happy } Symbols::Value parseNumericLiteral(const std::string & value, bool is_negative, Symbols::Variables::Type type) { try { switch (type) { case Symbols::Variables::Type::INTEGER: { if (value.find('.') != std::string::npos) { throw std::invalid_argument("Floating point value in integer context: " + value); } int v = std::stoi(value); return Symbols::Value(is_negative ? -v : v); } case Symbols::Variables::Type::DOUBLE: { double v = std::stod(value); return Symbols::Value(is_negative ? -v : v); } case Symbols::Variables::Type::FLOAT: { float v = std::stof(value); return Symbols::Value(is_negative ? -v : v); } default: throw std::invalid_argument("Unsupported numeric type"); } } catch (const std::invalid_argument & e) { reportError("Invalid numeric literal: " + value + " (" + e.what() + ")"); } catch (const std::out_of_range & e) { reportError("Numeric literal out of range: " + value + " (" + e.what() + ")"); } return Symbols::Value(); // unreachable } void parseFunctionBody(const Lexer::Tokens::Token & opening_brace, const std::string & function_name, Symbols::Variables::Type return_type, const Symbols::FunctionParameterInfo & params) { size_t braceDepth = 0; int peek = 0; int tokenIndex = current_token_index_; Lexer::Tokens::Token currentToken_; Lexer::Tokens::Token closing_brace; while (tokenIndex < tokens_.size()) { currentToken_ = peekToken(peek); if (currentToken_.type == Lexer::Tokens::Type::PUNCTUATION) { if (currentToken_.value == "{") { ++braceDepth; } else if (currentToken_.value == "}") { if (braceDepth == 0) { closing_brace = currentToken_; break; } --braceDepth; } } tokenIndex++; peek++; } if (braceDepth != 0) { reportError("Unmatched braces in function body"); } std::vector filtered_tokens; auto startIt = std::find(tokens_.begin(), tokens_.end(), opening_brace); auto endIt = std::find(tokens_.begin(), tokens_.end(), closing_brace); if (startIt != tokens_.end() && endIt != tokens_.end() && startIt < endIt) { filtered_tokens = std::vector(startIt + 1, endIt); } std::string_view input_string = input_str_view_.substr(opening_brace.end_pos, closing_brace.end_pos); current_token_index_ = tokenIndex; expect(Lexer::Tokens::Type::PUNCTUATION, "}"); const std::string newns = Symbols::SymbolContainer::instance()->currentScopeName() + "." + function_name; Symbols::SymbolContainer::instance()->create(newns); std::shared_ptr parser = std::make_shared(); parser->parseScript(filtered_tokens, input_string, this->current_filename_); Symbols::SymbolContainer::instance()->enterPreviousScope(); // create function Interpreter::OperationsFactory::defineFunction( function_name, params, return_type, Symbols::SymbolContainer::instance()->currentScopeName(), this->current_filename_, currentToken_.line_number, currentToken_.column_number); } ParsedExpressionPtr parseParsedExpression(const Symbols::Variables::Type & expected_var_type) { std::stack operator_stack; std::vector output_queue; auto getPrecedence = [](const std::string & op) -> int { if (op == "+" || op == "-") { return 1; } if (op == "*" || op == "/") { return 2; } if (op == "u-" || op == "u+") { return 3; } return 0; }; auto isLeftAssociative = [](const std::string & op) -> bool { return !(op == "u-" || op == "u+"); }; auto applyOperator = [](const std::string & op, ParsedExpressionPtr rhs, ParsedExpressionPtr lhs = nullptr) { if (op == "u-" || op == "u+") { std::string real_op = (op == "u-") ? "-" : "+"; return ParsedExpression::makeUnary(real_op, std::move(rhs)); } else { return ParsedExpression::makeBinary(op, std::move(lhs), std::move(rhs)); } }; auto pushOperand = [&](const Lexer::Tokens::Token & token) { if (token.type == Lexer::Tokens::Type::NUMBER || token.type == Lexer::Tokens::Type::STRING_LITERAL || token.type == Lexer::Tokens::Type::KEYWORD) { output_queue.push_back( ParsedExpression::makeLiteral(Symbols::Value::fromString(token.value, expected_var_type))); } else if (token.type == Lexer::Tokens::Type::VARIABLE_IDENTIFIER) { std::string name = token.value; if (!name.empty() && name[0] == '$') { name = name.substr(1); } output_queue.push_back(ParsedExpression::makeVariable(name)); } else { reportError("Expected literal or variable"); } }; bool expect_unary = true; while (true) { auto token = currentToken(); if (token.type == Lexer::Tokens::Type::PUNCTUATION && token.lexeme == "(") { operator_stack.push("("); consumeToken(); expect_unary = true; } else if (token.type == Lexer::Tokens::Type::PUNCTUATION && token.lexeme == ")") { consumeToken(); while (!operator_stack.empty() && operator_stack.top() != "(") { std::string op = operator_stack.top(); operator_stack.pop(); if (op == "u-" || op == "u+") { if (output_queue.empty()) { reportError("Missing operand for unary operator"); } auto rhs = std::move(output_queue.back()); output_queue.pop_back(); output_queue.push_back(applyOperator(op, std::move(rhs))); } else { if (output_queue.size() < 2) { reportError("Malformed expression"); } auto rhs = std::move(output_queue.back()); output_queue.pop_back(); auto lhs = std::move(output_queue.back()); output_queue.pop_back(); output_queue.push_back(applyOperator(op, std::move(rhs), std::move(lhs))); } } if (operator_stack.empty() || operator_stack.top() != "(") { reportError("Mismatched parentheses"); } operator_stack.pop(); // remove "(" expect_unary = false; } else if (token.type == Lexer::Tokens::Type::OPERATOR_ARITHMETIC) { std::string op = std::string(token.lexeme); if (expect_unary && (op == "-" || op == "+")) { op = "u" + op; // pl. u- } while (!operator_stack.empty()) { const std::string & top = operator_stack.top(); if ((isLeftAssociative(op) && getPrecedence(op) <= getPrecedence(top)) || (!isLeftAssociative(op) && getPrecedence(op) < getPrecedence(top))) { operator_stack.pop(); if (top == "u-" || top == "u+") { if (output_queue.empty()) { reportError("Missing operand for unary operator"); } auto rhs = std::move(output_queue.back()); output_queue.pop_back(); output_queue.push_back(applyOperator(top, std::move(rhs))); } else { if (output_queue.size() < 2) { reportError("Malformed expression"); } auto rhs = std::move(output_queue.back()); output_queue.pop_back(); auto lhs = std::move(output_queue.back()); output_queue.pop_back(); output_queue.push_back(applyOperator(top, std::move(rhs), std::move(lhs))); } } else { break; } } operator_stack.push(op); consumeToken(); expect_unary = true; } else if (token.type == Lexer::Tokens::Type::NUMBER || token.type == Lexer::Tokens::Type::STRING_LITERAL || token.type == Lexer::Tokens::Type::KEYWORD || token.type == Lexer::Tokens::Type::VARIABLE_IDENTIFIER) { pushOperand(token); consumeToken(); expect_unary = false; } else { break; } } // Kiürítjük az operator stack-et while (!operator_stack.empty()) { std::string op = operator_stack.top(); operator_stack.pop(); if (op == "(" || op == ")") { reportError("Mismatched parentheses"); } if (op == "u-" || op == "u+") { if (output_queue.empty()) { reportError("Missing operand for unary operator"); } auto rhs = std::move(output_queue.back()); output_queue.pop_back(); output_queue.push_back(applyOperator(op, std::move(rhs))); } else { if (output_queue.size() < 2) { reportError("Malformed expression"); } auto rhs = std::move(output_queue.back()); output_queue.pop_back(); auto lhs = std::move(output_queue.back()); output_queue.pop_back(); output_queue.push_back(applyOperator(op, std::move(rhs), std::move(lhs))); } } if (output_queue.size() != 1) { reportError("Expression could not be parsed cleanly"); } return std::move(output_queue.back()); } }; // class Parser } // namespace Parser #endif // PARSER_HPP