14 files modified
3 files added
| | |
| | | add_library(voidscript |
| | | src/Parser/Parser.cpp |
| | | src/Lexer/Lexer.cpp |
| | | src/Lexer/Operators.cpp |
| | | ) |
| | | |
| | | install(TARGETS voidscript DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT "lib") |
| New file |
| | |
| | | #ifndef BASE_EXCEPTION_HPP |
| | | #define BASE_EXCEPTION_HPP |
| | | |
#include <exception>
#include <string>
| | | |
/**
 * @brief Common base class for the project's exceptions.
 *
 * Stores a raw message plus an optional context string and exposes a formatted
 * text through what(). Subclasses customise the text by overriding
 * formatMessage().
 *
 * The formatted text is built lazily on the first call to what() rather than in
 * the constructor: a virtual call made from a base-class constructor never
 * reaches the derived override, and subclasses that fill rawMessage_ in their
 * own constructor body would otherwise end up with an empty what().
 */
class BaseException : public std::exception {
  public:
    /**
     * @param msg     Raw, unformatted error message.
     * @param context Optional context label included by the default format.
     */
    BaseException(const std::string & msg, const std::string & context = "") : rawMessage_(msg), context_(context) {}

    BaseException()                      = default;
    BaseException(const BaseException &) = default;

    /**
     * @brief Returns the formatted message.
     *
     * If formattedMessage_ is still empty it is produced by formatMessage() and
     * cached. A subclass may also assign formattedMessage_ directly; that value
     * is then returned unchanged.
     *
     * @note The cache is filled without synchronisation, so concurrent first
     *       calls on the same exception object are not safe.
     */
    const char * what() const noexcept override {
        if (formattedMessage_.empty()) {
            try {
                formattedMessage_ = formatMessage();
            } catch (...) {
                // what() must not throw; fall back to a fixed text if formatting fails.
                return "BaseException: failed to format message";
            }
        }
        return formattedMessage_.c_str();
    }

    /// Builds the text returned by what(); override to change the layout.
    virtual std::string formatMessage() const { return "[BaseException] " + context_ + ": " + rawMessage_; }

  protected:
    std::string         rawMessage_;
    std::string         context_;
    mutable std::string formattedMessage_;  // lazily filled cache used by what()
};
| | | |
| | | #endif // BASE_EXCEPTION_HPP |
| | |
| | | #include "Interpreter/ExpressionNode.hpp" |
| | | #include "Interpreter/IdentifierExpressionNode.hpp" |
| | | #include "Interpreter/LiteralExpressionNode.hpp" |
| | | #include "Interpreter/UnaryExpressionNode.hpp" // <-- új include |
| | | #include "Interpreter/UnaryExpressionNode.hpp" // <-- új include |
| | | #include "Parser/ParsedExpression.hpp" |
| | | |
| | | namespace Parser { |
| | | static std::unique_ptr<Interpreter::ExpressionNode> buildExpressionFromParsed( |
| | | const Parser::ParsedExpressionPtr & expr) { |
| | | using Kind = Parser::ParsedExpression::Kind; |
| | | const ParsedExpressionPtr & expr) { |
| | | using Kind = ParsedExpression::Kind; |
| | | |
| | | switch (expr->kind) { |
| | | case Kind::Literal: |
| | |
| | | case Kind::Variable: |
| | | return std::make_unique<Interpreter::IdentifierExpressionNode>(expr->name); |
| | | |
| | | case Kind::Binary: { |
| | | auto lhs = buildExpressionFromParsed(expr->lhs); |
| | | auto rhs = buildExpressionFromParsed(expr->rhs); |
| | | return std::make_unique<Interpreter::BinaryExpressionNode>(std::move(lhs), expr->op, std::move(rhs)); |
| | | } |
| | | case Kind::Binary: |
| | | { |
| | | auto lhs = buildExpressionFromParsed(expr->lhs); |
| | | auto rhs = buildExpressionFromParsed(expr->rhs); |
| | | return std::make_unique<Interpreter::BinaryExpressionNode>(std::move(lhs), expr->op, std::move(rhs)); |
| | | } |
| | | |
| | | case Kind::Unary: { |
| | | auto operand = buildExpressionFromParsed(expr->rhs); // rhs az operandus |
| | | return std::make_unique<Interpreter::UnaryExpressionNode>(expr->op, std::move(operand)); |
| | | } |
| | | case Kind::Unary: |
| | | { |
| | | auto operand = buildExpressionFromParsed(expr->rhs); // rhs az operandus |
| | | return std::make_unique<Interpreter::UnaryExpressionNode>(expr->op, std::move(operand)); |
| | | } |
| | | } |
| | | |
| | | throw std::runtime_error("Unknown ParsedExpression kind"); |
| | | } |
| | | |
| | | void typecheckParsedExpression(const ParsedExpressionPtr & expr) { |
| | | using Kind = ParsedExpression::Kind; |
| | | |
| | | switch (expr->kind) { |
| | | case Kind::Literal: |
| | | { |
| | | // Literál típusának ellenőrzése - a literál típusát a value.getType() adja vissza |
| | | // auto type = expr->value.getType(); |
| | | // Nem szükséges semmilyen más típusellenőrzés a literálokhoz, mivel azok fix típusúak. |
| | | break; |
| | | } |
| | | |
| | | case Kind::Variable: |
| | | { |
| | | const std::string ns = Symbols::SymbolContainer::instance()->currentScopeName() + ".variables"; |
| | | auto symbol = Symbols::SymbolContainer::instance()->get(ns, expr->name); |
| | | if (!symbol) { |
| | | throw std::runtime_error("Variable not found in symbol table: " + expr->name); |
| | | } |
| | | |
| | | // Ha a szimbólum nem egy változó, akkor hibát dobunk |
| | | if (symbol->getKind() == Symbols::Kind::Function) { |
| | | throw std::runtime_error("Cannot use function as variable: " + expr->name); |
| | | } |
| | | break; |
| | | } |
| | | |
| | | case Kind::Binary: |
| | | { |
| | | // Bináris kifejezés operandusainak típusellenőrzése |
| | | typecheckParsedExpression(expr->lhs); |
| | | typecheckParsedExpression(expr->rhs); |
| | | |
| | | auto lhsType = expr->lhs->getType(); |
| | | auto rhsType = expr->rhs->getType(); |
| | | |
| | | if (lhsType != rhsType) { |
| | | throw std::runtime_error( |
| | | "Type mismatch in binary expression: " + Symbols::Variables::TypeToString(lhsType) + " and " + |
| | | Symbols::Variables::TypeToString(rhsType)); |
| | | } |
| | | |
| | | // Bináris operátoroknál is elvégezhetjük a típusellenőrzést: |
| | | // Ha numerikus operátor, akkor az operandusoknak numerikusnak kell lenniük |
| | | if (expr->op == "+" || expr->op == "-" || expr->op == "*" || expr->op == "/") { |
| | | if (lhsType != Symbols::Variables::Type::INTEGER && lhsType != Symbols::Variables::Type::FLOAT) { |
| | | throw std::runtime_error("Operands must be numeric for operator: " + expr->op); |
| | | } |
| | | } |
| | | // Ha logikai operátorok, akkor boolean típus szükséges |
| | | else if (expr->op == "&&" || expr->op == "||") { |
| | | if (lhsType != Symbols::Variables::Type::BOOLEAN) { |
| | | throw std::runtime_error("Operands must be boolean for operator: " + expr->op); |
| | | } |
| | | } |
| | | break; |
| | | } |
| | | |
| | | case Kind::Unary: |
| | | { |
| | | // Unáris kifejezés operandusának típusellenőrzése |
| | | typecheckParsedExpression(expr->rhs); // 'rhs' tárolja az operandust az unáris kifejezésnél |
| | | |
| | | auto operandType = expr->rhs->getType(); |
| | | |
| | | if (expr->op == "!") { |
| | | if (operandType != Symbols::Variables::Type::BOOLEAN) { |
| | | throw std::runtime_error("Operand must be boolean for unary operator '!'"); |
| | | } |
| | | } |
| | | break; |
| | | } |
| | | |
| | | default: |
| | | throw std::runtime_error("Unknown expression kind"); |
| | | } |
| | | } |
| | | |
| | | } // namespace Parser |
| | | |
| | | #endif // PARSEREXPRESSION_BUILDER_HPP |
| | |
// Forwards to `_operations.end()` and returns its result.
| | | auto end() const { return _operations.end(); } |
| | | |
| | | static std::string dump() { |
| | | std::string result = ""; |
| | | std::string result; |
| | | for (const auto & [_, table] : Operations::Container::instance()->_operations) { |
| | | result += "Namespace: " + _ + "\n"; |
| | | for (const auto & operation : table) { |
| | |
| | | if (op_ == "!") { |
| | | return Symbols::Value(!v); |
| | | } |
| | | } else if (type == Symbols::Variables::Type::STRING) { |
| | | std::string s = value.get<std::string>(); |
| | | if (op_ == "-") { |
| | | return Symbols::Value(s); |
| | | } |
| | | if (op_ == "+") { |
| | | return Symbols::Value(s); |
| | | } |
| | | } |
| | | |
| | | throw std::runtime_error("Unsupported unary operator '" + op_ + |
| | |
| | | #include "Lexer/Lexer.hpp" |
| | | |
| | | namespace Lexer { |
| | | const std::vector<std::string> Lexer::Lexer::OPERATOR_RELATIONAL = { "==", "!=", "<", ">", "<=", ">=" }; |
| | | const std::vector<std::string> Lexer::Lexer::OPERATOR_INCREMENT = { "++", "--" }; |
| | | const std::vector<std::string> Lexer::Lexer::OPERATOR_ASSIGNMENT = { "=", "+=", "-=", "*=", "/=", "%=" }; |
| | | const std::vector<std::string> Lexer::Lexer::OPERATOR_LOGICAL = { "&&", "||" }; |
| | | #include "Lexer/Operators.hpp" |
| | | #include "Symbols/SymbolContainer.hpp" |
| | | |
| | | const std::vector<std::string> Lexer::Lexer::OPERATOR_ARITHMETIC = { "+", "-", "*", "/", "%" }; |
| | | const std::vector<std::string> Lexer::Lexer::PUNCTUATION = { "(", ")", "{", "}", "[", "]", ",", ";" }; |
// Tokenizes the input previously registered for namespace `ns`.
//
// Returns an empty vector when no input is registered for `ns`. Otherwise it
// enters `ns` in the SymbolContainer (the cursor helpers look the current
// scope name up there), calls nextToken() until an END_OF_FILE token has been
// produced (that token is included), stores the result in tokens_[ns] and
// returns a copy.
//
// NOTE(review): nothing in this function resets positions_[ns]; a second call
// for the same namespace presumably starts from wherever the cursor was left.
| | | std::vector<Lexer::Tokens::Token> Lexer::Lexer::tokenizeNamespace(const std::string & ns) { |
| | | if (inputs_.find(ns) == inputs_.end()) { |
| | | return {}; |
| | | } |
| | | |
// Make `ns` the current scope so input()/pos()/line()/col() resolve to it.
| | | Symbols::SymbolContainer::instance()->enter(ns); |
| | | |
| | | std::vector<Tokens::Token> tokens; |
| | | Tokens::Token token; |
// do/while so that the terminating END_OF_FILE token is stored as well.
| | | do { |
| | | token = nextToken(); |
| | | tokens.push_back(token); |
| | | } while (token.type != Tokens::Type::END_OF_FILE); |
| | | |
// NOTE(review): the next line looks like a removed line left over from the
// diff rendering (closing of the old `namespace Lexer` block) - confirm.
| | | }; // namespace Lexer |
| | | tokens_[ns] = tokens; |
| | | return tokens; |
| | | } |
| | | |
| | | void Lexer::Lexer::addNamespaceInput(const std::string & ns, const std::string & input) { |
| | | inputs_[ns] = input; |
| | | positions_[ns] = 0; |
| | | line_numbers_[ns] = 1; |
| | | column_numbers_[ns] = 1; |
| | | } |
| | | |
| | | std::vector<Lexer::Tokens::Token> Lexer::Lexer::getTokens(const std::string & ns) const { |
| | | auto it = tokens_.find(ns); |
| | | if (it != tokens_.end()) { |
| | | return it->second; |
| | | } |
| | | return {}; |
| | | } |
| | | |
| | | void Lexer::Lexer::setKeyWords(const std::unordered_map<std::string, Tokens::Type> & new_keywords) { |
| | | keywords = new_keywords; |
| | | } |
| | | |
| | | Lexer::Tokens::Token Lexer::Lexer::nextToken() { |
| | | skipWhitespaceAndComments(); |
| | | size_t start = pos(); |
| | | |
| | | if (isAtEnd()) { |
| | | return createToken(Tokens::Type::END_OF_FILE, start, start); |
| | | } |
| | | |
| | | char c = peek(); |
| | | if (isalpha(c) || c == '_') { |
| | | return matchIdentifierOrKeyword(start); |
| | | } |
| | | if (isdigit(c) || (isdigit(c) && peek(1) == '.') || (c == '.' && isdigit(peek(1)))) { |
| | | return matchNumber(start); |
| | | } |
| | | if (c == '"' || c == '\'') { |
| | | return matchStringLiteral(start); |
| | | } |
| | | if (operators_.find(c) != std::string_view::npos) { |
| | | return matchOperatorOrPunctuation(start); |
| | | } |
| | | |
| | | advance(); |
| | | return createToken(Tokens::Type::UNKNOWN, start, pos()); |
| | | } |
| | | |
| | | const std::string & Lexer::Lexer::input() const { |
| | | const auto & ns = Symbols::SymbolContainer::instance()->currentScopeName(); |
| | | auto it = inputs_.find(ns); |
| | | if (it != inputs_.end()) { |
| | | return it->second; |
| | | } |
| | | throw Exception("Input not found in namespace: " + ns); |
| | | } |
| | | |
| | | size_t & Lexer::Lexer::pos() { |
| | | const auto & ns = Symbols::SymbolContainer::instance()->currentScopeName(); |
| | | auto it = positions_.find(ns); |
| | | if (it != positions_.end()) { |
| | | return it->second; |
| | | } |
| | | throw Exception("Unknown position in namespace: " + ns); |
| | | } |
| | | |
| | | int & Lexer::Lexer::line() { |
| | | const auto & ns = Symbols::SymbolContainer::instance()->currentScopeName(); |
| | | auto it = line_numbers_.find(ns); |
| | | if (it != line_numbers_.end()) { |
| | | return it->second; |
| | | } |
| | | throw Exception("Unknown line number in namespace: " + ns); |
| | | } |
| | | |
| | | int & Lexer::Lexer::col() { |
| | | const auto & ns = Symbols::SymbolContainer::instance()->currentScopeName(); |
| | | auto it = column_numbers_.find(ns); |
| | | if (it != column_numbers_.end()) { |
| | | return it->second; |
| | | } |
| | | throw Exception("Unknown column number in namespace: " + ns); |
| | | } |
| | | |
| | | Lexer::Tokens::Token Lexer::Lexer::createToken(Tokens::Type type, size_t start, size_t end, const std::string & value) { |
| | | Tokens::Token token; |
| | | token.type = type; |
| | | token.start_pos = start; |
| | | token.end_pos = end; |
| | | token.line_number = line(); |
| | | token.column_number = col(); |
| | | if (start <= end && end <= input().length()) { |
| | | token.lexeme = std::string_view(input()).substr(start, end - start); |
| | | token.value = value.empty() ? std::string(token.lexeme) : value; |
| | | } |
| | | return token; |
| | | } |
| | | |
| | | char Lexer::Lexer::peek(size_t offset) const { |
| | | const auto & ns = Symbols::SymbolContainer::instance()->currentScopeName(); |
| | | const auto & in = inputs_.at(ns); |
| | | size_t cp = positions_.at(ns); |
| | | if (cp + offset >= in.length()) { |
| | | return '\0'; |
| | | } |
| | | return in[cp + offset]; |
| | | } |
| | | |
| | | char Lexer::Lexer::advance() { |
| | | char c = peek(); |
| | | pos()++; |
| | | if (c == '\n') { |
| | | line()++; |
| | | col() = 1; |
| | | } else { |
| | | col()++; |
| | | } |
| | | return c; |
| | | } |
| | | |
| | | bool Lexer::Lexer::isAtEnd() const { |
| | | const auto & ns = Symbols::SymbolContainer::instance()->currentScopeName(); |
| | | return positions_.at(ns) >= inputs_.at(ns).length(); |
| | | } |
| | | |
| | | void Lexer::Lexer::skipWhitespaceAndComments() { |
| | | while (!isAtEnd()) { |
| | | char c = peek(); |
| | | if (isspace(c)) { |
| | | advance(); |
| | | } else if ((c == '/' && peek(1) == '/') || c == '#') { |
| | | while (!isAtEnd() && peek() != '\n') { |
| | | advance(); |
| | | } |
| | | } else { |
| | | break; |
| | | } |
| | | } |
| | | } |
| | | |
| | | Lexer::Tokens::Token Lexer::Lexer::matchIdentifierOrKeyword(size_t start_pos, Tokens::Type type) { |
| | | while (!isAtEnd() && (isalnum(peek()) || peek() == '_')) { |
| | | advance(); |
| | | } |
| | | size_t end = pos(); |
| | | std::string value = input().substr(start_pos, end - start_pos); |
| | | if (value.empty()) { |
| | | return createToken(Tokens::Type::UNKNOWN, start_pos, end); |
| | | } |
| | | |
| | | if (type == Tokens::Type::IDENTIFIER) { |
| | | auto it = keywords.find(value); |
| | | if (it != keywords.end()) { |
| | | return createToken(it->second, start_pos, end); |
| | | } |
| | | } |
| | | return createToken(type, start_pos, end); |
| | | } |
| | | |
| | | Lexer::Tokens::Token Lexer::Lexer::matchNumber(size_t start_pos) { |
| | | bool has_dot = false; |
| | | |
| | | while (!isAtEnd()) { |
| | | if (isdigit(peek())) { |
| | | advance(); |
| | | } else if (!has_dot && peek() == '.' && isdigit(peek(1))) { |
| | | has_dot = true; |
| | | advance(); // a pont |
| | | advance(); // az első számjegy a pont után |
| | | } else { |
| | | break; |
| | | } |
| | | } |
| | | |
| | | size_t end = pos(); |
| | | return createToken(Tokens::Type::NUMBER, start_pos, end); |
| | | } |
| | | |
| | | Lexer::Tokens::Token Lexer::Lexer::matchStringLiteral(size_t start_pos) { |
| | | char opening_quote = peek(); |
| | | advance(); // Skip opening quote |
| | | std::string value; |
| | | bool unterminated = false; |
| | | |
| | | while (!isAtEnd()) { |
| | | char c = peek(); |
| | | if (c == opening_quote) { |
| | | advance(); |
| | | break; |
| | | } |
| | | if (c == '\\') { |
| | | advance(); |
| | | char e = advance(); |
| | | switch (e) { |
| | | case 'n': |
| | | value += '\n'; |
| | | break; |
| | | case 't': |
| | | value += '\t'; |
| | | break; |
| | | case '"': |
| | | value += opening_quote; |
| | | break; |
| | | case '\\': |
| | | value += '\\'; |
| | | break; |
| | | default: |
| | | value += e; |
| | | break; |
| | | } |
| | | } else { |
| | | value += advance(); |
| | | } |
| | | } |
| | | |
| | | size_t end = pos(); |
| | | if (unterminated) { |
| | | return createToken(Tokens::Type::UNKNOWN, start_pos, end, input().substr(start_pos, end - start_pos)); |
| | | } |
| | | return createToken(Tokens::Type::STRING_LITERAL, start_pos, end, value); |
| | | } |
| | | |
| | | Lexer::Tokens::Token Lexer::Lexer::matchOperatorOrPunctuation(size_t start_pos) { |
| | | char first_char = advance(); // Első karakter elfogyasztása |
| | | |
| | | if (!isAtEnd()) { |
| | | char second_char = peek(0); // Következő karakter megnézése |
| | | std::string two_chars_str{ first_char, second_char }; |
| | | |
| | | const std::vector<std::pair<const std::vector<std::string> *, Tokens::Type>> two_char_op_types = { |
| | | { &OPERATOR_RELATIONAL, Tokens::Type::OPERATOR_RELATIONAL }, |
| | | { &OPERATOR_INCREMENT, Tokens::Type::OPERATOR_INCREMENT }, |
| | | { &OPERATOR_ASSIGNMENT, Tokens::Type::OPERATOR_ASSIGNMENT }, |
| | | { &OPERATOR_LOGICAL, Tokens::Type::OPERATOR_LOGICAL } |
| | | }; |
| | | |
| | | for (const auto & [vec_ptr, type] : two_char_op_types) { |
| | | if (matchFromVector(*vec_ptr, two_chars_str)) { |
| | | advance(); // Második karakter elfogyasztása |
| | | size_t end_pos = pos(); |
| | | return createToken(type, start_pos, end_pos); |
| | | } |
| | | } |
| | | } |
| | | |
| | | std::string single_char_str(1, first_char); |
| | | |
| | | if (single_char_str == "$") { |
| | | if (isalpha(peek(0)) || peek(0) == '_') { |
| | | return matchIdentifierOrKeyword(start_pos, Tokens::Type::VARIABLE_IDENTIFIER); |
| | | } |
| | | } |
| | | |
| | | const std::vector<std::pair<const std::vector<std::string> *, Tokens::Type>> one_char_op_types = { |
| | | { &OPERATOR_ARITHMETIC, Tokens::Type::OPERATOR_ARITHMETIC }, |
| | | { &OPERATOR_ASSIGNMENT, Tokens::Type::OPERATOR_ASSIGNMENT }, |
| | | { &PUNCTUATION, Tokens::Type::PUNCTUATION } |
| | | }; |
| | | |
| | | for (const auto & [vec_ptr, type] : one_char_op_types) { |
| | | if (matchFromVector(*vec_ptr, single_char_str)) { |
| | | size_t end_pos = pos(); |
| | | return createToken(type, start_pos, end_pos); |
| | | } |
| | | } |
| | | |
| | | size_t end_pos = pos(); |
| | | return createToken(Tokens::Type::UNKNOWN, start_pos, end_pos); |
| | | } |
| | | |
| | | bool Lexer::Lexer::matchFromVector(const std::vector<std::string> & vec, const std::string & value) { |
| | | return std::find(vec.begin(), vec.end(), value) != vec.end(); |
| | | } |
| | | |
| | | Lexer::Lexer::Lexer() { |
| | | for (const auto & vecRef : |
| | | { std::cref(OPERATOR_ARITHMETIC), std::cref(OPERATOR_RELATIONAL), std::cref(OPERATOR_INCREMENT), |
| | | std::cref(OPERATOR_ASSIGNMENT), std::cref(OPERATOR_LOGICAL), std::cref(PUNCTUATION) }) { |
| | | for (const auto & str : vecRef.get()) { |
| | | operators_ += str; |
| | | } |
| | | } |
| | | |
| | | operators_ += "$"; |
| | | } |
| | |
| | | #ifndef LEXER_HPP |
| | | #define LEXER_HPP |
| | | |
| | | #include <algorithm> |
| | | #include <cctype> |
| | | #include <string> |
| | | #include <string_view> |
| | | #include <unordered_map> |
| | | #include <vector> |
| | | |
| | | #include "Symbols/SymbolContainer.hpp" |
| | | #include "BaseException.hpp" |
| | | #include "Token.hpp" |
| | | |
| | | namespace Lexer { |
| | | class Lexer { |
| | | public: |
| | | Lexer() { |
| | | for (const auto & vecRef : |
| | | { std::cref(OPERATOR_ARITHMETIC), std::cref(OPERATOR_RELATIONAL), std::cref(OPERATOR_INCREMENT), |
| | | std::cref(OPERATOR_ASSIGNMENT), std::cref(OPERATOR_LOGICAL), std::cref(PUNCTUATION) }) { |
| | | for (const auto & str : vecRef.get()) { |
| | | operators_ += str; |
| | | } |
| | | Lexer(); |
| | | void addNamespaceInput(const std::string & ns, const std::string & input); |
| | | void setKeyWords(const std::unordered_map<std::string, Tokens::Type> & new_keywords); |
| | | std::vector<Tokens::Token> tokenizeNamespace(const std::string & ns); |
| | | std::vector<Tokens::Token> getTokens(const std::string & ns) const; |
| | | |
| | | class Exception : public BaseException { |
| | | public: |
| | | using BaseException::BaseException; |
| | | |
| | | Exception(const std::string & msg) { |
| | | rawMessage_ = msg; |
| | | context_ = ""; |
| | | } |
| | | |
| | | operators_ += "$"; |
| | | } |
| | | |
| | | void addNamespaceInput(const std::string & ns, const std::string & input) { |
| | | inputs_[ns] = input; |
| | | positions_[ns] = 0; |
| | | line_numbers_[ns] = 1; |
| | | column_numbers_[ns] = 1; |
| | | } |
| | | |
| | | std::vector<Tokens::Token> tokenizeNamespace(const std::string & ns) { |
| | | if (inputs_.find(ns) == inputs_.end()) { |
| | | return {}; |
| | | } |
| | | |
| | | Symbols::SymbolContainer::instance()->enter(ns); |
| | | |
| | | std::vector<Tokens::Token> tokens; |
| | | Tokens::Token token; |
| | | do { |
| | | token = nextToken(); |
| | | tokens.push_back(token); |
| | | } while (token.type != Tokens::Type::END_OF_FILE); |
| | | |
| | | tokens_[ns] = tokens; |
| | | return tokens; |
| | | } |
| | | |
| | | std::vector<Tokens::Token> getTokens(const std::string & ns) const { |
| | | auto it = tokens_.find(ns); |
| | | if (it != tokens_.end()) { |
| | | return it->second; |
| | | } |
| | | return {}; |
| | | } |
| | | |
| | | void setKeyWords(const std::unordered_map<std::string, Tokens::Type> & new_keywords) { keywords = new_keywords; } |
| | | |
| | | Tokens::Token nextToken() { |
| | | skipWhitespaceAndComments(); |
| | | size_t start = pos(); |
| | | |
| | | if (isAtEnd()) { |
| | | return createToken(Tokens::Type::END_OF_FILE, start, start); |
| | | } |
| | | |
| | | char c = peek(); |
| | | if (isalpha(c) || c == '_') { |
| | | return matchIdentifierOrKeyword(start); |
| | | } |
| | | if (isdigit(c) || (isdigit(c) && peek(1) == '.') || (c == '.' && isdigit(peek(1)))) { |
| | | return matchNumber(start); |
| | | } |
| | | if (c == '"' || c == '\'') { |
| | | return matchStringLiteral(start); |
| | | } |
| | | if (operators_.find(c) != std::string_view::npos) { |
| | | return matchOperatorOrPunctuation(start); |
| | | } |
| | | |
| | | advance(); |
| | | return createToken(Tokens::Type::UNKNOWN, start, pos()); |
| | | } |
| | | std::string formatMessage() const override { return "[LEXER ERROR]: " + rawMessage_; } |
| | | }; |
| | | |
| | | |
| | | private: |
| | |
| | | std::unordered_map<std::string, size_t> positions_; |
| | | std::unordered_map<std::string, int> line_numbers_; |
| | | std::unordered_map<std::string, int> column_numbers_; |
| | | std::unordered_map<std::string, Tokens::Type> keywords; |
| | | std::string operators_; |
| | | Tokens::Token nextToken(); |
| | | |
| | | std::string operators_; |
| | | std::unordered_map<std::string, Tokens::Type> keywords; |
| | | |
| | | // two chars |
| | | static const std::vector<std::string> OPERATOR_RELATIONAL; |
| | | static const std::vector<std::string> OPERATOR_INCREMENT; |
| | | static const std::vector<std::string> OPERATOR_ASSIGNMENT; |
| | | static const std::vector<std::string> OPERATOR_LOGICAL; |
| | | |
| | | // one char |
| | | static const std::vector<std::string> OPERATOR_ARITHMETIC; |
| | | static const std::vector<std::string> PUNCTUATION; |
| | | |
| | | const std::string & input() const { |
| | | const auto & ns = Symbols::SymbolContainer::instance()->currentScopeName(); |
| | | auto it = inputs_.find(ns); |
| | | if (it != inputs_.end()) { |
| | | return it->second; |
| | | } |
| | | throw std::runtime_error("Input not found in namespace: " + ns); |
| | | } |
| | | |
| | | size_t & pos() { |
| | | const auto & ns = Symbols::SymbolContainer::instance()->currentScopeName(); |
| | | auto it = positions_.find(ns); |
| | | if (it != positions_.end()) { |
| | | return it->second; |
| | | } |
| | | throw std::runtime_error("Unknown position in namespace: " + ns); |
| | | } |
| | | |
| | | int & line() { |
| | | const auto & ns = Symbols::SymbolContainer::instance()->currentScopeName(); |
| | | auto it = line_numbers_.find(ns); |
| | | if (it != line_numbers_.end()) { |
| | | return it->second; |
| | | } |
| | | throw std::runtime_error("Unknown line number in namespace: " + ns); |
| | | } |
| | | |
| | | int & col() { |
| | | const auto & ns = Symbols::SymbolContainer::instance()->currentScopeName(); |
| | | auto it = column_numbers_.find(ns); |
| | | if (it != column_numbers_.end()) { |
| | | return it->second; |
| | | } |
| | | throw std::runtime_error("Unknown column number in namespace: " + ns); |
| | | } |
| | | |
| | | Tokens::Token createToken(Tokens::Type type, size_t start, size_t end, const std::string & value = "") { |
| | | Tokens::Token token; |
| | | token.type = type; |
| | | token.start_pos = start; |
| | | token.end_pos = end; |
| | | token.line_number = line(); |
| | | token.column_number = col(); |
| | | if (start <= end && end <= input().length()) { |
| | | token.lexeme = std::string_view(input()).substr(start, end - start); |
| | | token.value = value.empty() ? std::string(token.lexeme) : value; |
| | | } |
| | | return token; |
| | | } |
| | | |
| | | // -------------------------------------- |
| | | |
| | | char peek(size_t offset = 0) const { |
| | | const auto & ns = Symbols::SymbolContainer::instance()->currentScopeName(); |
| | | const auto & in = inputs_.at(ns); |
| | | size_t cp = positions_.at(ns); |
| | | if (cp + offset >= in.length()) { |
| | | return '\0'; |
| | | } |
| | | return in[cp + offset]; |
| | | } |
| | | |
| | | char advance() { |
| | | char c = peek(); |
| | | pos()++; |
| | | if (c == '\n') { |
| | | line()++; |
| | | col() = 1; |
| | | } else { |
| | | col()++; |
| | | } |
| | | return c; |
| | | } |
| | | |
| | | bool isAtEnd() const { |
| | | const auto & ns = Symbols::SymbolContainer::instance()->currentScopeName(); |
| | | return positions_.at(ns) >= inputs_.at(ns).length(); |
| | | } |
| | | |
| | | void skipWhitespaceAndComments() { |
| | | while (!isAtEnd()) { |
| | | char c = peek(); |
| | | if (isspace(c)) { |
| | | advance(); |
| | | } else if ((c == '/' && peek(1) == '/') || c == '#') { |
| | | while (!isAtEnd() && peek() != '\n') { |
| | | advance(); |
| | | } |
| | | } else { |
| | | break; |
| | | } |
| | | } |
| | | } |
| | | |
| | | Tokens::Token matchIdentifierOrKeyword(size_t start_pos, Tokens::Type type = Tokens::Type::IDENTIFIER) { |
| | | while (!isAtEnd() && (isalnum(peek()) || peek() == '_')) { |
| | | advance(); |
| | | } |
| | | size_t end = pos(); |
| | | std::string value = input().substr(start_pos, end - start_pos); |
| | | if (value.empty()) { |
| | | return createToken(Tokens::Type::UNKNOWN, start_pos, end); |
| | | } |
| | | |
| | | if (type == Tokens::Type::IDENTIFIER) { |
| | | auto it = keywords.find(value); |
| | | if (it != keywords.end()) { |
| | | return createToken(it->second, start_pos, end); |
| | | } |
| | | } |
| | | return createToken(type, start_pos, end); |
| | | } |
| | | |
| | | Tokens::Token matchNumber(size_t start_pos) { |
| | | bool has_dot = false; |
| | | |
| | | while (!isAtEnd()) { |
| | | if (isdigit(peek())) { |
| | | advance(); |
| | | } else if (!has_dot && peek() == '.' && isdigit(peek(1))) { |
| | | has_dot = true; |
| | | advance(); // a pont |
| | | advance(); // az első számjegy a pont után |
| | | } else { |
| | | break; |
| | | } |
| | | } |
| | | |
| | | size_t end = pos(); |
| | | return createToken(Tokens::Type::NUMBER, start_pos, end); |
| | | } |
| | | |
| | | Tokens::Token matchStringLiteral(size_t start_pos) { |
| | | char opening_quote = peek(); |
| | | advance(); // Skip opening quote |
| | | std::string value; |
| | | bool unterminated = false; |
| | | |
| | | while (!isAtEnd()) { |
| | | char c = peek(); |
| | | if (c == opening_quote) { |
| | | advance(); |
| | | break; |
| | | } |
| | | if (c == '\\') { |
| | | advance(); |
| | | char e = advance(); |
| | | switch (e) { |
| | | case 'n': |
| | | value += '\n'; |
| | | break; |
| | | case 't': |
| | | value += '\t'; |
| | | break; |
| | | case '"': |
| | | value += opening_quote; |
| | | break; |
| | | case '\\': |
| | | value += '\\'; |
| | | break; |
| | | default: |
| | | value += e; |
| | | break; |
| | | } |
| | | } else { |
| | | value += advance(); |
| | | } |
| | | } |
| | | |
| | | size_t end = pos(); |
| | | if (unterminated) { |
| | | return createToken(Tokens::Type::UNKNOWN, start_pos, end, input().substr(start_pos, end - start_pos)); |
| | | } |
| | | return createToken(Tokens::Type::STRING_LITERAL, start_pos, end, value); |
| | | } |
| | | |
| | | Tokens::Token matchOperatorOrPunctuation(size_t start_pos) { |
| | | char first_char = advance(); // Első karakter elfogyasztása |
| | | |
| | | if (!isAtEnd()) { |
| | | char second_char = peek(0); // Következő karakter megnézése |
| | | std::string two_chars_str{ first_char, second_char }; |
| | | |
| | | const std::vector<std::pair<const std::vector<std::string> *, Tokens::Type>> two_char_op_types = { |
| | | { &OPERATOR_RELATIONAL, Tokens::Type::OPERATOR_RELATIONAL }, |
| | | { &OPERATOR_INCREMENT, Tokens::Type::OPERATOR_INCREMENT }, |
| | | { &OPERATOR_ASSIGNMENT, Tokens::Type::OPERATOR_ASSIGNMENT }, |
| | | { &OPERATOR_LOGICAL, Tokens::Type::OPERATOR_LOGICAL } |
| | | }; |
| | | |
| | | for (const auto & [vec_ptr, type] : two_char_op_types) { |
| | | if (matchFromVector(*vec_ptr, two_chars_str)) { |
| | | advance(); // Második karakter elfogyasztása |
| | | size_t end_pos = pos(); |
| | | return createToken(type, start_pos, end_pos); |
| | | } |
| | | } |
| | | } |
| | | |
| | | // Egykarakteres operátor vagy írásjel |
| | | std::string single_char_str(1, first_char); |
| | | |
| | | if (single_char_str == "$") { |
| | | if (isalpha(peek(0)) || peek(0) == '_') { |
| | | return matchIdentifierOrKeyword(start_pos, Tokens::Type::VARIABLE_IDENTIFIER); |
| | | } |
| | | } |
| | | |
| | | const std::vector<std::pair<const std::vector<std::string> *, Tokens::Type>> one_char_op_types = { |
| | | { &OPERATOR_ARITHMETIC, Tokens::Type::OPERATOR_ARITHMETIC }, |
| | | { &OPERATOR_ASSIGNMENT, Tokens::Type::OPERATOR_ASSIGNMENT }, // "=" itt van! |
| | | { &PUNCTUATION, Tokens::Type::PUNCTUATION } |
| | | }; |
| | | |
| | | for (const auto & [vec_ptr, type] : one_char_op_types) { |
| | | if (matchFromVector(*vec_ptr, single_char_str)) { |
| | | size_t end_pos = pos(); |
| | | return createToken(type, start_pos, end_pos); |
| | | } |
| | | } |
| | | |
| | | size_t end_pos = pos(); |
| | | return createToken(Tokens::Type::UNKNOWN, start_pos, end_pos); |
| | | } |
| | | |
| | | static bool matchFromVector(const std::vector<std::string> & vec, const std::string & value) { |
| | | return std::find(vec.begin(), vec.end(), value) != vec.end(); |
| | | } |
| | | const std::string & input() const; |
| | | size_t & pos(); |
| | | int & line(); |
| | | int & col(); |
| | | char peek(size_t offset = 0) const; |
| | | char advance(); |
| | | bool isAtEnd() const; |
| | | void skipWhitespaceAndComments(); |
| | | static bool matchFromVector(const std::vector<std::string> & vec, const std::string & value); |
| | | Tokens::Token createToken(Tokens::Type type, size_t start, size_t end, const std::string & value = ""); |
| | | Tokens::Token matchIdentifierOrKeyword(size_t start_pos, Tokens::Type type = Tokens::Type::IDENTIFIER); |
| | | Tokens::Token matchNumber(size_t start_pos); |
| | | Tokens::Token matchStringLiteral(size_t start_pos); |
| | | Tokens::Token matchOperatorOrPunctuation(size_t start_pos); |
| | | |
| | | }; // class Lexer |
| | | |
| New file |
| | |
| | | #include "Lexer/Operators.hpp" |
| | | |
| | | namespace Lexer { |
| | | |
// Operator spelling tables. The first four mix one- and two-character
// spellings ("<" and ">" in the relational table, "=" in the assignment table).
| | | const std::vector<std::string> OPERATOR_RELATIONAL = { "==", "!=", "<", ">", "<=", ">=" }; |
| | | const std::vector<std::string> OPERATOR_INCREMENT = { "++", "--" }; |
| | | const std::vector<std::string> OPERATOR_ASSIGNMENT = { "=", "+=", "-=", "*=", "/=", "%=" }; |
| | | const std::vector<std::string> OPERATOR_LOGICAL = { "&&", "||" }; |
| | | |
// Single-character tables. Note that "!" is listed with the arithmetic
// operators although isUnaryOperator() treats it as a unary operator.
| | | const std::vector<std::string> OPERATOR_ARITHMETIC = { "+", "-", "*", "/", "%", "!" }; |
| | | const std::vector<std::string> PUNCTUATION = { "(", ")", "{", "}", "[", "]", ",", ";" }; |
| | | |
/**
 * @brief Tells whether `value` is an element of `vec`.
 */
bool contains(const std::vector<std::string> & vec, const std::string & value) {
    for (const auto & candidate : vec) {
        if (candidate == value) {
            return true;
        }
    }
    return false;
}
| | | |
/**
 * @brief Tells whether `op` is one of the unary operator spellings "+", "-" or "!".
 */
bool isUnaryOperator(const std::string & op) {
    if (op.size() != 1) {
        return false;
    }
    switch (op.front()) {
        case '+':
        case '-':
        case '!':
            return true;
        default:
            return false;
    }
}
| | | |
| | | bool isBinaryOperator(const std::string & op) { |
| | | return contains(OPERATOR_ARITHMETIC, op) || contains(OPERATOR_LOGICAL, op) || |
| | | contains(OPERATOR_RELATIONAL, op); |
| | | } |
| | | |
| | | } // namespace Lexer |
| New file |
| | |
| | | #ifndef LEXER_OPERATORS_HPP |
| | | #define LEXER_OPERATORS_HPP |
| | | |
| | | #include <string> |
| | | #include <vector> |
| | | |
| | | #include "Lexer/Token.hpp" |
| | | #include "Parser/ParsedExpression.hpp" |
| | | |
| | | namespace Lexer { |
| | | |
| | | // relational, increment, assignment and logical operators (these tables mix one- and two-character spellings) |
| | | extern const std::vector<std::string> OPERATOR_RELATIONAL; |
| | | extern const std::vector<std::string> OPERATOR_INCREMENT; |
| | | extern const std::vector<std::string> OPERATOR_ASSIGNMENT; |
| | | extern const std::vector<std::string> OPERATOR_LOGICAL; |
| | | |
| | | // single-character arithmetic operators and punctuation |
| | | extern const std::vector<std::string> OPERATOR_ARITHMETIC; |
| | | extern const std::vector<std::string> PUNCTUATION; |
| | | |
| | | bool contains(const std::vector<std::string> & vec, const std::string & value); |
| | | bool isUnaryOperator(const std::string & op); |
| | | bool isBinaryOperator(const std::string & op); |
| | | |
| | | /// Binding strength of an operator as used by the expression parser; a larger number binds tighter.
| | | ///
| | | /// Prefix operators are passed in with a leading 'u' ("u-", "u+", "u!").
| | | /// @return 4 for prefix operators, 3 for "*", "/", "%", 2 for "+", "-", 1 for comparisons,
| | | ///         0 for "&&" and "||", and -1 for every other spelling.
| | | inline int getPrecedence(const std::string & op) {
| | |     struct Level {
| | |         const char * spelling;
| | |         int          rank;
| | |     };
| | | 
| | |     static const Level levels[] = {
| | |         { "u-", 4 }, { "u+", 4 }, { "u!", 4 },
| | |         { "*",  3 }, { "/",  3 }, { "%",  3 },
| | |         { "+",  2 }, { "-",  2 },
| | |         { "==", 1 }, { "!=", 1 }, { "<",  1 }, { ">", 1 }, { "<=", 1 }, { ">=", 1 },
| | |         { "&&", 0 }, { "||", 0 },
| | |     };
| | | 
| | |     for (const auto & level : levels) {
| | |         if (op == level.spelling) {
| | |             return level.rank;
| | |         }
| | |     }
| | |     return -1;
| | | }
| | | |
| | | /// Tells the expression parser whether `op` groups left-to-right.
| | | ///
| | | /// Prefix operators (marked with a leading 'u': "u-", "u+", "u!") group right-to-left; every
| | | /// other spelling is treated as left-associative. "u!" must be part of the right-associative
| | | /// set: otherwise a chain such as `!!x` or `-!x` pops the pending prefix operator before its
| | | /// operand has been read.
| | | inline bool isLeftAssociative(const std::string & op) {
| | |     return !(op == "u-" || op == "u+" || op == "u!");
| | | }
| | | |
| | | /// Builds the expression node for one operator application.
| | | ///
| | | /// @param op  Operator spelling; a leading 'u' marks a prefix operator (e.g. "u!" stands for "!").
| | | /// @param rhs Right-hand operand, or the only operand of a prefix operator.
| | | /// @param lhs Left-hand operand; not used when `op` is a prefix operator.
| | | /// @return A unary node for 'u'-prefixed spellings, a binary node otherwise.
| | | inline Parser::ParsedExpressionPtr applyOperator(const std::string & op, Parser::ParsedExpressionPtr rhs,
| | |                                                  Parser::ParsedExpressionPtr lhs = nullptr) {
| | |     const bool is_prefix_form = op.starts_with("u");
| | |     if (!is_prefix_form) {
| | |         return Parser::ParsedExpression::makeBinary(op, std::move(lhs), std::move(rhs));
| | |     }
| | | 
| | |     // Drop the 'u' marker so the node carries the plain operator symbol.
| | |     std::string plain_symbol = op.substr(1);
| | |     return Parser::ParsedExpression::makeUnary(plain_symbol, std::move(rhs));
| | | }
| | | |
| | | /// Appends the operand described by `token` to `output_queue`.
| | | ///
| | | /// NUMBER, STRING_LITERAL and KEYWORD tokens become literal nodes whose value is produced by
| | | /// Symbols::Value::fromString with `expected_var_type`; VARIABLE_IDENTIFIER tokens become
| | | /// variable nodes named without the leading '$'.
| | | /// @return true when an operand was appended, false for any other token type.
| | | [[nodiscard]] inline bool pushOperand(const Tokens::Token & token, const Symbols::Variables::Type & expected_var_type,
| | |                                       std::vector<Parser::ParsedExpressionPtr> & output_queue) {
| | |     switch (token.type) {
| | |         case Tokens::Type::NUMBER:
| | |         case Tokens::Type::STRING_LITERAL:
| | |         case Tokens::Type::KEYWORD:
| | |             output_queue.push_back(
| | |                 Parser::ParsedExpression::makeLiteral(Symbols::Value::fromString(token.value, expected_var_type)));
| | |             return true;
| | | 
| | |         case Tokens::Type::VARIABLE_IDENTIFIER:
| | |             {
| | |                 std::string bare_name = token.value;
| | |                 if (!bare_name.empty() && bare_name.front() == '$') {
| | |                     bare_name.erase(0, 1);
| | |                 }
| | |                 output_queue.push_back(Parser::ParsedExpression::makeVariable(bare_name));
| | |                 return true;
| | |             }
| | | 
| | |         default:
| | |             return false;
| | |     }
| | | }
| | | |
| | | }; // namespace Lexer |
| | | |
| | | #endif // LEXER_OPERATORS_HPP |
| | |
| | | #define TOKEN_HPP |
| | | |
| | | #include <iostream> |
| | | #include <string> // <<< Hozzáadva |
| | | #include <string_view> // <<< Hozzáadva |
| | | #include <string> |
| | | #include <string_view> |
| | | |
| | | #include "Lexer/TokenType.hpp" // Feltételezzük, hogy ez definiálja Type-ot és TypeToString-ot |
| | | #include "Lexer/TokenType.hpp" |
| | | |
| | | namespace Lexer::Tokens { |
| | | |
| | |
| | | << ", Lexeme: \"" << std::string(lexeme) << "\"" |
| | | << " }" << '\n'; |
| | | } |
| | | |
| | | /// Renders the token as one line of text (type, value, position range and lexeme),
| | | /// terminated by a newline.
| | | std::string dump() const {
| | |     std::string text = "Token { Type: ";
| | |     text += Lexer::Tokens::TypeToString(type);
| | |     text += ", Value: \"";
| | |     text += value;
| | |     text += "\"";
| | |     text += ", Pos: [";
| | |     text += std::to_string(start_pos);
| | |     text += ", ";
| | |     text += std::to_string(end_pos);
| | |     text += ")";
| | |     text += ", Lexeme: \"";
| | |     text += std::string(lexeme);
| | |     text += "\"";
| | |     text += " }";
| | |     text += '\n';
| | |     return text;
| | | }
| | | }; |
| | | |
| | | inline bool operator==(const Token & lhs, const Token & rhs) { |
| | |
| | | #include <memory> |
| | | #include <string> |
| | | |
| | | #include "Symbols/SymbolContainer.hpp" |
| | | #include "Symbols/Value.hpp" |
| | | |
| | | namespace Parser { |
| | |
| | | expr->rhs = std::move(operand); |
| | | return expr; |
| | | } |
| | | |
| | | /// Determines the value type this expression evaluates to.
| | | ///
| | | /// - Literal:  the type of the stored value.
| | | /// - Variable: the type of the symbol found in "<current scope>.variables".
| | | /// - Binary:   the type of the left operand, resolved recursively so that variable and
| | | ///             nested operands work too (both sides are expected to share one type).
| | | /// - Unary:    BOOLEAN for "!", otherwise the type of the operand.
| | | ///
| | | /// @throws std::runtime_error when a variable is unknown, an operand is missing, or the
| | | ///         expression kind is not recognised.
| | | Symbols::Variables::Type getType() const {
| | |     switch (kind) {
| | |         case Kind::Literal:
| | |             return value.getType();
| | | 
| | |         case Kind::Variable:
| | |             {
| | |                 const auto ns     = Symbols::SymbolContainer::instance()->currentScopeName() + ".variables";
| | |                 auto       symbol = Symbols::SymbolContainer::instance()->get(ns, name);
| | |                 if (!symbol) {
| | |                     throw std::runtime_error("Unknown variable: " + name + " in namespace: " + ns +
| | |                                              " File: " + __FILE__ + ":" + std::to_string(__LINE__));
| | |                 }
| | |                 return symbol->getValue().getType();
| | |             }
| | | 
| | |         case Kind::Binary:
| | |             {
| | |                 if (!lhs) {
| | |                     throw std::runtime_error("Binary expression is missing its left operand");
| | |                 }
| | |                 // Ask the operand itself: reading lhs->value directly is only meaningful
| | |                 // when the operand happens to be a literal.
| | |                 return lhs->getType();
| | |             }
| | | 
| | |         case Kind::Unary:
| | |             {
| | |                 if (op == "!") {
| | |                     return Symbols::Variables::Type::BOOLEAN;  // the '!' operator works on booleans
| | |                 }
| | |                 if (!rhs) {
| | |                     throw std::runtime_error("Unary expression is missing its operand");
| | |                 }
| | |                 // Any other prefix operator yields a value of its operand's type.
| | |                 return rhs->getType();
| | |             }
| | | 
| | |         default:
| | |             throw std::runtime_error("Unknown expression kind");
| | |     }
| | | 
| | |     throw std::runtime_error("Could not determine type for expression");
| | | }
| | | }; |
| | | |
| | | } // namespace Parser |
| | |
| | | #include "Parser/Parser.hpp" |
| | | #include <stack> |
| | | |
| | | #include "Interpreter/OperationsFactory.hpp" |
| | | #include "Lexer/Operators.hpp" |
| | | |
| | | // Other required includes, if any are needed
| | | namespace Parser { |
| | | |
| | | // Reserved words recognised by the parser, mapped to the token type each one produces.
| | | // Each word is listed exactly once: an unordered_map built from an initializer list keeps a
| | | // single entry per key, so repeating a row has no effect.
| | | const std::unordered_map<std::string, Lexer::Tokens::Type> Parser::keywords = {
| | |     { "if",       Lexer::Tokens::Type::KEYWORD                      },
| | |     { "else",     Lexer::Tokens::Type::KEYWORD                      },
| | |     { "while",    Lexer::Tokens::Type::KEYWORD                      },
| | |     { "for",      Lexer::Tokens::Type::KEYWORD                      },
| | |     { "return",   Lexer::Tokens::Type::KEYWORD_RETURN               },
| | |     { "function", Lexer::Tokens::Type::KEYWORD_FUNCTION_DECLARATION },
| | |     // Older entries:
| | |     { "const",    Lexer::Tokens::Type::KEYWORD                      },
| | |     { "true",     Lexer::Tokens::Type::KEYWORD                      },
| | |     { "false",    Lexer::Tokens::Type::KEYWORD                      },
| | |     // variable types
| | |     { "null",     Lexer::Tokens::Type::KEYWORD_NULL                 },
| | |     { "int",      Lexer::Tokens::Type::KEYWORD_INT                  },
| | |     { "double",   Lexer::Tokens::Type::KEYWORD_DOUBLE               },
| | |     { "float",    Lexer::Tokens::Type::KEYWORD_FLOAT                },
| | |     { "string",   Lexer::Tokens::Type::KEYWORD_STRING               },
| | |     { "boolean",  Lexer::Tokens::Type::KEYWORD_BOOLEAN              },
| | |     { "bool",     Lexer::Tokens::Type::KEYWORD_BOOLEAN              },
| | |     // ... other keywords ...
| | | };
| | | |
| | |
| | | { Lexer::Tokens::Type::KEYWORD_BOOLEAN, Symbols::Variables::Type::BOOLEAN }, |
| | | }; |
| | | |
| | | /// Parses a statement of the form `<type> $name = <expression>;` and hands the result to
| | | /// Interpreter::OperationsFactory::defineVariableWithExpression, using the scope that is
| | | /// current when the variable name has been read.
| | | void Parser::parseVariableDefinition() {
| | |     Symbols::Variables::Type declared_type = parseType();
| | | 
| | |     Lexer::Tokens::Token name_token = expect(Lexer::Tokens::Type::VARIABLE_IDENTIFIER);
| | | 
| | |     // Variables are stored under their bare name, without the '$' sigil.
| | |     std::string bare_name = name_token.value;
| | |     if (!bare_name.empty() && bare_name.front() == '$') {
| | |         bare_name.erase(0, 1);
| | |     }
| | | 
| | |     const auto scope_name = Symbols::SymbolContainer::instance()->currentScopeName();
| | | 
| | |     expect(Lexer::Tokens::Type::OPERATOR_ASSIGNMENT, "=");
| | | 
| | |     auto initializer = parseParsedExpression(declared_type);
| | |     Interpreter::OperationsFactory::defineVariableWithExpression(bare_name, declared_type, std::move(initializer),
| | |                                                                  scope_name, current_filename_,
| | |                                                                  name_token.line_number, name_token.column_number);
| | | 
| | |     expect(Lexer::Tokens::Type::PUNCTUATION, ";");
| | | }
| | | |
| | | /// Parses a function definition header: the function keyword, an identifier, "=", a
| | | /// parenthesised (possibly empty) parameter list of `<type> $name` pairs, an optional
| | | /// return type and the opening "{". The body is then handled by parseFunctionBody().
| | | /// When no return type is given, NULL_TYPE is used.
| | | void Parser::parseFunctionDefinition() {
| | |     expect(Lexer::Tokens::Type::KEYWORD_FUNCTION_DECLARATION);
| | |     Lexer::Tokens::Token     name_token  = expect(Lexer::Tokens::Type::IDENTIFIER);
| | |     std::string              func_name   = name_token.value;
| | |     Symbols::Variables::Type return_kind = Symbols::Variables::Type::NULL_TYPE;
| | |     expect(Lexer::Tokens::Type::OPERATOR_ASSIGNMENT, "=");
| | |     expect(Lexer::Tokens::Type::PUNCTUATION, "(");
| | | 
| | |     Symbols::FunctionParameterInfo param_infos;
| | | 
| | |     const auto at_closing_paren = [this]() {
| | |         return currentToken().type == Lexer::Tokens::Type::PUNCTUATION && currentToken().value == ")";
| | |     };
| | | 
| | |     if (!at_closing_paren()) {
| | |         do {
| | |             // Parameter type (this consumes the type token)
| | |             Symbols::Variables::Type param_type = parseType();
| | | 
| | |             // Parameter name ($variable), stored without the '$'
| | |             Lexer::Tokens::Token param_token = expect(Lexer::Tokens::Type::VARIABLE_IDENTIFIER);
| | |             std::string          param_name  = param_token.value;
| | |             if (!param_name.empty() && param_name.front() == '$') {
| | |                 param_name.erase(0, 1);
| | |             }
| | | 
| | |             param_infos.push_back({ param_name, param_type });
| | | 
| | |             // A comma means another parameter follows
| | |         } while (match(Lexer::Tokens::Type::PUNCTUATION, ","));
| | | 
| | |         // Anything other than ')' at this point ends the list incorrectly
| | |         if (!at_closing_paren()) {
| | |             reportError("Expected ',' or ')' in parameter list");
| | |         }
| | |     }
| | |     // Now the ')' follows
| | |     expect(Lexer::Tokens::Type::PUNCTUATION, ")");
| | | 
| | |     // Optional return type between ')' and '{': take the first known type token that matches
| | |     for (const auto & type_entry : Parser::variable_types) {
| | |         if (!match(type_entry.first)) {
| | |             continue;
| | |         }
| | |         return_kind = type_entry.second;
| | |         break;
| | |     }
| | | 
| | |     Lexer::Tokens::Token opening_brace = expect(Lexer::Tokens::Type::PUNCTUATION, "{");
| | | 
| | |     // The body is consumed and parsed separately, starting right after the opening brace
| | |     parseFunctionBody(opening_brace, func_name, return_kind, param_infos);
| | | }
| | | |
| | | /// Converts the text of a numeric literal into a Symbols::Value of the requested type.
| | | ///
| | | /// @param value       Digits of the literal (converted with std::stoi / std::stod / std::stof).
| | | /// @param is_negative When true the converted number is negated.
| | | /// @param type        INTEGER, DOUBLE or FLOAT; any other type is reported as an error.
| | | /// Conversion failures and out-of-range values are forwarded to reportError(); an INTEGER
| | | /// literal containing '.' is rejected as well.
| | | Symbols::Value Parser::parseNumericLiteral(const std::string & value, bool is_negative, Symbols::Variables::Type type) {
| | |     using NumericKind = Symbols::Variables::Type;
| | | 
| | |     try {
| | |         if (type == NumericKind::INTEGER) {
| | |             const bool has_fraction = value.find('.') != std::string::npos;
| | |             if (has_fraction) {
| | |                 throw std::invalid_argument("Floating point value in integer context: " + value);
| | |             }
| | |             const int magnitude = std::stoi(value);
| | |             return Symbols::Value(is_negative ? -magnitude : magnitude);
| | |         }
| | |         if (type == NumericKind::DOUBLE) {
| | |             const double magnitude = std::stod(value);
| | |             return Symbols::Value(is_negative ? -magnitude : magnitude);
| | |         }
| | |         if (type == NumericKind::FLOAT) {
| | |             const float magnitude = std::stof(value);
| | |             return Symbols::Value(is_negative ? -magnitude : magnitude);
| | |         }
| | |         throw std::invalid_argument("Unsupported numeric type");
| | |     } catch (const std::invalid_argument & e) {
| | |         reportError("Invalid numeric literal: " + value + " (" + e.what() + ")");
| | |     } catch (const std::out_of_range & e) {
| | |         reportError("Numeric literal out of range: " + value + " (" + e.what() + ")");
| | |     }
| | | 
| | |     return Symbols::Value();  // unreachable
| | | }
| | | |
| | | /// Consumes a function body and registers the function.
| | | ///
| | | /// Starting at the current token (the first token after `opening_brace`), scans forward to
| | | /// the "}" that closes the body, honouring nested braces. The tokens strictly between the two
| | | /// braces are parsed by a separate Parser instance inside a new scope named
| | | /// "<current scope>.<function_name>"; afterwards the previous scope is re-entered and the
| | | /// function is defined through Interpreter::OperationsFactory::defineFunction.
| | | ///
| | | /// @param opening_brace The "{" token that starts the body (already consumed by the caller).
| | | /// @param function_name Name of the function being defined.
| | | /// @param return_type   Declared return type.
| | | /// @param params        Declared parameters.
| | | /// Reports "Unmatched braces in function body" when no closing brace is found.
| | | void Parser::parseFunctionBody(const Lexer::Tokens::Token & opening_brace, const std::string & function_name,
| | |                                Symbols::Variables::Type return_type, const Symbols::FunctionParameterInfo & params) {
| | |     size_t               braceDepth        = 0;
| | |     int                  peek              = 0;
| | |     size_t               tokenIndex        = current_token_index_;
| | |     bool                 closingBraceFound = false;
| | |     Lexer::Tokens::Token currentToken_;
| | |     Lexer::Tokens::Token closing_brace;
| | | 
| | |     while (tokenIndex < tokens_.size()) {
| | |         currentToken_ = peekToken(peek);
| | |         if (currentToken_.type == Lexer::Tokens::Type::PUNCTUATION) {
| | |             if (currentToken_.value == "{") {
| | |                 ++braceDepth;
| | |             } else if (currentToken_.value == "}") {
| | |                 if (braceDepth == 0) {
| | |                     closing_brace     = currentToken_;
| | |                     closingBraceFound = true;
| | |                     break;
| | |                 }
| | |                 --braceDepth;
| | |             }
| | |         }
| | |         ++tokenIndex;
| | |         ++peek;
| | |     }
| | |     // Running out of tokens means the body was never closed, whatever the nesting depth was.
| | |     if (!closingBraceFound) {
| | |         reportError("Unmatched braces in function body");
| | |     }
| | | 
| | |     std::vector<Lexer::Tokens::Token> filtered_tokens;
| | |     const auto startIt = std::find(tokens_.begin(), tokens_.end(), opening_brace);
| | |     const auto endIt   = std::find(tokens_.begin(), tokens_.end(), closing_brace);
| | | 
| | |     if (startIt != tokens_.end() && endIt != tokens_.end() && startIt < endIt) {
| | |         filtered_tokens.assign(startIt + 1, endIt);
| | |     }
| | | 
| | |     // substr() takes (position, count), so the count is the distance between the end of "{"
| | |     // and the start of "}". Inconsistent positions yield an empty view instead of throwing.
| | |     std::string_view input_string;
| | |     if (opening_brace.end_pos <= input_str_view_.size() && closing_brace.start_pos >= opening_brace.end_pos) {
| | |         input_string = input_str_view_.substr(opening_brace.end_pos, closing_brace.start_pos - opening_brace.end_pos);
| | |     }
| | | 
| | |     current_token_index_ = tokenIndex;
| | |     expect(Lexer::Tokens::Type::PUNCTUATION, "}");
| | | 
| | |     const std::string newns = Symbols::SymbolContainer::instance()->currentScopeName() + "." + function_name;
| | |     Symbols::SymbolContainer::instance()->create(newns);
| | | 
| | |     // The nested parser is only needed for this call, so it does not need shared ownership.
| | |     Parser body_parser;
| | |     body_parser.parseScript(filtered_tokens, input_string, this->current_filename_);
| | |     Symbols::SymbolContainer::instance()->enterPreviousScope();
| | | 
| | |     // create function
| | |     Interpreter::OperationsFactory::defineFunction(
| | |         function_name, params, return_type, Symbols::SymbolContainer::instance()->currentScopeName(),
| | |         this->current_filename_, currentToken_.line_number, currentToken_.column_number);
| | | }
| | | |
| | | /// Parses an expression starting at the current token using an operator stack and an
| | | /// output queue of already-built sub-expressions.
| | | ///
| | | /// Handled tokens: "(" and ")", OPERATOR_ARITHMETIC tokens (a "+", "-" or "!" seen where an
| | | /// operand is expected is treated as a prefix operator and tagged with a leading 'u'),
| | | /// and operands (NUMBER, STRING_LITERAL, KEYWORD, VARIABLE_IDENTIFIER). Parsing stops at the
| | | /// first token of any other kind, which is left unconsumed.
| | | ///
| | | /// @param expected_var_type Type used when converting literal tokens to values.
| | | /// @return The root of the parsed expression tree.
| | | /// Malformed input (missing operands, mismatched parentheses, leftover operands) is reported
| | | /// through reportError().
| | | ParsedExpressionPtr Parser::parseParsedExpression(const Symbols::Variables::Type & expected_var_type) {
| | |     std::stack<std::string>          operator_stack;
| | |     std::vector<ParsedExpressionPtr> output_queue;
| | | 
| | |     // Applies one operator that has already been removed from the stack: takes its operand(s)
| | |     // from the back of the output queue and pushes the combined node.
| | |     const auto reduce = [&](const std::string & op) {
| | |         const bool is_prefix = (op == "u-" || op == "u+" || op == "u!");
| | |         if (is_prefix) {
| | |             if (output_queue.empty()) {
| | |                 reportError("Missing operand for unary operator");
| | |             }
| | |             auto rhs = std::move(output_queue.back());
| | |             output_queue.pop_back();
| | |             output_queue.push_back(Lexer::applyOperator(op, std::move(rhs)));
| | |             return;
| | |         }
| | | 
| | |         if (output_queue.size() < 2) {
| | |             reportError("Malformed expression");
| | |         }
| | |         auto rhs = std::move(output_queue.back());
| | |         output_queue.pop_back();
| | |         auto lhs = std::move(output_queue.back());
| | |         output_queue.pop_back();
| | |         output_queue.push_back(Lexer::applyOperator(op, std::move(rhs), std::move(lhs)));
| | |     };
| | | 
| | |     bool expect_unary = true;
| | | 
| | |     while (true) {
| | |         const auto token = currentToken();
| | | 
| | |         if (token.type == Lexer::Tokens::Type::PUNCTUATION && token.lexeme == "(") {
| | |             operator_stack.push("(");
| | |             consumeToken();
| | |             expect_unary = true;
| | |         } else if (token.type == Lexer::Tokens::Type::PUNCTUATION && token.lexeme == ")") {
| | |             consumeToken();
| | | 
| | |             // Apply everything back to the matching "("
| | |             while (!operator_stack.empty() && operator_stack.top() != "(") {
| | |                 const std::string op = operator_stack.top();
| | |                 operator_stack.pop();
| | |                 reduce(op);
| | |             }
| | | 
| | |             if (operator_stack.empty() || operator_stack.top() != "(") {
| | |                 reportError("Mismatched parentheses");
| | |             }
| | |             operator_stack.pop();  // remove "("
| | |             expect_unary = false;
| | |         } else if (token.type == Lexer::Tokens::Type::OPERATOR_ARITHMETIC) {
| | |             std::string op = std::string(token.lexeme);
| | | 
| | |             if (expect_unary && Lexer::isUnaryOperator(op)) {
| | |                 op = "u" + op;  // e.g. u-, u+ or u!
| | |             }
| | | 
| | |             while (!operator_stack.empty()) {
| | |                 // Copy the spelling: a reference to top() would dangle once pop() runs.
| | |                 const std::string top = operator_stack.top();
| | | 
| | |                 const bool yields_to_top = Lexer::isLeftAssociative(op) ?
| | |                                                Lexer::getPrecedence(op) <= Lexer::getPrecedence(top) :
| | |                                                Lexer::getPrecedence(op) < Lexer::getPrecedence(top);
| | |                 if (!yields_to_top) {
| | |                     break;
| | |                 }
| | | 
| | |                 operator_stack.pop();
| | |                 reduce(top);
| | |             }
| | | 
| | |             operator_stack.push(op);
| | |             consumeToken();
| | |             expect_unary = true;
| | |         } else if (token.type == Lexer::Tokens::Type::NUMBER || token.type == Lexer::Tokens::Type::STRING_LITERAL ||
| | |                    token.type == Lexer::Tokens::Type::KEYWORD ||
| | |                    token.type == Lexer::Tokens::Type::VARIABLE_IDENTIFIER) {
| | |             if (!Lexer::pushOperand(token, expected_var_type, output_queue)) {
| | |                 reportError("Expected literal or variable");
| | |             }
| | |             consumeToken();
| | |             expect_unary = false;
| | |         } else {
| | |             break;
| | |         }
| | |     }
| | | 
| | |     // Empty the operator stack
| | |     while (!operator_stack.empty()) {
| | |         const std::string op = operator_stack.top();
| | |         operator_stack.pop();
| | | 
| | |         if (op == "(" || op == ")") {
| | |             reportError("Mismatched parentheses");
| | |         }
| | |         reduce(op);
| | |     }
| | | 
| | |     if (output_queue.size() != 1) {
| | |         reportError("Expression could not be parsed cleanly");
| | |     }
| | | 
| | |     return std::move(output_queue.back());
| | | }
| | | |
| | | /// Resets the parser state to the given token list, source text and file name, then parses
| | | /// statements until the end of the tokens or an END_OF_FILE token is reached.
| | | void Parser::parseScript(const std::vector<Lexer::Tokens::Token> & tokens, std::string_view input_string,
| | |                          const std::string & filename) {
| | |     current_filename_    = filename;
| | |     input_str_view_      = input_string;
| | |     tokens_              = tokens;
| | |     current_token_index_ = 0;
| | | 
| | |     const auto has_more_input = [this]() {
| | |         return !isAtEnd() && currentToken().type != Lexer::Tokens::Type::END_OF_FILE;
| | |     };
| | | 
| | |     while (has_more_input()) {
| | |         parseStatement();
| | |     }
| | |     if (has_more_input()) {
| | |         reportError("Unexpected tokens after program end");
| | |     }
| | | }
| | | } // namespace Parser |
| | |
| | | #ifndef PARSER_HPP |
| | | #define PARSER_HPP |
| | | |
| | | #include <algorithm> |
| | | #include <memory> |
| | | #include <stack> |
| | | #include <stdexcept> |
| | | #include <string> |
| | | #include <vector> |
| | | |
| | | #include "Interpreter/ExpressionBuilder.hpp" |
| | | #include "Interpreter/OperationsFactory.hpp" |
| | | #include "BaseException.hpp" |
| | | #include "Lexer/Token.hpp" |
| | | #include "Lexer/TokenType.hpp" |
| | | #include "Parser/ParsedExpression.hpp" |
| | | #include "Symbols/ParameterContainer.hpp" |
| | | #include "Symbols/SymbolContainer.hpp" |
| | | #include "Symbols/Value.hpp" |
| | | |
| | | namespace Parser { |
| | | |
| | | class SyntaxError : public std::runtime_error { |
| | | public: |
| | | SyntaxError(const std::string & message, const int line, const int col) : |
| | | std::runtime_error(message + " at line " + std::to_string(line) + ", column " + std::to_string(col)) {} |
| | | |
| | | SyntaxError(const std::string & message, const Lexer::Tokens::Token & token) : |
| | | SyntaxError( |
| | | message + " (found token: '" + token.value + "' type: " + Lexer::Tokens::TypeToString(token.type) + ")", |
| | | token.line_number, token.column_number) {} |
| | | }; |
| | | |
| | | class Parser { |
| | | public: |
| | | Parser() {} |
| | | |
| | | void parseScript(const std::vector<Lexer::Tokens::Token> & tokens, std::string_view input_string, |
| | | const std::string & filename) { |
| | | tokens_ = tokens; |
| | | input_str_view_ = input_string; |
| | | current_token_index_ = 0; |
| | | current_filename_ = filename; |
| | | class Exception : public BaseException { |
| | | public: |
| | | using BaseException::BaseException; |
| | | |
| | | try { |
| | | while (!isAtEnd() && currentToken().type != Lexer::Tokens::Type::END_OF_FILE) { |
| | | parseStatement(); |
| | | } |
| | | if (!isAtEnd() && currentToken().type != Lexer::Tokens::Type::END_OF_FILE) { |
| | | reportError("Unexpected tokens after program end"); |
| | | } |
| | | } catch (const SyntaxError & e) { |
| | | std::cerr << "Syntax Error: " << e.what() << '\n'; |
| | | } catch (const std::exception & e) { |
| | | std::cerr << "Error during parsing: " << e.what() << '\n'; |
| | | throw; |
| | | Exception(const std::string & msg, const Lexer::Tokens::Token & token) { |
| | | rawMessage_ = msg + ": " + token.dump(); |
| | | context_ = |
| | | " at line: " + std::to_string(token.line_number) + ", column: " + std::to_string(token.column_number); |
| | | formattedMessage_ = formatMessage(); |
| | | } |
| | | } |
| | | |
| | | Exception(const std::string & msg, int line, int col) { |
| | | rawMessage_ = msg; |
| | | context_ = " at line: " + std::to_string(line) + ", column: " + std::to_string(col); |
| | | formattedMessage_ = formatMessage(); |
| | | } |
| | | |
| | | std::string formatMessage() const override { return "[Syntax ERROR] >>" + context_ + " << : " + rawMessage_; } |
| | | }; |
| | | |
| | | void parseScript(const std::vector<Lexer::Tokens::Token> & tokens, std::string_view input_string, |
| | | const std::string & filename); |
| | | |
| | | static const std::unordered_map<std::string, Lexer::Tokens::Type> keywords; |
| | | static const std::unordered_map<Lexer::Tokens::Type, Symbols::Variables::Type> variable_types; |
| | |
| | | (current_token_index_ == tokens_.size() - 1 && tokens_.back().type == Lexer::Tokens::Type::END_OF_FILE); |
| | | } |
| | | |
| | | // --- Error handling ---
| | | // Report an error and throw an exception
| | | [[noreturn]] void reportError(const std::string & message) { |
| | | // Használjuk az aktuális token pozícióját, ha még nem értünk a végére |
| | | if (current_token_index_ < tokens_.size()) { |
| | | throw SyntaxError(message, tokens_[current_token_index_]); |
| | | } // Ha már a végén vagyunk, az utolsó ismert pozíciót használjuk |
| | | throw Exception(message, tokens_[current_token_index_]); |
| | | } |
| | | int line = tokens_.empty() ? 0 : tokens_.back().line_number; |
| | | int col = tokens_.empty() ? 0 : tokens_.back().column_number; |
| | | throw SyntaxError(message, line, col); |
| | | throw Exception(message, line, col); |
| | | } |
| | | |
| | | // --- Parsing methods (modular parts) ---
| | | 
| | | // parseStatement (unchanged)
| | | void parseStatement() { |
| | |
| | | reportError("Unexpected token at beginning of statement"); |
| | | } |
| | | |
| | | void parseVariableDefinition() {  // parses `<type> $name = <expression>;` and registers the variable definition
| | | Symbols::Variables::Type var_type = parseType();
| | | 
| | | Lexer::Tokens::Token id_token = expect(Lexer::Tokens::Type::VARIABLE_IDENTIFIER);
| | | std::string var_name = id_token.value;
| | | 
| | | if (!var_name.empty() && var_name[0] == '$') {  // the name is stored without its leading '$'
| | | var_name = var_name.substr(1);
| | | }
| | | const auto ns = Symbols::SymbolContainer::instance()->currentScopeName();
| | | 
| | | expect(Lexer::Tokens::Type::OPERATOR_ASSIGNMENT, "=");
| | | /*
| | | Symbols::Value initial_value = parseValue(var_type);
| | | 
| | | Interpreter::OperationsFactory::defineSimpleVariable(var_name, initial_value, ns, this->current_filename_,
| | | id_token.line_number, id_token.column_number);
| | | */
| | | 
| | | auto expr = parseParsedExpression(var_type);  // the initializer is kept as an expression tree
| | | Interpreter::OperationsFactory::defineVariableWithExpression(
| | | var_name, var_type, std::move(expr), ns, current_filename_, id_token.line_number, id_token.column_number);
| | | expect(Lexer::Tokens::Type::PUNCTUATION, ";");
| | | }
| | | |
| | | void parseFunctionDefinition() {  // parses a function header, then delegates the body to parseFunctionBody()
| | | expect(Lexer::Tokens::Type::KEYWORD_FUNCTION_DECLARATION);
| | | Lexer::Tokens::Token id_token = expect(Lexer::Tokens::Type::IDENTIFIER);
| | | std::string func_name = id_token.value;
| | | Symbols::Variables::Type func_return_type = Symbols::Variables::Type::NULL_TYPE;  // used when no return type is written
| | | expect(Lexer::Tokens::Type::OPERATOR_ASSIGNMENT, "=");
| | | expect(Lexer::Tokens::Type::PUNCTUATION, "(");
| | | 
| | | Symbols::FunctionParameterInfo param_infos;
| | | 
| | | if (currentToken().type != Lexer::Tokens::Type::PUNCTUATION || currentToken().value != ")") {
| | | while (true) {
| | | // Parameter type
| | | Symbols::Variables::Type param_type = parseType(); // This consumes the type token
| | | 
| | | // Parameter name ($variable)
| | | Lexer::Tokens::Token param_id_token = expect(Lexer::Tokens::Type::VARIABLE_IDENTIFIER);
| | | std::string param_name = param_id_token.value;
| | | if (!param_name.empty() && param_name[0] == '$') { // strip the '$'
| | | param_name = param_name.substr(1);
| | | }
| | | 
| | | param_infos.push_back({ param_name, param_type });
| | | 
| | | // Is a comma or a closing parenthesis next?
| | | if (match(Lexer::Tokens::Type::PUNCTUATION, ",")) {
| | | continue;
| | | }
| | | if (currentToken().type == Lexer::Tokens::Type::PUNCTUATION && currentToken().value == ")") {
| | | break; // End of the list
| | | }
| | | reportError("Expected ',' or ')' in parameter list");
| | | }
| | | }
| | | // Now the ')' follows
| | | expect(Lexer::Tokens::Type::PUNCTUATION, ")");
| | | 
| | | // check for an optional return type between ')' and '{'
| | | for (const auto & _type : Parser::variable_types) {
| | | if (match(_type.first)) {
| | | func_return_type = _type.second;
| | | break;
| | | }
| | | }
| | | 
| | | Lexer::Tokens::Token opening_brace = expect(Lexer::Tokens::Type::PUNCTUATION, "{");
| | | 
| | | // the body is consumed and parsed by parseFunctionBody, starting after the opening brace
| | | parseFunctionBody(opening_brace, func_name, func_return_type, param_infos);
| | | }
| | | void parseVariableDefinition(); |
| | | void parseFunctionDefinition(); |
| | | |
| | | // --- Parsing helper functions ---
| | | |
| | |
| | | return Symbols::Value(); // compiler happy |
| | | } |
| | | |
| | | Symbols::Value parseNumericLiteral(const std::string & value, bool is_negative, Symbols::Variables::Type type) {  // converts literal text to a Value of the requested numeric type
| | | try {
| | | switch (type) {
| | | case Symbols::Variables::Type::INTEGER:
| | | {
| | | if (value.find('.') != std::string::npos) {  // a '.' is not accepted in an integer literal
| | | throw std::invalid_argument("Floating point value in integer context: " + value);
| | | }
| | | int v = std::stoi(value);
| | | return Symbols::Value(is_negative ? -v : v);  // the sign is applied after conversion
| | | }
| | | case Symbols::Variables::Type::DOUBLE:
| | | {
| | | double v = std::stod(value);
| | | return Symbols::Value(is_negative ? -v : v);
| | | }
| | | case Symbols::Variables::Type::FLOAT:
| | | {
| | | float v = std::stof(value);
| | | return Symbols::Value(is_negative ? -v : v);
| | | }
| | | default:
| | | throw std::invalid_argument("Unsupported numeric type");
| | | }
| | | } catch (const std::invalid_argument & e) {  // also catches the two invalid_argument throws above
| | | reportError("Invalid numeric literal: " + value + " (" + e.what() + ")");
| | | } catch (const std::out_of_range & e) {
| | | reportError("Numeric literal out of range: " + value + " (" + e.what() + ")");
| | | }
| | | 
| | | return Symbols::Value(); // unreachable
| | | }
| | | Symbols::Value parseNumericLiteral(const std::string & value, bool is_negative, Symbols::Variables::Type type); |
| | | |
| | | void parseFunctionBody(const Lexer::Tokens::Token & opening_brace, const std::string & function_name, |
| | | Symbols::Variables::Type return_type, const Symbols::FunctionParameterInfo & params) { |
| | | size_t braceDepth = 0; |
| | | int peek = 0; |
| | | int tokenIndex = current_token_index_; |
| | | Lexer::Tokens::Token currentToken_; |
| | | Lexer::Tokens::Token closing_brace; |
| | | Symbols::Variables::Type return_type, const Symbols::FunctionParameterInfo & params); |
| | | |
| | | while (tokenIndex < tokens_.size()) { |
| | | currentToken_ = peekToken(peek); |
| | | if (currentToken_.type == Lexer::Tokens::Type::PUNCTUATION) { |
| | | if (currentToken_.value == "{") { |
| | | ++braceDepth; |
| | | } else if (currentToken_.value == "}") { |
| | | if (braceDepth == 0) { |
| | | closing_brace = currentToken_; |
| | | break; |
| | | } |
| | | --braceDepth; |
| | | } |
| | | } |
| | | tokenIndex++; |
| | | peek++; |
| | | } |
| | | if (braceDepth != 0) { |
| | | reportError("Unmatched braces in function body"); |
| | | } |
| | | std::vector<Lexer::Tokens::Token> filtered_tokens; |
| | | auto startIt = std::find(tokens_.begin(), tokens_.end(), opening_brace); |
| | | auto endIt = std::find(tokens_.begin(), tokens_.end(), closing_brace); |
| | | |
| | | if (startIt != tokens_.end() && endIt != tokens_.end() && startIt < endIt) { |
| | | filtered_tokens = std::vector<Lexer::Tokens::Token>(startIt + 1, endIt); |
| | | } |
| | | std::string_view input_string = input_str_view_.substr(opening_brace.end_pos, closing_brace.end_pos); |
| | | |
| | | current_token_index_ = tokenIndex; |
| | | expect(Lexer::Tokens::Type::PUNCTUATION, "}"); |
| | | const std::string newns = Symbols::SymbolContainer::instance()->currentScopeName() + "." + function_name; |
| | | Symbols::SymbolContainer::instance()->create(newns); |
| | | std::shared_ptr<Parser> parser = std::make_shared<Parser>(); |
| | | parser->parseScript(filtered_tokens, input_string, this->current_filename_); |
| | | Symbols::SymbolContainer::instance()->enterPreviousScope(); |
| | | // create function |
| | | Interpreter::OperationsFactory::defineFunction( |
| | | function_name, params, return_type, Symbols::SymbolContainer::instance()->currentScopeName(), |
| | | this->current_filename_, currentToken_.line_number, currentToken_.column_number); |
| | | } |
| | | |
| | | /**
| | |  * Parses an arithmetic expression starting at the current token and returns it as a
| | |  * ParsedExpression tree.
| | |  *
| | |  * The parser keeps a stack of pending operators and a queue of finished sub-expressions
| | |  * (shunting-yard style). Binary "+", "-", "*", "/" are left-associative; a "+" or "-"
| | |  * seen where an operand is expected is treated as a unary operator (stored internally
| | |  * as "u+" / "u-"), which binds tighter than any binary operator.
| | |  *
| | |  * Parsing stops at the first token that is neither a parenthesis, an arithmetic
| | |  * operator nor an operand; that token is left unconsumed.
| | |  *
| | |  * @param expected_var_type Type handed to Symbols::Value::fromString() when NUMBER,
| | |  *                          STRING_LITERAL and KEYWORD tokens are turned into literals.
| | |  * @return The single expression remaining on the output queue.
| | |  *
| | |  * Problems (missing operands, mismatched parentheses, leftover operands) are passed to
| | |  * reportError(). NOTE(review): the code after each reportError() call assumes it does
| | |  * not return (presumably it throws) - confirm.
| | |  */
| | | ParsedExpressionPtr parseParsedExpression(const Symbols::Variables::Type & expected_var_type) {
| | |     std::stack<std::string>          operator_stack;
| | |     std::vector<ParsedExpressionPtr> output_queue;
| | |
| | |     // Binding strength of an operator; "(" and anything unknown yield 0.
| | |     auto getPrecedence = [](const std::string & op) -> int {
| | |         if (op == "+" || op == "-") {
| | |             return 1;
| | |         }
| | |         if (op == "*" || op == "/") {
| | |             return 2;
| | |         }
| | |         if (op == "u-" || op == "u+") {
| | |             return 3;
| | |         }
| | |         return 0;
| | |     };
| | |
| | |     // Only the unary operators are right-associative.
| | |     auto isLeftAssociative = [](const std::string & op) -> bool {
| | |         return !(op == "u-" || op == "u+");
| | |     };
| | |
| | |     // Builds the expression node for `op`; unary operators ignore `lhs`.
| | |     auto applyOperator = [](const std::string & op, ParsedExpressionPtr rhs, ParsedExpressionPtr lhs = nullptr) {
| | |         if (op == "u-" || op == "u+") {
| | |             std::string real_op = (op == "u-") ? "-" : "+";
| | |             return ParsedExpression::makeUnary(real_op, std::move(rhs));
| | |         } else {
| | |             return ParsedExpression::makeBinary(op, std::move(lhs), std::move(rhs));
| | |         }
| | |     };
| | |
| | |     // Takes the operand(s) of `op` off the output queue and pushes the combined node back.
| | |     // `op` must not refer to an element of operator_stack that has already been popped.
| | |     auto reduceOperator = [&](const std::string & op) {
| | |         if (op == "u-" || op == "u+") {
| | |             if (output_queue.empty()) {
| | |                 reportError("Missing operand for unary operator");
| | |             }
| | |             auto rhs = std::move(output_queue.back());
| | |             output_queue.pop_back();
| | |             output_queue.push_back(applyOperator(op, std::move(rhs)));
| | |         } else {
| | |             if (output_queue.size() < 2) {
| | |                 reportError("Malformed expression");
| | |             }
| | |             // The right operand was pushed last, so it comes off first.
| | |             auto rhs = std::move(output_queue.back());
| | |             output_queue.pop_back();
| | |             auto lhs = std::move(output_queue.back());
| | |             output_queue.pop_back();
| | |             output_queue.push_back(applyOperator(op, std::move(rhs), std::move(lhs)));
| | |         }
| | |     };
| | |
| | |     // Converts an operand token into a literal or variable node on the output queue.
| | |     auto pushOperand = [&](const Lexer::Tokens::Token & token) {
| | |         if (token.type == Lexer::Tokens::Type::NUMBER || token.type == Lexer::Tokens::Type::STRING_LITERAL ||
| | |             token.type == Lexer::Tokens::Type::KEYWORD) {
| | |             output_queue.push_back(
| | |                 ParsedExpression::makeLiteral(Symbols::Value::fromString(token.value, expected_var_type)));
| | |         } else if (token.type == Lexer::Tokens::Type::VARIABLE_IDENTIFIER) {
| | |             std::string name = token.value;
| | |             // Variable names are stored without the leading '$'.
| | |             if (!name.empty() && name[0] == '$') {
| | |                 name = name.substr(1);
| | |             }
| | |             output_queue.push_back(ParsedExpression::makeVariable(name));
| | |         } else {
| | |             reportError("Expected literal or variable");
| | |         }
| | |     };
| | |
| | |     // True whenever the next "+" / "-" must be read as a sign rather than a binary operator:
| | |     // at the start, after "(" and after another operator.
| | |     bool expect_unary = true;
| | |
| | |     while (true) {
| | |         const auto token = currentToken();
| | |
| | |         if (token.type == Lexer::Tokens::Type::PUNCTUATION && token.lexeme == "(") {
| | |             operator_stack.push("(");
| | |             consumeToken();
| | |             expect_unary = true;
| | |         } else if (token.type == Lexer::Tokens::Type::PUNCTUATION && token.lexeme == ")") {
| | |             consumeToken();
| | |             // Reduce everything back to the matching "(".
| | |             while (!operator_stack.empty() && operator_stack.top() != "(") {
| | |                 const std::string op = operator_stack.top();
| | |                 operator_stack.pop();
| | |                 reduceOperator(op);
| | |             }
| | |
| | |             if (operator_stack.empty() || operator_stack.top() != "(") {
| | |                 reportError("Mismatched parentheses");
| | |             }
| | |             operator_stack.pop();  // remove "("
| | |             expect_unary = false;
| | |         } else if (token.type == Lexer::Tokens::Type::OPERATOR_ARITHMETIC) {
| | |             std::string op = std::string(token.lexeme);
| | |             if (expect_unary && (op == "-" || op == "+")) {
| | |                 op = "u" + op;  // e.g. "u-"
| | |             }
| | |
| | |             // Reduce stacked operators that bind at least as tightly (left-associative op)
| | |             // or strictly tighter (right-associative op) than the incoming one.
| | |             while (!operator_stack.empty()) {
| | |                 // Copy, not a reference: pop() below destroys the stack element, so a
| | |                 // reference to it would dangle while the operator is being reduced.
| | |                 const std::string top = operator_stack.top();
| | |
| | |                 const bool top_binds_first = isLeftAssociative(op) ? getPrecedence(op) <= getPrecedence(top) :
| | |                                                                      getPrecedence(op) < getPrecedence(top);
| | |                 if (!top_binds_first) {
| | |                     break;
| | |                 }
| | |
| | |                 operator_stack.pop();
| | |                 reduceOperator(top);
| | |             }
| | |
| | |             operator_stack.push(op);
| | |             consumeToken();
| | |             expect_unary = true;
| | |         } else if (token.type == Lexer::Tokens::Type::NUMBER || token.type == Lexer::Tokens::Type::STRING_LITERAL ||
| | |                    token.type == Lexer::Tokens::Type::KEYWORD ||
| | |                    token.type == Lexer::Tokens::Type::VARIABLE_IDENTIFIER) {
| | |             pushOperand(token);
| | |             consumeToken();
| | |             expect_unary = false;
| | |         } else {
| | |             // Any other token ends the expression and is left for the caller.
| | |             break;
| | |         }
| | |     }
| | |
| | |     // Drain the operator stack.
| | |     while (!operator_stack.empty()) {
| | |         const std::string op = operator_stack.top();
| | |         operator_stack.pop();
| | |
| | |         // A parenthesis still on the stack was never closed.
| | |         if (op == "(" || op == ")") {
| | |             reportError("Mismatched parentheses");
| | |         }
| | |
| | |         reduceOperator(op);
| | |     }
| | |
| | |     if (output_queue.size() != 1) {
| | |         reportError("Expression could not be parsed cleanly");
| | |     }
| | |
| | |     return std::move(output_queue.back());
| | | }
| | | ParsedExpressionPtr parseParsedExpression(const Symbols::Variables::Type & expected_var_type); |
| | | |
| | | }; // class Parser |
| | | |
| | |
| | | |
| | | /// Read-only accessor: returns a const reference to context_.
| | | const std::string & context() const {
| | |     return context_;
| | | }
| | | |
| | | /// Returns kind_ by value.
| | | Symbols::Kind type() const {
| | |     return kind_;
| | | }
| | | /// Returns kind_ by value.
| | | Symbols::Kind getKind() const {
| | |     return kind_;
| | | }
| | | |
| | | // Virtual getter/setter for the value
| | | /// Returns a const reference to value_; declared virtual so it can be overridden.
| | | virtual const Value & getValue() const {
| | |     return value_;
| | | }
| | |
| | | std::string getNamespaceForSymbol(const SymbolPtr & symbol) const { |
| | | std::string base = symbol->context().empty() ? currentScope_ : symbol->context(); |
| | | |
| | | switch (symbol->type()) { |
| | | switch (symbol->getKind()) { |
| | | case Symbols::Kind::Variable: |
| | | return base + ".variables"; |
| | | case Symbols::Kind::Function: |
| | |
| | | #ifndef SYMBOL_TABLE_HPP |
| | | #define SYMBOL_TABLE_HPP |
| | | |
| | | #include <memory> |
| | | #include <vector> |
| | | |
| | | #include "SymbolTypes.hpp" |
| | |
| | | #ifndef VOIDSCRIPT_HPP |
| | | #define VOIDSCRIPT_HPP |
| | | |
| | | #include <filesystem> |
| | | #include <fstream> |
| | | #include <string> |
| | |
| | | int run() { |
| | | try { |
| | | while (!files.empty()) { |
| | | std::string file = files.back(); |
| | | std::string file = files.back(); |
| | | const std::string file_content = readFile(file); |
| | | files.pop_back(); |
| | | |
| | | |
| | | std::string _default_namespace_ = file; |
| | | std::replace(_default_namespace_.begin(), _default_namespace_.end(), '.', '_'); |
| | | |
| | | Symbols::SymbolContainer::instance()->create(_default_namespace_); |
| | | |
| | | |
| | | const std::string ns = Symbols::SymbolContainer::instance()->currentScopeName(); |
| | | |
| | | this->lexer->addNamespaceInput(ns, file_content); |
| | | const auto tokens = this->lexer->tokenizeNamespace(ns); |
| | | // dump tokens |
| | | std::cout << "--- Tokens ---\n"; |
| | | for (const auto & token : tokens) { |
| | | token.print(); |
| | | } |
| | | |
| | | std::cout << Operations::Container::dump() << "\n"; |
| | | |
| | |
| | | } // while (!files.empty()) |
| | | |
| | | return 0; |
| | | } catch (const Parser::SyntaxError & e) { |
| | | std::cerr << "Syntax Error during parsing: " << e.what() << '\n'; |
| | | return 1; |
| | | } catch (const std::exception & e) { |
| | | std::cerr << "An error occurred: " << e.what() << '\n'; |
| | | std::cerr << e.what() << '\n'; |
| | | return 1; |
| | | } |
| | | return 1; |