From 36ec04c00fa540fcee0f2cff1f7b81dd8a98101a Mon Sep 17 00:00:00 2001
From: Ferenc Szontágh <szf@fsociety.hu>
Date: Thu, 17 Apr 2025 18:44:58 +0000
Subject: [PATCH] some refactor
---
src/Parser/Parser.cpp | 329 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
1 files changed, 314 insertions(+), 15 deletions(-)
diff --git a/src/Parser/Parser.cpp b/src/Parser/Parser.cpp
index 3c5af2b..37c2081 100644
--- a/src/Parser/Parser.cpp
+++ b/src/Parser/Parser.cpp
@@ -1,27 +1,31 @@
#include "Parser/Parser.hpp"
+#include <stack>
+
+#include "Interpreter/OperationsFactory.hpp"
+#include "Lexer/Operators.hpp"
// Other includes go here, if they are needed
namespace Parser {
// Lookup table from keyword text to the lexer token type assigned to that keyword.
const std::unordered_map<std::string, Lexer::Tokens::Type> Parser::keywords = {
- { "if", Lexer::Tokens::Type::KEYWORD },
- { "else", Lexer::Tokens::Type::KEYWORD },
- { "while", Lexer::Tokens::Type::KEYWORD },
- { "for", Lexer::Tokens::Type::KEYWORD },
- { "return", Lexer::Tokens::Type::KEYWORD_RETURN },
+ { "if", Lexer::Tokens::Type::KEYWORD },
+ { "else", Lexer::Tokens::Type::KEYWORD },
+ { "while", Lexer::Tokens::Type::KEYWORD },
+ { "for", Lexer::Tokens::Type::KEYWORD },
+ { "return", Lexer::Tokens::Type::KEYWORD_RETURN },
{ "function", Lexer::Tokens::Type::KEYWORD_FUNCTION_DECLARATION },
// Older entries:
- { "const", Lexer::Tokens::Type::KEYWORD },
- { "true", Lexer::Tokens::Type::KEYWORD },
- { "false", Lexer::Tokens::Type::KEYWORD },
+ { "const", Lexer::Tokens::Type::KEYWORD },
+ { "true", Lexer::Tokens::Type::KEYWORD },
+ { "false", Lexer::Tokens::Type::KEYWORD },
// variable types
- { "null", Lexer::Tokens::Type::KEYWORD_NULL },
- { "int", Lexer::Tokens::Type::KEYWORD_INT },
- { "double", Lexer::Tokens::Type::KEYWORD_DOUBLE },
- { "float", Lexer::Tokens::Type::KEYWORD_FLOAT },
- { "string", Lexer::Tokens::Type::KEYWORD_STRING },
- { "boolean", Lexer::Tokens::Type::KEYWORD_BOOLEAN },
- { "bool", Lexer::Tokens::Type::KEYWORD_BOOLEAN },
+ { "null", Lexer::Tokens::Type::KEYWORD_NULL },
+ { "int", Lexer::Tokens::Type::KEYWORD_INT },
+ { "double", Lexer::Tokens::Type::KEYWORD_DOUBLE },
+ { "float", Lexer::Tokens::Type::KEYWORD_FLOAT },
+ { "string", Lexer::Tokens::Type::KEYWORD_STRING },
+ { "boolean", Lexer::Tokens::Type::KEYWORD_BOOLEAN },
+ { "bool", Lexer::Tokens::Type::KEYWORD_BOOLEAN },
// ... other keywords ...
};
@@ -34,4 +38,299 @@
{ Lexer::Tokens::Type::KEYWORD_BOOLEAN, Symbols::Variables::Type::BOOLEAN },
};
+// Parses one variable definition statement of the form
+//     <type> $name = <expression> ;
+// and hands the result to OperationsFactory::defineVariableWithExpression together with
+// the current scope name, the file name and the position of the identifier token.
+void Parser::parseVariableDefinition() {
+    Symbols::Variables::Type declared_type = parseType();
+
+    const Lexer::Tokens::Token name_token = expect(Lexer::Tokens::Type::VARIABLE_IDENTIFIER);
+
+    // Strip the leading '$' sigil (when there is one) to obtain the bare variable name.
+    std::string bare_name = name_token.value;
+    if (!bare_name.empty() && bare_name.front() == '$') {
+        bare_name.erase(0, 1);
+    }
+
+    // The scope name is read before the remainder of the statement is consumed.
+    const auto scope_name = Symbols::SymbolContainer::instance()->currentScopeName();
+
+    expect(Lexer::Tokens::Type::OPERATOR_ASSIGNMENT, "=");
+    auto initializer = parseParsedExpression(declared_type);
+
+    Interpreter::OperationsFactory::defineVariableWithExpression(bare_name, declared_type, std::move(initializer),
+                                                                 scope_name, current_filename_,
+                                                                 name_token.line_number, name_token.column_number);
+
+    expect(Lexer::Tokens::Type::PUNCTUATION, ";");
+}
+
+// Parses a function definition of the form
+//     function <name> = ( [<type> $param {, <type> $param}] ) [<return type>] { <body> }
+// The parameter list and the optional return type are collected here; everything after the
+// opening '{' is delegated to parseFunctionBody().
+void Parser::parseFunctionDefinition() {
+    expect(Lexer::Tokens::Type::KEYWORD_FUNCTION_DECLARATION);
+    const Lexer::Tokens::Token name_token = expect(Lexer::Tokens::Type::IDENTIFIER);
+    std::string                function_name = name_token.value;
+    // Used when no explicit return type follows the parameter list.
+    Symbols::Variables::Type   return_type   = Symbols::Variables::Type::NULL_TYPE;
+    expect(Lexer::Tokens::Type::OPERATOR_ASSIGNMENT, "=");
+    expect(Lexer::Tokens::Type::PUNCTUATION, "(");
+
+    Symbols::FunctionParameterInfo parameters;
+
+    // True while the current token is the ')' that closes the parameter list.
+    const auto at_closing_paren = [&]() {
+        return currentToken().type == Lexer::Tokens::Type::PUNCTUATION && currentToken().value == ")";
+    };
+
+    if (!at_closing_paren()) {
+        for (;;) {
+            // Parameter type; parseType() consumes the type token.
+            Symbols::Variables::Type parameter_type = parseType();
+
+            // Parameter name ($variable), stored without the leading '$'.
+            const Lexer::Tokens::Token parameter_token = expect(Lexer::Tokens::Type::VARIABLE_IDENTIFIER);
+            std::string                parameter_name  = parameter_token.value;
+            if (!parameter_name.empty() && parameter_name.front() == '$') {
+                parameter_name.erase(0, 1);
+            }
+
+            parameters.push_back({ parameter_name, parameter_type });
+
+            // A ',' introduces another parameter; a ')' ends the list (it is consumed below).
+            if (match(Lexer::Tokens::Type::PUNCTUATION, ",")) {
+                continue;
+            }
+            if (at_closing_paren()) {
+                break;
+            }
+            reportError("Expected ',' or ')' in parameter list");
+        }
+    }
+    expect(Lexer::Tokens::Type::PUNCTUATION, ")");
+
+    // Optional return type: the first entry of variable_types whose token type matches wins.
+    for (const auto & [token_type, value_type] : Parser::variable_types) {
+        if (match(token_type)) {
+            return_type = value_type;
+            break;
+        }
+    }
+
+    const Lexer::Tokens::Token opening_brace = expect(Lexer::Tokens::Type::PUNCTUATION, "{");
+
+    parseFunctionBody(opening_brace, function_name, return_type, parameters);
+}
+
+// Converts the text of a numeric literal into a Symbols::Value of the requested type.
+//
+//   value        text of the literal
+//   is_negative  when true, the resulting number is negated
+//   type         INTEGER, DOUBLE or FLOAT; any other type is reported as an error
+//
+// Conversion failures (std::invalid_argument / std::out_of_range) are forwarded to
+// reportError() with a message that contains the offending literal.
+Symbols::Value Parser::parseNumericLiteral(const std::string & value, bool is_negative, Symbols::Variables::Type type) {
+    try {
+        switch (type) {
+            case Symbols::Variables::Type::INTEGER:
+                {
+                    if (value.find('.') != std::string::npos) {
+                        throw std::invalid_argument("Floating point value in integer context: " + value);
+                    }
+                    // Convert with the sign attached: the magnitude of the most negative int does
+                    // not fit into an int, so "convert, then negate" would reject that literal.
+                    // Text that already starts with a sign keeps the convert-then-negate path.
+                    const bool starts_with_digit = !value.empty() && value.front() >= '0' && value.front() <= '9';
+                    if (is_negative && starts_with_digit) {
+                        return Symbols::Value(std::stoi("-" + value));
+                    }
+                    const int v = std::stoi(value);
+                    return Symbols::Value(is_negative ? -v : v);
+                }
+            case Symbols::Variables::Type::DOUBLE:
+                {
+                    const double v = std::stod(value);
+                    return Symbols::Value(is_negative ? -v : v);
+                }
+            case Symbols::Variables::Type::FLOAT:
+                {
+                    const float v = std::stof(value);
+                    return Symbols::Value(is_negative ? -v : v);
+                }
+            default:
+                throw std::invalid_argument("Unsupported numeric type");
+        }
+    } catch (const std::invalid_argument & e) {
+        reportError("Invalid numeric literal: " + value + " (" + e.what() + ")");
+    } catch (const std::out_of_range & e) {
+        reportError("Numeric literal out of range: " + value + " (" + e.what() + ")");
+    }
+
+    // Only reached if reportError() returns; presumably it throws - confirm.
+    return Symbols::Value();
+}
+
+// Locates the '}' that closes a function body, parses the tokens between the braces with a
+// separate Parser inside a new symbol scope named "<current scope>.<function_name>", and
+// finally registers the function through OperationsFactory::defineFunction.
+//
+//   opening_brace  the '{' token that opens the body
+//   function_name  name of the function being defined
+//   return_type    return type passed on to defineFunction
+//   params         parameter list passed on to defineFunction
+//
+// On entry the current token is expected to be the first token after the opening brace;
+// on success the closing '}' has been consumed.
+void Parser::parseFunctionBody(const Lexer::Tokens::Token & opening_brace, const std::string & function_name,
+                               Symbols::Variables::Type return_type, const Symbols::FunctionParameterInfo & params) {
+    size_t               braceDepth    = 0;
+    int                  peek          = 0;
+    size_t               tokenIndex    = current_token_index_;  // unsigned: compared against tokens_.size()
+    bool                 found_closing = false;
+    Lexer::Tokens::Token currentToken_;
+    Lexer::Tokens::Token closing_brace;
+
+    // Scan forward without consuming anything, tracking nested '{' ... '}' pairs, until the
+    // '}' that matches the opening brace is reached.
+    while (tokenIndex < tokens_.size()) {
+        currentToken_ = peekToken(peek);
+        if (currentToken_.type == Lexer::Tokens::Type::PUNCTUATION) {
+            if (currentToken_.value == "{") {
+                ++braceDepth;
+            } else if (currentToken_.value == "}") {
+                if (braceDepth == 0) {
+                    closing_brace = currentToken_;
+                    found_closing = true;
+                    break;
+                }
+                --braceDepth;
+            }
+        }
+        ++tokenIndex;
+        ++peek;
+    }
+    // Covers both "a nested brace is still open" and "the tokens ran out before the closing
+    // brace", so the default-constructed closing_brace is never treated as a real token.
+    if (!found_closing) {
+        reportError("Unmatched braces in function body");
+    }
+
+    // NOTE(review): the braces are looked up by value, which relies on Token equality telling
+    // these two tokens apart from every other brace in tokens_ - confirm.
+    std::vector<Lexer::Tokens::Token> filtered_tokens;
+    auto startIt = std::find(tokens_.begin(), tokens_.end(), opening_brace);
+    auto endIt   = std::find(tokens_.begin(), tokens_.end(), closing_brace);
+
+    if (startIt != tokens_.end() && endIt != tokens_.end() && startIt < endIt) {
+        filtered_tokens = std::vector<Lexer::Tokens::Token>(startIt + 1, endIt);
+    }
+    // NOTE(review): std::string_view::substr takes (pos, count), so the second argument is used
+    // as a length here, not as an end offset; the view extends past the closing brace. Left
+    // unchanged because the intended span and the use of this view are not visible here.
+    std::string_view input_string = input_str_view_.substr(opening_brace.end_pos, closing_brace.end_pos);
+
+    current_token_index_ = tokenIndex;
+    expect(Lexer::Tokens::Type::PUNCTUATION, "}");
+
+    const std::string newns = Symbols::SymbolContainer::instance()->currentScopeName() + "." + function_name;
+    Symbols::SymbolContainer::instance()->create(newns);
+    // Presumably create() also enters the new scope (it is paired with enterPreviousScope()
+    // below); make sure the previous scope is restored even when the nested parse throws.
+    try {
+        Parser body_parser;
+        body_parser.parseScript(filtered_tokens, input_string, this->current_filename_);
+    } catch (...) {
+        Symbols::SymbolContainer::instance()->enterPreviousScope();
+        throw;
+    }
+    Symbols::SymbolContainer::instance()->enterPreviousScope();
+
+    // Register the function; the reported position is that of the last token inspected by the
+    // scan above (the closing brace when it was found).
+    Interpreter::OperationsFactory::defineFunction(
+        function_name, params, return_type, Symbols::SymbolContainer::instance()->currentScopeName(),
+        this->current_filename_, currentToken_.line_number, currentToken_.column_number);
+}
+
+// Parses an expression that starts at the current token, using an operator stack and an
+// output queue of already-built sub-expressions (shunting-yard style). Parsing stops at the
+// first token that is neither a parenthesis, an arithmetic operator nor an operand.
+//
+//   expected_var_type  forwarded to Lexer::pushOperand for every operand
+//
+// Returns the single expression left in the output queue; any other outcome is reported
+// through reportError().
+ParsedExpressionPtr Parser::parseParsedExpression(const Symbols::Variables::Type & expected_var_type) {
+    std::stack<std::string>          operator_stack;
+    std::vector<ParsedExpressionPtr> output_queue;
+
+    // Takes the operand(s) required by `op` off the output queue and pushes the combined
+    // expression back. "u-" and "u+" are unary; every other operator is binary.
+    const auto reduce = [&](const std::string & op) {
+        if (op == "u-" || op == "u+") {
+            if (output_queue.empty()) {
+                reportError("Missing operand for unary operator");
+            }
+            auto rhs = std::move(output_queue.back());
+            output_queue.pop_back();
+            output_queue.push_back(Lexer::applyOperator(op, std::move(rhs)));
+            return;
+        }
+        if (output_queue.size() < 2) {
+            reportError("Malformed expression");
+        }
+        auto rhs = std::move(output_queue.back());
+        output_queue.pop_back();
+        auto lhs = std::move(output_queue.back());
+        output_queue.pop_back();
+        output_queue.push_back(Lexer::applyOperator(op, std::move(rhs), std::move(lhs)));
+    };
+
+    // True whenever an operator at the current position would be a prefix (unary) operator.
+    bool expect_unary = true;
+
+    while (true) {
+        auto token = currentToken();
+
+        if (token.type == Lexer::Tokens::Type::PUNCTUATION && token.lexeme == "(") {
+            operator_stack.push("(");
+            consumeToken();
+            expect_unary = true;
+        } else if (token.type == Lexer::Tokens::Type::PUNCTUATION && token.lexeme == ")") {
+            consumeToken();
+            // Reduce everything back to the matching "(".
+            while (!operator_stack.empty() && operator_stack.top() != "(") {
+                const std::string op = operator_stack.top();
+                operator_stack.pop();
+                reduce(op);
+            }
+
+            if (operator_stack.empty() || operator_stack.top() != "(") {
+                reportError("Mismatched parentheses");
+            }
+            operator_stack.pop();  // remove "("
+            expect_unary = false;
+        } else if (token.type == Lexer::Tokens::Type::OPERATOR_ARITHMETIC) {
+            std::string op = std::string(token.lexeme);
+
+            if (expect_unary && Lexer::isUnaryOperator(op)) {
+                op = "u" + op;  // e.g. u-, u+ or u!
+            }
+
+            while (!operator_stack.empty()) {
+                // Copy the operator: pop() destroys the stack element, so a reference obtained
+                // from top() must not be used afterwards.
+                const std::string top = operator_stack.top();
+                if (top == "(") {
+                    break;  // a group marker is only removed by its matching ")"
+                }
+
+                const auto op_precedence  = Lexer::getPrecedence(op);
+                const auto top_precedence = Lexer::getPrecedence(top);
+                const bool reduce_top =
+                    Lexer::isLeftAssociative(op) ? op_precedence <= top_precedence : op_precedence < top_precedence;
+                if (!reduce_top) {
+                    break;
+                }
+
+                operator_stack.pop();
+                reduce(top);
+            }
+
+            operator_stack.push(op);
+            consumeToken();
+            expect_unary = true;
+        } else if (token.type == Lexer::Tokens::Type::NUMBER || token.type == Lexer::Tokens::Type::STRING_LITERAL ||
+                   token.type == Lexer::Tokens::Type::KEYWORD ||
+                   token.type == Lexer::Tokens::Type::VARIABLE_IDENTIFIER) {
+            if (Lexer::pushOperand(token, expected_var_type, output_queue) == false) {
+                reportError("Expected literal or variable");
+            }
+            consumeToken();
+            expect_unary = false;
+        } else {
+            break;
+        }
+    }
+
+    // Drain the operator stack.
+    while (!operator_stack.empty()) {
+        const std::string op = operator_stack.top();
+        operator_stack.pop();
+
+        if (op == "(" || op == ")") {
+            reportError("Mismatched parentheses");
+        }
+        reduce(op);
+    }
+
+    if (output_queue.size() != 1) {
+        reportError("Expression could not be parsed cleanly");
+    }
+
+    return std::move(output_queue.back());
+}
+
+// Resets the parser state to the given input and parses statements until the token stream
+// is exhausted or an END_OF_FILE token becomes current.
+//
+//   tokens        token sequence to parse (copied into the parser)
+//   input_string  source text the tokens were produced from
+//   filename      stored as the current file name
+void Parser::parseScript(const std::vector<Lexer::Tokens::Token> & tokens, std::string_view input_string,
+                         const std::string & filename) {
+    tokens_              = tokens;
+    input_str_view_      = input_string;
+    current_token_index_ = 0;
+    current_filename_    = filename;
+
+    // The loop ends only when isAtEnd() holds or END_OF_FILE is current, so a check for
+    // leftover tokens after it could never trigger and is intentionally not performed.
+    while (!isAtEnd() && currentToken().type != Lexer::Tokens::Type::END_OF_FILE) {
+        parseStatement();
+    }
+}
} // namespace Parser
--
Gitblit v1.9.3