| | |
| | | |
| | | void parseScript(const std::vector<Lexer::Tokens::Token> & tokens, std::string_view input_string, |
| | | const std::string & filename); |
| | | |
| | | static const std::unordered_map<std::string, Lexer::Tokens::Type> keywords; |
| | | static const std::unordered_map<Lexer::Tokens::Type, Symbols::Variables::Type> variable_types; |
| | | |
| | |
| | | std::string current_filename_; |
| | | |
| | | // Token stream handling and error-reporting helper functions (unchanged) |
| | | const Lexer::Tokens::Token & currentToken() const { |
| | | if (isAtEnd()) { |
| | | // Technically we should never reach this if parseScript's loop is correct |
| | | // But it's useful as a safety check |
| | | if (!tokens_.empty() && tokens_.back().type == Lexer::Tokens::Type::END_OF_FILE) { |
| | | return tokens_.back(); // return the EOF token |
| | | } |
| | | throw std::runtime_error("Unexpected end of token stream reached."); |
| | | } |
| | | return tokens_[current_token_index_]; |
| | | } |
| | | const Lexer::Tokens::Token & currentToken() const; |
| | | |
| | | // Look ahead in the token stream |
| | | const Lexer::Tokens::Token & peekToken(size_t offset = 1) const { |
| | | if (current_token_index_ + offset >= tokens_.size()) { |
| | | // If at or beyond EOF, return the last token (should be EOF) |
| | | if (!tokens_.empty()) { |
| | | return tokens_.back(); |
| | | } |
| | | throw std::runtime_error("Cannot peek beyond end of token stream."); |
| | | } |
| | | return tokens_[current_token_index_ + offset]; |
| | | } |
| | | const Lexer::Tokens::Token & peekToken(size_t offset = 1) const; |
| | | |
| | | // Consume (advance past) the current token and return it |
| | | Lexer::Tokens::Token consumeToken() { |
| | | if (isAtEnd()) { |
| | | throw std::runtime_error("Cannot consume token at end of stream."); |
| | | } |
| | | return tokens_[current_token_index_++]; |
| | | } |
| | | Lexer::Tokens::Token consumeToken(); |
| | | |
| | | // Check if current token type matches the expected type |
| | | // If so, consume it and return true; otherwise return false |
| | | bool match(Lexer::Tokens::Type expected_type) { |
| | | if (isAtEnd()) { |
| | | return false; |
| | | } |
| | | if (currentToken().type == expected_type) { |
| | | consumeToken(); |
| | | return true; |
| | | } |
| | | return false; |
| | | } |
| | | bool match(Lexer::Tokens::Type expected_type); |
| | | |
| | | // Check if current token type and value match the expected ones |
| | | // Only use value checking for operators and punctuation |
| | | bool match(Lexer::Tokens::Type expected_type, const std::string & expected_value) { |
| | | if (isAtEnd()) { |
| | | return false; |
| | | } |
| | | const auto & token = currentToken(); |
| | | if (token.type == expected_type && token.value == expected_value) { |
| | | consumeToken(); |
| | | return true; |
| | | } |
| | | return false; |
| | | } |
| | | bool match(Lexer::Tokens::Type expected_type, const std::string & expected_value); |
| | | |
| | | Lexer::Tokens::Token expect(Lexer::Tokens::Type expected_type) { |
| | | if (isAtEnd()) { |
| | | reportError("Unexpected end of file, expected token type: " + Lexer::Tokens::TypeToString(expected_type)); |
| | | } |
| | | const auto & token = currentToken(); |
| | | if (token.type == expected_type) { |
| | | return consumeToken(); |
| | | } |
| | | reportError("Expected token type " + Lexer::Tokens::TypeToString(expected_type)); |
| | | // reportError throws; this return is never reached, but may satisfy the compiler |
| | | return token; // or let reportError throw |
| | | } |
| | | Lexer::Tokens::Token expect(Lexer::Tokens::Type expected_type); |
| | | |
| | | // Like expect, but also checks the token's value |
| | | Lexer::Tokens::Token expect(Lexer::Tokens::Type expected_type, const std::string & expected_value) { |
| | | if (isAtEnd()) { |
| | | reportError("Unexpected end of file, expected token: " + Lexer::Tokens::TypeToString(expected_type) + |
| | | " with value '" + expected_value + "'"); |
| | | } |
| | | const auto & token = currentToken(); |
| | | if (token.type == expected_type && token.value == expected_value) { |
| | | return consumeToken(); |
| | | } |
| | | reportError("Expected token " + Lexer::Tokens::TypeToString(expected_type) + " with value '" + expected_value + |
| | | "'"); |
| | | return token; // reportError throws |
| | | } |
| | | Lexer::Tokens::Token expect(Lexer::Tokens::Type expected_type, const std::string & expected_value); |
| | | |
| | | // Check if we've reached the end of relevant tokens (just before EOF) |
| | | bool isAtEnd() const { |
| | | // We're at the end if the index equals the number of tokens, |
| | | // or if only the EOF token remains (as the last element) |
| | | return current_token_index_ >= tokens_.size() || |
| | | (current_token_index_ == tokens_.size() - 1 && tokens_.back().type == Lexer::Tokens::Type::END_OF_FILE); |
| | | } |
| | | bool isAtEnd() const; |
| | | |
| | | [[noreturn]] void reportError(const std::string & message, const std::string & expected = "") { |
| | | if (current_token_index_ < tokens_.size()) { |
| | |
| | | } |
| | | |
| | | // parseStatement (updated to handle return) |
| | | void parseStatement() { |
| | | const auto & token_type = currentToken().type; |
| | | // if-else conditional |
| | | if (token_type == Lexer::Tokens::Type::KEYWORD && currentToken().value == "if") { |
| | | parseIfStatement(); |
| | | return; |
| | | } |
| | | |
| | | if (token_type == Lexer::Tokens::Type::KEYWORD_FUNCTION_DECLARATION) { |
| | | parseFunctionDefinition(); |
| | | return; |
| | | } |
| | | // Return statement |
| | | if (token_type == Lexer::Tokens::Type::KEYWORD_RETURN) { |
| | | parseReturnStatement(); |
| | | return; |
| | | } |
| | | |
| | | // Variable definition if leading token matches a type keyword |
| | | if (Parser::variable_types.find(token_type) != Parser::variable_types.end()) { |
| | | parseVariableDefinition(); |
| | | return; |
| | | } |
| | | // Function call if identifier followed by '(' |
| | | if (currentToken().type == Lexer::Tokens::Type::IDENTIFIER && |
| | | peekToken().type == Lexer::Tokens::Type::PUNCTUATION && peekToken().value == "(") { |
| | | parseCallStatement(); |
| | | return; |
| | | } |
| | | // Assignment statement at top-level |
| | | if (currentToken().type == Lexer::Tokens::Type::VARIABLE_IDENTIFIER) { |
| | | size_t offset = 1; |
| | | // Skip member access chain |
| | | while (peekToken(offset).type == Lexer::Tokens::Type::PUNCTUATION && peekToken(offset).value == "->") { |
| | | offset += 2; |
| | | } |
| | | const auto & look = peekToken(offset); |
| | | if (look.type == Lexer::Tokens::Type::OPERATOR_ASSIGNMENT && look.value == "=") { |
| | | parseAssignmentStatement(); |
| | | return; |
| | | } |
| | | } |
| | | |
| | | reportError("Unexpected token at beginning of statement"); |
| | | } |
| | | |
| | | void parseVariableDefinition(); |
| | | void parseFunctionDefinition(); |
| | | void parseStatement(); |
| | | void parseVariableDefinition(); |
| | | void parseFunctionDefinition(); |
| | | // Parse a top-level function call statement (e.g., foo(arg1, arg2);) |
| | | void parseCallStatement(); |
| | | void parseCallStatement(); |
| | | // Parse a top-level assignment statement (variable or object member) |
| | | void parseAssignmentStatement(); |
| | | void parseAssignmentStatement(); |
| | | // Parse a return statement (e.g., return; or return expr;) |
| | | void parseReturnStatement(); |
| | | // Parse an if-else conditional statement |
| | | void parseIfStatement(); |
| | | // Parse a for-in loop over object members |
| | | void parseForStatement(); |
| | | void parseReturnStatement(); |
| | | // Parse an if-else conditional statement (at top-level) |
| | | void parseIfStatement(); |
| | | // Parse a for-in loop over object members (at top-level) |
| | | void parseForStatement(); |
| | | // Parse an if-else conditional block and return a StatementNode (for nested blocks) |
| | | std::unique_ptr<Interpreter::StatementNode> parseIfStatementNode(); |
| | | // Parse a for-in loop over object members and return a StatementNode (for nested blocks) |
| | | std::unique_ptr<Interpreter::StatementNode> parseForStatementNode(); |
| | | // Parse a statement node for use inside blocks (not added to operation container) |
| | | std::unique_ptr<Interpreter::StatementNode> parseStatementNode(); |
| | | |
| | |
| | | |
| | | // type : KEYWORD_STRING | KEYWORD_INT | KEYWORD_DOUBLE |
| | | // Returns the corresponding Symbols::Variables::Type enum and consumes the token |
| | | Symbols::Variables::Type parseType() { |
| | | const auto & token = currentToken(); |
| | | // Direct lookup for type keyword |
| | | auto it = Parser::variable_types.find(token.type); |
| | | if (it != Parser::variable_types.end()) { |
| | | consumeToken(); |
| | | return it->second; |
| | | } |
| | | reportError("Expected type keyword (string, int, double, float)"); |
| | | } |
| | | |
| | | Symbols::Value parseValue(Symbols::Variables::Type expected_var_type) { |
| | | Lexer::Tokens::Token token = currentToken(); |
| | | bool is_negative = false; |
| | | |
| | | // Handle unary sign |
| | | if (token.type == Lexer::Tokens::Type::OPERATOR_ARITHMETIC && (token.lexeme == "-" || token.lexeme == "+") && |
| | | peekToken().type == Lexer::Tokens::Type::NUMBER) { |
| | | is_negative = (token.lexeme == "-"); |
| | | token = peekToken(); |
| | | consumeToken(); // consumed the sign |
| | | } |
| | | |
| | | // STRING type |
| | | if (expected_var_type == Symbols::Variables::Type::STRING) { |
| | | if (token.type == Lexer::Tokens::Type::STRING_LITERAL) { |
| | | consumeToken(); |
| | | return Symbols::Value(token.value); |
| | | } |
| | | reportError("Expected string literal value"); |
| | | } |
| | | |
| | | // BOOLEAN type |
| | | if (expected_var_type == Symbols::Variables::Type::BOOLEAN) { |
| | | if (token.type == Lexer::Tokens::Type::KEYWORD && (token.value == "true" || token.value == "false")) { |
| | | consumeToken(); |
| | | return Symbols::Value(token.value == "true"); |
| | | } |
| | | reportError("Expected boolean literal value (true or false)"); |
| | | } |
| | | |
| | | // NUMERIC types |
| | | if (expected_var_type == Symbols::Variables::Type::INTEGER || |
| | | expected_var_type == Symbols::Variables::Type::DOUBLE || |
| | | expected_var_type == Symbols::Variables::Type::FLOAT) { |
| | | if (token.type == Lexer::Tokens::Type::NUMBER) { |
| | | Symbols::Value val = parseNumericLiteral(token.value, is_negative, expected_var_type); |
| | | consumeToken(); |
| | | return val; |
| | | } |
| | | |
| | | reportError("Expected numeric literal value"); |
| | | } |
| | | |
| | | reportError("Unsupported variable type encountered during value parsing"); |
| | | return Symbols::Value(); // compiler happy |
| | | } |
| | | |
| | | Symbols::Value parseNumericLiteral(const std::string & value, bool is_negative, Symbols::Variables::Type type); |
| | | |
| | | void parseFunctionBody(const Lexer::Tokens::Token & opening_brace, const std::string & function_name, |
| | | Symbols::Variables::Type return_type, const Symbols::FunctionParameterInfo & params); |
| | | |
| | | Symbols::Variables::Type parseType(); |
| | | Symbols::Value parseValue(Symbols::Variables::Type expected_var_type); |
| | | Symbols::Value parseNumericLiteral(const std::string & value, bool is_negative, Symbols::Variables::Type type); |
| | | void parseFunctionBody(const Lexer::Tokens::Token & opening_brace, const std::string & function_name, |
| | | Symbols::Variables::Type return_type, const Symbols::FunctionParameterInfo & params); |
| | | ParsedExpressionPtr parseParsedExpression(const Symbols::Variables::Type & expected_var_type); |
| | | |
| | | }; // class Parser |