#include "Lexer.hpp" #include #include "options.h" Lexer::Lexer(const std::string & source, const std::string & filename) : src(source), pos(0), filename(filename), lineNumber(1), colNumber(1), charNumber(0) {} char Lexer::peek() const { return pos < src.size() ? src[pos] : '\0'; } char Lexer::advance() { if (pos >= src.size()) { return '\0'; } char c = src[pos++]; if (c == '\n') { this->lineNumber++; this->colNumber = 1; } else { this->colNumber++; } this->charNumber++; return c; } bool Lexer::isAtEnd() const { return pos >= src.size(); } Token Lexer::string() { std::string result; size_t startCol = colNumber; advance(); // Skip opening quote while (!isAtEnd() && peek() != '"') { result += advance(); } if (isAtEnd() || peek() != '"') { return { TokenType::Unknown, "Unterminated string", filename, lineNumber, startCol }; } advance(); // Skip closing quote return { TokenType::StringLiteral, result, filename, lineNumber, startCol }; } Token Lexer::number() { std::string result; std::string found; TokenType type = TokenType::Unknown; bool decimalPointSeen = false; size_t startCol = colNumber; while (std::isdigit(peek()) || peek() == '.') { if (peek() == '.') { if (decimalPointSeen) { return { TokenType::Unknown, "Invalid number format", filename, lineNumber, startCol }; } decimalPointSeen = true; } found.append(1, advance()); } if (!found.empty()) { if (found.find('.') == std::string::npos) { if (is_number(found)) { result = found; type = TokenType::IntLiteral; } else { return { TokenType::Unknown, "Invalid integer", filename, lineNumber, startCol }; } } else { if (is_number(found)) { result = found; type = TokenType::DoubleLiteral; } else { return { TokenType::Unknown, "Invalid double", filename, lineNumber, startCol }; } } } else { return { TokenType::Unknown, "Expected number", filename, lineNumber, startCol }; } return { type, result, filename, lineNumber, startCol }; } Token Lexer::identifier() { std::string result; size_t startCol = colNumber; while (isalnum(peek()) || peek() == '_') { result += advance(); } return { TokenType::Identifier, result, filename, lineNumber, startCol }; } Token Lexer::variable() { size_t startCol = colNumber; advance(); // Skip $ std::string varName; if (isalpha(peek()) || peek() == '_') { varName += advance(); while (isalnum(peek()) || peek() == '_') { varName += advance(); } return { TokenType::Variable, varName, filename, lineNumber, startCol }; } return { TokenType::Unknown, "$ followed by invalid character", filename, lineNumber, startCol }; } Token Lexer::comment() { size_t startCol = colNumber; advance(); // Skip # std::string commentText; while (!isAtEnd() && peek() != '\n') { commentText += advance(); } return { TokenType::Comment, commentText, filename, lineNumber, startCol }; } Token Lexer::keywordOrIdentifier() { std::string lexeme; size_t startCol = colNumber; while (isalpha(peek())) { lexeme += advance(); } auto it = Variables::StringToTypeMap.find(lexeme); if (it != Variables::StringToTypeMap.end()) { const auto & type = it->second; while (isspace(peek())) { advance(); } if (peek() == '$') { return this->variableDeclaration(type); } return { TokenType::Identifier, lexeme, filename, lineNumber, startCol }; } return { TokenType::Identifier, lexeme, filename, lineNumber, startCol }; } Token Lexer::variableDeclaration(Variables::Type type) { size_t startCol = colNumber; advance(); // Skip $ std::string varName; if (isalpha(peek()) || peek() == '_') { varName += advance(); while (isalnum(peek()) || peek() == '_') { varName += advance(); } for (auto it = Variables::StringToTypeMap.begin(); it != Variables::StringToTypeMap.end(); ++it) { if (it->second == type) { return { getTokenTypeFromValueDeclaration(it->second), varName, filename, lineNumber, startCol }; } } return { TokenType::Unknown, "Invalid variable type in declaration", filename, lineNumber, startCol }; } return { TokenType::Unknown, "$ followed by invalid character in declaration", filename, lineNumber, startCol }; } Token Lexer::singleCharToken(TokenType type, const std::string & lexeme) { size_t startCol = colNumber; advance(); return { type, lexeme, filename, lineNumber, startCol }; } bool Lexer::matchSequence(const std::string & sequence, bool caseSensitive) const { if (this->pos + sequence.size() > src.size()) { return false; } for (size_t i = 0; i < sequence.size(); ++i) { char srcChar = src[this->pos + i]; char seqChar = sequence[i]; if (!caseSensitive) { srcChar = std::tolower(static_cast(srcChar)); seqChar = std::tolower(static_cast(seqChar)); } if (srcChar != seqChar) { return false; } } return true; } void Lexer::matchAndConsume(const std::string & sequence, bool caseSensitive) { if (matchSequence(sequence, caseSensitive)) { for (size_t i = 0; i < sequence.length(); ++i) { advance(); } } } std::vector Lexer::tokenize() { std::vector tokens; tokens.reserve(src.size() / 4); while (pos < src.size()) { char c = src[pos]; if (isspace(c)) { advance(); continue; } if (c == '\n') { tokens.push_back(singleCharToken(TokenType::EndOfLine, "\n")); continue; } if (c == COMMENT_CHARACTER) { tokens.push_back(comment()); advance(); // Skip newline after comment continue; } if (matchSequence(PARSER_OPEN_TAG)) { size_t startCol = colNumber; matchAndConsume(PARSER_OPEN_TAG); tokens.push_back({ TokenType::ParserOpenTag, PARSER_OPEN_TAG, filename, lineNumber, startCol }); continue; } if (matchSequence(PARSER_CLOSE_TAG)) { size_t startCol = colNumber; matchAndConsume(PARSER_CLOSE_TAG); tokens.push_back({ TokenType::ParserCloseTag, PARSER_CLOSE_TAG, filename, lineNumber, startCol }); continue; } if (matchSequence("if")) { size_t startCol = colNumber; matchAndConsume("if"); tokens.push_back({ TokenType::ParserIfStatement, "if", filename, lineNumber, startCol }); continue; } switch (c) { case 'a' ... 'z': case 'A' ... 'Z': tokens.push_back(keywordOrIdentifier()); break; case '$': tokens.push_back(variable()); break; case '0' ... '9': tokens.push_back(number()); break; case '"': case '\'': tokens.push_back(string()); break; case '(': tokens.push_back(singleCharToken(TokenType::LeftParenthesis, "(")); break; case ')': tokens.push_back(singleCharToken(TokenType::RightParenthesis, ")")); break; case ',': tokens.push_back(singleCharToken(TokenType::Comma, ",")); break; case ';': tokens.push_back(singleCharToken(TokenType::Semicolon, ";")); break; case '=': tokens.push_back(singleCharToken(TokenType::Equals, "=")); break; case '+': tokens.push_back(singleCharToken(TokenType::Plus, "+")); break; case '{': tokens.push_back(singleCharToken(TokenType::LeftCurlyBracket, "{")); break; case '}': tokens.push_back(singleCharToken(TokenType::RightCurlyBracket, "}")); break; default: tokens.push_back({ TokenType::Unknown, std::string(1, c), filename, lineNumber, colNumber }); advance(); break; } } tokens.push_back({ TokenType::EndOfFile, "", filename, lineNumber, colNumber }); return tokens; }