From 86904d513734134beffc29c6f4012d53a99f25c5 Mon Sep 17 00:00:00 2001
From: Ferenc Szontágh <szf@fsociety.hu>
Date: Sun, 13 Apr 2025 15:38:34 +0000
Subject: [PATCH] Some cleanup; added function declaration

---
 src/Lexer.cpp |  134 +++++++++++++++++++++++++++++---------------
 1 file changed, 88 insertions(+), 46 deletions(-)

diff --git a/src/Lexer.cpp b/src/Lexer.cpp
index 62e110c..c7067d7 100644
--- a/src/Lexer.cpp
+++ b/src/Lexer.cpp
@@ -2,7 +2,7 @@
 
 #include <cctype>
 
-#include "Value.hpp"
+#include "options.h"
 
 Lexer::Lexer(const std::string & source, const std::string & filename) :
     src(source),
@@ -128,17 +128,18 @@
     while (isalpha(peek())) {
         lexeme += advance();
     }
-
-    if (Variables::StringToTypeMap.contains(lexeme)) {
-        const auto type = Variables::StringToTypeMap.at(lexeme);
+    auto it = Variables::StringToTypeMap.find(lexeme);
+    if (it != Variables::StringToTypeMap.end()) {
+        const auto & type = it->second;
         while (isspace(peek())) {
             advance();
         }
         if (peek() == '$') {
-            return variableDeclaration(type);
+            return this->variableDeclaration(type);
         }
         return { TokenType::Identifier, lexeme, filename, lineNumber, startCol };
     }
+
     return { TokenType::Identifier, lexeme, filename, lineNumber, startCol };
 }
 
@@ -151,19 +152,15 @@
         while (isalnum(peek()) || peek() == '_') {
             varName += advance();
         }
-        switch (type) {
-            case Variables::Type::VT_INT:
-                return { TokenType::IntDeclaration, varName, filename, lineNumber, startCol };
-            case Variables::Type::VT_DOUBLE:
-                return { TokenType::DoubleDeclaration, varName, filename, lineNumber, startCol };
-            case Variables::Type::VT_STRING:
-                return { TokenType::StringDeclaration, varName, filename, lineNumber, startCol };
-            default:
-                return { TokenType::Unknown, "Invalid variable type in declaration", filename, lineNumber, startCol };
+        for (auto it = Variables::StringToTypeMap.begin(); it != Variables::StringToTypeMap.end(); ++it) {
+            if (it->second == type) {
+                return { getTokenTypeFromValueDeclaration(it->second), varName, filename, lineNumber, startCol };
+            }
         }
-    } else {
-        return { TokenType::Unknown, "$ followed by invalid character in declaration", filename, lineNumber, startCol };
+
+        return { TokenType::Unknown, "Invalid variable type in declaration", filename, lineNumber, startCol };
     }
+    return { TokenType::Unknown, "$ followed by invalid character in declaration", filename, lineNumber, startCol };
 }
 
 Token Lexer::singleCharToken(TokenType type, const std::string & lexeme) {
@@ -172,15 +169,30 @@
     return { type, lexeme, filename, lineNumber, startCol };
 }
 
-bool Lexer::matchSequence(const std::string & sequence) const {
-    if (pos + sequence.length() > src.length()) {
+bool Lexer::matchSequence(const std::string & sequence, bool caseSensitive) const {  // true if src at pos begins with sequence; reads only, pos is not advanced
+    if (this->pos + sequence.size() > src.size()) {  // not enough input left for a full match
         return false;
     }
-    return src.substr(pos, sequence.length()) == sequence;
+    // Compare one character at a time so case folding can be applied to each pair.
+    for (size_t i = 0; i < sequence.size(); ++i) {
+        char srcChar = src[this->pos + i];
+        char seqChar = sequence[i];
+
+        if (!caseSensitive) {  // case-insensitive mode: lower-case both sides before comparing
+            srcChar = std::tolower(static_cast<unsigned char>(srcChar));  // cast first: tolower is undefined for negative char values
+            seqChar = std::tolower(static_cast<unsigned char>(seqChar));
+        }
+
+        if (srcChar != seqChar) {  // first mismatch ends the comparison
+            return false;
+        }
+    }
+    // Every character matched; an empty sequence also ends up here.
+    return true;
 }
 
-void Lexer::matchAndConsume(const std::string & sequence) {
-    if (matchSequence(sequence)) {
+void Lexer::matchAndConsume(const std::string & sequence, bool caseSensitive) {
+    if (matchSequence(sequence, caseSensitive)) {
         for (size_t i = 0; i < sequence.length(); ++i) {
             advance();
         }
@@ -189,18 +201,19 @@
 
 std::vector<Token> Lexer::tokenize() {
     std::vector<Token> tokens;
+    tokens.reserve(src.size() / 4);
 
-    while (!isAtEnd()) {
-        char c = peek();
+    while (pos < src.size()) {
+        char c = src[pos];
         if (isspace(c)) {
             advance();
             continue;
         }
         if (c == '\n') {
-            tokens.push_back(singleCharToken(TokenType::EndOfLine, ""));
+            tokens.push_back(singleCharToken(TokenType::EndOfLine, "\n"));
             continue;
         }
-        if (c == '#') {
+        if (c == COMMENT_CHARACTER) {
             tokens.push_back(comment());
             advance();  // Skip newline after comment
             continue;
@@ -217,27 +230,56 @@
             tokens.push_back({ TokenType::ParserCloseTag, PARSER_CLOSE_TAG, filename, lineNumber, startCol });
             continue;
         }
-        if (isalpha(c)) {
-            tokens.push_back(keywordOrIdentifier());
-        } else if (c == '$') {
-            tokens.push_back(variable());
-        } else if (isdigit(c)) {
-            tokens.push_back(number());
-        } else if (c == '"' || c == '\'') {
-            tokens.push_back(string());
-        } else if (c == '(') {
-            tokens.push_back(singleCharToken(TokenType::LeftParenthesis, "("));
-        } else if (c == ')') {
-            tokens.push_back(singleCharToken(TokenType::RightParenthesis, ")"));
-        } else if (c == ',') {
-            tokens.push_back(singleCharToken(TokenType::Comma, ","));
-        } else if (c == ';') {
-            tokens.push_back(singleCharToken(TokenType::Semicolon, ";"));
-        } else if (c == '=') {
-            tokens.push_back(singleCharToken(TokenType::Equals, "="));
-        } else {
-            tokens.push_back({ TokenType::Unknown, std::string(1, c), filename, lineNumber, colNumber });
-            advance();
+        if (matchSequence("if")) {
+            size_t startCol = colNumber;
+            matchAndConsume("if");
+            tokens.push_back({ TokenType::ParserIfStatement, "if", filename, lineNumber, startCol });
+            continue;
+        }
+
+        switch (c) {
+            case 'a' ... 'z':
+            case 'A' ... 'Z':
+                tokens.push_back(keywordOrIdentifier());
+                break;
+            case '$':
+                tokens.push_back(variable());
+                break;
+            case '0' ... '9':
+                tokens.push_back(number());
+                break;
+            case '"':
+            case '\'':
+                tokens.push_back(string());
+                break;
+            case '(':
+                tokens.push_back(singleCharToken(TokenType::LeftParenthesis, "("));
+                break;
+            case ')':
+                tokens.push_back(singleCharToken(TokenType::RightParenthesis, ")"));
+                break;
+            case ',':
+                tokens.push_back(singleCharToken(TokenType::Comma, ","));
+                break;
+            case ';':
+                tokens.push_back(singleCharToken(TokenType::Semicolon, ";"));
+                break;
+            case '=':
+                tokens.push_back(singleCharToken(TokenType::Equals, "="));
+                break;
+            case '+':
+                tokens.push_back(singleCharToken(TokenType::Plus, "+"));
+                break;
+            case '{':
+                tokens.push_back(singleCharToken(TokenType::LeftCurlyBracket, "{"));
+                break;
+            case '}':
+                tokens.push_back(singleCharToken(TokenType::RightCurlyBracket, "}"));
+                break;
+            default:
+                tokens.push_back({ TokenType::Unknown, std::string(1, c), filename, lineNumber, colNumber });
+                advance();
+                break;
         }
     }
 

--
Gitblit v1.9.3