From cb3065c34756a70cb6006fc25777ce3e720ff1a8 Mon Sep 17 00:00:00 2001
From: Ferenc Szontágh <szf@fsociety.hu>
Date: Sun, 13 Apr 2025 18:16:19 +0000
Subject: [PATCH] implement variable contexts, add function body store and parsing

---
 src/Lexer.cpp |  164 +++++++++++++++++++++++++++++++++---------------------
 1 files changed, 100 insertions(+), 64 deletions(-)

diff --git a/src/Lexer.cpp b/src/Lexer.cpp
index c7067d7..6d003a3 100644
--- a/src/Lexer.cpp
+++ b/src/Lexer.cpp
@@ -35,31 +35,62 @@
     return pos >= src.size();
 }
 
-Token Lexer::string() {
+// Builds a token whose span is the lexeme.length() characters ending at the current charNumber/colNumber.
+Token Lexer::createToken(TokenType type, const std::string & lexeme) const {
+    const size_t length  = lexeme.length();
+    const size_t endChar = charNumber;
+    return { type, lexeme, filename, lineNumber, colNumber - length, { endChar - length, endChar } };
+}
+
+// Consumes one character via advance() and returns a token spanning it; the start
+// column and start offset are captured before advancing.
+Token Lexer::createSingleCharToken(TokenType type, const std::string & lexeme) {
+    const size_t beginCol  = colNumber;
+    const size_t beginChar = charNumber;
+    advance();
+    return { type, lexeme, filename, lineNumber, beginCol, { beginChar, charNumber } };
+}
+
+// Unknown token ending at the current position; back-offsets are clamped so a long diagnostic text cannot wrap size_t.
+Token Lexer::createUnknownToken(const std::string & lexeme) const {
+    const size_t backCol  = lexeme.length() < colNumber ? lexeme.length() : colNumber;
+    const size_t backChar = lexeme.length() < charNumber ? lexeme.length() : charNumber;
+    return { TokenType::Unknown, lexeme, filename, lineNumber, colNumber - backCol, { charNumber - backChar, charNumber } };
+}
+
+// Lexes a quoted string literal. The scan starts on the opening quote and stops at the
+// next occurrence of that same quote character; reaching end of input first yields an
+// Unknown "Unterminated string" token. The span is measured in charNumber at both ends.
+Token Lexer::stringToken() {
     std::string result;
-    size_t      startCol = colNumber;
+    size_t      startChar = charNumber;
+    size_t      startCol  = colNumber;
+    const char  quote     = peek();  // closing delimiter must match the opener
     advance();  // Skip opening quote
-    while (!isAtEnd() && peek() != '"') {
+    while (!isAtEnd() && peek() != quote) {
         result += advance();
     }
-    if (isAtEnd() || peek() != '"') {
-        return { TokenType::Unknown, "Unterminated string", filename, lineNumber, startCol };
+    if (isAtEnd() || peek() != quote) {
+        return { TokenType::Unknown, "Unterminated string", filename, lineNumber, startCol, { startChar, charNumber } };
     }
     advance();  // Skip closing quote
-    return { TokenType::StringLiteral, result, filename, lineNumber, startCol };
+    return { TokenType::StringLiteral, result, filename, lineNumber, startCol, { startChar, charNumber } };
 }
 
-Token Lexer::number() {
+Token Lexer::numberToken() {
     std::string result;
     std::string found;
     TokenType   type             = TokenType::Unknown;
     bool        decimalPointSeen = false;
+    size_t      startChar        = charNumber;
     size_t      startCol         = colNumber;
 
     while (std::isdigit(peek()) || peek() == '.') {
         if (peek() == '.') {
             if (decimalPointSeen) {
-                return { TokenType::Unknown, "Invalid number format", filename, lineNumber, startCol };
+                return {
+                    TokenType::Unknown, "Invalid number format", filename, lineNumber, startCol, { startChar, pos }
+                };
             }
             decimalPointSeen = true;
         }
@@ -72,34 +103,46 @@
                 result = found;
                 type   = TokenType::IntLiteral;
             } else {
-                return { TokenType::Unknown, "Invalid integer", filename, lineNumber, startCol };
+                return {
+                    TokenType::Unknown, "Invalid integer", filename, lineNumber, startCol, { startChar, pos }
+                };
             }
         } else {
             if (is_number<double>(found)) {
                 result = found;
                 type   = TokenType::DoubleLiteral;
             } else {
-                return { TokenType::Unknown, "Invalid double", filename, lineNumber, startCol };
+                return {
+                    TokenType::Unknown, "Invalid double", filename, lineNumber, startCol, { startChar, pos }
+                };
             }
         }
     } else {
-        return { TokenType::Unknown, "Expected number", filename, lineNumber, startCol };
+        return {
+            TokenType::Unknown, "Expected number", filename, lineNumber, startCol, { startChar, pos }
+        };
     }
 
-    return { type, result, filename, lineNumber, startCol };
+    return {
+        type, result, filename, lineNumber, startCol, { startChar, pos }
+    };
 }
 
-Token Lexer::identifier() {
+// Lexes a run of alphanumeric/underscore characters as an Identifier token.
+Token Lexer::identifierToken() {
     std::string result;
-    size_t      startCol = colNumber;
-    while (isalnum(peek()) || peek() == '_') {
+    size_t      startChar = charNumber;
+    size_t      startCol  = colNumber;
+    // <cctype> classifiers are undefined for negative char values, so go through unsigned char.
+    while (isalnum(static_cast<unsigned char>(peek())) || peek() == '_') {
         result += advance();
     }
-    return { TokenType::Identifier, result, filename, lineNumber, startCol };
+    return { TokenType::Identifier, result, filename, lineNumber, startCol, { startChar, charNumber } };
 }
 
-Token Lexer::variable() {
-    size_t startCol = colNumber;
+Token Lexer::variableToken() {
+    size_t startChar = charNumber;
+    size_t startCol  = colNumber;
     advance();  // Skip $
     std::string varName;
     if (isalpha(peek()) || peek() == '_') {
@@ -107,24 +150,30 @@
         while (isalnum(peek()) || peek() == '_') {
             varName += advance();
         }
-        return { TokenType::Variable, varName, filename, lineNumber, startCol };
+        return {
+            TokenType::Variable, varName, filename, lineNumber, startCol, { startChar, pos }
+        };
     }
-    return { TokenType::Unknown, "$ followed by invalid character", filename, lineNumber, startCol };
+    return {
+        TokenType::Unknown, "$ followed by invalid character", filename, lineNumber, startCol, { startChar, pos }
+    };
 }
 
-Token Lexer::comment() {
-    size_t startCol = colNumber;
+// Lexes a comment: skips the leading comment character, then collects text up to (not
+// including) the next newline or end of input. The span is measured in charNumber at both ends.
+Token Lexer::commentToken() {
+    size_t startChar = charNumber;
+    size_t startCol  = colNumber;
     advance();  // Skip #
     std::string commentText;
     while (!isAtEnd() && peek() != '\n') {
         commentText += advance();
     }
-    return { TokenType::Comment, commentText, filename, lineNumber, startCol };
+    return { TokenType::Comment, commentText, filename, lineNumber, startCol, { startChar, charNumber } };
 }
 
-Token Lexer::keywordOrIdentifier() {
+Token Lexer::keywordOrIdentifierToken() {
     std::string lexeme;
-    size_t      startCol = colNumber;
     while (isalpha(peek())) {
         lexeme += advance();
     }
@@ -135,16 +184,14 @@
             advance();
         }
         if (peek() == '$') {
-            return this->variableDeclaration(type);
+            return this->variableDeclarationToken(type);
         }
-        return { TokenType::Identifier, lexeme, filename, lineNumber, startCol };
+        return createToken(TokenType::Identifier, lexeme);
     }
-
-    return { TokenType::Identifier, lexeme, filename, lineNumber, startCol };
+    return createToken(TokenType::Identifier, lexeme);
 }
 
-Token Lexer::variableDeclaration(Variables::Type type) {
-    size_t startCol = colNumber;
+Token Lexer::variableDeclarationToken(Variables::Type type) {
     advance();  // Skip $
     std::string varName;
     if (isalpha(peek()) || peek() == '_') {
@@ -154,40 +201,29 @@
         }
         for (auto it = Variables::StringToTypeMap.begin(); it != Variables::StringToTypeMap.end(); ++it) {
             if (it->second == type) {
-                return { getTokenTypeFromValueDeclaration(it->second), varName, filename, lineNumber, startCol };
+                return createToken(getTokenTypeFromValueDeclaration(it->second), varName);
             }
         }
-
-        return { TokenType::Unknown, "Invalid variable type in declaration", filename, lineNumber, startCol };
+        return createUnknownToken("Invalid variable type in declaration");
     }
-    return { TokenType::Unknown, "$ followed by invalid character in declaration", filename, lineNumber, startCol };
-}
-
-Token Lexer::singleCharToken(TokenType type, const std::string & lexeme) {
-    size_t startCol = colNumber;
-    advance();
-    return { type, lexeme, filename, lineNumber, startCol };
+    return createUnknownToken("$ followed by invalid character in declaration");
 }
 
 bool Lexer::matchSequence(const std::string & sequence, bool caseSensitive) const {
     if (this->pos + sequence.size() > src.size()) {
         return false;
     }
-
     for (size_t i = 0; i < sequence.size(); ++i) {
         char srcChar = src[this->pos + i];
         char seqChar = sequence[i];
-
         if (!caseSensitive) {
             srcChar = std::tolower(static_cast<unsigned char>(srcChar));
             seqChar = std::tolower(static_cast<unsigned char>(seqChar));
         }
-
         if (srcChar != seqChar) {
             return false;
         }
     }
-
     return true;
 }
 
@@ -205,84 +241,84 @@
 
     while (pos < src.size()) {
         char c = src[pos];
+
         if (isspace(c)) {
             advance();
             continue;
         }
         if (c == '\n') {
-            tokens.push_back(singleCharToken(TokenType::EndOfLine, "\n"));
+            tokens.push_back(createSingleCharToken(TokenType::EndOfLine, "\n"));
             continue;
         }
         if (c == COMMENT_CHARACTER) {
-            tokens.push_back(comment());
+            tokens.push_back(commentToken());
             advance();  // Skip newline after comment
             continue;
         }
         if (matchSequence(PARSER_OPEN_TAG)) {
-            size_t startCol = colNumber;
             matchAndConsume(PARSER_OPEN_TAG);
-            tokens.push_back({ TokenType::ParserOpenTag, PARSER_OPEN_TAG, filename, lineNumber, startCol });
+            tokens.push_back(createToken(TokenType::ParserOpenTag, PARSER_OPEN_TAG));
             continue;
         }
         if (matchSequence(PARSER_CLOSE_TAG)) {
-            size_t startCol = colNumber;
             matchAndConsume(PARSER_CLOSE_TAG);
-            tokens.push_back({ TokenType::ParserCloseTag, PARSER_CLOSE_TAG, filename, lineNumber, startCol });
+            tokens.push_back(createToken(TokenType::ParserCloseTag, PARSER_CLOSE_TAG));
             continue;
         }
         if (matchSequence("if")) {
-            size_t startCol = colNumber;
             matchAndConsume("if");
-            tokens.push_back({ TokenType::ParserIfStatement, "if", filename, lineNumber, startCol });
+            tokens.push_back(createToken(TokenType::ParserIfStatement, "if"));
             continue;
         }
 
         switch (c) {
             case 'a' ... 'z':
             case 'A' ... 'Z':
-                tokens.push_back(keywordOrIdentifier());
+                tokens.push_back(keywordOrIdentifierToken());
                 break;
             case '$':
-                tokens.push_back(variable());
+                tokens.push_back(variableToken());
                 break;
             case '0' ... '9':
-                tokens.push_back(number());
+                tokens.push_back(numberToken());
                 break;
             case '"':
             case '\'':
-                tokens.push_back(string());
+                tokens.push_back(stringToken());
                 break;
             case '(':
-                tokens.push_back(singleCharToken(TokenType::LeftParenthesis, "("));
+                tokens.push_back(createSingleCharToken(TokenType::LeftParenthesis, "("));
                 break;
             case ')':
-                tokens.push_back(singleCharToken(TokenType::RightParenthesis, ")"));
+                tokens.push_back(createSingleCharToken(TokenType::RightParenthesis, ")"));
                 break;
             case ',':
-                tokens.push_back(singleCharToken(TokenType::Comma, ","));
+                tokens.push_back(createSingleCharToken(TokenType::Comma, ","));
                 break;
             case ';':
-                tokens.push_back(singleCharToken(TokenType::Semicolon, ";"));
+                tokens.push_back(createSingleCharToken(TokenType::Semicolon, ";"));
                 break;
             case '=':
-                tokens.push_back(singleCharToken(TokenType::Equals, "="));
+                tokens.push_back(createSingleCharToken(TokenType::Equals, "="));
                 break;
             case '+':
-                tokens.push_back(singleCharToken(TokenType::Plus, "+"));
+                tokens.push_back(createSingleCharToken(TokenType::Plus, "+"));
                 break;
             case '{':
-                tokens.push_back(singleCharToken(TokenType::LeftCurlyBracket, "{"));
+                tokens.push_back(createSingleCharToken(TokenType::LeftCurlyBracket, "{"));
                 break;
             case '}':
-                tokens.push_back(singleCharToken(TokenType::RightCurlyBracket, "}"));
+                tokens.push_back(createSingleCharToken(TokenType::RightCurlyBracket, "}"));
                 break;
             default:
-                tokens.push_back({ TokenType::Unknown, std::string(1, c), filename, lineNumber, colNumber });
+                tokens.push_back(createUnknownToken(std::string(1, c)));
                 advance();
                 break;
         }
     }
 
-    tokens.push_back({ TokenType::EndOfFile, "", filename, lineNumber, colNumber });
+    tokens.push_back({
+        TokenType::EndOfFile, "", filename, lineNumber, colNumber, { charNumber, charNumber }
+    });
     return tokens;
 }

--
Gitblit v1.9.3