From 86904d513734134beffc29c6f4012d53a99f25c5 Mon Sep 17 00:00:00 2001
From: Ferenc Szontágh <szf@fsociety.hu>
Date: Sun, 13 Apr 2025 15:38:34 +0000
Subject: [PATCH] some clean up, added function declaration
---
src/Lexer.cpp | 134 +++++++++++++++++++++++++++++---------------
1 files changed, 88 insertions(+), 46 deletions(-)
diff --git a/src/Lexer.cpp b/src/Lexer.cpp
index 62e110c..c7067d7 100644
--- a/src/Lexer.cpp
+++ b/src/Lexer.cpp
@@ -2,7 +2,7 @@
#include <cctype>
-#include "Value.hpp"
+#include "options.h"
Lexer::Lexer(const std::string & source, const std::string & filename) :
src(source),
@@ -128,17 +128,18 @@
while (isalpha(peek())) {
lexeme += advance();
}
-
- if (Variables::StringToTypeMap.contains(lexeme)) {
- const auto type = Variables::StringToTypeMap.at(lexeme);
+ auto it = Variables::StringToTypeMap.find(lexeme);
+ if (it != Variables::StringToTypeMap.end()) {
+ const auto & type = it->second;
while (isspace(peek())) {
advance();
}
if (peek() == '$') {
- return variableDeclaration(type);
+ return this->variableDeclaration(type);
}
return { TokenType::Identifier, lexeme, filename, lineNumber, startCol };
}
+
return { TokenType::Identifier, lexeme, filename, lineNumber, startCol };
}
@@ -151,19 +152,15 @@
while (isalnum(peek()) || peek() == '_') {
varName += advance();
}
- switch (type) {
- case Variables::Type::VT_INT:
- return { TokenType::IntDeclaration, varName, filename, lineNumber, startCol };
- case Variables::Type::VT_DOUBLE:
- return { TokenType::DoubleDeclaration, varName, filename, lineNumber, startCol };
- case Variables::Type::VT_STRING:
- return { TokenType::StringDeclaration, varName, filename, lineNumber, startCol };
- default:
- return { TokenType::Unknown, "Invalid variable type in declaration", filename, lineNumber, startCol };
+ for (auto it = Variables::StringToTypeMap.begin(); it != Variables::StringToTypeMap.end(); ++it) {
+ if (it->second == type) {
+ return { getTokenTypeFromValueDeclaration(it->second), varName, filename, lineNumber, startCol };
+ }
}
- } else {
- return { TokenType::Unknown, "$ followed by invalid character in declaration", filename, lineNumber, startCol };
+
+ return { TokenType::Unknown, "Invalid variable type in declaration", filename, lineNumber, startCol };
}
+ return { TokenType::Unknown, "$ followed by invalid character in declaration", filename, lineNumber, startCol };
}
Token Lexer::singleCharToken(TokenType type, const std::string & lexeme) {
@@ -172,15 +169,30 @@
return { type, lexeme, filename, lineNumber, startCol };
}
-bool Lexer::matchSequence(const std::string & sequence) const {
-    if (pos + sequence.length() > src.length()) {
+// Checks whether `sequence` appears in `src` starting at the current position.
+// When caseSensitive is false, both characters are lower-cased with
+// std::tolower before being compared. Nothing is consumed. An empty sequence
+// matches as long as the position does not exceed src.size().
+bool Lexer::matchSequence(const std::string & sequence, bool caseSensitive) const {
+    const size_t wanted = sequence.size();
+    if (this->pos + wanted > src.size()) {
         return false;
     }
-    return src.substr(pos, sequence.length()) == sequence;
+
+    size_t offset = 0;
+    while (offset < wanted) {
+        // Go through unsigned char so std::tolower never sees a negative value.
+        const auto fromSource   = static_cast<unsigned char>(src[this->pos + offset]);
+        const auto fromSequence = static_cast<unsigned char>(sequence[offset]);
+
+        const bool same = caseSensitive
+            ? (fromSource == fromSequence)
+            : (std::tolower(fromSource) == std::tolower(fromSequence));
+        if (!same) {
+            return false;
+        }
+        ++offset;
+    }
+
+    return true;
 }
-void Lexer::matchAndConsume(const std::string & sequence) {
- if (matchSequence(sequence)) {
+void Lexer::matchAndConsume(const std::string & sequence, bool caseSensitive) {
+ if (matchSequence(sequence, caseSensitive)) {
for (size_t i = 0; i < sequence.length(); ++i) {
advance();
}
@@ -189,18 +201,19 @@
std::vector<Token> Lexer::tokenize() {
std::vector<Token> tokens;
+ tokens.reserve(src.size() / 4);
- while (!isAtEnd()) {
- char c = peek();
+ while (pos < src.size()) {
+ char c = src[pos];
if (isspace(c)) {
advance();
continue;
}
if (c == '\n') {
- tokens.push_back(singleCharToken(TokenType::EndOfLine, ""));
+ tokens.push_back(singleCharToken(TokenType::EndOfLine, "\n"));
continue;
}
- if (c == '#') {
+ if (c == COMMENT_CHARACTER) {
tokens.push_back(comment());
advance(); // Skip newline after comment
continue;
@@ -217,27 +230,56 @@
tokens.push_back({ TokenType::ParserCloseTag, PARSER_CLOSE_TAG, filename, lineNumber, startCol });
continue;
}
- if (isalpha(c)) {
- tokens.push_back(keywordOrIdentifier());
- } else if (c == '$') {
- tokens.push_back(variable());
- } else if (isdigit(c)) {
- tokens.push_back(number());
- } else if (c == '"' || c == '\'') {
- tokens.push_back(string());
- } else if (c == '(') {
- tokens.push_back(singleCharToken(TokenType::LeftParenthesis, "("));
- } else if (c == ')') {
- tokens.push_back(singleCharToken(TokenType::RightParenthesis, ")"));
- } else if (c == ',') {
- tokens.push_back(singleCharToken(TokenType::Comma, ","));
- } else if (c == ';') {
- tokens.push_back(singleCharToken(TokenType::Semicolon, ";"));
- } else if (c == '=') {
- tokens.push_back(singleCharToken(TokenType::Equals, "="));
- } else {
- tokens.push_back({ TokenType::Unknown, std::string(1, c), filename, lineNumber, colNumber });
- advance();
+        // Treat "if" as a keyword only when it is a whole word. Without this
+        // boundary check an identifier that merely starts with "if" (for
+        // example "iffy") would be split into an if-statement token followed
+        // by the remainder of the name.
+        const bool ifEndsWord = pos + 2 >= src.size() ||
+            !(std::isalnum(static_cast<unsigned char>(src[pos + 2])) || src[pos + 2] == '_');
+        if (ifEndsWord && matchSequence("if")) {
+            size_t startCol = colNumber;
+            matchAndConsume("if");
+            tokens.push_back({ TokenType::ParserIfStatement, "if", filename, lineNumber, startCol });
+            continue;
+        }
+
+        // Dispatch on the current character. Punctuation gets a dedicated
+        // single-character token; everything else is classified in `default`.
+        switch (c) {
+            case '$':
+                tokens.push_back(variable());
+                break;
+            case '"':
+            case '\'':
+                tokens.push_back(string());
+                break;
+            case '(':
+                tokens.push_back(singleCharToken(TokenType::LeftParenthesis, "("));
+                break;
+            case ')':
+                tokens.push_back(singleCharToken(TokenType::RightParenthesis, ")"));
+                break;
+            case ',':
+                tokens.push_back(singleCharToken(TokenType::Comma, ","));
+                break;
+            case ';':
+                tokens.push_back(singleCharToken(TokenType::Semicolon, ";"));
+                break;
+            case '=':
+                tokens.push_back(singleCharToken(TokenType::Equals, "="));
+                break;
+            case '+':
+                tokens.push_back(singleCharToken(TokenType::Plus, "+"));
+                break;
+            case '{':
+                tokens.push_back(singleCharToken(TokenType::LeftCurlyBracket, "{"));
+                break;
+            case '}':
+                tokens.push_back(singleCharToken(TokenType::RightCurlyBracket, "}"));
+                break;
+            default:
+                // Letters and digits are classified with <cctype> instead of
+                // `case 'a' ... 'z'` ranges, which are a GNU extension and not
+                // standard C++. The unsigned char cast keeps the calls well
+                // defined for bytes outside the ASCII range.
+                if (std::isalpha(static_cast<unsigned char>(c))) {
+                    tokens.push_back(keywordOrIdentifier());
+                } else if (std::isdigit(static_cast<unsigned char>(c))) {
+                    tokens.push_back(number());
+                } else {
+                    // Unrecognised character: report it and step past it so
+                    // the loop always makes progress.
+                    tokens.push_back({ TokenType::Unknown, std::string(1, c), filename, lineNumber, colNumber });
+                    advance();
+                }
+                break;
}
}
--
Gitblit v1.9.3