voidscript.git - Gitblit

			@@ -1,14 +1,305 @@
			#include "Lexer/Lexer.hpp"

			#include "Lexer/Operators.hpp"
			#include "Symbols/SymbolContainer.hpp"

			// NOTE(review): the six operator/punctuation tables below look like the
			// pre-change side of the diff this text was extracted from. They are kept
			// so that no definition is lost; confirm that they are not also provided
			// through Lexer/Operators.hpp before building, and drop them here if so.
			const std::vector<std::string> Lexer::Lexer::OPERATOR_RELATIONAL = { "==", "!=", "<", ">", "<=", ">=" };
			const std::vector<std::string> Lexer::Lexer::OPERATOR_INCREMENT  = { "++", "--" };
			const std::vector<std::string> Lexer::Lexer::OPERATOR_ASSIGNMENT = { "=", "+=", "-=", "*=", "/=", "%=" };
			const std::vector<std::string> Lexer::Lexer::OPERATOR_LOGICAL    = { "&&", "||" };
			const std::vector<std::string> Lexer::Lexer::OPERATOR_ARITHMETIC = { "+", "-", "*", "/", "%" };
			const std::vector<std::string> Lexer::Lexer::PUNCTUATION         = { "(", ")", "{", "}", "[", "]", ",", ";" };

			/**
			 * @brief Tokenizes the input registered for a namespace and caches the result.
			 *
			 * The namespace is entered on the symbol container before scanning because
			 * input(), pos(), line() and col() all resolve their state through the
			 * container's current scope name.
			 *
			 * @param ns Name of the namespace whose input should be tokenized.
			 * @return All tokens up to and including the END_OF_FILE token, or an empty
			 *         vector when no input was registered for @p ns.
			 */
			std::vector<Lexer::Tokens::Token> Lexer::Lexer::tokenizeNamespace(const std::string & ns) {
			    if (inputs_.find(ns) == inputs_.end()) {
			        return {};
			    }

			    Symbols::SymbolContainer::instance()->enter(ns);

			    std::vector<Tokens::Token> tokens;
			    Tokens::Token              token;
			    do {
			        token = nextToken();
			        tokens.push_back(token);
			    } while (token.type != Tokens::Type::END_OF_FILE);

			    // Keep a copy so getTokens(ns) can serve it later without re-scanning.
			    tokens_[ns] = tokens;
			    return tokens;
			}

			/**
			 * @brief Registers (or replaces) the source text of a namespace.
			 *
			 * Besides storing the text, the scan cursor of the namespace is rewound:
			 * offset 0, line 1, column 1.
			 *
			 * @param ns    Namespace the text belongs to.
			 * @param input Source text to be tokenized later.
			 */
			void Lexer::Lexer::addNamespaceInput(const std::string & ns, const std::string & input) {
			    // Rewind the cursor for this namespace ...
			    column_numbers_[ns] = 1;
			    line_numbers_[ns]   = 1;
			    positions_[ns]      = 0;

			    // ... and remember the text it will walk over.
			    inputs_[ns] = input;
			}

			/**
			 * @brief Looks up the token list stored for a namespace.
			 *
			 * @param ns Namespace name used as the lookup key.
			 * @return A copy of the stored tokens, or an empty vector when nothing is
			 *         stored under @p ns.
			 */
			std::vector<Lexer::Tokens::Token> Lexer::Lexer::getTokens(const std::string & ns) const {
			    if (const auto cached = tokens_.find(ns); cached != tokens_.end()) {
			        return cached->second;
			    }
			    return {};
			}

			/**
			 * @brief Replaces the keyword table consulted by matchIdentifierOrKeyword().
			 *
			 * @param new_keywords Map from keyword spelling to the token type it produces.
			 */
			void Lexer::Lexer::setKeyWords(const std::unordered_map<std::string, Tokens::Type> & new_keywords) {
			    this->keywords = new_keywords;
			}

			/**
			 * @brief Scans and returns the next token of the current namespace.
			 *
			 * Whitespace and comments are skipped first. The first remaining character
			 * selects the matcher: identifier/keyword, number, string literal, or
			 * operator/punctuation. A character that fits none of these is consumed and
			 * returned as an UNKNOWN token so that scanning always makes progress.
			 *
			 * @return The scanned token; END_OF_FILE once the input is exhausted.
			 */
			Lexer::Tokens::Token Lexer::Lexer::nextToken() {
			    skipWhitespaceAndComments();
			    size_t start = pos();

			    if (isAtEnd()) {
			        return createToken(Tokens::Type::END_OF_FILE, start, start);
			    }

			    const char c = peek();
			    // The <cctype> classifiers are undefined for negative values, so plain
			    // char is widened through unsigned char before it is classified.
			    const auto uc = static_cast<unsigned char>(c);

			    if (isalpha(uc) || c == '_') {
			        return matchIdentifierOrKeyword(start);
			    }
			    // A number starts with a digit, or with a dot immediately followed by one.
			    if (isdigit(uc) || (c == '.' && isdigit(static_cast<unsigned char>(peek(1))))) {
			        return matchNumber(start);
			    }
			    if (c == '"' || c == '\'') {
			        return matchStringLiteral(start);
			    }
			    if (operators_.find(c) != std::string_view::npos) {
			        return matchOperatorOrPunctuation(start);
			    }

			    advance();
			    return createToken(Tokens::Type::UNKNOWN, start, pos());
			}

			/**
			 * @brief Returns the source text of the symbol container's current scope.
			 *
			 * @return Reference to the stored input string.
			 * @throws Exception when no input is stored under the current scope name.
			 */
			const std::string & Lexer::Lexer::input() const {
			    const auto & scope = Symbols::SymbolContainer::instance()->currentScopeName();
			    const auto   entry = inputs_.find(scope);
			    if (entry == inputs_.end()) {
			        throw Exception("Input not found in namespace: " + scope);
			    }
			    return entry->second;
			}

			/**
			 * @brief Gives mutable access to the scan offset of the current scope.
			 *
			 * @return Reference to the stored offset.
			 * @throws Exception when no offset is stored under the current scope name.
			 */
			size_t & Lexer::Lexer::pos() {
			    const auto & scope = Symbols::SymbolContainer::instance()->currentScopeName();
			    const auto   entry = positions_.find(scope);
			    if (entry == positions_.end()) {
			        throw Exception("Unknown position in namespace: " + scope);
			    }
			    return entry->second;
			}

			/**
			 * @brief Gives mutable access to the line counter of the current scope.
			 *
			 * @return Reference to the stored line number.
			 * @throws Exception when no line number is stored under the current scope name.
			 */
			int & Lexer::Lexer::line() {
			    const auto & scope = Symbols::SymbolContainer::instance()->currentScopeName();
			    const auto   entry = line_numbers_.find(scope);
			    if (entry == line_numbers_.end()) {
			        throw Exception("Unknown line number in namespace: " + scope);
			    }
			    return entry->second;
			}

			/**
			 * @brief Gives mutable access to the column counter of the current scope.
			 *
			 * @return Reference to the stored column number.
			 * @throws Exception when no column number is stored under the current scope name.
			 */
			int & Lexer::Lexer::col() {
			    const auto & scope = Symbols::SymbolContainer::instance()->currentScopeName();
			    const auto   entry = column_numbers_.find(scope);
			    if (entry == column_numbers_.end()) {
			        throw Exception("Unknown column number in namespace: " + scope);
			    }
			    return entry->second;
			}

			/**
			 * @brief Assembles a token for the half-open input range [start, end).
			 *
			 * Line and column are sampled from the cursor at the moment of the call; the
			 * matchers in this file call it after consuming the token's characters.
			 * Lexeme and value are filled only when the range lies inside the input;
			 * otherwise they keep their default-constructed state.
			 *
			 * @param type  Token type to assign.
			 * @param start Offset of the first character of the token.
			 * @param end   Offset one past the last character of the token.
			 * @param value Explicit token value; when empty, the lexeme text is used.
			 * @return The populated token.
			 */
			Lexer::Tokens::Token Lexer::Lexer::createToken(Tokens::Type type, size_t start, size_t end, const std::string & value) {
			    Tokens::Token result;
			    result.type          = type;
			    result.start_pos     = start;
			    result.end_pos       = end;
			    result.line_number   = line();
			    result.column_number = col();

			    // Inverted range: nothing to slice, and the input is not even looked up.
			    if (start > end) {
			        return result;
			    }

			    const std::string & source = input();
			    if (end > source.length()) {
			        return result;
			    }

			    result.lexeme = std::string_view(source).substr(start, end - start);
			    result.value  = value.empty() ? std::string(result.lexeme) : value;
			    return result;
			}

			/**
			 * @brief Reads a character relative to the cursor without consuming it.
			 *
			 * @param offset Distance from the current position.
			 * @return The character at position + offset, or '\0' when that index is at
			 *         or beyond the end of the input.
			 */
			char Lexer::Lexer::peek(size_t offset) const {
			    const auto & scope = Symbols::SymbolContainer::instance()->currentScopeName();
			    const auto & text  = inputs_.at(scope);
			    const size_t index = positions_.at(scope) + offset;
			    return index < text.length() ? text[index] : '\0';
			}

			/**
			 * @brief Consumes one character and updates the line/column bookkeeping.
			 *
			 * A newline moves to the next line and resets the column to 1; every other
			 * character moves the column one step right.
			 *
			 * @return The character that was under the cursor before advancing.
			 */
			char Lexer::Lexer::advance() {
			    const char consumed = peek();
			    ++pos();

			    if (consumed != '\n') {
			        ++col();
			        return consumed;
			    }

			    ++line();
			    col() = 1;
			    return consumed;
			}

			/**
			 * @brief Tells whether the cursor of the current scope has run off the input.
			 *
			 * @return true when the position is at or past the input length.
			 */
			bool Lexer::Lexer::isAtEnd() const {
			    const auto & scope = Symbols::SymbolContainer::instance()->currentScopeName();
			    return !(positions_.at(scope) < inputs_.at(scope).length());
			}

			/**
			 * @brief Moves the cursor past whitespace and line comments.
			 *
			 * Two comment introducers are recognised: "//" and "#". A comment runs up to,
			 * but not including, the next newline; that newline is then consumed as
			 * ordinary whitespace on the following iteration.
			 */
			void Lexer::Lexer::skipWhitespaceAndComments() {
			    while (!isAtEnd()) {
			        const char c = peek();

			        // isspace() is undefined for negative values, hence the widening cast.
			        if (isspace(static_cast<unsigned char>(c))) {
			            advance();
			            continue;
			        }

			        const bool starts_comment = (c == '/' && peek(1) == '/') || c == '#';
			        if (!starts_comment) {
			            break;
			        }
			        while (!isAtEnd() && peek() != '\n') {
			            advance();
			        }
			    }
			}

			/**
			 * @brief Consumes an identifier-like run and classifies it.
			 *
			 * Characters are consumed while they are alphanumeric or '_'. The text from
			 * @p start_pos to the cursor is then looked up in the keyword table, but only
			 * when @p type is IDENTIFIER; any other requested type is returned as is.
			 *
			 * @param start_pos Offset where the token begins (may lie before the cursor,
			 *                  e.g. on a leading '$').
			 * @param type      Token type to produce when the text is not a keyword.
			 * @return The keyword token, a token of @p type, or UNKNOWN for an empty range.
			 */
			Lexer::Tokens::Token Lexer::Lexer::matchIdentifierOrKeyword(size_t start_pos, Tokens::Type type) {
			    while (!isAtEnd()) {
			        const char c = peek();
			        // isalnum() is undefined for negative values, hence the widening cast.
			        if (!isalnum(static_cast<unsigned char>(c)) && c != '_') {
			            break;
			        }
			        advance();
			    }

			    size_t      end   = pos();
			    std::string value = input().substr(start_pos, end - start_pos);
			    if (value.empty()) {
			        return createToken(Tokens::Type::UNKNOWN, start_pos, end);
			    }

			    if (type == Tokens::Type::IDENTIFIER) {
			        auto it = keywords.find(value);
			        if (it != keywords.end()) {
			            return createToken(it->second, start_pos, end);
			        }
			    }
			    return createToken(type, start_pos, end);
			}

			/**
			 * @brief Consumes a numeric literal: digits with at most one decimal point.
			 *
			 * A dot is accepted only once and only when a digit follows it directly, so
			 * a trailing dot ("1.") or a second dot ("1.2.3") ends the number before it.
			 *
			 * @param start_pos Offset where the literal begins.
			 * @return A NUMBER token covering [start_pos, cursor).
			 */
			Lexer::Tokens::Token Lexer::Lexer::matchNumber(size_t start_pos) {
			    bool has_dot = false;

			    // isdigit() is undefined for negative values, hence the widening casts.
			    while (!isAtEnd()) {
			        if (isdigit(static_cast<unsigned char>(peek()))) {
			            advance();
			        } else if (!has_dot && peek() == '.' && isdigit(static_cast<unsigned char>(peek(1)))) {
			            has_dot = true;
			            advance();  // the dot
			            advance();  // the first digit after the dot
			        } else {
			            break;
			        }
			    }

			    size_t end = pos();
			    return createToken(Tokens::Type::NUMBER, start_pos, end);
			}

			/**
			 * @brief Consumes a quoted string literal and decodes its escape sequences.
			 *
			 * The character under the cursor is taken as the opening quote, and the
			 * literal ends at the next unescaped occurrence of that same character.
			 * Escapes: \n and \t map to newline and tab; any other escaped character
			 * (backslash, either quote, anything unrecognised) stands for itself.
			 *
			 * If the input ends before the closing quote, or right after a backslash,
			 * the literal is unterminated and an UNKNOWN token carrying the raw source
			 * text is returned instead.
			 *
			 * @param start_pos Offset of the opening quote.
			 * @return STRING_LITERAL with the decoded text as value, or UNKNOWN when the
			 *         literal is unterminated.
			 */
			Lexer::Tokens::Token Lexer::Lexer::matchStringLiteral(size_t start_pos) {
			    const char opening_quote = peek();
			    advance();  // skip opening quote

			    std::string value;
			    // Assume the worst until the matching closing quote is actually seen.
			    bool unterminated = true;

			    while (!isAtEnd()) {
			        const char c = advance();
			        if (c == opening_quote) {
			            unterminated = false;
			            break;
			        }
			        if (c != '\\') {
			            value += c;
			            continue;
			        }

			        // A backslash as the very last character has nothing to escape; stop
			        // here so the cursor never moves past the end of the input.
			        if (isAtEnd()) {
			            break;
			        }

			        const char escaped = advance();
			        switch (escaped) {
			            case 'n':
			                value += '\n';
			                break;
			            case 't':
			                value += '\t';
			                break;
			            default:
			                // Covers \\, \" and \' as well as unknown escapes: keep the
			                // escaped character itself, whichever quote opened the literal.
			                value += escaped;
			                break;
			        }
			    }

			    size_t end = pos();
			    if (unterminated) {
			        return createToken(Tokens::Type::UNKNOWN, start_pos, end, input().substr(start_pos, end - start_pos));
			    }
			    return createToken(Tokens::Type::STRING_LITERAL, start_pos, end, value);
			}

			/**
			 * @brief Consumes an operator, a punctuation mark, or a '$'-prefixed variable.
			 *
			 * Matching order:
			 *  1. two-character operators (relational, increment, assignment, logical);
			 *  2. '$' followed by a letter or '_' is handed to matchIdentifierOrKeyword()
			 *     as a VARIABLE_IDENTIFIER that includes the '$';
			 *  3. one-character operators (arithmetic, relational, assignment) and
			 *     punctuation.
			 * Anything else yields an UNKNOWN token for the single consumed character.
			 *
			 * @param start_pos Offset of the first character of the token.
			 * @return The matched token.
			 */
			Lexer::Tokens::Token Lexer::Lexer::matchOperatorOrPunctuation(size_t start_pos) {
			    using OperatorTable = std::pair<const std::vector<std::string> *, Tokens::Type>;

			    const char first_char = advance();  // consume the first character

			    if (!isAtEnd()) {
			        const char        second_char = peek(0);  // look at the next character
			        const std::string two_chars_str{ first_char, second_char };

			        const std::vector<OperatorTable> two_char_op_types = {
			            { &OPERATOR_RELATIONAL, Tokens::Type::OPERATOR_RELATIONAL },
			            { &OPERATOR_INCREMENT,  Tokens::Type::OPERATOR_INCREMENT  },
			            { &OPERATOR_ASSIGNMENT, Tokens::Type::OPERATOR_ASSIGNMENT },
			            { &OPERATOR_LOGICAL,    Tokens::Type::OPERATOR_LOGICAL    }
			        };

			        for (const auto & [vec_ptr, type] : two_char_op_types) {
			            if (matchFromVector(*vec_ptr, two_chars_str)) {
			                advance();  // consume the second character
			                return createToken(type, start_pos, pos());
			            }
			        }
			    }

			    const std::string single_char_str(1, first_char);

			    if (first_char == '$') {
			        const char next = peek(0);
			        // isalpha() is undefined for negative values, hence the widening cast.
			        if (isalpha(static_cast<unsigned char>(next)) || next == '_') {
			            return matchIdentifierOrKeyword(start_pos, Tokens::Type::VARIABLE_IDENTIFIER);
			        }
			    }

			    // The relational table is consulted here too: it holds the one-character
			    // operators "<" and ">", which would otherwise come out as UNKNOWN.
			    const std::vector<OperatorTable> one_char_op_types = {
			        { &OPERATOR_ARITHMETIC, Tokens::Type::OPERATOR_ARITHMETIC },
			        { &OPERATOR_RELATIONAL, Tokens::Type::OPERATOR_RELATIONAL },
			        { &OPERATOR_ASSIGNMENT, Tokens::Type::OPERATOR_ASSIGNMENT },
			        { &PUNCTUATION,         Tokens::Type::PUNCTUATION         }
			    };

			    for (const auto & [vec_ptr, type] : one_char_op_types) {
			        if (matchFromVector(*vec_ptr, single_char_str)) {
			            return createToken(type, start_pos, pos());
			        }
			    }

			    return createToken(Tokens::Type::UNKNOWN, start_pos, pos());
			}

			/**
			 * @brief Checks whether a string occurs in a list of strings.
			 *
			 * @param vec   Candidate strings.
			 * @param value String to look for (exact, whole-string comparison).
			 * @return true when some element of @p vec equals @p value.
			 */
			bool Lexer::Lexer::matchFromVector(const std::vector<std::string> & vec, const std::string & value) {
			    for (const auto & candidate : vec) {
			        if (candidate == value) {
			            return true;
			        }
			    }
			    return false;
			}

			/**
			 * @brief Builds the set of characters that can start or continue an operator.
			 *
			 * Every string of every operator/punctuation table is appended to operators_,
			 * followed by "$". nextToken() searches this string for the current character
			 * to decide whether to call matchOperatorOrPunctuation().
			 */
			Lexer::Lexer::Lexer() {
			    const std::vector<std::string> * const tables[] = {
			        &OPERATOR_ARITHMETIC, &OPERATOR_RELATIONAL, &OPERATOR_INCREMENT,
			        &OPERATOR_ASSIGNMENT, &OPERATOR_LOGICAL,    &PUNCTUATION
			    };

			    for (const auto * table : tables) {
			        for (const auto & symbol : *table) {
			            operators_ += symbol;
			        }
			    }

			    operators_ += "$";
			}