shitshow

A shitty programming language
git clone git://git.bain.cz/shitshow.git
Log | Files | Refs

commit db3d5927fbad73975e5ec4e03a8d1a44fa69f61d
parent 4cfbabbce597dbbabab0a80ee589ccef71a76e6c
Author: bain3 <31798786+bain3@users.noreply.github.com>
Date:   Sat,  1 May 2021 16:14:55 +0200

Add if statements

Diffstat:
Mlexer/lexer.cpp | 17+++++++++++++++++
Mlexer/lexer.h | 15+++++++++++----
Mmain.cpp | 51++++++++++++++++++++++++++++++++++++++++++++-------
Mparser/parser.cpp | 169++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------
Mparser/parser.h | 26++++++++++++++++++++------
5 files changed, 216 insertions(+), 62 deletions(-)

diff --git a/lexer/lexer.cpp b/lexer/lexer.cpp @@ -54,3 +54,20 @@ std::vector<lexer::Token> lexer::Lexer::tokenize(const std::string &part) { return output; } +void lexer::Lexer::convert_reserved(std::vector<Token> &tokens) { + // SCARY!! + for (Token &token : tokens) { + bool found = false; + for (const ReservedToken &rt : reserved) { + for (const std::string &s : rt.reserved_names) { + if (token.value == s) { + token.type = rt.type; + found = true; + break; + } + } + if (found) break; + } + } +} + diff --git a/lexer/lexer.h b/lexer/lexer.h @@ -9,7 +9,7 @@ namespace lexer { enum TokenType { NAME, SEMICOLON, - TYPE_INT, + TYPE, ASSIGNMENT, PRINT, NUMBER_LITERAL, @@ -17,13 +17,14 @@ namespace lexer { LEFT_PARENT, RIGHT_PARENT, LEFT_BRACKET, - RIGHT_BRACKET + RIGHT_BRACKET, + IF, EQ }; enum HandlerType { SINGLE_CHAR, - MULTI_CHAR, REGEX, - STRING + STRING, + MULTI_CHAR }; struct GrammarRule { TokenType type; @@ -34,12 +35,18 @@ namespace lexer { TokenType type; std::string value; }; + struct ReservedToken { + TokenType type; + std::vector<std::string> reserved_names; + }; class Lexer { std::vector<GrammarRule> rules; public: + std::vector<ReservedToken> reserved; explicit Lexer(std::vector<GrammarRule> grammar_rules); std::vector<Token> tokenize_line(const std::string& line); std::vector<Token> tokenize(const std::string &part); + void convert_reserved(std::vector<Token> &tokens); }; namespace handlers { diff --git a/main.cpp b/main.cpp @@ -2,6 +2,8 @@ #include "lexer/lexer.h" #include "parser/parser.h" +void print_block(parser::elements::Block* block, const int &deep); + std::string tab(const int &deep) { std::string o; for (int i = 0; i < deep; i++) o += " "; @@ -12,6 +14,11 @@ void print_expression(const std::unique_ptr<parser::elements::Expression> &expre using namespace parser; for (const std::unique_ptr<elements::ParserElement> &e : expression->children) { switch (e->type) { + case parser::COMPARE: { + auto *compare = (elements::Compare *) e.get(); + std::cout << tab(deep) << "compare of type " << compare->comparison_type << std::endl; + break; + } case parser::CONST_DEFINE: { auto *constant = (elements::ConstDefine *) e.get(); std::cout << tab(deep) << "constant " << (constant->data_type == INT ? "int: " : "string: ") @@ -33,6 +40,11 @@ void print_statement(const std::unique_ptr<parser::elements::Statement> &stateme using namespace parser; for (std::unique_ptr<elements::ParserElement> &e : statement->children) { switch (e->type) { + case parser::BLOCK: { + auto *block = (elements::Block *)e.get(); + print_block(block, deep+4); + break; + } case parser::DECLARATION: { auto *declaration = (elements::Declaration *) e.get(); std::cout << tab(deep) << "declaring " << declaration->name << " as "; @@ -59,15 +71,22 @@ void print_statement(const std::unique_ptr<parser::elements::Statement> &stateme print_expression(print->value, deep + 4); break; } + case parser::IF: { + auto *if_ = (elements::If *)e.get(); + std::cout << tab(deep) << "if condition" << std::endl; + print_expression(if_->expression, deep+4); + std::cout << tab(deep) << "end condition" << std::endl; + break; + } default: std::cout << tab(deep) << "doing " << e->type << std::endl; } } } -void print_block(const parser::elements::Block &block, const int &deep) { +void print_block(parser::elements::Block* block, const int &deep) { std::cout << tab(deep) << "block begin" << std::endl; - for (const std::unique_ptr<parser::elements::Statement> &e : block.children) { + for (const std::unique_ptr<parser::elements::Statement> &e : block->children) { std::cout << tab(deep + 4) << "statement begin" << std::endl; print_statement(e, deep + 8); std::cout << tab(deep + 4) << "statement end" << std::endl; @@ -78,21 +97,39 @@ void print_block(const parser::elements::Block &block, const int &deep) { int main() { lexer::Lexer lxr({ {.type=lexer::TokenType::SEMICOLON, .definition=";", .handler=lexer::HandlerType::SINGLE_CHAR}, + {.type=lexer::TokenType::EQ, .definition="==", .handler=lexer::HandlerType::MULTI_CHAR}, {.type=lexer::TokenType::ASSIGNMENT, .definition="=", .handler=lexer::HandlerType::SINGLE_CHAR}, {.type=lexer::TokenType::LEFT_PARENT, .definition="(", .handler=lexer::HandlerType::SINGLE_CHAR}, {.type=lexer::TokenType::RIGHT_PARENT, .definition=")", .handler=lexer::HandlerType::SINGLE_CHAR}, {.type=lexer::TokenType::LEFT_BRACKET, .definition="{", .handler=lexer::HandlerType::SINGLE_CHAR}, {.type=lexer::TokenType::RIGHT_BRACKET, .definition="}", .handler=lexer::HandlerType::SINGLE_CHAR}, - {.type=lexer::TokenType::TYPE_INT, .definition="int", .handler=lexer::HandlerType::MULTI_CHAR}, - {.type=lexer::TokenType::PRINT, .definition="print", .handler=lexer::HandlerType::MULTI_CHAR}, {.type=lexer::TokenType::NAME, .definition=R"([A-Za-z_](?:[\w]+)?)", .handler=lexer::HandlerType::REGEX}, {.type=lexer::TokenType::NUMBER_LITERAL, .definition=R"([0-9]+)", .handler=lexer::HandlerType::REGEX}, {.type=lexer::TokenType::STRING_LITERAL, .handler=lexer::HandlerType::STRING} }); - std::vector<lexer::Token> out = lxr.tokenize("int i = 120; print \"asdfa\";"); + lxr.reserved = { + {lexer::TokenType::TYPE, {"int", "char"}}, + {lexer::TokenType::PRINT, {"print"}}, + {lexer::TokenType::IF, {"if"}} + }; + std::vector<lexer::Token> out = lxr.tokenize("int integer = 120; print asdfa; if (integer == 2) {print \"hii\";}"); + lxr.convert_reserved(out); for (const lexer::Token &token : out) { - std::cout << token.type << ": " << token.value << std::endl; + std::cout << token.value << " "; + } + std::cout << std::endl; + int offset = 0; + for (const lexer::Token &token : out) { + std::cout << tab(offset) << token.type; + int num = token.type; + int digits = num==0; + while (num > 0) { + ++digits; + num = num / 10; + } + offset = token.value.length()-digits+1; } - parser::elements::Block parsed = parser::parse_block(out, 0); + std::cout << std::endl << std::endl; + parser::elements::Block* parsed = parser::parse_block(out, 0); print_block(parsed, 0); } diff --git a/parser/parser.cpp b/parser/parser.cpp @@ -27,27 +27,51 @@ std::string reconstruct_code(const std::vector<lexer::Token> &tokens) { return output; } -parser::elements::Block parser::parse_block(const std::vector<lexer::Token> &token_stream, int start_at) { +int find_matching(const std::vector<lexer::Token> &stream, int start_at, const lexer::TokenType &type) { + lexer::TokenType matching = type == lexer::LEFT_PARENT ? lexer::RIGHT_PARENT : lexer::RIGHT_BRACKET; + int count = 0; + while (start_at < stream.size()) { + if (stream[start_at].type == type) count++; + if (stream[start_at].type == matching) { + if (!count) return start_at; + count--; + } + start_at++; + } + return -1; +} + + +parser::elements::Block *parser::parse_block(const std::vector<lexer::Token> &token_stream, int start_at) { int consumed = start_at; int brackets = 0; - elements::Block block; + auto *block = new elements::Block{}; std::vector<lexer::Token> statement_tokens; while (consumed < token_stream.size()) { lexer::Token token = token_stream[consumed]; switch (token.type) { case lexer::SEMICOLON: { - block.children.push_back(std::unique_ptr<elements::Statement>(parse_statement(statement_tokens))); + if (brackets) { + statement_tokens.push_back(token); + break; + } + block->children.push_back(std::unique_ptr<elements::Statement>(parse_statement(statement_tokens))); statement_tokens.clear(); break; } - case lexer::LEFT_BRACKET: + case lexer::LEFT_BRACKET: { brackets++; + block->children.push_back(std::unique_ptr<elements::Statement>(parse_statement(statement_tokens))); + statement_tokens.clear(); statement_tokens.push_back(token); break; + } case lexer::RIGHT_BRACKET: - if (!brackets) { - block.children.push_back(std::unique_ptr<elements::Statement>(parse_statement(statement_tokens))); + if (brackets) { + block->children.push_back(std::unique_ptr<elements::Statement>(parse_statement(statement_tokens))); statement_tokens.clear(); + } else { + consumed = token_stream.size(); } break; default: @@ -59,95 +83,150 @@ parser::elements::Block parser::parse_block(const std::vector<lexer::Token> &tok } parser::elements::Statement *parser::parse_statement(const std::vector<lexer::Token> &token_stream) { + int stream_size = token_stream.size(); int consumed = 0; auto *statement = new elements::Statement; if (token_stream.empty()) { return statement; } while (true) { + bool stop = true; lexer::Token token = token_stream[consumed]; + elements::ParserElement *parser_element; switch (token.type) { case lexer::NAME: { - if (token_stream.size() < consumed + 2) { + if (stream_size < consumed + 2) { return statement; } lexer::Token token2 = token_stream[consumed + 1]; switch (token2.type) { case lexer::ASSIGNMENT: { - if (token_stream.size() < consumed + 3) { + if (stream_size < consumed + 3) { error("Nothing to assign.", reconstruct_code(token_stream)); } - auto *assignment = new elements::Assignment{ + parser_element = (elements::ParserElement *) new elements::Assignment{ .name = token.value, .value = std::unique_ptr<elements::Expression>( - parse_expression(token_stream, consumed + 2)), + parse_expression(token_stream, consumed + 2, consumed + 3)), }; - statement->children.push_back( - std::unique_ptr<elements::ParserElement>((elements::ParserElement *) assignment)); break; } default: - error("Token: " + token2.value + " unexpected at this point.", reconstruct_code(token_stream)); + error("Token " + token2.value + " unexpected at this point.", reconstruct_code(token_stream)); } break; } - case lexer::TYPE_INT: { - if (token_stream.size() < consumed + 2) { + case lexer::TYPE: { + if (stream_size < consumed + 2) { error("What am I declaring? Missing name to declare.", reconstruct_code(token_stream)); } else if (token_stream[consumed + 1].type != lexer::TokenType::NAME) { error("Can only declare names.", reconstruct_code(token_stream)); } - auto *declaration = new elements::Declaration{ + parser_element = (elements::ParserElement *) new elements::Declaration{ .name = token_stream[consumed + 1].value, - .data_type = INT + .data_type = token_stream[consumed + 1].value == "int" ? INT : STRING }; - statement->children.push_back( - std::unique_ptr<elements::ParserElement>((elements::ParserElement *) declaration)); if (token_stream.size() > consumed + 2 && token_stream[consumed + 2].type == lexer::ASSIGNMENT) { consumed++; - continue; // continue to run the main loop to get the assignment + stop = false; // continue to run the main loop to get the assignment } + break; } case lexer::PRINT: { - if (token_stream.size() < consumed + 2) { + if (stream_size < consumed + 2) { error("What am I printing? Missing expression to print.", reconstruct_code(token_stream)); } - std::unique_ptr<elements::Expression> expr(parse_expression(token_stream, consumed + 1)); - auto *print = new elements::Print{ + std::unique_ptr<elements::Expression> expr(parse_expression(token_stream, consumed + 1, consumed + 2)); + parser_element = (elements::ParserElement*)new elements::Print{ .value = std::move(expr) }; - statement->children.push_back( - std::unique_ptr<elements::ParserElement>((elements::ParserElement *) print)); + break; + } + case lexer::IF: { + if (stream_size < consumed + 2 || token_stream[consumed + 1].type != lexer::LEFT_PARENT) { + error("Expected (, found: " + token.value, reconstruct_code(token_stream)); + } + int closing_pos = find_matching(token_stream, consumed + 2, lexer::LEFT_PARENT); + if (closing_pos == -1) { + error("Could not find matching ) for (", reconstruct_code(token_stream)); + } + std::unique_ptr<elements::Expression> expr(parse_expression(token_stream, consumed + 2, closing_pos)); + parser_element = (elements::ParserElement *) new elements::If{.expression = std::move(expr)}; + break; } - case lexer::LEFT_BRACKET: + case lexer::LEFT_BRACKET: { + parser_element = (elements::ParserElement *)parse_block(token_stream, consumed + 1); break; + } default: - error("Token: " + token.value + " unexpected at this point", reconstruct_code(token_stream)); + parser_element = new elements::ParserElement(); + error("Token " + token.value + " unexpected at this point", reconstruct_code(token_stream)); } - break; // break out of the main loop + + if (parser_element != nullptr) { + statement->children.push_back(std::unique_ptr<elements::ParserElement>(parser_element)); + } + if (stop) break; // break out of the main loop } return statement; } -parser::elements::Expression *parser::parse_expression(const std::vector<lexer::Token> &token_stream, int start_at) { +parser::elements::Expression * +parser::parse_expression(const std::vector<lexer::Token> &token_stream, int start_at, int end_at) { auto *expression = new parser::elements::Expression; - switch (token_stream[start_at].type) { - case lexer::STRING_LITERAL: - case lexer::NUMBER_LITERAL: { - auto *number = new parser::elements::ConstDefine{ - .data_type = token_stream[start_at].type == lexer::NUMBER_LITERAL ? INT : STRING, - .value = token_stream[start_at].value - }; - expression->children.push_back( - std::unique_ptr<elements::ParserElement>((elements::ParserElement *) number)); - break; + + // add all tokens to the expression + for (int i = start_at; i < end_at; i++) { + lexer::Token token = token_stream[i]; + elements::ParserElement *el; + switch (token.type) { + case lexer::STRING_LITERAL: + case lexer::NUMBER_LITERAL: + el = (elements::ParserElement *) new parser::elements::ConstDefine{ + .data_type = token.type == lexer::NUMBER_LITERAL ? INT : STRING, + .value = token.value + }; + break; + case lexer::NAME: + el = (elements::ParserElement *) new parser::elements::Name{ + .name = token.value + }; + break; + case lexer::EQ: + el = (elements::ParserElement *) new parser::elements::Compare{.comparison_type=EQUALS}; + break; + default: + el = new parser::elements::ParserElement{}; + error("Token " + token.value + " unexpected at this point", reconstruct_code(token_stream)); + break; } - case lexer::NAME: { - auto *name = new parser::elements::Name{ - .name = token_stream[start_at].value - }; - expression->children.push_back(std::unique_ptr<elements::ParserElement>((elements::ParserElement *) name)); - break; + expression->children.push_back(std::unique_ptr<elements::ParserElement>(el)); + } + + // resolve equality + for (bool stop = false; !stop;) { + stop = true; + auto end = expression->children.end(); + auto begin = expression->children.begin(); + for (auto i = expression->children.begin(); + i != expression->children.end(); i++) { // NOLINT(cppcoreguidelines-narrowing-conversions) + if ((*i)->type == COMPARE) { + auto *c = (elements::Compare *) i->get(); + if (c->left != nullptr || c->right != nullptr) continue; + if (std::distance(begin, i) <= 0 || std::distance(end, i) <= 0) {// || std::distance(end, i) >= 0) { + std::cout << std::distance(end, i) << std::endl; + error("Missing expression to compare!", reconstruct_code(token_stream)); + } + c->left = std::move(*(--i)); + auto first = i; + i++; + i++; + c->right = std::move(*i); + expression->children.erase(i); + expression->children.erase(first); + stop = false; + break; + } } } return expression; diff --git a/parser/parser.h b/parser/parser.h @@ -4,6 +4,7 @@ #include <string> #include <vector> +#include <list> #include <memory> #include "../lexer/lexer.h" @@ -18,20 +19,24 @@ namespace parser { PRINT, CONST_DEFINE, NAME, - CALL + CALL, + COMPARE, + IF }; enum DataType { INT, STRING }; + enum CompTypes { + EQUALS + }; namespace elements { struct ParserElement { ParserElementType type = NONE; }; struct Expression { ParserElementType type = EXPRESSION; - // TODO: memory leaks? - std::vector<std::unique_ptr<ParserElement>> children; + std::list<std::unique_ptr<ParserElement>> children; }; struct Statement { ParserElementType type = STATEMENT; @@ -39,7 +44,6 @@ namespace parser { }; struct Block { ParserElementType type = BLOCK; - // TODO: memory leaks? std::vector<std::unique_ptr<Statement>> children; }; struct Declaration { @@ -69,13 +73,23 @@ namespace parser { ParserElementType type = PRINT; std::unique_ptr<Expression> value; }; + struct Compare { + ParserElementType type = COMPARE; + CompTypes comparison_type; + std::unique_ptr<ParserElement> left; + std::unique_ptr<ParserElement> right; + }; + struct If { + ParserElementType type = IF; + std::unique_ptr<Expression> expression; + }; } - elements::Block parse_block(const std::vector<lexer::Token> &token_stream, int start_at); + elements::Block *parse_block(const std::vector<lexer::Token> &token_stream, int start_at); elements::Statement *parse_statement(const std::vector<lexer::Token> &token_stream); - elements::Expression *parse_expression(const std::vector<lexer::Token> &token_stream, int start_at); + elements::Expression *parse_expression(const std::vector<lexer::Token> &token_stream, int start_at, int end_at); } #endif //SHITSHOW_PARSER_H