commit db3d5927fbad73975e5ec4e03a8d1a44fa69f61d
parent 4cfbabbce597dbbabab0a80ee589ccef71a76e6c
Author: bain3 <31798786+bain3@users.noreply.github.com>
Date: Sat, 1 May 2021 16:14:55 +0200
Add if statements
Diffstat:
5 files changed, 216 insertions(+), 62 deletions(-)
diff --git a/lexer/lexer.cpp b/lexer/lexer.cpp
@@ -54,3 +54,20 @@ std::vector<lexer::Token> lexer::Lexer::tokenize(const std::string &part) {
return output;
}
+// Re-tags reserved words in place: a token whose value equals one of the
+// spellings in `reserved` takes over that entry's token type. Entries are
+// searched in order and the first matching spelling wins. Every token is
+// considered, whatever type it currently has; tokens without a match are
+// left untouched.
+void lexer::Lexer::convert_reserved(std::vector<Token> &tokens) {
+    // Returns the first reserved entry that lists `text`, or nullptr if none does.
+    const auto lookup = [this](const std::string &text) -> const ReservedToken * {
+        for (const ReservedToken &entry : reserved) {
+            for (const std::string &spelling : entry.reserved_names) {
+                if (spelling == text) return &entry;
+            }
+        }
+        return nullptr;
+    };
+
+    for (Token &current : tokens) {
+        if (const ReservedToken *hit = lookup(current.value)) {
+            current.type = hit->type;
+        }
+    }
+}
+
diff --git a/lexer/lexer.h b/lexer/lexer.h
@@ -9,7 +9,7 @@ namespace lexer {
enum TokenType {
NAME,
SEMICOLON,
- TYPE_INT,
+ TYPE,
ASSIGNMENT,
PRINT,
NUMBER_LITERAL,
@@ -17,13 +17,14 @@ namespace lexer {
LEFT_PARENT,
RIGHT_PARENT,
LEFT_BRACKET,
- RIGHT_BRACKET
+ RIGHT_BRACKET,
+ IF, EQ
};
enum HandlerType {
SINGLE_CHAR,
- MULTI_CHAR,
REGEX,
- STRING
+ STRING,
+ MULTI_CHAR // moved to the end: its numeric value changes from 1 to 3, while REGEX and STRING each drop by one
};
struct GrammarRule {
TokenType type;
@@ -34,12 +35,18 @@ namespace lexer {
TokenType type;
std::string value;
};
+ struct ReservedToken { // one reserved-word rule: every spelling in reserved_names is re-tagged as `type`
+ TokenType type; // token type that Lexer::convert_reserved assigns on a match
+ std::vector<std::string> reserved_names; // exact spellings compared against Token::value
+ };
class Lexer {
std::vector<GrammarRule> rules;
public:
+ std::vector<ReservedToken> reserved; // reserved-word table read by convert_reserved; public so callers can assign it directly
explicit Lexer(std::vector<GrammarRule> grammar_rules);
std::vector<Token> tokenize_line(const std::string& line);
std::vector<Token> tokenize(const std::string &part);
+ void convert_reserved(std::vector<Token> &tokens); // rewrites, in place, the type of each token whose value is a reserved name
};
namespace handlers {
diff --git a/main.cpp b/main.cpp
@@ -2,6 +2,8 @@
#include "lexer/lexer.h"
#include "parser/parser.h"
+void print_block(parser::elements::Block* block, const int &deep); // forward declaration: print_statement calls print_block before its definition
+
std::string tab(const int &deep) {
std::string o;
for (int i = 0; i < deep; i++) o += " ";
@@ -12,6 +14,11 @@ void print_expression(const std::unique_ptr<parser::elements::Expression> &expre
using namespace parser;
for (const std::unique_ptr<elements::ParserElement> &e : expression->children) {
switch (e->type) {
+ case parser::COMPARE: {
+ auto *compare = (elements::Compare *) e.get();
+ std::cout << tab(deep) << "compare of type " << compare->comparison_type << std::endl;
+ break;
+ }
case parser::CONST_DEFINE: {
auto *constant = (elements::ConstDefine *) e.get();
std::cout << tab(deep) << "constant " << (constant->data_type == INT ? "int: " : "string: ")
@@ -33,6 +40,11 @@ void print_statement(const std::unique_ptr<parser::elements::Statement> &stateme
using namespace parser;
for (std::unique_ptr<elements::ParserElement> &e : statement->children) {
switch (e->type) {
+ case parser::BLOCK: {
+ auto *block = (elements::Block *)e.get();
+ print_block(block, deep+4);
+ break;
+ }
case parser::DECLARATION: {
auto *declaration = (elements::Declaration *) e.get();
std::cout << tab(deep) << "declaring " << declaration->name << " as ";
@@ -59,15 +71,22 @@ void print_statement(const std::unique_ptr<parser::elements::Statement> &stateme
print_expression(print->value, deep + 4);
break;
}
+ case parser::IF: {
+ auto *if_ = (elements::If *)e.get();
+ std::cout << tab(deep) << "if condition" << std::endl;
+ print_expression(if_->expression, deep+4);
+ std::cout << tab(deep) << "end condition" << std::endl;
+ break;
+ }
default:
std::cout << tab(deep) << "doing " << e->type << std::endl;
}
}
}
-void print_block(const parser::elements::Block &block, const int &deep) {
+void print_block(parser::elements::Block* block, const int &deep) {
std::cout << tab(deep) << "block begin" << std::endl;
- for (const std::unique_ptr<parser::elements::Statement> &e : block.children) {
+ for (const std::unique_ptr<parser::elements::Statement> &e : block->children) {
std::cout << tab(deep + 4) << "statement begin" << std::endl;
print_statement(e, deep + 8);
std::cout << tab(deep + 4) << "statement end" << std::endl;
@@ -78,21 +97,39 @@ void print_block(const parser::elements::Block &block, const int &deep) {
int main() {
lexer::Lexer lxr({
{.type=lexer::TokenType::SEMICOLON, .definition=";", .handler=lexer::HandlerType::SINGLE_CHAR},
+ {.type=lexer::TokenType::EQ, .definition="==", .handler=lexer::HandlerType::MULTI_CHAR}, // NOTE(review): placed ahead of the "=" rule, presumably so "==" is matched before a lone "=" - confirm the lexer tries rules in order
{.type=lexer::TokenType::ASSIGNMENT, .definition="=", .handler=lexer::HandlerType::SINGLE_CHAR},
{.type=lexer::TokenType::LEFT_PARENT, .definition="(", .handler=lexer::HandlerType::SINGLE_CHAR},
{.type=lexer::TokenType::RIGHT_PARENT, .definition=")", .handler=lexer::HandlerType::SINGLE_CHAR},
{.type=lexer::TokenType::LEFT_BRACKET, .definition="{", .handler=lexer::HandlerType::SINGLE_CHAR},
{.type=lexer::TokenType::RIGHT_BRACKET, .definition="}", .handler=lexer::HandlerType::SINGLE_CHAR},
- {.type=lexer::TokenType::TYPE_INT, .definition="int", .handler=lexer::HandlerType::MULTI_CHAR},
- {.type=lexer::TokenType::PRINT, .definition="print", .handler=lexer::HandlerType::MULTI_CHAR},
{.type=lexer::TokenType::NAME, .definition=R"([A-Za-z_](?:[\w]+)?)", .handler=lexer::HandlerType::REGEX},
{.type=lexer::TokenType::NUMBER_LITERAL, .definition=R"([0-9]+)", .handler=lexer::HandlerType::REGEX},
{.type=lexer::TokenType::STRING_LITERAL, .handler=lexer::HandlerType::STRING}
});
- std::vector<lexer::Token> out = lxr.tokenize("int i = 120; print \"asdfa\";");
+ lxr.reserved = { // keywords no longer have grammar rules of their own; they are re-tagged by convert_reserved below
+ {lexer::TokenType::TYPE, {"int", "char"}},
+ {lexer::TokenType::PRINT, {"print"}},
+ {lexer::TokenType::IF, {"if"}}
+ };
+ std::vector<lexer::Token> out = lxr.tokenize("int integer = 120; print asdfa; if (integer == 2) {print \"hii\";}"); // sample program exercising a declaration, a print and an if
+ lxr.convert_reserved(out); // re-tag reserved words before the tokens are printed and parsed
for (const lexer::Token &token : out) {
- std::cout << token.type << ": " << token.value << std::endl;
+ std::cout << token.value << " "; // first row: token texts separated by single spaces
+ }
+ std::cout << std::endl;
+ int offset = 0; // padding written before the next type number on the second row
+ for (const lexer::Token &token : out) {
+ std::cout << tab(offset) << token.type; // second row: numeric token type, lined up under the token text above
+ int num = token.type;
+ int digits = num==0; // a type value of 0 still prints one digit
+ while (num > 0) { // count the decimal digits of the type value
+ ++digits;
+ num = num / 10;
+ }
+ offset = token.value.length()-digits+1; // remaining width of this token's column plus the separating space; NOTE(review): unsigned subtraction, wraps if digits > length + 1
}
- parser::elements::Block parsed = parser::parse_block(out, 0);
+ std::cout << std::endl << std::endl;
+ parser::elements::Block* parsed = parser::parse_block(out, 0); // parse_block now returns a pointer to a Block created with new; NOTE(review): it is never deleted here
print_block(parsed, 0);
}
diff --git a/parser/parser.cpp b/parser/parser.cpp
@@ -27,27 +27,51 @@ std::string reconstruct_code(const std::vector<lexer::Token> &tokens) {
return output;
}
-parser::elements::Block parser::parse_block(const std::vector<lexer::Token> &token_stream, int start_at) {
+// Scans `stream` forward from index `start_at` for the token that closes an
+// opener of kind `type`; `start_at` is expected to point just past that opener.
+// LEFT_PARENT is closed by RIGHT_PARENT; any other `type` is closed by
+// RIGHT_BRACKET. Nested openers of the same kind are skipped together with
+// their closers. Returns the index of the closing token, or -1 if the stream
+// ends first.
+int find_matching(const std::vector<lexer::Token> &stream, int start_at, const lexer::TokenType &type) {
+    lexer::TokenType closer = lexer::RIGHT_BRACKET;
+    if (type == lexer::LEFT_PARENT) closer = lexer::RIGHT_PARENT;
+
+    int depth = 0;  // openers seen since start_at that still await their closer
+    for (int pos = start_at; pos < stream.size(); ++pos) {
+        const lexer::TokenType seen = stream[pos].type;
+        if (seen == type) ++depth;
+        if (seen == closer) {
+            if (depth == 0) return pos;
+            --depth;
+        }
+    }
+    return -1;
+}
+
+
+parser::elements::Block *parser::parse_block(const std::vector<lexer::Token> &token_stream, int start_at) {
int consumed = start_at;
int brackets = 0;
- elements::Block block;
+ auto *block = new elements::Block{};
std::vector<lexer::Token> statement_tokens;
while (consumed < token_stream.size()) {
lexer::Token token = token_stream[consumed];
switch (token.type) {
case lexer::SEMICOLON: {
- block.children.push_back(std::unique_ptr<elements::Statement>(parse_statement(statement_tokens)));
+ if (brackets) {
+ statement_tokens.push_back(token);
+ break;
+ }
+ block->children.push_back(std::unique_ptr<elements::Statement>(parse_statement(statement_tokens)));
statement_tokens.clear();
break;
}
- case lexer::LEFT_BRACKET:
+ case lexer::LEFT_BRACKET: {
brackets++;
+ block->children.push_back(std::unique_ptr<elements::Statement>(parse_statement(statement_tokens)));
+ statement_tokens.clear();
statement_tokens.push_back(token);
break;
+ }
case lexer::RIGHT_BRACKET:
- if (!brackets) {
- block.children.push_back(std::unique_ptr<elements::Statement>(parse_statement(statement_tokens)));
+ if (brackets) {
+ block->children.push_back(std::unique_ptr<elements::Statement>(parse_statement(statement_tokens)));
statement_tokens.clear();
+ } else {
+ consumed = token_stream.size();
}
break;
default:
@@ -59,95 +83,150 @@ parser::elements::Block parser::parse_block(const std::vector<lexer::Token> &tok
}
+// Parses the tokens of a single statement into a Statement allocated with new;
+// the raw pointer is returned to the caller. An empty token stream yields an
+// empty Statement. Handled leading tokens:
+//   NAME          - assignment (`name = <one-token expression>`)
+//   TYPE          - declaration; when an "=" follows the name, the loop runs
+//                   once more so the assignment is parsed as a second child
+//   PRINT         - print of a one-token expression
+//   IF            - condition between "(" and its matching ")"
+//   LEFT_BRACKET  - nested block, parsed by parse_block
+// Problems are reported through error().
+// NOTE(review): elements are stored via C-style casts to ParserElement*; the
+// element structs visible in parser.h do not derive from it - confirm that
+// destroying them through unique_ptr<ParserElement> is well-defined.
parser::elements::Statement *parser::parse_statement(const std::vector<lexer::Token> &token_stream) {
+ int stream_size = token_stream.size();
int consumed = 0;
auto *statement = new elements::Statement;
if (token_stream.empty()) {
return statement;
}
while (true) {
+ bool stop = true;
lexer::Token token = token_stream[consumed];
+ // Start from nullptr so the push below is skipped when no case produced an element,
+ // instead of testing an indeterminate pointer.
+ elements::ParserElement *parser_element = nullptr;
switch (token.type) {
case lexer::NAME: {
- if (token_stream.size() < consumed + 2) {
+ if (stream_size < consumed + 2) {
return statement;
}
lexer::Token token2 = token_stream[consumed + 1];
switch (token2.type) {
case lexer::ASSIGNMENT: {
- if (token_stream.size() < consumed + 3) {
+ if (stream_size < consumed + 3) {
error("Nothing to assign.", reconstruct_code(token_stream));
}
- auto *assignment = new elements::Assignment{
+ parser_element = (elements::ParserElement *) new elements::Assignment{
.name = token.value,
.value = std::unique_ptr<elements::Expression>(
- parse_expression(token_stream, consumed + 2)),
+ parse_expression(token_stream, consumed + 2, consumed + 3)),
};
- statement->children.push_back(
- std::unique_ptr<elements::ParserElement>((elements::ParserElement *) assignment));
break;
}
default:
- error("Token: " + token2.value + " unexpected at this point.", reconstruct_code(token_stream));
+ error("Token " + token2.value + " unexpected at this point.", reconstruct_code(token_stream));
}
break;
}
- case lexer::TYPE_INT: {
- if (token_stream.size() < consumed + 2) {
+ case lexer::TYPE: {
+ if (stream_size < consumed + 2) {
error("What am I declaring? Missing name to declare.", reconstruct_code(token_stream));
} else if (token_stream[consumed + 1].type != lexer::TokenType::NAME) {
error("Can only declare names.", reconstruct_code(token_stream));
}
- auto *declaration = new elements::Declaration{
+ parser_element = (elements::ParserElement *) new elements::Declaration{
.name = token_stream[consumed + 1].value,
- .data_type = INT
+ // The data type comes from the type keyword itself (`token`), not from the declared name.
+ .data_type = token.value == "int" ? INT : STRING
};
- statement->children.push_back(
- std::unique_ptr<elements::ParserElement>((elements::ParserElement *) declaration));
if (token_stream.size() > consumed + 2 && token_stream[consumed + 2].type == lexer::ASSIGNMENT) {
consumed++;
- continue; // continue to run the main loop to get the assignment
+ stop = false; // continue to run the main loop to get the assignment
}
+ break;
}
case lexer::PRINT: {
- if (token_stream.size() < consumed + 2) {
+ if (stream_size < consumed + 2) {
error("What am I printing? Missing expression to print.", reconstruct_code(token_stream));
}
- std::unique_ptr<elements::Expression> expr(parse_expression(token_stream, consumed + 1));
- auto *print = new elements::Print{
+ std::unique_ptr<elements::Expression> expr(parse_expression(token_stream, consumed + 1, consumed + 2));
+ parser_element = (elements::ParserElement*)new elements::Print{
.value = std::move(expr)
};
- statement->children.push_back(
- std::unique_ptr<elements::ParserElement>((elements::ParserElement *) print));
+ break;
+ }
+ case lexer::IF: {
+ if (stream_size < consumed + 2 || token_stream[consumed + 1].type != lexer::LEFT_PARENT) {
+ error("Expected (, found: " + token.value, reconstruct_code(token_stream));
+ }
+ // Search starts just past the "(" so that only nested parentheses are counted.
+ int closing_pos = find_matching(token_stream, consumed + 2, lexer::LEFT_PARENT);
+ if (closing_pos == -1) {
+ error("Could not find matching ) for (", reconstruct_code(token_stream));
+ }
+ std::unique_ptr<elements::Expression> expr(parse_expression(token_stream, consumed + 2, closing_pos));
+ parser_element = (elements::ParserElement *) new elements::If{.expression = std::move(expr)};
+ break;
}
- case lexer::LEFT_BRACKET:
+ case lexer::LEFT_BRACKET: {
+ parser_element = (elements::ParserElement *)parse_block(token_stream, consumed + 1);
break;
+ }
default:
- error("Token: " + token.value + " unexpected at this point", reconstruct_code(token_stream));
+ parser_element = new elements::ParserElement();
+ error("Token " + token.value + " unexpected at this point", reconstruct_code(token_stream));
}
- break; // break out of the main loop
+
+ if (parser_element != nullptr) {
+ statement->children.push_back(std::unique_ptr<elements::ParserElement>(parser_element));
+ }
+ if (stop) break; // break out of the main loop
}
return statement;
}
-parser::elements::Expression *parser::parse_expression(const std::vector<lexer::Token> &token_stream, int start_at) {
+parser::elements::Expression *
+parser::parse_expression(const std::vector<lexer::Token> &token_stream, int start_at, int end_at) {
auto *expression = new parser::elements::Expression;
- switch (token_stream[start_at].type) {
- case lexer::STRING_LITERAL:
- case lexer::NUMBER_LITERAL: {
- auto *number = new parser::elements::ConstDefine{
- .data_type = token_stream[start_at].type == lexer::NUMBER_LITERAL ? INT : STRING,
- .value = token_stream[start_at].value
- };
- expression->children.push_back(
- std::unique_ptr<elements::ParserElement>((elements::ParserElement *) number));
- break;
+
+ // add all tokens to the expression
+ for (int i = start_at; i < end_at; i++) {
+ lexer::Token token = token_stream[i];
+ elements::ParserElement *el;
+ switch (token.type) {
+ case lexer::STRING_LITERAL:
+ case lexer::NUMBER_LITERAL:
+ el = (elements::ParserElement *) new parser::elements::ConstDefine{
+ .data_type = token.type == lexer::NUMBER_LITERAL ? INT : STRING,
+ .value = token.value
+ };
+ break;
+ case lexer::NAME:
+ el = (elements::ParserElement *) new parser::elements::Name{
+ .name = token.value
+ };
+ break;
+ case lexer::EQ:
+ el = (elements::ParserElement *) new parser::elements::Compare{.comparison_type=EQUALS};
+ break;
+ default:
+ el = new parser::elements::ParserElement{};
+ error("Token " + token.value + " unexpected at this point", reconstruct_code(token_stream));
+ break;
}
- case lexer::NAME: {
- auto *name = new parser::elements::Name{
- .name = token_stream[start_at].value
- };
- expression->children.push_back(std::unique_ptr<elements::ParserElement>((elements::ParserElement *) name));
- break;
+ expression->children.push_back(std::unique_ptr<elements::ParserElement>(el));
+ }
+
+ // resolve equality
+ for (bool stop = false; !stop;) {
+ stop = true;
+ auto end = expression->children.end();
+ auto begin = expression->children.begin();
+ for (auto i = expression->children.begin();
+ i != expression->children.end(); i++) { // NOLINT(cppcoreguidelines-narrowing-conversions)
+ if ((*i)->type == COMPARE) {
+ auto *c = (elements::Compare *) i->get();
+ if (c->left != nullptr || c->right != nullptr) continue;
+ if (std::distance(begin, i) <= 0 || std::distance(end, i) <= 0) {// || std::distance(end, i) >= 0) {
+ std::cout << std::distance(end, i) << std::endl;
+ error("Missing expression to compare!", reconstruct_code(token_stream));
+ }
+ c->left = std::move(*(--i));
+ auto first = i;
+ i++;
+ i++;
+ c->right = std::move(*i);
+ expression->children.erase(i);
+ expression->children.erase(first);
+ stop = false;
+ break;
+ }
}
}
return expression;
diff --git a/parser/parser.h b/parser/parser.h
@@ -4,6 +4,7 @@
#include <string>
#include <vector>
+#include <list>
#include <memory>
#include "../lexer/lexer.h"
@@ -18,20 +19,24 @@ namespace parser {
PRINT,
CONST_DEFINE,
NAME,
- CALL
+ CALL,
+ COMPARE,
+ IF
};
enum DataType {
INT,
STRING
};
+ enum CompTypes { // kinds of comparison a Compare element can represent
+ EQUALS // set by parse_expression for a lexer EQ token
+ };
namespace elements {
struct ParserElement {
ParserElementType type = NONE;
};
struct Expression {
ParserElementType type = EXPRESSION;
- // TODO: memory leaks?
- std::vector<std::unique_ptr<ParserElement>> children;
+ std::list<std::unique_ptr<ParserElement>> children; // NOTE(review): presumably a list because parse_expression erases operands by iterator while still holding another iterator - confirm
};
struct Statement {
ParserElementType type = STATEMENT;
@@ -39,7 +44,6 @@ namespace parser {
};
struct Block {
ParserElementType type = BLOCK;
- // TODO: memory leaks?
std::vector<std::unique_ptr<Statement>> children;
};
struct Declaration {
@@ -69,13 +73,23 @@ namespace parser {
ParserElementType type = PRINT;
std::unique_ptr<Expression> value;
};
+ struct Compare { // comparison node; parse_expression moves its neighbouring elements into left and right
+ ParserElementType type = COMPARE;
+ CompTypes comparison_type; // which comparison this node performs
+ std::unique_ptr<ParserElement> left; // element that preceded the operator; null until parse_expression resolves it
+ std::unique_ptr<ParserElement> right; // element that followed the operator; null until parse_expression resolves it
+ };
+ struct If { // `if` element; it carries the condition only
+ ParserElementType type = IF;
+ std::unique_ptr<Expression> expression; // condition parsed from the tokens between "(" and its matching ")"
+ };
}
- elements::Block parse_block(const std::vector<lexer::Token> &token_stream, int start_at);
+ elements::Block *parse_block(const std::vector<lexer::Token> &token_stream, int start_at); // now hands back a Block created with new; NOTE(review): raw owning pointer - callers must arrange deletion
elements::Statement *parse_statement(const std::vector<lexer::Token> &token_stream);
- elements::Expression *parse_expression(const std::vector<lexer::Token> &token_stream, int start_at);
+ elements::Expression *parse_expression(const std::vector<lexer::Token> &token_stream, int start_at, int end_at); // converts the tokens at indices [start_at, end_at) into one Expression
}
#endif //SHITSHOW_PARSER_H