commit 7f952e501defa8e5814de456b865ca10cf40abe3
parent f3db7a7e88f9e2973f93d4879bb3ba3e841638a6
Author: bain3 <31798786+bain3@users.noreply.github.com>
Date: Sat, 1 May 2021 21:50:43 +0200
Add parsing of arithmetic, move parser elements
Diffstat:
7 files changed, 241 insertions(+), 101 deletions(-)
diff --git a/error_codes.txt b/error_codes.txt
diff --git a/lexer/handlers.cpp b/lexer/handlers.cpp
@@ -47,6 +47,9 @@ int lexer::handlers::string_handler(const GrammarRule &rule, Token &token, const
for (consumed = 1; consumed < length && input[consumed] != '"'; consumed++) {
val += input[consumed];
}
+ if (consumed == length && input[consumed] != '"') {
+ return 0;
+ }
}
if (consumed) {
consumed++;
diff --git a/lexer/lexer.h b/lexer/lexer.h
@@ -18,7 +18,7 @@ namespace lexer {
RIGHT_PARENT,
LEFT_BRACKET,
RIGHT_BRACKET,
- IF, EQ
+ IF, EQ, ARITHMETIC
};
enum HandlerType {
SINGLE_CHAR,
diff --git a/main.cpp b/main.cpp
@@ -10,24 +10,70 @@ std::string tab(const int &deep) {
return o;
}
+// Prints one leaf element (a constant or a name) on its own line, indented by
+// tab(deep). Any other element type produces a "not name or const!" line.
+// The element is reinterpreted as the concrete struct selected by its `type` tag.
+void print_name_or_const(parser::elements::ParserElement *el, int deep) {
+    switch (el->type) {
+        case parser::ParserElementType::CONST_DEFINE: {
+            const auto *literal = (parser::elements::ConstDefine *) el;
+            const char *label = literal->data_type == parser::DataType::INT ? "int: " : "string: ";
+            std::cout << tab(deep) << "constant " << label << literal->value << std::endl;
+            break;
+        }
+        case parser::ParserElementType::NAME: {
+            const auto *identifier = (parser::elements::Name *) el;
+            std::cout << tab(deep) << "name " << identifier->name << std::endl;
+            break;
+        }
+        default:
+            std::cout << tab(deep) << "not name or const!" << std::endl;
+            break;
+    }
+}
+
+// Recursively prints an arithmetic node: the operation name at indentation
+// `deep`, followed by its left and then right operand at indentation `deep + 4`.
+// Operands that are themselves arithmetic nodes are printed recursively;
+// constants and names go through print_name_or_const; anything else is reported
+// by its numeric element type.
+void print_operation(parser::elements::Arithmetic *arithmetic, int deep) {
+    std::cout << tab(deep);
+    switch (arithmetic->specific_type) {
+        case parser::ADD: std::cout << "addition"; break;
+        case parser::SUBTRACT: std::cout << "subtraction"; break;
+        case parser::DIVIDE: std::cout << "division"; break;
+        case parser::MULTIPLY: std::cout << "multiplication"; break;
+    }
+    std::cout << std::endl;
+
+    // Both operands are printed the same way, so the logic lives in one place.
+    const auto print_operand = [deep](const std::unique_ptr<parser::elements::ParserElement> &operand) {
+        if (operand == nullptr) {
+            // An operator whose operands were never attached; report it instead of crashing.
+            std::cout << tab(deep + 4) << "missing operand!" << std::endl;
+            return;
+        }
+        switch (operand->type) {
+            case parser::CONST_DEFINE:
+            case parser::NAME:
+                print_name_or_const(operand.get(), deep + 4);
+                break;
+            case parser::ARITHMETIC:
+                print_operation((parser::elements::Arithmetic *) operand.get(), deep + 4);
+                break;
+            default:
+                std::cout << tab(deep + 4) << "element of type " << operand->type << std::endl;
+        }
+    };
+
+    print_operand(arithmetic->left);
+    print_operand(arithmetic->right);
+}
+
void print_expression(const std::unique_ptr<parser::elements::Expression> &expression, int deep) {
using namespace parser;
for (const std::unique_ptr<elements::ParserElement> &e : expression->children) {
switch (e->type) {
case parser::COMPARE: {
auto *compare = (elements::Compare *) e.get();
- std::cout << tab(deep) << "compare of type " << compare->comparison_type << std::endl;
+ std::cout << tab(deep) << "compare of type " << compare->specific_type << std::endl;
break;
}
+ case parser::NAME:
case parser::CONST_DEFINE: {
- auto *constant = (elements::ConstDefine *) e.get();
- std::cout << tab(deep) << "constant " << (constant->data_type == INT ? "int: " : "string: ")
- << constant->value << std::endl;
+ print_name_or_const(e.get(), deep);
break;
}
- case parser::NAME: {
- auto *name = (elements::Name *) e.get();
- std::cout << tab(deep) << "name " << name->name << std::endl;
+ case parser::ARITHMETIC: {
+ auto *arithmetic = (elements::Arithmetic *) e.get();
+ print_operation(arithmetic, deep);
break;
}
default:
@@ -103,6 +149,7 @@ int main() {
{.type=lexer::TokenType::RIGHT_PARENT, .definition=")", .handler=lexer::HandlerType::SINGLE_CHAR},
{.type=lexer::TokenType::LEFT_BRACKET, .definition="{", .handler=lexer::HandlerType::SINGLE_CHAR},
{.type=lexer::TokenType::RIGHT_BRACKET, .definition="}", .handler=lexer::HandlerType::SINGLE_CHAR},
+ {.type=lexer::TokenType::ARITHMETIC, .definition=R"(\+|\-|\*|\/)", .handler=lexer::HandlerType::REGEX},
{.type=lexer::TokenType::NAME, .definition=R"([A-Za-z_](?:[\w]+)?)", .handler=lexer::HandlerType::REGEX},
{.type=lexer::TokenType::NUMBER_LITERAL, .definition=R"([0-9]+)", .handler=lexer::HandlerType::REGEX},
{.type=lexer::TokenType::STRING_LITERAL, .handler=lexer::HandlerType::STRING}
@@ -112,7 +159,7 @@ int main() {
{lexer::TokenType::PRINT, {"print"}},
{lexer::TokenType::IF, {"if"}}
};
- std::vector<lexer::Token> out = lxr.tokenize("int integer = 120; print asdfa; if (integer == 2) {print \"hii\";}");
+ std::vector<lexer::Token> out = lxr.tokenize("int integer = 120 + 2 - 1/2; print asdfa; if (integer == 2) {print \"hii\";}");
lxr.convert_reserved(out);
for (const lexer::Token &token : out) {
std::cout << token.value << " ";
diff --git a/parser/parser.cpp b/parser/parser.cpp
@@ -101,25 +101,26 @@ parser::elements::Statement *parser::parse_statement(const std::vector<lexer::To
switch (token2.type) {
case lexer::ASSIGNMENT: {
if (stream_size < consumed + 3) {
- error("Nothing to assign.", reconstruct_code(token_stream));
+ error("STMT8: Nothing to assign.", reconstruct_code(token_stream));
}
parser_element = (elements::ParserElement *) new elements::Assignment{
.name = token.value,
.value = std::unique_ptr<elements::Expression>(
- parse_expression(token_stream, consumed + 2, consumed + 3)),
+ parse_expression(token_stream, consumed + 2, token_stream.size())),
};
break;
}
default:
- error("Token " + token2.value + " unexpected at this point.", reconstruct_code(token_stream));
+ error("STMT7: Token " + token2.value + " unexpected at this point.",
+ reconstruct_code(token_stream));
}
break;
}
case lexer::TYPE: {
if (stream_size < consumed + 2) {
- error("What am I declaring? Missing name to declare.", reconstruct_code(token_stream));
+ error("STMT5: What am I declaring? Missing name to declare.", reconstruct_code(token_stream));
} else if (token_stream[consumed + 1].type != lexer::TokenType::NAME) {
- error("Can only declare names.", reconstruct_code(token_stream));
+ error("STMT6: Can only declare names.", reconstruct_code(token_stream));
}
parser_element = (elements::ParserElement *) new elements::Declaration{
.name = token_stream[consumed + 1].value,
@@ -133,35 +134,35 @@ parser::elements::Statement *parser::parse_statement(const std::vector<lexer::To
}
case lexer::PRINT: {
if (stream_size < consumed + 2) {
- error("What am I printing? Missing expression to print.", reconstruct_code(token_stream));
+ error("STMT4: What am I printing? Missing expression to print.", reconstruct_code(token_stream));
}
std::unique_ptr<elements::Expression> expr(parse_expression(token_stream, consumed + 1, consumed + 2));
- parser_element = (elements::ParserElement*)new elements::Print{
+ parser_element = (elements::ParserElement *) new elements::Print{
.value = std::move(expr)
};
break;
}
case lexer::IF: {
if (stream_size < consumed + 2 || token_stream[consumed + 1].type != lexer::LEFT_PARENT) {
- error("Expected (, found: " + token.value, reconstruct_code(token_stream));
+ error("STMT3: Expected (, found: " + token.value, reconstruct_code(token_stream));
}
int closing_pos = find_matching(token_stream, consumed + 2, lexer::LEFT_PARENT);
if (closing_pos == -1) {
- error("Could not find matching ) for (", reconstruct_code(token_stream));
+ error("STMT2: Could not find matching ) for (", reconstruct_code(token_stream));
}
std::unique_ptr<elements::Expression> expr(parse_expression(token_stream, consumed + 2, closing_pos));
parser_element = (elements::ParserElement *) new elements::If{.expression = std::move(expr)};
- consumed = closing_pos+1;
+ consumed = closing_pos + 1;
stop = false; // we want the block that is following (or command)
break;
}
case lexer::LEFT_BRACKET: {
- parser_element = (elements::ParserElement *)parse_block(token_stream, consumed + 1);
+ parser_element = (elements::ParserElement *) parse_block(token_stream, consumed + 1);
break;
}
default:
parser_element = new elements::ParserElement();
- error("Token " + token.value + " unexpected at this point", reconstruct_code(token_stream));
+ error("STMT1: Token " + token.value + " unexpected at this point", reconstruct_code(token_stream));
}
if (parser_element != nullptr) {
@@ -194,17 +195,81 @@ parser::parse_expression(const std::vector<lexer::Token> &token_stream, int star
};
break;
case lexer::EQ:
- el = (elements::ParserElement *) new parser::elements::Compare{.comparison_type=EQUALS};
+ el = (elements::ParserElement *) new parser::elements::Compare{.specific_type=EQUALS};
break;
+ case lexer::ARITHMETIC: {
+ ArithmeticTypes t;
+ if (token.value == "+") t = ADD;
+ else if (token.value == "-") t = SUBTRACT;
+ else if (token.value == "*") t = MULTIPLY;
+ else if (token.value == "/") t = DIVIDE;
+ else error("EXP3: Unknown arithmetic symbol.", reconstruct_code(token_stream));
+ el = (elements::ParserElement *) new parser::elements::Arithmetic{.specific_type=t};
+ break;
+ }
default:
el = new parser::elements::ParserElement{};
- error("Token " + token.value + " unexpected at this point", reconstruct_code(token_stream));
+ error("EXP1: Token " + token.value + " unexpected at this point", reconstruct_code(token_stream));
break;
}
expression->children.push_back(std::unique_ptr<elements::ParserElement>(el));
}
- // resolve equality
+    // Fold every still-unresolved arithmetic operator of the two given kinds
+    // into a binary node by moving its neighbouring elements into left/right.
+    // The scan runs left to right, so operators of equal precedence associate
+    // to the left: "a - b - c" becomes "(a - b) - c".
+    // std::list::erase only invalidates iterators to the erased elements, so
+    // the operator iterator stays valid and one scan per precedence level is enough.
+    // NOTE(review): assumes error() does not return (throws or exits) - confirm.
+    const auto fold_binary_operators = [&](ArithmeticTypes first_op, ArithmeticTypes second_op) {
+        auto &children = expression->children;
+        for (auto i = children.begin(); i != children.end(); ++i) {
+            if ((*i)->type != ARITHMETIC) continue;
+            auto *c = (elements::Arithmetic *) i->get();
+            // Skip operators that already own operands and operators of another precedence level.
+            if (c->left != nullptr || c->right != nullptr) continue;
+            if (c->specific_type != first_op && c->specific_type != second_op) continue;
+            // A binary operator needs a neighbour on both sides.
+            if (i == children.begin() || std::next(i) == children.end()) {
+                error("ROP1: Missing operands!", reconstruct_code(token_stream));
+            }
+            auto left = std::prev(i);
+            auto right = std::next(i);
+            c->left = std::move(*left);
+            c->right = std::move(*right);
+            children.erase(left);
+            children.erase(right);
+        }
+    };
+
+    // resolve multiplication and division (binds tighter, so it goes first)
+    fold_binary_operators(MULTIPLY, DIVIDE);
+
+    // resolve addition and subtraction
+    fold_binary_operators(ADD, SUBTRACT);
+
+ // resolve comparisons
for (bool stop = false; !stop;) {
stop = true;
auto end = expression->children.end();
@@ -216,7 +281,7 @@ parser::parse_expression(const std::vector<lexer::Token> &token_stream, int star
if (c->left != nullptr || c->right != nullptr) continue;
if (std::distance(begin, i) <= 0 || std::distance(end, i) <= 0) {// || std::distance(end, i) >= 0) {
std::cout << std::distance(end, i) << std::endl;
- error("Missing expression to compare!", reconstruct_code(token_stream));
+ error("EXP2: Missing expression to compare!", reconstruct_code(token_stream));
}
c->left = std::move(*(--i));
auto first = i;
@@ -230,6 +295,10 @@ parser::parse_expression(const std::vector<lexer::Token> &token_stream, int star
}
}
}
+
+    // After all operators are folded, a well-formed expression has at most one
+    // top-level element; anything more means operands without an operator.
+    // NOTE(review): code EXP3 is also used for "Unknown arithmetic symbol." in
+    // this function - confirm whether the two should have distinct codes.
+    if (expression->children.size() > 1) {
+        error("EXP3: Invalid expression. Too many elements.", reconstruct_code(token_stream));
+    }
+
return expression;
}
-
diff --git a/parser/parser.h b/parser/parser.h
@@ -7,83 +7,9 @@
#include <list>
#include <memory>
#include "../lexer/lexer.h"
+#include "parser_elements.h"
namespace parser {
- enum ParserElementType {
- NONE,
- BLOCK,
- EXPRESSION,
- STATEMENT,
- DECLARATION,
- ASSIGNMENT,
- PRINT,
- CONST_DEFINE,
- NAME,
- CALL,
- COMPARE,
- IF
- };
- enum DataType {
- INT,
- STRING
- };
- enum CompTypes {
- EQUALS
- };
- namespace elements {
- struct ParserElement {
- ParserElementType type = NONE;
- };
- struct Expression {
- ParserElementType type = EXPRESSION;
- std::list<std::unique_ptr<ParserElement>> children;
- };
- struct Statement {
- ParserElementType type = STATEMENT;
- std::vector<std::unique_ptr<ParserElement>> children;
- };
- struct Block {
- ParserElementType type = BLOCK;
- std::vector<std::unique_ptr<Statement>> children;
- };
- struct Declaration {
- ParserElementType type = DECLARATION;
- std::string name;
- DataType data_type;
- };
- struct Assignment {
- ParserElementType type = ASSIGNMENT;
- std::string name;
- std::unique_ptr<Expression> value;
- };
- struct ConstDefine {
- ParserElementType type = CONST_DEFINE;
- DataType data_type;
- std::string value;
- };
- struct Call {
- ParserElementType type = CALL;
- std::string name;
- };
- struct Name {
- ParserElementType type = NAME;
- std::string name;
- };
- struct Print {
- ParserElementType type = PRINT;
- std::unique_ptr<Expression> value;
- };
- struct Compare {
- ParserElementType type = COMPARE;
- CompTypes comparison_type;
- std::unique_ptr<ParserElement> left;
- std::unique_ptr<ParserElement> right;
- };
- struct If {
- ParserElementType type = IF;
- std::unique_ptr<Expression> expression;
- };
- }
elements::Block *parse_block(const std::vector<lexer::Token> &token_stream, int start_at);
diff --git a/parser/parser_elements.h b/parser/parser_elements.h
@@ -0,0 +1,95 @@
+//
+// Created by bain on 01.05.21.
+//
+
+#ifndef SHITSHOW_PARSER_ELEMENTS_H
+#define SHITSHOW_PARSER_ELEMENTS_H
+
+// Included here so this header compiles on its own and does not depend on the
+// include order of whichever file pulls it in.
+#include <list>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace parser {
+    // Tag stored in the first member (`type`) of every element struct below.
+    enum ParserElementType {
+        NONE,
+        BLOCK,
+        EXPRESSION,
+        STATEMENT,
+        DECLARATION,
+        ASSIGNMENT,
+        PRINT,
+        CONST_DEFINE,
+        NAME,
+        CALL,
+        COMPARE,
+        IF,
+        ARITHMETIC
+    };
+    // Data types of declarations and constants.
+    enum DataType {
+        INT,
+        STRING
+    };
+    // Kinds of comparison a Compare element can hold.
+    enum CompTypes {
+        EQUALS
+    };
+    // Kinds of binary arithmetic an Arithmetic element can hold.
+    enum ArithmeticTypes {
+        ADD, SUBTRACT, MULTIPLY, DIVIDE
+    };
+    namespace elements {
+        // Every struct here starts with a ParserElementType `type` member.
+        // The parser and printer cast pointers between ParserElement and the
+        // concrete structs based on that tag; the structs are NOT related by
+        // inheritance.
+        // NOTE(review): elements are stored in std::unique_ptr<ParserElement>,
+        // so they are deleted as ParserElement and the concrete struct's
+        // members (strings, child pointers) are not destroyed. Consider a
+        // common base with a virtual destructor or std::variant.
+        struct ParserElement {
+            ParserElementType type = NONE;
+        };
+        // Flat list of elements; a list lets the parser fold neighbours
+        // without invalidating other iterators.
+        struct Expression {
+            ParserElementType type = EXPRESSION;
+            std::list<std::unique_ptr<ParserElement>> children;
+        };
+        struct Statement {
+            ParserElementType type = STATEMENT;
+            std::vector<std::unique_ptr<ParserElement>> children;
+        };
+        struct Block {
+            ParserElementType type = BLOCK;
+            std::vector<std::unique_ptr<Statement>> children;
+        };
+        // Declaration of `name` with the given data type.
+        struct Declaration {
+            ParserElementType type = DECLARATION;
+            std::string name;
+            DataType data_type = INT;
+        };
+        // Assignment of an expression's value to `name`.
+        struct Assignment {
+            ParserElementType type = ASSIGNMENT;
+            std::string name;
+            std::unique_ptr<Expression> value;
+        };
+        // Constant whose value is kept as text together with its data type.
+        struct ConstDefine {
+            ParserElementType type = CONST_DEFINE;
+            DataType data_type = INT;
+            std::string value;
+        };
+        struct Call {
+            ParserElementType type = CALL;
+            std::string name;
+        };
+        struct Name {
+            ParserElementType type = NAME;
+            std::string name;
+        };
+        struct Print {
+            ParserElementType type = PRINT;
+            std::unique_ptr<Expression> value;
+        };
+        // Binary comparison; left/right stay null until the parser attaches
+        // the neighbouring elements.
+        struct Compare {
+            ParserElementType type = COMPARE;
+            std::unique_ptr<ParserElement> left;
+            std::unique_ptr<ParserElement> right;
+            CompTypes specific_type = EQUALS;
+        };
+        // Binary arithmetic operation; same member layout as Compare, and
+        // left/right stay null until the parser attaches the operands.
+        struct Arithmetic {
+            ParserElementType type = ARITHMETIC;
+            std::unique_ptr<ParserElement> left;
+            std::unique_ptr<ParserElement> right;
+            ArithmeticTypes specific_type = ADD;
+        };
+        // Condition of an if statement.
+        struct If {
+            ParserElementType type = IF;
+            std::unique_ptr<Expression> expression;
+        };
+    }
+}
+#endif //SHITSHOW_PARSER_ELEMENTS_H