shitshow

A shitty programming language
git clone git://git.bain.cz/shitshow.git
Log | Files | Refs | README

parser.cpp (12965B)


      1 
      2 #include "parser.h"
      3 #include <iostream>
      4 
      5 void error(const std::string &msg) {
      6     std::cerr << "Error while parsing:" << std::endl << msg << std::endl;
      7     std::exit(1);
      8 }
      9 
     10 void error(const std::string &msg, const std::string &linectx) {
     11     std::cerr << "Error while parsing:" << std::endl << msg << std::endl;
     12     std::cerr << "    " << linectx << std::endl;
     13     std::exit(1);
     14 }
     15 
     16 void error(const std::string &msg, const std::string &linectx, const int &column) {
     17     std::cerr << "Error while parsing:" << std::endl << msg << std::endl;
     18     std::cerr << "    " << linectx << std::endl;
     19     for (int i = 0; i < column + 4; i++) std::cerr << " ";
     20     std::cerr << std::endl;
     21     std::exit(1);
     22 }
     23 
     24 std::string reconstruct_code(const std::vector<lexer::Token> &tokens) {
     25     std::string output;
     26     for (const auto &token : tokens) output += token.value + " ";
     27     return output;
     28 }
     29 
     30 int find_matching(const std::vector<lexer::Token> &stream, int start_at, const lexer::TokenType &type) {
     31     lexer::TokenType matching = type == lexer::LEFT_PARENT ? lexer::RIGHT_PARENT : lexer::RIGHT_BRACKET;
     32     int count = 0;
     33     while (start_at < stream.size()) {
     34         if (stream[start_at].type == type) count++;
     35         if (stream[start_at].type == matching) {
     36             if (!count) return start_at;
     37             count--;
     38         }
     39         start_at++;
     40     }
     41     return -1;
     42 }
     43 
     44 
     45 parser::elements::Block *parser::parse_block(const std::vector<lexer::Token> &token_stream, int start_at) {
     46     int consumed = start_at;
     47     int brackets = 0;
     48     auto *block = new elements::Block{};
     49     std::vector<lexer::Token> statement_tokens;
     50     while (consumed < token_stream.size()) {
     51         lexer::Token token = token_stream[consumed];
     52         switch (token.type) {
     53             case lexer::SEMICOLON: {
     54                 if (brackets) {
     55                     statement_tokens.push_back(token);
     56                     break;
     57                 }
     58                 block->children.push_back(std::unique_ptr<elements::Statement>(parse_statement(statement_tokens)));
     59                 statement_tokens.clear();
     60                 break;
     61             }
     62             case lexer::LEFT_BRACKET: {
     63                 brackets++;
     64                 statement_tokens.push_back(token);
     65                 break;
     66             }
     67             case lexer::RIGHT_BRACKET:
     68                 if (brackets == 1) {
     69                     block->children.push_back(std::unique_ptr<elements::Statement>(parse_statement(statement_tokens)));
     70                     statement_tokens.clear();
     71                 } else if (!brackets) {
     72                     consumed = token_stream.size();
     73                 }
     74                 brackets--;
     75                 break;
     76             default:
     77                 statement_tokens.push_back(token);
     78         }
     79         consumed++;
     80     }
     81     return block;
     82 }
     83 
     84 parser::elements::Statement *parser::parse_statement(const std::vector<lexer::Token> &token_stream) {
     85     int stream_size = token_stream.size();
     86     int consumed = 0;
     87     auto *statement = new elements::Statement;
     88     if (token_stream.empty()) {
     89         return statement;
     90     }
     91     while (true) {
     92         bool stop = true;
     93         lexer::Token token = token_stream[consumed];
     94         elements::ParserElement *parser_element;
     95         switch (token.type) {
     96             case lexer::NAME: {
     97                 if (stream_size < consumed + 2) {
     98                     return statement;
     99                 }
    100                 lexer::Token token2 = token_stream[consumed + 1];
    101                 switch (token2.type) {
    102                     case lexer::ASSIGNMENT: {
    103                         if (stream_size < consumed + 3) {
    104                             error("STMT8: Nothing to assign.", reconstruct_code(token_stream));
    105                         }
    106                         parser_element = (elements::ParserElement *) new elements::Assignment{
    107                                 .name = token.value,
    108                                 .value = std::unique_ptr<elements::Expression>(
    109                                         parse_expression(token_stream, consumed + 2, token_stream.size())),
    110                         };
    111                         break;
    112                     }
    113                     default:
    114                         error("STMT7: Token " + token2.value + " unexpected at this point.",
    115                               reconstruct_code(token_stream));
    116                 }
    117                 break;
    118             }
    119             case lexer::TYPE: {
    120                 if (stream_size < consumed + 2) {
    121                     error("STMT5: What am I declaring? Missing name to declare.", reconstruct_code(token_stream));
    122                 } else if (token_stream[consumed + 1].type != lexer::TokenType::NAME) {
    123                     error("STMT6: Can only declare names.", reconstruct_code(token_stream));
    124                 }
    125                 parser_element = (elements::ParserElement *) new elements::Declaration{
    126                         .name = token_stream[consumed + 1].value,
    127                         .data_type = token_stream[consumed + 1].value == "int" ? INT : STRING
    128                 };
    129                 if (token_stream.size() > consumed + 2 && token_stream[consumed + 2].type == lexer::ASSIGNMENT) {
    130                     consumed++;
    131                     stop = false; // continue to run the main loop to get the assignment
    132                 }
    133                 break;
    134             }
    135             case lexer::PRINT: {
    136                 if (stream_size < consumed + 2) {
    137                     error("STMT4: What am I printing? Missing expression to print.", reconstruct_code(token_stream));
    138                 }
    139                 std::unique_ptr<elements::Expression> expr(parse_expression(token_stream, consumed + 1, consumed + 2));
    140                 parser_element = (elements::ParserElement *) new elements::Print{
    141                         .value = std::move(expr)
    142                 };
    143                 break;
    144             }
    145             case lexer::WHILE:
    146             case lexer::IF: {
    147                 if (stream_size < consumed + 2 || token_stream[consumed + 1].type != lexer::LEFT_PARENT) {
    148                     error("STMT3: Expected (, found: " + token.value, reconstruct_code(token_stream));
    149                 }
    150                 int closing_pos = find_matching(token_stream, consumed + 2, lexer::LEFT_PARENT);
    151                 if (closing_pos == -1) {
    152                     error("STMT2: Could not find matching ) for (", reconstruct_code(token_stream));
    153                 }
    154                 std::unique_ptr<elements::Expression> expr(parse_expression(token_stream, consumed + 2, closing_pos));
    155                 if (token.type == lexer::IF) {
    156                     parser_element = (elements::ParserElement *) new elements::If{.expression = std::move(expr)};
    157                 } else {
    158                     parser_element = (elements::ParserElement *) new elements::While{.expression = std::move(expr)};
    159                 }
    160                 consumed = closing_pos + 1;
    161                 stop = false; // we want the block that is following (or command)
    162                 break;
    163             }
    164             case lexer::LEFT_BRACKET: {
    165                 parser_element = (elements::ParserElement *) parse_block(token_stream, consumed + 1);
    166                 break;
    167             }
    168             default:
    169                 parser_element = new elements::ParserElement();
    170                 error("STMT1: Token " + token.value + " unexpected at this point", reconstruct_code(token_stream));
    171         }
    172 
    173         if (parser_element != nullptr) {
    174             statement->children.push_back(std::unique_ptr<elements::ParserElement>(parser_element));
    175         }
    176         if (stop) break; // break out of the main loop
    177     }
    178     return statement;
    179 }
    180 
    181 void
    182 resolve_arithmetic(parser::elements::Expression *expression, parser::ArithmeticTypes t1, parser::ArithmeticTypes t2,
    183                    const std::vector<lexer::Token> &token_stream) {
    184     for (bool stop = false; !stop;) {
    185         stop = true;
    186         auto end = expression->children.end();
    187         auto begin = expression->children.begin();
    188         for (auto i = expression->children.begin();
    189              i != expression->children.end(); i++) { // NOLINT(cppcoreguidelines-narrowing-conversions)
    190             if ((*i)->type == parser::ARITHMETIC) {
    191                 auto *c = (parser::elements::Arithmetic *) i->get();
    192                 if (c->left != nullptr || c->right != nullptr ||
    193                     (c->specific_type != t1 && c->specific_type != t2))
    194                     continue;
    195                 if (std::distance(begin, i) <= 0 || std::distance(end, i) <= 0) {// || std::distance(end, i) >= 0) {
    196                     std::cout << std::distance(end, i) << std::endl;
    197                     error("ROP1: Missing operands!", reconstruct_code(token_stream));
    198                 }
    199                 c->left = std::move(*(--i));
    200                 auto first = i;
    201                 i++;
    202                 i++;
    203                 c->right = std::move(*i);
    204                 expression->children.erase(i);
    205                 expression->children.erase(first);
    206                 stop = false;
    207                 break;
    208             }
    209         }
    210     }
    211 }
    212 
    213 parser::elements::Expression *
    214 parser::parse_expression(const std::vector<lexer::Token> &token_stream, int start_at, int end_at) {
    215     auto *expression = new parser::elements::Expression;
    216 
    217     // add all tokens to the expression
    218     for (int i = start_at; i < end_at; i++) {
    219         lexer::Token token = token_stream[i];
    220         elements::ParserElement *el;
    221         switch (token.type) {
    222             case lexer::STRING_LITERAL:
    223             case lexer::NUMBER_LITERAL:
    224                 el = (elements::ParserElement *) new parser::elements::ConstDefine{
    225                         .data_type = token.type == lexer::NUMBER_LITERAL ? INT : STRING,
    226                         .value = token.value
    227                 };
    228                 break;
    229             case lexer::NAME:
    230                 el = (elements::ParserElement *) new parser::elements::Name{
    231                         .name = token.value
    232                 };
    233                 break;
    234             case lexer::EQ:
    235                 el = (elements::ParserElement *) new parser::elements::Compare{.specific_type=EQUALS};
    236                 break;
    237             case lexer::ARITHMETIC: {
    238                 ArithmeticTypes t;
    239                 if (token.value == "+") t = ADD;
    240                 else if (token.value == "-") t = SUBTRACT;
    241                 else if (token.value == "*") t = MULTIPLY;
    242                 else if (token.value == "/") t = DIVIDE;
    243                 else error("EXP3: Unknown arithmetic symbol.", reconstruct_code(token_stream));
    244                 el = (elements::ParserElement *) new parser::elements::Arithmetic{.specific_type=t};
    245                 break;
    246             }
    247             case lexer::LEFT_PARENT: {
    248                 int match = find_matching(token_stream, i+1, lexer::LEFT_PARENT);
    249                 if (match == -1) {
    250                     error("EXP4: Cannot find matching parenthesis.", reconstruct_code(token_stream));
    251                 }
    252                 el = (elements::ParserElement*) parse_expression(token_stream, i+1, match);
    253                 i = match;
    254                 break;
    255             }
    256             default:
    257                 el = new parser::elements::ParserElement{};
    258                 error("EXP1: Token " + token.value + " unexpected at this point", reconstruct_code(token_stream));
    259                 break;
    260         }
    261         expression->children.push_back(std::unique_ptr<elements::ParserElement>(el));
    262     }
    263 
    264     // resolve multiplication and division
    265     resolve_arithmetic(expression, MULTIPLY, DIVIDE, token_stream);
    266 
    267     // resolve addition and subtraction
    268     resolve_arithmetic(expression, ADD, SUBTRACT, token_stream);
    269 
    270     // resolve comparisons
    271     for (bool stop = false; !stop;) {
    272         stop = true;
    273         auto end = expression->children.end();
    274         auto begin = expression->children.begin();
    275         for (auto i = expression->children.begin();
    276              i != expression->children.end(); i++) { // NOLINT(cppcoreguidelines-narrowing-conversions)
    277             if ((*i)->type == COMPARE) {
    278                 auto *c = (elements::Compare *) i->get();
    279                 if (c->left != nullptr || c->right != nullptr) continue;
    280                 if (std::distance(begin, i) <= 0 || std::distance(end, i) <= 0) {// || std::distance(end, i) >= 0) {
    281                     std::cout << std::distance(end, i) << std::endl;
    282                     error("EXP2: Missing expression to compare!", reconstruct_code(token_stream));
    283                 }
    284                 c->left = std::move(*(--i));
    285                 auto first = i;
    286                 i++;
    287                 i++;
    288                 c->right = std::move(*i);
    289                 expression->children.erase(i);
    290                 expression->children.erase(first);
    291                 stop = false;
    292                 break;
    293             }
    294         }
    295     }
    296 
    297     if (expression->children.size() > 1) {
    298         error("EXP3: Invalid experssion. Too many elements.", reconstruct_code(token_stream));
    299     }
    300 
    301     return expression;
    302 }