parser.cpp (12965B)
1 2 #include "parser.h" 3 #include <iostream> 4 5 void error(const std::string &msg) { 6 std::cerr << "Error while parsing:" << std::endl << msg << std::endl; 7 std::exit(1); 8 } 9 10 void error(const std::string &msg, const std::string &linectx) { 11 std::cerr << "Error while parsing:" << std::endl << msg << std::endl; 12 std::cerr << " " << linectx << std::endl; 13 std::exit(1); 14 } 15 16 void error(const std::string &msg, const std::string &linectx, const int &column) { 17 std::cerr << "Error while parsing:" << std::endl << msg << std::endl; 18 std::cerr << " " << linectx << std::endl; 19 for (int i = 0; i < column + 4; i++) std::cerr << " "; 20 std::cerr << std::endl; 21 std::exit(1); 22 } 23 24 std::string reconstruct_code(const std::vector<lexer::Token> &tokens) { 25 std::string output; 26 for (const auto &token : tokens) output += token.value + " "; 27 return output; 28 } 29 30 int find_matching(const std::vector<lexer::Token> &stream, int start_at, const lexer::TokenType &type) { 31 lexer::TokenType matching = type == lexer::LEFT_PARENT ? lexer::RIGHT_PARENT : lexer::RIGHT_BRACKET; 32 int count = 0; 33 while (start_at < stream.size()) { 34 if (stream[start_at].type == type) count++; 35 if (stream[start_at].type == matching) { 36 if (!count) return start_at; 37 count--; 38 } 39 start_at++; 40 } 41 return -1; 42 } 43 44 45 parser::elements::Block *parser::parse_block(const std::vector<lexer::Token> &token_stream, int start_at) { 46 int consumed = start_at; 47 int brackets = 0; 48 auto *block = new elements::Block{}; 49 std::vector<lexer::Token> statement_tokens; 50 while (consumed < token_stream.size()) { 51 lexer::Token token = token_stream[consumed]; 52 switch (token.type) { 53 case lexer::SEMICOLON: { 54 if (brackets) { 55 statement_tokens.push_back(token); 56 break; 57 } 58 block->children.push_back(std::unique_ptr<elements::Statement>(parse_statement(statement_tokens))); 59 statement_tokens.clear(); 60 break; 61 } 62 case lexer::LEFT_BRACKET: { 63 brackets++; 64 statement_tokens.push_back(token); 65 break; 66 } 67 case lexer::RIGHT_BRACKET: 68 if (brackets == 1) { 69 block->children.push_back(std::unique_ptr<elements::Statement>(parse_statement(statement_tokens))); 70 statement_tokens.clear(); 71 } else if (!brackets) { 72 consumed = token_stream.size(); 73 } 74 brackets--; 75 break; 76 default: 77 statement_tokens.push_back(token); 78 } 79 consumed++; 80 } 81 return block; 82 } 83 84 parser::elements::Statement *parser::parse_statement(const std::vector<lexer::Token> &token_stream) { 85 int stream_size = token_stream.size(); 86 int consumed = 0; 87 auto *statement = new elements::Statement; 88 if (token_stream.empty()) { 89 return statement; 90 } 91 while (true) { 92 bool stop = true; 93 lexer::Token token = token_stream[consumed]; 94 elements::ParserElement *parser_element; 95 switch (token.type) { 96 case lexer::NAME: { 97 if (stream_size < consumed + 2) { 98 return statement; 99 } 100 lexer::Token token2 = token_stream[consumed + 1]; 101 switch (token2.type) { 102 case lexer::ASSIGNMENT: { 103 if (stream_size < consumed + 3) { 104 error("STMT8: Nothing to assign.", reconstruct_code(token_stream)); 105 } 106 parser_element = (elements::ParserElement *) new elements::Assignment{ 107 .name = token.value, 108 .value = std::unique_ptr<elements::Expression>( 109 parse_expression(token_stream, consumed + 2, token_stream.size())), 110 }; 111 break; 112 } 113 default: 114 error("STMT7: Token " + token2.value + " unexpected at this point.", 115 reconstruct_code(token_stream)); 116 } 117 break; 118 } 119 case lexer::TYPE: { 120 if (stream_size < consumed + 2) { 121 error("STMT5: What am I declaring? Missing name to declare.", reconstruct_code(token_stream)); 122 } else if (token_stream[consumed + 1].type != lexer::TokenType::NAME) { 123 error("STMT6: Can only declare names.", reconstruct_code(token_stream)); 124 } 125 parser_element = (elements::ParserElement *) new elements::Declaration{ 126 .name = token_stream[consumed + 1].value, 127 .data_type = token_stream[consumed + 1].value == "int" ? INT : STRING 128 }; 129 if (token_stream.size() > consumed + 2 && token_stream[consumed + 2].type == lexer::ASSIGNMENT) { 130 consumed++; 131 stop = false; // continue to run the main loop to get the assignment 132 } 133 break; 134 } 135 case lexer::PRINT: { 136 if (stream_size < consumed + 2) { 137 error("STMT4: What am I printing? Missing expression to print.", reconstruct_code(token_stream)); 138 } 139 std::unique_ptr<elements::Expression> expr(parse_expression(token_stream, consumed + 1, consumed + 2)); 140 parser_element = (elements::ParserElement *) new elements::Print{ 141 .value = std::move(expr) 142 }; 143 break; 144 } 145 case lexer::WHILE: 146 case lexer::IF: { 147 if (stream_size < consumed + 2 || token_stream[consumed + 1].type != lexer::LEFT_PARENT) { 148 error("STMT3: Expected (, found: " + token.value, reconstruct_code(token_stream)); 149 } 150 int closing_pos = find_matching(token_stream, consumed + 2, lexer::LEFT_PARENT); 151 if (closing_pos == -1) { 152 error("STMT2: Could not find matching ) for (", reconstruct_code(token_stream)); 153 } 154 std::unique_ptr<elements::Expression> expr(parse_expression(token_stream, consumed + 2, closing_pos)); 155 if (token.type == lexer::IF) { 156 parser_element = (elements::ParserElement *) new elements::If{.expression = std::move(expr)}; 157 } else { 158 parser_element = (elements::ParserElement *) new elements::While{.expression = std::move(expr)}; 159 } 160 consumed = closing_pos + 1; 161 stop = false; // we want the block that is following (or command) 162 break; 163 } 164 case lexer::LEFT_BRACKET: { 165 parser_element = (elements::ParserElement *) parse_block(token_stream, consumed + 1); 166 break; 167 } 168 default: 169 parser_element = new elements::ParserElement(); 170 error("STMT1: Token " + token.value + " unexpected at this point", reconstruct_code(token_stream)); 171 } 172 173 if (parser_element != nullptr) { 174 statement->children.push_back(std::unique_ptr<elements::ParserElement>(parser_element)); 175 } 176 if (stop) break; // break out of the main loop 177 } 178 return statement; 179 } 180 181 void 182 resolve_arithmetic(parser::elements::Expression *expression, parser::ArithmeticTypes t1, parser::ArithmeticTypes t2, 183 const std::vector<lexer::Token> &token_stream) { 184 for (bool stop = false; !stop;) { 185 stop = true; 186 auto end = expression->children.end(); 187 auto begin = expression->children.begin(); 188 for (auto i = expression->children.begin(); 189 i != expression->children.end(); i++) { // NOLINT(cppcoreguidelines-narrowing-conversions) 190 if ((*i)->type == parser::ARITHMETIC) { 191 auto *c = (parser::elements::Arithmetic *) i->get(); 192 if (c->left != nullptr || c->right != nullptr || 193 (c->specific_type != t1 && c->specific_type != t2)) 194 continue; 195 if (std::distance(begin, i) <= 0 || std::distance(end, i) <= 0) {// || std::distance(end, i) >= 0) { 196 std::cout << std::distance(end, i) << std::endl; 197 error("ROP1: Missing operands!", reconstruct_code(token_stream)); 198 } 199 c->left = std::move(*(--i)); 200 auto first = i; 201 i++; 202 i++; 203 c->right = std::move(*i); 204 expression->children.erase(i); 205 expression->children.erase(first); 206 stop = false; 207 break; 208 } 209 } 210 } 211 } 212 213 parser::elements::Expression * 214 parser::parse_expression(const std::vector<lexer::Token> &token_stream, int start_at, int end_at) { 215 auto *expression = new parser::elements::Expression; 216 217 // add all tokens to the expression 218 for (int i = start_at; i < end_at; i++) { 219 lexer::Token token = token_stream[i]; 220 elements::ParserElement *el; 221 switch (token.type) { 222 case lexer::STRING_LITERAL: 223 case lexer::NUMBER_LITERAL: 224 el = (elements::ParserElement *) new parser::elements::ConstDefine{ 225 .data_type = token.type == lexer::NUMBER_LITERAL ? INT : STRING, 226 .value = token.value 227 }; 228 break; 229 case lexer::NAME: 230 el = (elements::ParserElement *) new parser::elements::Name{ 231 .name = token.value 232 }; 233 break; 234 case lexer::EQ: 235 el = (elements::ParserElement *) new parser::elements::Compare{.specific_type=EQUALS}; 236 break; 237 case lexer::ARITHMETIC: { 238 ArithmeticTypes t; 239 if (token.value == "+") t = ADD; 240 else if (token.value == "-") t = SUBTRACT; 241 else if (token.value == "*") t = MULTIPLY; 242 else if (token.value == "/") t = DIVIDE; 243 else error("EXP3: Unknown arithmetic symbol.", reconstruct_code(token_stream)); 244 el = (elements::ParserElement *) new parser::elements::Arithmetic{.specific_type=t}; 245 break; 246 } 247 case lexer::LEFT_PARENT: { 248 int match = find_matching(token_stream, i+1, lexer::LEFT_PARENT); 249 if (match == -1) { 250 error("EXP4: Cannot find matching parenthesis.", reconstruct_code(token_stream)); 251 } 252 el = (elements::ParserElement*) parse_expression(token_stream, i+1, match); 253 i = match; 254 break; 255 } 256 default: 257 el = new parser::elements::ParserElement{}; 258 error("EXP1: Token " + token.value + " unexpected at this point", reconstruct_code(token_stream)); 259 break; 260 } 261 expression->children.push_back(std::unique_ptr<elements::ParserElement>(el)); 262 } 263 264 // resolve multiplication and division 265 resolve_arithmetic(expression, MULTIPLY, DIVIDE, token_stream); 266 267 // resolve addition and subtraction 268 resolve_arithmetic(expression, ADD, SUBTRACT, token_stream); 269 270 // resolve comparisons 271 for (bool stop = false; !stop;) { 272 stop = true; 273 auto end = expression->children.end(); 274 auto begin = expression->children.begin(); 275 for (auto i = expression->children.begin(); 276 i != expression->children.end(); i++) { // NOLINT(cppcoreguidelines-narrowing-conversions) 277 if ((*i)->type == COMPARE) { 278 auto *c = (elements::Compare *) i->get(); 279 if (c->left != nullptr || c->right != nullptr) continue; 280 if (std::distance(begin, i) <= 0 || std::distance(end, i) <= 0) {// || std::distance(end, i) >= 0) { 281 std::cout << std::distance(end, i) << std::endl; 282 error("EXP2: Missing expression to compare!", reconstruct_code(token_stream)); 283 } 284 c->left = std::move(*(--i)); 285 auto first = i; 286 i++; 287 i++; 288 c->right = std::move(*i); 289 expression->children.erase(i); 290 expression->children.erase(first); 291 stop = false; 292 break; 293 } 294 } 295 } 296 297 if (expression->children.size() > 1) { 298 error("EXP3: Invalid experssion. Too many elements.", reconstruct_code(token_stream)); 299 } 300 301 return expression; 302 }